From ca379768283e1213e6602b2ed8572c67104ad367 Mon Sep 17 00:00:00 2001
From: cinder <>
Date: Thu, 18 Apr 2024 17:15:09 -0700
Subject: [PATCH] add a kernel that can run code in userspace

---
 build.lua    |  58 ++++++++++-
 src/init.lua | 264 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 317 insertions(+), 5 deletions(-)
 create mode 100644 src/init.lua

diff --git a/build.lua b/build.lua
index 68aad68..8cd08cd 100644
--- a/build.lua
+++ b/build.lua
@@ -70,23 +70,71 @@ local function exportFile(srcpath, dstpath)
 		dstpath = outdir .. srcpath
 	end
 	local srcf = assert(io.open(srcpath))
+
+	--load the file to validate syntax, as a very minimal linter
+	do
+		local ok, err = loadfile(srcpath)
+		if not ok then
+			stderr:write(err, '\n')
+			os.exit(1)
+		end
+	end
+
+
 	local dstf = assert(io.open(dstpath, 'w'))
 
 	--for now, run a very minimal minifier
+	local longEnd --nil normally, some "]==]" string if we're in a long comment
+
 	for line in srcf:lines() do
-		local comment = line:find('--', nil, true)
-		if comment then
-			assert(line:sub(comment, comment + 3) ~= '--[', 'TODO block comments')
-			line = line:sub(1, comment-1)
+		::stripComments:: --strip comments, including weird ones like "--[[ ]]-- --[=[ ]=]-- --text" all in one line
+		--first, if we're in a long comment, see if that's over
+		if longEnd then
+			local i1, i2 = line:find(longEnd, 1, true)
+			if i1 then
+				--strip the long comment and mark that it ended
+				line = line:sub(i2+1)
+				longEnd = nil
+			else
+				--still in a long comment, so don't output anything
+				goto nextLine
+			end
 		end
-		line = assert(line:match('^%s*(.-)%s*$'))
+
+		--look for the start of the next comment (NOTE: a "--" inside a string literal is mistaken for one)
+		local comment = line:find('--', 1, true)
+		if not comment then --nothing left to do, break out of the loop
+			goto emitLine
+		end
+		if line:match('^%[=*%[', comment + 2) then --start of a long comment; "--[text" is only a short comment
+			local depth = #line:match('^%[(=*)%[', comment + 2) --number of "=" between the two brackets
+			longEnd = string.format(']%s]', string.rep('=', depth)) --Lua closes the comment here, with or without a trailing "--"
+			line = line:sub(comment + 4 + depth) -- #'--[' + depth + #'['; NOTE: code before the comment on this line is dropped
+			goto stripComments --start from the beginning in case the comment ends on the same line it starts on
+		else
+			--found a short comment, remove to end of line
+			line = line:sub(1, comment - 1)
+		end
+
+		::emitLine::
+
+		assert(not longEnd, 'shouldn\'t emit anything in a long comment')
+		line = assert(line:match('^%s*(.-)%s*$')) --strip leading and trailing whitespace
 		if #line > 0 then
 			dstf:write(line, '\n')
 		end
+
+		::nextLine::
 	end
+	srcf:close()
+	dstf:close() --flush the minified output now rather than whenever the handle gets collected
+	assert(not longEnd, 'unclosed long comment')
 end
 
 fs.makeDirectory(outdir)
 exportFile('boot.lua')
 
+fs.makeDirectory(outdir .. 'kitn-core/')
+exportFile('src/init.lua', outdir .. 'kitn-core/init.lua')
+
 -- vi: set ts=2:
diff --git a/src/init.lua b/src/init.lua
new file mode 100644
index 0000000..a1c342a
--- /dev/null
+++ b/src/init.lua
@@ -0,0 +1,264 @@
+--our own boot.lua gives us the boot [address, path] as arguments...
+local bootfs = ...
+-- ...but [lua bios, openloader] don't, so fall back to the usual way
+bootfs = bootfs or computer.getBootAddress()
+
+
+--store all functions we need in locals so userspace mischief can't replace them
+--suffix 'K' (for kernel) on any functions userspace will also have a version of to ensure kernelspace
+--function usage is intentional (ie prevent accidentally using the kernel's `load` in a userspace `loadfile`)
+local envBase = _G --used for creating process environments
+local assert, checkArg, error, loadK, ipairs, next, pairs, rawequal, rawset, setmetatable, tostring, type = assert, checkArg, error, load, ipairs, next, pairs, rawequal, rawset, setmetatable, tostring, type
+local invoke = component.invoke
+local pullSignalK, shutdown, uptime = computer.pullSignal, computer.shutdown, computer.uptime
+local co_create, co_status, co_resumeK, co_yieldK = coroutine.create, coroutine.status, coroutine.resume, coroutine.yield
+local huge = math.huge
+local format = string.format
+local pack, unpack = table.pack, table.unpack
+
+local big = math.maxinteger or huge --some large amount to read at once in readfileK
+
+
+--completely disable the environment, ensuring the kernel only uses local variables, to prevent userspace
+--mischief (somehow editing _G through a mirror) from affecting kernelspace
+_ENV = setmetatable({}, {
+	__index = function(self, key) return error(format('accessed global %q', key), 2) end,
+	__newindex = function(self, key) return error(format('assigned global %q', key), 2) end,
+})
+
+--read an entire file; `fs` is a filesystem component address (defaults to the boot filesystem)
+--returns the contents as a string, or nil plus a message describing what failed
+local function readfileK(path, fs)
+	fs = fs or bootfs
+	local text, chunk, fd, err = '', nil, invoke(fs, 'open', path)
+	if not fd then
+		local msg = format('can\'t open %s on %s', path, fs:sub(1, 3))
+		--the reason may be missing or just the path echoed back; only append it when it says something
+		if err ~= nil and not rawequal(path, err) then msg = msg .. ': ' .. tostring(err) end
+		return nil, msg
+	end
+
+	repeat
+		chunk, err = invoke(fs, 'read', fd, big)
+		text = text .. (chunk or '')
+	until not chunk
+	invoke(fs, 'close', fd)
+	if err then return nil, format('reading %s: %s', path, err) end
+	return text
+end
+
+
+--[=====[ process internals ]=====]--
+local processes = {} --key is thread, value is a table of process state (ie env)
+local fillEnv --function(process:thread, env:table):() -- given a process and an empty environment, fill it in before the process runs
+
+
+--[=====[ process environments ]=====]--
+--a process environment is a mirror of envBase with a few entries replaced by kernel-aware versions (see fillEnv)
+
+-- _G (through envBase) is used as the base for process environments, so clear it of anything process-specific
+envBase._OSVERSION = 'kitn 0.0.0' --prefer [openloader, openos] "$name $ver" over plan9k "$name/$ver"
+envBase._G, envBase.load, envBase.coroutine.resume, envBase.coroutine.yield, envBase.computer.pullSignal = nil
+
+--environment sandboxing uses mirrors: tables that provide a read-only "reflection" of what's on the other side
+local mirror --function(target:table[, mirror:table]):table -- create a mirror (or make a table into one) that reflects `target`
+local mirrors = setmetatable({}, { __mode = 'k' }) --key is mirror given to userspace, value is what it mirrors
+
+local function mirrorNext(self, prev)
+	local reflected = assert(mirrors[self], 'mirror reflects nothing')
+	local key = next(reflected, prev)
+	return key, self[key] --ensure all accesses go through the mirror so table values are mirrored too
+end
+
+local mirrormt = {
+	__metatable = '', --hide the metatable to prevent replacing or modifying it
+	__index = function(self, key)
+		local reflected = assert(mirrors[self], 'mirror reflects nothing')
+		local val = reflected[key] --if mirrors[self] is somehow nil, erroring is fine
+		if type(val) == 'table' then val = mirror(val) end --recursively mirror tables on first access
+		if val ~= nil then rawset(self, key, val) end --cache it; later changes to the reflected key are not seen
+		return val
+	end,
+	__pairs = function(self)
+		--reuse a single function with the mirror as the control variable, instead of creating a new function with
+		--upvalues every time, because that seems vaguely less memory-intensive (TODO benchmark that)
+		return mirrorNext, self
+	end,
+	--mirrors allow assignment but don't do anything with it
+}
+
+function mirror(target, mirror)
+	mirror = mirror or {}
+	mirrors[mirror] = target
+	setmetatable(mirror, mirrormt)
+	return mirror
+end
+
+function fillEnv(process, env)
+	--make `env` a mirror of envBase, then replace the entries that must go through the kernel (`process` is unused so far)
+	mirror(envBase, env)
+
+	local function env_load(chunk, name, mode, fenv)
+		checkArg(1, chunk, 'function', 'string')
+		checkArg(2, name, 'string', 'nil')
+		checkArg(3, mode, 'string', 'nil')
+		checkArg(4, fenv, 'table', 'nil')
+		return loadK(chunk, name, mode, fenv or env)
+	end
+	local function env_loadfile(path, mode, fenv)
+		checkArg(1, path, 'string')
+		checkArg(2, mode, 'string', 'nil')
+		checkArg(3, fenv, 'table', 'nil') --must name the parameter; any other name is a global access and _ENV raises on those
+
+		local code, err = readfileK(path)
+		if not code then return nil, err end
+
+		code, err = env_load(code, '@' .. path, mode, fenv)
+		return code, err
+	end
+	local function env_dofile(path, ...)
+		checkArg(1, path, 'string')
+		local fn = assert(env_loadfile(path))
+		return fn(...)
+	end
+
+	env.load, env.loadfile, env.dofile = env_load, env_loadfile, env_dofile
+	env.coroutine = mirror(envBase.coroutine, {
+		resume = function(co, ...)
+			local results = pack(co_resumeK(co, ...)) --ok, os_reason, ... while co is suspended; ok, ... once it finished or errored
+			--a truthy os_reason from a suspended coroutine is a kernel request: forward it, then hand the answer back down
+			while results[1] and results[2] and co_status(co) == 'suspended' do
+				results = pack(co_resumeK(co, co_yieldK(unpack(results, 2, results.n))))
+			end
+			if results[1] and co_status(co) == 'suspended' then
+				return true, unpack(results, 3, results.n) --plain userspace yield; hide the `false` marker it added
+			else
+				return unpack(results, 1, results.n) --finished or errored, so the results carry no marker
+			end
+		end,
+		yield = function(...) return co_yieldK(false, ...) end, --`false` marks "not a kernel request"
+	})
+	env.os = mirror(envBase.os, {
+		sleep = function(n)
+			checkArg(1, n, 'number')
+			return co_yieldK('deadline', n)
+		end,
+	})
+	env.computer = mirror(envBase.computer, {
+		pullSignal = function(timeout)
+			checkArg(1, timeout, 'number', 'nil')
+			return co_yieldK('signal', timeout)
+		end,
+	})
+end
+
+
+--[=====[ scheduler ]=====]--
+local runnable = {} --key is thread, value is pack(...)ed args to resume it with
+local schedDeadline = {} --key is thread, value is absolute deadline of uptime()
+local schedSignal = {} --key is thread, value is `true` (may allow specific signal names in the future)
+
+
+--[=====[ init, pid 1 ]=====]--
+do
+	local initEnv = {}
+	--since init is intended to be configured per-computer, it lives in /etc
+	local initPath = '/etc/init.lua'
+	local initCode = assert(readfileK(initPath))
+	local initFn = assert(loadK(initCode, '@' .. initPath, 'bt', initEnv))
+	fillEnv(initFn, initEnv)
+	local initThread = co_create(initFn)
+	processes[initThread] = { env = initEnv }
+	runnable[initThread] = { initPath, n = 1 }
+end
+
+
+--[=====[ scheduler main loop ]=====]--
+--logic after co_resumeK(process, ...) returns, in its own function to use varargs instead of table.pack
+local function postResume(co, ok, reason, ...)
+	if co_status(co) == 'dead' then
+		processes[co], runnable[co], schedDeadline[co], schedSignal[co] = nil
+		--TODO bubble to parent process
+
+		--tostring could run userspace code and even yield, but at this point we're crashing so it doesn't matter
+		if not ok then error('process crashed: ' .. tostring(reason), 0) end
+	elseif not reason then
+		--userspace called coroutine.yield(); TODO figure out what that's supposed to do
+		--for now... yield until another process resumes it?
+		schedDeadline[co], schedSignal[co] = nil
+	elseif rawequal(reason, 'signal') then
+		local timeout = ...
+		schedDeadline[co] = timeout and (uptime() + timeout)
+		schedSignal[co] = true
+	elseif rawequal(reason, 'deadline') then
+		local timeout = ...
+		schedDeadline[co] = uptime() + timeout
+		schedSignal[co] = nil
+	end
+end
+
+--logic after pullSignalK returns, in its own function to use varargs instead of table.pack
+local function postPull(signal, ...)
+	local args = pack(nil)
+	for proc, deadline in pairs(schedDeadline) do
+		if uptime() >= deadline then
+			schedDeadline[proc] = nil
+			runnable[proc] = args
+		end
+	end
+
+	--intentionally override {resumed by deadline} with {resumed by signal}
+	if signal then
+		args = pack(signal, ...)
+		for proc in pairs(schedSignal) do
+			schedDeadline[proc], schedSignal[proc] = nil
+			runnable[proc] = args
+		end
+	end
+end
+
+
+--goto instead of while-true for vague performance
+::tickScheduler::
+
+--clear the run queue
+for proc, args in pairs(runnable) do
+	runnable[proc] = nil --the process is no longer runnable, unless it reschedules itself
+	postResume(proc, co_resumeK(proc, unpack(args, 1, args.n)))
+end
+
+local deadline = nil
+for proc, pdeadline in pairs(schedDeadline) do
+	if not deadline or pdeadline < deadline then
+		deadline = pdeadline
+	end
+end
+
+if next(schedSignal) then
+	deadline = deadline or huge
+end
+
+if not deadline then
+	--no processes are scheduled? are any still runnable somehow?
+	if next(runnable) then
+		--tail-return early to jump back to the run queue
+		goto tickScheduler
+	end
+
+	--nothing scheduled, nothing runnable; have all processes exited?
+	if not next(processes) then
+		return shutdown()
+	end
+
+	--this seems like a deadlock; the scheduler will never resume any process again
+	error('all processes are idle', 0)
+end
+
+
+--`deadline` is set, so at least one process is waiting for a deadline or signal
+postPull(pullSignalK(deadline - uptime()))
+
+--then do it again
+goto tickScheduler
+
+
+-- vi: set ts=2: