From 50c749c2d1e32e220572b176ed4c139812731490 Mon Sep 17 00:00:00 2001 From: cinder <> Date: Fri, 19 Apr 2024 16:42:58 -0700 Subject: [PATCH] fix intermittent crashes when spawning new processes --- src/init.lua | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/init.lua b/src/init.lua index d2829c0..bec30dd 100644 --- a/src/init.lua +++ b/src/init.lua @@ -57,7 +57,7 @@ local fillEnv --function(proc:thread, env:table):() -- given a process and an em -- -- _G (through envBase) is used as the base for process environments, so clear it of anything process-specific -envBase._OSVERSION = 'kitn 0.1.0' --prefer [openloader, openos] "$name $ver" over plan9k "$name/$ver" +envBase._OSVERSION = 'kitn 0.1.1' --prefer [openloader, openos] "$name $ver" over plan9k "$name/$ver" envBase._G, envBase.load = nil envBase.coroutine.resume = function(co, ...) checkArg(1, co, 'thread') @@ -193,7 +193,7 @@ end --logic after co_resume(process, ...) returns, in its own function to use varargs instead of table.pack local function postResume(co, ok, reason, ...) if co_status(co) == 'dead' then - processes[co], runnable[co], schedDeadline[co], schedSignal[co] = nil + processes[co], schedDeadline[co], schedSignal[co] = nil --TODO bubble to parent process --tostring could run userspace code and even yield, but at this point we're crashing so it doesn't matter @@ -237,10 +237,26 @@ end --goto instead of while-true for vague feeling of performance (TODO benchmark this) ::tickScheduler:: ---clear the run queue -for proc, args in pairs(runnable) do - runnable[proc] = nil --the process is no longer runnable, unless it reschedules itself - postResume(proc, kresume(proc, unpack(args, 1, args.n))) +--[[clear the run queue +this is a delicate formulation. if proc creates new threads, those are added as new keys to runnable, which +makes next's behavior undefined*; removing proc before resuming causes the next loop iteration to crash with +"invalid key to 'next'". unconditionally removing proc after is also wrong, because proc may have rescheduled +itself immediately. instead we assign proc a value it should never have (false) before, test if it's still set +to that value after, and remove it if so. + +the outer loop is for the occasional case that some of the child processes end up before their parent in next's +order and so wouldn't be resumed until after another signal was pulled otherwise. + +*is this c-like ub that can cause memory corruption and segfaults? the manual doesn't say! (TODO make this not ub) +]] +while next(runnable) do + for proc, args in pairs(runnable) do + runnable[proc] = false + postResume(proc, kresume(proc, unpack(args, 1, args.n))) + if not runnable[proc] then + runnable[proc] = nil + end + end end local deadline = nil