// sched: round-robin scheduler with priority counters. // All extern-struct layouts (TaskStruct, CoreContext, MmStruct, UserPage, // KeRegs) live in src/task_layout.zig as the single source of truth. // The .S files (sched.S, entry.S) consume those layouts via raw offsets. const layout = #import("task_layout") const TaskStruct = layout.TaskStruct const TASK_RUNNING = layout.TASK_RUNNING const TASK_ZOMBIE = layout.TASK_ZOMBIE const TASK_INTERRUPTIBLE = layout.TASK_INTERRUPTIBLE const KTHREAD = layout.KTHREAD const MAX_PAGE_COUNT = layout.MAX_PAGE_COUNT const fdtable = #import("fdtable") const NR_TASKS usize = 64 extern fn core_switch_to(prev *mut TaskStruct, next *mut TaskStruct) void extern fn set_pgd(pgd u64) void extern fn irq_enable() void extern fn irq_disable() void extern fn free_page(p u64) void extern fn free_kernel_page(kp u64) void // Internal callers (schedule, timer_tick) reach _schedule_impl through // the patchable trampoline `_schedule` in // src/trace/patchable_trampolines.S, so tracing fires on every // scheduler entry, not only cross-module callers. extern fn _schedule() void var init_task TaskStruct = .{ .priority = 1, .flags = KTHREAD, } export var current ?*mut TaskStruct = null export var task [NR_TASKS]?*mut TaskStruct = .{null} ** NR_TASKS export var nr_tasks i32 = 1 // Monotonic pid allocator. init_task occupies pid 0; first user fork is pid 1. export var next_pid i32 = 1 export fn preempt_disable() void { current.?.preempt_count += 1 } export fn preempt_enable() void { current.?.preempt_count -= 1 } /// Index of the RUNNING task with the highest `counter`, or null if no /// task is RUNNING. Ties broken by lower index (strict `>` means the /// first equal-counter slot wins). Pure: walks the slice as-is, no /// mutation, no extern calls — host-testable. pub fn pick_next_running(tasks []?*mut TaskStruct) ?usize { var best ?usize = null var best_c i64 = -1 var i usize = 0 while i < tasks.len { if tasks[i] |p| { if p.state == TASK_RUNNING && p.counter > best_c { best_c = p.counter best = i } } i += 1 } return best } /// Refill every non-null task's counter to `(counter >> 1) + priority`. /// Called when the highest-counter RUNNING task has counter == 0 (round- /// end). `counter` is i64 — `>>` is arithmetic, so an over-decremented /// counter halves toward zero without flipping sign. pub fn refill_counters(tasks []?*mut TaskStruct) void { var i usize = 0 while i < tasks.len { if tasks[i] |p| { p.counter = (p.counter >> 1) + p.priority } i += 1 } } /// Flip `t` to ZOMBIE and wake an INTERRUPTIBLE parent. Caller must /// hold preempt_disable. Pure state mutation — no scheduling, no page /// frees. Shared between sys_kill (target-task) and exit_process (self). pub fn zombify_and_wake_parent(t *mut TaskStruct) void { t.state = TASK_ZOMBIE if t.parent |p| { if p.state == TASK_INTERRUPTIBLE { p.state = TASK_RUNNING } } } export fn _schedule_impl() void { preempt_disable() var idx usize = 0 outer: while true { if pick_next_running(&task) |i| { if task[i].?.counter != 0 { idx = i break :outer } } refill_counters(&task) } switch_to(task[idx].?) preempt_enable() } export fn schedule() void { current.?.counter = 0 _schedule() } export fn switch_to(next *mut TaskStruct) void { if current == next { return } const prev = current.? current = next // Kernel threads (mm.pgd == 0) share the boot-time id_pg_dir for // TTBR0; writing 0 there would unmap low memory and instantly fault // on the next ret to a low-VA kernel function (e.g. ret_from_fork). if next.mm.pgd != 0 { set_pgd(next.mm.pgd) } core_switch_to(prev, next) } export fn timer_tick() void { const cur = current.? cur.counter -= 1 if cur.counter > 0 || cur.preempt_count > 0 { return } cur.counter = 0 irq_enable() _schedule() irq_disable() } export fn exit_process() void { // Self-reap is unsafe here — the kernel page IS the running task's // stack + TaskStruct; the parent's sys_wait does the page sweep. preempt_disable() zombify_and_wake_parent(current.?) preempt_enable() schedule() } // Walk every slot of `list` and free the contained physical page if it // is non-zero. `field` selects the page address: pass null for slot // entries that are bare `u64` (kernel_pages), pass a field name for // struct slots (user_pages → "pa"). Inlined: the comptime field selector // is the whole point. inline fn free_page_list(list anytype, comptime field ?[]u8) void { var j usize = 0 while j < list.len { var v u64 = undefined if field |f| { v = #field(list[j], f) } else { v = list[j] } if v != 0 { free_page(v) } j += 1 } } // Free a task's entire user address space: the user-mapped physical pages // followed by the page-table pages (PGD/PUD/PMD/PTE live in kernel_pages). // This is the mm-sweep half of do_wait_impl's zombie reap, factored out so // fork's copy_process failure paths can release a partially- or fully-built // child mm without duplicating the loop. It does NOT touch fds (the caller // closes those first) or the TaskStruct page itself. A KTHREAD child never // populated an mm, so both lists are empty and this is a no-op for it. export fn release_user_mm(t *mut TaskStruct) void { free_page_list(&t.mm.user_pages, "pa") free_page_list(&t.mm.kernel_pages, null) } // Walk task[] for any child of `current`. If a zombie is found, free its // resources and return its pid. If children exist but none are zombies, // block (TASK_INTERRUPTIBLE) and retry on wake. Returns -1 if no children. export fn do_wait_impl() i32 { preempt_disable() while true { var have_children bool = false var i usize = 0 while i < NR_TASKS { if task[i] |c| { if c.parent == current.? { have_children = true if c.state == TASK_ZOMBIE { const pid i32 = c.pid // Close fds the zombie left open. unref drops // each Pipe/File refcount and frees the backing // page at refs == 0. Runs before the mm-page // sweep: a refcounted Pipe/File page (not in // user/kernel_pages) must not be double-freed. fdtable.closeAll(c) // Free the mm: user-mapped pages then the page-table // pages (PGD/PUD/PMD/PTE). Shared with fork's // copy_process failure-path cleanup. release_user_mm(c) // Drop the slot before freeing the kernel page so // a stale pointer can never be observed. task[i] = null if c.kstack != 0 { free_kernel_page(c.kstack) } free_kernel_page(#intFromPtr(c)) preempt_enable() return pid } } } i += 1 } if !have_children { preempt_enable() return -1 } current.?.state = TASK_INTERRUPTIBLE preempt_enable() schedule() preempt_disable() } } export fn sched_init() void { current = &init_task task[0] = &init_task } // --------------------------------------------------------------------- // Host tests. The pure helpers run against caller- // owned TaskStruct fixtures + a local `tasks` slice, so the global // `task` / `current` state is never touched and tests stay hermetic. // --------------------------------------------------------------------- const std = #import("std") test "refill_counters: each non-null task gets (c>>1) + priority" { var t1 TaskStruct = .{ .priority = 4, .counter = 10 } var t2 TaskStruct = .{ .priority = 2, .counter = 0 } var tasks [3]?*mut TaskStruct = .{ &t1, null, &t2 } refill_counters(&tasks) try std.testing.expectEqual(#as(i64, (10 >> 1) + 4), t1.counter) try std.testing.expectEqual(#as(i64, 0 + 2), t2.counter) } test "refill_counters: null slots are skipped" { var tasks [4]?*mut TaskStruct = .{ null, null, null, null } refill_counters(&tasks) } test "refill_counters: negative counter halves via arithmetic shift" { // counter is i64 — `>>` on signed types is arithmetic in Zig. Guards // the assumption that an over-decremented counter (e.g. a kill // racing the timer tick) still refills sanely. var t TaskStruct = .{ .priority = 5, .counter = -3 } var tasks [1]?*mut TaskStruct = .{&t} refill_counters(&tasks) // -3 >> 1 == -2 (arithmetic shift), + 5 == 3 try std.testing.expectEqual(#as(i64, 3), t.counter) } test "pick_next_running: returns index of highest-counter RUNNING task" { var t0 TaskStruct = .{ .state = TASK_RUNNING, .counter = 5 } var t1 TaskStruct = .{ .state = TASK_RUNNING, .counter = 9 } var t2 TaskStruct = .{ .state = TASK_RUNNING, .counter = 7 } var tasks [3]?*mut TaskStruct = .{ &t0, &t1, &t2 } try std.testing.expectEqual(#as(?usize, 1), pick_next_running(&tasks)) } test "pick_next_running: ignores ZOMBIE and INTERRUPTIBLE" { var t0 TaskStruct = .{ .state = TASK_ZOMBIE, .counter = 99 } var t1 TaskStruct = .{ .state = TASK_INTERRUPTIBLE, .counter = 50 } var t2 TaskStruct = .{ .state = TASK_RUNNING, .counter = 3 } var tasks [3]?*mut TaskStruct = .{ &t0, &t1, &t2 } try std.testing.expectEqual(#as(?usize, 2), pick_next_running(&tasks)) } test "pick_next_running: null when no RUNNING task exists" { var t0 TaskStruct = .{ .state = TASK_ZOMBIE } var tasks [2]?*mut TaskStruct = .{ &t0, null } try std.testing.expectEqual(#as(?usize, null), pick_next_running(&tasks)) } test "pick_next_running: first-match wins on counter ties" { var t0 TaskStruct = .{ .state = TASK_RUNNING, .counter = 5 } var t1 TaskStruct = .{ .state = TASK_RUNNING, .counter = 5 } var tasks [2]?*mut TaskStruct = .{ &t0, &t1 } // Strict `>` — later-equal cannot displace earlier, so t0 wins. try std.testing.expectEqual(#as(?usize, 0), pick_next_running(&tasks)) } test "zombify_and_wake_parent: child->ZOMBIE, INTERRUPTIBLE parent->RUNNING" { var parent TaskStruct = .{ .state = TASK_INTERRUPTIBLE } var child TaskStruct = .{ .state = TASK_RUNNING, .parent = &parent } zombify_and_wake_parent(&child) try std.testing.expectEqual(TASK_ZOMBIE, child.state) try std.testing.expectEqual(TASK_RUNNING, parent.state) } test "zombify_and_wake_parent: RUNNING parent stays RUNNING" { var parent TaskStruct = .{ .state = TASK_RUNNING } var child TaskStruct = .{ .state = TASK_RUNNING, .parent = &parent } zombify_and_wake_parent(&child) try std.testing.expectEqual(TASK_ZOMBIE, child.state) try std.testing.expectEqual(TASK_RUNNING, parent.state) } test "zombify_and_wake_parent: ZOMBIE parent stays ZOMBIE (orphan path)" { var parent TaskStruct = .{ .state = TASK_ZOMBIE } var child TaskStruct = .{ .state = TASK_RUNNING, .parent = &parent } zombify_and_wake_parent(&child) try std.testing.expectEqual(TASK_ZOMBIE, child.state) try std.testing.expectEqual(TASK_ZOMBIE, parent.state) } test "zombify_and_wake_parent: null parent does not crash" { var child TaskStruct = .{ .state = TASK_RUNNING, .parent = null } zombify_and_wake_parent(&child) try std.testing.expectEqual(TASK_ZOMBIE, child.state) } // host_stubs_sched.zig counts free_page calls so release_user_mm's sweep // is observable: every non-zero user_pages[*].pa and kernel_pages[*] slot // must be freed exactly once, empty slots skipped. extern fn sched_free_count() u64 extern fn sched_reset_free_count() void test "release_user_mm frees every populated user and kernel page" { sched_reset_free_count() var t TaskStruct = .{} t.mm.user_pages[0].pa = 0x40001000 t.mm.user_pages[1].pa = 0x40002000 t.mm.kernel_pages[0] = 0x40003000 // pgd t.mm.kernel_pages[1] = 0x40004000 release_user_mm(&t) try std.testing.expectEqual(#as(u64, 4), sched_free_count()) } test "release_user_mm is a no-op on an empty (KTHREAD) mm" { sched_reset_free_count() var t TaskStruct = .{} release_user_mm(&t) try std.testing.expectEqual(#as(u64, 0), sched_free_count()) }