// sys: syscall dispatch table and handlers. // Layouts (TaskStruct etc.) come from src/task_layout.zig — the single // source of truth shared with sched.zig / fork.zig / mm_user.zig. // Syscall IDs come from lib/syscall_defs.zig — the single source of // truth shared with user_space/kernel_tests.zig. const std = #import("std") const layout = #import("task_layout") const defs = #import("syscall_defs") const user_layout = #import("user_layout") const pipe_mod = #import("pipe") const console = #import("console") const sched = #import("sched") const vfs = #import("vfs") const file_mod = #import("file") const fdtable = #import("fdtable") const path_mod = #import("path") const klog_ring = #import("klog_ring") const sha256 = #import("sha256") const shadow = #import("shadow") const perm = #import("perm") const pwfile = #import("pwfile") // Kernel entropy source (salt minting for sys_passwd). Named module — // hwrng was promoted to a named module when it moved to Flash (the // generated .zig lives in the build cache, so a path import would not // resolve); start.zig force-includes the same module for the exported // hwrng_init, same pattern as sched/execve/utilc. const hwrng = #import("hwrng") const TaskStruct = layout.TaskStruct const UTHREAD = layout.UTHREAD const MAX_PAGE_COUNT = layout.MAX_PAGE_COUNT const MU i32 = 0 const NR_TASKS usize = 64 const PAGE_SIZE u64 = 1 << 12 extern var current ?*mut TaskStruct extern var task [NR_TASKS]?*mut TaskStruct extern fn preempt_disable() void extern fn preempt_enable() void extern fn main_output(interface i32, str [*:0]u8) void extern fn copy_process(clone_flags u64, fn_ptr u64, arg u64) i32 extern fn exit_process() void extern fn do_wait() i32 extern fn dump_free_count() u64 // Body lives in src/execve.zig; sys_execve below is the dispatch-table // wrapper. Activates the real path-resolve → stream-PT_LOAD → // encode-argv flow. 
extern fn execve_impl(path_ptr u64, argv_ptr u64) i32 extern fn unmap_user_range(t *mut TaskStruct, start_uva u64, end_uva u64) void extern fn set_pgd(pgd u64) void extern fn check_and_prefault_user_range(uva u64, len u64) i32 extern fn copy_from_user(kbuf [*]mut u8, uva u64, len u64) i32 extern fn copy_to_user(uva u64, kbuf [*]u8, len u64) i32 // Board driver bag, reached through C-ABI trampolines in src/start.zig. // The board bag is a named module imported by the kernel root module, so a // leaf Flash module — whose generated .zig lives in the build cache — cannot // @import it. start.zig (which is in the root and imports board) exports // these thin wrappers, the same boundary fork.zig bridges with // move_to_user_elf_argv. The usb pair backs console_tx; board_power_reboot // backs sys_reboot. extern fn board_usb_enumerated() bool extern fn board_usb_cdc_tx(ptr [*]u8, len u64) void extern fn board_power_reboot() noreturn // Mailbox-backed hardware monitors, reached through the same start.zig // trampolines (board.mailbox is rpi4b-real, virt-stubbed to 0). Both // return 0 = unknown on a board without the firmware. extern fn board_mailbox_temperature() u32 extern fn board_mailbox_cpu_clock() u32 // Allocatable pool size (SYS_MEMTOTAL) and seconds-since-boot // (SYS_UPTIME). Cross-module kernel reads, exported by page_alloc / // generic_timer. extern fn mem_total_count() u64 extern fn uptime_seconds() u64 const builtin = #import("builtin") // Syscalls run at EL1h with TTBR0 holding the *user* pgd (set by // prepare_move_to_user_elf). Each function pointer is ORed with // LINEAR_MAP_BASE so the `blr` in el0_svc lands in the kernel's // high-mem mapping. Replaces the earlier broken `cur + &_start` // formula, which doubled the address into .bss. const LINEAR_MAP_BASE u64 = if (builtin.target.os.tag == .freestanding) 0xFFFF000000000000 else 0 // Console echo flags. 
// Default off preserves the historical
// split — the kernel never echoes, userland readline owns echo (so fsh is
// unaffected). SYS_SET_CONSOLE_MODE flips them; when echo is on,
// readConsoleBytes echoes drained printable bytes, and when mask is on it
// echoes a '*' per printable byte instead (password masking). /bin/login
// turns echo on for the username prompt and mask on for the password, then
// leaves both off before exec'ing the shell.
var console_echo bool = false
var console_mask bool = false

// SYS CALL PROCESS CONTROL

// Fork the calling task: passes the UTHREAD clone flag to copy_process with
// a zero fn_ptr and a zero arg, and returns copy_process's result unchanged.
export fn sys_fork() i32 {
    return copy_process(UTHREAD, 0, 0)
}

// Path-resolved ELF loader. Thin wrapper over execve_impl
// in src/execve.zig — keeps the dispatch-table binding adjacent to
// the other process-control syscalls. x0 = path_ptr (NUL-terminated
// absolute UVA), x1 = argv_ptr (UVA of NULL-terminated argv array).
// Returns 0 (does-not-return on success — eret jumps to e_entry),
// -1 on resolve / parse / alloc / argv-fault failure.
export fn sys_execve(path_ptr u64, argv_ptr u64) i32 {
    return execve_impl(path_ptr, argv_ptr)
}

// Wait syscall: returns do_wait()'s result unchanged.
export fn sys_wait() i32 {
    return do_wait()
}

// Exit syscall: delegates to exit_process() and returns nothing itself.
export fn sys_exit() void {
    exit_process()
}

// SYS_REBOOT — reset the board. board.power.reboot() is the per-board
// reset (PSCI SYSTEM_RESET on virt, the BCM2711 watchdog on rpi4b) and
// never returns, so neither does this handler: el0_svc never reaches the
// eret back to the caller. EL0 cannot do this itself (privileged SMC /
// MMIO), which is why it is a syscall. No privilege gate yet.
export fn sys_reboot() noreturn {
    board_power_reboot()
}

// Walk task[] under preempt_disable for a matching .pid. On hit: flip to
// TASK_ZOMBIE and wake any TASK_INTERRUPTIBLE parent (mirrors exit_process
// in sched.zig). The slot stays occupied; the parent's existing do_wait
// reaps it (frees user/kernel pages + the kernel page itself). Returns 0
// on hit, -1 on miss.
// Self-kill is rejected — the running task is its own
// kernel page; sys_exit is the safe self-cancel path.
export fn sys_kill(pid i32) i32 {
    // Refuse to zombify the caller itself.
    if current |self_task| {
        if self_task.pid == pid {
            return -1
        }
    }

    // The table scan runs with preemption off; the defer re-enables it on
    // both the hit and the miss exit.
    preempt_disable()
    defer preempt_enable()

    var slot usize = 0
    while slot < NR_TASKS {
        if task[slot] |victim| {
            if victim.pid == pid {
                sched.zombify_and_wake_parent(victim)
                return 0
            }
        }
        slot += 1
    }
    return -1
}

// Free-page count, straight from dump_free_count().
export fn sys_dump_free() u64 {
    return dump_free_count()
}

// SYS_MEMTOTAL — allocatable pool size in pages, frozen at boot. A tool
// derives "used" as this minus SYS_DUMP_FREE; "total bytes" as pages << 12.
export fn sys_mem_total() u64 {
    return mem_total_count()
}

// SYS_UPTIME — seconds since boot, from the architectural counter.
export fn sys_uptime() u64 {
    return uptime_seconds()
}

// SYS_CPU_TEMP — SoC temperature in milli-degrees Celsius (0 = unknown).
// SYS_CPU_FREQ — ARM clock in Hz (0 = unknown). Both run a mailbox
// transaction over the shared prop_buf; preempt_disable serialises that
// single-core-shared static against a task switch landing mid-transaction.
export fn sys_cpu_temp() u64 {
    preempt_disable()
    defer preempt_enable()
    const milli u64 = board_mailbox_temperature()
    return milli
}

export fn sys_cpu_freq() u64 {
    preempt_disable()
    defer preempt_enable()
    const hz u64 = board_mailbox_cpu_clock()
    return hz
}

// SYS CALL FILE SYSTEM
//
// File access dispatches through the VFS layer: sys_openFile
// resolves the path via vfs.vfs_open and stashes the backing superblock
// in File.sb; seek and the unified read/write/close re-cast that opaque
// pointer (vfsSb) and call through the backend vtable. The per-backend
// arithmetic (initramfs's pointer walk, FAT32's cluster chains) lives
// in the backend modules — these handlers are thin dispatchers.
//
// User pointers (path / buf) reach the kernel through copy_from_user /
// copy_to_user. A wild UVA returns -1 to the caller via the soft path
// in mm_user.check_and_prefault_user_range; the task does NOT zombify.
// Re-type File.sb (an `?*anyopaque`, opaque to break the vfs<->file
// import cycle) back to `*vfs.SuperBlock` for vtable dispatch.
// Returns null when the File carries no superblock.
inline fn vfsSb(f *mut file_mod.File) ?*mut vfs.SuperBlock {
    const raw = f.sb orelse return null
    return #ptrCast(#alignCast(raw))
}

// sys_openFile + joinResolve form the deepest kernel-stack chain on the
// syscall path. The two path scratch buffers live as preempt-guarded
// module statics rather than ~1.3 KiB of stack locals: the kernel stack
// grows down toward the TaskStruct credential tail in the same page, so a
// stack-heavy open could descend into uid/gid/euid/egid and a timer IRQ
// taken in that window would save its register frame straight over the
// credentials. Keeping the buffers off the stack bounds the frame well
// clear of the creds. preempt_disable serialises the shared statics across
// the whole resolve + open; the defer covers every early-return error path.
var open_path_buf [1024]u8 = undefined
var open_join_buf [layout.CWD_SIZE]u8 = undefined

// Open the file at the user path `path_ptr` and return a new fd.
// Returns -1 on a null `current`, a path-copy fault, a path with no NUL in
// its first 1023 bytes, an over-long resolved path, a VFS miss, a File
// alloc failure or a full fd table; -EACCES when the read-permission check
// denies the caller.
export fn sys_openFile(path_ptr u64) i32 {
    const c = current orelse return -1
    preempt_disable()
    defer preempt_enable()
    var i usize = 0
    var nul_found bool = false
    while i < 1023 {
        var b u8 = 0
        if copy_from_user(#ptrCast(&b), path_ptr + i, 1) < 0 {
            return -1
        }
        open_path_buf[i] = b
        if b == 0 {
            nul_found = true
            break
        }
        i += 1
    }
    // An input with no NUL inside the buffer is rejected rather than
    // silently truncated: a truncated path names a *different* file than
    // the caller asked for. Same policy as sys_chdir / sys_readdir.
    if !nul_found {
        return -1
    }
    const raw_path = std.mem.span(#as([*:0]u8, #ptrCast(&open_path_buf)))
    // Relative paths (no leading '/') are joined against current.cwd
    // and `.` / `..` collapsed into a kernel scratch buffer; absolute
    // paths pass straight through. The post-join slice is what vfs
    // (still absolute-only) sees. The pure helper is host-tested. Join
    // buffer is sized to one CWD_SIZE — over-long resolved paths
    // (cwd 256B + rel 256B before collapse) return -1.
    var resolved []u8 = undefined
    if raw_path.len > 0 && raw_path[0] == '/' {
        resolved = raw_path
    } else {
        const cwd_slice = std.mem.sliceTo(#as([*:0]u8, #ptrCast(&c.cwd)), 0)
        resolved = path_mod.joinResolve(cwd_slice, raw_path, &open_join_buf) orelse return -1
    }
    var open_result vfs.OpenResult = .{}
    const sb = vfs.vfs_open(resolved, &open_result)
    if sb == null {
        return -1
    }
    // Permission gate: open is read-intent (this ABI has no
    // open flags — write permission is re-checked per write). A denied
    // read returns -EACCES, distinguishable from the -1 miss above, and
    // costs no File page since the check runs before the alloc.
    if !perm.checkAccess(
        open_result.mode, open_result.uid, open_result.gid,
        c.euid, c.egid, .read
    ) {
        return -defs.EACCES
    }
    const f = file_mod.alloc() orelse return -1
    f.refs = 1
    f.private = open_result.private
    f.size = open_result.size
    f.offset = 0
    f.sb = sb
    // Carry the backend's permission metadata on the handle so the
    // per-write check in sys_write needs no fresh VFS lookup.
    f.mode = open_result.mode
    f.uid = open_result.uid
    f.gid = open_result.gid
    // Directory-entry location: FAT32 write() rewrites the entry's
    // first-cluster / size through it. Only writable handles (this path)
    // need it; the read-only open sites below leave the alloc-zeroed
    // default, and non-FAT backends never set it.
    f.dirent_lba = open_result.dirent_lba
    f.dirent_off = open_result.dirent_off
    const fd = fdtable.install(c, .file, f)
    if fd < 0 {
        file_mod.unref(f)
        return -1
    }
    return fd
}

// Shared copy-path-from-user + cwd-resolve — the off-stack form
// sys_create / sys_unlink / sys_rename use (the same logic sys_openFile
// inlines). Copies the NUL-terminated user path into `raw_buf`, then
// resolves it against the caller's cwd into `join_buf`: an absolute path
// passes straight through, a relative one is `.`/`..`-collapsed by the
// host-tested joinResolve. Returns the resolved slice, or null on a copy
// fault, a path with no NUL in its first 1023 bytes, or an over-long
// resolved path. Caller holds preempt_disable (the
// buffers are shared statics). Every buffer here is off-stack, keeping
// these handlers' frames well clear of the TaskStruct credential tail.
fn copyResolvePath(c *mut TaskStruct, path_ptr u64, raw_buf *mut [1024]u8, join_buf *mut [layout.CWD_SIZE]u8) ?[]u8 {
    var i usize = 0
    var nul_found bool = false
    while i < 1023 {
        var b u8 = 0
        if copy_from_user(#ptrCast(&b), path_ptr + i, 1) < 0 {
            return null
        }
        raw_buf[i] = b
        if b == 0 {
            nul_found = true
            break
        }
        i += 1
    }
    // Never hand a silently truncated name to create / unlink / rename:
    // an unterminated (over-long) input fails instead.
    if !nul_found {
        return null
    }
    const raw_path = std.mem.span(#as([*:0]u8, #ptrCast(raw_buf)))
    if raw_path.len > 0 && raw_path[0] == '/' {
        return raw_path
    }
    const cwd_slice = std.mem.sliceTo(#as([*:0]u8, #ptrCast(&c.cwd)), 0)
    return path_mod.joinResolve(cwd_slice, raw_path, join_buf)
}

// Second path scratch for sys_rename — its two paths must be resolved and
// live simultaneously, so the new-path copy/join cannot reuse the old-path
// buffers (open_path_buf / open_join_buf). Off-stack for the same stack-tail reason.
var rename_new_buf [1024]u8 = undefined
var rename_new_join [layout.CWD_SIZE]u8 = undefined

// sys_create — creat(): make a new empty file and return a writable fd. The
// deepest-stack twin of sys_openFile (shared off-stack path scratch):
// resolve the path, dispatch vfs_create, then install a File the same way.
// The new file is caller-owned (uid/gid = the caller's effective ids); the
// backend supplies the 0644 mode baseline. /mnt is the only writable mount,
// so a create elsewhere (initramfs) fails closed via its EROFS vtable stub.
export fn sys_create(path_ptr u64) i32 {
    const caller = current orelse return -1

    // The shared path scratch statics are only safe with preemption off.
    preempt_disable()
    defer preempt_enable()

    const abs_path = copyResolvePath(caller, path_ptr, &open_path_buf, &open_join_buf) orelse return -1

    var created vfs.OpenResult = .{}
    const sb = vfs.vfs_create(abs_path, &created)
    if sb == null {
        return -1
    }

    const handle = file_mod.alloc() orelse return -1
    handle.refs = 1
    handle.offset = 0
    handle.sb = sb
    handle.private = created.private
    handle.size = created.size
    handle.dirent_lba = created.dirent_lba
    handle.dirent_off = created.dirent_off
    // Caller-owned: the backend reports a root baseline, so the creating
    // user's effective ids are stamped over it — otherwise the per-write
    // check in sys_write would refuse the owner of a file it just made.
    // Persistence is a known ceiling — a reboot reverts to the overlay
    // default (see fat32_backend create).
    handle.mode = created.mode
    handle.uid = caller.euid
    handle.gid = caller.egid

    const fd = fdtable.install(caller, .file, handle)
    if fd >= 0 {
        return fd
    }
    // No free fd slot: give the File back before failing.
    file_mod.unref(handle)
    return -1
}

// sys_unlink — remove the file at `path`. Resolve + dispatch; the backend
// tombstones the entry and frees its chain. Returns 0 on success, -1 on a
// missing file, a read-only mount, or a fault.
export fn sys_unlink(path_ptr u64) i32 {
    const caller = current orelse return -1
    preempt_disable()
    defer preempt_enable()
    const abs_path = copyResolvePath(caller, path_ptr, &open_path_buf, &open_join_buf) orelse return -1
    const rc = vfs.vfs_unlink(abs_path)
    return #intCast(rc)
}

// sys_rename — rename `old` to `new` within the same directory. Both paths
// are copied + resolved into separate off-stack buffers (both must be live
// for the dispatch) and handed to vfs_rename, which rejects a cross-mount
// pair before the backend sees it. Returns 0 on success, -1 on a missing
// source, a cross-directory/-mount move, a bad name, or a fault.
export fn sys_rename(old_ptr u64, new_ptr u64) i32 {
    const c = current orelse return -1
    preempt_disable()
    defer preempt_enable()
    // Old and new paths use disjoint scratch buffers so both resolved
    // slices stay valid for the vfs_rename call.
    const old_resolved = copyResolvePath(c, old_ptr, &open_path_buf, &open_join_buf) orelse return -1
    const new_resolved = copyResolvePath(c, new_ptr, &rename_new_buf, &rename_new_join) orelse return -1
    return #intCast(vfs.vfs_rename(old_resolved, new_resolved))
}

// Post-lookup body for file reads. The VFS vtable walks
// chunks of <=512 bytes and copies them to the caller's UVA. Returns
// total bytes copied, -1 on copy_to_user fault with no progress so
// far. Reached through the unified sys_read dispatcher.
//
// A backend error or a copy_to_user fault that strikes after at least one
// chunk was delivered reports the bytes already delivered (a short read)
// rather than -1, so the caller never loses track of data that did land
// in its buffer.
fn readFileBacked(f *mut file_mod.File, sb *mut vfs.SuperBlock, buf_uva u64, len u64) i64 {
    var total_copied u64 = 0
    while total_copied < len {
        var kbuf [512]u8 = undefined
        const take = #min(len - total_copied, #as(u64, #intCast(kbuf.len)))
        // Only the backend call runs with preemption off; the user copy
        // below may fault and must not.
        preempt_disable()
        const n = vfs.vfs_read(sb, f, &kbuf, take)
        preempt_enable()
        if n < 0 {
            return if (total_copied > 0) #intCast(total_copied) else -1
        }
        if n == 0 {
            break
        }
        if copy_to_user(buf_uva + total_copied, &kbuf, #intCast(n)) < 0 {
            // NOTE(review): the backend has presumably already advanced
            // the file offset past this undelivered chunk — confirm
            // whether a seek-back is wanted here.
            return if (total_copied > 0) #intCast(total_copied) else -1
        }
        total_copied += #intCast(n)
        // Short backend read: nothing more to fetch right now.
        if n < take {
            break
        }
    }
    return #intCast(total_copied)
}

// Post-lookup body for file writes. Pulls up to 512 bytes
// per iteration through copy_from_user and pushes them via the
// backend's vfs_write vtable. Initramfs returns -1 (EROFS); FAT32
// honours the write via writeBack. Reached through the unified
// sys_write dispatcher.
// Returns the number of bytes the backend accepted. A copy_from_user fault
// or a backend error reports -1 only when nothing was written yet; once at
// least one chunk has been pushed, the count of bytes already written is
// returned (a short write), so the caller can tell the file was modified.
fn writeFileBacked(f *mut file_mod.File, sb *mut vfs.SuperBlock, buf_uva u64, len u64) i64 {
    var total_pushed u64 = 0
    while total_pushed < len {
        var kbuf [512]u8 = undefined
        const take = #min(len - total_pushed, #as(u64, #intCast(kbuf.len)))
        if copy_from_user(&kbuf, buf_uva + total_pushed, take) < 0 {
            // Earlier chunks are already committed to the backend; report
            // them instead of masking the partial write behind -1.
            return if (total_pushed > 0) #intCast(total_pushed) else -1
        }
        // Only the backend call runs with preemption off.
        preempt_disable()
        const n = vfs.vfs_write(sb, f, &kbuf, take)
        preempt_enable()
        if n < 0 {
            return if (total_pushed > 0) #intCast(total_pushed) else -1
        }
        if n == 0 {
            break
        }
        total_pushed += #intCast(n)
        // Short backend write: stop and let the caller retry the rest.
        if n < take {
            break
        }
    }
    return #intCast(total_pushed)
}

// Reposition the offset of file descriptor `fd` through the backend's
// vfs_seek and return its result. Returns -1 when there is no current task,
// `fd` is not a file fd, or the File carries no superblock.
export fn sys_seek(fd i32, off i64, whence i32) i64 {
    const c = current orelse return -1
    const f = fdtable.getFile(c, fd) orelse return -1
    const sb = vfsSb(f) orelse return -1
    preempt_disable()
    const ret = vfs.vfs_seek(sb, f, off, whence)
    preempt_enable()
    return ret
}

// MEMORY MANAGEMENT

// Set the heap break to `addr` (rounded up to the next page boundary).
// Returns the new break, or the current break if `addr == 0`. Returns
// -1 on out-of-range requests (below HEAP_BASE, or above
// STACK_TOP - STACK_BUDGET — the latter is the stack-budget upper
// bound shared with mm_user.zig's do_data_abort guard logic).
//
// No pages are eagerly allocated on grow — touching a page in the new
// range faults through do_data_abort and demand-allocates. On shrink
// the released pages MUST be freed here (the per-process do_wait reap
// loop only runs at process exit, so a long-lived process that grows
// then shrinks would leak otherwise); unmap_user_range walks
// `mm.user_pages` for entries in [new_brk, old_brk) and clears the
// PTE + frees the PA + zeros the slot. set_pgd at the tail flushes the
// TLB so a re-grow re-faults cleanly.
// An `addr` so close to 2^64 that rounding it up to a page boundary would
// wrap is rejected with -1 before the range checks: `addr` is
// caller-controlled, and an unchecked `addr + PAGE_SIZE - 1` would
// overflow inside the kernel.
export fn sys_brk(addr u64) i64 {
    const c = current orelse return -1
    if addr == 0 {
        return #bitCast(c.mm.brk)
    }
    // Checked round-up to the next page boundary.
    const bumped = #addWithOverflow(addr, PAGE_SIZE - 1)
    if bumped[1] != 0 {
        return -1
    }
    const new_brk u64 = bumped[0] & ~(PAGE_SIZE - 1)
    if new_brk < user_layout.HEAP_BASE {
        return -1
    }
    if new_brk > user_layout.STACK_TOP - user_layout.STACK_BUDGET {
        return -1
    }
    const old_brk u64 = c.mm.brk
    if new_brk < old_brk {
        unmap_user_range(c, new_brk, old_brk)
        // Re-install the same pgd to drive the full-TLB-flush path in
        // set_pgd (sched.S). Targeted `tlbi vae1is` would be surgical;
        // the heap-shrink path is rare enough that a full flush is fine.
        set_pgd(c.mm.pgd)
    }
    c.mm.brk = new_brk
    return #bitCast(new_brk)
}

// Convenience wrapper: brk(current_break + delta), returns the previous
// break. Negative `delta` shrinks. The sys_brk path itself enforces
// bounds (HEAP_BASE / STACK_TOP - user_layout.STACK_BUDGET); sbrk guards
// against signed-overflow on the addition, and against a non-zero delta
// that lands exactly on 0 — sys_brk reads 0 as "query the break", so that
// request would otherwise report success without moving anything.
export fn sys_sbrk(delta i64) i64 {
    const c = current orelse return -1
    const cur_brk u64 = c.mm.brk
    const cur_signed i64 = #bitCast(cur_brk)
    const new_signed = #addWithOverflow(cur_signed, delta)
    if new_signed[1] != 0 {
        return -1
    }
    if new_signed[0] < 0 {
        return -1
    }
    const target u64 = #bitCast(new_signed[0])
    if target == 0 && delta != 0 {
        return -1
    }
    const ret = sys_brk(target)
    if ret < 0 {
        return -1
    }
    return #bitCast(cur_brk)
}

// Unimplemented memory-management slots: empty bodies, no effect.
export fn sys_mmap() void {}
export fn sys_munmap() void {}
export fn sys_mlock() void {}
export fn sys_munlock() void {}

// Interprocess Communication
//
// Anonymous-pipe ABI. Slot map in lib/syscall_defs.zig.
// `sys_pipe` returns both fds in a single i64: low 32 bits = read fd,
// high 32 bits = write fd. Negative on out-of-fds / alloc-failure.
// Compact ABI keeps the user-side wrapper to one register and avoids
// a copy_to_user for the pair.
//
// `buf` reaches the kernel through copy_from_user / copy_to_user; a
// wild UVA returns -1 to the caller without zombifying the task.
export fn sys_pipe() i64 {
    const owner = current orelse return -1
    const new_pipe = pipe_mod.alloc() orelse return -1
    // Both ends are counted up front: one reference per fd installed below.
    new_pipe.refs = 2

    const read_fd = fdtable.install(owner, .pipe, new_pipe)
    if read_fd < 0 {
        // Neither end got installed, yet refs is already 2 — drop both
        // or the pipe page leaks.
        pipe_mod.unref(new_pipe)
        pipe_mod.unref(new_pipe)
        return -1
    }

    const write_fd = fdtable.install(owner, .pipe, new_pipe)
    if write_fd < 0 {
        // close() clears the read-end slot and releases its reference;
        // the extra unref covers the write end that was never installed.
        _ = fdtable.close(owner, read_fd)
        pipe_mod.unref(new_pipe)
        return -1
    }

    // Pack the pair: write fd in the high word, read fd in the low word.
    const high_word i64 = #as(i64, write_fd) << 32
    const low_word i64 = #as(i64, read_fd) & 0xFFFF_FFFF
    return high_word | low_word
}

// Post-lookup body for pipe reads. One 512-byte kbuf-bounded drain
// per call (POSIX short-read for pipes); the blocking is inside
// pipe_mod.read. Reached through the unified sys_read dispatcher.
fn readPipeBacked(p *mut pipe_mod.Pipe, buf_uva u64, len u64) i64 {
    var kbuf [512]u8 = undefined
    const want = #min(len, #as(u64, #intCast(kbuf.len)))
    const got = pipe_mod.read(p, &kbuf, want)
    // Nothing drained (or an error code): pass the result straight back.
    if got <= 0 {
        return got
    }
    if copy_to_user(buf_uva, &kbuf, #intCast(got)) < 0 {
        return -1
    }
    return got
}

// Post-lookup body for pipe writes. Mirrors readPipeBacked:
// 512-byte kbuf, single push per call (no loop — caller iterates if
// it has more data than fits the ring). Reached through the unified
// sys_write dispatcher.
fn writePipeBacked(p *mut pipe_mod.Pipe, buf_uva u64, len u64) i64 {
    var kbuf [512]u8 = undefined
    const want = #min(len, #as(u64, #intCast(kbuf.len)))
    const fetched = copy_from_user(&kbuf, buf_uva, want)
    if fetched < 0 {
        return -1
    }
    return pipe_mod.write(p, &kbuf, want)
}

// Unimplemented IPC slots: empty bodies, no effect.
export fn sys_socket() void {}
export fn sys_msgget() void {}
export fn sys_semget() void {}
export fn sys_shmget() void {}

// Device Management
//
// Console ABI.
// The unified (fd,buf,len) ABI at slots 32..35 routes
// console fds through the same tagged `fds` table as pipes and files;
// the post-lookup readConsoleBytes / writeConsoleBytes helpers below
// back the sys_read / sys_write dispatchers. fd 0/1/2 are pre-installed
// as console slots at PID-1 bring-up (src/kernel.zig:kernel_process),
// so user code reaches stdin/stdout/stderr without an explicit open.

// Post-lookup body for console reads. Console reads are short by
// design — see src/console.zig:console_read for the blocking and
// POSIX-TTY semantics. Reached through the unified sys_read
// dispatcher.
fn readConsoleBytes(buf_uva u64, len u64) i64 {
    var kbuf [256]u8 = undefined
    const want = #min(len, #as(u64, #intCast(kbuf.len)))
    const got = console.console_read(&kbuf, want)
    // Nothing drained: hand console_read's result back untouched.
    if got <= 0 {
        return got
    }
    if copy_to_user(buf_uva, &kbuf, #intCast(got)) < 0 {
        return -1
    }
    // Both flags off (the default): the kernel stays silent, so every
    // existing scenario's serial output is byte-identical.
    if !(console_echo || console_mask) {
        return got
    }
    // Cooked-style echo/mask: printable bytes only, one NUL-terminated
    // byte at a time through the console mux. Control bytes (CR/LF, and
    // the [TEST] console-echo 0xC0..0xC7 injects) are never emitted.
    var idx i64 = 0
    while idx < got {
        const ch = kbuf[#intCast(idx)]
        const printable = ch >= 0x20 && ch < 0x7F
        if printable {
            // mask wins over echo: show '*' instead of the secret.
            const shown u8 = if (console_mask) '*' else ch
            var cell [2]u8 = [2]u8{ shown, 0 }
            console_tx(#ptrCast(&cell), 1)
        }
        idx += 1
    }
    return got
}

// Console-output sink (USB-C gadget console). Only the *user*
// console-write path is muxed here: once the DWC2 CDC-ACM gadget is
// enumerated on the host, fsh / user output streams out the bulk-IN
// endpoint (board.usb.cdc_tx); otherwise it falls back to the Mini-UART
// (main_output(MU, …)).
// This is a "switch", not a tee — the device-side
// trace already gives a parallel debug channel on the MU.
//
// Kernel [Debug] traces keep calling main_output(MU, …) directly and are
// deliberately NOT routed here, so boot diagnostics stay on the UART
// regardless of USB state (they share main_output with the user path, so
// the mux must live here, not inside main_output's MU case).
//
// `s` must be NUL-terminated at s[len] — the MU fallback is a C-string
// walker; `len` carries the true byte count for the length-framed USB
// bulk path. On virt, enumerated() is always false → MU fallback, so CI
// over QEMU is unaffected.
fn console_tx(s [*:0]u8, len u64) void {
    // Not enumerated: fall back to the UART's C-string sender.
    if !board_usb_enumerated() {
        main_output(MU, s)
        return
    }
    board_usb_cdc_tx(s, len)
}

// Post-lookup body for console writes. Pulls bytes from
// the user buffer in 255-byte chunks, NUL-terminates each chunk in
// the kernel scratch buffer, and hands it to console_tx via the
// existing C-string contract. Returns total bytes pushed. Reached
// through the unified sys_write dispatcher.
//
// Limitation: embedded NULs in the payload truncate the affected
// chunk because main_output dispatches to mini_uart_send_string /
// pl011_uart_send_string, both NUL-terminated walkers. The
// fd-redirect coverage is text-only; binary console output is future
// work alongside a length-aware UART send path.
fn writeConsoleBytes(buf_uva u64, len u64) i64 {
    var kbuf [256]u8 = undefined
    var sent u64 = 0
    while sent < len {
        // One byte of kbuf is reserved for the terminating NUL.
        const chunk = #min(len - sent, #as(u64, #intCast(kbuf.len - 1)))
        if copy_from_user(&kbuf, buf_uva + sent, chunk) < 0 {
            // Fault mid-stream: report what already went out, or -1 if
            // nothing did.
            return if (sent > 0) #intCast(sent) else -1
        }
        kbuf[chunk] = 0
        console_tx(#ptrCast(&kbuf), chunk)
        sent += chunk
    }
    return #intCast(sent)
}

// SYS_SET_CONSOLE_MODE (slot 25) — sets the
// kernel console echo/mask flags.
// CONSOLE_MODE_ECHO on => readConsoleBytes
// echoes drained printable bytes (cooked-style); CONSOLE_MODE_MASK on =>
// it echoes a '*' per printable byte instead (password masking); neither
// (the default) keeps the historical split where the kernel never echoes and
// userland readline owns echo. /bin/login uses ECHO to show the typed
// username and MASK to acknowledge the password without revealing it. Full
// termios / line discipline is still future work. SYS_CLOSE_CONSOLE stays
// inert (the unified ABI absorbs the close side via SYS_CLOSE on a console fd).
//
// Each call overwrites both flags from `mode`: a bit that is absent turns
// its flag off. Always returns 0.
export fn sys_setConsoleMode(mode u64) i64 {
    console_echo = (mode & defs.CONSOLE_MODE_ECHO) != 0
    console_mask = (mode & defs.CONSOLE_MODE_MASK) != 0
    return 0
}

// Inert slot: empty body, no effect.
export fn sys_closeConsole() void {}

// Debug-only — not part of the stable ABI.
// Pushes one byte into the kernel RX ring as if it had arrived on
// the UART. Powers deterministic [TEST] console-echo coverage on
// QEMU where there is no external input driver. Document as debug-only
// in DOCUMENTATION.md §5 and remove once a real host-input driver lands.
// Only the low bits of `byte` that survive the truncating cast are pushed.
export fn sys_console_inject(byte u64) void {
    console.console_test_push(#truncate(byte))
}

// Retired ABI slots. The numbers stay reserved forever — a stale binary
// invoking one gets a clean -1, never a silently different syscall.
export fn sys_retired() i64 {
    return -1
}

// ---- Unified fd-table ABI ----
//
// SYS_READ / SYS_WRITE / SYS_CLOSE / SYS_DUP2 dispatch by the fd's
// kind tag in current.fds and route through the post-lookup backend
// helpers (readConsoleBytes / writeConsoleBytes / readPipeBacked /
// writePipeBacked / readFileBacked / writeFileBacked) — one code path
// per backend. This is the sole entry point for all console / pipe /
// file I/O; the legacy per-kind shims that once shared these helpers
// were retired (see the retired-slots note at sys_retired).
// Unified read: look the fd up in the caller's table and hand off to the
// backend helper that matches the slot's kind tag. -1 on a null `current`,
// an unknown fd, an empty slot, or a file with no superblock.
export fn sys_read(fd i32, buf_uva u64, len u64) i64 {
    const caller = current orelse return -1
    const slot = fdtable.get(caller, fd) orelse return -1
    const kind = #as(fdtable.Kind, #enumFromInt(slot.kind))
    return switch kind {
        .none => -1,
        .console => readConsoleBytes(buf_uva, len),
        .pipe => readPipeBacked(#ptrCast(#alignCast(slot.ptr.?)), buf_uva, len),
        .file => blk: {
            const file *mut file_mod.File = #ptrCast(#alignCast(slot.ptr.?))
            const sb = vfsSb(file) orelse break :blk #as(i64, -1)
            break :blk readFileBacked(file, sb, buf_uva, len)
        },
    }
}

// Unified write: same kind-tag dispatch as sys_read, plus a write
// permission gate on file fds.
export fn sys_write(fd i32, buf_uva u64, len u64) i64 {
    const caller = current orelse return -1
    const slot = fdtable.get(caller, fd) orelse return -1
    const kind = #as(fdtable.Kind, #enumFromInt(slot.kind))
    return switch kind {
        .none => -1,
        .console => writeConsoleBytes(buf_uva, len),
        .pipe => writePipeBacked(#ptrCast(#alignCast(slot.ptr.?)), buf_uva, len),
        .file => blk: {
            const file *mut file_mod.File = #ptrCast(#alignCast(slot.ptr.?))
            const sb = vfsSb(file) orelse break :blk #as(i64, -1)
            // Permission gate: write-intent check against the metadata
            // the File has carried since open. Open is read-intent only
            // in this ABI, so a readable-but-not-writable file (0644
            // root, non-root caller) opens fine and is refused here with
            // -EACCES instead of a backend -1.
            const may_write = perm.checkAccess(file.mode, file.uid, file.gid, caller.euid, caller.egid, .write)
            if !may_write {
                break :blk #as(i64, -defs.EACCES)
            }
            break :blk writeFileBacked(file, sb, buf_uva, len)
        },
    }
}

// Unified close. File fds need an extra step before the slot is
// cleared: vfs_close runs the backend's flush (FAT32 cluster /
// dir-entry / FSInfo writeback; initramfs no-op). Pipe and console
// slots route straight through fdtable.close — refcount handles the
// pipe-page free, console is refcount-exempt.
export fn sys_close(fd i32) i32 {
    const caller = current orelse return -1
    // File fds only: run the backend's close hook before the slot goes away.
    if fdtable.getFile(caller, fd) |file| {
        if vfsSb(file) |sb| {
            preempt_disable()
            vfs.vfs_close(sb, file)
            preempt_enable()
        }
    }
    return fdtable.close(caller, fd)
}

// Duplicate `oldfd` onto `newfd` in the caller's fd table; the result is
// whatever fdtable.dup2 reports. -1 when there is no current task.
export fn sys_dup2(oldfd i32, newfd i32) i32 {
    const caller = current orelse return -1
    return fdtable.dup2(caller, oldfd, newfd)
}

// Working-directory ABI. Stores a NUL-terminated,
// `.` / `..`-collapsed absolute path into current.cwd. Relative
// arguments are joined against the existing cwd and then collapsed;
// absolute arguments are collapsed in place. No backend existence
// check — sys_readdir lands the directory probe; until
// then `chdir` is a pure store the open/execve boundary trusts.
// Returns 0 on success, -1 on a wild user pointer / un-NUL-terminated
// input / oversize composition / oversize resolved path.
export fn sys_chdir(path_ptr u64) i32 {
    const caller = current orelse return -1

    // Pull the argument in byte by byte until its NUL shows up.
    var arg_buf [layout.CWD_SIZE]u8 = undefined
    var terminated bool = false
    var pos usize = 0
    while pos < arg_buf.len - 1 {
        var byte u8 = 0
        if copy_from_user(#ptrCast(&byte), path_ptr + pos, 1) < 0 {
            return -1
        }
        arg_buf[pos] = byte
        if byte == 0 {
            terminated = true
            break
        }
        pos += 1
    }
    if !terminated {
        return -1
    }

    const requested = std.mem.span(#as([*:0]u8, #ptrCast(&arg_buf)))
    const old_cwd = std.mem.sliceTo(#as([*:0]u8, #ptrCast(&caller.cwd)), 0)
    // Normalise into a scratch buffer and commit to the task slot only
    // once that succeeded, so `cwd` survives an overflow or an overlong
    // collapse untouched. The slice stops one byte short of CWD_SIZE to
    // leave room for the trailing NUL in cwd[].
    var scratch [layout.CWD_SIZE]u8 = undefined
    const new_cwd = path_mod.joinResolve(old_cwd, requested, scratch[0 .. layout.CWD_SIZE - 1]) orelse return -1
    #memcpy(caller.cwd[0..new_cwd.len], new_cwd)
    caller.cwd[new_cwd.len] = 0
    return 0
}

// Working-directory readback. Copies the calling task's
// NUL-terminated `cwd` into the user buffer (path plus terminator) and
// returns the path length excluding the NUL.
// The readback half of
// sys_chdir: `cwd` is a plain TaskStruct field, so this allocates
// nothing and the harness free-page baseline is untouched. Returns -1 on
// a wild buffer UVA or a `len` too small to hold the path plus its NUL —
// a short buffer gets nothing, never a truncated path.
export fn sys_getcwd(buf_uva u64, len u64) i64 {
    const caller = current orelse return -1
    const path = std.mem.sliceTo(#as([*:0]u8, #ptrCast(&caller.cwd)), 0)
    // The buffer must take the whole path and its terminator.
    const needed = path.len + 1
    if len < needed {
        return -1
    }
    if copy_to_user(buf_uva, path.ptr, path.len) < 0 {
        return -1
    }
    // The terminator is written separately, right behind the path bytes.
    const terminator = [_]u8{0}
    if copy_to_user(buf_uva + path.len, &terminator, 1) < 0 {
        return -1
    }
    return #intCast(path.len)
}

// Directory enumeration. Stateless index walk: fill
// the `index`-th entry of the directory at `path` into the caller's
// Dirent and return 0; return -1 at end-of-directory, a bad/unmounted
// path, or a wild user pointer. There is no fd cursor — see
// lib/syscall_defs.zig SYS_READDIR for the stateless-ABI rationale. The
// path reaches the kernel through the soft copy_from_user (a wild UVA
// returns -1 with no zombification); relative paths join against
// current.cwd exactly as sys_openFile does, since vfs.resolve is still
// absolute-only. Allocates nothing — a future OOM audit inherits no
// new site from readdir (the core reason the ABI is stateless).
export fn sys_readdir(path_ptr u64, index u64, dirent_uva u64) i32 {
    const owner = current orelse return -1

    // Byte-wise copy of the user path, stopping at the first NUL or when the
    // kernel buffer (minus one reserved byte) runs out.
    var kpath [layout.CWD_SIZE]u8 = undefined
    const cap = kpath.len - 1
    var n usize = 0
    while n < cap {
        var ch u8 = 0
        if copy_from_user(#ptrCast(&ch), path_ptr + n, 1) < 0 {
            return -1
        }
        kpath[n] = ch
        if ch == 0 {
            break
        }
        n += 1
    }
    // Reaching `cap` means the terminator never showed up.
    if n == cap {
        return -1
    }

    const raw_path = std.mem.sliceTo(#as([*:0]u8, #ptrCast(&kpath)), 0)

    // Absolute paths are handed to the VFS as given; anything else (the
    // empty string included) is joined against the task's cwd first.
    var join_buf [layout.CWD_SIZE]u8 = undefined
    var resolved []u8 = undefined
    if (raw_path.len == 0) || (raw_path[0] != '/') {
        const base = std.mem.sliceTo(#as([*:0]u8, #ptrCast(&owner.cwd)), 0)
        resolved = path_mod.joinResolve(base, raw_path, &join_buf) orelse return -1
    } else {
        resolved = raw_path
    }

    // Fill a kernel-side Dirent under a preempt guard, then hand it to the
    // caller as raw bytes.
    var entry defs.Dirent = .{}
    preempt_disable()
    const status = vfs.vfs_readdir(resolved, index, &entry)
    preempt_enable()
    if status < 0 {
        return -1
    }
    if copy_to_user(dirent_uva, std.mem.asBytes(&entry), #sizeOf(defs.Dirent)) < 0 {
        return -1
    }
    return 0
}

// Kernel-log read. Snapshots the most-recent min(len, retained)
// bytes of the kernel byte-ring (src/klog_ring.zig) into the caller's
// buffer, oldest-first, and returns the count (0 on an empty ring). The
// window head/tail are read once up front so a concurrent main_output
// push cannot move `start` out from under the copy; the bytes are bounced
// through a 512-byte kernel buffer — the ring data wraps the modulo
// boundary, so it is not contiguous for a single copy_to_user — exactly
// like readFileBacked. Allocates nothing (the ring is static BSS), so the
// harness free-page baseline is untouched. A wild buffer UVA returns -1
// via the soft copy_to_user path; the task does not zombify.
// If a later chunk fails to copy after earlier chunks were delivered, the
// number of bytes delivered so far is returned instead of -1.
export fn sys_klog_read(buf_uva u64, len u64) i64 {
    _ = current orelse return -1

    // Snapshot the window bounds together: head/tail are monotone, so even
    // if a push lands mid-copy the indices stay masked and in-bounds, and
    // reading them as a pair keeps `start` consistent with `total`.
    const head = klog_ring.klog.head
    const tail = klog_ring.klog.tail
    const total = #min(len, head -% tail)
    const start = head -% total // most recent `total` bytes

    // Bounce buffer, refilled for every chunk; only the first `chunk` bytes
    // of it are ever sent out.
    var bounce [512]u8 = undefined
    var done u64 = 0
    while done < total {
        const chunk = #min(total - done, #as(u64, #intCast(bounce.len)))
        const base = start +% done
        var j u64 = 0
        while j < chunk {
            bounce[#intCast(j)] = klog_ring.klog.byteAt(base +% j)
            j += 1
        }
        if copy_to_user(buf_uva + done, &bounce, chunk) < 0 {
            if done == 0 {
                return -1
            }
            return #intCast(done)
        }
        done += chunk
    }
    return #intCast(done)
}

// ---- Process credentials ----
//
// The identity layer for the login/auth flow. Getters report the calling
// task's real / effective uid / gid (carried on TaskStruct, inherited by
// fork, preserved by execve). setuid / setgid apply a root-gated policy:
// an euid-0 (root) caller sets BOTH the real and effective id to any
// value; a dropped (non-root) caller may only reset to an id it already
// holds — so /bin/login (root) can drop to a user, but that user can
// never climb back. Failure returns -1 (EPERM-lite); the i64 return makes
// the sentinel representable. `current` is always set in EL0 syscall
// context — the orelse -1 is for the impossible null only.
// Real uid of the calling task, or -1 when there is no current task.
export fn sys_getuid() i64 {
    const me = current orelse return -1
    return #intCast(me.uid)
}

// Effective uid of the calling task, or -1 when there is no current task.
export fn sys_geteuid() i64 {
    const me = current orelse return -1
    return #intCast(me.euid)
}

// Real gid of the calling task, or -1 when there is no current task.
export fn sys_getgid() i64 {
    const me = current orelse return -1
    return #intCast(me.gid)
}

// Effective gid of the calling task, or -1 when there is no current task.
export fn sys_getegid() i64 {
    const me = current orelse return -1
    return #intCast(me.egid)
}

// Change the caller's uid. A caller whose euid is 0 gets both uid and euid
// set to `uid`. Any other caller may only move its euid to a value equal to
// its current uid or euid; its real uid is left alone. Returns 0 on
// success, -1 on refusal or when there is no current task.
export fn sys_setuid(uid u32) i64 {
    const me = current orelse return -1
    const is_root = me.euid == 0
    if !is_root {
        if (uid != me.uid) && (uid != me.euid) {
            return -1
        }
        me.euid = uid
        return 0
    }
    me.uid = uid
    me.euid = uid
    return 0
}

// Change the caller's gid. The privilege test is on euid (not egid): an
// euid-0 caller gets both gid and egid set to `gid`. Any other caller may
// only move its egid to a value equal to its current gid or egid. Returns 0
// on success, -1 on refusal or when there is no current task.
export fn sys_setgid(gid u32) i64 {
    const me = current orelse return -1
    const is_root = me.euid == 0
    if !is_root {
        if (gid != me.gid) && (gid != me.egid) {
            return -1
        }
        me.egid = gid
        return 0
    }
    me.gid = gid
    me.egid = gid
    return 0
}

// ---- Authentication ----

// The initramfs seed copy — read-only, baked into the kernel image, always
// present. The recovery anchor of the anti-brick design.
const SHADOW_PATH []u8 = "/etc/shadow"

// The writable FAT32 copy — what /bin/passwd rewrites. Consulted first so
// password changes take effect; absent on QEMU virt (no SD card) and on a
// freshly formatted card, in which case auth falls back to the seed.
const MNT_SHADOW_PATH []u8 = "/mnt/shadow"

// Auth working buffers — static, NOT stack. The per-task kernel stack
// shares its 4 KiB page with TaskStruct (~2.4 KiB usable above KeRegs),
// and the PBKDF2 / HMAC / SHA-256 call frames below already need a large
// share of that. Carrying another ~1.4 KiB of credential / file / digest
// buffers in sys_authenticate's own frame overflows the page and smashes
// the TaskStruct tail (fds table → wild vtable dispatch on the next
// sys_write). Statics sidestep that, exactly like execve.zig's exec_buf /
// argv_scratch. Same serialization argument too: single core, and the only
// callers are PID-1's [TEST] scenarios, /bin/login, and /bin/passwd — never
// concurrent.
// The password copy is overwritten by the next call; nothing
// here persists secrets beyond the syscall that wrote them.
const AuthScratch = struct {
    user [64]u8,
    pass [128]u8,
    fbuf [1024]u8,
    salt [64]u8,
    stored [64]u8,
    derived [32]u8,
}
var auth_scratch AuthScratch = undefined

const ReadFileError = error{ OpenFailed, ReadFailed }

// In-kernel whole-file read through the privileged VFS door (the
// execve.zig stack-File recipe: no file_mod.alloc → no page → the harness
// free-page baseline is untouched; preempt-guarded per VFS call). Returns
// the filled prefix of `buf`. OpenFailed = path does not resolve (not
// mounted / absent); ReadFailed = it resolved but a backend read errored
// (the corruption signal the fallback chain reports loudly).
// Reading stops at the first zero-length read or once `buf` is full; the
// backend is closed on every path after a successful open.
fn readWholeFile(path []u8, buf []mut u8) ReadFileError![]u8 {
    var opened vfs.OpenResult = .{}
    preempt_disable()
    const maybe_sb = vfs.vfs_open(path, &opened)
    preempt_enable()
    const sb = maybe_sb orelse return error.OpenFailed

    // Stack-resident File seeded from the open result.
    var handle file_mod.File = .{}
    handle.private = opened.private
    handle.size = opened.size
    handle.offset = 0

    var filled usize = 0
    var read_ok bool = true
    while filled < buf.len {
        const room u64 = buf.len - filled
        preempt_disable()
        const got = vfs.vfs_read(sb, &handle, buf[filled..].ptr, room)
        preempt_enable()
        if got <= 0 {
            // Zero ends the read normally; a negative count is a backend error.
            read_ok = got == 0
            break
        }
        filled += #intCast(got)
    }

    preempt_disable()
    vfs.vfs_close(sb, &handle)
    preempt_enable()

    if !read_ok {
        return error.ReadFailed
    }
    return buf[0..filled]
}

// Outcome of checking one credential pair against one shadow database.
// The distinction between no_user and corrupt drives the fallback chain:
// a parseable file that simply lacks the user is an authoritative denial,
// while a file with nothing parseable in it (truncation, garbage, a
// half-finished rewrite) falls back to the initramfs seed.
const VerifyResult = enum { match, mismatch, no_user, corrupt }

// Walk `content` line by line and verify `password` against the first
// line whose user field equals `username`.
// Uses auth_scratch.salt /
// .stored / .derived as decode + KDF scratch (single-caller discipline,
// see auth_scratch above).
// Result: .match / .mismatch for the first line naming `username`;
// .corrupt when that line's salt or hash hex does not decode, when the
// decoded hash is empty or longer than 32 bytes, or when no line in
// `content` parses at all; .no_user when at least one line parsed but none
// named `username`.
fn verifyAgainst(content []u8, username []u8, password []u8) VerifyResult {
    var saw_valid_line bool = false
    var begin usize = 0
    var cursor usize = 0
    // `cursor` runs one past the end so a final line without a trailing
    // newline is still processed.
    while cursor <= content.len {
        const at_end = cursor == content.len
        if at_end || (content[cursor] == '\n') {
            const line = content[begin..cursor]
            begin = cursor + 1
            if line.len != 0 {
                if shadow.parseLine(line) |entry| {
                    saw_valid_line = true
                    // Demo-grade ceiling: PBKDF2 runs only after a username match, so
                    // a miss returns sooner than a hit — a username-enumeration timing
                    // oracle. Left unmitigated on purpose: the shipped accounts are
                    // build-time public (named in the README), so the oracle reveals
                    // nothing secret. If accounts ever become private, run a dummy KDF
                    // on the miss path so a miss costs the same as a hit.
                    if std.mem.eql(u8, entry.user, username) {
                        // A matching line with undecodable hex is corruption, not denial.
                        const salt_len = shadow.hexDecode(entry.salt_hex, &auth_scratch.salt) orelse return .corrupt
                        const hash_len = shadow.hexDecode(entry.hash_hex, &auth_scratch.stored) orelse return .corrupt
                        // The derived-key buffer is 32 bytes; an empty or wider
                        // stored hash cannot be compared against it.
                        const width_ok = (hash_len >= 1) && (hash_len <= 32)
                        if !width_ok {
                            return .corrupt
                        }
                        sha256.pbkdf2HmacSha256(
                            password,
                            auth_scratch.salt[0..salt_len],
                            entry.iterations,
                            auth_scratch.derived[0..hash_len]
                        )
                        const same = sha256.ctEql(auth_scratch.derived[0..hash_len], auth_scratch.stored[0..hash_len])
                        return if (same) .match else .mismatch
                    }
                }
            }
        }
        cursor += 1
    }
    if saw_valid_line {
        return .no_user
    }
    return .corrupt
}

// sys_authenticate — the kernel-owned credential verifier. /bin/login
// passes a username + plaintext password; the kernel reads the active
// shadow database, finds the matching line, runs PBKDF2-HMAC-SHA256 over
// the password with the stored salt + iteration count, and constant-time-
// compares the result to the stored verifier. Returns 0 on a match, -1 on
// anything else (no such user, malformed line, wild pointer, hash
// mismatch).
// Userland never sees a salt or hash — only pass/fail; the KDF
// lives here (the design intent committed in src/sha256.zig's header).
//
// Shadow source order: the writable FAT32 copy (/mnt/shadow) is
// authoritative when it is present and parseable — that is where
// sys_passwd writes. The initramfs seed (/etc/shadow) is the fallback for
// QEMU virt (no SD), a fresh card, or a corrupt FAT32 copy — the latter
// two announce themselves loudly (anti-brick: corruption never locks the
// operator out, it falls back to the baked-in seed credentials).
//
// The plaintext password crosses the user→kernel boundary exactly once,
// into a static scratch buffer that the next call overwrites.
export fn sys_authenticate(user_uva u64, user_len u64, pass_uva u64, pass_len u64) i64 {
    _ = current orelse return -1

    // Scrub the plaintext password and the derived verifier on every exit
    // path. These live in static BSS (single-caller scratch), so
    // without this the last login's secret lingers until the next call happens
    // to overwrite it — a post-boot memory dump could lift it. Plain @memset
    // (not a volatile loop) suffices: auth_scratch's address escapes to the
    // extern copy_from_user below, so the stores are not dead-store-eliminable.
    // Mirrors execve.zig's argv_scratch scrub. Runs after the result is
    // computed, so pass/fail timing is unchanged.
    defer {
        #memset(&auth_scratch.pass, 0)
        #memset(&auth_scratch.derived, 0)
    }

    // Copy the credentials under hard caps. Soft-fail on overflow or a wild
    // UVA (same contract as sys_openFile — no zombify). The username must be
    // non-empty; an empty password is allowed and simply skips its copy.
    const user_fits = (user_len >= 1) && (user_len <= auth_scratch.user.len)
    if !user_fits {
        return -1
    }
    if pass_len > auth_scratch.pass.len {
        return -1
    }
    if copy_from_user(&auth_scratch.user, user_uva, user_len) < 0 {
        return -1
    }
    if pass_len > 0 {
        if copy_from_user(&auth_scratch.pass, pass_uva, pass_len) < 0 {
            return -1
        }
    }
    const username = auth_scratch.user[0..user_len]
    const password = auth_scratch.pass[0..pass_len]

    // 1. The writable FAT32 shadow, when it exists and is intact.
    if readWholeFile(MNT_SHADOW_PATH, &auth_scratch.fbuf) |content| {
        switch verifyAgainst(content, username, password) {
            .match => return 0,
            .mismatch, .no_user => return -1,
            // Nothing parseable → announce + fall through to the seed.
            .corrupt => main_output(MU, "[Debug] /mnt/shadow corrupt - falling back to initramfs seed\n"),
        }
    } else |err| {
        // OpenFailed is the normal miss (virt / fresh card) → silent.
        // ReadFailed means the file is there but unreadable → announce.
        // (`unreadable` is bound first so the else-block holds two
        // statements: a lone `if` in an error-capture else lowers to a
        // capture-less `else if` in stage1, dropping the `|err|` binding.)
        const unreadable = err == error.ReadFailed
        if unreadable {
            main_output(MU, "[Debug] /mnt/shadow unreadable - falling back to initramfs seed\n")
        }
    }

    // 2. The initramfs seed (always present, read-only). Only an exact
    // match succeeds here; every other outcome is a denial.
    const seed = readWholeFile(SHADOW_PATH, &auth_scratch.fbuf) catch return -1
    switch verifyAgainst(seed, username, password) {
        .match => return 0,
        else => return -1,
    }
}

// ---- Password change ----

// The /etc/passwd account database (initramfs, read-only). sys_passwd
// reads it to map the caller's uid back to a login name for the
// "non-root may only change its own record" rule. The account LIST is
// build-time-immutable; only passwords are mutable state.
const PASSWD_PATH []u8 = "/etc/passwd"

// sys_passwd working buffers — static for the same stack-budget and
// single-caller reasons as auth_scratch above (the PBKDF2 frames plus
// these would smash the 2.4 KiB kernel stack). The shadow file content
// and the KDF decode/derive scratch live in auth_scratch (fbuf / salt /
// stored / derived) — sys_passwd and sys_authenticate never run
// concurrently, so sharing those buffers is free.
const PasswdScratch = struct {
    user [64]u8,
    old_pass [128]u8,
    new_pass [128]u8,
    pwbuf [512]u8,
    salt_raw [16]u8,
    salt_hex [32]u8,
    hash_hex [64]u8,
}
var passwd_scratch PasswdScratch = undefined

// In-kernel whole-file overwrite through the privileged VFS door. The
// caller guarantees content.len equals the file's current size (the
// same-length rewrite contract), so the write never grows the file and
// the FAT32 dir-entry resize branch is never taken.
// Returns true once every byte of `content` was accepted; false when the
// path does not open or a write returns zero or a negative count. The
// backend is closed on every path after a successful open.
fn writeWholeFile(path []u8, content []u8) bool {
    var opened vfs.OpenResult = .{}
    preempt_disable()
    const maybe_sb = vfs.vfs_open(path, &opened)
    preempt_enable()
    const sb = maybe_sb orelse return false

    // Stack-resident File seeded from the open result, positioned at 0.
    var handle file_mod.File = .{}
    handle.private = opened.private
    handle.size = opened.size
    handle.offset = 0

    var written usize = 0
    while written < content.len {
        preempt_disable()
        const n = vfs.vfs_write(sb, &handle, content[written..].ptr, content.len - written)
        preempt_enable()
        if n <= 0 {
            // No progress: leave `written` short so the result is false.
            break
        }
        written += #intCast(n)
    }

    preempt_disable()
    vfs.vfs_close(sb, &handle)
    preempt_enable()

    return written >= content.len
}

// sys_passwd — kernel-owned password change (slot 46). Rewrites `user`'s
// record in the writable FAT32 shadow with a fresh kernel-minted salt and
// a PBKDF2 re-hash of the new password, in place and at the same byte
// length (the splice-safety contract — see shadow.rewriteLineInPlace).
//
// Authorization:
// * root (euid 0) — any record, old password not required (this is the
//   recovery path: root resets a forgotten user password).
// * everyone else — only the record whose login name maps to the
//   caller's own uid via /etc/passwd, and only with the correct old
//   password. Violations return -EACCES.
//
// Returns 0 on success; -EACCES on an authorization failure; -1 when
// there is no writable shadow (QEMU virt / fresh card — /mnt/shadow is
// the only rewrite target, the initramfs seed is immutable), the target
// user has no shadow record, the input is malformed, or the rewrite
// would change the record length.
//
// The salt source is the kernel entropy fallback (timer mix) — weak but
// fresh per change; the RNG200 hardware source is a named carve-out.
export fn sys_passwd(user_uva u64, user_len u64, old_uva u64, old_len u64, new_uva u64, new_len u64) i64 {
    const caller = current orelse return -1

    // Scrub both plaintext passwords + the derived verifier on every exit
    // path (same rationale as sys_authenticate). The salt/hash hex are public
    // verifier material, not secret, so they need no scrub.
    defer {
        #memset(&passwd_scratch.old_pass, 0)
        #memset(&passwd_scratch.new_pass, 0)
        #memset(&auth_scratch.derived, 0)
    }

    // Copy all three strings under hard caps (sys_authenticate contract:
    // soft-fail on overflow or a wild UVA, no zombify). Username and new
    // password must be non-empty; the old password may be empty, in which
    // case its copy is skipped.
    const user_fits = (user_len >= 1) && (user_len <= passwd_scratch.user.len)
    if !user_fits {
        return -1
    }
    if old_len > passwd_scratch.old_pass.len {
        return -1
    }
    const new_fits = (new_len >= 1) && (new_len <= passwd_scratch.new_pass.len)
    if !new_fits {
        return -1
    }
    if copy_from_user(&passwd_scratch.user, user_uva, user_len) < 0 {
        return -1
    }
    if old_len > 0 {
        if copy_from_user(&passwd_scratch.old_pass, old_uva, old_len) < 0 {
            return -1
        }
    }
    if copy_from_user(&passwd_scratch.new_pass, new_uva, new_len) < 0 {
        return -1
    }
    const username = passwd_scratch.user[0..user_len]
    const old_password = passwd_scratch.old_pass[0..old_len]
    const new_password = passwd_scratch.new_pass[0..new_len]

    const is_root = caller.euid == 0

    // Authorization for non-root callers: own record only. The caller's real
    // uid is looked up in /etc/passwd and the login name found there must
    // equal the requested username.
    if !is_root {
        const accounts = readWholeFile(PASSWD_PATH, &passwd_scratch.pwbuf) catch return -1
        const own = pwfile.lookupByUid(accounts, caller.uid) orelse return -defs.EACCES
        if !std.mem.eql(u8, own.user, username) {
            return -defs.EACCES
        }
    }

    // The rewrite target must exist and be readable: /mnt/shadow only.
    // Its absence is the graceful no-writable-shadow case (QEMU virt).
    const content = readWholeFile(MNT_SHADOW_PATH, &auth_scratch.fbuf) catch return -1

    // The target record must exist and parse (we need its iteration count
    // — the rewrite keeps it, which is half of the same-length contract).
    const span = shadow.findUserLine(content, username) orelse return -1
    const old_entry = shadow.parseLine(content[span.start..span.end]) orelse return -1

    // Non-root callers must prove knowledge of the old password against
    // the very record being replaced.
    if !is_root {
        switch verifyAgainst(content, username, old_password) {
            .match => {},
            .mismatch, .no_user => return -defs.EACCES,
            .corrupt => return -1,
        }
    }

    // Mint the new verifier: fresh salt, PBKDF2 over the new password with
    // the record's existing iteration count, both hex-encoded at the fixed
    // widths the same-length contract relies on.
    // NOTE(review): the result of hwrng.fill is discarded — confirm it
    // cannot report a failure that leaves salt_raw unfilled.
    _ = hwrng.fill(&passwd_scratch.salt_raw)
    _ = shadow.hexEncode(&passwd_scratch.salt_raw, &passwd_scratch.salt_hex) orelse return -1
    sha256.pbkdf2HmacSha256(
        new_password,
        &passwd_scratch.salt_raw,
        old_entry.iterations,
        auth_scratch.derived[0..32]
    )
    _ = shadow.hexEncode(auth_scratch.derived[0..32], &passwd_scratch.hash_hex) orelse return -1

    // Same-length in-place rewrite, then push the whole file back.
    // auth_scratch.fbuf still holds the file content; rewrite it there.
    const image = auth_scratch.fbuf[0..content.len]
    const patched = shadow.rewriteLineInPlace(
        image,
        username,
        &passwd_scratch.salt_hex,
        &passwd_scratch.hash_hex
    )
    if !patched {
        return -1
    }
    if !writeWholeFile(MNT_SHADOW_PATH, image) {
        return -1
    }
    return 0
}

/// Syscall dispatch table — referenced from entry.S (`adr x27, sys_call_table`).
/// Slot ↔ constant binding is compiler-enforced via the indexed
/// `t[defs.SYS_*]` writes below — a renumbering in lib/syscall_defs.zig
/// propagates here automatically and any duplicate id would overwrite
/// (and any gap would leave a null that still traps cleanly through the
/// unreachable kernel code path).
/// The upper dispatch bound is
/// NR_SYSCALLS in arch/aarch64/asm_defs_common.inc (`b.hs` in entry.S); keep it
/// in lockstep with the highest user-facing id +1.
///
/// The unified ABI (slots 32..35) carries all console / pipe /
/// file I/O. The legacy per-kind shims at slots 0 / 5 / 8 / 9 / 11 /
/// 23 / 24 / 27..29 were retired: those slots route to sys_retired
/// (a clean -1) and their numbers are never reused.
export var sys_call_table = blk: {
    // Every slot starts out null; only the ids assigned below get a handler.
    var t [defs.NR_SYSCALLS]?*anyopaque = [_]?*anyopaque{null} ** defs.NR_SYSCALLS
    t[defs.SYS_FORK] = #ptrCast(&sys_fork)
    t[defs.SYS_EXIT] = #ptrCast(&sys_exit)
    t[defs.SYS_WAIT] = #ptrCast(&sys_wait)
    t[defs.SYS_DUMP_FREE] = #ptrCast(&sys_dump_free)
    t[defs.SYS_KILL] = #ptrCast(&sys_kill)
    t[defs.SYS_EXECVE] = #ptrCast(&sys_execve)
    t[defs.SYS_OPEN_FILE] = #ptrCast(&sys_openFile)
    t[defs.SYS_SEEK] = #ptrCast(&sys_seek)
    t[defs.SYS_BRK] = #ptrCast(&sys_brk)
    t[defs.SYS_SBRK] = #ptrCast(&sys_sbrk)
    t[defs.SYS_MMAP] = #ptrCast(&sys_mmap)
    t[defs.SYS_MUNMAP] = #ptrCast(&sys_munmap)
    t[defs.SYS_MLOCK] = #ptrCast(&sys_mlock)
    t[defs.SYS_MUNLOCK] = #ptrCast(&sys_munlock)
    t[defs.SYS_PIPE] = #ptrCast(&sys_pipe)
    t[defs.SYS_SOCKET] = #ptrCast(&sys_socket)
    t[defs.SYS_MSGGET] = #ptrCast(&sys_msgget)
    t[defs.SYS_SEMGET] = #ptrCast(&sys_semget)
    t[defs.SYS_SHMGET] = #ptrCast(&sys_shmget)
    t[defs.SYS_SET_CONSOLE_MODE] = #ptrCast(&sys_setConsoleMode)
    t[defs.SYS_CLOSE_CONSOLE] = #ptrCast(&sys_closeConsole)
    t[defs.SYS_CONSOLE_INJECT] = #ptrCast(&sys_console_inject)
    t[defs.SYS_READ] = #ptrCast(&sys_read)
    t[defs.SYS_WRITE] = #ptrCast(&sys_write)
    t[defs.SYS_CLOSE] = #ptrCast(&sys_close)
    t[defs.SYS_DUP2] = #ptrCast(&sys_dup2)
    t[defs.SYS_CHDIR] = #ptrCast(&sys_chdir)
    t[defs.SYS_GETCWD] = #ptrCast(&sys_getcwd)
    t[defs.SYS_READDIR] = #ptrCast(&sys_readdir)
    t[defs.SYS_KLOG_READ] = #ptrCast(&sys_klog_read)
    t[defs.SYS_GETUID] = #ptrCast(&sys_getuid)
    t[defs.SYS_GETEUID] = #ptrCast(&sys_geteuid)
    t[defs.SYS_GETGID] = #ptrCast(&sys_getgid)
    t[defs.SYS_GETEGID] = #ptrCast(&sys_getegid)
    t[defs.SYS_SETUID] = #ptrCast(&sys_setuid)
    t[defs.SYS_SETGID] = #ptrCast(&sys_setgid)
    t[defs.SYS_AUTHENTICATE] = #ptrCast(&sys_authenticate)
    t[defs.SYS_PASSWD] = #ptrCast(&sys_passwd)
    t[defs.SYS_REBOOT] = #ptrCast(&sys_reboot)
    t[defs.SYS_MEMTOTAL] = #ptrCast(&sys_mem_total)
    t[defs.SYS_UPTIME] = #ptrCast(&sys_uptime)
    t[defs.SYS_CPU_TEMP] = #ptrCast(&sys_cpu_temp)
    t[defs.SYS_CPU_FREQ] = #ptrCast(&sys_cpu_freq)
    t[defs.SYS_CREATE] = #ptrCast(&sys_create)
    t[defs.SYS_UNLINK] = #ptrCast(&sys_unlink)
    t[defs.SYS_RENAME] = #ptrCast(&sys_rename)
    // Retired: legacy per-kind console / file / pipe / exec shims
    // (write_str, exec, readFile, writeFile, closeFile, openConsole,
    // readConsole, pipe_read, pipe_write, pipe_close). Slot numbers are
    // never reused; any caller gets -1. Written last, so a retired slot
    // wins over any named assignment that lands on the same index.
    for retired in ([_]usize{ 0, 5, 8, 9, 11, 23, 24, 27, 28, 29 }) {
        t[retired] = #ptrCast(&sys_retired)
    }
    break :blk t
}

// Build-time guard: arch/aarch64/asm_defs_common.inc must declare
// `#define NR_SYSCALLS 56` to match. If you bump the highest SYS_*
// constant in lib/syscall_defs.flash, also bump the asm-side literal,
// then update this comptime check.
comptime {
    if defs.NR_SYSCALLS != 56 {
        #compileError("NR_SYSCALLS drifted from arch/aarch64/asm_defs_common.inc — keep both in lockstep")
    }
}

/// Map each syscall function pointer to its high-mem (TTBR1) alias so
/// el0_svc can `blr` through the table after the user pgd has been
/// installed in TTBR0.
///
/// Null slots are left null. OR-ing LINEAR_MAP_BASE into an empty slot
/// would turn it into a non-null pointer to the very base of the linear
/// map, so a gap in the table would no longer read as "no handler".
/// The OR is idempotent, so calling this more than once is harmless.
export fn sys_call_table_relocate() void {
    var i usize = 0
    while i < defs.NR_SYSCALLS {
        const cur u64 = #intFromPtr(sys_call_table[i])
        // 0 is the integer value of a null slot: keep it null.
        if cur != 0 {
            sys_call_table[i] = #ptrFromInt(cur | LINEAR_MAP_BASE)
        }
        i += 1
    }
}