// kernel: boot and main loop.

const initramfs = #import("initramfs")
const initramfs_backend = #import("initramfs_backend")
const fat32_backend = #import("fat32_backend")
const fdtable = #import("fdtable")
const task_layout = #import("task_layout")

// Output-interface selectors: the first argument of the main_output* helpers
// and of trace_output_kernel_pts. In this file MU is the interface the boot
// console writes through (bootSink and every direct main_output call), and PL
// is the interface handed to trace_output_kernel_pts. What each numeric value
// means is defined by the extern implementations, not here.
const MU i32 = 0
const PL i32 = 1

// Boot status lines render through the shared console_ui module (lib/
// console_ui/) — the one place a bracket tag or an ANSI color is spelled.
// `boot` binds the Mini-UART console as the sink, so each bring-up step logs
// as `boot.ok(...)` / `boot.skip(...)` / `boot.warn(...)`. Restyle the whole
// boot log by editing console_ui, not here. Cosmetic — none of these lines are
// grepped by the boot contract. (The userspace contract markers in fsh.zig /
// login_elf.zig still hand-roll the `[ OK ]` form; migrating them onto
// console_ui is a follow-up.)
const console_ui = #import("console_ui")

// console_ui Sink bound to the Mini-UART boot console. Byte-at-a-time via
// main_output_char so the slice-based renderer meets the kernel's
// NUL-terminated main_output without a buffer — and without growing the tight
// per-task kernel stack.
fn bootSink(bytes []u8) void {
    // Forward the slice one byte at a time on the MU interface; nothing is
    // buffered and the slice does not need a NUL terminator.
    for b in bytes {
        main_output_char(MU, b)
    }
}

// Boot-log handle: console_ui's logger bound to bootSink. The bring-up code
// below reports through boot.ok / boot.skip / boot.warn.
const boot = console_ui.logger(&bootSink)

// clone_flags value passed to copy_process when PID 1 is created.
const KTHREAD u64 = 1

// IRQ numbers
// VC_AUX_IRQ is enabled on core 0 during bring-up; NS_PHYS_TIMER_IRQ is
// enabled right after generic_timer_init().
const VC_AUX_IRQ u32 = 125
const NS_PHYS_TIMER_IRQ u32 = 30

// UART / utils
extern fn mini_uart_init() void
extern fn main_output(interface i32, str [*:0]u8) void
extern fn main_output_u64(interface i32, n u64) void
extern fn main_output_char(interface i32, ch u8) void
extern fn main_output_process(interface i32, p *mut task_layout.TaskStruct) void
extern fn delay(ticks u64) void
extern fn get_el() u32

// Generic timer
extern fn generic_timer_init() void
extern fn get_sys_count() u64
extern fn hwrng_init() void

// IRQ
extern fn enable_interrupt_gic(intid u32, core u32) void
extern fn irq_init_vectors() void
extern fn irq_enable() void

// Fork / sched
extern fn copy_process(clone_flags u64, fn_ptr u64, arg u64) i32
extern fn prepare_move_to_user_elf(blob_addr_kva u64, blob_size u64) i32
extern fn sched_init() void
extern fn schedule() void
// Optional pointer to a task; kernel_process unwraps it to install stdio fds.
extern var current ?*mut task_layout.TaskStruct

// Syscall table
extern fn sys_call_table_relocate() void

// Board-driver trampolines. kernel.zig became a named module (src/kernel.flash);
// its generated .zig lives in the build cache, so it can no longer reach the
// board bag by a relative @import. The thin C-ABI wrappers live in the build
// root (src/start.zig), which imports the board bag as a named module — the same
// role fork.zig's move_to_user_elf_argv plays for execve. Reached here by symbol.
extern fn board_irq_init() void
extern fn board_usb_init() i32
extern fn board_usb_poll() void
extern fn board_emmc2_init() i32
extern fn board_emmc2_write_block(lba u32, buf *[512]u8) i32
extern fn board_emmc2_read_block(lba u32, buf *mut [512]u8) i32
extern fn board_uart_poll_rx_into_console() void

// Trace
extern fn trace_init() void
extern fn trace_output_kernel_pts(interface i32) void
extern fn pl011_uart_init() void
extern fn ksyms_init() void

// Page allocator
extern fn mem_map_init() void
extern fn mem_map_reserve_below(end_pa u64) void
extern fn mem_map_reserve_above(start_pa u64) void

// PA marker emitted by both board linker scripts: the page just past the
// kernel image and its board-specific reserved regions (page tables on
// rpi4b; page tables + 64 MiB sdscratch on virt). Read at boot so the
// page allocator never returns a PA that overlaps the kernel image.
extern var _kernel_pa_end u8

const build_options = #import("build_options")

extern fn dump_free_count() u64

// Cross-core boot synchronization
export var state u32 = 0

/// Body of PID 1. Returning from here goes back to entry.S, which does a
/// kernel_exit 0.
///
/// PID 1 is ELF-loaded: `/sbin/init` is the `pid1.elf` artifact baked into
/// the embedded initramfs. The image bytes are already TTBR1-mapped, so they
/// are passed in place (no allocation) to `prepare_move_to_user_elf` — the
/// same loader the exec-elf / flibc test payloads use. A missing image or a
/// failed load is reported through main_output on MU and the function simply
/// returns.
export fn kernel_process() void {
    // A lookup error and a "not found" result are treated the same way.
    const lookup = initramfs.locate("/sbin/init") catch null
    if (lookup == null) {
        main_output(MU, "PID 1: /sbin/init missing from initramfs\n")
        return
    }
    const init_image = lookup.?

    // Wire up stdio before control is handed to EL0: three console installs,
    // one each for fd 0, 1 and 2. Console slots are refcount-exempt shared
    // singletons (ptr=null, kind=console), so the installs allocate no page
    // and the free-page baseline stays where it was. fork() inherits them via
    // fdtable.dupAll; execve() preserves them. User space therefore starts
    // with fd 0/1/2 already attached to the mini-UART.
    const task *mut task_layout.TaskStruct = current.?
    var installed u32 = 0
    while (installed < 3) {
        _ = fdtable.install(task, .console, null)
        installed += 1
    }

    // Hand the in-place image (kernel virtual address + length) to the loader.
    const image_kva u64 = #intFromPtr(init_image.data.ptr)
    if (prepare_move_to_user_elf(image_kva, init_image.data.len) < 0) {
        main_output(MU, "PID 1: ELF load failed\n")
    }
}

// Scratch LBA for the EL1 block-I/O smoke check. Retargeted from
// LBA 34_816 to LBA 2064: the single-partition format_sd.sh means the
// old 34_816 falls inside the FAT32 data region and would collide with
// user files once the disk fills in. LBA 2064 sits in the FAT32
// reserved-sector window (partition start LBA 2048 + 16 = 17th reserved
// sector, between the BPB at LBA 2048 and FAT1 around LBA 2080), which
// no FAT32 driver reads or writes. The 16-sector offset matches the
// BPB's `reserved_sec_cnt = 32` window minus the first BPB sector and
// the FSInfo at LBA 2049 — well clear of both. One-constant permanent
// fix.
const EMMC2_BLOCK_LBA u32 = 2064

// EL1-side block-I/O smoke check. Writes a deterministic pattern to
// EMMC2_BLOCK_LBA, reads it back through the same vtable, byte-
// compares. Emits `[PASS] emmc2-block` on match and `[FAIL]
// emmc2-block` (with a short reason tag) otherwise. Both buffers
// live on the kernel stack — no page allocation, no shift to the
// free-page baseline. scripts/run_qemu_test.sh greps for `[FAIL]
// emmc2-block` and fails the run if present; the EL0 16/16 tally is
// unaffected because this scenario runs before PID 1 is forked.
fn run_emmc2_smoke() void {
    var write_buf [512]u8 = undefined
    var read_buf [512]u8 = undefined

    // Deterministic pattern: byte i holds the low 8 bits of (i + 0x42).
    var i usize = 0
    while (i < 512) {
        write_buf[i] = #intCast((i + 0x42) & 0xFF)
        i += 1
    }

    main_output(MU, "[TEST] emmc2-block\n")

    // Any non-zero status from the board wrappers counts as a failure.
    if (board_emmc2_write_block(EMMC2_BLOCK_LBA, &write_buf) != 0) {
        main_output(MU, "[FAIL] emmc2-block (write)\n")
        return
    }
    if (board_emmc2_read_block(EMMC2_BLOCK_LBA, &read_buf) != 0) {
        main_output(MU, "[FAIL] emmc2-block (read)\n")
        return
    }

    // Byte-for-byte compare; stop at the first difference.
    i = 0
    while (i < 512) {
        if (read_buf[i] != write_buf[i]) {
            main_output(MU, "[FAIL] emmc2-block (mismatch)\n")
            return
        }
        i += 1
    }
    main_output(MU, "[PASS] emmc2-block\n")
}

/// Kernel main routine; `id` is the number of the calling core.
///
/// Core 0 runs the one-time bring-up in the `id == 0` branch, in the order
/// written. Any other core spins forever at the `while (id != 0) {}` gate.
/// Core 0 then starts the generic timer, enables IRQs, initializes the
/// scheduler, creates PID 1 (`kernel_process`) and becomes the idle loop.
/// This function never returns.
export fn kernel_main_impl(id u64) void {
    // core 0 initializes mini-uart and handles uart interrupts
    if (id == 0) {
        // Page allocator bitmap zeroed first so anything later in bring-up
        // can hit get_free_page without a lazy-init branch.
        mem_map_init()

        // Reserve PAs occupied by the kernel image so get_free_page never
        // hands out a page that overlaps `.text` / `.data` / `.bss` /
        // page tables / sdscratch. On rpi4b the kernel sits below the
        // pool — reserve_below is a no-op. On virt the kernel is loaded
        // inside the pool window and the reservation is load-bearing.
        mem_map_reserve_below(#intFromPtr(&_kernel_pa_end))

        // Cap the pool at the actual RAM end on virt (QEMU `-m 1G` ⇒
        // RAM ends at 0x80000000, well below MALLOC_END's RPi-derived
        // 0xFC000000). Without this, an exhausting allocator path would
        // hand out PAs that map to nothing once the in-RAM half is full.
        if (build_options.board == .virt) {
            mem_map_reserve_above(0x80000000)
        }

        // Mini-UART first so the boot status lines land on the same cable
        // (pin 14/15) as the exception handler's "ERROR CAUGHT" output.
        mini_uart_init()
        boot.ok("Initialized Mini-UART console")

        // Startup banner right after the console comes up, so the log reads
        // chronologically: core 0 is the first thing running, before any of
        // the subsystem bring-up below. (Secondary cores park at the
        // `while (id != 0)` gate and never reach here, so this is core-0 only.)
        // Core id and EL are each printed as one character (value + '0'),
        // which is only a decimal digit for values 0-9.
        console_ui.tagged(&bootSink, console_ui.ok)
        bootSink("Booted core ")
        main_output_char(MU, #intCast(id + '0'))
        bootSink(" (EL")
        main_output_char(MU, #intCast(get_el() + '0'))
        bootSink(")\n")

        pl011_uart_init()
        boot.ok("Initialized PL011 trace UART")

        irq_init_vectors()
        boot.ok("Loaded exception vectors")

        // Board-specific GIC bring-up: GICv3 needs ICC_*_EL1 + per-core
        // redistributor wakeup. Pi's GICv2 inlines to nothing.
        board_irq_init()
        enable_interrupt_gic(VC_AUX_IRQ, #intCast(id))
        boot.ok("Enabled interrupt controller")

        // USB-OTG gadget bring-up (DWC2). The device MMIO at 0xFE980000 is
        // already device-mapped by boot.S, so this needs no page allocator.
        // Fails soft on QEMU (no DWC2 device path) — bounded waits return
        // -1 and the polled console simply never enumerates. Serviced from
        // the PID-0 idle loop below.
        if (board_usb_init() < 0) {
            boot.skip("USB gadget (no controller)")
        } else {
            boot.ok("Started USB gadget")
        }

        ksyms_init()
        boot.ok("Loaded kernel symbols")

        sys_call_table_relocate()
        boot.ok("Relocated syscall table")

        trace_init()
        boot.ok("Initialized trace subsystem")

        trace_output_kernel_pts(PL)
        boot.ok("Started kernel trace output")

        // VFS root mount bring-up. initramfs_backend
        // only sets pointers — no get_free_page — so it slots in ahead
        // of the free-page baseline emit without shifting it. The FAT32
        // /mnt mount is wired later, after board.emmc2.init() has wired
        // block_dev.sd_dev (fat32_backend.init issues block reads).
        initramfs_backend.init()
        boot.ok("Mounted initramfs root")

        // Block-device bring-up. On virt
        // the memory-backed fake never fails — graceful degradation
        // (log + continue) is still the contract for the rpi4b
        // driver, which can fail on missing SD card.
        // The smoke check below covers acceptance #2 + #7 in one
        // shot: it exercises the BlockDev vtable end-to-end and
        // proves init() wired `block_dev.sd_dev`.
        if (board_emmc2_init() < 0) {
            boot.skip("EMMC2 block device (init failed)")
        } else {
            boot.ok("Initialized EMMC2 block device")

            // Pre-PID-1 block-device smoke — part of the boot-as-test path,
            // gated so a clean (non-selftest) boot stays quiet.
            if (build_options.boot_selftest) {
                run_emmc2_smoke()
            }

            // FAT32 /mnt mount — needs block_dev.sd_dev, wired just
            // above by board.emmc2.init(). Fails soft: a blank/bad
            // disk leaves mount_table[1] null and /mnt/* resolves to
            // ENOENT.
            if (fat32_backend.init() < 0) {
                boot.skip("/mnt (no FAT32 volume)")
            } else {
                boot.ok("Mounted /mnt (FAT32)")

                // Permission overlay: init() parsed PERMS.TAB
                // into the backend's table. A mounted volume without a
                // parseable overlay is the loud anti-brick announcement:
                // /mnt runs on defaults (shadow floored 0600 root:root)
                // until the operator reseeds the overlay file.
                if (!fat32_backend.overlay_ok) {
                    boot.warn("/mnt overlay missing - defaults active, shadow floored")
                }
            }
        }

        // Entropy source bring-up. Seeds the fallback generator
        // from CNTPCT (readable from reset — independent of the
        // generic-timer IRQ setup below), self-tests, and announces the
        // active source. The announce line tees into the kernel log ring,
        // where [TEST] rng asserts it later. Allocates nothing.
        hwrng_init()

        // Boot-time free-page baseline. Logged before any task is created
        // so the user-space dumps later in the trace can be compared
        // against this absolute reference.
        if (build_options.boot_selftest) {
            _ = dump_free_count()
        }

        // Reset the cross-core counter; it is bumped to 1 further down.
        state = 0
    }

    // single core for now
    // Every core other than 0 spins here forever.
    while (id != 0) {}

    delay(30000)

    // generic timer and timer IRQ (vectors already loaded on core 0)
    generic_timer_init()
    enable_interrupt_gic(NS_PHYS_TIMER_IRQ, #intCast(id))
    irq_enable()

    // let the next core run
    state += 1

    while (true) {
        // Only core 0 with state == 1 proceeds. Given the gate above that is
        // the only combination that reaches this point.
        if (id != 0 || state != 1) {
            continue
        }

        sched_init()

        // create pid 1, kernel threads don't need a user stack page
        const res = copy_process(KTHREAD, #intFromPtr(&kernel_process), 0)
        if (res <= 0) {
            // Reported only — execution still falls through to the idle loop.
            main_output(MU, "fork error\n")
        }

        while (true) {
            // Idle-path UART RX poll (PID 0) — defensive backstop. The AUX
            // RX interrupt is the primary drain and reaches handle_irq on
            // real hardware; this only catches a byte left between IRQ
            // slots. No-op on virt.
            board_uart_poll_rx_into_console()
            board_usb_poll()
            schedule()
        }
    }
}