// page_alloc: physical page allocator for kernel memory.
// Isolated from scheduler state — no dependency on task_struct.

// Constants
//
// The pool is the physical window [MALLOC_START, MALLOC_END), split into
// PAGE_SIZE-byte pages; MALLOC_PAGES is how many pages that window holds.
pub const PAGE_SIZE u64 = 1 << 12
pub const MALLOC_START u64 = 0x40000000
pub const MALLOC_END u64 = 0xFC000000
pub const MALLOC_SIZE u64 = MALLOC_END - MALLOC_START
pub const MALLOC_PAGES u64 = MALLOC_SIZE / PAGE_SIZE

// Constant offset between a physical address and its kernel virtual
// address.
const LINEAR_MAP_BASE u64 = 0xFFFF000000000000

// Physical address -> kernel virtual address.
fn pa_to_kva(pa u64) u64 {
    return LINEAR_MAP_BASE + pa
}

// Kernel virtual address -> physical address.
fn kva_to_pa(kva u64) u64 {
    const pa u64 = kva - LINEAR_MAP_BASE
    return pa
}

// Memory map: one byte per pool page recording whether that physical page
// is allocated (1 = allocated, 0 = free).
// Stored in kernel BSS section. Declared `undefined`, so the boot path
// must run mem_map_init once before any get_free_page / free_page /
// dump_free_count call. The init is idempotent (it simply re-zeroes the
// map), so test code can reset state by calling it again.
var mem_map [MALLOC_PAGES]u8 = undefined

// Allocatable pool size in pages, frozen once boot-time reservation is
// done — total pool minus the pages reserve_below/above carve out for the
// kernel image and the out-of-RAM tail. Unlike the live free count, this
// does NOT move as pages are handed out, so SYS_MEMTOTAL can derive
// "used = total - free". Seeded to the whole pool by mem_map_init and
// decremented only on a fresh 0->1 reservation (see the reserve fns).
var pool_total u64 = MALLOC_PAGES

// Mark every page free and re-seed pool_total to the whole pool. Called
// eagerly from kernel_main on core 0 before any allocator user runs.
export fn mem_map_init() void {
    pool_total = MALLOC_PAGES
    var page usize = 0
    while page < MALLOC_PAGES {
        mem_map[page] = 0
        page += 1
    }
}

// Mark every page whose PA is below `end_pa` as allocated. Boot-only
// (called from kernel_main after mem_map_init) — prevents get_free_page
// from handing out PAs that overlap the kernel image and its reserved
// regions.
// The `_kernel_pa_end` linker symbol is the canonical input on
// each board: on virt the kernel image is loaded inside the pool window
// (PA 0x40080000, MALLOC_START = 0x40000000), so reserving up to and
// including the 64 MiB `.sdscratch` buffer is what keeps memzero from
// scribbling over its own code on a deep enough allocator run (the
// original ~9–12-fork stall). On rpi4b the kernel sits at PA 0x80000
// — below the pool — so the reserved range is empty and this is a
// no-op: the `end_pa <= MALLOC_START` check returns before any scan.
//
// A page that `end_pa` only partially covers is reserved whole (its base
// PA is still below `end_pa`). pool_total drops by one per page that
// flips 0 -> 1; pages already marked are left counted as they were.
export fn mem_map_reserve_below(end_pa u64) void {
    // Nothing at or below the pool start can overlap the pool.
    if end_pa <= MALLOC_START {
        return
    }
    var i usize = 0
    while i < MALLOC_PAGES {
        const pa u64 = MALLOC_START + #as(u64, #intCast(i)) * PAGE_SIZE
        // Pages are visited in ascending PA order, so the first page at
        // or past the limit ends the reserved prefix.
        if pa >= end_pa {
            break
        }
        // Subtract only on a fresh 0 -> 1 transition so overlapping
        // reservations never count the same page twice.
        if mem_map[i] == 0 {
            pool_total -= 1
        }
        mem_map[i] = 1
        i += 1
    }
}

// Mark every page whose PA is at or above `start_pa` as allocated. Used
// on virt (`-m 1G` ⇒ RAM ends at 0x80000000) to cap the pool at the
// actual RAM end, since MALLOC_END's RPi-derived 0xFC000000 sits beyond
// the virt RAM window — without this, get_free_page would hand out PAs
// that map to nothing once allocations exhausted the in-RAM half.
//
// No-op when `start_pa >= MALLOC_END` (no pool page lies that high).
// pool_total follows the same rule as mem_map_reserve_below: one
// subtraction per page that flips 0 -> 1.
export fn mem_map_reserve_above(start_pa u64) void {
    if start_pa >= MALLOC_END {
        return
    }
    var i usize = 0
    while i < MALLOC_PAGES {
        const pa u64 = MALLOC_START + #as(u64, #intCast(i)) * PAGE_SIZE
        if pa >= start_pa {
            if mem_map[i] == 0 {
                pool_total -= 1
            }
            mem_map[i] = 1
        }
        i += 1
    }
}

// Allocate a physical page; returns its physical address, or `0` on
// exhaustion. `0` is an unambiguous sentinel: the pool starts at
// `MALLOC_START` (0x40000000), so no live allocation is ever PA 0.
// Callers must check `== 0` and fail their operation cleanly rather
// than relying on the allocator to abort.
//
// First-fit: every call scans mem_map from index 0 and takes the lowest
// free page. The page is zeroed through its kernel virtual address
// before it is returned.
export fn get_free_page() u64 {
    for i in 0..MALLOC_PAGES {
        if mem_map[i] == 0 {
            mem_map[i] = 1 // Mark as allocated
            const ret u64 = MALLOC_START + #as(u64, #intCast(i)) * PAGE_SIZE
            // Zero the page before handing it out.
            memzero(pa_to_kva(ret), PAGE_SIZE)
            return ret
        }
    }
    // Out of physical memory — return the sentinel; the caller handles it.
    return 0
}

// Free a physical page. Argument must be a PA from get_free_page.
//
// Addresses outside the pool are ignored on both sides: below
// MALLOC_START (which includes the `0` exhaustion sentinel) and at or
// above MALLOC_END. A PA inside a page frees that whole page, and
// freeing an already-free page just leaves it free.
export fn free_page(p u64) void {
    // Check the low bound BEFORE subtracting: `p - MALLOC_START` on a
    // below-pool PA is an unsigned underflow (a trap in safety-checked
    // builds and undefined otherwise, assuming Zig's integer semantics
    // apply here).
    if p < MALLOC_START {
        return
    }
    const index usize = #intCast((p - MALLOC_START) / PAGE_SIZE)
    if index < MALLOC_PAGES {
        mem_map[index] = 0
    }
}

// Allocate a page and return its kernel virtual address, or `0` on
// exhaustion. The sentinel must propagate as a raw `0`: `pa_to_kva(0)`
// is `LINEAR_MAP_BASE` (≠ 0), so wrapping the zero PA would hide the
// failure behind a valid-looking KVA.
export fn get_kernel_page() u64 {
    const phys_page = get_free_page()
    if phys_page == 0 {
        return 0
    }
    return pa_to_kva(phys_page)
}

// Free a kernel page. Argument must be a KVA from get_kernel_page.
//
// A value below LINEAR_MAP_BASE — notably the `0` sentinel that
// get_kernel_page returns on exhaustion — is ignored rather than
// translated, because `kva - LINEAR_MAP_BASE` would underflow for it.
export fn free_kernel_page(kp u64) void {
    if kp < LINEAR_MAP_BASE {
        return
    }
    const pa = kva_to_pa(kp)
    free_page(pa)
}

// Print the count of currently-free physical pages over Mini-UART and
// return it. Format: `free_pages: <16-hex>\n`. Cheap (linear scan of
// mem_map) but only invoked at sync points by the leak-test path — a
// kernel boot baseline in kernel_main and again from user space via
// sys_dump_free before/after each scenario. The returned value powers
// the in-kernel test harness's [PASS]/[FAIL] decision; void callers
// (kernel_main) ignore it.
export fn dump_free_count() u64 {
    var free_count u64 = 0
    for i in 0..MALLOC_PAGES {
        if mem_map[i] == 0 {
            free_count += 1
        }
    }
    main_output(MU, "free_pages: ")
    main_output_u64(MU, free_count)
    main_output(MU, "\n")
    return free_count
}

// Allocatable pool size in pages, backing SYS_MEMTOTAL. Constant after
// boot reservation (see `pool_total`) — a tool computes used pages as
// this minus the live `dump_free_count`. Silent: unlike dump_free_count
// it is a userland metric, not a leak-test sync point.
export fn mem_total_count() u64 {
    // Plain read of the frozen pool size; prints nothing.
    return pool_total
}

// External C function declarations
extern fn memzero(start u64, size u64) void
extern fn main_output(interface i32, str [*:0]u8) void
extern fn main_output_u64(interface i32, inw u64) void

// Interface selector passed as the first argument of main_output and
// main_output_u64 by dump_free_count.
const MU i32 = 0

// ---------------------------------------------------------------------------
// Host-only unit tests. Compiled out of the kernel binary; `zig build test`
// links each per-module test target against `tests/host_stubs.zig`, which
// stubs the assembly-only externs (`memzero`, `panic`, `main_output*`)
// the kernel modules normally depend on.
// ---------------------------------------------------------------------------
const std = #import("std")

// Return the allocator to its just-booted state: every page free and
// pool_total re-seeded to the whole pool.
fn reset_for_test() void {
    mem_map_init()
}

// Mark every pool page allocated by writing mem_map directly. Draining
// via get_free_page would rescan from index 0 on each call (O(n^2) over
// ~770k pages).
fn exhaust_pool_for_test() void {
    for i in 0..MALLOC_PAGES {
        mem_map[i] = 1
    }
}

test "pa_to_kva / kva_to_pa round-trip" {
    const pa u64 = MALLOC_START + 7 * PAGE_SIZE
    try std.testing.expectEqual(pa, kva_to_pa(pa_to_kva(pa)))
}

test "mem_map_init zeroes the bitmap" {
    for i in 0..MALLOC_PAGES {
        mem_map[i] = 0xFF
    }
    mem_map_init()
    for i in 0..MALLOC_PAGES {
        try std.testing.expectEqual(#as(u8, 0), mem_map[i])
    }
}

test "mem_map_init re-seeds the pool total" {
    // reset_for_test depends on this: a total left over from an earlier
    // reservation must not leak into the next test.
    pool_total = 0
    mem_map_init()
    try std.testing.expectEqual(MALLOC_PAGES, mem_total_count())
}

test "get_free_page returns sequential pages from MALLOC_START" {
    reset_for_test()
    const a = get_free_page()
    const b = get_free_page()
    const c = get_free_page()
    try std.testing.expectEqual(#as(u64, MALLOC_START), a)
    try std.testing.expectEqual(#as(u64, MALLOC_START + PAGE_SIZE), b)
    try std.testing.expectEqual(#as(u64, MALLOC_START + 2 * PAGE_SIZE), c)
}

test "free_page reuses the slot on next allocation" {
    reset_for_test()
    const a = get_free_page()
    _ = get_free_page()
    free_page(a)
    const reused = get_free_page()
    try std.testing.expectEqual(a, reused)
}

test "free_page on an already-free page leaves the free count unchanged" {
    reset_for_test()
    const a = get_free_page()
    free_page(a)
    // Second free of the same PA: the page is already marked free.
    free_page(a)
    try std.testing.expectEqual(MALLOC_PAGES, dump_free_count())
}

test "dump_free_count tracks allocations" {
    reset_for_test()
    try std.testing.expectEqual(MALLOC_PAGES, dump_free_count())
    _ = get_free_page()
    _ = get_free_page()
    _ = get_free_page()
    try std.testing.expectEqual(MALLOC_PAGES - 3, dump_free_count())
}

test "free_page silently ignores above-range PA" {
    reset_for_test()
    const before = dump_free_count()
    free_page(MALLOC_END + PAGE_SIZE)
    free_page(MALLOC_END + 1024 * PAGE_SIZE)
    const after = dump_free_count()
    try std.testing.expectEqual(before, after)
}

test "get_kernel_page returns KVA of a free physical page" {
    reset_for_test()
    const kva = get_kernel_page()
    try std.testing.expect(kva >= LINEAR_MAP_BASE + MALLOC_START)
    free_kernel_page(kva)
    try std.testing.expectEqual(MALLOC_PAGES, dump_free_count())
}

test "get_free_page returns the 0 sentinel when the pool is exhausted" {
    reset_for_test()
    // Mark every page allocated directly. Draining via get_free_page
    // would rescan from index 0 on each call (O(n^2) over ~770k pages).
    exhaust_pool_for_test()
    try std.testing.expectEqual(#as(u64, 0), get_free_page())
}

test "get_kernel_page propagates the 0 sentinel (not LINEAR_MAP_BASE)" {
    reset_for_test()
    exhaust_pool_for_test()
    // The raw sentinel must survive: pa_to_kva(0) == LINEAR_MAP_BASE is a
    // non-zero, valid-looking KVA that would hide the exhaustion.
    try std.testing.expectEqual(#as(u64, 0), get_kernel_page())
}

test "mem_map_reserve_below marks the kernel-image prefix allocated" {
    reset_for_test()
    // Simulate virt: the kernel image ends 5 pages into the pool. The
    // first free page handed out must be the 6th, never a PA inside the
    // image (the page_alloc/memzero self-corruption the fix prevents).
    const end_pa = MALLOC_START + 5 * PAGE_SIZE
    mem_map_reserve_below(end_pa)
    try std.testing.expectEqual(MALLOC_PAGES - 5, dump_free_count())
    try std.testing.expectEqual(end_pa, get_free_page())
}

test "mem_map_reserve_below is a no-op when end_pa <= MALLOC_START (rpi4b)" {
    reset_for_test()
    // rpi4b: kernel at PA 0x80000, far below the pool — nothing reserved.
    mem_map_reserve_below(MALLOC_START)
    try std.testing.expectEqual(MALLOC_PAGES, dump_free_count())
    mem_map_reserve_below(0x80000)
    try std.testing.expectEqual(MALLOC_PAGES, dump_free_count())
    try std.testing.expectEqual(#as(u64, MALLOC_START), get_free_page())
}

test "mem_map_reserve_above caps the pool at the RAM end" {
    reset_for_test()
    // virt: RAM ends at 0x80000000, below MALLOC_END (0xFC000000). Pages
    // at or above the cap must never be handed out (they map to nothing).
    const ram_end u64 = 0x80000000
    mem_map_reserve_above(ram_end)
    const in_ram_pages = (ram_end - MALLOC_START) / PAGE_SIZE
    try std.testing.expectEqual(in_ram_pages, dump_free_count())
    // Mark every in-RAM page allocated except the last legal one, directly.
    // Draining via get_free_page would rescan from index 0 on each call
    // (O(n^2) over ~260k pages — see the exhaustion tests above).
    for i in 0..(in_ram_pages - 1) {
        mem_map[i] = 1
    }
    const last = get_free_page()
    try std.testing.expectEqual(ram_end - PAGE_SIZE, last)
    // Pool now exhausted within the RAM window — sentinel, no OOB PA.
    try std.testing.expectEqual(#as(u64, 0), get_free_page())
}

test "mem_map_reserve_above is a no-op when start_pa >= MALLOC_END" {
    reset_for_test()
    // No pool page has a PA at or above MALLOC_END, so neither the map
    // nor the pool total may move.
    mem_map_reserve_above(MALLOC_END)
    try std.testing.expectEqual(MALLOC_PAGES, dump_free_count())
    try std.testing.expectEqual(MALLOC_PAGES, mem_total_count())
    mem_map_reserve_above(MALLOC_END + PAGE_SIZE)
    try std.testing.expectEqual(MALLOC_PAGES, dump_free_count())
    try std.testing.expectEqual(MALLOC_PAGES, mem_total_count())
}

test "mem_total_count is the post-reserve pool size and ignores allocations" {
    reset_for_test()
    // Whole pool before any reservation.
    try std.testing.expectEqual(MALLOC_PAGES, mem_total_count())
    // Reserve a 5-page kernel prefix (virt-style): the total drops by 5.
    mem_map_reserve_below(MALLOC_START + 5 * PAGE_SIZE)
    try std.testing.expectEqual(MALLOC_PAGES - 5, mem_total_count())
    // Handing out pages must NOT move the total — only the free count does.
    _ = get_free_page()
    _ = get_free_page()
    try std.testing.expectEqual(MALLOC_PAGES - 5, mem_total_count())
}

test "mem_total_count counts overlapping reservations once" {
    reset_for_test()
    // reserve_above then reserve_below over an overlapping range must not
    // double-decrement: a page already reserved stays a single subtraction.
    const ram_end u64 = 0x80000000
    mem_map_reserve_above(ram_end)
    const above = MALLOC_PAGES - (ram_end - MALLOC_START) / PAGE_SIZE
    try std.testing.expectEqual(MALLOC_PAGES - above, mem_total_count())
    // A below-reservation that runs past the cap re-touches reserved pages;
    // only the fresh prefix below the cap should subtract.
    mem_map_reserve_below(ram_end + 10 * PAGE_SIZE)
    const below = (ram_end - MALLOC_START) / PAGE_SIZE
    try std.testing.expectEqual(MALLOC_PAGES - above - below, mem_total_count())
}