#include "asm_defs.inc"

/*
 * AArch64 early boot code, GNU as syntax, run through the C preprocessor.
 *
 * Section layout (all merged into the output .text.boot by the linker):
 *   .text.boot           entry points, EL drop, MMU enable
 *   .text.boot.late      page-table builders, proc_hang
 *   .text.boot.literals  explicit 64-bit literal table
 */

/*
 * create_table_entry - install one table descriptor.
 *
 *   tbl    register: base of the table being written
 *   ntbl   register: address of the next-level table
 *   va     register: virtual address selecting the slot
 *   shift  constant: bit position of this level's index within va
 *   flags  constant: descriptor attribute bits ORed into the entry
 *   tmp1, tmp2  scratch registers
 *
 * changes tmp1, tmp2 only
 */
.macro create_table_entry, tbl, ntbl, va, shift, flags, tmp1, tmp2
    /* get entry index in tmp1 */
    lsr     \tmp1, \va, #\shift
    and     \tmp1, \tmp1, #ENTRIES_PER_TABLE - 1
    /* tmp2 = entry value */
    mov     \tmp2, \ntbl
    orr     \tmp2, \tmp2, #\flags
    /* install entry (entries are 8 bytes wide, hence lsl #3) */
    str     \tmp2, [\tbl, \tmp1, lsl #3]
.endm

/*
 * create_block_map - fill consecutive block entries of one table.
 *
 *   pmd          register: base of the table being written
 *   vstart       register: first virtual address to map
 *   vend         register: end virtual address; the last entry written
 *                is index ((vend >> SECTION_SHIFT) - 1)
 *   pa           register: physical address backing vstart
 *   flags_label  label of a 64-bit literal holding the attribute bits;
 *                loaded with a PC-relative `ldr`
 *   tmp1         scratch register
 *
 * changes vstart, vend, pa, tmp1 (and the condition flags)
 * vstart and vend must differ by at least one block.
 *
 * `flags_label` is a PC-relative label in .text.boot.literals — the
 * literal table is kept out of .text.boot.late so that section stays
 * 4-byte aligned and the linker does not pad before it.
 */
.macro create_block_map, pmd, vstart, vend, pa, flags_label, tmp1
    /* turn vstart, vend into indices */
    lsr     \vstart, \vstart, #SECTION_SHIFT
    and     \vstart, \vstart, #ENTRIES_PER_TABLE - 1
    lsr     \vend, \vend, #SECTION_SHIFT
    /* minus one to handle the last entry; done before masking so an
     * end index of exactly ENTRIES_PER_TABLE becomes the last slot */
    sub     \vend, \vend, #1
    and     \vend, \vend, #ENTRIES_PER_TABLE - 1
    /* loop init, pa = (pa rounded down to a section) | flags */
    lsr     \pa, \pa, #SECTION_SHIFT
    lsl     \pa, \pa, #SECTION_SHIFT
    ldr     \tmp1, \flags_label
    orr     \pa, \pa, \tmp1
    /* loop */
    /* pmd[vstart] = pa */
2:  str     \pa, [\pmd, \vstart, lsl #3]
    /* pa += section size */
    add     \pa, \pa, #SECTION_SIZE
    /* vstart += 1 */
    add     \vstart, \vstart, #1
    /* both indices are masked to 0..ENTRIES_PER_TABLE-1, so the signed
     * compare behaves the same as an unsigned one here */
    cmp     \vstart, \vend
    b.le    2b
.endm

.section ".text.boot"

.globl _start
.globl _start_real
_start:
_start_real:
    /* only core 0 starts here */
    /* On virt the Linux arm64 image header (board/virt/image_header.S)
     * lives in .text.boot.header at the image base and branches here
     * via `_start_real`; on Pi the firmware enters at offset 0, which
     * is `_start` itself. Both labels alias the same instruction so
     * Pi kernel8.img stays byte-identical. */
    b       master
    /* unreachable */
    b       proc_hang

/*
 * master - entry point of the primary core.
 *
 * Drops to EL1, sets up the stack, zeroes BSS, builds the identity and
 * high page tables, wakes the secondary cores, enables the MMU and
 * calls kernel_main(0) through its high (LINEAR_MAP_BASE-offset)
 * address. Does not return.
 */
master:
    /* Save the DTB physical address that UEFI / QEMU `-kernel` hand
     * off in x0 (Linux arm64 boot protocol) — must happen before any
     * `bl` that might clobber x0. The macro is board-specific:
     * virt stores into the `.bss` global `dtb_pa`; Pi expands to
     * nothing because no firmware on Pi 4 hands off a DTB pointer. */
    /* NOTE(review): this runs before the BSS memzero below. If
     * `dtb_pa` really sits between bss_begin and bss_end, the saved
     * value is wiped again — confirm its placement in the board
     * macro / linker script. */
    save_dtb_pa x0

    bl      drop_to_el1

    /* Board-specific FP/SIMD enable at EL1 — virt sets
     * CPACR_EL1.FPEN, Pi inlines to nothing (armstub already did
     * it at EL3 and Pi's Zig binary contains no NEON). Must run
     * before any Zig code, so before the jump into kernel_main. */
    enable_fp_simd_el1

    /* Board-specific stack init — Pi expands to `mov sp, #LOW_MEMORY`
     * (single 4-byte instruction, baseline-identical); virt expands
     * to `ldr x9, =LOW_MEMORY; mov sp, x9` because its LOW_MEMORY
     * does not fit the immediate field. */
    mov_sp_low_memory x9

    /* Compute the BSS range as load (physical) addresses — the MMU is
     * still off here (map_identity/map_high run below). adr's
     * ADR_PREL_LO21 reach is only ±1 MiB; once kernel .bss grew past
     * that (large statics, e.g. execve's exec_buf) bss_end fell out of
     * range, so use the adrp/add ±4 GiB pair. */
    adrp    x0, bss_begin
    add     x0, x0, :lo12:bss_begin
    adrp    x1, bss_end
    add     x1, x1, :lo12:bss_end
    /* x1 = byte count = bss_end - bss_begin */
    sub     x1, x1, x0
    /* clear out the bss section */
    bl      memzero

    bl      map_identity
    bl      map_high

    bl      wake_up_cores

    /* save kernel pa base */
    adr     x0, _start
    adr     x1, KERNEL_PA_BASE
    str     x0, [x1]

    /* set ttbr's */
    adrp    x0, id_pg_dir
    msr     ttbr0_el1, x0
    adrp    x0, high_pg_dir
    msr     ttbr1_el1, x0

    /* MAIR/TCR/SCTLR rewrite: always-safe (idempotent on HW); required on
     * QEMU's -kernel shim which skips the EL3-side init armstub does on
     * real Pi 4. Without this the first translation walk after MMU enable
     * faults under QEMU. */
    /* Most literals (MAIR/LINEAR_MAP_BASE/HCR/SPSR) get GAS-optimised to
     * inline `movz` so they emit no pool entry. The two values that do
     * end up in the pool — TCR_EL1_VAL and SCTLR_EL1_VAL_MMU_ENABLED —
     * are routed through explicit labels in `.text.boot.literals`, which
     * keeps boot.S `.text.boot` literal-pool-free and stops GAS from
     * dumping a pool between el1_entry and the board's boot_quirks. */
    ldr     x0, =MAIR_EL1_VAL
    msr     mair_el1, x0
    ldr     x0, .Ltcr_el1_val
    msr     tcr_el1, x0
    adr     x0, vectors
    msr     vbar_el1, x0
    isb

    /* turn on the mmu */
    ldr     x0, .Lsctlr_mmu_enabled
    msr     sctlr_el1, x0
    isb

    /* prepare jumping to high mem: rebase sp and the kernel_main
     * address by LINEAR_MAP_BASE */
    ldr     x2, =LINEAR_MAP_BASE
    add     sp, sp, x2
    adr     x1, kernel_main
    add     x1, x1, x2
    /* core 0 */
    mov     x0, #0
    /* jump to high mem */
    blr     x1
    /* unreachable */
    b       proc_hang

/*
 * app - entry point of the secondary cores.
 *
 * Drops to EL1, derives a per-core stack from the low 8 bits of
 * MPIDR_EL1 (sp = LOW_MEMORY + core * SECTION_SIZE) and calls
 * kernel_main with that core number still in x0.
 */
.globl app
app:
    bl      drop_to_el1
    /* setup stack */
    mrs     x0, mpidr_el1
    and     x0, x0, #0xFF
    mov     x1, #SECTION_SIZE
    mul     x1, x1, x0
    /* Board-specific add of LOW_MEMORY (see master:'s comment). */
    add_low_memory x1, x1, x9
    mov     sp, x1
    bl      kernel_main
    /* kernel_main is not expected to return. Park the core instead of
     * falling through into drop_to_el1 below — same guard as the one
     * after master's `blr`. This adds one instruction compared with
     * the earlier layout. */
    b       proc_hang

/*
 * drop_to_el1 - return to the caller (x30) running at EL1.
 *
 * Clobbers x0. x30 is left untouched so that the `ret` at el1_entry,
 * reached through `eret`, lands back in the caller.
 */
drop_to_el1:
    /* Three valid entry paths reach this routine:
     *   - armstub8.bin runs first (real Pi 4 hardware): EL3 with
     *     SPSR_EL3 pre-loaded for the EL1h drop.
     *   - QEMU `-M raspi4b -kernel` shim hands off at EL2 with no
     *     SPSR setup.
     *   - QEMU `-M virt -kernel` (and UEFI/GRUB chain) hands off
     *     directly at EL1.
     * CurrentEL discriminates: the EL1 case is matched by the
     * board macro, EL2 by the b.eq below, EL3 falls through. */
    mrs     x0, CurrentEL
    check_el1_already x0
    /* CurrentEL holds the level in bits [3:2] */
    cmp     x0, #(2 << 2)
    b.eq    drop_from_el2
    /* EL3 path: armstub already wrote SPSR_EL3 / HCR_EL2 / SCR_EL3
     * etc. Eret to el1_entry. */
    adr     x0, el1_entry
    msr     ELR_EL3, x0
    eret

drop_from_el2:
    /* EL2 path: replicate the bits of armstub's setup that matter for
     * dropping to EL1. Both values fit in a single movz, so GAS keeps
     * them inline — no pool entry, no boundary shift. */
    ldr     x0, =HCR_EL2_VAL
    msr     HCR_EL2, x0
    ldr     x0, =SPSR_EL3_VAL
    msr     SPSR_EL2, x0
    adr     x0, el1_entry
    msr     ELR_EL2, x0
    eret

el1_entry:
    /* back to drop_to_el1's caller via the preserved x30 */
    ret

/* Board-specific wake_up_cores lives in
 * src/board/{board}/boot_quirks.S; the linker concatenates its
 * ".text.boot" between this file's ".text.boot" and
 * ".text.boot.late" below, preserving the original layout. */

.section ".text.boot.late"

/*
 * map_identity - zero and populate the identity page tables.
 *
 * Keeps the return address in x29 across `bl memzero`, so memzero and
 * the board macro are expected to leave x29 alone. The board macro is
 * expanded with x0 = id_pg_dir and x1 = id_pg_dir + PAGE_SIZE.
 */
map_identity:
    /* save return address */
    mov     x29, x30

    adrp    x0, id_pg_dir
    mov     x1, #ID_MAP_TABLE_SIZE
    /* clear id page tables */
    bl      memzero

    adrp    x0, id_pg_dir
    /* x1 = address of id map pud */
    add     x1, x0, #PAGE_SIZE

    /* Board-specific PUD/PMD setup. `.macro map_identity_regions` is
     * defined in src/board/{board}/board_asm_defs.inc — Pi maps PA
     * 0..0x1000000 via PUD index 0; virt maps PA
     * 0x40000000..0x41000000 via PUD index 1 so the kernel image at
     * PA 0x40080000 stays addressable across the MMU-enable point.
     * The macro expands inline; rpi4b output is byte-identical. */
    map_identity_regions

    /* restore return address */
    mov     x30, x29
    ret

/*
 * map_high - zero and populate the high (LINEAR_MAP_BASE) page tables.
 *
 * Same x29-as-saved-LR convention as map_identity. Installs the PGD
 * entry for LINEAR_MAP_BASE, then expands the board macro with x0 and
 * x1 each advanced by one page and x4 = LINEAR_MAP_BASE.
 */
map_high:
    /* save return address */
    mov     x29, x30

    adrp    x0, high_pg_dir
    mov     x1, #HIGH_MAP_TABLE_SIZE
    /* clear high page tables */
    bl      memzero

    adrp    x0, high_pg_dir
    /* x1 = address of high map pud */
    add     x1, x0, #PAGE_SIZE

    /* x4 = address of the mapped va (pgd) */
    ldr     x4, =LINEAR_MAP_BASE
    /* install PGD entry */
    create_table_entry x0, x1, x4, PGD_SHIFT, TD_KERNEL_TABLE_FLAGS, x2, x3

    /* goto next level */
    add     x0, x0, #PAGE_SIZE
    add     x1, x1, #PAGE_SIZE

    /* Board-specific PUD/PMD setup + create_block_map calls.
     * `.macro map_high_regions` is defined in
     * src/board/{board}/board_asm_defs.inc — Pi 4 fans out to four
     * 1 GiB PUD slots with a 64 MiB device window in the last PMD;
     * other boards adapt freely. The macro expands inline, so the
     * emitted bytes for rpi4b stay byte-identical to the previous
     * unrolled form. */
    map_high_regions

    /* restore return address */
    mov     x30, x29
    ret

/* proc_hang - park the calling core forever. */
proc_hang:
    /* wait for event */
    wfe
    b       proc_hang

/* Explicit literal table — kept out of .text.boot.late so that section
 * stays 4-byte aligned (no 8-byte data inside ⇒ no padding before it
 * when ld concatenates it after .text.boot). Order matches the GAS
 * literal-pool encounter order pre-split, so the table is layout-
 * identical to the single-section baseline. Only values that GAS
 * could not movz-fold into a single inline instruction live here:
 * 32+-bit non-single-chunk values (TCR, SCTLR, HIGH_MAP_*_END,
 * HIGH_MAP_SECOND_START, TD_*_BLOCK_FLAGS, HIGH_MAP_DEVICE_END).
 * linker.ld places .text.boot.literals right after .text.boot.late
 * inside the output .text.boot section. */
.section ".text.boot.literals"

/* The entries are read with 64-bit `ldr xN, <label>` while the MMU is
 * still off, so each one must be naturally aligned. Request 8-byte
 * alignment here instead of relying on the size of the preceding code;
 * this only affects padding in front of this section and emits none
 * when the table is already aligned. */
.balign 8

.Ltcr_el1_val:
    .quad   TCR_EL1_VAL
.Lsctlr_mmu_enabled:
    .quad   SCTLR_EL1_VAL_MMU_ENABLED
.Ltd_kernel_block_flags:
    .quad   TD_KERNEL_BLOCK_FLAGS
.Lhigh_map_first_end:
    .quad   HIGH_MAP_FIRST_END
.Lhigh_map_second_start:
    .quad   HIGH_MAP_SECOND_START
.Lhigh_map_second_end:
    /* same value as HIGH_MAP_THIRD_START; one entry, two consumers */
    .quad   HIGH_MAP_SECOND_END
.Lhigh_map_third_end:
    /* same value as HIGH_MAP_FOURTH_START */
    .quad   HIGH_MAP_THIRD_END
.Lhigh_map_fourth_end:
    /* same value as HIGH_MAP_DEVICE_START */
    .quad   HIGH_MAP_FOURTH_END
.Lhigh_map_device_end:
    .quad   HIGH_MAP_DEVICE_END
.Ltd_device_block_flags:
    .quad   TD_DEVICE_BLOCK_FLAGS