Getting this working took several iterations with the model, because the initial implementation was missing pieces. I ended up debugging the result and then prompting for each of the missing bits:
- a basic gicv3 implementation
- a couple of rounds of tweaking the config
- mapping the GIC via page tables
- fixing the compilation of the support library
- extending the elapsed time check for WFE
- light re-ordering of Makefile, setting correct machine opts

Signed-off-by: Alex Bennée <[email protected]>
---
 tests/tcg/aarch64/system/lib/gicv3.h      | 56 +++++++++++++++++
 tests/tcg/aarch64/system/lib/gicv3.c      | 77 +++++++++++++++++++++++
 tests/tcg/aarch64/system/wfx.c            | 17 ++++-
 tests/tcg/aarch64/Makefile.softmmu-target | 13 +++-
 tests/tcg/aarch64/system/boot.S           | 57 ++++++++++------
 5 files changed, 198 insertions(+), 22 deletions(-)
 create mode 100644 tests/tcg/aarch64/system/lib/gicv3.h
 create mode 100644 tests/tcg/aarch64/system/lib/gicv3.c

diff --git a/tests/tcg/aarch64/system/lib/gicv3.h b/tests/tcg/aarch64/system/lib/gicv3.h
new file mode 100644
index 00000000000..9a1268937c6
--- /dev/null
+++ b/tests/tcg/aarch64/system/lib/gicv3.h
@@ -0,0 +1,56 @@
+/*
+ * GICv3 Helper Library
+ *
+ * Copyright (c) 2024 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GICV3_H
+#define GICV3_H
+
+#include <stdint.h>
+
+/* Virt machine GICv3 base addresses */
+#define GICD_BASE           0x08000000  /* c.f. VIRT_GIC_DIST */
+#define GICR_BASE           0x080a0000  /* c.f. VIRT_GIC_REDIST */
+
+/* Distributor registers */
+#define GICD_CTLR           (GICD_BASE + 0x0000)
+#define GICD_TYPER          (GICD_BASE + 0x0004)
+#define GICD_IIDR           (GICD_BASE + 0x0008)
+
+/* Redistributor registers (per-CPU) */
+#define GICR_SGI_OFFSET     0x00010000
+
+#define GICR_CTLR           0x0000
+#define GICR_WAKER          0x0014
+#define GICR_IGROUPR0       (GICR_SGI_OFFSET + 0x0080)
+#define GICR_ISENABLER0     (GICR_SGI_OFFSET + 0x0100)
+#define GICR_IPRIORITYR0    (GICR_SGI_OFFSET + 0x0400)
+
+/* GICD_CTLR bits */
+#define GICD_CTLR_ARE_NS    (1U << 4)
+#define GICD_CTLR_ENA_G1NS  (1U << 1)
+#define GICD_CTLR_ENA_G0    (1U << 0)
+
+/* GICR_WAKER bits */
+#define GICR_WAKER_ChildrenAsleep (1U << 2)
+#define GICR_WAKER_ProcessorSleep (1U << 1)
+
+/**
+ * gicv3_init:
+ *
+ * Initialize GICv3 distributor and the redistributor for the current CPU.
+ */
+void gicv3_init(void);
+
+/**
+ * gicv3_enable_irq:
+ * @irq: The IRQ number to enable
+ *
+ * Enable an SGI or PPI (irq < 32). SPIs are not implemented and are ignored.
+ */
+void gicv3_enable_irq(unsigned int irq);
+
+#endif /* GICV3_H */
diff --git a/tests/tcg/aarch64/system/lib/gicv3.c b/tests/tcg/aarch64/system/lib/gicv3.c
new file mode 100644
index 00000000000..a09a0e430e6
--- /dev/null
+++ b/tests/tcg/aarch64/system/lib/gicv3.c
@@ -0,0 +1,77 @@
+/*
+ * GICv3 Helper Library Implementation
+ *
+ * Copyright (c) 2024 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "gicv3.h"
+
+#define write_sysreg(r, v) do {                     \
+        uint64_t __val = (uint64_t)(v);             \
+        asm volatile("msr " #r ", %x0"              \
+                     : : "rZ" (__val));             \
+} while (0)
+
+#define isb() asm volatile("isb" : : : "memory")
+
+static inline void write_reg(uintptr_t addr, uint32_t val)
+{
+    *(volatile uint32_t *)addr = val;
+}
+
+static inline uint32_t read_reg(uintptr_t addr)
+{
+    return *(volatile uint32_t *)addr;
+}
+
+void gicv3_init(void)
+{
+    uint32_t val;
+
+    /* 1. Enable Distributor ARE and Group 1 NS */
+    val = read_reg(GICD_CTLR);
+    val |= GICD_CTLR_ARE_NS | GICD_CTLR_ENA_G1NS;
+    write_reg(GICD_CTLR, val);
+
+    /* 2. Wake up Redistributor 0 */
+    /* Clear ProcessorSleep */
+    val = read_reg(GICR_BASE + GICR_WAKER);
+    val &= ~GICR_WAKER_ProcessorSleep;
+    write_reg(GICR_BASE + GICR_WAKER, val);
+
+    /* Wait for ChildrenAsleep to be cleared */
+    while (read_reg(GICR_BASE + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+        /* spin */
+    }
+
+    /* 3. Enable CPU interface */
+    /* Set Priority Mask to allow all interrupts */
+    write_sysreg(ICC_PMR_EL1, 0xff);
+    /* Enable Group 1 Non-Secure interrupts */
+    write_sysreg(ICC_IGRPEN1_EL1, 1);
+    isb();
+}
+
+void gicv3_enable_irq(unsigned int irq)
+{
+    if (irq < 32) {
+        /* SGI/PPI: use GICR_ISENABLER0 */
+        uintptr_t addr;
+
+        /* Set Group 1 */
+        addr = GICR_BASE + GICR_IGROUPR0;
+        write_reg(addr, read_reg(addr) | (1U << irq));
+
+        /* Set priority (0xa0) */
+        addr = GICR_BASE + GICR_IPRIORITYR0 + irq;
+        *(volatile uint8_t *)addr = 0xa0;
+
+        /* Enable it */
+        addr = GICR_BASE + GICR_ISENABLER0;
+        write_reg(addr, 1U << irq);
+    } else {
+        /* SPI: not implemented yet */
+    }
+}
diff --git a/tests/tcg/aarch64/system/wfx.c b/tests/tcg/aarch64/system/wfx.c
index 59436c381fd..567d9e59c70 100644
--- a/tests/tcg/aarch64/system/wfx.c
+++ b/tests/tcg/aarch64/system/wfx.c
@@ -8,6 +8,7 @@
 
 #include <stdint.h>
 #include <minilib.h>
+#include "gicv3.h"
 
 #define __stringify_1(x...) #x
 #define __stringify(x...) __stringify_1(x)
@@ -31,6 +32,9 @@
 #define wfit(reg) asm volatile("wfit %0" : : "r" (reg) : "memory")
 #define wfet(reg) asm volatile("wfet %0" : : "r" (reg) : "memory")
 
+#define enable_irq() asm volatile("msr daifclr, #2" : : : "memory")
+#define disable_irq() asm volatile("msr daifset, #2" : : : "memory")
+
 static void wait_ticks(uint64_t ticks)
 {
     uint64_t start = read_sysreg(cntvct_el0);
@@ -44,6 +48,9 @@ int main(void)
     uint64_t start, end, elapsed;
     uint64_t timeout;
 
+    gicv3_init();
+    gicv3_enable_irq(27); /* Virtual Timer PPI */
+
     ml_printf("WFX Test\n");
 
     /* 1. Test WFI with timer interrupt */
@@ -58,8 +65,13 @@ int main(void)
      * We don't have a full interrupt handler, but WFI should wake up
      * when the interrupt is pending even if we have it masked at the CPU.
      * PSTATE.I is set by boot code.
+     *
+     * We unmask interrupts here to ensure the CPU can take the minimal
+     * exception handler defined in boot.S.
      */
+    enable_irq();
     wfi();
+    disable_irq();
     end = read_sysreg(cntvct_el0);
     elapsed = end - start;
     if (elapsed < 100000) {
@@ -76,11 +88,12 @@ int main(void)
     wfe(); /* Should return immediately */
     end = read_sysreg(cntvct_el0);
     elapsed = end - start;
-    if (elapsed > 1000) { /* Should be very fast */
+    /* while this should be fast there is some overhead from TCG */
+    if (elapsed > 20000) {
         ml_printf("FAILED: WFE slept despite SEV (%ld ticks)\n", elapsed);
         return 1;
     }
-    ml_printf("PASSED\n");
+    ml_printf("PASSED (%ld ticks)\n", elapsed);
 
     /* 3. Test WFIT */
     ml_printf("Testing WFIT...");
diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
index 84342c52cd7..9a5b95de621 100644
--- a/tests/tcg/aarch64/Makefile.softmmu-target
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
@@ -4,8 +4,9 @@
 
 AARCH64_SRC=$(SRC_PATH)/tests/tcg/aarch64
 AARCH64_SYSTEM_SRC=$(AARCH64_SRC)/system
+AARCH64_SYSTEM_LIB_SRC=$(AARCH64_SYSTEM_SRC)/lib
 
-VPATH+=$(AARCH64_SYSTEM_SRC)
+VPATH+=$(AARCH64_SYSTEM_SRC) $(AARCH64_SYSTEM_LIB_SRC)
 
 # These objects provide the basic boot code and helper functions for all tests
 CRT_OBJS=boot.o
@@ -24,7 +25,7 @@ LINK_SCRIPT=$(AARCH64_SYSTEM_SRC)/kernel.ld
 LDFLAGS=-Wl,-T$(LINK_SCRIPT)
 TESTS+=$(AARCH64_TESTS) $(MULTIARCH_TESTS)
 EXTRA_RUNS+=$(MULTIARCH_RUNS)
-CFLAGS+=-nostdlib -ggdb -O0 $(MINILIB_INC)
+CFLAGS+=-nostdlib -ggdb -O0 $(MINILIB_INC) -I$(AARCH64_SYSTEM_LIB_SRC)
 LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
 
 config-cc.mak: Makefile
@@ -102,7 +103,15 @@ run-pauth-3:
 	$(call skip-test, "RUN of pauth-3", "not built")
 endif
 
+gicv3.o: gicv3.c gicv3.h
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -c $< -o $@
+
 wfx: CFLAGS += -march=armv8.7-a
+wfx: LDFLAGS += gicv3.o
+wfx: gicv3.o
+
+QEMU_GICV3_MACHINE=-M virt,gic-version=3 -cpu max -display none
+run-wfx: QEMU_OPTS=$(QEMU_GICV3_MACHINE) $(QEMU_BASE_ARGS) -kernel
 
 ifneq ($(CROSS_CC_HAS_ARMV8_MTE),)
 QEMU_MTE_ENABLED_MACHINE=-M virt,mte=on -cpu max -display none
diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
index 8bfa4e4efc7..6a71fc0da5a 100644
--- a/tests/tcg/aarch64/system/boot.S
+++ b/tests/tcg/aarch64/system/boot.S
@@ -60,7 +60,6 @@ curr_sp0_irq:
 curr_sp0_fiq:
 curr_sp0_serror:
 curr_spx_sync:
-curr_spx_irq:
 curr_spx_fiq:
 curr_spx_serror:
 lower_a64_sync:
@@ -248,29 +247,34 @@ at_testel:
 	msr	ttbr0_el1, x0
 
 	/*
-	 * Setup a flat address mapping page-tables. Stage one simply
-	 * maps RAM to the first Gb. The stage2 tables have two 2mb
-	 * translation block entries covering a series of adjacent
-	 * 4k pages.
+	 * Setup a flat address mapping page-tables.
+	 *
+	 * ttb (Level 1):
+	 * - Entry 0 [0 - 1GB]: 1GB Device block (for GIC and other H/W)
+	 * - Entry 1 [1GB - 2GB]: Table entry pointing to ttb_stage2 (for RAM)
 	 */
 
-	/* Stage 1 entry: indexed by IA[38:30] */
-	adr	x1, .				/* phys address */
-	bic	x1, x1, #(1 << 30) - 1		/* 1GB alignment*/
-	add	x2, x0, x1, lsr #(30 - 3)	/* offset in l1 page table */
+	/* Entry 0: 1GB Device block mapping at 0x0 */
+	ldr	x1, =0x401 | (1 << 2)		/* AF=1, block, AttrIndx=Attr1 (Device) */
+	str	x1, [x0]
 
-	/* point to stage 2 table [47:12] */
-	adrp	x0, ttb_stage2
-	orr 	x1, x0, #3 			/* ptr to stage 2 */
-	str	x1, [x2]
+	/* Entry 1: Table entry pointing to ttb_stage2 */
+	adrp	x1, ttb_stage2
+	orr	x1, x1, #3			/* ptr to table (type=3) */
+	str	x1, [x0, #8]
 
-	/* Stage 2 entries: indexed by IA[29:21] */
+	/* Stage 2 entries: indexed by IA[29:21] (within 1GB-2GB range) */
+	adrp	x0, ttb_stage2
+	add	x0, x0, :lo12:ttb_stage2
 	ldr	x5, =(((1 << 9) - 1) << 21)
 
 	/* First block: .text/RO/execute enabled */
 	adr	x1, .				/* phys address */
 	bic	x1, x1, #(1 << 21) - 1		/* 2mb block alignment */
-	and	x4, x1, x5			/* IA[29:21] */
+	/* Note: we assume RAM is in the 1GB-2GB range, so IA[30] is 1 */
+	mov	x4, x1
+	bic	x4, x4, #(1 << 30)		/* remove 1GB offset for L2 index */
+	and	x4, x4, x5			/* IA[29:21] */
 	add	x2, x0, x4, lsr #(21 - 3)	/* offset in l2 page table */
 	ldr	x3, =0x401			/* attr(AF, block) */
 	orr	x1, x1, x3
@@ -280,7 +284,9 @@ at_testel:
 	adrp	x1, .data
 	add	x1, x1, :lo12:.data
 	bic	x1, x1, #(1 << 21) - 1		/* 2mb block alignment */
-	and	x4, x1, x5			/* IA[29:21] */
+	mov	x4, x1
+	bic	x4, x4, #(1 << 30)		/* remove 1GB offset for L2 index */
+	and	x4, x4, x5			/* IA[29:21] */
 	add	x2, x0, x4, lsr #(21 - 3)	/* offset in l2 page table */
 	ldr	x3, =(3 << 53) | 0x401		/* attr(AF, NX, block) */
 	orr	x1, x1, x3
@@ -290,7 +296,9 @@ at_testel:
 	adrp	x1, mte_page
 	add	x1, x1, :lo12:mte_page
 	bic	x1, x1, #(1 << 21) - 1
-	and 	x4, x1, x5
+	mov	x4, x1
+	bic	x4, x4, #(1 << 30)		/* remove 1GB offset for L2 index */
+	and	x4, x4, x5
 	add	x2, x0, x4, lsr #(21 - 3)
 	/* attr(AF, NX, block, AttrIndx=Attr1) */
 	ldr	x3, =(3 << 53) | 0x401 | (1 << 2)
@@ -317,7 +325,7 @@ at_testel:
 	ldr	x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
 	msr	tcr_el1, x0
 
-	mov	x0, #0xee			/* Inner/outer cacheable WB */
+	ldr	x0, =0x04ee			/* Attr1: Device-nGnRE, Attr0: Normal WB */
 	msr	mair_el1, x0
 	isb
 
@@ -370,6 +378,19 @@ _exit:
 	semihosting_call
 	/* never returns */
 
+	/*
+	 * IRQ handler
+	 */
+	.global curr_spx_irq
+curr_spx_irq:
+	/* Minimal IRQ handler: just mask the timer and return */
+	str	x0, [sp, #-16]!		/* preserve the interrupted code's x0 */
+	mrs	x0, cntv_ctl_el0
+	orr	x0, x0, #2		/* IMASK=1 */
+	msr	cntv_ctl_el0, x0
+	ldr	x0, [sp], #16		/* restore x0 before returning */
+	eret
+
 	/*
 	 * Helper Functions
 	 */
-- 
2.47.3
