This required some iteration with the model to add bits it missed in
the initial implementation. I ended up debugging and then prompting
for the missing bits:

  - a basic gicv3 implementation
  - a couple of rounds of tweaking the config
  - mapping the GIC via page tables
  - fixing the compilation of the support library
  - extending the elapsed time check for WFE
  - light re-ordering of Makefile, setting correct machine opts

Signed-off-by: Alex Bennée <[email protected]>
---
 tests/tcg/aarch64/system/lib/gicv3.h      | 56 +++++++++++++++++
 tests/tcg/aarch64/system/lib/gicv3.c      | 77 +++++++++++++++++++++++
 tests/tcg/aarch64/system/wfx.c            | 17 ++++-
 tests/tcg/aarch64/Makefile.softmmu-target | 13 +++-
 tests/tcg/aarch64/system/boot.S           | 55 ++++++++++------
 5 files changed, 196 insertions(+), 22 deletions(-)
 create mode 100644 tests/tcg/aarch64/system/lib/gicv3.h
 create mode 100644 tests/tcg/aarch64/system/lib/gicv3.c

diff --git a/tests/tcg/aarch64/system/lib/gicv3.h b/tests/tcg/aarch64/system/lib/gicv3.h
new file mode 100644
index 00000000000..9a1268937c6
--- /dev/null
+++ b/tests/tcg/aarch64/system/lib/gicv3.h
@@ -0,0 +1,56 @@
+/*
+ * GICv3 Helper Library
+ *
+ * Copyright (c) 2024 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef GICV3_H
+#define GICV3_H
+
+#include <stdint.h>
+
+/* Virt machine GICv3 base addresses */
+#define GICD_BASE       0x08000000  /* c.f. VIRT_GIC_DIST */
+#define GICR_BASE       0x080a0000  /* c.f. VIRT_GIC_REDIST */
+
+/* Distributor registers: absolute addresses (GICD_BASE already added) */
+#define GICD_CTLR       (GICD_BASE + 0x0000)
+#define GICD_TYPER      (GICD_BASE + 0x0004)
+#define GICD_IIDR       (GICD_BASE + 0x0008)
+
+/* Redistributor registers (per-CPU): offsets only, add GICR_BASE at use */
+#define GICR_SGI_OFFSET 0x00010000
+
+#define GICR_CTLR       0x0000
+#define GICR_WAKER      0x0014
+#define GICR_IGROUPR0   (GICR_SGI_OFFSET + 0x0080)
+#define GICR_ISENABLER0 (GICR_SGI_OFFSET + 0x0100)
+#define GICR_IPRIORITYR0 (GICR_SGI_OFFSET + 0x0400)
+
+/* GICD_CTLR bits */
+#define GICD_CTLR_ARE_NS (1U << 4)
+#define GICD_CTLR_ENA_G1NS (1U << 1)
+#define GICD_CTLR_ENA_G0 (1U << 0)
+
+/* GICR_WAKER bits */
+#define GICR_WAKER_ChildrenAsleep (1U << 2)
+#define GICR_WAKER_ProcessorSleep (1U << 1)
+
+/**
+ * gicv3_init:
+ *
+ * Initialize GICv3 distributor and the redistributor for the current CPU.
+ */
+void gicv3_init(void);
+
+/**
+ * gicv3_enable_irq:
+ * @irq: The IRQ number to enable
+ *
+ * Enable the given IRQ if irq < 32; SPIs (irq >= 32) are silently ignored.
+ */
+void gicv3_enable_irq(unsigned int irq);
+
+#endif /* GICV3_H */
diff --git a/tests/tcg/aarch64/system/lib/gicv3.c b/tests/tcg/aarch64/system/lib/gicv3.c
new file mode 100644
index 00000000000..a09a0e430e6
--- /dev/null
+++ b/tests/tcg/aarch64/system/lib/gicv3.c
@@ -0,0 +1,77 @@
+/*
+ * GICv3 Helper Library Implementation
+ *
+ * Copyright (c) 2024 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "gicv3.h"
+
+#define write_sysreg(r, v) do { /* MSR <r>, <v as 64-bit> */ \
+        uint64_t __val = (uint64_t)(v);             \
+        asm volatile("msr " #r ", %x0"              \
+                 : : "rZ" (__val));                 \
+} while (0)
+
+#define isb() asm volatile("isb" : : : "memory") /* ISB + memory clobber */
+
+static inline void write_reg(uintptr_t addr, uint32_t val)
+{
+    *(volatile uint32_t *)addr = val; /* one 32-bit volatile store to addr */
+}
+
+static inline uint32_t read_reg(uintptr_t addr)
+{
+    return *(volatile uint32_t *)addr; /* one 32-bit volatile load from addr */
+}
+
+void gicv3_init(void)
+{
+    uint32_t val;
+
+    /* 1. Set ARE_NS and ENA_G1NS in GICD_CTLR, preserving the other bits */
+    val = read_reg(GICD_CTLR);
+    val |= GICD_CTLR_ARE_NS | GICD_CTLR_ENA_G1NS;
+    write_reg(GICD_CTLR, val);
+
+    /* 2. Wake up the redistributor frame at GICR_BASE (no per-CPU stride) */
+    /* Clear ProcessorSleep, leaving the other GICR_WAKER bits untouched */
+    val = read_reg(GICR_BASE + GICR_WAKER);
+    val &= ~GICR_WAKER_ProcessorSleep;
+    write_reg(GICR_BASE + GICR_WAKER, val);
+
+    /* Busy-wait, with no timeout, until ChildrenAsleep reads back as 0 */
+    while (read_reg(GICR_BASE + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+        /* spin */
+    }
+
+    /* 3. Enable CPU interface through the ICC_* system registers */
+    /* Set Priority Mask to 0xff, intended to allow all interrupts */
+    write_sysreg(ICC_PMR_EL1, 0xff);
+    /* Enable Group 1 Non-Secure interrupts at the CPU interface */
+    write_sysreg(ICC_IGRPEN1_EL1, 1);
+    isb();
+}
+
+void gicv3_enable_irq(unsigned int irq)
+{
+    if (irq < 32) {
+        /* PPI: configured through the redistributor frame at GICR_BASE */
+        uintptr_t addr;
+
+        /* Set Group 1 (read-modify-write keeps the other IRQs' bits) */
+        addr = GICR_BASE + GICR_IGROUPR0;
+        write_reg(addr, read_reg(addr) | (1U << irq));
+
+        /* Set priority (0xa0): byte-wide store, one byte per IRQ */
+        addr = GICR_BASE + GICR_IPRIORITYR0 + irq;
+        *(volatile uint8_t *)addr = 0xa0;
+
+        /* Enable it: only this bit is written, assuming write-1-to-set */
+        addr = GICR_BASE + GICR_ISENABLER0;
+        write_reg(addr, 1U << irq);
+    } else {
+        /* SPI: not implemented yet, so the request is silently dropped */
+    }
+}
diff --git a/tests/tcg/aarch64/system/wfx.c b/tests/tcg/aarch64/system/wfx.c
index 59436c381fd..567d9e59c70 100644
--- a/tests/tcg/aarch64/system/wfx.c
+++ b/tests/tcg/aarch64/system/wfx.c
@@ -8,6 +8,7 @@
 
 #include <stdint.h>
 #include <minilib.h>
+#include "gicv3.h"
 
 #define __stringify_1(x...) #x
 #define __stringify(x...)   __stringify_1(x)
@@ -31,6 +32,9 @@
 #define wfit(reg) asm volatile("wfit %0" : : "r" (reg) : "memory")
 #define wfet(reg) asm volatile("wfet %0" : : "r" (reg) : "memory")
 
+#define enable_irq()  asm volatile("msr daifclr, #2" : : : "memory")
+#define disable_irq() asm volatile("msr daifset, #2" : : : "memory")
+
 static void wait_ticks(uint64_t ticks)
 {
     uint64_t start = read_sysreg(cntvct_el0);
@@ -44,6 +48,9 @@ int main(void)
     uint64_t start, end, elapsed;
     uint64_t timeout;
 
+    gicv3_init();
+    gicv3_enable_irq(27); /* Virtual Timer PPI */
+
     ml_printf("WFX Test\n");
 
     /* 1. Test WFI with timer interrupt */
@@ -58,8 +65,13 @@ int main(void)
      * We don't have a full interrupt handler, but WFI should wake up
      * when the interrupt is pending even if we have it masked at the CPU.
      * PSTATE.I is set by boot code.
+     *
+     * We unmask interrupts here to ensure the CPU can take the minimal
+     * exception handler defined in boot.S.
      */
+    enable_irq();
     wfi();
+    disable_irq();
     end = read_sysreg(cntvct_el0);
     elapsed = end - start;
     if (elapsed < 100000) {
@@ -76,11 +88,12 @@ int main(void)
     wfe(); /* Should return immediately */
     end = read_sysreg(cntvct_el0);
     elapsed = end - start;
-    if (elapsed > 1000) { /* Should be very fast */
+    /* while this should be fast there is some overhead from TCG */
+    if (elapsed > 20000) {
         ml_printf("FAILED: WFE slept despite SEV (%ld ticks)\n", elapsed);
         return 1;
     }
-    ml_printf("PASSED\n");
+    ml_printf("PASSED (%ld ticks)\n", elapsed);
 
     /* 3. Test WFIT */
     ml_printf("Testing WFIT...");
diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
index 84342c52cd7..9a5b95de621 100644
--- a/tests/tcg/aarch64/Makefile.softmmu-target
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
@@ -4,8 +4,9 @@
 
 AARCH64_SRC=$(SRC_PATH)/tests/tcg/aarch64
 AARCH64_SYSTEM_SRC=$(AARCH64_SRC)/system
+AARCH64_SYSTEM_LIB_SRC=$(AARCH64_SYSTEM_SRC)/lib
 
-VPATH+=$(AARCH64_SYSTEM_SRC)
+VPATH+=$(AARCH64_SYSTEM_SRC) $(AARCH64_SYSTEM_LIB_SRC)
 
 # These objects provide the basic boot code and helper functions for all tests
 CRT_OBJS=boot.o
@@ -24,7 +25,7 @@ LINK_SCRIPT=$(AARCH64_SYSTEM_SRC)/kernel.ld
 LDFLAGS=-Wl,-T$(LINK_SCRIPT)
 TESTS+=$(AARCH64_TESTS) $(MULTIARCH_TESTS)
 EXTRA_RUNS+=$(MULTIARCH_RUNS)
-CFLAGS+=-nostdlib -ggdb -O0 $(MINILIB_INC)
+CFLAGS+=-nostdlib -ggdb -O0 $(MINILIB_INC) -I$(AARCH64_SYSTEM_LIB_SRC)
 LDFLAGS+=-static -nostdlib $(CRT_OBJS) $(MINILIB_OBJS) -lgcc
 
 config-cc.mak: Makefile
@@ -102,7 +103,15 @@ run-pauth-3:
        $(call skip-test, "RUN of pauth-3", "not built")
 endif
 
+gicv3.o: gicv3.c gicv3.h
+       $(CC) $(CFLAGS) $(EXTRA_CFLAGS) -c $< -o $@
+
 wfx: CFLAGS += -march=armv8.7-a
+wfx: LDFLAGS += gicv3.o
+wfx: gicv3.o
+
+QEMU_GICV3_MACHINE=-M virt,gic-version=3 -cpu max -display none
+run-wfx: QEMU_OPTS=$(QEMU_GICV3_MACHINE) $(QEMU_BASE_ARGS) -kernel
 
 ifneq ($(CROSS_CC_HAS_ARMV8_MTE),)
 QEMU_MTE_ENABLED_MACHINE=-M virt,mte=on -cpu max -display none
diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
index 8bfa4e4efc7..6a71fc0da5a 100644
--- a/tests/tcg/aarch64/system/boot.S
+++ b/tests/tcg/aarch64/system/boot.S
@@ -60,7 +60,6 @@ curr_sp0_irq:
 curr_sp0_fiq:
 curr_sp0_serror:
 curr_spx_sync:
-curr_spx_irq:
 curr_spx_fiq:
 curr_spx_serror:
 lower_a64_sync:
@@ -248,29 +247,34 @@ at_testel:
        msr     ttbr0_el1, x0
 
        /*
-        * Setup a flat address mapping page-tables. Stage one simply
-        * maps RAM to the first Gb. The stage2 tables have two 2mb
-        * translation block entries covering a series of adjacent
-        * 4k pages.
+        * Setup a flat address mapping page-tables.
+        *
+        * ttb (Level 1):
+        *   - Entry 0 [0 - 1GB]: 1GB Device block (for GIC and other H/W)
+        *   - Entry 1 [1GB - 2GB]: Table entry pointing to ttb_stage2 (for RAM)
         */
 
-       /* Stage 1 entry: indexed by IA[38:30] */
-       adr     x1, .                           /* phys address */
-       bic     x1, x1, #(1 << 30) - 1          /* 1GB alignment*/
-       add     x2, x0, x1, lsr #(30 - 3)       /* offset in l1 page table */
+       /* Entry 0: 1GB Device block mapping at 0x0 */
+       ldr     x1, =0x401 | (1 << 2)           /* AF=1, block, AttrIndx=Attr1 (Device) */
+       str     x1, [x0]
 
-       /* point to stage 2 table [47:12] */
-       adrp    x0, ttb_stage2
-       orr     x1, x0, #3                      /* ptr to stage 2 */
-       str     x1, [x2]
+       /* Entry 1: Table entry pointing to ttb_stage2 */
+       adrp    x1, ttb_stage2
+       orr     x1, x1, #3                      /* ptr to table (type=3) */
+       str     x1, [x0, #8]
 
-       /* Stage 2 entries: indexed by IA[29:21] */
+       /* Stage 2 entries: indexed by IA[29:21] (within 1GB-2GB range) */
+       adrp    x0, ttb_stage2
+       add     x0, x0, :lo12:ttb_stage2
        ldr     x5, =(((1 << 9) - 1) << 21)
 
        /* First block: .text/RO/execute enabled */
        adr     x1, .                           /* phys address */
        bic     x1, x1, #(1 << 21) - 1          /* 2mb block alignment  */
-       and     x4, x1, x5                      /* IA[29:21] */
+       /* Note: we assume RAM is in the 1GB-2GB range, so IA[30] is 1 */
+       mov     x4, x1
+       bic     x4, x4, #(1 << 30)              /* remove 1GB offset for L2 index */
+       and     x4, x4, x5                      /* IA[29:21] */
        add     x2, x0, x4, lsr #(21 - 3)       /* offset in l2 page table */
        ldr     x3, =0x401                      /* attr(AF, block) */
        orr     x1, x1, x3
@@ -280,7 +284,9 @@ at_testel:
        adrp    x1, .data
        add     x1, x1, :lo12:.data
        bic     x1, x1, #(1 << 21) - 1          /* 2mb block alignment */
-       and     x4, x1, x5                      /* IA[29:21] */
+       mov     x4, x1
+       bic     x4, x4, #(1 << 30)              /* remove 1GB offset for L2 index */
+       and     x4, x4, x5                      /* IA[29:21] */
        add     x2, x0, x4, lsr #(21 - 3)       /* offset in l2 page table */
        ldr     x3, =(3 << 53) | 0x401          /* attr(AF, NX, block) */
        orr     x1, x1, x3
@@ -290,7 +296,9 @@ at_testel:
        adrp    x1, mte_page
        add     x1, x1, :lo12:mte_page
        bic     x1, x1, #(1 << 21) - 1
-       and     x4, x1, x5
+       mov     x4, x1
+       bic     x4, x4, #(1 << 30)              /* remove 1GB offset for L2 index */
+       and     x4, x4, x5
        add     x2, x0, x4, lsr #(21 - 3)
        /* attr(AF, NX, block, AttrIndx=Attr1) */
        ldr     x3, =(3 << 53) | 0x401 | (1 << 2)
@@ -317,7 +325,7 @@ at_testel:
        ldr     x0, = (2 << 32) | 25 | (3 << 10) | (3 << 8)
        msr     tcr_el1, x0
 
-       mov     x0, #0xee                       /* Inner/outer cacheable WB */
+       ldr     x0, =0x04ee                     /* Attr1: Device-nGnRE, Attr0: Normal WB */
        msr     mair_el1, x0
        isb
 
@@ -370,6 +378,17 @@ _exit:
        semihosting_call
        /* never returns */
 
+       /* IRQ handler */
+       .global curr_spx_irq
+curr_spx_irq:
+       /* Mask the virtual timer; x0 is preserved for the interrupted code */
+       str     x0, [sp, #-16]!         /* keep sp 16-byte aligned */
+       mrs     x0, cntv_ctl_el0
+       orr     x0, x0, #2              /* IMASK=1 */
+       msr     cntv_ctl_el0, x0
+       ldr     x0, [sp], #16
+       eret
+
        /*
         * Helper Functions
        */
-- 
2.47.3


Reply via email to