On Fri, May 08, 2026 at 03:51:29PM -0500, Andrew Jones wrote:
> The Svnapot extension encodes a 64KB leaf PTE by setting PTE_N and
> storing bits [3:0] of the PPN as a NAPOT size indicator. The IOMMU
> model wasn't checking PTE_N and therefore was using the raw (NAPOT-
> encoded) PPN directly in the physical address, yielding an address
> 32 KB above the correct base.
>
> Fix both riscv_iommu_spa_fetch() and pdt_memory_read() by mirroring
> the Svnapot handling already present in target/riscv/cpu_helper.c:
>
> napot_bits = ctz64(ppn) + 1 /* 4 for 64KB */
> napot_mask = (1 << napot_bits) - 1 /* 0xF */
> phys_base = PPN_PHYS(ppn & ~napot_mask)
> page_offset = addr & (PPN_PHYS(napot_mask) | (TARGET_PAGE_SIZE - 1))
>
> The spec only defines napot_bits == 4 (64KB); any other value is
> treated as a reserved encoding.
>
> This is a fix, rather than new feature support, because the spec
> says "IOMMU implementations must support the Svnapot standard
> extension for NAPOT Translation Contiguity."
>
> Fixes: 0c54acb8243d ("hw/riscv: add RISC-V IOMMU base emulation")
> Signed-off-by: Andrew Jones <[email protected]>
I should have added
Cc: [email protected]
because as soon as [1] is merged into Linux Linux will no longer boot
on any QEMU build without this patch.
[1]
https://lore.kernel.org/all/[email protected]/
Thanks,
drew
> ---
> hw/riscv/riscv-iommu.c | 44 ++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 40 insertions(+), 4 deletions(-)
>
> diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c
> index 25a356d1d366..eb09cc974812 100644
> --- a/hw/riscv/riscv-iommu.c
> +++ b/hw/riscv/riscv-iommu.c
> @@ -237,6 +237,25 @@ static bool riscv_iommu_msi_check(RISCVIOMMUState *s,
> RISCVIOMMUContext *ctx,
> return true;
> }
>
> +/* Returns the NAPOT page mask, or 0 for reserved encodings. */
> +static hwaddr riscv_iommu_napot_page_mask(hwaddr ppn, hwaddr addr, hwaddr
> *out)
> +{
> + int napot_bits = ctz64(ppn) + 1;
> + hwaddr napot_mask, page_mask;
> +
> + /* The spec only defines 64KB (napot_bits == 4) */
> + if (napot_bits != 4) {
> + return 0;
> + }
> +
> + napot_mask = (1ULL << napot_bits) - 1;
> + page_mask = PPN_PHYS(napot_mask) | (TARGET_PAGE_SIZE - 1);
> +
> + *out = PPN_PHYS(ppn & ~napot_mask) | (addr & page_mask);
> +
> + return page_mask;
> +}
> +
> /*
> * RISCV IOMMU Address Translation Lookup - Page Table Walk
> *
> @@ -458,9 +477,20 @@ static int riscv_iommu_spa_fetch(RISCVIOMMUState *s,
> RISCVIOMMUContext *ctx,
> } else {
> /* Leaf PTE, translation completed. */
> sc[pass].step = sc[pass].levels;
> - base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
> - /* Update address mask based on smallest translation granularity
> */
> - iotlb->addr_mask &= (1ULL << va_skip) - 1;
> +
> + if (pte & PTE_N) {
> + hwaddr mask = riscv_iommu_napot_page_mask(ppn, addr, &base);
> +
> + if (!mask) {
> + break;
> + }
> + iotlb->addr_mask &= mask;
> + } else {
> + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
> + /* Update address mask based on smallest translation
> granularity */
> + iotlb->addr_mask &= (1ULL << va_skip) - 1;
> + }
> +
> /* Continue with S-Stage translation? */
> if (pass && sc[0].step != sc[0].levels) {
> pass = S_STAGE;
> @@ -997,7 +1027,13 @@ static MemTxResult pdt_memory_read(RISCVIOMMUState *s,
> return MEMTX_ACCESS_ERROR; /* Misaligned PPN */
> } else {
> /* Leaf PTE, translation completed. */
> - base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
> + if (pte & PTE_N) {
> + if (!riscv_iommu_napot_page_mask(ppn, addr, &base)) {
> + return MEMTX_ACCESS_ERROR;
> + }
> + } else {
> + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1));
> + }
> break;
> }
>
> --
> 2.43.0
>