On Tue, Sep 3, 2024 at 4:46 AM Daniel Henrique Barboza <dbarb...@ventanamicro.com> wrote: > > > > On 8/26/24 11:18 PM, Tomasz Jeznach wrote: > > On Fri, Aug 23, 2024 at 5:42 AM Daniel Henrique Barboza > > <dbarb...@ventanamicro.com> wrote: > >> > >> > >> > >> On 8/20/24 12:16 PM, Jason Chien wrote: > >>> Hi Daniel, > >>> > >>> On 2024/8/1 下午 11:43, Daniel Henrique Barboza wrote: > >>>> From: Tomasz Jeznach <tjezn...@rivosinc.com> > >>>> > >>>> The RISC-V IOMMU specification is now ratified as per the RISC-V > >>>> international process. The latest frozen specification can be found at: > >>>> > >>>> https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf > >>>> > >>>> Add the foundation of the device emulation for RISC-V IOMMU. It includes > >>>> support for s-stage (sv32, sv39, sv48, sv57 caps) and g-stage (sv32x4, > >>>> sv39x4, sv48x4, sv57x4 caps). > >>>> > >>>> Other capabilities like ATS and DBG support will be added incrementally > >>>> in the next patches. 
> >>>> > >>>> Co-developed-by: Sebastien Boeuf <s...@rivosinc.com> > >>>> Signed-off-by: Sebastien Boeuf <s...@rivosinc.com> > >>>> Signed-off-by: Tomasz Jeznach <tjezn...@rivosinc.com> > >>>> Signed-off-by: Daniel Henrique Barboza <dbarb...@ventanamicro.com> > >>>> --- > >>>> hw/riscv/Kconfig | 4 + > >>>> hw/riscv/meson.build | 1 + > >>>> hw/riscv/riscv-iommu-bits.h | 14 + > >>>> hw/riscv/riscv-iommu.c | 2017 +++++++++++++++++++++++++++++++++++ > >>>> hw/riscv/riscv-iommu.h | 148 +++ > >>>> hw/riscv/trace-events | 12 + > >>>> hw/riscv/trace.h | 1 + > >>>> include/hw/riscv/iommu.h | 36 + > >>>> meson.build | 1 + > >>>> 9 files changed, 2234 insertions(+) > >>>> create mode 100644 hw/riscv/riscv-iommu.c > >>>> create mode 100644 hw/riscv/riscv-iommu.h > >>>> create mode 100644 hw/riscv/trace-events > >>>> create mode 100644 hw/riscv/trace.h > >>>> create mode 100644 include/hw/riscv/iommu.h > >>>> > >>>> diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig > >>>> index a2030e3a6f..f69d6e3c8e 100644 > >>>> --- a/hw/riscv/Kconfig > >>>> +++ b/hw/riscv/Kconfig > >>>> @@ -1,3 +1,6 @@ > >>>> +config RISCV_IOMMU > >>>> + bool > >>>> + > >>>> config RISCV_NUMA > >>>> bool > >>>> @@ -47,6 +50,7 @@ config RISCV_VIRT > >>>> select SERIAL > >>>> select RISCV_ACLINT > >>>> select RISCV_APLIC > >>>> + select RISCV_IOMMU > >>>> select RISCV_IMSIC > >>>> select SIFIVE_PLIC > >>>> select SIFIVE_TEST > >>>> diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build > >>>> index f872674093..cbc99c6e8e 100644 > >>>> --- a/hw/riscv/meson.build > >>>> +++ b/hw/riscv/meson.build > >>>> @@ -10,5 +10,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: > >>>> files('sifive_u.c')) > >>>> riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c')) > >>>> riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: > >>>> files('microchip_pfsoc.c')) > >>>> riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) > >>>> +riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: > >>>> 
files('riscv-iommu.c')) > >>>> hw_arch += {'riscv': riscv_ss} > >>>> diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h > >>>> index 9d9512ca71..6f1b9ab61b 100644 > >>>> --- a/hw/riscv/riscv-iommu-bits.h > >>>> +++ b/hw/riscv/riscv-iommu-bits.h > >>>> @@ -69,6 +69,14 @@ struct riscv_iommu_pq_record { > >>>> /* 5.3 IOMMU Capabilities (64bits) */ > >>>> #define RISCV_IOMMU_REG_CAP 0x0000 > >>>> #define RISCV_IOMMU_CAP_VERSION GENMASK_ULL(7, 0) > >>>> +#define RISCV_IOMMU_CAP_SV32 BIT_ULL(8) > >>>> +#define RISCV_IOMMU_CAP_SV39 BIT_ULL(9) > >>>> +#define RISCV_IOMMU_CAP_SV48 BIT_ULL(10) > >>>> +#define RISCV_IOMMU_CAP_SV57 BIT_ULL(11) > >>>> +#define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16) > >>>> +#define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17) > >>>> +#define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18) > >>>> +#define RISCV_IOMMU_CAP_SV57X4 BIT_ULL(19) > >>>> #define RISCV_IOMMU_CAP_MSI_FLAT BIT_ULL(22) > >>>> #define RISCV_IOMMU_CAP_MSI_MRIF BIT_ULL(23) > >>>> #define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26) > >>>> @@ -80,7 +88,9 @@ struct riscv_iommu_pq_record { > >>>> /* 5.4 Features control register (32bits) */ > >>>> #define RISCV_IOMMU_REG_FCTL 0x0008 > >>>> +#define RISCV_IOMMU_FCTL_BE BIT(0) > >>>> #define RISCV_IOMMU_FCTL_WSI BIT(1) > >>>> +#define RISCV_IOMMU_FCTL_GXL BIT(2) > >>>> /* 5.5 Device-directory-table pointer (64bits) */ > >>>> #define RISCV_IOMMU_REG_DDTP 0x0010 > >>>> @@ -203,6 +213,8 @@ struct riscv_iommu_dc { > >>>> #define RISCV_IOMMU_DC_TC_DTF BIT_ULL(4) > >>>> #define RISCV_IOMMU_DC_TC_PDTV BIT_ULL(5) > >>>> #define RISCV_IOMMU_DC_TC_PRPR BIT_ULL(6) > >>>> +#define RISCV_IOMMU_DC_TC_GADE BIT_ULL(7) > >>>> +#define RISCV_IOMMU_DC_TC_SADE BIT_ULL(8) > >>>> #define RISCV_IOMMU_DC_TC_DPE BIT_ULL(9) > >>>> #define RISCV_IOMMU_DC_TC_SBE BIT_ULL(10) > >>>> #define RISCV_IOMMU_DC_TC_SXL BIT_ULL(11) > >>>> @@ -309,9 +321,11 @@ enum riscv_iommu_fq_causes { > >>>> /* Translation attributes fields */ > >>>> #define RISCV_IOMMU_PC_TA_V BIT_ULL(0) > >>>> 
+#define RISCV_IOMMU_PC_TA_RESERVED GENMASK_ULL(63, 32) > >>>> /* First stage context fields */ > >>>> #define RISCV_IOMMU_PC_FSC_PPN GENMASK_ULL(43, 0) > >>>> +#define RISCV_IOMMU_PC_FSC_RESERVED GENMASK_ULL(59, 44) > >>>> enum riscv_iommu_fq_ttypes { > >>>> RISCV_IOMMU_FQ_TTYPE_NONE = 0, > >>>> diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c > >>>> new file mode 100644 > >>>> index 0000000000..ebe3a53a04 > >>>> --- /dev/null > >>>> +++ b/hw/riscv/riscv-iommu.c > >>>> @@ -0,0 +1,2017 @@ > >> (...) > >> > >>>> + > >>>> +/* > >>>> + * RISCV IOMMU Address Translation Lookup - Page Table Walk > >>>> + * > >>>> + * Note: Code is based on get_physical_address() from > >>>> target/riscv/cpu_helper.c > >>>> + * Both implementation can be merged into single helper function in > >>>> future. > >>>> + * Keeping them separate for now, as error reporting and flow specifics > >>>> are > >>>> + * sufficiently different for separate implementation. > >>>> + * > >>>> + * @s : IOMMU Device State > >>>> + * @ctx : Translation context for device id and process address > >>>> space id. > >>>> + * @iotlb : translation data: physical address and access mode. > >>>> + * @return : success or fault cause code. 
> >>>> + */ > >>>> +static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext > >>>> *ctx, > >>>> + IOMMUTLBEntry *iotlb) > >>>> +{ > >>>> + dma_addr_t addr, base; > >>>> + uint64_t satp, gatp, pte; > >>>> + bool en_s, en_g; > >>>> + struct { > >>>> + unsigned char step; > >>>> + unsigned char levels; > >>>> + unsigned char ptidxbits; > >>>> + unsigned char ptesize; > >>>> + } sc[2]; > >>>> + /* Translation stage phase */ > >>>> + enum { > >>>> + S_STAGE = 0, > >>>> + G_STAGE = 1, > >>>> + } pass; > >>>> + > >>>> + satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); > >>>> + gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); > >>>> + > >>>> + en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE; > >>>> + en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE; > >>>> + > >>>> + /* Early check for MSI address match when IOVA == GPA */ > >>>> + if ((iotlb->perm & IOMMU_WO) && > >>>> + riscv_iommu_msi_check(s, ctx, iotlb->iova)) { > >>> I think the IOVA here may be a GVA and we should not use a GVA to perform > >>> msi check. Section 2.3 states that we should always walk first stage page > >>> table to get the GPA (step 17) and then use the GPA to do the msi check > >>> (step 18). > >> > >> That's a good point. Even if we rule out the address as a GVA by doing a > >> second stage > >> check (via en_g) we're still assuming that iotlb->iova = GPA without going > >> through > >> the first stage translation. We're basically gambling for a match calling > >> riscv_iommu_msi_check() this early. > >> > >> I'll remove this early check entirely since we're doing a proper msi check > >> with a > >> translated GPA at the end of first stage: > >> > >> > > > > This check was required to ensure MSI translation is applied in case > > first-stage translation is set to BARE mode. In this case IOVA > > provided is a valid GPA. 
Running translation through page walk w/ > > second stage translation will incorrectly try to translate GPA to host > > physical page, likely hitting IOPF. > > Right. Let's keep the check, adding this observation about why it is needed. > > > > > Looking back, I missed the removal of an important check for the en_s flag > > in the v2->v3 changes: > > > > /* Early check for MSI address match when IOVA == GPA */ > > - if (!en_s && (iotlb->perm & IOMMU_WO) && > > + if ((iotlb->perm & IOMMU_WO) && > > riscv_iommu_msi_check(s, ctx, iotlb->iova)) { > > iotlb->target_as = &s->trap_as; > > > > I'd suggest fixing the missing en_s check, to enable proper handling of > > MSIP. > > The '!en_s' check was removed because it was breaking irqbypass. I'll let > Drew explain > more about it since he's been working on the kernel support for this use case. > >
Is the missing `!en_s` check still problematic? Re-reading the code, I'd say it's required here so that the early check catches only GPAs (when the S-stage is BARE), not untranslated IOVAs. Thank you, - Tomasz > Thanks, > > Daniel > > > > > > Thanks, > > - Tomasz > > > >>>> + /* Translation phase completed (GPA or SPA) */ > >>>> + iotlb->translated_addr = base; > >>>> + iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : > >>>> IOMMU_WO) > >>>> + : IOMMU_RO; > >>>> + > >>>> + /* Check MSI GPA address match */ > >>>> + if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) && > >>>> + riscv_iommu_msi_check(s, ctx, base)) { > >>>> + /* Trap MSI writes and return GPA address. */ > >>>> + iotlb->target_as = &s->trap_as; > >>>> + iotlb->addr_mask = ~TARGET_PAGE_MASK; > >>>> + return 0; > >>>> + } > >> > >> > >> This will be suboptimal for cases where IOVA == GPA and we would have an > >> early exit by > >> guessing it right, but I'm more comfortable adhering fully to the spec > >> this time. > >> > >> > >> Thanks, > >> > >> Daniel > >> > >> > >>>> + iotlb->target_as = &s->trap_as; > >>>> + iotlb->translated_addr = iotlb->iova; > >>>> + iotlb->addr_mask = ~TARGET_PAGE_MASK; > >>>> + return 0; > >>>> + } > >>>> + > >>>> + /* Exit early for pass-through mode. */ > >>>> + if (!(en_s || en_g)) { > >>>> + iotlb->translated_addr = iotlb->iova; > >>>> + iotlb->addr_mask = ~TARGET_PAGE_MASK; > >>>> + /* Allow R/W in pass-through mode */ > >>>> + iotlb->perm = IOMMU_RW; > >>>> + return 0; > >>>> + } > >>>> + > >>>> + /* S/G translation parameters. */ > >>>> + for (pass = 0; pass < 2; pass++) { > >>>> + uint32_t sv_mode; > >>>> + > >>>> + sc[pass].step = 0; > >>>> + if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) : > >>>> + (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) { > >>>> + /* 32bit mode for GXL/SXL == 1 */ > >>>> + switch (pass ? 
gatp : satp) { > >>>> + case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: > >>>> + sc[pass].levels = 0; > >>>> + sc[pass].ptidxbits = 0; > >>>> + sc[pass].ptesize = 0; > >>>> + break; > >>>> + case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: > >>>> + sv_mode = pass ? RISCV_IOMMU_CAP_SV32X4 : > >>>> RISCV_IOMMU_CAP_SV32; > >>>> + if (!(s->cap & sv_mode)) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; > >>>> + } > >>>> + sc[pass].levels = 2; > >>>> + sc[pass].ptidxbits = 10; > >>>> + sc[pass].ptesize = 4; > >>>> + break; > >>>> + default: > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; > >>>> + } > >>>> + } else { > >>>> + /* 64bit mode for GXL/SXL == 0 */ > >>>> + switch (pass ? gatp : satp) { > >>>> + case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: > >>>> + sc[pass].levels = 0; > >>>> + sc[pass].ptidxbits = 0; > >>>> + sc[pass].ptesize = 0; > >>>> + break; > >>>> + case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: > >>>> + sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : > >>>> RISCV_IOMMU_CAP_SV39; > >>>> + if (!(s->cap & sv_mode)) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; > >>>> + } > >>>> + sc[pass].levels = 3; > >>>> + sc[pass].ptidxbits = 9; > >>>> + sc[pass].ptesize = 8; > >>>> + break; > >>>> + case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: > >>>> + sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : > >>>> RISCV_IOMMU_CAP_SV48; > >>>> + if (!(s->cap & sv_mode)) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; > >>>> + } > >>>> + sc[pass].levels = 4; > >>>> + sc[pass].ptidxbits = 9; > >>>> + sc[pass].ptesize = 8; > >>>> + break; > >>>> + case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: > >>>> + sv_mode = pass ? 
RISCV_IOMMU_CAP_SV57X4 : > >>>> RISCV_IOMMU_CAP_SV57; > >>>> + if (!(s->cap & sv_mode)) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; > >>>> + } > >>>> + sc[pass].levels = 5; > >>>> + sc[pass].ptidxbits = 9; > >>>> + sc[pass].ptesize = 8; > >>>> + break; > >>>> + default: > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; > >>>> + } > >>>> + } > >>>> + }; > >>>> + > >>>> + /* S/G stages translation tables root pointers */ > >>>> + gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); > >>>> + satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD)); > >>>> + addr = (en_s && en_g) ? satp : iotlb->iova; > >>>> + base = en_g ? gatp : satp; > >>>> + pass = en_g ? G_STAGE : S_STAGE; > >>>> + > >>>> + do { > >>>> + const unsigned widened = (pass && !sc[pass].step) ? 2 : 0; > >>>> + const unsigned va_bits = widened + sc[pass].ptidxbits; > >>>> + const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits * > >>>> + (sc[pass].levels - 1 - sc[pass].step); > >>>> + const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); > >>>> + const dma_addr_t pte_addr = base + idx * sc[pass].ptesize; > >>>> + const bool ade = > >>>> + ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : > >>>> RISCV_IOMMU_DC_TC_SADE); > >>>> + > >>>> + /* Address range check before first level lookup */ > >>>> + if (!sc[pass].step) { > >>>> + const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1; > >>>> + if ((addr & va_mask) != addr) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; > >>>> + } > >>>> + } > >>>> + > >>>> + /* Read page table entry */ > >>>> + if (dma_memory_read(s->target_as, pte_addr, &pte, > >>>> + sc[pass].ptesize, MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { > >>>> + return (iotlb->perm & IOMMU_WO) ? 
> >>>> RISCV_IOMMU_FQ_CAUSE_WR_FAULT > >>>> + : > >>>> RISCV_IOMMU_FQ_CAUSE_RD_FAULT; > >>>> + } > >>>> + > >>>> + if (sc[pass].ptesize == 4) { > >>>> + pte = (uint64_t) le32_to_cpu(*((uint32_t *)&pte)); > >>>> + } else { > >>>> + pte = le64_to_cpu(pte); > >>>> + } > >>>> + > >>>> + sc[pass].step++; > >>>> + hwaddr ppn = pte >> PTE_PPN_SHIFT; > >>>> + > >>>> + if (!(pte & PTE_V)) { > >>>> + break; /* Invalid PTE */ > >>>> + } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { > >>>> + base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ > >>>> + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { > >>>> + break; /* Reserved leaf PTE flags: PTE_W */ > >>>> + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { > >>>> + break; /* Reserved leaf PTE flags: PTE_W + > >>>> PTE_X */ > >>>> + } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { > >>>> + break; /* Misaligned PPN */ > >>>> + } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) { > >>>> + break; /* Read access check failed */ > >>>> + } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) { > >>>> + break; /* Write access check failed */ > >>>> + } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) { > >>>> + break; /* Access bit not set */ > >>>> + } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) { > >>>> + break; /* Dirty bit not set */ > >>>> + } else { > >>>> + /* Leaf PTE, translation completed. */ > >>>> + sc[pass].step = sc[pass].levels; > >>>> + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); > >>>> + /* Update address mask based on smallest translation > >>>> granularity */ > >>>> + iotlb->addr_mask &= (1ULL << va_skip) - 1; > >>>> + /* Continue with S-Stage translation? */ > >>>> + if (pass && sc[0].step != sc[0].levels) { > >>>> + pass = S_STAGE; > >>>> + addr = iotlb->iova; > >>>> + continue; > >>>> + } > >>>> + /* Translation phase completed (GPA or SPA) */ > >>>> + iotlb->translated_addr = base; > >>>> + iotlb->perm = (pte & PTE_W) ? 
((pte & PTE_R) ? IOMMU_RW : > >>>> IOMMU_WO) > >>>> + : IOMMU_RO; > >>>> + > >>>> + /* Check MSI GPA address match */ > >>>> + if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) && > >>>> + riscv_iommu_msi_check(s, ctx, base)) { > >>>> + /* Trap MSI writes and return GPA address. */ > >>>> + iotlb->target_as = &s->trap_as; > >>>> + iotlb->addr_mask = ~TARGET_PAGE_MASK; > >>>> + return 0; > >>>> + } > >>>> + > >>>> + /* Continue with G-Stage translation? */ > >>>> + if (!pass && en_g) { > >>>> + pass = G_STAGE; > >>>> + addr = base; > >>>> + base = gatp; > >>>> + sc[pass].step = 0; > >>>> + continue; > >>>> + } > >>>> + > >>>> + return 0; > >>>> + } > >>>> + > >>>> + if (sc[pass].step == sc[pass].levels) { > >>>> + break; /* Can't find leaf PTE */ > >>>> + } > >>>> + > >>>> + /* Continue with G-Stage translation? */ > >>>> + if (!pass && en_g) { > >>>> + pass = G_STAGE; > >>>> + addr = base; > >>>> + base = gatp; > >>>> + sc[pass].step = 0; > >>>> + } > >>>> + } while (1); > >>>> + > >>>> + return (iotlb->perm & IOMMU_WO) ? > >>>> + (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : > >>>> + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) : > >>>> + (pass ? 
RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS : > >>>> + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S); > >>>> +} > >>>> + > >>>> +static void riscv_iommu_report_fault(RISCVIOMMUState *s, > >>>> + RISCVIOMMUContext *ctx, > >>>> + uint32_t fault_type, uint32_t > >>>> cause, > >>>> + bool pv, > >>>> + uint64_t iotval, uint64_t iotval2) > >>>> +{ > >>>> + struct riscv_iommu_fq_record ev = { 0 }; > >>>> + > >>>> + if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) { > >>>> + switch (cause) { > >>>> + case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: > >>>> + case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: > >>>> + case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: > >>>> + case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: > >>>> + case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: > >>>> + case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: > >>>> + case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: > >>>> + break; > >>>> + default: > >>>> + /* DTF prevents reporting a fault for this given cause */ > >>>> + return; > >>>> + } > >>>> + } > >>>> + > >>>> + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause); > >>>> + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type); > >>>> + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid); > >>>> + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true); > >>>> + > >>>> + if (pv) { > >>>> + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, > >>>> ctx->process_id); > >>>> + } > >>>> + > >>>> + ev.iotval = iotval; > >>>> + ev.iotval2 = iotval2; > >>>> + > >>>> + riscv_iommu_fault(s, &ev); > >>>> +} > >>>> + > >>>> +/* Redirect MSI write for given GPA. 
*/ > >>>> +static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s, > >>>> + RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data, > >>>> + unsigned size, MemTxAttrs attrs) > >>>> +{ > >>>> + MemTxResult res; > >>>> + dma_addr_t addr; > >>>> + uint64_t intn; > >>>> + uint32_t n190; > >>>> + uint64_t pte[2]; > >>>> + int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; > >>>> + int cause; > >>>> + > >>>> + /* Interrupt File Number */ > >>>> + intn = _pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask); > >>>> + if (intn >= 256) { > >>>> + /* Interrupt file number out of range */ > >>>> + res = MEMTX_ACCESS_ERROR; > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + /* fetch MSI PTE */ > >>>> + addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN)); > >>>> + addr = addr | (intn * sizeof(pte)); > >>>> + res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte), > >>>> + MEMTXATTRS_UNSPECIFIED); > >>>> + if (res != MEMTX_OK) { > >>>> + if (res == MEMTX_DECODE_ERROR) { > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED; > >>>> + } else { > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; > >>>> + } > >>>> + goto err; > >>>> + } > >>>> + > >>>> + le64_to_cpus(&pte[0]); > >>>> + le64_to_cpus(&pte[1]); > >>>> + > >>>> + if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & > >>>> RISCV_IOMMU_MSI_PTE_C)) { > >>>> + /* > >>>> + * The spec mentions that: "If msipte.C == 1, then further > >>>> + * processing to interpret the PTE is implementation > >>>> + * defined.". We'll abort with cause = 262 for this > >>>> + * case too. 
> >>>> + */ > >>>> + res = MEMTX_ACCESS_ERROR; > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) { > >>>> + case RISCV_IOMMU_MSI_PTE_M_BASIC: > >>>> + /* MSI Pass-through mode */ > >>>> + addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN)); > >>>> + addr = addr | (gpa & TARGET_PAGE_MASK); > >>>> + > >>>> + trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), > >>>> + PCI_SLOT(ctx->devid), > >>>> PCI_FUNC(ctx->devid), > >>>> + gpa, addr); > >>>> + > >>>> + res = dma_memory_write(s->target_as, addr, &data, size, attrs); > >>>> + if (res != MEMTX_OK) { > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + return MEMTX_OK; > >>>> + case RISCV_IOMMU_MSI_PTE_M_MRIF: > >>>> + /* MRIF mode, continue. */ > >>>> + break; > >>>> + default: > >>>> + res = MEMTX_ACCESS_ERROR; > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + /* > >>>> + * Report an error for interrupt identities exceeding the maximum > >>>> allowed > >>>> + * for an IMSIC interrupt file (2047) or destination address is not > >>>> 32-bit > >>>> + * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables. 
> >>>> + */ > >>>> + if ((data > 2047) || (gpa & 3)) { > >>>> + res = MEMTX_ACCESS_ERROR; > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + /* MSI MRIF mode, non atomic pending bit update */ > >>>> + > >>>> + /* MRIF pending bit address */ > >>>> + addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9; > >>>> + addr = addr | ((data & 0x7c0) >> 3); > >>>> + > >>>> + trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), > >>>> + PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), > >>>> + gpa, addr); > >>>> + > >>>> + /* MRIF pending bit mask */ > >>>> + data = 1ULL << (data & 0x03f); > >>>> + res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), > >>>> attrs); > >>>> + if (res != MEMTX_OK) { > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + intn = intn | data; > >>>> + res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), > >>>> attrs); > >>>> + if (res != MEMTX_OK) { > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + /* Get MRIF enable bits */ > >>>> + addr = addr + sizeof(intn); > >>>> + res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), > >>>> attrs); > >>>> + if (res != MEMTX_OK) { > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + if (!(intn & data)) { > >>>> + /* notification disabled, MRIF update completed. 
*/ > >>>> + return MEMTX_OK; > >>>> + } > >>>> + > >>>> + /* Send notification message */ > >>>> + addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN)); > >>>> + n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) | > >>>> + (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10); > >>>> + > >>>> + res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), > >>>> attrs); > >>>> + if (res != MEMTX_OK) { > >>>> + cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; > >>>> + goto err; > >>>> + } > >>>> + > >>>> + trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr); > >>>> + > >>>> + return MEMTX_OK; > >>>> + > >>>> +err: > >>>> + riscv_iommu_report_fault(s, ctx, fault_type, cause, > >>>> + !!ctx->process_id, 0, 0); > >>>> + return res; > >>>> +} > >>>> + > >>>> +/* > >>>> + * Check device context configuration as described by the > >>>> + * riscv-iommu spec section "Device-context configuration > >>>> + * checks". > >>>> + */ > >>>> +static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s, > >>>> + RISCVIOMMUContext *ctx) > >>>> +{ > >>>> + uint32_t fsc_mode, msi_mode; > >>>> + > >>>> + if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) && > >>>> + ctx->tc & RISCV_IOMMU_DC_TC_PRPR) { > >>>> + return false; > >>>> + } > >>>> + > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) && > >>>> + ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) { > >>>> + return false; > >>>> + } > >>>> + > >>>> + if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) { > >>>> + msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE); > >>>> + > >>>> + if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF && > >>>> + msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { > >>>> + return false; > >>>> + } > >>>> + } > >>>> + > >>>> + fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); > >>>> + > >>>> + if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) { > >>>> + switch (fsc_mode) { > >>>> + case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_PD8)) { > >>>> + return false; > >>>> + } > >>>> + break; > 
>>>> + case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_PD17)) { > >>>> + return false; > >>>> + } > >>>> + break; > >>>> + case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_PD20)) { > >>>> + return false; > >>>> + } > >>>> + break; > >>>> + } > >>>> + } else { > >>>> + /* DC.tc.PDTV is 0 */ > >>>> + if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) { > >>>> + return false; > >>>> + } > >>>> + > >>>> + if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { > >>>> + if (fsc_mode == RISCV_IOMMU_CAP_SV32 && > >>>> + !(s->cap & RISCV_IOMMU_CAP_SV32)) { > >>>> + return false; > >>>> + } > >>>> + } else { > >>>> + switch (fsc_mode) { > >>>> + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { > >>>> + return false; > >>>> + } > >>>> + break; > >>>> + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { > >>>> + return false; > >>>> + } > >>>> + break; > >>>> + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { > >>>> + return false; > >>>> + } > >>>> + break; > >>>> + } > >>>> + } > >>>> + } > >>>> + > >>>> + /* > >>>> + * CAP_END is always zero (only one endianess). FCTL_BE is > >>>> + * always zero (little-endian accesses). Thus TC_SBE must > >>>> + * always be LE, i.e. zero. > >>>> + */ > >>>> + if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) { > >>>> + return false; > >>>> + } > >>>> + > >>>> + return true; > >>>> +} > >>>> + > >>>> +/* > >>>> + * Validate process context (PC) according to section > >>>> + * "Process-context configuration checks". 
> >>>> + */ > >>>> +static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, > >>>> + RISCVIOMMUContext *ctx) > >>>> +{ > >>>> + uint32_t mode; > >>>> + > >>>> + if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) { > >>>> + return false; > >>>> + } > >>>> + > >>>> + if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) { > >>>> + return false; > >>>> + } > >>>> + > >>>> + mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); > >>>> + switch (mode) { > >>>> + case RISCV_IOMMU_DC_FSC_MODE_BARE: > >>>> + /* sv39 and sv32 modes have the same value (8) */ > >>>> + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: > >>>> + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: > >>>> + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: > >>>> + break; > >>>> + default: > >>>> + return false; > >>>> + } > >>>> + > >>>> + if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { > >>>> + if (mode == RISCV_IOMMU_CAP_SV32 && > >>>> + !(s->cap & RISCV_IOMMU_CAP_SV32)) { > >>>> + return false; > >>>> + } > >>>> + } else { > >>>> + switch (mode) { > >>>> + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { > >>>> + return false; > >>>> + } > >>>> + break; > >>>> + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { > >>>> + return false; > >>>> + } > >>>> + break; > >>>> + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: > >>>> + if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { > >>>> + return false; > >>>> + } > >>>> + break; > >>>> + } > >>>> + } > >>>> + > >>>> + return true; > >>>> +} > >>>> + > >>>> +/* > >>>> + * RISC-V IOMMU Device Context Loopkup - Device Directory Tree Walk > >>>> + * > >>>> + * @s : IOMMU Device State > >>>> + * @ctx : Device Translation Context with devid and process_id > >>>> set. > >>>> + * @return : success or fault code. 
> >>>> + */ > >>>> +static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext > >>>> *ctx) > >>>> +{ > >>>> + const uint64_t ddtp = s->ddtp; > >>>> + unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE); > >>>> + dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN)); > >>>> + struct riscv_iommu_dc dc; > >>>> + /* Device Context format: 0: extended (64 bytes) | 1: base (32 > >>>> bytes) */ > >>>> + const int dc_fmt = !s->enable_msi; > >>>> + const size_t dc_len = sizeof(dc) >> dc_fmt; > >>>> + unsigned depth; > >>>> + uint64_t de; > >>>> + > >>>> + switch (mode) { > >>>> + case RISCV_IOMMU_DDTP_MODE_OFF: > >>>> + return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; > >>>> + > >>>> + case RISCV_IOMMU_DDTP_MODE_BARE: > >>>> + /* mock up pass-through translation context */ > >>>> + ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, > >>>> + RISCV_IOMMU_DC_IOHGATP_MODE_BARE); > >>>> + ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, > >>>> + RISCV_IOMMU_DC_FSC_MODE_BARE); > >>>> + ctx->tc = RISCV_IOMMU_DC_TC_V; > >>>> + ctx->ta = 0; > >>>> + ctx->msiptp = 0; > >>>> + return 0; > >>>> + > >>>> + case RISCV_IOMMU_DDTP_MODE_1LVL: > >>>> + depth = 0; > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_DDTP_MODE_2LVL: > >>>> + depth = 1; > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_DDTP_MODE_3LVL: > >>>> + depth = 2; > >>>> + break; > >>>> + > >>>> + default: > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; > >>>> + } > >>>> + > >>>> + /* > >>>> + * Check supported device id width (in bits). > >>>> + * See IOMMU Specification, Chapter 6. Software guidelines. 
> >>>> + * - if extended device-context format is used: > >>>> + * 1LVL: 6, 2LVL: 15, 3LVL: 24 > >>>> + * - if base device-context format is used: > >>>> + * 1LVL: 7, 2LVL: 16, 3LVL: 24 > >>>> + */ > >>>> + if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; > >>>> + } > >>>> + > >>>> + /* Device directory tree walk */ > >>>> + for (; depth-- > 0; ) { > >>>> + /* > >>>> + * Select device id index bits based on device directory tree > >>>> level > >>>> + * and device context format. > >>>> + * See IOMMU Specification, Chapter 2. Data Structures. > >>>> + * - if extended device-context format is used: > >>>> + * device index: [23:15][14:6][5:0] > >>>> + * - if base device-context format is used: > >>>> + * device index: [23:16][15:7][6:0] > >>>> + */ > >>>> + const int split = depth * 9 + 6 + dc_fmt; > >>>> + addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK; > >>>> + if (dma_memory_read(s->target_as, addr, &de, sizeof(de), > >>>> + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; > >>>> + } > >>>> + le64_to_cpus(&de); > >>>> + if (!(de & RISCV_IOMMU_DDTE_VALID)) { > >>>> + /* invalid directory entry */ > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; > >>>> + } > >>>> + if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) { > >>>> + /* reserved bits set */ > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; > >>>> + } > >>>> + addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN)); > >>>> + } > >>>> + > >>>> + /* index into device context entry page */ > >>>> + addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK; > >>>> + > >>>> + memset(&dc, 0, sizeof(dc)); > >>>> + if (dma_memory_read(s->target_as, addr, &dc, dc_len, > >>>> + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; > >>>> + } > >>>> + > >>>> + /* Set translation context. 
*/ > >>>> + ctx->tc = le64_to_cpu(dc.tc); > >>>> + ctx->gatp = le64_to_cpu(dc.iohgatp); > >>>> + ctx->satp = le64_to_cpu(dc.fsc); > >>>> + ctx->ta = le64_to_cpu(dc.ta); > >>>> + ctx->msiptp = le64_to_cpu(dc.msiptp); > >>>> + ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask); > >>>> + ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern); > >>>> + > >>>> + if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; > >>>> + } > >>>> + > >>>> + if (!riscv_iommu_validate_device_ctx(s, ctx)) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; > >>>> + } > >>>> + > >>>> + /* FSC field checks */ > >>>> + mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); > >>>> + addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN)); > >>>> + > >>>> + if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) { > >>>> + if (ctx->process_id != RISCV_IOMMU_NOPROCID) { > >>>> + /* PID is disabled */ > >>>> + return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; > >>>> + } > >>>> + if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) { > >>>> + /* Invalid translation mode */ > >>>> + return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; > >>>> + } > >>>> + return 0; > >>>> + } > >>>> + > >>>> + if (ctx->process_id == RISCV_IOMMU_NOPROCID) { > >>>> + if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) { > >>>> + /* No default process_id enabled, set BARE mode */ > >>>> + ctx->satp = 0ULL; > >>>> + return 0; > >>>> + } else { > >>>> + /* Use default process_id #0 */ > >>>> + ctx->process_id = 0; > >>>> + } > >>>> + } > >>>> + > >>>> + if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) { > >>>> + /* No S-Stage translation, done. 
*/ > >>>> + return 0; > >>>> + } > >>>> + > >>>> + /* FSC.TC.PDTV enabled */ > >>>> + if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) { > >>>> + /* Invalid PDTP.MODE */ > >>>> + return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; > >>>> + } > >>>> + > >>>> + for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; > >>>> ) { > >>>> + /* > >>>> + * Select process id index bits based on process directory tree > >>>> + * level. See IOMMU Specification, 2.2. Process-Directory-Table. > >>>> + */ > >>>> + const int split = depth * 9 + 8; > >>>> + addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; > >>>> + if (dma_memory_read(s->target_as, addr, &de, sizeof(de), > >>>> + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; > >>>> + } > >>>> + le64_to_cpus(&de); > >>>> + if (!(de & RISCV_IOMMU_PC_TA_V)) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; > >>>> + } > >>>> + addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN)); > >>>> + } > >>>> + > >>>> + /* Leaf entry in PDT */ > >>>> + addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; > >>>> + if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * > >>>> 2, > >>>> + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; > >>>> + } > >>>> + > >>>> + /* Use FSC and TA from process directory entry. 
*/ > >>>> + ctx->ta = le64_to_cpu(dc.ta); > >>>> + ctx->satp = le64_to_cpu(dc.fsc); > >>>> + > >>>> + if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; > >>>> + } > >>>> + > >>>> + if (!riscv_iommu_validate_process_ctx(s, ctx)) { > >>>> + return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; > >>>> + } > >>>> + > >>>> + return 0; > >>>> +} > >>>> + > >>>> +/* Translation Context cache support */ > >>>> +static gboolean __ctx_equal(gconstpointer v1, gconstpointer v2) > >>>> +{ > >>>> + RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1; > >>>> + RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2; > >>>> + return c1->devid == c2->devid && > >>>> + c1->process_id == c2->process_id; > >>>> +} > >>>> + > >>>> +static guint __ctx_hash(gconstpointer v) > >>>> +{ > >>>> + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v; > >>>> + /* > >>>> + * Generate simple hash of (process_id, devid) > >>>> + * assuming 24-bit wide devid. > >>>> + */ > >>>> + return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24); > >>>> +} > >>>> + > >>>> +static void __ctx_inval_devid_procid(gpointer key, gpointer value, > >>>> + gpointer data) > >>>> +{ > >>>> + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; > >>>> + RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; > >>>> + if (ctx->tc & RISCV_IOMMU_DC_TC_V && > >>>> + ctx->devid == arg->devid && > >>>> + ctx->process_id == arg->process_id) { > >>>> + ctx->tc &= ~RISCV_IOMMU_DC_TC_V; > >>>> + } > >>>> +} > >>>> + > >>>> +static void __ctx_inval_devid(gpointer key, gpointer value, gpointer > >>>> data) > >>>> +{ > >>>> + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; > >>>> + RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; > >>>> + if (ctx->tc & RISCV_IOMMU_DC_TC_V && > >>>> + ctx->devid == arg->devid) { > >>>> + ctx->tc &= ~RISCV_IOMMU_DC_TC_V; > >>>> + } > >>>> +} > >>>> + > >>>> +static void __ctx_inval_all(gpointer key, gpointer value, gpointer data) > >>>> +{ > >>>> + RISCVIOMMUContext 
*ctx = (RISCVIOMMUContext *) value; > >>>> + if (ctx->tc & RISCV_IOMMU_DC_TC_V) { > >>>> + ctx->tc &= ~RISCV_IOMMU_DC_TC_V; > >>>> + } > >>>> +} > >>>> + > >>>> +static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func, > >>>> + uint32_t devid, uint32_t process_id) > >>>> +{ > >>>> + GHashTable *ctx_cache; > >>>> + RISCVIOMMUContext key = { > >>>> + .devid = devid, > >>>> + .process_id = process_id, > >>>> + }; > >>>> + ctx_cache = g_hash_table_ref(s->ctx_cache); > >>>> + qemu_mutex_lock(&s->ctx_lock); > >>>> + g_hash_table_foreach(ctx_cache, func, &key); > >>>> + qemu_mutex_unlock(&s->ctx_lock); > >>>> + g_hash_table_unref(ctx_cache); > >>>> +} > >>>> + > >>>> +/* Find or allocate translation context for a given {device_id, > >>>> process_id} */ > >>>> +static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s, > >>>> + unsigned devid, unsigned > >>>> process_id, > >>>> + void **ref) > >>>> +{ > >>>> + GHashTable *ctx_cache; > >>>> + RISCVIOMMUContext *ctx; > >>>> + RISCVIOMMUContext key = { > >>>> + .devid = devid, > >>>> + .process_id = process_id, > >>>> + }; > >>>> + > >>>> + ctx_cache = g_hash_table_ref(s->ctx_cache); > >>>> + qemu_mutex_lock(&s->ctx_lock); > >>>> + ctx = g_hash_table_lookup(ctx_cache, &key); > >>>> + qemu_mutex_unlock(&s->ctx_lock); > >>>> + > >>>> + if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) { > >>>> + *ref = ctx_cache; > >>>> + return ctx; > >>>> + } > >>>> + > >>>> + ctx = g_new0(RISCVIOMMUContext, 1); > >>>> + ctx->devid = devid; > >>>> + ctx->process_id = process_id; > >>>> + > >>>> + int fault = riscv_iommu_ctx_fetch(s, ctx); > >>>> + if (!fault) { > >>>> + qemu_mutex_lock(&s->ctx_lock); > >>>> + if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) { > >>>> + g_hash_table_unref(ctx_cache); > >>>> + ctx_cache = g_hash_table_new_full(__ctx_hash, __ctx_equal, > >>>> + g_free, NULL); > >>>> + g_hash_table_ref(ctx_cache); > >>>> + g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache)); > >>>> + } > >>>> + 
g_hash_table_add(ctx_cache, ctx); > >>>> + qemu_mutex_unlock(&s->ctx_lock); > >>>> + *ref = ctx_cache; > >>>> + return ctx; > >>>> + } > >>>> + > >>>> + g_hash_table_unref(ctx_cache); > >>>> + *ref = NULL; > >>>> + > >>>> + riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD, > >>>> + fault, !!process_id, 0, 0); > >>>> + > >>>> + g_free(ctx); > >>>> + return NULL; > >>>> +} > >>>> + > >>>> +static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref) > >>>> +{ > >>>> + if (ref) { > >>>> + g_hash_table_unref((GHashTable *)ref); > >>>> + } > >>>> +} > >>>> + > >>>> +/* Find or allocate address space for a given device */ > >>>> +static AddressSpace *riscv_iommu_space(RISCVIOMMUState *s, uint32_t > >>>> devid) > >>>> +{ > >>>> + RISCVIOMMUSpace *as; > >>>> + > >>>> + /* FIXME: PCIe bus remapping for attached endpoints. */ > >>>> + devid |= s->bus << 8; > >>>> + > >>>> + qemu_mutex_lock(&s->core_lock); > >>>> + QLIST_FOREACH(as, &s->spaces, list) { > >>>> + if (as->devid == devid) { > >>>> + break; > >>>> + } > >>>> + } > >>>> + qemu_mutex_unlock(&s->core_lock); > >>>> + > >>>> + if (as == NULL) { > >>>> + char name[64]; > >>>> + as = g_new0(RISCVIOMMUSpace, 1); > >>>> + > >>>> + as->iommu = s; > >>>> + as->devid = devid; > >>>> + > >>>> + snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova", > >>>> + PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), > >>>> PCI_FUNC(as->devid)); > >>>> + > >>>> + /* IOVA address space, untranslated addresses */ > >>>> + memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr), > >>>> + TYPE_RISCV_IOMMU_MEMORY_REGION, > >>>> + OBJECT(as), "riscv_iommu", UINT64_MAX); > >>>> + address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), > >>>> name); > >>>> + > >>>> + qemu_mutex_lock(&s->core_lock); > >>>> + QLIST_INSERT_HEAD(&s->spaces, as, list); > >>>> + qemu_mutex_unlock(&s->core_lock); > >>>> + > >>>> + trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid), > >>>> + PCI_SLOT(as->devid), 
PCI_FUNC(as->devid)); > >>>> + } > >>>> + return &as->iova_as; > >>>> +} > >>>> + > >>>> +static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext > >>>> *ctx, > >>>> + IOMMUTLBEntry *iotlb) > >>>> +{ > >>>> + bool enable_pid; > >>>> + bool enable_pri; > >>>> + int fault; > >>>> + > >>>> + /* > >>>> + * TC[32] is reserved for custom extensions, used here to > >>>> temporarily > >>>> + * enable automatic page-request generation for ATS queries. > >>>> + */ > >>>> + enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32)); > >>>> + enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV); > >>>> + > >>>> + /* Translate using device directory / page table information. */ > >>>> + fault = riscv_iommu_spa_fetch(s, ctx, iotlb); > >>>> + > >>>> + if (enable_pri && fault) { > >>>> + struct riscv_iommu_pq_record pr = {0}; > >>>> + if (enable_pid) { > >>>> + pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV, > >>>> + RISCV_IOMMU_PREQ_HDR_PID, > >>>> ctx->process_id); > >>>> + } > >>>> + pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, > >>>> ctx->devid); > >>>> + pr.payload = (iotlb->iova & TARGET_PAGE_MASK) | > >>>> + RISCV_IOMMU_PREQ_PAYLOAD_M; > >>>> + riscv_iommu_pri(s, &pr); > >>>> + return fault; > >>>> + } > >>>> + > >>>> + if (fault) { > >>>> + unsigned ttype; > >>>> + > >>>> + if (iotlb->perm & IOMMU_RW) { > >>>> + ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; > >>>> + } else { > >>>> + ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD; > >>>> + } > >>>> + > >>>> + riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid, > >>>> + iotlb->iova, iotlb->translated_addr); > >>>> + return fault; > >>>> + } > >>>> + > >>>> + return 0; > >>>> +} > >>>> + > >>>> +/* IOMMU Command Interface */ > >>>> +static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify, > >>>> + uint64_t addr, uint32_t data) > >>>> +{ > >>>> + /* > >>>> + * ATS processing in this implementation of the IOMMU is > >>>> synchronous, > >>>> + * no need to wait for completions here. 
> >>>> + */ > >>>> + if (!notify) { > >>>> + return MEMTX_OK; > >>>> + } > >>>> + > >>>> + return dma_memory_write(s->target_as, addr, &data, sizeof(data), > >>>> + MEMTXATTRS_UNSPECIFIED); > >>>> +} > >>>> + > >>>> +static void riscv_iommu_process_ddtp(RISCVIOMMUState *s) > >>>> +{ > >>>> + uint64_t old_ddtp = s->ddtp; > >>>> + uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP); > >>>> + unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE); > >>>> + unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE); > >>>> + bool ok = false; > >>>> + > >>>> + /* > >>>> + * Check for allowed DDTP.MODE transitions: > >>>> + * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL} > >>>> + * {1LVL, 2LVL, 3LVL} -> {OFF, BARE} > >>>> + */ > >>>> + if (new_mode == old_mode || > >>>> + new_mode == RISCV_IOMMU_DDTP_MODE_OFF || > >>>> + new_mode == RISCV_IOMMU_DDTP_MODE_BARE) { > >>>> + ok = true; > >>>> + } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL || > >>>> + new_mode == RISCV_IOMMU_DDTP_MODE_2LVL || > >>>> + new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) { > >>>> + ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF || > >>>> + old_mode == RISCV_IOMMU_DDTP_MODE_BARE; > >>>> + } > >>>> + > >>>> + if (ok) { > >>>> + /* clear reserved and busy bits, report back sanitized version > >>>> */ > >>>> + new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN, > >>>> + RISCV_IOMMU_DDTP_MODE, new_mode); > >>>> + } else { > >>>> + new_ddtp = old_ddtp; > >>>> + } > >>>> + s->ddtp = new_ddtp; > >>>> + > >>>> + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp); > >>>> +} > >>>> + > >>>> +/* Command function and opcode field. 
*/ > >>>> +#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op)) > >>>> + > >>>> +static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s) > >>>> +{ > >>>> + struct riscv_iommu_command cmd; > >>>> + MemTxResult res; > >>>> + dma_addr_t addr; > >>>> + uint32_t tail, head, ctrl; > >>>> + uint64_t cmd_opcode; > >>>> + GHFunc func; > >>>> + > >>>> + ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); > >>>> + tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask; > >>>> + head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask; > >>>> + > >>>> + /* Check for pending error or queue processing disabled */ > >>>> + if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) || > >>>> + !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | > >>>> RISCV_IOMMU_CQCSR_CQMF))) { > >>>> + return; > >>>> + } > >>>> + > >>>> + while (tail != head) { > >>>> + addr = s->cq_addr + head * sizeof(cmd); > >>>> + res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd), > >>>> + MEMTXATTRS_UNSPECIFIED); > >>>> + > >>>> + if (res != MEMTX_OK) { > >>>> + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, > >>>> + RISCV_IOMMU_CQCSR_CQMF, 0); > >>>> + goto fault; > >>>> + } > >>>> + > >>>> + trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1); > >>>> + > >>>> + cmd_opcode = get_field(cmd.dword0, > >>>> + RISCV_IOMMU_CMD_OPCODE | > >>>> RISCV_IOMMU_CMD_FUNC); > >>>> + > >>>> + switch (cmd_opcode) { > >>>> + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C, > >>>> + RISCV_IOMMU_CMD_IOFENCE_OPCODE): > >>>> + res = riscv_iommu_iofence(s, > >>>> + cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1, > >>>> + get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA)); > >>>> + > >>>> + if (res != MEMTX_OK) { > >>>> + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, > >>>> + RISCV_IOMMU_CQCSR_CQMF, 0); > >>>> + goto fault; > >>>> + } > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA, > >>>> + RISCV_IOMMU_CMD_IOTINVAL_OPCODE): > >>>> + if (cmd.dword0 & 
RISCV_IOMMU_CMD_IOTINVAL_PSCV) { > >>>> + /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 > >>>> */ > >>>> + goto cmd_ill; > >>>> + } > >>>> + /* translation cache not implemented yet */ > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA, > >>>> + RISCV_IOMMU_CMD_IOTINVAL_OPCODE): > >>>> + /* translation cache not implemented yet */ > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT, > >>>> + RISCV_IOMMU_CMD_IODIR_OPCODE): > >>>> + if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { > >>>> + /* invalidate all device context cache mappings */ > >>>> + func = __ctx_inval_all; > >>>> + } else { > >>>> + /* invalidate all device context matching DID */ > >>>> + func = __ctx_inval_devid; > >>>> + } > >>>> + riscv_iommu_ctx_inval(s, func, > >>>> + get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0); > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT, > >>>> + RISCV_IOMMU_CMD_IODIR_OPCODE): > >>>> + if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { > >>>> + /* illegal command arguments IODIR_PDT & DV == 0 */ > >>>> + goto cmd_ill; > >>>> + } else { > >>>> + func = __ctx_inval_devid_procid; > >>>> + } > >>>> + riscv_iommu_ctx_inval(s, func, > >>>> + get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), > >>>> + get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID)); > >>>> + break; > >>>> + > >>>> + default: > >>>> + cmd_ill: > >>>> + /* Invalid instruction, do not advance instruction index. */ > >>>> + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, > >>>> + RISCV_IOMMU_CQCSR_CMD_ILL, 0); > >>>> + goto fault; > >>>> + } > >>>> + > >>>> + /* Advance and update head pointer after command completes. 
*/ > >>>> + head = (head + 1) & s->cq_mask; > >>>> + riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head); > >>>> + } > >>>> + return; > >>>> + > >>>> +fault: > >>>> + if (ctrl & RISCV_IOMMU_CQCSR_CIE) { > >>>> + riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ); > >>>> + } > >>>> +} > >>>> + > >>>> +static void riscv_iommu_process_cq_control(RISCVIOMMUState *s) > >>>> +{ > >>>> + uint64_t base; > >>>> + uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); > >>>> + uint32_t ctrl_clr; > >>>> + bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN); > >>>> + bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON); > >>>> + > >>>> + if (enable && !active) { > >>>> + base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB); > >>>> + s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) > >>>> - 1; > >>>> + s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN)); > >>>> + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask); > >>>> + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0); > >>>> + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0); > >>>> + ctrl_set = RISCV_IOMMU_CQCSR_CQON; > >>>> + ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF | > >>>> + RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO > >>>> | > >>>> + RISCV_IOMMU_CQCSR_FENCE_W_IP; > >>>> + } else if (!enable && active) { > >>>> + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0); > >>>> + ctrl_set = 0; > >>>> + ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON; > >>>> + } else { > >>>> + ctrl_set = 0; > >>>> + ctrl_clr = RISCV_IOMMU_CQCSR_BUSY; > >>>> + } > >>>> + > >>>> + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr); > >>>> +} > >>>> + > >>>> +static void riscv_iommu_process_fq_control(RISCVIOMMUState *s) > >>>> +{ > >>>> + uint64_t base; > >>>> + uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); > >>>> + uint32_t ctrl_clr; > >>>> + bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN); > >>>> + bool active = !!(ctrl_set & 
RISCV_IOMMU_FQCSR_FQON); > >>>> + > >>>> + if (enable && !active) { > >>>> + base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB); > >>>> + s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) > >>>> - 1; > >>>> + s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN)); > >>>> + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask); > >>>> + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0); > >>>> + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0); > >>>> + ctrl_set = RISCV_IOMMU_FQCSR_FQON; > >>>> + ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF | > >>>> + RISCV_IOMMU_FQCSR_FQOF; > >>>> + } else if (!enable && active) { > >>>> + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0); > >>>> + ctrl_set = 0; > >>>> + ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON; > >>>> + } else { > >>>> + ctrl_set = 0; > >>>> + ctrl_clr = RISCV_IOMMU_FQCSR_BUSY; > >>>> + } > >>>> + > >>>> + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr); > >>>> +} > >>>> + > >>>> +static void riscv_iommu_process_pq_control(RISCVIOMMUState *s) > >>>> +{ > >>>> + uint64_t base; > >>>> + uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); > >>>> + uint32_t ctrl_clr; > >>>> + bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN); > >>>> + bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON); > >>>> + > >>>> + if (enable && !active) { > >>>> + base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB); > >>>> + s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) > >>>> - 1; > >>>> + s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN)); > >>>> + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask); > >>>> + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0); > >>>> + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0); > >>>> + ctrl_set = RISCV_IOMMU_PQCSR_PQON; > >>>> + ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF | > >>>> + RISCV_IOMMU_PQCSR_PQOF; > >>>> + } else if (!enable && active) { > >>>> + 
stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0); > >>>> + ctrl_set = 0; > >>>> + ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON; > >>>> + } else { > >>>> + ctrl_set = 0; > >>>> + ctrl_clr = RISCV_IOMMU_PQCSR_BUSY; > >>>> + } > >>>> + > >>>> + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr); > >>>> +} > >>>> + > >>>> +static void riscv_iommu_process_icvec_update(RISCVIOMMUState *s) > >>>> +{ > >>>> + uint32_t icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC); > >>>> + > >>>> + s->icvec_update(s, > >>>> + riscv_iommu_get_icvec_vector(icvec, > >>>> RISCV_IOMMU_INTR_CQ), > >>>> + riscv_iommu_get_icvec_vector(icvec, > >>>> RISCV_IOMMU_INTR_FQ), > >>>> + riscv_iommu_get_icvec_vector(icvec, > >>>> RISCV_IOMMU_INTR_PM), > >>>> + riscv_iommu_get_icvec_vector(icvec, > >>>> RISCV_IOMMU_INTR_PQ)); > >>>> +} > >>>> + > >>>> +typedef void riscv_iommu_process_fn(RISCVIOMMUState *s); > >>>> + > >>>> +static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data) > >>>> +{ > >>>> + uint32_t cqcsr, fqcsr, pqcsr; > >>>> + uint32_t ipsr_set = 0; > >>>> + uint32_t ipsr_clr = 0; > >>>> + > >>>> + if (data & RISCV_IOMMU_IPSR_CIP) { > >>>> + cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); > >>>> + > >>>> + if (cqcsr & RISCV_IOMMU_CQCSR_CIE && > >>>> + (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP || > >>>> + cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL || > >>>> + cqcsr & RISCV_IOMMU_CQCSR_CMD_TO || > >>>> + cqcsr & RISCV_IOMMU_CQCSR_CQMF)) { > >>>> + ipsr_set |= RISCV_IOMMU_IPSR_CIP; > >>>> + } else { > >>>> + ipsr_clr |= RISCV_IOMMU_IPSR_CIP; > >>>> + } > >>>> + } else { > >>>> + ipsr_clr |= RISCV_IOMMU_IPSR_CIP; > >>>> + } > >>>> + > >>>> + if (data & RISCV_IOMMU_IPSR_FIP) { > >>>> + fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); > >>>> + > >>>> + if (fqcsr & RISCV_IOMMU_FQCSR_FIE && > >>>> + (fqcsr & RISCV_IOMMU_FQCSR_FQOF || > >>>> + fqcsr & RISCV_IOMMU_FQCSR_FQMF)) { > >>>> + ipsr_set |= RISCV_IOMMU_IPSR_FIP; > >>>> + } else { > >>>> + 
ipsr_clr |= RISCV_IOMMU_IPSR_FIP; > >>>> + } > >>>> + } else { > >>>> + ipsr_clr |= RISCV_IOMMU_IPSR_FIP; > >>>> + } > >>>> + > >>>> + if (data & RISCV_IOMMU_IPSR_PIP) { > >>>> + pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); > >>>> + > >>>> + if (pqcsr & RISCV_IOMMU_PQCSR_PIE && > >>>> + (pqcsr & RISCV_IOMMU_PQCSR_PQOF || > >>>> + pqcsr & RISCV_IOMMU_PQCSR_PQMF)) { > >>>> + ipsr_set |= RISCV_IOMMU_IPSR_PIP; > >>>> + } else { > >>>> + ipsr_clr |= RISCV_IOMMU_IPSR_PIP; > >>>> + } > >>>> + } else { > >>>> + ipsr_clr |= RISCV_IOMMU_IPSR_PIP; > >>>> + } > >>>> + > >>>> + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr); > >>>> +} > >>>> + > >>>> +static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr, > >>>> + uint64_t data, unsigned size, MemTxAttrs attrs) > >>>> +{ > >>>> + riscv_iommu_process_fn *process_fn = NULL; > >>>> + RISCVIOMMUState *s = opaque; > >>>> + uint32_t regb = addr & ~3; > >>>> + uint32_t busy = 0; > >>>> + uint64_t val = 0; > >>>> + > >>>> + if ((addr & (size - 1)) != 0) { > >>>> + /* Unsupported MMIO alignment or access size */ > >>>> + return MEMTX_ERROR; > >>>> + } > >>>> + > >>>> + if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { > >>>> + /* Unsupported MMIO access location. */ > >>>> + return MEMTX_ACCESS_ERROR; > >>>> + } > >>>> + > >>>> + /* Track actionable MMIO write. 
*/ > >>>> + switch (regb) { > >>>> + case RISCV_IOMMU_REG_DDTP: > >>>> + case RISCV_IOMMU_REG_DDTP + 4: > >>>> + process_fn = riscv_iommu_process_ddtp; > >>>> + regb = RISCV_IOMMU_REG_DDTP; > >>>> + busy = RISCV_IOMMU_DDTP_BUSY; > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_REG_CQT: > >>>> + process_fn = riscv_iommu_process_cq_tail; > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_REG_CQCSR: > >>>> + process_fn = riscv_iommu_process_cq_control; > >>>> + busy = RISCV_IOMMU_CQCSR_BUSY; > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_REG_FQCSR: > >>>> + process_fn = riscv_iommu_process_fq_control; > >>>> + busy = RISCV_IOMMU_FQCSR_BUSY; > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_REG_PQCSR: > >>>> + process_fn = riscv_iommu_process_pq_control; > >>>> + busy = RISCV_IOMMU_PQCSR_BUSY; > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_REG_ICVEC: > >>>> + process_fn = riscv_iommu_process_icvec_update; > >>>> + break; > >>>> + > >>>> + case RISCV_IOMMU_REG_IPSR: > >>>> + /* > >>>> + * IPSR has special procedures to update. Execute it > >>>> + * and exit. > >>>> + */ > >>>> + if (size == 4) { > >>>> + uint32_t ro = ldl_le_p(&s->regs_ro[addr]); > >>>> + uint32_t wc = ldl_le_p(&s->regs_wc[addr]); > >>>> + uint32_t rw = ldl_le_p(&s->regs_rw[addr]); > >>>> + stl_le_p(&val, ((rw & ro) | (data & ~ro)) & ~(data & wc)); > >>>> + } else if (size == 8) { > >>>> + uint64_t ro = ldq_le_p(&s->regs_ro[addr]); > >>>> + uint64_t wc = ldq_le_p(&s->regs_wc[addr]); > >>>> + uint64_t rw = ldq_le_p(&s->regs_rw[addr]); > >>>> + stq_le_p(&val, ((rw & ro) | (data & ~ro)) & ~(data & wc)); > >>>> + } > >>>> + > >>>> + riscv_iommu_update_ipsr(s, val); > >>>> + > >>>> + return MEMTX_OK; > >>>> + > >>>> + default: > >>>> + break; > >>>> + } > >>>> + > >>>> + /* > >>>> + * Registers update might be not synchronized with core logic. 
> >>>> + * If system software updates register when relevant BUSY bit > >>>> + * is set IOMMU behavior of additional writes to the register > >>>> + * is UNSPECIFIED. > >>>> + */ > >>>> + qemu_spin_lock(&s->regs_lock); > >>>> + if (size == 1) { > >>>> + uint8_t ro = s->regs_ro[addr]; > >>>> + uint8_t wc = s->regs_wc[addr]; > >>>> + uint8_t rw = s->regs_rw[addr]; > >>>> + s->regs_rw[addr] = ((rw & ro) | (data & ~ro)) & ~(data & wc); > >>>> + } else if (size == 2) { > >>>> + uint16_t ro = lduw_le_p(&s->regs_ro[addr]); > >>>> + uint16_t wc = lduw_le_p(&s->regs_wc[addr]); > >>>> + uint16_t rw = lduw_le_p(&s->regs_rw[addr]); > >>>> + stw_le_p(&s->regs_rw[addr], ((rw & ro) | (data & ~ro)) & ~(data > >>>> & wc)); > >>>> + } else if (size == 4) { > >>>> + uint32_t ro = ldl_le_p(&s->regs_ro[addr]); > >>>> + uint32_t wc = ldl_le_p(&s->regs_wc[addr]); > >>>> + uint32_t rw = ldl_le_p(&s->regs_rw[addr]); > >>>> + stl_le_p(&s->regs_rw[addr], ((rw & ro) | (data & ~ro)) & ~(data > >>>> & wc)); > >>>> + } else if (size == 8) { > >>>> + uint64_t ro = ldq_le_p(&s->regs_ro[addr]); > >>>> + uint64_t wc = ldq_le_p(&s->regs_wc[addr]); > >>>> + uint64_t rw = ldq_le_p(&s->regs_rw[addr]); > >>>> + stq_le_p(&s->regs_rw[addr], ((rw & ro) | (data & ~ro)) & ~(data > >>>> & wc)); > >>>> + } > >>>> + > >>>> + /* Busy flag update, MSB 4-byte register. 
*/ > >>>> + if (busy) { > >>>> + uint32_t rw = ldl_le_p(&s->regs_rw[regb]); > >>>> + stl_le_p(&s->regs_rw[regb], rw | busy); > >>>> + } > >>>> + qemu_spin_unlock(&s->regs_lock); > >>>> + > >>>> + if (process_fn) { > >>>> + qemu_mutex_lock(&s->core_lock); > >>>> + process_fn(s); > >>>> + qemu_mutex_unlock(&s->core_lock); > >>>> + } > >>>> + > >>>> + return MEMTX_OK; > >>>> +} > >>>> + > >>>> +static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr, > >>>> + uint64_t *data, unsigned size, MemTxAttrs attrs) > >>>> +{ > >>>> + RISCVIOMMUState *s = opaque; > >>>> + uint64_t val = -1; > >>>> + uint8_t *ptr; > >>>> + > >>>> + if ((addr & (size - 1)) != 0) { > >>>> + /* Unsupported MMIO alignment. */ > >>>> + return MEMTX_ERROR; > >>>> + } > >>>> + > >>>> + if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { > >>>> + return MEMTX_ACCESS_ERROR; > >>>> + } > >>>> + > >>>> + ptr = &s->regs_rw[addr]; > >>>> + > >>>> + if (size == 1) { > >>>> + val = (uint64_t)*ptr; > >>>> + } else if (size == 2) { > >>>> + val = lduw_le_p(ptr); > >>>> + } else if (size == 4) { > >>>> + val = ldl_le_p(ptr); > >>>> + } else if (size == 8) { > >>>> + val = ldq_le_p(ptr); > >>>> + } else { > >>>> + return MEMTX_ERROR; > >>>> + } > >>>> + > >>>> + *data = val; > >>>> + > >>>> + return MEMTX_OK; > >>>> +} > >>>> + > >>>> +static const MemoryRegionOps riscv_iommu_mmio_ops = { > >>>> + .read_with_attrs = riscv_iommu_mmio_read, > >>>> + .write_with_attrs = riscv_iommu_mmio_write, > >>>> + .endianness = DEVICE_NATIVE_ENDIAN, > >>>> + .impl = { > >>>> + .min_access_size = 4, > >>>> + .max_access_size = 8, > >>>> + .unaligned = false, > >>>> + }, > >>>> + .valid = { > >>>> + .min_access_size = 4, > >>>> + .max_access_size = 8, > >>>> + } > >>>> +}; > >>>> + > >>>> +/* > >>>> + * Translations matching MSI pattern check are redirected to > >>>> "riscv-iommu-trap" > >>>> + * memory region as untranslated address, for additional MSI/MRIF > >>>> interception > >>>> + * by IOMMU interrupt remapping 
implementation. > >>>> + * Note: Device emulation code generating an MSI is expected to provide > >>>> valid > >>>> + * memory transaction attributes with requester_id set. > >>>> + */ > >>>> +static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr, > >>>> + uint64_t data, unsigned size, MemTxAttrs attrs) > >>>> +{ > >>>> + RISCVIOMMUState* s = (RISCVIOMMUState *)opaque; > >>>> + RISCVIOMMUContext *ctx; > >>>> + MemTxResult res; > >>>> + void *ref; > >>>> + uint32_t devid = attrs.requester_id; > >>>> + > >>>> + if (attrs.unspecified) { > >>>> + return MEMTX_ACCESS_ERROR; > >>>> + } > >>>> + > >>>> + /* FIXME: PCIe bus remapping for attached endpoints. */ > >>>> + devid |= s->bus << 8; > >>>> + > >>>> + ctx = riscv_iommu_ctx(s, devid, 0, &ref); > >>>> + if (ctx == NULL) { > >>>> + res = MEMTX_ACCESS_ERROR; > >>>> + } else { > >>>> + res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs); > >>>> + } > >>>> + riscv_iommu_ctx_put(s, ref); > >>>> + return res; > >>>> +} > >>>> + > >>>> +static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr, > >>>> + uint64_t *data, unsigned size, MemTxAttrs attrs) > >>>> +{ > >>>> + return MEMTX_ACCESS_ERROR; > >>>> +} > >>>> + > >>>> +static const MemoryRegionOps riscv_iommu_trap_ops = { > >>>> + .read_with_attrs = riscv_iommu_trap_read, > >>>> + .write_with_attrs = riscv_iommu_trap_write, > >>>> + .endianness = DEVICE_LITTLE_ENDIAN, > >>>> + .impl = { > >>>> + .min_access_size = 4, > >>>> + .max_access_size = 8, > >>>> + .unaligned = true, > >>>> + }, > >>>> + .valid = { > >>>> + .min_access_size = 4, > >>>> + .max_access_size = 8, > >>>> + } > >>>> +}; > >>>> + > >>>> +static void riscv_iommu_realize(DeviceState *dev, Error **errp) > >>>> +{ > >>>> + RISCVIOMMUState *s = RISCV_IOMMU(dev); > >>>> + > >>>> + s->cap = s->version & RISCV_IOMMU_CAP_VERSION; > >>>> + if (s->enable_msi) { > >>>> + s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF; > >>>> + } > >>>> + if (s->enable_s_stage) { >
>>>> + s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 | > >>>> + RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57; > >>>> + } > >>>> + if (s->enable_g_stage) { > >>>> + s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | > >>>> + RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; > >>>> + } > >>>> + /* Report QEMU target physical address space limits */ > >>>> + s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS, > >>>> + TARGET_PHYS_ADDR_SPACE_BITS); > >>>> + > >>>> + /* TODO: method to report supported PID bits */ > >>>> + s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */ > >>>> + s->cap |= RISCV_IOMMU_CAP_PD8; > >>>> + > >>>> + /* Out-of-reset translation mode: OFF (DMA disabled) or BARE > >>>> (passthrough) */ > >>>> + s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ? > >>>> + RISCV_IOMMU_DDTP_MODE_OFF : > >>>> RISCV_IOMMU_DDTP_MODE_BARE); > >>>> + > >>>> + /* register storage */ > >>>> + s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); > >>>> + s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); > >>>> + s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); > >>>> + > >>>> + /* Mark all registers read-only */ > >>>> + memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE); > >>>> + > >>>> + /* > >>>> + * Register complete MMIO space, including MSI/PBA registers. > >>>> + * Note, PCIDevice implementation will add overlapping MR for > >>>> MSI/PBA, > >>>> + * managed directly by the PCIDevice implementation.
> >>>> + */ > >>>> + memory_region_init_io(&s->regs_mr, OBJECT(dev), > >>>> &riscv_iommu_mmio_ops, s, > >>>> + "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE); > >>>> + > >>>> + /* Set power-on register state */ > >>>> + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap); > >>>> + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0); > >>>> + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL], > >>>> + ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI)); > >>>> + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP], > >>>> + ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE)); > >>>> + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB], > >>>> + ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN)); > >>>> + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB], > >>>> + ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN)); > >>>> + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB], > >>>> + ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN)); > >>>> + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF > >>>> | > >>>> + RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL); > >>>> + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON > >>>> | > >>>> + RISCV_IOMMU_CQCSR_BUSY); > >>>> + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF > >>>> | > >>>> + RISCV_IOMMU_FQCSR_FQOF); > >>>> + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON > >>>> | > >>>> + RISCV_IOMMU_FQCSR_BUSY); > >>>> + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF > >>>> | > >>>> + RISCV_IOMMU_PQCSR_PQOF); > >>>> + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON > >>>> | > >>>> + RISCV_IOMMU_PQCSR_BUSY); > >>>> + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0); > >>>> + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0); > >>>> + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp); > >>>> + > >>>> + /* Memory region for downstream access, if specified. 
*/ > >>>> + if (s->target_mr) { > >>>> + s->target_as = g_new0(AddressSpace, 1); > >>>> + address_space_init(s->target_as, s->target_mr, > >>>> + "riscv-iommu-downstream"); > >>>> + } else { > >>>> + /* Fallback to global system memory. */ > >>>> + s->target_as = &address_space_memory; > >>>> + } > >>>> + > >>>> + /* Memory region for untranslated MRIF/MSI writes */ > >>>> + memory_region_init_io(&s->trap_mr, OBJECT(dev), > >>>> &riscv_iommu_trap_ops, s, > >>>> + "riscv-iommu-trap", ~0ULL); > >>>> + address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as"); > >>>> + > >>>> + /* Device translation context cache */ > >>>> + s->ctx_cache = g_hash_table_new_full(__ctx_hash, __ctx_equal, > >>>> + g_free, NULL); > >>>> + qemu_mutex_init(&s->ctx_lock); > >>>> + > >>>> + s->iommus.le_next = NULL; > >>>> + s->iommus.le_prev = NULL; > >>>> + QLIST_INIT(&s->spaces); > >>>> + qemu_mutex_init(&s->core_lock); > >>>> + qemu_spin_init(&s->regs_lock); > >>>> +} > >>>> + > >>>> +static void riscv_iommu_unrealize(DeviceState *dev) > >>>> +{ > >>>> + RISCVIOMMUState *s = RISCV_IOMMU(dev); > >>>> + > >>>> + qemu_mutex_destroy(&s->core_lock); > >>>> + g_hash_table_unref(s->ctx_cache); > >>>> +} > >>>> + > >>>> +static Property riscv_iommu_properties[] = { > >>>> + DEFINE_PROP_UINT32("version", RISCVIOMMUState, version, > >>>> + RISCV_IOMMU_SPEC_DOT_VER), > >>>> + DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0), > >>>> + DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE), > >>>> + DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE), > >>>> + DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE), > >>>> + DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE), > >>>> + DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr, > >>>> + TYPE_MEMORY_REGION, MemoryRegion *), > >>>> + DEFINE_PROP_END_OF_LIST(), > >>>> +}; > >>>> + > >>>> +static void riscv_iommu_class_init(ObjectClass *klass, void* data) > >>>> +{ > >>>> + 
DeviceClass *dc = DEVICE_CLASS(klass); > >>>> + > >>>> + /* internal device for riscv-iommu-{pci/sys}, not user-creatable */ > >>>> + dc->user_creatable = false; > >>>> + dc->realize = riscv_iommu_realize; > >>>> + dc->unrealize = riscv_iommu_unrealize; > >>>> + device_class_set_props(dc, riscv_iommu_properties); > >>>> +} > >>>> + > >>>> +static const TypeInfo riscv_iommu_info = { > >>>> + .name = TYPE_RISCV_IOMMU, > >>>> + .parent = TYPE_DEVICE, > >>>> + .instance_size = sizeof(RISCVIOMMUState), > >>>> + .class_init = riscv_iommu_class_init, > >>>> +}; > >>>> + > >>>> +static const char *IOMMU_FLAG_STR[] = { > >>>> + "NA", > >>>> + "RO", > >>>> + "WR", > >>>> + "RW", > >>>> +}; > >>>> + > >>>> +/* RISC-V IOMMU Memory Region - Address Translation Space */ > >>>> +static IOMMUTLBEntry riscv_iommu_memory_region_translate( > >>>> + IOMMUMemoryRegion *iommu_mr, hwaddr addr, > >>>> + IOMMUAccessFlags flag, int iommu_idx) > >>>> +{ > >>>> + RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, > >>>> iova_mr); > >>>> + RISCVIOMMUContext *ctx; > >>>> + void *ref; > >>>> + IOMMUTLBEntry iotlb = { > >>>> + .iova = addr, > >>>> + .target_as = as->iommu->target_as, > >>>> + .addr_mask = ~0ULL, > >>>> + .perm = flag, > >>>> + }; > >>>> + > >>>> + ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref); > >>>> + if (ctx == NULL) { > >>>> + /* Translation disabled or invalid. */ > >>>> + iotlb.addr_mask = 0; > >>>> + iotlb.perm = IOMMU_NONE; > >>>> + } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb)) { > >>>> + /* Translation disabled or fault reported. */ > >>>> + iotlb.addr_mask = 0; > >>>> + iotlb.perm = IOMMU_NONE; > >>>> + } > >>>> + > >>>> + /* Trace all dma translations with original access flags. 
*/ > >>>> + trace_riscv_iommu_dma(as->iommu->parent_obj.id, > >>>> PCI_BUS_NUM(as->devid), > >>>> + PCI_SLOT(as->devid), PCI_FUNC(as->devid), > >>>> iommu_idx, > >>>> + IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova, > >>>> + iotlb.translated_addr); > >>>> + > >>>> + riscv_iommu_ctx_put(as->iommu, ref); > >>>> + > >>>> + return iotlb; > >>>> +} > >>>> + > >>>> +static int riscv_iommu_memory_region_notify( > >>>> + IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old, > >>>> + IOMMUNotifierFlag new, Error **errp) > >>>> +{ > >>>> + RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, > >>>> iova_mr); > >>>> + > >>>> + if (old == IOMMU_NOTIFIER_NONE) { > >>>> + as->notifier = true; > >>>> + trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name); > >>>> + } else if (new == IOMMU_NOTIFIER_NONE) { > >>>> + as->notifier = false; > >>>> + trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name); > >>>> + } > >>>> + > >>>> + return 0; > >>>> +} > >>>> + > >>>> +static inline bool pci_is_iommu(PCIDevice *pdev) > >>>> +{ > >>>> + return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806; > >>>> +} > >>>> + > >>>> +static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int > >>>> devfn) > >>>> +{ > >>>> + RISCVIOMMUState *s = (RISCVIOMMUState *) opaque; > >>>> + PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn); > >>>> + AddressSpace *as = NULL; > >>>> + > >>>> + if (pdev && pci_is_iommu(pdev)) { > >>>> + return s->target_as; > >>>> + } > >>>> + > >>>> + /* Find first registered IOMMU device */ > >>>> + while (s->iommus.le_prev) { > >>>> + s = *(s->iommus.le_prev); > >>>> + } > >>>> + > >>>> + /* Find first matching IOMMU */ > >>>> + while (s != NULL && as == NULL) { > >>>> + as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), > >>>> devfn)); > >>>> + s = s->iommus.le_next; > >>>> + } > >>>> + > >>>> + return as ? 
as : &address_space_memory; > >>>> +} > >>>> + > >>>> +static const PCIIOMMUOps riscv_iommu_ops = { > >>>> + .get_address_space = riscv_iommu_find_as, > >>>> +}; > >>>> + > >>>> +void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, > >>>> + Error **errp) > >>>> +{ > >>>> + if (bus->iommu_ops && > >>>> + bus->iommu_ops->get_address_space == riscv_iommu_find_as) { > >>>> + /* Allow multiple IOMMUs on the same PCIe bus, link known > >>>> devices */ > >>>> + RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque; > >>>> + QLIST_INSERT_AFTER(last, iommu, iommus); > >>>> + } else if (!bus->iommu_ops && !bus->iommu_opaque) { > >>>> + pci_setup_iommu(bus, &riscv_iommu_ops, iommu); > >>>> + } else { > >>>> + error_setg(errp, "can't register secondary IOMMU for PCI bus > >>>> #%d", > >>>> + pci_bus_num(bus)); > >>>> + } > >>>> +} > >>>> + > >>>> +static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr, > >>>> + MemTxAttrs attrs) > >>>> +{ > >>>> + return attrs.unspecified ? 
RISCV_IOMMU_NOPROCID : (int)attrs.pid; > >>>> +} > >>>> + > >>>> +static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion > >>>> *iommu_mr) > >>>> +{ > >>>> + RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, > >>>> iova_mr); > >>>> + return 1 << as->iommu->pid_bits; > >>>> +} > >>>> + > >>>> +static void riscv_iommu_memory_region_init(ObjectClass *klass, void > >>>> *data) > >>>> +{ > >>>> + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); > >>>> + > >>>> + imrc->translate = riscv_iommu_memory_region_translate; > >>>> + imrc->notify_flag_changed = riscv_iommu_memory_region_notify; > >>>> + imrc->attrs_to_index = riscv_iommu_memory_region_index; > >>>> + imrc->num_indexes = riscv_iommu_memory_region_index_len; > >>>> +} > >>>> + > >>>> +static const TypeInfo riscv_iommu_memory_region_info = { > >>>> + .parent = TYPE_IOMMU_MEMORY_REGION, > >>>> + .name = TYPE_RISCV_IOMMU_MEMORY_REGION, > >>>> + .class_init = riscv_iommu_memory_region_init, > >>>> +}; > >>>> + > >>>> +static void riscv_iommu_register_mr_types(void) > >>>> +{ > >>>> + type_register_static(&riscv_iommu_memory_region_info); > >>>> + type_register_static(&riscv_iommu_info); > >>>> +} > >>>> + > >>>> +type_init(riscv_iommu_register_mr_types); > >>>> diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h > >>>> new file mode 100644 > >>>> index 0000000000..6d76cb9b1a > >>>> --- /dev/null > >>>> +++ b/hw/riscv/riscv-iommu.h > >>>> @@ -0,0 +1,148 @@ > >>>> +/* > >>>> + * QEMU emulation of an RISC-V IOMMU > >>>> + * > >>>> + * Copyright (C) 2022-2023 Rivos Inc. > >>>> + * > >>>> + * This program is free software; you can redistribute it and/or modify > >>>> + * it under the terms of the GNU General Public License as published by > >>>> + * the Free Software Foundation; either version 2 of the License. 
> >>>> + * > >>>> + * This program is distributed in the hope that it will be useful, > >>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of > >>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > >>>> + * GNU General Public License for more details. > >>>> + * > >>>> + * You should have received a copy of the GNU General Public License > >>>> along > >>>> + * with this program; if not, see <http://www.gnu.org/licenses/>. > >>>> + */ > >>>> + > >>>> +#ifndef HW_RISCV_IOMMU_STATE_H > >>>> +#define HW_RISCV_IOMMU_STATE_H > >>>> + > >>>> +#include "qemu/osdep.h" > >>>> +#include "qom/object.h" > >>>> + > >>>> +#include "hw/riscv/iommu.h" > >>>> + > >>>> +struct RISCVIOMMUState { > >>>> + /*< private >*/ > >>>> + DeviceState parent_obj; > >>>> + > >>>> + /*< public >*/ > >>>> + uint32_t version; /* Reported interface version number */ > >>>> + uint32_t pid_bits; /* process identifier width */ > >>>> + uint32_t bus; /* PCI bus mapping for non-root endpoints */ > >>>> + > >>>> + uint64_t cap; /* IOMMU supported capabilities */ > >>>> + uint64_t fctl; /* IOMMU enabled features */ > >>>> + > >>>> + bool enable_off; /* Enable out-of-reset OFF mode (DMA > >>>> disabled) */ > >>>> + bool enable_msi; /* Enable MSI remapping */ > >>>> + bool enable_s_stage; /* Enable S/VS-Stage translation */ > >>>> + bool enable_g_stage; /* Enable G-Stage translation */ > >>>> + > >>>> + /* IOMMU Internal State */ > >>>> + uint64_t ddtp; /* Validated Device Directory Tree Root > >>>> Pointer */ > >>>> + > >>>> + dma_addr_t cq_addr; /* Command queue base physical address */ > >>>> + dma_addr_t fq_addr; /* Fault/event queue base physical address */ > >>>> + dma_addr_t pq_addr; /* Page request queue base physical address */ > >>>> + > >>>> + uint32_t cq_mask; /* Command queue index bit mask */ > >>>> + uint32_t fq_mask; /* Fault/event queue index bit mask */ > >>>> + uint32_t pq_mask; /* Page request queue index bit mask */ > >>>> + > >>>> + /* interrupt 
notifier */ > >>>> + void (*notify)(RISCVIOMMUState *iommu, unsigned vector); > >>>> + /* register icvec interrupts */ > >>>> + void (*icvec_update)(RISCVIOMMUState *iommu, > >>>> + uint32_t civ, uint32_t fiv, > >>>> + uint32_t pmiv, uint32_t piv); > >>>> + > >>>> + /* IOMMU State Machine */ > >>>> + QemuThread core_proc; /* Background processing thread */ > >>>> + QemuMutex core_lock; /* Global IOMMU lock, used for cache/regs > >>>> updates */ > >>>> + QemuCond core_cond; /* Background processing wake up signal */ > >>>> + unsigned core_exec; /* Processing thread execution actions */ > >>>> + > >>>> + /* IOMMU target address space */ > >>>> + AddressSpace *target_as; > >>>> + MemoryRegion *target_mr; > >>>> + > >>>> + /* MSI / MRIF access trap */ > >>>> + AddressSpace trap_as; > >>>> + MemoryRegion trap_mr; > >>>> + > >>>> + GHashTable *ctx_cache; /* Device translation Context Cache > >>>> */ > >>>> + QemuMutex ctx_lock; /* Device translation Cache update lock */ > >>>> + > >>>> + /* MMIO Hardware Interface */ > >>>> + MemoryRegion regs_mr; > >>>> + QemuSpin regs_lock; > >>>> + uint8_t *regs_rw; /* register state (user write) */ > >>>> + uint8_t *regs_wc; /* write-1-to-clear mask */ > >>>> + uint8_t *regs_ro; /* read-only mask */ > >>>> + > >>>> + QLIST_ENTRY(RISCVIOMMUState) iommus; > >>>> + QLIST_HEAD(, RISCVIOMMUSpace) spaces; > >>>> +}; > >>>> + > >>>> +void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, > >>>> + Error **errp); > >>>> + > >>>> +/* private helpers */ > >>>> + > >>>> +/* Register helper functions */ > >>>> +static inline uint32_t riscv_iommu_reg_mod32(RISCVIOMMUState *s, > >>>> + unsigned idx, uint32_t set, uint32_t clr) > >>>> +{ > >>>> + uint32_t val; > >>>> + qemu_spin_lock(&s->regs_lock); > >>>> + val = ldl_le_p(s->regs_rw + idx); > >>>> + stl_le_p(s->regs_rw + idx, (val & ~clr) | set); > >>>> + qemu_spin_unlock(&s->regs_lock); > >>>> + return val; > >>>> +} > >>>> + > >>>> +static inline void 
riscv_iommu_reg_set32(RISCVIOMMUState *s, > >>>> + unsigned idx, uint32_t set) > >>>> +{ > >>>> + qemu_spin_lock(&s->regs_lock); > >>>> + stl_le_p(s->regs_rw + idx, set); > >>>> + qemu_spin_unlock(&s->regs_lock); > >>>> +} > >>>> + > >>>> +static inline uint32_t riscv_iommu_reg_get32(RISCVIOMMUState *s, > >>>> + unsigned idx) > >>>> +{ > >>>> + return ldl_le_p(s->regs_rw + idx); > >>>> +} > >>>> + > >>>> +static inline uint64_t riscv_iommu_reg_mod64(RISCVIOMMUState *s, > >>>> + unsigned idx, uint64_t set, uint64_t clr) > >>>> +{ > >>>> + uint64_t val; > >>>> + qemu_spin_lock(&s->regs_lock); > >>>> + val = ldq_le_p(s->regs_rw + idx); > >>>> + stq_le_p(s->regs_rw + idx, (val & ~clr) | set); > >>>> + qemu_spin_unlock(&s->regs_lock); > >>>> + return val; > >>>> +} > >>>> + > >>>> +static inline void riscv_iommu_reg_set64(RISCVIOMMUState *s, > >>>> + unsigned idx, uint64_t set) > >>>> +{ > >>>> + qemu_spin_lock(&s->regs_lock); > >>>> + stq_le_p(s->regs_rw + idx, set); > >>>> + qemu_spin_unlock(&s->regs_lock); > >>>> +} > >>>> + > >>>> +static inline uint64_t riscv_iommu_reg_get64(RISCVIOMMUState *s, > >>>> + unsigned idx) > >>>> +{ > >>>> + return ldq_le_p(s->regs_rw + idx); > >>>> +} > >>>> + > >>>> + > >>>> + > >>>> +#endif > >>>> diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events > >>>> new file mode 100644 > >>>> index 0000000000..bdd8b657a6 > >>>> --- /dev/null > >>>> +++ b/hw/riscv/trace-events > >>>> @@ -0,0 +1,12 @@ > >>>> +# See documentation at docs/devel/tracing.rst > >>>> + > >>>> +# riscv-iommu.c > >>>> +riscv_iommu_new(const char *id, unsigned b, unsigned d, unsigned f) > >>>> "%s: device attached %04x:%02x.%d" > >>>> +riscv_iommu_flt(const char *id, unsigned b, unsigned d, unsigned f, > >>>> uint64_t reason, uint64_t iova) "%s: fault %04x:%02x.%u reason: > >>>> 0x%"PRIx64" iova: 0x%"PRIx64 > >>>> +riscv_iommu_pri(const char *id, unsigned b, unsigned d, unsigned f, > >>>> uint64_t iova) "%s: page request %04x:%02x.%u iova: 0x%"PRIx64 > >>>> 
+riscv_iommu_dma(const char *id, unsigned b, unsigned d, unsigned f, > >>>> unsigned pasid, const char *dir, uint64_t iova, uint64_t phys) "%s: > >>>> translate %04x:%02x.%u #%u %s 0x%"PRIx64" -> 0x%"PRIx64 > >>>> +riscv_iommu_msi(const char *id, unsigned b, unsigned d, unsigned f, > >>>> uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u MSI > >>>> 0x%"PRIx64" -> 0x%"PRIx64 > >>>> +riscv_iommu_mrif_notification(const char *id, uint32_t nid, uint64_t > >>>> phys) "%s: sent MRIF notification 0x%x to 0x%"PRIx64 > >>>> +riscv_iommu_cmd(const char *id, uint64_t l, uint64_t u) "%s: command > >>>> 0x%"PRIx64" 0x%"PRIx64 > >>>> +riscv_iommu_notifier_add(const char *id) "%s: dev-iotlb notifier added" > >>>> +riscv_iommu_notifier_del(const char *id) "%s: dev-iotlb notifier > >>>> removed" > >>>> diff --git a/hw/riscv/trace.h b/hw/riscv/trace.h > >>>> new file mode 100644 > >>>> index 0000000000..8c0e3ca1f3 > >>>> --- /dev/null > >>>> +++ b/hw/riscv/trace.h > >>>> @@ -0,0 +1 @@ > >>>> +#include "trace/trace-hw_riscv.h" > >>>> diff --git a/include/hw/riscv/iommu.h b/include/hw/riscv/iommu.h > >>>> new file mode 100644 > >>>> index 0000000000..070ee69973 > >>>> --- /dev/null > >>>> +++ b/include/hw/riscv/iommu.h > >>>> @@ -0,0 +1,36 @@ > >>>> +/* > >>>> + * QEMU emulation of an RISC-V IOMMU > >>>> + * > >>>> + * Copyright (C) 2022-2023 Rivos Inc. > >>>> + * > >>>> + * This program is free software; you can redistribute it and/or modify > >>>> + * it under the terms of the GNU General Public License as published by > >>>> + * the Free Software Foundation; either version 2 of the License. > >>>> + * > >>>> + * This program is distributed in the hope that it will be useful, > >>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of > >>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > >>>> + * GNU General Public License for more details. 
> >>>> + * > >>>> + * You should have received a copy of the GNU General Public License > >>>> along > >>>> + * with this program; if not, see <http://www.gnu.org/licenses/>. > >>>> + */ > >>>> + > >>>> +#ifndef HW_RISCV_IOMMU_H > >>>> +#define HW_RISCV_IOMMU_H > >>>> + > >>>> +#include "qemu/osdep.h" > >>>> +#include "qom/object.h" > >>>> + > >>>> +#define TYPE_RISCV_IOMMU "riscv-iommu" > >>>> +OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUState, RISCV_IOMMU) > >>>> +typedef struct RISCVIOMMUState RISCVIOMMUState; > >>>> + > >>>> +#define TYPE_RISCV_IOMMU_MEMORY_REGION "riscv-iommu-mr" > >>>> +typedef struct RISCVIOMMUSpace RISCVIOMMUSpace; > >>>> + > >>>> +#define TYPE_RISCV_IOMMU_PCI "riscv-iommu-pci" > >>>> +OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUStatePci, RISCV_IOMMU_PCI) > >>>> +typedef struct RISCVIOMMUStatePci RISCVIOMMUStatePci; > >>>> + > >>>> +#endif > >>>> diff --git a/meson.build b/meson.build > >>>> index a1e51277b0..359f836d8e 100644 > >>>> --- a/meson.build > >>>> +++ b/meson.build > >>>> @@ -3373,6 +3373,7 @@ if have_system > >>>> 'hw/pci-host', > >>>> 'hw/ppc', > >>>> 'hw/rtc', > >>>> + 'hw/riscv', > >>>> 'hw/s390x', > >>>> 'hw/scsi', > >>>> 'hw/sd',