AMD IOMMU supports up to 2048 MSIs for a single device function when the NUM_INT_REMAP_SUP Extended-Feature-Register-2 bit is set to one. Software can enable this feature by writing one to NUM_INT_REMAP_MODE in the control register. The MSI address destination mode (DM) bit decides how many MSI data bits are used by the IOMMU to index into the IRT. When DM = 0, the IOMMU uses bits 8:0 (max 512) for the index; otherwise (DM = 1) the IOMMU uses bits 10:0 (max 2048) for the IRT index.
This feature can be enabled with flag `numint2k=on`. In case of passthrough devices viommu uses control register provided by vendor capabilities to determine if host IOMMU has enabled 2048 MSIs. If host IOMMU has not enabled it then the guest feature is disabled. Example command line: ''' -object iommufd,id=fd0 \ -device amd_iommu,dma-remap=on,numint2k=on \ -device vfio-host,host=<DEVID>,iommufd=fd0 \ ''' NOTE: In case of legacy VFIO container the guest will always fall back to 512 MSIs. Signed-off-by: Sairaj Kodilkar <[email protected]> --- hw/i386/amd_iommu.c | 74 ++++++++++++++++++++++++++++++++++++++++----- hw/i386/amd_iommu.h | 12 ++++++++ 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 3221bf5a0303..4f62c4ee3671 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -116,7 +116,12 @@ uint64_t amdvi_extended_feature_register(AMDVIState *s) uint64_t amdvi_extended_feature_register2(AMDVIState *s) { - return AMDVI_DEFAULT_EXT_FEATURES2; + uint64_t feature = AMDVI_DEFAULT_EXT_FEATURES2; + if (s->num_int_sup_2k) { + feature |= AMDVI_FEATURE_NUM_INT_REMAP_SUP; + } + + return feature; } /* configure MMIO registers at startup/reset */ @@ -1538,6 +1543,9 @@ static void amdvi_handle_control_write(AMDVIState *s) AMDVI_MMIO_CONTROL_CMDBUFLEN); s->ga_enabled = !!(control & AMDVI_MMIO_CONTROL_GAEN); + s->num_int_enabled = (control >> AMDVI_MMIO_CONTROL_NUM_INT_REMAP_SHIFT) & + AMDVI_MMIO_CONTROL_NUM_INT_REMAP_MASK; + /* update the flags depending on the control register */ if (s->cmdbuf_enabled) { amdvi_assign_orq(s, AMDVI_MMIO_STATUS, AMDVI_MMIO_STATUS_CMDBUF_RUN); @@ -2119,6 +2127,25 @@ static int amdvi_int_remap_msi(AMDVIState *iommu, * (page 5) */ delivery_mode = (origin->data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 7; + /* + * The MSI address register bit[2] is used to get the destination + * mode. 
The dest_mode 1 is valid for fixed and arbitrated interrupts + * and when IOMMU supports upto 2048 interrupts. + */ + dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1; + + if (dest_mode && + iommu->num_int_enabled == AMDVI_MMIO_CONTROL_NUM_INT_REMAP_2K) { + + trace_amdvi_ir_delivery_mode("2K interrupt mode"); + ret = __amdvi_int_remap_msi(iommu, origin, translated, dte, &irq, sid); + if (ret < 0) { + goto remap_fail; + } + /* Translate IRQ to MSI messages */ + x86_iommu_irq_to_msi_message(&irq, translated); + goto out; + } switch (delivery_mode) { case AMDVI_IOAPIC_INT_TYPE_FIXED: @@ -2159,12 +2186,6 @@ static int amdvi_int_remap_msi(AMDVIState *iommu, goto remap_fail; } - /* - * The MSI address register bit[2] is used to get the destination - * mode. The dest_mode 1 is valid for fixed and arbitrated interrupts - * only. - */ - dest_mode = (origin->address >> MSI_ADDR_DEST_MODE_SHIFT) & 1; if (dest_mode) { trace_amdvi_ir_err("invalid dest_mode"); ret = -AMDVI_IR_ERR; @@ -2322,6 +2343,30 @@ static AddressSpace *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) return &iommu_as[devfn]->as; } +static void amdvi_refresh_efrs_hwinfo(struct AMDVIState *s, + struct iommu_hw_info_amd *hwinfo) +{ + /* Check if host OS has enabled 2K interrupts */ + bool hwinfo_ctrl_2k; + + if (s->num_int_sup_2k && !hwinfo) { + warn_report("AMDVI: Disabling 2048 MSI for guest, " + "use IOMMUFD for device passthrough to support it"); + s->num_int_sup_2k = 0; + } + + hwinfo_ctrl_2k = ((hwinfo->control_register + >> AMDVI_MMIO_CONTROL_NUM_INT_REMAP_SHIFT) + & AMDVI_MMIO_CONTROL_NUM_INT_REMAP_2K); + + if (s->num_int_sup_2k && !hwinfo_ctrl_2k) { + warn_report("AMDVI: Disabling 2048 MSIs for guest, " + "as host kernel does not support this feature"); + s->num_int_sup_2k = 0; + } + + amdvi_refresh_efrs(s); +} static bool amdvi_set_iommu_device(PCIBus *bus, void *opaque, int devfn, HostIOMMUDevice *hiod, Error **errp) @@ -2354,6 +2399,20 @@ static bool 
amdvi_set_iommu_device(PCIBus *bus, void *opaque, int devfn, object_ref(hiod); g_hash_table_insert(s->hiod_hash, new_key, hiod); + if (hiod->caps.type == IOMMU_HW_INFO_TYPE_AMD) { + /* + * Refresh the MMIO efr registers so that changes are visible to the + * guest. + */ + amdvi_refresh_efrs_hwinfo(s, &hiod->caps.vendor_caps.amd); + } else { + /* + * Pass NULL hardware registers when we have non-IOMMUFD + * passthrough device + */ + amdvi_refresh_efrs_hwinfo(s, NULL); + } + return true; } @@ -2641,6 +2700,7 @@ static const Property amdvi_properties[] = { DEFINE_PROP_BOOL("xtsup", AMDVIState, xtsup, false), DEFINE_PROP_STRING("pci-id", AMDVIState, pci_id), DEFINE_PROP_BOOL("dma-remap", AMDVIState, dma_remap, false), + DEFINE_PROP_BOOL("numint2k", AMDVIState, num_int_sup_2k, false), }; static const VMStateDescription vmstate_amdvi_sysbus = { diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index c8eaf229b50e..588725fe0c25 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -107,6 +107,9 @@ #define AMDVI_MMIO_CONTROL_COMWAITINTEN (1ULL << 4) #define AMDVI_MMIO_CONTROL_CMDBUFLEN (1ULL << 12) #define AMDVI_MMIO_CONTROL_GAEN (1ULL << 17) +#define AMDVI_MMIO_CONTROL_NUM_INT_REMAP_MASK (0x3) +#define AMDVI_MMIO_CONTROL_NUM_INT_REMAP_SHIFT (43) +#define AMDVI_MMIO_CONTROL_NUM_INT_REMAP_2K (0x1) /* MMIO status register bits */ #define AMDVI_MMIO_STATUS_CMDBUF_RUN (1 << 4) @@ -160,6 +163,7 @@ #define AMDVI_PERM_READ (1 << 0) #define AMDVI_PERM_WRITE (1 << 1) +/* EFR */ #define AMDVI_FEATURE_PREFETCH (1ULL << 0) /* page prefetch */ #define AMDVI_FEATURE_PPR (1ULL << 1) /* PPR Support */ #define AMDVI_FEATURE_XT (1ULL << 2) /* x2APIC Support */ @@ -169,6 +173,9 @@ #define AMDVI_FEATURE_HE (1ULL << 8) /* hardware error regs */ #define AMDVI_FEATURE_PC (1ULL << 9) /* Perf counters */ +/* EFR2 */ +#define AMDVI_FEATURE_NUM_INT_REMAP_SUP (1ULL << 8) /* 2K int support */ + /* reserved DTE bits */ #define AMDVI_DTE_QUAD0_RESERVED (GENMASK64(6, 2) | GENMASK64(63, 63)) 
#define AMDVI_DTE_QUAD1_RESERVED 0 @@ -380,6 +387,8 @@ struct AMDVIState { bool evtlog_enabled; /* event log enabled */ bool excl_enabled; + uint8_t num_int_enabled; + hwaddr devtab; /* base address device table */ uint64_t devtab_len; /* device table length */ @@ -433,6 +442,9 @@ struct AMDVIState { /* DMA address translation */ bool dma_remap; + + /* upto 2048 interrupt support */ + bool num_int_sup_2k; }; uint64_t amdvi_extended_feature_register(AMDVIState *s); -- 2.34.1
