Accelerated SMMUv3 is only useful when the device can take advantage of the host's SMMUv3 in nested mode. To keep things simple and correct, we only allow this feature for vfio-pci endpoint devices that use the iommufd backend. We also allow non-endpoint emulated devices like PCI bridges and root ports, so that users can plug in these vfio-pci devices.
Another reason for this limit is to avoid problems with IOTLB invalidations. Some commands (e.g., CMD_TLBI_NH_ASID) lack an associated SID, making it difficult to trace the originating device. If we allowed emulated endpoint devices, QEMU would have to invalidate both its own software IOTLB and the host's hardware IOTLB, which could slow things down. Since vfio-pci devices in nested mode rely on the host SMMUv3's nested translation (S1+S2), their get_address_space() callback must return the system address space to enable correct S2 mappings of guest RAM. So in short: - vfio-pci devices return the system address space - bridges and root ports return the IOMMU address space Note: On ARM, MSI doorbell addresses are also translated via SMMUv3. Hence, if a vfio-pci device is behind the SMMuv3 with translation enabled, it must return the IOMMU address space for MSI. Support for this will be added in a follow-up patch. Signed-off-by: Shameer Kolothum <shameerali.kolothum.th...@huawei.com> --- hw/arm/smmuv3-accel.c | 50 ++++++++++++++++++++++++++++- hw/arm/smmuv3-accel.h | 15 +++++++++ hw/arm/smmuv3.c | 4 +++ hw/pci-bridge/pci_expander_bridge.c | 1 - include/hw/arm/smmuv3.h | 1 + include/hw/pci/pci_bridge.h | 1 + 6 files changed, 70 insertions(+), 2 deletions(-) diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c index 2eac9c6ff4..0b0ddb03e2 100644 --- a/hw/arm/smmuv3-accel.c +++ b/hw/arm/smmuv3-accel.c @@ -7,13 +7,19 @@ */ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "hw/arm/smmuv3.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci-host/gpex.h" +#include "hw/vfio/pci.h" + #include "smmuv3-accel.h" static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus, PCIBus *bus, int devfn) { + SMMUv3State *s = ARM_SMMUV3(bs); SMMUDevice *sdev = sbus->pbdev[devfn]; SMMUv3AccelDevice *accel_dev; @@ -25,30 +31,72 @@ static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus, sbus->pbdev[devfn] = sdev; smmu_init_sdev(bs, sdev, bus, devfn); + address_space_init(&accel_dev->as_sysmem, &s->s_accel->root, + "smmuv3-accel-sysmem"); } return accel_dev; } +static bool smmuv3_accel_pdev_allowed(PCIDevice *pdev, bool *vfio_pci) +{ + + if (object_dynamic_cast(OBJECT(pdev), TYPE_PCI_BRIDGE) || + object_dynamic_cast(OBJECT(pdev), "pxb-pcie") || + object_dynamic_cast(OBJECT(pdev), "gpex-root")) { + return true; + } else if ((object_dynamic_cast(OBJECT(pdev), TYPE_VFIO_PCI) && + object_property_find(OBJECT(pdev), "iommufd"))) { + *vfio_pci = true; + return true; + } + return false; +} + static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void *opaque, int devfn) { + PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn); SMMUState *bs = opaque; + bool vfio_pci = false; SMMUPciBus *sbus; SMMUv3AccelDevice *accel_dev; SMMUDevice *sdev; + if (pdev && !smmuv3_accel_pdev_allowed(pdev, &vfio_pci)) { + error_report("Device(%s) not allowed. Only PCIe root complex devices " + "or PCI bridge devices or vfio-pci endpoint devices with " + "iommufd as backend is allowed with arm-smmuv3,accel=on", + pdev->name); + exit(1); + } sbus = smmu_get_sbus(bs, bus); accel_dev = smmuv3_accel_get_dev(bs, sbus, bus, devfn); sdev = &accel_dev->sdev; - return &sdev->as; + if (vfio_pci) { + return &accel_dev->as_sysmem; + } else { + return &sdev->as; + } } static const PCIIOMMUOps smmuv3_accel_ops = { .get_address_space = smmuv3_accel_find_add_as, }; +void smmuv3_accel_init(SMMUv3State *s) +{ + SMMUv3AccelState *s_accel; + + s->s_accel = s_accel = g_new0(SMMUv3AccelState, 1); + memory_region_init(&s_accel->root, OBJECT(s), "root", UINT64_MAX); + memory_region_init_alias(&s_accel->sysmem, OBJECT(s), + "smmuv3-accel-sysmem", get_system_memory(), 0, + memory_region_size(get_system_memory())); + memory_region_add_subregion(&s_accel->root, 0, &s_accel->sysmem); +} + static void smmuv3_accel_class_init(ObjectClass *oc, const void *data) { SMMUBaseClass *sbc = ARM_SMMU_CLASS(oc); diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h index 4cf30b1291..2cd343103f 100644 --- a/hw/arm/smmuv3-accel.h +++ b/hw/arm/smmuv3-accel.h @@ -9,11 +9,26 @@ #ifndef HW_ARM_SMMUV3_ACCEL_H #define HW_ARM_SMMUV3_ACCEL_H +#include "hw/arm/smmuv3.h" #include "hw/arm/smmu-common.h" #include CONFIG_DEVICES typedef struct SMMUv3AccelDevice { SMMUDevice sdev; + AddressSpace as_sysmem; } SMMUv3AccelDevice; +typedef struct SMMUv3AccelState { + MemoryRegion root; + MemoryRegion sysmem; +} SMMUv3AccelState; + +#if defined(CONFIG_ARM_SMMUV3) && defined(CONFIG_IOMMUFD) +void smmuv3_accel_init(SMMUv3State *s); +#else +static inline void smmuv3_accel_init(SMMUv3State *d) +{ +} +#endif + #endif /* HW_ARM_SMMUV3_ACCEL_H */ diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c index bcf8af8dc7..2f5a8157dd 100644 --- a/hw/arm/smmuv3.c +++ b/hw/arm/smmuv3.c @@ -32,6 +32,7 @@ #include "qapi/error.h" #include "hw/arm/smmuv3.h" +#include "smmuv3-accel.h" #include "smmuv3-internal.h" #include "smmu-internal.h" @@ -1898,6 +1899,9 @@ static void smmu_realize(DeviceState *d, Error **errp) sysbus_init_mmio(dev, &sys->iomem); smmu_init_irq(s, dev); + if (sys->accel) { + smmuv3_accel_init(s); + } } static const VMStateDescription vmstate_smmuv3_queue = { diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index 1bcceddbc4..a8eb2d2426 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -48,7 +48,6 @@ struct PXBBus { char bus_path[8]; }; -#define TYPE_PXB_PCIE_DEV "pxb-pcie" OBJECT_DECLARE_SIMPLE_TYPE(PXBPCIEDev, PXB_PCIE_DEV) static GList *pxb_dev_list; diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h index d183a62766..3bdb92391a 100644 --- a/include/hw/arm/smmuv3.h +++ b/include/hw/arm/smmuv3.h @@ -63,6 +63,7 @@ struct SMMUv3State { qemu_irq irq[4]; QemuMutex mutex; char *stage; + struct SMMUv3AccelState *s_accel; }; typedef enum { diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index a055fd8d32..b61360b900 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -106,6 +106,7 @@ typedef struct PXBPCIEDev { #define TYPE_PXB_PCIE_BUS "pxb-pcie-bus" #define TYPE_PXB_CXL_BUS "pxb-cxl-bus" +#define TYPE_PXB_PCIE_DEV "pxb-pcie" #define TYPE_PXB_DEV "pxb" OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV) -- 2.34.1