>-----Original Message-----
>From: Shameer Kolothum <shameerali.kolothum.th...@huawei.com>
>Subject: [RFC PATCH v3 06/15] hw/arm/smmuv3-accel: Restrict accelerated
>SMMUv3 to vfio-pci endpoints with iommufd
>
>Accelerated SMMUv3 is only useful when the device can take advantage of
>the host's SMMUv3 in nested mode. To keep things simple and correct, we
>only allow this feature for vfio-pci endpoint devices that use the iommufd
>backend. We also allow non-endpoint emulated devices like PCI bridges and
>root ports, so that users can plug in these vfio-pci devices.
>
>Another reason for this limit is to avoid problems with IOTLB
>invalidations. Some commands (e.g., CMD_TLBI_NH_ASID) lack an associated
>SID, making it difficult to trace the originating device. If we allowed
>emulated endpoint devices, QEMU would have to invalidate both its own
>software IOTLB and the host's hardware IOTLB, which could slow things
>down.
>
>Since vfio-pci devices in nested mode rely on the host SMMUv3's nested
>translation (S1+S2), their get_address_space() callback must return the
>system address space to enable correct S2 mappings of guest RAM.
>
>So in short:
> - vfio-pci devices return the system address space
> - bridges and root ports return the IOMMU address space
>
>Note: On ARM, MSI doorbell addresses are also translated via SMMUv3.
So the translation result is a doorbell addr(gpa) for guest?
IIUC, there should be a mapping between guest doorbell addr(gpa) to host
doorbell addr(hpa) in stage2 page table? Where is this mapping setup?
>Hence, if a vfio-pci device is behind the SMMuv3 with translation enabled,
>it must return the IOMMU address space for MSI. Support for this will be
>added in a follow-up patch.
>
>Signed-off-by: Shameer Kolothum
><shameerali.kolothum.th...@huawei.com>
>---
> hw/arm/smmuv3-accel.c | 50
>++++++++++++++++++++++++++++-
> hw/arm/smmuv3-accel.h | 15 +++++++++
> hw/arm/smmuv3.c | 4 +++
> hw/pci-bridge/pci_expander_bridge.c | 1 -
> include/hw/arm/smmuv3.h | 1 +
> include/hw/pci/pci_bridge.h | 1 +
> 6 files changed, 70 insertions(+), 2 deletions(-)
>
>diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
>index 2eac9c6ff4..0b0ddb03e2 100644
>--- a/hw/arm/smmuv3-accel.c
>+++ b/hw/arm/smmuv3-accel.c
>@@ -7,13 +7,19 @@
> */
>
> #include "qemu/osdep.h"
>+#include "qemu/error-report.h"
>
> #include "hw/arm/smmuv3.h"
>+#include "hw/pci/pci_bridge.h"
>+#include "hw/pci-host/gpex.h"
>+#include "hw/vfio/pci.h"
>+
> #include "smmuv3-accel.h"
>
> static SMMUv3AccelDevice *smmuv3_accel_get_dev(SMMUState *bs,
>SMMUPciBus *sbus,
> PCIBus *bus, int
>devfn)
> {
>+ SMMUv3State *s = ARM_SMMUV3(bs);
> SMMUDevice *sdev = sbus->pbdev[devfn];
> SMMUv3AccelDevice *accel_dev;
>
>@@ -25,30 +31,72 @@ static SMMUv3AccelDevice
>*smmuv3_accel_get_dev(SMMUState *bs, SMMUPciBus *sbus,
>
> sbus->pbdev[devfn] = sdev;
> smmu_init_sdev(bs, sdev, bus, devfn);
>+ address_space_init(&accel_dev->as_sysmem, &s->s_accel->root,
>+ "smmuv3-accel-sysmem");
> }
>
> return accel_dev;
> }
>
>+static bool smmuv3_accel_pdev_allowed(PCIDevice *pdev, bool *vfio_pci)
>+{
>+
>+ if (object_dynamic_cast(OBJECT(pdev), TYPE_PCI_BRIDGE) ||
>+ object_dynamic_cast(OBJECT(pdev), "pxb-pcie") ||
>+ object_dynamic_cast(OBJECT(pdev), "gpex-root")) {
>+ return true;
>+ } else if ((object_dynamic_cast(OBJECT(pdev), TYPE_VFIO_PCI) &&
>+ object_property_find(OBJECT(pdev), "iommufd"))) {
Will this always return true?
>+ *vfio_pci = true;
>+ return true;
>+ }
>+ return false;
>+}
>+
> static AddressSpace *smmuv3_accel_find_add_as(PCIBus *bus, void
>*opaque,
> int devfn)
> {
>+ PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn);
> SMMUState *bs = opaque;
>+ bool vfio_pci = false;
> SMMUPciBus *sbus;
> SMMUv3AccelDevice *accel_dev;
> SMMUDevice *sdev;
>
>+ if (pdev && !smmuv3_accel_pdev_allowed(pdev, &vfio_pci)) {
>+ error_report("Device(%s) not allowed. Only PCIe root complex
>devices "
>+ "or PCI bridge devices or vfio-pci endpoint devices
>with "
>+ "iommufd as backend is allowed with
>arm-smmuv3,accel=on",
>+ pdev->name);
>+ exit(1);
Seems aggressive for a hotplug, could we fail hotplug instead of kill QEMU?
Thanks
Zhenzhong
>+ }
> sbus = smmu_get_sbus(bs, bus);
> accel_dev = smmuv3_accel_get_dev(bs, sbus, bus, devfn);
> sdev = &accel_dev->sdev;
>
>- return &sdev->as;
>+ if (vfio_pci) {
>+ return &accel_dev->as_sysmem;
>+ } else {
>+ return &sdev->as;
>+ }
> }
>
> static const PCIIOMMUOps smmuv3_accel_ops = {
> .get_address_space = smmuv3_accel_find_add_as,
> };
>
>+void smmuv3_accel_init(SMMUv3State *s)
>+{
>+ SMMUv3AccelState *s_accel;
>+
>+ s->s_accel = s_accel = g_new0(SMMUv3AccelState, 1);
>+ memory_region_init(&s_accel->root, OBJECT(s), "root", UINT64_MAX);
>+ memory_region_init_alias(&s_accel->sysmem, OBJECT(s),
>+ "smmuv3-accel-sysmem",
>get_system_memory(), 0,
>+
>memory_region_size(get_system_memory()));
>+ memory_region_add_subregion(&s_accel->root, 0,
>&s_accel->sysmem);
>+}
>+
> static void smmuv3_accel_class_init(ObjectClass *oc, const void *data)
> {
> SMMUBaseClass *sbc = ARM_SMMU_CLASS(oc);
>diff --git a/hw/arm/smmuv3-accel.h b/hw/arm/smmuv3-accel.h
>index 4cf30b1291..2cd343103f 100644
>--- a/hw/arm/smmuv3-accel.h
>+++ b/hw/arm/smmuv3-accel.h
>@@ -9,11 +9,26 @@
> #ifndef HW_ARM_SMMUV3_ACCEL_H
> #define HW_ARM_SMMUV3_ACCEL_H
>
>+#include "hw/arm/smmuv3.h"
> #include "hw/arm/smmu-common.h"
> #include CONFIG_DEVICES
>
> typedef struct SMMUv3AccelDevice {
> SMMUDevice sdev;
>+ AddressSpace as_sysmem;
> } SMMUv3AccelDevice;
>
>+typedef struct SMMUv3AccelState {
>+ MemoryRegion root;
>+ MemoryRegion sysmem;
>+} SMMUv3AccelState;
>+
>+#if defined(CONFIG_ARM_SMMUV3) && defined(CONFIG_IOMMUFD)
>+void smmuv3_accel_init(SMMUv3State *s);
>+#else
>+static inline void smmuv3_accel_init(SMMUv3State *d)
>+{
>+}
>+#endif
>+
> #endif /* HW_ARM_SMMUV3_ACCEL_H */
>diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
>index bcf8af8dc7..2f5a8157dd 100644
>--- a/hw/arm/smmuv3.c
>+++ b/hw/arm/smmuv3.c
>@@ -32,6 +32,7 @@
> #include "qapi/error.h"
>
> #include "hw/arm/smmuv3.h"
>+#include "smmuv3-accel.h"
> #include "smmuv3-internal.h"
> #include "smmu-internal.h"
>
>@@ -1898,6 +1899,9 @@ static void smmu_realize(DeviceState *d, Error
>**errp)
> sysbus_init_mmio(dev, &sys->iomem);
>
> smmu_init_irq(s, dev);
>+ if (sys->accel) {
>+ smmuv3_accel_init(s);
>+ }
> }
>
> static const VMStateDescription vmstate_smmuv3_queue = {
>diff --git a/hw/pci-bridge/pci_expander_bridge.c
>b/hw/pci-bridge/pci_expander_bridge.c
>index 1bcceddbc4..a8eb2d2426 100644
>--- a/hw/pci-bridge/pci_expander_bridge.c
>+++ b/hw/pci-bridge/pci_expander_bridge.c
>@@ -48,7 +48,6 @@ struct PXBBus {
> char bus_path[8];
> };
>
>-#define TYPE_PXB_PCIE_DEV "pxb-pcie"
> OBJECT_DECLARE_SIMPLE_TYPE(PXBPCIEDev, PXB_PCIE_DEV)
>
> static GList *pxb_dev_list;
>diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
>index d183a62766..3bdb92391a 100644
>--- a/include/hw/arm/smmuv3.h
>+++ b/include/hw/arm/smmuv3.h
>@@ -63,6 +63,7 @@ struct SMMUv3State {
> qemu_irq irq[4];
> QemuMutex mutex;
> char *stage;
>+ struct SMMUv3AccelState *s_accel;
> };
>
> typedef enum {
>diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h
>index a055fd8d32..b61360b900 100644
>--- a/include/hw/pci/pci_bridge.h
>+++ b/include/hw/pci/pci_bridge.h
>@@ -106,6 +106,7 @@ typedef struct PXBPCIEDev {
>
> #define TYPE_PXB_PCIE_BUS "pxb-pcie-bus"
> #define TYPE_PXB_CXL_BUS "pxb-cxl-bus"
>+#define TYPE_PXB_PCIE_DEV "pxb-pcie"
> #define TYPE_PXB_DEV "pxb"
> OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV)
>
>--
>2.34.1