On ARM, when a device is behind an IOMMU, its MSI doorbell address is
subject to translation by the IOMMU. This behavior affects vfio-pci
passthrough devices assigned to guests using an accelerated SMMUv3.
In this setup, we configure the host SMMUv3 in nested mode, where
VFIO sets up the Stage-2 (S2) mappings for guest RAM, while the guest
controls Stage-1 (S1). To allow VFIO to correctly configure S2 mappings,
we currently return the system address space via the get_address_space()
callback for vfio-pci devices.
However, QEMU/KVM also uses this same callback path when resolving the
address space for MSI doorbells:
  kvm_irqchip_add_msi_route()
    kvm_arch_fixup_msi_route()
      pci_device_iommu_address_space()
        get_address_space()
This causes the device to be configured with the wrong MSI doorbell
address when get_address_space() returns the system address space.
Introduce an optional get_msi_address_space() callback and use it in
the above path when available, falling back to get_address_space()
otherwise. This lets IOMMU implementations provide a separate address
space for MSI doorbell translation when required.
Suggested-by: Nicolin Chen <[email protected]>
Signed-off-by: Shameer Kolothum <[email protected]>
Signed-off-by: Shameer Kolothum <[email protected]>
---
hw/pci/pci.c | 19 +++++++++++++++++++
include/hw/pci/pci.h | 16 ++++++++++++++++
target/arm/kvm.c | 2 +-
3 files changed, 36 insertions(+), 1 deletion(-)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 1315ef13ea..6f9e1616dd 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2964,6 +2964,25 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice
*dev)
return &address_space_memory;
}
+AddressSpace *pci_device_iommu_msi_address_space(PCIDevice *dev)
+{
+ PCIBus *bus;
+ PCIBus *iommu_bus;
+ int devfn;
+
+ pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+ if (iommu_bus) {
+ if (iommu_bus->iommu_ops->get_msi_address_space) {
+ return iommu_bus->iommu_ops->get_msi_address_space(bus,
+ iommu_bus->iommu_opaque, devfn);
+ } else {
+ return iommu_bus->iommu_ops->get_address_space(bus,
+ iommu_bus->iommu_opaque, devfn);
+ }
+ }
+ return &address_space_memory;
+}
+
int pci_iommu_init_iotlb_notifier(PCIDevice *dev, IOMMUNotifier *n,
IOMMUNotify fn, void *opaque)
{
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index c54f2b53ae..0d3b351903 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -652,6 +652,21 @@ typedef struct PCIIOMMUOps {
uint32_t pasid, bool priv_req, bool exec_req,
hwaddr addr, bool lpig, uint16_t prgi, bool
is_read,
bool is_write);
+ /**
+ * @get_msi_address_space: get the address space for MSI doorbell address
+ * for devices
+ *
+ * Optional callback which returns a pointer to an #AddressSpace. This
+ * is required if MSI doorbell also gets translated through IOMMU(eg: ARM)
+ *
+ * @bus: the #PCIBus being accessed.
+ *
+ * @opaque: the data passed to pci_setup_iommu().
+ *
+ * @devfn: device and function number
+ */
+ AddressSpace * (*get_msi_address_space)(PCIBus *bus, void *opaque,
+ int devfn);
} PCIIOMMUOps;
bool pci_device_get_iommu_bus_devfn(PCIDevice *dev, PCIBus **piommu_bus,
@@ -660,6 +675,7 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice
*dev);
bool pci_device_set_iommu_device(PCIDevice *dev, HostIOMMUDevice *hiod,
Error **errp);
void pci_device_unset_iommu_device(PCIDevice *dev);
+AddressSpace *pci_device_iommu_msi_address_space(PCIDevice *dev);
/**
* pci_device_get_viommu_flags: get vIOMMU flags.
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index b8a1c071f5..10eb8655c6 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1611,7 +1611,7 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int
level)
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
uint64_t address, uint32_t data, PCIDevice *dev)
{
- AddressSpace *as = pci_device_iommu_address_space(dev);
+ AddressSpace *as = pci_device_iommu_msi_address_space(dev);
hwaddr xlat, len, doorbell_gpa;
MemoryRegionSection mrs;
MemoryRegion *mr;
--
2.43.0