> -----Original Message-----
> From: Eric Auger <[email protected]>
> Sent: 06 November 2025 07:43
> To: Jason Gunthorpe <[email protected]>; Nicolin Chen <[email protected]>
> Cc: Shameer Kolothum <[email protected]>; qemu-
> [email protected]; [email protected]; [email protected];
> [email protected]; [email protected]; Nathan Chen
> <[email protected]>; Matt Ochs <[email protected]>;
> [email protected]; [email protected];
> [email protected]; [email protected];
> [email protected]; [email protected]; [email protected];
> Krishnakant Jaju <[email protected]>
> Subject: Re: [PATCH v5 15/32] hw/pci/pci: Introduce optional
> get_msi_address_space() callback
> 
> External email: Use caution opening links or attachments
> 
> 
> On 11/5/25 7:58 PM, Jason Gunthorpe wrote:
> > On Wed, Nov 05, 2025 at 10:33:08AM -0800, Nicolin Chen wrote:
> >> On Wed, Nov 05, 2025 at 02:10:49PM -0400, Jason Gunthorpe wrote:
> >>> On Wed, Nov 05, 2025 at 06:25:05PM +0100, Eric Auger wrote:
> >>>> if the guest doorbell address is wrong because not properly translated,
> >>>> vgic_msi_to_its() will fail to identify the ITS to inject the MSI in.
> >>>> See kernel kvm/vgic/vgic-its.c vgic_msi_to_its and
> >>>> vgic_its_inject_msi
> >>> Which has been exactly my point to Nicolin. There is no way to
> >>> "properly translate" the vMSI address in a HW accelerated SMMU
> >>> emulation.
> >> Hmm, I still can't connect the dots here. QEMU knows where the
> >> guest CD table is to get the stage-1 translation table to walk
> >> through. We could choose to not let it walk through. Yet, why?
> > You cannot walk any tables in guest memory without fully trapping all
> > invalidation on all command queues. Like real HW qemu needs to fence
> > its walks with any concurrent invalidate & sync to ensure it doesn't
> > walk into a UAF situation.
> But at the moment we do trap IOTLB invalidates so logically we can still
> do the translate in that config. The problem you describe will show up
> with vCMDQ which is not part of this series.
> >
> > Since we can't trap or mediate vCMDQ the walking simply cannot be
> > done.
> >
> > Thus, the general principle of the HW accelerated vSMMU is that it
> > NEVER walks any of these guest tables for any reason.
> >
> > Thus, we cannot do anything with vMSI address beyond program it
> > directly into a real PCI device so it undergoes real HW translation.
> But anyway you need to provide KVM a valid info about the guest doorbell
> for this latter to setup irqfd gsi routing and also program ITS
> translation tables. At the moment we have a single vITS in qemu so maybe
> we can cheat.

I have tried to address the “translate” issue below. This introduces a new
get_msi_address() callback to retrieve the MSI doorbell address directly
from the vIOMMU, so we can drop the existing get_msi_address_space() logic.
Please take a look and let me know your thoughts.

Thanks,
Shameer

---
 hw/arm/smmuv3-accel.c   | 10 ++++++++++
 hw/arm/smmuv3.c         |  1 +
 hw/arm/virt.c           |  4 ++++
 hw/pci/pci.c            | 17 +++++++++++++++++
 include/hw/arm/smmuv3.h |  1 +
 include/hw/pci/pci.h    | 15 +++++++++++++++
 target/arm/kvm.c        | 14 ++++++++++++--
 7 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/hw/arm/smmuv3-accel.c b/hw/arm/smmuv3-accel.c
index e6c81c4786..8b2a45a915 100644
--- a/hw/arm/smmuv3-accel.c
+++ b/hw/arm/smmuv3-accel.c
@@ -667,6 +667,15 @@ static void smmuv3_accel_unset_iommu_device(PCIBus *bus, 
void *opaque,
     }
 }

+static uint64_t smmuv3_accel_get_msi_address(PCIBus *bus, void *opaque,
+                                             int devfn)
+{
+    SMMUState *bs = opaque;
+    SMMUv3State *s = ARM_SMMUV3(bs);
+
+    g_assert(s->msi_doorbell);
+    return s->msi_doorbell;
+}
 static AddressSpace *smmuv3_accel_get_msi_as(PCIBus *bus, void *opaque,
                                              int devfn)
 {
@@ -788,6 +797,7 @@ static const PCIIOMMUOps smmuv3_accel_ops = {
     .set_iommu_device = smmuv3_accel_set_iommu_device,
     .unset_iommu_device = smmuv3_accel_unset_iommu_device,
     .get_msi_address_space = smmuv3_accel_get_msi_as,
+    .get_msi_address = smmuv3_accel_get_msi_address,
 };

 void smmuv3_accel_idr_override(SMMUv3State *s)
diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
index 43d297698b..3f2ee8bcce 100644
--- a/hw/arm/smmuv3.c
+++ b/hw/arm/smmuv3.c
@@ -2120,6 +2120,7 @@ static const Property smmuv3_properties[] = {
     DEFINE_PROP_BOOL("ats", SMMUv3State, ats, false),
     DEFINE_PROP_UINT8("oas", SMMUv3State, oas, 44),
     DEFINE_PROP_BOOL("pasid", SMMUv3State, pasid, false),
+    DEFINE_PROP_UINT64("msi-doorbell", SMMUv3State, msi_doorbell, 0),
 };

 static void smmuv3_instance_init(Object *obj)
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 2498e3beff..d2dcb89235 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -3097,6 +3097,8 @@ static void virt_machine_device_plug_cb(HotplugHandler 
*hotplug_dev,

             create_smmuv3_dev_dtb(vms, dev, bus, errp);
             if (object_property_get_bool(OBJECT(dev), "accel", &error_abort)) {
+                hwaddr db_start = base_memmap[VIRT_GIC_ITS].base +
+                                  ITS_TRANS_SIZE + GITS_TRANSLATER;
                 char *stage;
                 stage = object_property_get_str(OBJECT(dev), "stage",
                                                 &error_fatal);
@@ -3107,6 +3109,8 @@ static void virt_machine_device_plug_cb(HotplugHandler 
*hotplug_dev,
                     return;
                 }
                 vms->pci_preserve_config = true;
+                object_property_set_uint(OBJECT(dev), "msi-doorbell", db_start,
+                                         &error_abort);
             }
         }
     }
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 1edd711247..45e79a3c23 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -2982,6 +2982,23 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
     return &address_space_memory;
 }

+bool pci_device_iommu_msi_direct_address(PCIDevice *dev, hwaddr *out_doorbell)
+{
+    PCIBus *bus;
+    PCIBus *iommu_bus;
+    int devfn;
+
+    pci_device_get_iommu_bus_devfn(dev, &iommu_bus, &bus, &devfn);
+    if (iommu_bus) {
+        if (iommu_bus->iommu_ops->get_msi_address) {
+            *out_doorbell = iommu_bus->iommu_ops->get_msi_address(bus,
+                                 iommu_bus->iommu_opaque, devfn);
+            return true;
+        }
+    }
+    return false;
+}
+
 AddressSpace *pci_device_iommu_msi_address_space(PCIDevice *dev)
 {
     PCIBus *bus;
diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
index ee0b5ed74f..f50d8c72bd 100644
--- a/include/hw/arm/smmuv3.h
+++ b/include/hw/arm/smmuv3.h
@@ -72,6 +72,7 @@ struct SMMUv3State {
     bool ats;
     uint8_t oas;
     bool pasid;
+    uint64_t msi_doorbell;
 };

 typedef enum {
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index b731443c67..e1709b0bfe 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -679,6 +679,20 @@ typedef struct PCIIOMMUOps {
      */
     AddressSpace * (*get_msi_address_space)(PCIBus *bus, void *opaque,
                                             int devfn);
+    /**
+     * @get_msi_address: get the address of MSI doorbell for the device
+     * on a PCI bus.
+     *
+     * Optional callback, if implemented must return a valid MSI doorbell
+     * address.
+     *
+     * @bus: the #PCIBus being accessed.
+     *
+     * @opaque: the data passed to pci_setup_iommu().
+     *
+     * @devfn: device and function number
+     */
+    uint64_t (*get_msi_address)(PCIBus *bus, void *opaque, int devfn);
 } PCIIOMMUOps;

 bool pci_device_get_iommu_bus_devfn(PCIDevice *dev, PCIBus **piommu_bus,
@@ -688,6 +702,7 @@ bool pci_device_set_iommu_device(PCIDevice *dev, 
HostIOMMUDevice *hiod,
                                  Error **errp);
 void pci_device_unset_iommu_device(PCIDevice *dev);
 AddressSpace *pci_device_iommu_msi_address_space(PCIDevice *dev);
+bool pci_device_iommu_msi_direct_address(PCIDevice *dev, hwaddr *out_doorbell);

 /**
  * pci_device_get_viommu_flags: get vIOMMU flags.
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 0df41128d0..8d4d2be0bc 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -1611,35 +1611,45 @@ int kvm_arm_set_irq(int cpu, int irqtype, int irq, int 
level)
 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                              uint64_t address, uint32_t data, PCIDevice *dev)
 {
-    AddressSpace *as = pci_device_iommu_msi_address_space(dev);
+    AddressSpace *as;
     hwaddr xlat, len, doorbell_gpa;
     MemoryRegionSection mrs;
     MemoryRegion *mr;

+    /* Check if there is a direct msi address available */
+    if (pci_device_iommu_msi_direct_address(dev, &doorbell_gpa)) {
+        goto set_doorbell;
+    }
+
+    as = pci_device_iommu_msi_address_space(dev);
     if (as == &address_space_memory) {
         return 0;
     }

     /* MSI doorbell address is translated by an IOMMU */

-    RCU_READ_LOCK_GUARD();
+    rcu_read_lock();

     mr = address_space_translate(as, address, &xlat, &len, true,
                                  MEMTXATTRS_UNSPECIFIED);

     if (!mr) {
+        rcu_read_unlock();
         return 1;
     }

     mrs = memory_region_find(mr, xlat, 1);

     if (!mrs.mr) {
+        rcu_read_unlock();
         return 1;
     }

     doorbell_gpa = mrs.offset_within_address_space;
     memory_region_unref(mrs.mr);
+    rcu_read_unlock();

+set_doorbell:
     route->u.msi.address_lo = doorbell_gpa;
     route->u.msi.address_hi = doorbell_gpa >> 32;

--






Reply via email to