From: Peter Xu <pet...@redhat.com>

This fix SID verification failure when IOMMU IR is enabled with PCI
bridges. Existing pci_requester_id() is more like getting BDF info
only. Renaming it to pci_get_bdf(). Meanwhile, we provide the correct
implementation to get requester ID. VT-d spec 5.1.1 is a good reference
to go, though it talks only about interrupt delivery, the rule works
exactly the same for non-interrupt cases.

Currently, there are three use cases for pci_requester_id():

- PCIX status bits: here we need BDF only, not requester ID. Replacing
  with pci_get_bdf().
- PCIe Error injection and MSI delivery: for both these cases, we are
  looking for requester IDs. Here we should use the new impl.

To avoid a PCI walk every time we send MSI message, one requester_id
cache field is added to PCIDevice to cache the result when initialize
PCI device.

Signed-off-by: Peter Xu <pet...@redhat.com>
Reviewed-by: Michael S. Tsirkin <m...@redhat.com>
Signed-off-by: Michael S. Tsirkin <m...@redhat.com>
Acked-by: Michael S. Tsirkin <m...@redhat.com>
Tested-by: Michael S. Tsirkin <m...@redhat.com>
Reviewed-by: Michael S. Tsirkin <m...@redhat.com>
Signed-off-by: Michael S. Tsirkin <m...@redhat.com>
---
 include/hw/pci/pci.h     | 26 +++++++++++++++--
 hw/i386/kvm/pci-assign.c |  2 +-
 hw/pci/pci.c             | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 4420f47..9ed1624 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -15,6 +15,7 @@
 #define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
 #define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
 #define PCI_FUNC(devfn)         ((devfn) & 0x07)
+#define PCI_BUILD_BDF(bus, devfn)     ((bus << 8) | (devfn))
 #define PCI_SLOT_MAX            32
 #define PCI_FUNC_MAX            8
 
@@ -230,6 +231,20 @@ typedef void (*MSIVectorPollNotifier)(PCIDevice *dev,
                                       unsigned int vector_start,
                                       unsigned int vector_end);
 
+enum PCIReqIDType {
+    PCI_REQ_ID_INVALID = 0,
+    PCI_REQ_ID_BDF,
+    PCI_REQ_ID_SECONDARY_BUS,
+    PCI_REQ_ID_MAX,
+};
+typedef enum PCIReqIDType PCIReqIDType;
+
+struct PCIReqIDCache {
+    PCIDevice *dev;
+    PCIReqIDType type;
+};
+typedef struct PCIReqIDCache PCIReqIDCache;
+
 struct PCIDevice {
     DeviceState qdev;
 
@@ -252,6 +267,11 @@ struct PCIDevice {
     /* the following fields are read only */
     PCIBus *bus;
     int32_t devfn;
+    /* Cached device to fetch requester ID from, to avoid the PCI
+     * tree walking every time we invoke PCI request (e.g.,
+     * MSI). For conventional PCI root complex, this field is
+     * meaningless. */
+    PCIReqIDCache requester_id_cache;
     char name[64];
     PCIIORegion io_regions[PCI_NUM_REGIONS];
     AddressSpace bus_master_as;
@@ -692,11 +712,13 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
     return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
 }
 
-static inline uint16_t pci_requester_id(PCIDevice *dev)
+static inline uint16_t pci_get_bdf(PCIDevice *dev)
 {
-    return (pci_bus_num(dev->bus) << 8) | dev->devfn;
+    return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn);
 }
 
+uint16_t pci_requester_id(PCIDevice *dev);
+
 /* DMA access functions */
 static inline AddressSpace *pci_get_address_space(PCIDevice *dev)
 {
diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index db2cbd2..bceed09 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1482,7 +1482,7 @@ static int assigned_device_pci_cap_init(PCIDevice 
*pci_dev, Error **errp)
          * error bits, leave the rest. */
         status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS);
         status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN);
-        status |= pci_requester_id(pci_dev);
+        status |= pci_get_bdf(pci_dev);
         status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL |
                     PCI_X_STATUS_SPL_ERR);
         pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index bb605ef..87bea47 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -836,6 +836,81 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
     address_space_destroy(&pci_dev->bus_master_as);
 }
 
+/* Extract PCIReqIDCache into BDF format */
+static uint16_t pci_req_id_cache_extract(PCIReqIDCache *cache)
+{
+    uint8_t bus_n;
+    uint16_t result;
+
+    switch (cache->type) {
+    case PCI_REQ_ID_BDF:
+        result = pci_get_bdf(cache->dev);
+        break;
+    case PCI_REQ_ID_SECONDARY_BUS:
+        bus_n = pci_bus_num(cache->dev->bus);
+        result = PCI_BUILD_BDF(bus_n, 0);
+        break;
+    default:
+        error_printf("Invalid PCI requester ID cache type: %d\n",
+                     cache->type);
+        exit(1);
+        break;
+    }
+
+    return result;
+}
+
+/* Parse bridges up to the root complex and return requester ID
+ * cache for specific device.  For full PCIe topology, the cache
+ * result would be exactly the same as getting BDF of the device.
+ * However, several tricks are required when system mixed up with
+ * legacy PCI devices and PCIe-to-PCI bridges.
+ *
+ * Here we cache the proxy device (and type) not requester ID since
+ * bus number might change from time to time.
+ */
+static PCIReqIDCache pci_req_id_cache_get(PCIDevice *dev)
+{
+    PCIDevice *parent;
+    PCIReqIDCache cache = {
+        .dev = dev,
+        .type = PCI_REQ_ID_BDF,
+    };
+
+    while (!pci_bus_is_root(dev->bus)) {
+        /* We are under PCI/PCIe bridges */
+        parent = dev->bus->parent_dev;
+        if (pci_is_express(parent)) {
+            if (pcie_cap_get_type(parent) == PCI_EXP_TYPE_PCI_BRIDGE) {
+                /* When we pass through PCIe-to-PCI/PCIX bridges, we
+                 * override the requester ID using secondary bus
+                 * number of parent bridge with zeroed devfn
+                 * (pcie-to-pci bridge spec chap 2.3). */
+                cache.type = PCI_REQ_ID_SECONDARY_BUS;
+                cache.dev = dev;
+            }
+        } else {
+            /* Legacy PCI, override requester ID with the bridge's
+             * BDF upstream.  When the root complex connects to
+             * legacy PCI devices (including buses), it can only
+             * obtain requester ID info from directly attached
+             * devices.  If devices are attached under bridges, only
+             * the requester ID of the bridge that is directly
+             * attached to the root complex can be recognized. */
+            cache.type = PCI_REQ_ID_BDF;
+            cache.dev = parent;
+        }
+        dev = parent;
+    }
+
+    return cache;
+}
+
+uint16_t pci_requester_id(PCIDevice *dev)
+{
+    return pci_req_id_cache_extract(&dev->requester_id_cache);
+}
+
 /* -1 for devfn means auto assign */
 static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                                          const char *name, int devfn,
@@ -885,6 +960,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
     }
 
     pci_dev->devfn = devfn;
+    pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
     dma_as = pci_device_iommu_address_space(pci_dev);
 
     memory_region_init_alias(&pci_dev->bus_master_enable_region,
-- 
MST


Reply via email to