VT-d code currently makes use of pci_find_upstream_pcie_bridge() in
order to find the topology based alias of a device.  This function has
a few problems.  First, it doesn't check the entire alias path of the
device to the root bus, therefore if a PCIe device is masked upstream,
the wrong result is produced.  Also, it's known to get confused and
give up when it crosses a bridge from a conventional PCI bus to a PCIe
bus that lacks a PCIe capability.  The PCI-core provided DMA alias
support solves both of these problems and additionally adds support
for DMA function quirks allowing VT-d to work with devices like
Marvell and Ricoh with known broken requester IDs.

Signed-off-by: Alex Williamson <alex.william...@redhat.com>
Cc: David Woodhouse <david.woodho...@intel.com>
---
 drivers/iommu/intel-iommu.c         |  228 ++++++++++++++++-------------------
 drivers/iommu/intel_irq_remapping.c |   55 ++++++--
 2 files changed, 145 insertions(+), 138 deletions(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 5f0f352..c4f11c0 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -1840,54 +1840,56 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
        return 0;
 }
 
+struct domain_context_mapping_data {
+       struct dmar_domain *domain;
+       struct intel_iommu *iommu;
+       int translation;
+};
+
+static int domain_context_mapping_cb(struct pci_dev *pdev,
+                                    u16 alias, void *opaque)
+{
+       struct domain_context_mapping_data *data = opaque;
+
+       return domain_context_mapping_one(data->domain, data->iommu,
+                                         PCI_BUS_NUM(alias), alias & 0xff,
+                                         data->translation);
+}
+
 static int
 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
                       int translation)
 {
-       int ret;
-       struct pci_dev *pdev, *tmp, *parent;
        struct intel_iommu *iommu;
        u8 bus, devfn;
+       struct domain_context_mapping_data data;
 
        iommu = device_to_iommu(dev, &bus, &devfn);
        if (!iommu)
                return -ENODEV;
 
-       ret = domain_context_mapping_one(domain, iommu, bus, devfn,
-                                        translation);
-       if (ret || !dev_is_pci(dev))
-               return ret;
-
-       /* dependent device mapping */
-       pdev = to_pci_dev(dev);
-       tmp = pci_find_upstream_pcie_bridge(pdev);
-       if (!tmp)
-               return 0;
-       /* Secondary interface's bus number and devfn 0 */
-       parent = pdev->bus->self;
-       while (parent != tmp) {
-               ret = domain_context_mapping_one(domain, iommu,
-                                                parent->bus->number,
-                                                parent->devfn, translation);
-               if (ret)
-                       return ret;
-               parent = parent->bus->self;
-       }
-       if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
-               return domain_context_mapping_one(domain, iommu,
-                                       tmp->subordinate->number, 0,
-                                       translation);
-       else /* this is a legacy PCI bridge */
-               return domain_context_mapping_one(domain, iommu,
-                                                 tmp->bus->number,
-                                                 tmp->devfn,
+       if (!dev_is_pci(dev))
+               return domain_context_mapping_one(domain, iommu, bus, devfn,
                                                  translation);
+
+       data.domain = domain;
+       data.iommu = iommu;
+       data.translation = translation;
+
+       return pci_for_each_dma_alias(to_pci_dev(dev),
+                                     &domain_context_mapping_cb, &data);
+}
+
+static int domain_context_mapped_cb(struct pci_dev *pdev,
+                                   u16 alias, void *opaque)
+{
+       struct intel_iommu *iommu = opaque;
+
+       return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
 }
 
 static int domain_context_mapped(struct device *dev)
 {
-       int ret;
-       struct pci_dev *pdev, *tmp, *parent;
        struct intel_iommu *iommu;
        u8 bus, devfn;
 
@@ -1895,30 +1897,11 @@ static int domain_context_mapped(struct device *dev)
        if (!iommu)
                return -ENODEV;
 
-       ret = device_context_mapped(iommu, bus, devfn);
-       if (!ret || !dev_is_pci(dev))
-               return ret;
+       if (!dev_is_pci(dev))
+               return device_context_mapped(iommu, bus, devfn);
 
-       /* dependent device mapping */
-       pdev = to_pci_dev(dev);
-       tmp = pci_find_upstream_pcie_bridge(pdev);
-       if (!tmp)
-               return ret;
-       /* Secondary interface's bus number and devfn 0 */
-       parent = pdev->bus->self;
-       while (parent != tmp) {
-               ret = device_context_mapped(iommu, parent->bus->number,
-                                           parent->devfn);
-               if (!ret)
-                       return ret;
-               parent = parent->bus->self;
-       }
-       if (pci_is_pcie(tmp))
-               return device_context_mapped(iommu, tmp->subordinate->number,
-                                            0);
-       else
-               return device_context_mapped(iommu, tmp->bus->number,
-                                            tmp->devfn);
+       return !pci_for_each_dma_alias(to_pci_dev(dev),
+                                      domain_context_mapped_cb, iommu);
 }
 
 /* Returns a number of VTD pages, but aligned to MM page size */
@@ -2207,79 +2190,86 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
        return domain;
 }
 
+static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+       *(u16 *)opaque = alias;
+       return 0;
+}
+
 /* domain is initialized */
 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
 {
-       struct dmar_domain *domain, *free = NULL;
-       struct intel_iommu *iommu = NULL;
+       struct dmar_domain *domain, *tmp;
+       struct intel_iommu *iommu;
        struct device_domain_info *info;
-       struct pci_dev *dev_tmp = NULL;
+       u16 dma_alias;
        unsigned long flags;
-       u8 bus, devfn, bridge_bus, bridge_devfn;
+       u8 bus, devfn;
 
        domain = find_domain(dev);
        if (domain)
                return domain;
 
+       iommu = device_to_iommu(dev, &bus, &devfn);
+       if (!iommu)
+               return NULL;
+
        if (dev_is_pci(dev)) {
                struct pci_dev *pdev = to_pci_dev(dev);
-               u16 segment;
 
-               segment = pci_domain_nr(pdev->bus);
-               dev_tmp = pci_find_upstream_pcie_bridge(pdev);
-               if (dev_tmp) {
-                       if (pci_is_pcie(dev_tmp)) {
-                               bridge_bus = dev_tmp->subordinate->number;
-                               bridge_devfn = 0;
-                       } else {
-                               bridge_bus = dev_tmp->bus->number;
-                               bridge_devfn = dev_tmp->devfn;
-                       }
-                       spin_lock_irqsave(&device_domain_lock, flags);
-                       info = dmar_search_domain_by_dev_info(segment,
-                                                             bridge_bus,
-                                                             bridge_devfn);
-                       if (info) {
-                               iommu = info->iommu;
-                               domain = info->domain;
-                       }
-                       spin_unlock_irqrestore(&device_domain_lock, flags);
-                       /* pcie-pci bridge already has a domain, uses it */
-                       if (info)
-                               goto found_domain;
+               pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
+
+               spin_lock_irqsave(&device_domain_lock, flags);
+               info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
+                                                     PCI_BUS_NUM(dma_alias),
+                                                     dma_alias & 0xff);
+               if (info) {
+                       iommu = info->iommu;
+                       domain = info->domain;
                }
-       }
+               spin_unlock_irqrestore(&device_domain_lock, flags);
 
-       iommu = device_to_iommu(dev, &bus, &devfn);
-       if (!iommu)
-               goto error;
+               /* DMA alias already has a domain, uses it */
+               if (info)
+                       goto found_domain;
+       }
 
        /* Allocate and initialize new domain for the device */
        domain = alloc_domain(false);
        if (!domain)
-               goto error;
+               return NULL;
+
        if (iommu_attach_domain(domain, iommu)) {
                free_domain_mem(domain);
-               domain = NULL;
-               goto error;
+               return NULL;
        }
-       free = domain;
-       if (domain_init(domain, gaw))
-               goto error;
 
-       /* register pcie-to-pci device */
-       if (dev_tmp) {
-               domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn,
-                                             NULL, domain);
+       if (domain_init(domain, gaw)) {
+               domain_exit(domain);
+               return NULL;
+       }
+
+       /* register PCI DMA alias device */
+       if (dev_is_pci(dev)) {
+               tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
+                                          dma_alias & 0xff, NULL, domain);
+
+               if (!tmp || tmp != domain) {
+                       domain_exit(domain);
+                       domain = tmp;
+               }
+
                if (!domain)
-                       goto error;
+                       return NULL;
        }
 
 found_domain:
-       domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
-error:
-       if (free != domain)
-               domain_exit(free);
+       tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
+
+       if (!tmp || tmp != domain) {
+               domain_exit(domain);
+               domain = tmp;
+       }
 
        return domain;
 }
@@ -4029,33 +4019,27 @@ out_free_dmar:
        return ret;
 }
 
+static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+       struct intel_iommu *iommu = opaque;
+
+       iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
+       return 0;
+}
+
+/*
+ * NB - intel-iommu lacks any sort of reference counting for the users of
+ * dependent devices.  If multiple endpoints have intersecting dependent
+ * devices, unbinding the driver from any one of them will possibly leave
+ * the others unable to operate.
+ */
 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
                                           struct device *dev)
 {
-       struct pci_dev *tmp, *parent, *pdev;
-
        if (!iommu || !dev || !dev_is_pci(dev))
                return;
 
-       pdev = to_pci_dev(dev);
-
-       /* dependent device detach */
-       tmp = pci_find_upstream_pcie_bridge(pdev);
-       /* Secondary interface's bus number and devfn 0 */
-       if (tmp) {
-               parent = pdev->bus->self;
-               while (parent != tmp) {
-                       iommu_detach_dev(iommu, parent->bus->number,
-                                        parent->devfn);
-                       parent = parent->bus->self;
-               }
-               if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
-                       iommu_detach_dev(iommu,
-                               tmp->subordinate->number, 0);
-               else /* this is a legacy PCI bridge */
-                       iommu_detach_dev(iommu, tmp->bus->number,
-                                        tmp->devfn);
-       }
+       pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
 }
 
 static void domain_remove_one_dev_info(struct dmar_domain *domain,
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 9b17489..757e0b0 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -369,29 +369,52 @@ static int set_hpet_sid(struct irte *irte, u8 id)
        return 0;
 }
 
+struct set_msi_sid_data {
+       struct pci_dev *pdev;
+       u16 alias;
+};
+
+static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque)
+{
+       struct set_msi_sid_data *data = opaque;
+
+       data->pdev = pdev;
+       data->alias = alias;
+
+       return 0;
+}
+
 static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
 {
-       struct pci_dev *bridge;
+       struct set_msi_sid_data data;
 
        if (!irte || !dev)
                return -1;
 
-       /* PCIe device or Root Complex integrated PCI device */
-       if (pci_is_pcie(dev) || !dev->bus->parent) {
-               set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
-                            (dev->bus->number << 8) | dev->devfn);
-               return 0;
-       }
+       pci_for_each_dma_alias(dev, set_msi_sid_cb, &data);
 
-       bridge = pci_find_upstream_pcie_bridge(dev);
-       if (bridge) {
-               if (pci_is_pcie(bridge))/* this is a PCIe-to-PCI/PCIX bridge */
-                       set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
-                               (bridge->bus->number << 8) | dev->bus->number);
-               else /* this is a legacy PCI bridge */
-                       set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
-                               (bridge->bus->number << 8) | bridge->devfn);
-       }
+       /*
+        * DMA alias provides us with a PCI device and alias.  The only case
+        * where it will return an alias on a different bus than the
+        * device is the case of a PCIe-to-PCI bridge, where the alias is for
+        * the subordinate bus.  In this case we can only verify the bus.
+        *
+        * If the alias device is on a different bus than our source device
+        * then we have a topology based alias, use it.
+        *
+        * Otherwise, the alias is for a device DMA quirk and we cannot
+        * assume that MSI uses the same requester ID.  Therefore use the
+        * original device.
+        */
+       if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number)
+               set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
+                            PCI_DEVID(PCI_BUS_NUM(data.alias),
+                                      dev->bus->number));
+       else if (data.pdev->bus->number != dev->bus->number)
+               set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias);
+       else
+               set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
+                            PCI_DEVID(dev->bus->number, dev->devfn));
 
        return 0;
 }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to