From: Laurentiu Tudor <laurentiu.tu...@nxp.com>

In the current implementation, u-boot creates iommu mappings only
for PCI devices enumerated at boot time, and thus does not take into
account more dynamic scenarios such as SR-IOV or PCI hot-plug.
Add a u-boot env var and a device tree property (to be used for
example in more static scenarios such as hardwired PCI endpoints
that get initialized later in the system setup) that would allow
two things:
 - for a SRIOV capable PCI EP identified by its B.D.F specify
   the maximum number of VFs that will ever be created for it
 - for hot-plug case, specify the B.D.F with which the device
   will show up on the PCI bus
More details can be found in the included documentation:
  arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra

Signed-off-by: Laurentiu Tudor <laurentiu.tu...@nxp.com>
---
 .../fsl-layerscape/doc/README.pci_iommu_extra |  67 ++++
 drivers/pci/Kconfig                           |  12 +
 drivers/pci/pcie_layerscape_fixup.c           | 328 ++++++++++++++++++
 3 files changed, 407 insertions(+)
 create mode 100644 arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra

diff --git a/arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra 
b/arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra
new file mode 100644
index 0000000000..43db4d8e94
--- /dev/null
+++ b/arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra
@@ -0,0 +1,67 @@
+#
+# Copyright 2020 NXP
+#
+# SPDX-License-Identifier:      GPL-2.0+
+#
+
+Specifying extra IOMMU mappings for PCI controllers
+
+This feature can be enabled through the PCI_IOMMU_EXTRA_MAPPINGS Kconfig option.
+
+The "pci_iommu_extra" env var or "pci-iommu-extra" device tree property (to be
+used for example in more static scenarios such as hardwired PCI endpoints that
+get initialized later in the system setup) allows two things:
+ - for a SRIOV capable PCI EP identified by its B.D.F specify the maximum number
+   of VFs that will ever be created for it
+ - for hot-plug case, specify the B.D.F with which the device will show up on
+   the PCI bus
+
+The env var consists of a list of <bdf>,<action> pairs for a certain pci bus
+identified by its controller's base register address, as defined in the "reg"
+property in the device tree.
+
+pci_iommu_extra = pci@<addr1>,<bdf>,<action>,<bdf>,<action>,
+                 pci@<addr2>,<bdf>,<action>,<bdf>,<action>,...
+
+where:
+ <addr> is the base register address of the pci controller for which the
+        subsequent <bdf>,<action> pairs apply
+ <bdf> identifies the B.D.F to which the action applies
+ <action> can be:
+    - "vfs=<number>" to specify that mappings for <number> VFs must be
+      included for the PCI EP identified by the preceding <bdf>.
+      The variant "noari_vfs=<number>" is available to disable taking ARI into
+      account.
+    - "hp" to specify that on this <bdf> there will be a hot-plugged device so
+      it needs a mapping
+The device tree property must be placed under the correct pci controller node
+and only the bdf and action pairs need to be specified, like this:
+
+pci-iommu-extra = "<bdf>,<action>,<bdf>,<action>,...";
+
+Note: the env var has priority over the device tree property.
+
+For example, given this configuration on bus 6:
+
+=> pci 6
+Scanning PCI devices on bus 6
+BusDevFun  VendorId   DeviceId   Device Class       Sub-Class
+_____________________________________________________________
+06.00.00   0x8086     0x1572     Network controller      0x00
+06.00.01   0x8086     0x1572     Network controller      0x00
+
+The following u-boot env var will create iommu mappings for 3 VFs for each PF:
+
+=> setenv pci_iommu_extra pci@0x3800000,6.0.0,vfs=3,6.0.1,vfs=3
+
+For the device tree case, this would be specified like this:
+
+pci-iommu-extra = "6.0.0,vfs=3,6.0.1,vfs=3";
+
+To add an iommu mapping for a hot-plugged device, please see the following example:
+
+=> setenv pci_iommu_extra pci@0x3800000,2.16.0,hp
+
+For the device tree case, this would be specified like this:
+
+pci-iommu-extra = "2.16.0,hp";
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index dd1cc65229..af92784950 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -179,6 +179,18 @@ config PCIE_LAYERSCAPE_RC
          configured to Root Complex mode by clearing the corresponding bit of
          RCW[HOST_AGT_PEX].
 
+config PCI_IOMMU_EXTRA_MAPPINGS
+       bool "Support for specifying extra IOMMU mappings for PCI"
+       depends on PCIE_LAYERSCAPE_RC
+       help
+         Enable support for specifying extra IOMMU mappings for PCI
+         controllers through a special env var called "pci_iommu_extra" or
+         through a device tree property named "pci-iommu-extra" placed in
+         the node describing the PCI controller.
+         The intent is to cover SR-IOV scenarios which need mappings for VFs
+         and PCI hot-plug scenarios. More documentation can be found under:
+           arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra
+
 config PCIE_LAYERSCAPE_EP
        bool "Layerscape PCIe Endpoint mode support"
        depends on DM_PCI
diff --git a/drivers/pci/pcie_layerscape_fixup.c 
b/drivers/pci/pcie_layerscape_fixup.c
index fdbfd82740..8db7c6a4c4 100644
--- a/drivers/pci/pcie_layerscape_fixup.c
+++ b/drivers/pci/pcie_layerscape_fixup.c
@@ -19,6 +19,8 @@
 #ifdef CONFIG_ARM
 #include <asm/arch/clock.h>
 #endif
+#include <malloc.h>
+#include <env.h>
 #include "pcie_layerscape.h"
 #include "pcie_layerscape_fixup_common.h"
 
@@ -214,11 +216,292 @@ static int fdt_fixup_pcie_device_ls(void *blob, 
pci_dev_t bdf,
        return 0;
 }
 
+struct extra_iommu_entry {     /* one parsed <bdf>,<action> pair */
+       int action;             /* one of the EXTRA_IOMMU_ENTRY_* values */
+       pci_dev_t bdf;          /* PCI_BDF() of the parsed B.D.F */
+       int num_vfs;            /* VF count of a vfs= action */
+       bool noari;             /* true when the action was noari_vfs= */
+};
+
+#define EXTRA_IOMMU_ENTRY_HOTPLUG      1       /* hp action */
+#define EXTRA_IOMMU_ENTRY_VFS          2       /* vfs= or noari_vfs= action */
+
+/*
+ * Parse one number of a B.D.F triplet starting at @p and store it in @val.
+ *
+ * Returns a pointer just past the separator character following the number,
+ * or NULL if no digits were found or the string ends right after the number
+ * (so the caller never steps over the terminating NUL).
+ */
+static const char *extra_iommu_parse_bdf_field(const char *p, int *val)
+{
+       char *endp;
+
+       *val = simple_strtoul(p, &endp, 0);
+       if (endp == p || !(*endp))
+               return NULL;
+
+       return endp + 1;
+}
+
+/**
+ * get_extra_iommu_ents() - parse the extra IOMMU mapping requests of one RC
+ * @blob:       device tree blob searched for the "pci-iommu-extra" property
+ * @nodeoffset: offset of the PCI controller node in @blob
+ * @addr:       register base address of the PCI controller, matched against
+ *              the "pci@<addr>" tokens of the "pci_iommu_extra" env var
+ * @cnt:        on success, number of valid entries in the returned array
+ *
+ * The env var, when set, takes priority over the device tree property; if it
+ * is set but has no section for @addr, the device tree is not consulted.
+ *
+ * Return: allocated array of *@cnt entries that the caller must free(), or
+ *         NULL if nothing is configured for this controller, the
+ *         configuration string is malformed or the allocation failed.
+ */
+static struct extra_iommu_entry *get_extra_iommu_ents(void *blob,
+                                                     int nodeoffset,
+                                                     phys_addr_t addr,
+                                                     int *cnt)
+{
+       const char *s, *p, *tok;
+       struct extra_iommu_entry *entries;
+       char *endp;
+       int i, b, d, f;
+
+       /*
+        * Retrieve extra IOMMU configuration from env var or from device tree.
+        * Env var is given priority.
+        */
+       s = env_get("pci_iommu_extra");
+       if (!s) {
+               s = fdt_getprop(blob, nodeoffset, "pci-iommu-extra", NULL);
+       } else {
+               phys_addr_t pci_base;
+
+               /*
+                * In env var case the config string has "pci@0x..." in
+                * addition. Walk the comma separated tokens and match the
+                * address of each "pci@" token against the input pci
+                * controller's registers base address. The walk never looks
+                * beyond the terminating NUL.
+                */
+               tok = s;
+               s = NULL;
+               while (*tok) {
+                       if (!strncmp(tok, "pci", 3) && tok[3]) {
+                               pci_base = simple_strtoul(tok + 4, &endp, 0);
+                               if (pci_base == addr) {
+                                       /* skip the separator, if any */
+                                       s = *endp ? endp + 1 : endp;
+                                       break;
+                               }
+                       }
+                       p = strchrnul(tok, ',');
+                       if (!(*p))
+                               break;
+                       tok = p + 1;
+               }
+       }
+
+       /*
+        * If no env var or device tree property found or pci register base
+        * address mismatches, bail out
+        */
+       if (!s)
+               return NULL;
+
+       /*
+        * In order to find how many action entries to allocate, count number
+        * of actions by iterating through the pairs of bdfs and actions.
+        */
+       *cnt = 0;
+       p = s;
+       while (*p && strncmp(p, "pci", 3)) {
+               if (*p == ',')
+                       (*cnt)++;
+               p++;
+       }
+       if (!(*p))
+               (*cnt)++;
+
+       if (!(*cnt) || (*cnt) % 2) {
+               printf("ERROR: invalid or odd extra iommu token count %d\n",
+                      *cnt);
+               return NULL;
+       }
+       *cnt = (*cnt) / 2;
+
+       /* Zeroed so that fields not set by an action have a defined value */
+       entries = calloc(*cnt, sizeof(*entries));
+       if (!entries) {
+               printf("ERROR: fail to allocate extra iommu entries\n");
+               return NULL;
+       }
+
+       /*
+        * Parse action entries one by one and store the information in the
+        * newly allocated actions array. The loop is bounded by the number
+        * of allocated entries.
+        */
+       p = s;
+       for (i = 0; i < *cnt; i++) {
+               /* Extract BDF */
+               p = extra_iommu_parse_bdf_field(p, &b);
+               if (p)
+                       p = extra_iommu_parse_bdf_field(p, &d);
+               if (p)
+                       p = extra_iommu_parse_bdf_field(p, &f);
+               if (!p) {
+                       printf("ERROR: invalid BDF in extra iommu entry\n");
+                       goto err_free;
+               }
+               entries[i].bdf = PCI_BDF(b, d, f);
+
+               /* Parse action */
+               if (!strncmp(p, "hp", 2)) {
+                       /* Hot-plug entry */
+                       entries[i].action = EXTRA_IOMMU_ENTRY_HOTPLUG;
+                       p += 2;
+               } else if (!strncmp(p, "vfs=", 4) ||
+                          !strncmp(p, "noari_vfs=", 10)) {
+                       /* VFs or VFs with ARI disabled entry */
+                       entries[i].action = EXTRA_IOMMU_ENTRY_VFS;
+                       entries[i].noari = (*p == 'n');
+                       p += entries[i].noari ? 10 : 4;
+
+                       /*
+                        * Parse and store total number of VFs to allocate
+                        * IOMMU entries for.
+                        */
+                       entries[i].num_vfs = simple_strtoul(p, &endp, 0);
+                       if (endp == p)
+                               goto err_action;
+                       p = endp;
+               } else {
+                       goto err_action;
+               }
+
+               /* An action is followed by ',' or ends the string */
+               if (*p == ',')
+                       p++;
+               else if (*p)
+                       goto err_action;
+
+               if (!(*p) || !strncmp(p, "pci", 3)) {
+                       i++;
+                       break;
+               }
+       }
+
+       /* Report the number of entries actually parsed */
+       *cnt = i;
+
+       return entries;
+
+err_action:
+       printf("ERROR: invalid action in extra iommu entry\n");
+err_free:
+       free(entries);
+
+       return NULL;
+}
+
+/*
+ * Read-modify-write the SR-IOV Control register of @pf: clear the bits in
+ * @clear, then set the bits in @set. Nothing is written if the read fails.
+ */
+static void sriov_update_ctrl(struct udevice *pf, int sriov_pos, u16 clear,
+                             u16 set)
+{
+       u16 ctrl;
+
+       if (dm_pci_read_config16(pf, sriov_pos + PCI_SRIOV_CTRL, &ctrl))
+               return;
+
+       ctrl &= ~clear;
+       ctrl |= set;
+       dm_pci_write_config16(pf, sriov_pos + PCI_SRIOV_CTRL, ctrl);
+}
+
+/**
+ * get_vf_offset_and_stride() - query First VF Offset and VF Stride of a PF
+ * @dev:        the PF to query
+ * @sriov_pos:  config space offset of the SR-IOV extended capability of @dev
+ * @entry:      extra IOMMU entry holding the requested number of VFs, the
+ *              BDF of the PF and whether ARI must be ignored
+ * @offset:     receives First VF Offset, or 0 if it could not be read
+ * @stride:     receives VF Stride, or 0 if it could not be read
+ *
+ * NumVFs of @dev and ARI Capable Hierarchy of PF0 are programmed only for
+ * the duration of the query and are cleared again before returning. VF
+ * Enable is cleared in the functions found on the parent bus of @dev.
+ */
+static void get_vf_offset_and_stride(struct udevice *dev, int sriov_pos,
+                                    struct extra_iommu_entry *entry,
+                                    u16 *offset, u16 *stride)
+{
+       u16 tmp16 = 0;
+       u32 tmp32 = 0;
+       bool have_ari = false;
+       int pos, pf0_pos = 0;
+       struct udevice *pf, *pf0 = NULL;
+
+       /* Defined results even if the config reads below fail */
+       *offset = 0;
+       *stride = 0;
+
+       if (!dm_pci_read_config16(dev, sriov_pos + PCI_SRIOV_TOTAL_VF,
+                                 &tmp16) &&
+           entry->num_vfs > tmp16) {
+               printf("WARN: requested no. of VFs %d exceeds total of %d\n",
+                      entry->num_vfs, tmp16);
+       }
+
+       /*
+        * The code below implements the VF Discovery recommendations specified
+        * in PCIe base spec "9.2.1.2 VF Discovery", quoted below:
+        *
+        * VF Discovery
+        *
+        * The First VF Offset and VF Stride fields in the SR-IOV extended
+        * capability are 16-bit Routing ID offsets. These offsets are used to
+        * compute the Routing IDs for the VFs with the following restrictions:
+        *  - The value in NumVFs in a PF (Section 9.3.3.7) may affect the
+        *    values in First VF Offset (Section 9.3.3.9) and VF Stride
+        *    (Section 9.3.3.10) of that PF.
+        *  - The value in ARI Capable Hierarchy (Section 9.3.3.3.5) in the
+        *    lowest-numbered PF of the Device (for example PF0) may affect
+        *    the values in First VF Offset and VF Stride in all PFs of the
+        *    Device.
+        *  - NumVFs of a PF may only be changed when VF Enable
+        *    (Section 9.3.3.3.1) of that PF is Clear.
+        *  - ARI Capable Hierarchy (Section 9.3.3.3.5) may only be changed
+        *    when VF Enable is Clear in all PFs of a Device.
+        */
+
+       /*
+        * Clear VF enable for all PFs. Each function is asked for its own
+        * SR-IOV capability offset; functions without one are left alone.
+        */
+       device_foreach_child(pf, dev->parent) {
+               pos = dm_pci_find_ext_capability(pf, PCI_EXT_CAP_ID_SRIOV);
+               if (!pos)
+                       continue;
+               sriov_update_ctrl(pf, pos, PCI_SRIOV_CTRL_VFE, 0);
+       }
+
+       /* Obtain a reference to PF0 device and its SR-IOV capability */
+       if (dm_pci_bus_find_bdf(PCI_BDF(PCI_BUS(entry->bdf),
+                                       PCI_DEV(entry->bdf), 0), &pf0)) {
+               printf("WARN: failed to get PF0\n");
+               pf0 = NULL;
+       } else {
+               pf0_pos = dm_pci_find_ext_capability(pf0,
+                                                    PCI_EXT_CAP_ID_SRIOV);
+       }
+
+       /*
+        * ARI is used only if PF0 is accessible, ARI was not disabled for
+        * this entry, the connected downstream port supports ARI Forwarding
+        * and the PF supports Alternate Routing ID.
+        */
+       if (pf0_pos && !entry->noari) {
+               pos = dm_pci_find_capability(dev->parent, PCI_CAP_ID_EXP);
+               if (pos &&
+                   !dm_pci_read_config32(dev->parent, pos + PCI_EXP_DEVCAP2,
+                                         &tmp32) &&
+                   (tmp32 & PCI_EXP_DEVCAP2_ARI) &&
+                   dm_pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI))
+                       have_ari = true;
+       }
+
+       /*
+        * Set ARI Capable Hierarchy for PF0, or clear it when there is no
+        * ARI support or ARI is disabled. PF0 is not touched if it was not
+        * found or has no SR-IOV capability.
+        */
+       if (pf0_pos)
+               sriov_update_ctrl(pf0, pf0_pos, PCI_SRIOV_CTRL_ARI,
+                                 have_ari ? PCI_SRIOV_CTRL_ARI : 0);
+
+       /* Set requested number of VFs */
+       dm_pci_write_config16(dev, sriov_pos + PCI_SRIOV_NUM_VF,
+                             entry->num_vfs);
+
+       /* Read VF stride and offset with the configs just made */
+       dm_pci_read_config16(dev, sriov_pos + PCI_SRIOV_VF_OFFSET, offset);
+       dm_pci_read_config16(dev, sriov_pos + PCI_SRIOV_VF_STRIDE, stride);
+
+       /* Reset to default ARI Capable Hierarchy bit for PF0 */
+       if (have_ari)
+               sriov_update_ctrl(pf0, pf0_pos, PCI_SRIOV_CTRL_ARI, 0);
+
+       /* Reset to default the number of VFs */
+       dm_pci_write_config16(dev, sriov_pos + PCI_SRIOV_NUM_VF, 0);
+}
+
+/**
+ * fdt_fixup_pci_vfs() - add IOMMU mappings for the VFs of one PF
+ * @blob:       device tree blob to fix up
+ * @entry:      extra IOMMU entry holding the BDF of the PF and the number
+ *              of VFs to create mappings for
+ * @pcie_rc:    PCIe root complex the PF sits behind
+ *
+ * A missing device, a device without SR-IOV capability or an unusable
+ * First VF Offset are reported and skipped without failing the fixup.
+ *
+ * Return: 0 on success or when the entry was skipped, -1 if adding a
+ *         mapping failed.
+ */
+static int fdt_fixup_pci_vfs(void *blob, struct extra_iommu_entry *entry,
+                            struct ls_pcie_rc *pcie_rc)
+{
+       struct udevice *dev, *bus;
+       /* Zeroed: they stay unset if the config space reads fail */
+       u16 vf_offset = 0, vf_stride = 0;
+       int i, sriov_pos;
+       pci_dev_t bdf;
+
+       if (dm_pci_bus_find_bdf(entry->bdf, &dev)) {
+               printf("ERROR: BDF %d.%d.%d not found\n", PCI_BUS(entry->bdf),
+                      PCI_DEV(entry->bdf), PCI_FUNC(entry->bdf));
+               return 0;
+       }
+
+       sriov_pos = dm_pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
+       if (!sriov_pos) {
+               printf("WARN: trying to set VFs on non-SRIOV dev\n");
+               return 0;
+       }
+
+       get_vf_offset_and_stride(dev, sriov_pos, entry, &vf_offset, &vf_stride);
+
+       /*
+        * An offset of 0 would make every VF mapping alias the requester ID
+        * of the PF itself, so there is nothing useful to add.
+        */
+       if (!vf_offset) {
+               printf("WARN: no valid First VF Offset for PF %d.%d.%d\n",
+                      PCI_BUS(entry->bdf), PCI_DEV(entry->bdf),
+                      PCI_FUNC(entry->bdf));
+               return 0;
+       }
+
+       /* Walk up to the first device that is not on a PCI bus */
+       for (bus = dev; device_is_on_pci_bus(bus);)
+               bus = bus->parent;
+
+       /*
+        * Make the bus number relative to the sequence number of that
+        * device, then add the routing ID offset of the first VF. Offset
+        * and stride are shifted by 8 to line up with the pci_dev_t layout.
+        */
+       bdf = entry->bdf - PCI_BDF(bus->seq, 0, 0) + (vf_offset << 8);
+
+       for (i = 0; i < entry->num_vfs; i++) {
+               if (fdt_fixup_pcie_device_ls(blob, bdf, pcie_rc) < 0)
+                       return -1;
+               bdf += vf_stride << 8;
+       }
+
+       printf("Added %d iommu VF mappings for PF %d.%d.%d\n",
+              entry->num_vfs, PCI_BUS(entry->bdf),
+              PCI_DEV(entry->bdf), PCI_FUNC(entry->bdf));
+
+       return 0;
+}
+
 static void fdt_fixup_pcie_ls(void *blob)
 {
        struct udevice *dev, *bus;
        struct ls_pcie_rc *pcie_rc;
        pci_dev_t bdf;
+       struct extra_iommu_entry *entries;
+       int i, cnt, nodeoffset;
+
 
        /* Scan all known buses */
        for (pci_find_first_device(&dev);
@@ -234,6 +517,51 @@ static void fdt_fixup_pcie_ls(void *blob)
                if (fdt_fixup_pcie_device_ls(blob, bdf, pcie_rc) < 0)
                        break;
        }
+
+       if (!IS_ENABLED(CONFIG_PCI_IOMMU_EXTRA_MAPPINGS))
+               goto skip;
+
+       list_for_each_entry(pcie_rc, &ls_pcie_list, list) {
+               nodeoffset = fdt_pcie_get_nodeoffset(blob, pcie_rc);
+               if (nodeoffset < 0) {
+                       printf("ERROR: couldn't find pci node\n");
+                       continue;
+               }
+
+               entries = get_extra_iommu_ents(blob, nodeoffset,
+                                              pcie_rc->dbi_res.start, &cnt);
+               if (!entries)
+                       continue;
+
+               for (i = 0; i < cnt; i++) {
+                       if (entries[i].action == EXTRA_IOMMU_ENTRY_HOTPLUG) {
+                               bdf = entries[i].bdf;
+                               printf("Added iommu map for hotplug %d.%d.%d\n",
+                                      PCI_BUS(bdf), PCI_DEV(bdf),
+                                      PCI_FUNC(bdf));
+                               if (fdt_fixup_pcie_device_ls(blob, bdf,
+                                                            pcie_rc) < 0) {
+                                       free(entries);
+                                       return;
+                               }
+                       } else if (entries[i].action == EXTRA_IOMMU_ENTRY_VFS) {
+                               if (fdt_fixup_pci_vfs(blob, &entries[i],
+                                                     pcie_rc) < 0) {
+                                       free(entries);
+                                       return;
+                               }
+                       } else {
+                               printf("Invalid action %d for BDF %d.%d.%d\n",
+                                      entries[i].action,
+                                      PCI_BUS(entries[i].bdf),
+                                      PCI_DEV(entries[i].bdf),
+                                      PCI_FUNC(entries[i].bdf));
+                       }
+               }
+               free(entries);
+       }
+
+skip:
        pcie_board_fix_fdt(blob);
 }
 #endif
-- 
2.17.1

Reply via email to