From: Laurentiu Tudor <laurentiu.tu...@nxp.com> In the current implementation, u-boot creates iommu mappings only for PCI devices enumarated at boot time thus does not take into account more dynamic scenarios such as SR-IOV or PCI hot-plug. Add an u-boot env var and a device tree property (to be used for example in more static scenarios such as hardwired PCI endpoints that get initialized later in the system setup) that would allow two things: - for a SRIOV capable PCI EP identified by its B.D.F specify the maximum number of VFs that will ever be created for it - for hot-plug case, specify the B.D.F with which the device will show up on the PCI bus More details can be found in the included documentation: arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra
Signed-off-by: Laurentiu Tudor <laurentiu.tu...@nxp.com> --- .../fsl-layerscape/doc/README.pci_iommu_extra | 67 ++++ drivers/pci/Kconfig | 12 + drivers/pci/pcie_layerscape_fixup.c | 328 ++++++++++++++++++ 3 files changed, 407 insertions(+) create mode 100644 arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra diff --git a/arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra b/arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra new file mode 100644 index 0000000000..43db4d8e94 --- /dev/null +++ b/arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra @@ -0,0 +1,67 @@ +# +# Copyright 2020 NXP +# +# SPDX-License-Identifier: GPL-2.0+ +# + +Specifying extra IOMMU mappings for PCI controllers + +This feature can be enabled through the PCI_IOMMU_EXTRA_MAPPINGS Kconfig option. + +The "pci_iommu_extra" env var or "pci-iommu-extra" device tree property (to be +used for example in more static scenarios such as hardwired PCI endpoints that +get initialized later in the system setup) allows two things: + - for a SRIOV capable PCI EP identified by its B.D.F specify the maximum number + of VFs that will ever be created for it + - for hot-plug case, specify the B.D.F with which the device will show up on + the PCI bus + +The env var consists of a list of <bdf>,<action> pairs for a certain pci bus +identified by its controller's base register address, as defined in the "reg" +property in the device tree. + +pci_iommu_extra = pci@<addr1>,<bdf>,<action>,<bdf>,<action>, + pci@<addr2>,<bdf>,<action>,<bdf>,<action>,... + +where: + <addr> is the base register address of the pci controller for which the + subsequent <bdf>,<action> pairs apply + <bdf> identifies to which B.D.F the action applies to + <action> can be: + - "vfs=<number>" to specify that for the PCI EP identified previously by + the <bdf> to include mappings for <number> of VFs. + The variant "noari_vfs=<number>" is available to disable taking ARI into + account. + - "hp" to specify that on this <bdf> there will be a hot-plugged device so + it needs a mapping +The device tree property must be placed under the correct pci controller node +and only the bdf and action pairs need to be specified, like this: + +pci-iommu-extra = "<bdf>,<action>,<bdf>,<action>,..."; + +Note: the env var has priority over the device tree property. + +For example, given this configuration on bus 6: + +=> pci 6 +Scanning PCI devices on bus 6 +BusDevFun VendorId DeviceId Device Class Sub-Class +_____________________________________________________________ +06.00.00 0x8086 0x1572 Network controller 0x00 +06.00.01 0x8086 0x1572 Network controller 0x00 + +The following u-boot env var will create iommu mappings for 3 VFs for each PF: + +=> setenv pci_iommu_extra pci@0x3800000,6.0.0,vfs=3,6.0.1,vfs=3 + +For the device tree case, this would be specified like this: + +pci-iommu-extra = "6.0.0,vfs=3,6.0.1,vfs=3"; + +To add an iommu mapping for a hot-plugged device, please see following example: + +=> setenv pci_iommu_extra pci@0x3800000,2.16.0,hp + +For the device tree case, this would be specified like this: + +pci-iommu-extra = "2.16.0,hp"; diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 5e0a39396b..b31254bf41 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -141,6 +141,18 @@ config PCIE_LAYERSCAPE_RC configured to Root Complex mode by clearing the corresponding bit of RCW[HOST_AGT_PEX]. +config PCI_IOMMU_EXTRA_MAPPINGS + bool "Support for specifying extra IOMMU mappings for PCI" + depends on PCIE_LAYERSCAPE_RC + help + Enable support for specifying extra IOMMU mappings for PCI + controllers through a special env var called "pci_iommu_extra" or + through a device tree property named "pci-iommu-extra" placed in + the node describing the PCI controller. + The intent is to cover SR-IOV scenarios which need mappings for VFs + and PCI hot-plug scenarios. More documentation can be found under: + arch/arm/cpu/armv8/fsl-layerscape/doc/README.pci_iommu_extra + config PCIE_LAYERSCAPE_EP bool "Layerscape PCIe Endpoint mode support" depends on DM_PCI diff --git a/drivers/pci/pcie_layerscape_fixup.c b/drivers/pci/pcie_layerscape_fixup.c index 9e93841ccd..d14b4a4228 100644 --- a/drivers/pci/pcie_layerscape_fixup.c +++ b/drivers/pci/pcie_layerscape_fixup.c @@ -19,6 +19,8 @@ #ifdef CONFIG_ARM #include <asm/arch/clock.h> #endif +#include <malloc.h> +#include <env.h> #include "pcie_layerscape.h" #include "pcie_layerscape_fixup_common.h" @@ -214,11 +216,292 @@ static int fdt_fixup_pcie_device_ls(void *blob, pci_dev_t bdf, return 0; } +struct extra_iommu_entry { + int action; + pci_dev_t bdf; + int num_vfs; + bool noari; +}; + +#define EXTRA_IOMMU_ENTRY_HOTPLUG 1 +#define EXTRA_IOMMU_ENTRY_VFS 2 + +static struct extra_iommu_entry *get_extra_iommu_ents(void *blob, + int nodeoffset, + phys_addr_t addr, + int *cnt) +{ + const char *s, *p, *tok; + struct extra_iommu_entry *entries; + int i = 0, b, d, f; + + /* + * Retrieve extra IOMMU configuration from env var or from device tree. + * Env var is given priority. + */ + s = env_get("pci_iommu_extra"); + if (!s) { + s = fdt_getprop(blob, nodeoffset, "pci-iommu-extra", NULL); + } else { + phys_addr_t pci_base; + char *endp; + + /* + * In env var case the config string has "pci@0x..." in + * addition. Parse this part and match it by address against + * the input pci controller's registers base address. + */ + tok = s; + p = strchrnul(s + 1, ','); + s = NULL; + do { + if (!strncmp(tok, "pci", 3)) { + pci_base = simple_strtoul(tok + 4, &endp, 0); + if (pci_base == addr) { + s = endp + 1; + break; + } + } + p = strchrnul(p + 1, ','); + tok = p + 1; + } while (*p); + } + + /* + * If no env var or device tree property found or pci register base + * address mismatches, bail out + */ + if (!s) + return NULL; + + /* + * In order to find how many action entries to allocate, count number + * of actions by interating through the pairs of bdfs and actions. + */ + *cnt = 0; + p = s; + while (*p && strncmp(p, "pci", 3)) { + if (*p == ',') + (*cnt)++; + p++; + } + if (!(*p)) + (*cnt)++; + + if (!(*cnt) || (*cnt) % 2) { + printf("ERROR: invalid or odd extra iommu token count %d\n", + *cnt); + return NULL; + } + *cnt = (*cnt) / 2; + + entries = malloc((*cnt) * sizeof(*entries)); + if (!entries) { + printf("ERROR: fail to allocate extra iommu entries\n"); + return NULL; + } + + /* + * Parse action entries one by one and store the information in the + * newly allocated actions array. + */ + p = s; + while (p) { + /* Extract BDF */ + b = simple_strtoul(p, (char **)&p, 0); p++; + d = simple_strtoul(p, (char **)&p, 0); p++; + f = simple_strtoul(p, (char **)&p, 0); p++; + entries[i].bdf = PCI_BDF(b, d, f); + + /* Parse action */ + if (!strncmp(p, "hp", 2)) { + /* Hot-plug entry */ + entries[i].action = EXTRA_IOMMU_ENTRY_HOTPLUG; + p += 2; + } else if (!strncmp(p, "vfs", 3) || + !strncmp(p, "noari_vfs", 9)) { + /* VFs or VFs with ARI disabled entry */ + entries[i].action = EXTRA_IOMMU_ENTRY_VFS; + entries[i].noari = !strncmp(p, "noari_vfs", 9); + + /* + * Parse and store total number of VFs to allocate + * IOMMU entries for. + */ + p = strchr(p, '='); + entries[i].num_vfs = simple_strtoul(p + 1, (char **)&p, + 0); + if (*p) + p++; + } else { + printf("ERROR: invalid action in extra iommu entry\n"); + free(entries); + + return NULL; + } + + if (!(*p) || !strncmp(p, "pci", 3)) + break; + + i++; + } + + return entries; +} + +static void get_vf_offset_and_stride(struct udevice *dev, int sriov_pos, + struct extra_iommu_entry *entry, + u16 *offset, u16 *stride) +{ + u16 tmp16; + u32 tmp32; + bool have_ari = false; + int pos; + struct udevice *pf_dev; + + dm_pci_read_config16(dev, sriov_pos + PCI_SRIOV_TOTAL_VFS, &tmp16); + if (entry->num_vfs > tmp16) { + printf("WARN: requested no. of VFs %d exceeds total of %d\n", + entry->num_vfs, tmp16); + } + + /* + * The code below implements the VF Discovery recomandations specified + * in PCIe base spec "9.2.1.2 VF Discovery", quoted below: + * + * VF Discovery + * + * The First VF Offset and VF Stride fields in the SR-IOV extended + * capability are 16-bit Routing ID offsets. These offsets are used to + * compute the Routing IDs for the VFs with the following restrictions: + * - The value in NumVFs in a PF (Section 9.3.3.7) may affect the + * values in First VF Offset (Section 9.3.3.9) and VF Stride + * (Section 9.3.3.10) of that PF. + * - The value in ARI Capable Hierarchy (Section 9.3.3.3.5) in the + * lowest-numbered PF of the Device (for example PF0) may affect + * the values in First VF Offset and VF Stride in all PFs of the + * Device. + * - NumVFs of a PF may only be changed when VF Enable + * (Section 9.3.3.3.1) of that PF is Clear. + * - ARI Capable Hierarchy (Section 9.3.3.3.5) may only be changed + * when VF Enable is Clear in all PFs of a Device. + */ + + /* Clear VF enable for all PFs */ + device_foreach_child(pf_dev, dev->parent) { + dm_pci_read_config16(pf_dev, sriov_pos + PCI_SRIOV_CTRL, + &tmp16); + tmp16 &= ~PCI_SRIOV_CTRL_VF_EN; + dm_pci_write_config16(pf_dev, sriov_pos + PCI_SRIOV_CTRL, + tmp16); + } + + /* Obtain a reference to PF0 device */ + if (dm_pci_bus_find_bdf(PCI_BDF(PCI_BUS(entry->bdf), + PCI_DEV(entry->bdf), 0), &pf_dev)) { + printf("WARN: failed to get PF0\n"); + } + + if (entry->noari) + goto skip_ari; + + /* Check that connected downstream port supports ARI Forwarding */ + pos = dm_pci_find_capability(dev->parent, PCI_CAP_ID_EXP); + dm_pci_read_config32(dev->parent, pos + PCI_EXP_DEVCAP2, &tmp32); + if (!(tmp32 & PCI_EXP_DEVCAP2_ARI)) + goto skip_ari; + + /* Check that PF supports Alternate Routing ID */ + if (!dm_pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ARI)) + goto skip_ari; + + /* Set ARI Capable Hierarcy for PF0 */ + dm_pci_read_config16(pf_dev, sriov_pos + PCI_SRIOV_CTRL, &tmp16); + tmp16 |= PCI_SRIOV_CTRL_ARI; + dm_pci_write_config16(pf_dev, sriov_pos + PCI_SRIOV_CTRL, tmp16); + have_ari = true; + +skip_ari: + if (!have_ari) { + /* + * No ARI support or disabled so clear ARI Capable Hierarcy + * for PF0 + */ + dm_pci_read_config16(pf_dev, sriov_pos + PCI_SRIOV_CTRL, + &tmp16); + tmp16 &= ~PCI_SRIOV_CTRL_ARI; + dm_pci_write_config16(pf_dev, sriov_pos + PCI_SRIOV_CTRL, + tmp16); + } + + /* Set requested number of VFs */ + dm_pci_write_config16(dev, sriov_pos + PCI_SRIOV_NUM_VFS, + entry->num_vfs); + + /* Read VF stride and offset with the configs just made */ + dm_pci_read_config16(dev, sriov_pos + PCI_SRIOV_VF_OFFSET, offset); + dm_pci_read_config16(dev, sriov_pos + PCI_SRIOV_VF_STRIDE, stride); + + if (have_ari) { + /* Reset to default ARI Capable Hierarcy bit for PF0 */ + dm_pci_read_config16(pf_dev, sriov_pos + PCI_SRIOV_CTRL, + &tmp16); + tmp16 &= ~PCI_SRIOV_CTRL_ARI; + dm_pci_write_config16(pf_dev, sriov_pos + PCI_SRIOV_CTRL, + tmp16); + } + /* Reset to default the number of VFs */ + dm_pci_write_config16(dev, sriov_pos + PCI_SRIOV_NUM_VFS, 0); +} + +static int fdt_fixup_pci_vfs(void *blob, struct extra_iommu_entry *entry, + struct ls_pcie_rc *pcie_rc) +{ + struct udevice *dev, *bus; + u16 vf_offset, vf_stride; + int i, sriov_pos; + pci_dev_t bdf; + + if (dm_pci_bus_find_bdf(entry->bdf, &dev)) { + printf("ERROR: BDF %d.%d.%d not found\n", PCI_BUS(entry->bdf), + PCI_DEV(entry->bdf), PCI_FUNC(entry->bdf)); + return 0; + } + + sriov_pos = dm_pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (!sriov_pos) { + printf("WARN: trying to set VFs on non-SRIOV dev\n"); + return 0; + } + + get_vf_offset_and_stride(dev, sriov_pos, entry, &vf_offset, &vf_stride); + + for (bus = dev; device_is_on_pci_bus(bus);) + bus = bus->parent; + + bdf = entry->bdf - PCI_BDF(bus->seq, 0, 0) + (vf_offset << 8); + + for (i = 0; i < entry->num_vfs; i++) { + if (fdt_fixup_pcie_device_ls(blob, bdf, pcie_rc) < 0) + return -1; + bdf += vf_stride << 8; + } + + printf("Added %d iommu VF mappings for PF %d.%d.%d\n", + entry->num_vfs, PCI_BUS(entry->bdf), + PCI_DEV(entry->bdf), PCI_FUNC(entry->bdf)); + + return 0; +} + static void fdt_fixup_pcie_ls(void *blob) { struct udevice *dev, *bus; struct ls_pcie_rc *pcie_rc; pci_dev_t bdf; + struct extra_iommu_entry *entries; + int i, cnt, nodeoffset; + /* Scan all known buses */ for (pci_find_first_device(&dev); @@ -234,6 +517,51 @@ static void fdt_fixup_pcie_ls(void *blob) if (fdt_fixup_pcie_device_ls(blob, bdf, pcie_rc) < 0) break; } + + if (!IS_ENABLED(CONFIG_PCI_IOMMU_EXTRA_MAPPINGS)) + goto skip; + + list_for_each_entry(pcie_rc, &ls_pcie_list, list) { + nodeoffset = fdt_pcie_get_nodeoffset(blob, pcie_rc); + if (nodeoffset < 0) { + printf("ERROR: couldn't find pci node\n"); + continue; + } + + entries = get_extra_iommu_ents(blob, nodeoffset, + pcie_rc->dbi_res.start, &cnt); + if (!entries) + continue; + + for (i = 0; i < cnt; i++) { + if (entries[i].action == EXTRA_IOMMU_ENTRY_HOTPLUG) { + bdf = entries[i].bdf; + printf("Added iommu map for hotplug %d.%d.%d\n", + PCI_BUS(bdf), PCI_DEV(bdf), + PCI_FUNC(bdf)); + if (fdt_fixup_pcie_device_ls(blob, bdf, + pcie_rc) < 0) { + free(entries); + return; + } + } else if (entries[i].action == EXTRA_IOMMU_ENTRY_VFS) { + if (fdt_fixup_pci_vfs(blob, &entries[i], + pcie_rc) < 0) { + free(entries); + return; + } + } else { + printf("Invalid action %d for BDF %d.%d.%d\n", + entries[i].action, + PCI_BUS(entries[i].bdf), + PCI_DEV(entries[i].bdf), + PCI_FUNC(entries[i].bdf)); + } + } + free(entries); + } + +skip: pcie_board_fix_fdt(blob); } #endif -- 2.17.1