> -----Original Message----- > From: dev [mailto:dev-boun...@dpdk.org] On Behalf Of Maxime Coquelin > Sent: Tuesday, January 9, 2018 9:18 PM > To: dev@dpdk.org; sta...@dpdk.org; Tan, Jianfeng <jianfeng....@intel.com>; > santosh.shu...@caviumnetworks.com; Burakov, Anatoly > <anatoly.bura...@intel.com>; tho...@monjalon.net; > step...@networkplumber.org > Cc: pet...@redhat.com; Maxime Coquelin <maxime.coque...@redhat.com> > Subject: [dpdk-dev] [PATCH v2] bus/pci: forbid VA as IOVA mode if IOMMU > address width too small > > Intel VT-d supports different address widths for the IOVAs, from > 39 bits to 56 bits. > > While recent processors support at least 48 bits, VT-d emulation currently > only supports 39 bits. This makes DMA mapping fail in this case when using > VA as IOVA mode, as user-space virtual addresses use up to 47 bits (see > kernel's Documentation/x86/x86_64/mm.txt). > > This patch parses the VT-d CAP register value available in sysfs, and forbids VA as > IOVA mode if the GAW is 39 bits or unknown. 
> > Fixes: f37dfab21c98 ("drivers/net: enable IOVA mode for Intel PMDs") > > Cc: sta...@dpdk.org > Signed-off-by: Maxime Coquelin <maxime.coque...@redhat.com> > --- > > Changes in v2: > ============== > - Rework pci_one_device_iommu_support_va #ifdefery (Stephen) > - Don't inline introduced functions (Stephen) > > drivers/bus/pci/linux/pci.c | 108 > ++++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 99 insertions(+), 9 deletions(-) > > diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c index > 25f907e04..0a43c4b89 100644 > --- a/drivers/bus/pci/linux/pci.c > +++ b/drivers/bus/pci/linux/pci.c > @@ -547,6 +547,100 @@ pci_one_device_has_iova_va(void) > return 0; > } > > +#if defined(RTE_ARCH_X86) > +static bool > +pci_one_device_iommu_support_va(struct rte_pci_device *dev) { > +#define VTD_CAP_SAGAW_SHIFT 8 > +#define VTD_CAP_SAGAW_MASK (0x1fULL << > VTD_CAP_SAGAW_SHIFT) > +#define X86_VA_WIDTH 47 /* From Documentation/x86/x86_64/mm.txt > */ > + struct rte_pci_addr *addr = &dev->addr; > + char filename[PATH_MAX]; > + FILE *fp; > + uint64_t sagaw, vtd_cap_reg = 0; > + int guest_addr_width = 0; > + > + snprintf(filename, sizeof(filename), > + "%s/" PCI_PRI_FMT "/iommu/intel-iommu/cap", > + rte_pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid, > + addr->function); > + if (access(filename, F_OK) == -1) { > + /* We don't have an Intel IOMMU, assume VA supported*/ > + return true; > + } > + > + /* We have an intel IOMMU */ > + fp = fopen(filename, "r"); > + if (fp == NULL) { > + RTE_LOG(ERR, EAL, "%s(): can't open %s\n", __func__, filename); > + return false; > + } > + > + if (fscanf(fp, "%lx", &vtd_cap_reg) != 1) { > + RTE_LOG(ERR, EAL, "%s(): can't read %s\n", __func__, filename); > + fclose(fp); > + return false; > + } > + > + fclose(fp); > + > + sagaw = (vtd_cap_reg & VTD_CAP_SAGAW_MASK) >> > VTD_CAP_SAGAW_SHIFT;
Based on a previous test, sagaw is not the maximum VA address width. The correct CAP decode should be the one below, taken from the kernel driver's include/linux/intel-iommu.h: #define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) Regards, Qi > + > + switch (sagaw) { > + case 2: > + guest_addr_width = 39; > + break; > + case 4: > + guest_addr_width = 48; > + break; > + case 6: > + guest_addr_width = 56; > + break; > + default: > + RTE_LOG(ERR, EAL, "Unkwown Intel IOMMU SAGAW value (%lx)\n", > + sagaw); > + break; > + } > + > + if (guest_addr_width < X86_VA_WIDTH) > + return false; > + > + return true; > +} > +#elif defined(RTE_ARCH_PPC_64) > +static bool > +pci_one_device_iommu_support_va(struct rte_pci_device *dev) { > + return false; > +} > +#else > +static bool > +pci_one_device_iommu_support_va(struct rte_pci_device *dev) { > + return true; > +} > +#endif > + > +/* > + * All devices IOMMUs support VA as IOVA */ static bool > +pci_devices_iommu_support_va(void) > +{ > + struct rte_pci_device *dev = NULL; > + struct rte_pci_driver *drv = NULL; > + > + FOREACH_DRIVER_ON_PCIBUS(drv) { > + FOREACH_DEVICE_ON_PCIBUS(dev) { > + if (!rte_pci_match(drv, dev)) > + continue; > + if (!pci_one_device_iommu_support_va(dev)) > + return false; > + } > + } > + return true; > +} > + > /* > * Get iommu class of PCI devices on the bus. > */ > @@ -557,12 +651,7 @@ rte_pci_get_iommu_class(void) > bool is_vfio_noiommu_enabled = true; > bool has_iova_va; > bool is_bound_uio; > - bool spapr_iommu = > -#if defined(RTE_ARCH_PPC_64) > - true; > -#else > - false; > -#endif > + bool iommu_no_va; > > is_bound = pci_one_device_is_bound(); > if (!is_bound) > @@ -570,13 +659,14 @@ rte_pci_get_iommu_class(void) > > has_iova_va = pci_one_device_has_iova_va(); > is_bound_uio = pci_one_device_bound_uio(); > + iommu_no_va = !pci_devices_iommu_support_va(); > #ifdef VFIO_PRESENT > is_vfio_noiommu_enabled = rte_vfio_noiommu_is_enabled() == true ? 
> true : false; > #endif > > if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled && > - !spapr_iommu) > + !iommu_no_va) > return RTE_IOVA_VA; > > if (has_iova_va) { > @@ -585,8 +675,8 @@ rte_pci_get_iommu_class(void) > RTE_LOG(WARNING, EAL, "vfio-noiommu mode > configured\n"); > if (is_bound_uio) > RTE_LOG(WARNING, EAL, "few device bound to UIO\n"); > - if (spapr_iommu) > - RTE_LOG(WARNING, EAL, "sPAPR IOMMU does not support > IOVA as VA\n"); > + if (iommu_no_va) > + RTE_LOG(WARNING, EAL, "IOMMU does not support IOVA as > VA\n"); > } > > return RTE_IOVA_PA; > -- > 2.14.3