On 7/18/2017 11:29 AM, Santosh Shukla wrote:
Get the iommu class of the PCI devices on the bus and return the preferred
iova mapping mode for that bus.
Algorithm for iova scheme selection for PCI bus:
0. If no device is bound, return the RTE_IOVA_DC mapping mode;
otherwise go to 1).
1. Look for a device that is attached to the vfio kdrv and whose driver has
RTE_PCI_DRV_IOVA_AS_VA set in .drv_flags.
2. Look for any device attached to UIO class of driver.
3. Check for vfio-noiommu mode enabled.
If 2) and 3) are both false and 1) is true, then select
RTE_IOVA_VA as the mapping scheme. Otherwise, use the default
mapping scheme (RTE_IOVA_PA).
Signed-off-by: Santosh Shukla <santosh.shu...@caviumnetworks.com>
Signed-off-by: Jerin Jacob <jerin.ja...@caviumnetworks.com>
---
v3 --> v4 :
- Reworded WARNING message (suggested by Maxime)
- Added pci_device_is_bound func to check for no device case
(suggested by Hemant).
- Added ifdef vfio_present.
v1 --> v2:
- Removed Linux version check in vfio_noiommu func. Refer [1].
- Extended the autodetection logic for _iommu_class.
Refer [2].
[1] https://www.mail-archive.com/dev@dpdk.org/msg70108.html
[2] https://www.mail-archive.com/dev@dpdk.org/msg70279.html
lib/librte_eal/linuxapp/eal/eal_pci.c | 95 +++++++++++++++++++++++++
lib/librte_eal/linuxapp/eal/eal_vfio.c | 19 +++++
lib/librte_eal/linuxapp/eal/eal_vfio.h | 4 ++
lib/librte_eal/linuxapp/eal/rte_eal_version.map | 1 +
4 files changed, 119 insertions(+)
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 7d9e1a99b..ecd946250 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -45,6 +45,7 @@
#include "eal_filesystem.h"
#include "eal_private.h"
#include "eal_pci_init.h"
+#include "eal_vfio.h"
/**
* @file
@@ -488,6 +489,100 @@ rte_pci_scan(void)
return -1;
}
+/*
+ * Report whether at least one PCI device on the bus is bound to a kernel
+ * driver, i.e. its kdrv is neither RTE_KDRV_UNKNOWN nor RTE_KDRV_NONE.
+ *
+ * Returns 1 as soon as such a device is found, 0 if there is none.
+ */
+static inline int
+pci_device_is_bound(void)
+{
+	struct rte_pci_device *pdev = NULL;
+
+	FOREACH_DEVICE_ON_PCIBUS(pdev) {
+		if (pdev->kdrv != RTE_KDRV_UNKNOWN &&
+		    pdev->kdrv != RTE_KDRV_NONE)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Report whether any PCI device on the bus is bound to a UIO kernel
+ * driver (RTE_KDRV_IGB_UIO or RTE_KDRV_UIO_GENERIC).
+ *
+ * Returns 1 on the first such device, 0 if no device matches.
+ */
+static inline int
+pci_device_bound_uio(void)
+{
+	struct rte_pci_device *pdev = NULL;
+
+	FOREACH_DEVICE_ON_PCIBUS(pdev) {
+		switch (pdev->kdrv) {
+		case RTE_KDRV_IGB_UIO:
+		case RTE_KDRV_UIO_GENERIC:
+			return 1;
+		default:
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Report whether any device bound to vfio (RTE_KDRV_VFIO) is matched by a
+ * registered PCI driver that sets RTE_PCI_DRV_IOVA_AS_VA in its drv_flags.
+ *
+ * Returns 1 on the first such driver/device pair, 0 if there is none.
+ */
+static inline int
+pci_device_has_iova_va(void)
+{
+	struct rte_pci_device *pdev = NULL;
+	struct rte_pci_driver *pdrv = NULL;
+
+	FOREACH_DRIVER_ON_PCIBUS(pdrv) {
+		/* Skip drivers that do not request iova as va. */
+		if (pdrv == NULL ||
+		    !(pdrv->drv_flags & RTE_PCI_DRV_IOVA_AS_VA))
+			continue;
+
+		FOREACH_DEVICE_ON_PCIBUS(pdev) {
+			if (pdev->kdrv != RTE_KDRV_VFIO)
+				continue;
+			if (rte_pci_match(pdrv, pdev))
+				return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Get iommu class of PCI devices on the bus.
+ *
+ * Selection:
+ *  - RTE_IOVA_DC when no device is bound to any kernel driver;
+ *  - RTE_IOVA_VA when a vfio-bound device has a driver requesting iova as
+ *    va, no device is bound to UIO and vfio-noiommu mode is not enabled;
+ *  - RTE_IOVA_PA in every other case (a warning is logged when some
+ *    device wanted iova as va).
+ */
+enum rte_iova_mode
+rte_pci_get_iommu_class(void)
+{
+	/*
+	 * Stays true when VFIO_PRESENT is not defined, so RTE_IOVA_VA is
+	 * never selected in that configuration.
+	 */
+	bool is_vfio_noiommu_enabled = true;
+	bool has_iova_va;
+	bool is_bound_uio;
+
+	if (!pci_device_is_bound())
+		return RTE_IOVA_DC;
+
+	has_iova_va = pci_device_has_iova_va();
+	is_bound_uio = pci_device_bound_uio();
+#ifdef VFIO_PRESENT
+	/* Any result other than 1 is treated as "not enabled". */
+	is_vfio_noiommu_enabled = (vfio_noiommu_is_enabled() == 1);
+#endif
+
+	if (has_iova_va && !is_bound_uio && !is_vfio_noiommu_enabled)
+		return RTE_IOVA_VA;
+
+	if (has_iova_va) {
+		/* No trailing newline: the reason(s) are logged right after. */
+		RTE_LOG(WARNING, EAL, "Some devices want iova as va but pa "
+			"will be used because.. ");
+		if (is_vfio_noiommu_enabled)
+			RTE_LOG(WARNING, EAL, "vfio-noiommu mode configured\n");
+		if (is_bound_uio)
+			RTE_LOG(WARNING, EAL, "few device bound to UIO\n");
+	}
+
+	return RTE_IOVA_PA;
+}
+
/* Read PCI config space. */
int rte_pci_read_config(const struct rte_pci_device *device,
void *buf, size_t len, off_t offset)
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c
b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 946df7e31..c8a97b7e7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -816,4 +816,23 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
return 0;
}
+/*
+ * Check whether vfio no-IOMMU mode is enabled by reading the first byte
+ * of the file named by VFIO_NOIOMMU_MODE.
+ *
+ * Returns 1 when that byte is 'Y'. Returns -1 when the file cannot be
+ * opened, when no byte could be read, or when the byte is anything else.
+ */
+int
+vfio_noiommu_is_enabled(void)
+{
+	int fd;
+	ssize_t cnt;
+	char c = '\0';
+
+	fd = open(VFIO_NOIOMMU_MODE, O_RDONLY);
+	if (fd < 0)
+		return -1;
+
+	cnt = read(fd, &c, 1);
+	close(fd);
+
+	/* Only inspect c when exactly one byte was actually read. */
+	if (cnt != 1)
+		return -1;
+
+	return (c == 'Y') ? 1 : -1;
+}
+
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h
b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 5ff63e5d7..26ea8e119 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -150,6 +150,8 @@ struct vfio_config {
#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
#define VFIO_GET_REGION_IDX(x) (x >> 40)
+/* Path of the vfio module parameter file read to detect no-IOMMU mode. */
+#define VFIO_NOIOMMU_MODE \
+ "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
/* DMA mapping function prototype.
* Takes VFIO container fd as a parameter.
@@ -210,6 +212,8 @@ int pci_vfio_is_enabled(void);
int vfio_mp_sync_setup(void);
+/*
+ * Reads the file named by VFIO_NOIOMMU_MODE; returns 1 when its first
+ * byte is 'Y' and -1 otherwise (including when the file cannot be opened).
+ */
+int vfio_noiommu_is_enabled(void);
+
#define SOCKET_REQ_CONTAINER 0x100
#define SOCKET_REQ_GROUP 0x200
#define SOCKET_CLR_GROUP 0x300
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index a69bbb599..5dd40f948 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -206,6 +206,7 @@ DPDK_17.08 {
rte_bus_find_by_device;
rte_bus_find_by_name;
rte_pci_match;
+ rte_pci_get_iommu_class;
} DPDK_17.05;
Acked-by: Hemant Agrawal <hemant.agra...@nxp.com>