This commit is adding a generic mechanism to support multiple IOMMU
types. For now, it's only type 1 (x86 IOMMU) and no-IOMMU (a special
VFIO mode that doesn't use IOMMU at all), but it's easily extended
by adding necessary definitions to eal_vfio.h, and DMA mapping
functions to eal_pci_vfio.c.
Since type 1 IOMMU module is no longer necessary to have VFIO,
we fix the module check to check for vfio-pci instead. It's not
ideal and triggers VFIO checks more often (and thus produces more
error output, which was the reason behind the module check in the
first place), so we compensate for that by providing more verbose
logging, indicating whether VFIO initialization has succeeded or
failed.
Signed-off-by: Anatoly Burakov
Signed-off-by: Santosh Shukla
Tested-by: Santosh Shukla
---
v6 changes:
Fixed functions not declared as static
Fixed definitions to be more consistent with others
v5 changes:
Renamed functions
v4 changes:
Fixed the commit message and added a missing sign-off
v3 changes:
Merging DMA mapping functions back into eal_pci_vfio.c
Fixing and adding comments
v2 changes:
Compile fix (hat-tip to Santosh Shukla)
Tested-by is provisional, since only superficial testing was done
lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 205 +
lib/librte_eal/linuxapp/eal/eal_vfio.h | 8 ++
2 files changed, 160 insertions(+), 53 deletions(-)
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 74f91ba..a6c7e16 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -72,11 +72,74 @@ EAL_REGISTER_TAILQ(rte_vfio_tailq)
#define VFIO_DIR "/dev/vfio"
#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
#define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
/* per-process VFIO config */
static struct vfio_config vfio_cfg;
+/* DMA mapping function prototype.
+ * Takes VFIO container fd as a parameter.
+ * Returns 0 on success, -1 on error.
+ * */
+typedef int (*vfio_dma_func_t)(int);
+
+struct vfio_iommu_type {
+ int type_id;
+ const char *name;
+ vfio_dma_func_t dma_map_func;
+};
+
+static int vfio_type1_dma_map(int);
+static int vfio_noiommu_dma_map(int);
+
+/* IOMMU types we support */
+static const struct vfio_iommu_type iommu_types[] = {
+ /* x86 IOMMU, otherwise known as type 1 */
+ { RTE_VFIO_TYPE1, "Type 1", _type1_dma_map},
+ /* IOMMU-less mode */
+ { RTE_VFIO_NOIOMMU, "No-IOMMU", _noiommu_dma_map},
+};
+
+int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ int i, ret;
+
+ /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ struct vfio_iommu_type1_dma_map dma_map;
+
+ if (ms[i].addr == NULL)
+ break;
+
+ memset(_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = ms[i].addr_64;
+ dma_map.size = ms[i].len;
+ dma_map.iova = ms[i].phys_addr;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, _map);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
+ "error %i (%s)\n", errno,
strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int
+vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
+{
+ /* No-IOMMU mode does not need DMA mapping */
+ return 0;
+}
+
int
pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
void *buf, size_t len, off_t offs)
@@ -208,42 +271,58 @@ pci_vfio_set_bus_master(int dev_fd)
return 0;
}
-/* set up DMA mappings */
-static int
-pci_vfio_setup_dma_maps(int vfio_container_fd)
-{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- int i, ret;
-
- ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
- VFIO_TYPE1_IOMMU);
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot set IOMMU type, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
+/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
+static const struct vfio_iommu_type *
+pci_vfio_set_iommu_type(int vfio_container_fd) {
+ unsigned idx;
+ for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
+ const struct vfio_iommu_type *t = _types[idx];
+
+ int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
+ t->type_id);
+ if (!ret) {
+