Adds VFIO functionality and the DMA it requires.
The VFIO context is initialized during ntnic ethdev startup.

Signed-off-by: Serhii Iliushyk <sil-...@napatech.com>
---
v10
* Use 8 spaces as indentation in meson
---
 drivers/net/ntnic/meson.build      |   1 +
 drivers/net/ntnic/ntnic_ethdev.c   |  22 +++
 drivers/net/ntnic/ntnic_vfio.c     | 235 +++++++++++++++++++++++++++++
 drivers/net/ntnic/ntnic_vfio.h     |  29 ++++
 drivers/net/ntnic/ntutil/nt_util.c |  65 ++++++++
 5 files changed, 352 insertions(+)
 create mode 100644 drivers/net/ntnic/ntnic_vfio.c
 create mode 100644 drivers/net/ntnic/ntnic_vfio.h

diff --git a/drivers/net/ntnic/meson.build b/drivers/net/ntnic/meson.build
index 6f645320b9..deeb0aca09 100644
--- a/drivers/net/ntnic/meson.build
+++ b/drivers/net/ntnic/meson.build
@@ -18,5 +18,6 @@ includes = [
 sources = files(
         'ntlog/ntlog.c',
         'ntutil/nt_util.c',
+        'ntnic_vfio.c',
         'ntnic_ethdev.c',
 )
diff --git a/drivers/net/ntnic/ntnic_ethdev.c b/drivers/net/ntnic/ntnic_ethdev.c
index 02b55e2780..b838eb4d7a 100644
--- a/drivers/net/ntnic/ntnic_ethdev.c
+++ b/drivers/net/ntnic/ntnic_ethdev.c
@@ -8,10 +8,17 @@
 #include <rte_bus_pci.h>
 #include <ethdev_pci.h>
 
+#include <rte_eal.h>
+#include <rte_dev.h>
+#include <rte_vfio.h>
+
 #include "ntlog.h"
 
+#include "ntnic_vfio.h"
 #include "nt_util.h"
 
+#define EXCEPTION_PATH_HID 0
+
 static const struct rte_pci_id nthw_pci_id_map[] = {
        {
                .vendor_id = 0,
@@ -21,12 +28,24 @@ static const struct rte_pci_id nthw_pci_id_map[] = {
 static int
 nthw_pci_dev_init(struct rte_pci_device *pci_dev)
 {
+       nt_vfio_init();
+
        uint32_t n_port_mask = -1;      /* All ports enabled by default */
        int n_phy_ports;
        NT_LOG_DBGX(DEBUG, NTNIC, "Dev %s PF #%i Init : %02x:%02x:%i\n", 
pci_dev->name,
                pci_dev->addr.function, pci_dev->addr.bus, pci_dev->addr.devid,
                pci_dev->addr.function);
 
+
+       /* Setup VFIO context; a negative result means the device cannot be used */
+       int vfio = nt_vfio_setup(pci_dev);
+
+       if (vfio < 0) {
+               NT_LOG_DBGX(ERR, NTNIC, "%s: vfio_setup error %d\n",
+                       (pci_dev->name[0] ? pci_dev->name : "NA"), vfio);
+               return -1;
+       }
+
        n_phy_ports = 0;
 
        for (int n_intf_no = 0; n_intf_no < n_phy_ports; n_intf_no++) {
@@ -67,6 +86,8 @@ static int
 nthw_pci_dev_deinit(struct rte_eth_dev *eth_dev __rte_unused)
 {
        NT_LOG_DBGX(DEBUG, NTNIC, "PCI device deinitialization\n");
+
+       /* Release the VFIO slot EXCEPTION_PATH_HID; the result is not checked. */
+       nt_vfio_remove(EXCEPTION_PATH_HID);
        return 0;
 }
 
@@ -131,3 +152,4 @@ static struct rte_pci_driver rte_nthw_pmd = {
 };
 
 RTE_PMD_REGISTER_PCI(net_ntnic, rte_nthw_pmd);
+RTE_PMD_REGISTER_KMOD_DEP(net_ntnic, "* vfio-pci");
diff --git a/drivers/net/ntnic/ntnic_vfio.c b/drivers/net/ntnic/ntnic_vfio.c
new file mode 100644
index 0000000000..f4433152b7
--- /dev/null
+++ b/drivers/net/ntnic/ntnic_vfio.c
@@ -0,0 +1,235 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Napatech A/S
+ */
+
+#include <sys/ioctl.h>
+
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_vfio.h>
+#include <rte_dev.h>
+#include <rte_bus_pci.h>
+#include <rte_spinlock.h>
+
+#include <ntlog.h>
+#include <nt_util.h>
+#include "ntnic_vfio.h"
+
+#define ONE_G_SIZE 0x40000000
+#define ONE_G_MASK (ONE_G_SIZE - 1)
+#define START_VF_IOVA 0x220000000000
+
+/*
+ * Derive the VFIO slot number of a PCI device from its address: the low five
+ * bits of the device id are placed in bits 7..3 and the low three bits of the
+ * function number in bits 2..0, so the result always lies within 0..255.
+ */
+int
+nt_vfio_vf_num(const struct rte_pci_device *pdev)
+{
+       const int dev_bits = pdev->addr.devid & 0x1f;
+       const int func_bits = pdev->addr.function & 0x7;
+
+       return (dev_bits << 3) + func_bits;
+}
+
+/* Internal API */
+/*
+ * Per-device VFIO bookkeeping; one entry per slot number as computed by
+ * nt_vfio_vf_num().
+ */
+struct vfio_dev {
+       int container_fd;       /* container the device's IOMMU group is bound to */
+       int group_fd;           /* result of rte_vfio_container_group_bind() */
+       int dev_fd;             /* fd taken from the device's interrupt handle */
+       uint64_t iova_addr;     /* IOVA handed to the next DMA mapping */
+};
+
+/* Slot table; vfio_get() is the bounds-checked accessor (valid indexes 0..255). */
+static struct vfio_dev vfio_list[256];
+
+/* Look up the VFIO slot for vf_num; NULL when vf_num is outside 0..255. */
+static struct vfio_dev *
+vfio_get(int vf_num)
+{
+       const int in_range = (vf_num >= 0 && vf_num <= 255);
+
+       return in_range ? &vfio_list[vf_num] : NULL;
+}
+
+/* External API */
+/*
+ * Prepare the VFIO context of a PCI device.
+ *
+ * Resets the device's slot, looks up its IOMMU group, binds the group to a
+ * container (the default container for slot 0, a newly created one for every
+ * other slot), maps the device for non-zero slots and records the device fd
+ * taken from the interrupt handle.
+ *
+ * Returns the slot number (>= 0) on success and -1 on failure. On failure a
+ * container created here is destroyed again and the slot no longer refers
+ * to it.
+ */
+int
+nt_vfio_setup(struct rte_pci_device *dev)
+{
+       char devname[RTE_DEV_NAME_MAX_LEN] = { 0 };
+       int iommu_group_num = -1;
+       int vf_num;
+       struct vfio_dev *vfio;
+
+       NT_LOG(INF, NTNIC, "NT VFIO device setup %s\n", dev->name);
+
+       vf_num = nt_vfio_vf_num(dev);
+       vfio = vfio_get(vf_num);
+
+       if (vfio == NULL) {
+               NT_LOG(ERR, NTNIC, "VFIO device setup failed. Illegal device id\n");
+               return -1;
+       }
+
+       vfio->dev_fd = -1;
+       vfio->group_fd = -1;
+       vfio->container_fd = -1;
+       vfio->iova_addr = START_VF_IOVA;
+
+       rte_pci_device_name(&dev->addr, devname, RTE_DEV_NAME_MAX_LEN);
+
+       /*
+        * Only a positive return value means the group number was filled in;
+        * stop here rather than bind an undefined group.
+        */
+       if (rte_vfio_get_group_num(rte_pci_get_sysfs_path(), devname,
+                       &iommu_group_num) <= 0) {
+               NT_LOG(ERR, NTNIC,
+                       "VFIO device setup failed. IOMMU group lookup failed for %s\n",
+                       devname);
+               return -1;
+       }
+
+       if (vf_num == 0) {
+               /* use default container for pf0 */
+               vfio->container_fd = RTE_VFIO_DEFAULT_CONTAINER_FD;
+
+       } else {
+               vfio->container_fd = rte_vfio_container_create();
+
+               if (vfio->container_fd < 0) {
+                       NT_LOG(ERR, NTNIC,
+                               "VFIO device setup failed. VFIO container creation failed.\n");
+                       return -1;
+               }
+       }
+
+       vfio->group_fd = rte_vfio_container_group_bind(vfio->container_fd, iommu_group_num);
+
+       if (vfio->group_fd < 0) {
+               NT_LOG(ERR, NTNIC,
+                       "VFIO device setup failed. VFIO container group bind failed.\n");
+               goto err;
+       }
+
+       if (vf_num > 0) {
+               if (rte_pci_map_device(dev)) {
+                       NT_LOG(ERR, NTNIC,
+                               "Map VFIO device failed. is the vfio-pci driver loaded?\n");
+                       goto err;
+               }
+       }
+
+       vfio->dev_fd = rte_intr_dev_fd_get(dev->intr_handle);
+
+       NT_LOG(DBG, NTNIC,
+               "%s: VFIO id=%d, dev_fd=%d, container_fd=%d, group_fd=%d, iommu_group_num=%d\n",
+               dev->name, vf_num, vfio->dev_fd, vfio->container_fd, vfio->group_fd,
+               iommu_group_num);
+
+       return vf_num;
+
+err:
+
+       /* The default container is shared and was not created here; leave it alone. */
+       if (vfio->container_fd != RTE_VFIO_DEFAULT_CONTAINER_FD) {
+               rte_vfio_container_destroy(vfio->container_fd);
+               /* Forget the destroyed container so later calls cannot reuse its fd. */
+               vfio->container_fd = -1;
+       }
+
+       vfio->group_fd = -1;
+
+       return -1;
+}
+
+/*
+ * Release the VFIO container owned by slot vf_num.
+ *
+ * A slot that uses RTE_VFIO_DEFAULT_CONTAINER_FD is left untouched, matching
+ * the error path of nt_vfio_setup(), which also never destroys the default
+ * container. After a successful destroy the slot forgets the fd so that a
+ * repeated call cannot destroy it twice.
+ *
+ * Returns 0 on success (or when there was nothing to destroy) and -1 when
+ * vf_num is out of range or the container could not be destroyed.
+ */
+int
+nt_vfio_remove(int vf_num)
+{
+       struct vfio_dev *vfio;
+
+       NT_LOG(DBG, NTNIC, "NT VFIO device remove VF=%d\n", vf_num);
+
+       vfio = vfio_get(vf_num);
+
+       if (!vfio) {
+               NT_LOG(ERR, NTNIC, "VFIO device remove failed. Illegal device id\n");
+               return -1;
+       }
+
+       if (vfio->container_fd == RTE_VFIO_DEFAULT_CONTAINER_FD)
+               return 0;
+
+       if (rte_vfio_container_destroy(vfio->container_fd) != 0) {
+               NT_LOG(ERR, NTNIC, "VFIO device remove failed. container_fd %d, VF=%d\n",
+                       vfio->container_fd, vf_num);
+               return -1;
+       }
+
+       vfio->container_fd = -1;
+       return 0;
+}
+
+/*
+ * Map a buffer into the IOVA space of the container that belongs to vf_num.
+ *
+ * When size is exactly ONE_G_SIZE the mapping starts at virt_addr rounded down
+ * to a 1 GiB boundary and the returned IOVA carries the offset of virt_addr
+ * within that 1 GiB region; for any other size virt_addr is mapped as given.
+ *
+ * vf_num     slot number, see nt_vfio_vf_num()
+ * virt_addr  virtual address of the buffer
+ * iova_addr  out: IOVA the device must use for virt_addr; must not be NULL
+ * size       number of bytes to map
+ *
+ * Returns 0 on success and -1 on failure. The slot's IOVA cursor only
+ * advances on success, by size rounded up to a whole number of 1 GiB steps
+ * (at least one), so that consecutive mappings never overlap.
+ */
+int
+nt_vfio_dma_map(int vf_num, void *virt_addr, uint64_t *iova_addr, uint64_t size)
+{
+       uint64_t gp_virt_base;
+       uint64_t gp_offset;
+       uint64_t iova_step;
+       struct vfio_dev *vfio;
+       int res;
+
+       if (iova_addr == NULL) {
+               NT_LOG(ERR, NTNIC, "VFIO MAP: no storage for the resulting IOVA\n");
+               return -1;
+       }
+
+       if (size == ONE_G_SIZE) {
+               gp_virt_base = (uint64_t)virt_addr & ~ONE_G_MASK;
+               gp_offset = (uint64_t)virt_addr & ONE_G_MASK;
+
+       } else {
+               gp_virt_base = (uint64_t)virt_addr;
+               gp_offset = 0;
+       }
+
+       vfio = vfio_get(vf_num);
+
+       if (vfio == NULL) {
+               NT_LOG(ERR, NTNIC, "VFIO MAP: VF number %d invalid\n", vf_num);
+               return -1;
+       }
+
+       NT_LOG(DBG, NTNIC,
+               "VFIO MMAP VF=%d VirtAddr=%p HPA=%" PRIX64 " VirtBase=%" PRIX64
+               " IOVA Addr=%" PRIX64 " size=%" PRIX64 "\n",
+               vf_num, virt_addr, rte_malloc_virt2iova(virt_addr), gp_virt_base, vfio->iova_addr,
+               size);
+
+       res = rte_vfio_container_dma_map(vfio->container_fd, gp_virt_base, vfio->iova_addr,
+                       size);
+
+       NT_LOG(DBG, NTNIC, "VFIO MMAP res %i, container_fd %i, vf_num %i\n", res,
+               vfio->container_fd, vf_num);
+
+       if (res) {
+               NT_LOG(ERR, NTNIC, "rte_vfio_container_dma_map failed: res %d\n", res);
+               return -1;
+       }
+
+       *iova_addr = vfio->iova_addr + gp_offset;
+
+       /*
+        * Reserve whole 1 GiB steps: a fixed 1 GiB step would make a mapping
+        * larger than 1 GiB overlap the IOVA range of the next one.
+        */
+       iova_step = ((size + ONE_G_MASK) / ONE_G_SIZE) * ONE_G_SIZE;
+
+       if (iova_step < ONE_G_SIZE)
+               iova_step = ONE_G_SIZE;
+
+       vfio->iova_addr += iova_step;
+
+       return 0;
+}
+
+/*
+ * Remove a DMA mapping from the container that belongs to vf_num.
+ *
+ * The address handling mirrors nt_vfio_dma_map(): when size is exactly
+ * ONE_G_SIZE, virt_addr is rounded down to a 1 GiB boundary and the offset of
+ * virt_addr within that region is subtracted from iova_addr; for any other
+ * size both addresses are used as given.
+ *
+ * A slot whose container_fd is -1 is treated as having nothing to unmap and
+ * reports success.
+ *
+ * Returns 0 on success and -1 when vf_num is out of range or the unmap fails.
+ */
+int
+nt_vfio_dma_unmap(int vf_num, void *virt_addr, uint64_t iova_addr, uint64_t size)
+{
+       uint64_t gp_virt_base;
+       struct vfio_dev *vfio;
+
+       if (size == ONE_G_SIZE) {
+               uint64_t gp_offset;
+               gp_virt_base = (uint64_t)virt_addr & ~ONE_G_MASK;
+               gp_offset = (uint64_t)virt_addr & ONE_G_MASK;
+               iova_addr -= gp_offset;
+
+       } else {
+               gp_virt_base = (uint64_t)virt_addr;
+       }
+
+       vfio = vfio_get(vf_num);
+
+       if (vfio == NULL) {
+               NT_LOG(ERR, NTNIC, "VFIO UNMAP: VF number %d invalid\n", vf_num);
+               return -1;
+       }
+
+       if (vfio->container_fd == -1)
+               return 0;
+
+       int res = rte_vfio_container_dma_unmap(vfio->container_fd, gp_virt_base, iova_addr, size);
+
+       if (res != 0) {
+               NT_LOG(ERR, NTNIC,
+                       "VFIO UNMMAP FAILED! res %i, container_fd %i, vf_num %i, virt_base=%" PRIX64
+                       ", IOVA=%" PRIX64 ", size=%" PRIX64 "\n",
+                       res, vfio->container_fd, vf_num, gp_virt_base, iova_addr, size);
+               return -1;
+       }
+
+       return 0;
+}
+
+/* Hand this module's DMA map/unmap routines to the nt_util layer. */
+void
+nt_vfio_init(void)
+{
+       struct nt_util_vfio_impl impl = {
+               .vfio_dma_map = nt_vfio_dma_map,
+               .vfio_dma_unmap = nt_vfio_dma_unmap,
+       };
+
+       nt_util_vfio_init(&impl);
+}
diff --git a/drivers/net/ntnic/ntnic_vfio.h b/drivers/net/ntnic/ntnic_vfio.h
new file mode 100644
index 0000000000..69fef7923d
--- /dev/null
+++ b/drivers/net/ntnic/ntnic_vfio.h
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2023 Napatech A/S
+ */
+
+#ifndef _NTNIC_VFIO_H_
+#define _NTNIC_VFIO_H_
+
+#include <rte_dev.h>
+#include <rte_bus_pci.h>
+#include <ethdev_pci.h>
+
+/* Register the VFIO DMA map/unmap routines with the nt_util layer. */
+void
+nt_vfio_init(void);
+
+/*
+ * Prepare the VFIO context of a PCI device.
+ * Returns the device's slot number (>= 0) on success, -1 on failure.
+ */
+int
+nt_vfio_setup(struct rte_pci_device *dev);
+/* Release the VFIO context of slot vf_num; -1 when vf_num is out of range. */
+int
+nt_vfio_remove(int vf_num);
+
+/*
+ * Map size bytes at virt_addr for DMA by slot vf_num and store the resulting
+ * IOVA in *iova_addr. Returns 0 on success, -1 on failure.
+ */
+int
+nt_vfio_dma_map(int vf_num, void *virt_addr, uint64_t *iova_addr, uint64_t size);
+/*
+ * Undo a mapping for slot vf_num; virt_addr, iova_addr and size describe the
+ * mapping to remove. Returns 0 on success, -1 on failure.
+ */
+int
+nt_vfio_dma_unmap(int vf_num, void *virt_addr, uint64_t iova_addr, uint64_t size);
+
+/* Find device (PF/VF) number from device address */
+int
+nt_vfio_vf_num(const struct rte_pci_device *dev);
+#endif /* _NTNIC_VFIO_H_ */
diff --git a/drivers/net/ntnic/ntutil/nt_util.c b/drivers/net/ntnic/ntutil/nt_util.c
index 5395bf6993..53c39ef112 100644
--- a/drivers/net/ntnic/ntutil/nt_util.c
+++ b/drivers/net/ntnic/ntutil/nt_util.c
@@ -15,6 +15,8 @@
 #include "ntlog.h"
 #include "nt_util.h"
 
+/* DMA map/unmap callbacks installed by nt_util_vfio_init(); zeroed until then. */
+static struct nt_util_vfio_impl vfio_cb;
+
 /* uses usleep which schedules out the calling thread */
 void nt_os_wait_usec(int val)
 {
@@ -31,3 +33,66 @@ uint64_t nt_util_align_size(uint64_t size)
 {
        return 1 << rte_log2_u64(size);
 }
+
+/*
+ * Install the VFIO DMA callbacks used by nt_dma_alloc() and nt_dma_free().
+ * The structure is copied by value, so impl need not outlive the call.
+ * impl must not be NULL; it is dereferenced unconditionally.
+ */
+void nt_util_vfio_init(struct nt_util_vfio_impl *impl)
+{
+       vfio_cb = *impl;
+}
+
+/*
+ * Allocate a buffer and map it for DMA through the registered VFIO callback.
+ *
+ * size   number of bytes requested from rte_malloc_socket()
+ * align  alignment passed to rte_malloc_socket()
+ * numa   socket passed to rte_malloc_socket()
+ *
+ * The mapping is always requested for slot 0 and covers
+ * nt_util_align_size(size) bytes; that value is also what the returned
+ * descriptor records as its size.
+ *
+ * Returns a descriptor to be released with nt_dma_free(), or NULL when no
+ * map callback is registered, an allocation fails or the mapping fails.
+ */
+struct nt_dma_s *nt_dma_alloc(uint64_t size, uint64_t align, int numa)
+{
+       const uint64_t map_size = nt_util_align_size(size);
+       struct nt_dma_s *vfio_addr;
+       void *addr;
+       int res;
+
+       /* The callback table stays zeroed until nt_util_vfio_init() has run. */
+       if (vfio_cb.vfio_dma_map == NULL) {
+               NT_LOG(ERR, GENERAL, "VFIO DMA map callback is not registered\n");
+               return NULL;
+       }
+
+       vfio_addr = rte_malloc(NULL, sizeof(*vfio_addr), 0);
+
+       if (!vfio_addr) {
+               NT_LOG(ERR, GENERAL, "VFIO rte_malloc failed\n");
+               return NULL;
+       }
+
+       addr = rte_malloc_socket(NULL, size, align, numa);
+
+       if (!addr) {
+               rte_free(vfio_addr);
+               NT_LOG(ERR, GENERAL, "VFIO rte_malloc_socket failed\n");
+               return NULL;
+       }
+
+       res = vfio_cb.vfio_dma_map(0, addr, &vfio_addr->iova, map_size);
+
+       if (res != 0) {
+               rte_free(addr);
+               rte_free(vfio_addr);
+               NT_LOG(ERR, GENERAL, "VFIO nt_dma_map failed\n");
+               return NULL;
+       }
+
+       vfio_addr->addr = (uint64_t)addr;
+       vfio_addr->size = map_size;
+
+       NT_LOG(DBG, GENERAL,
+               "VFIO DMA alloc addr=%" PRIX64 ", iova=%" PRIX64
+               ", size=%" PRIX64 ", align=0x%" PRIX64 "\n",
+               vfio_addr->addr, vfio_addr->iova, vfio_addr->size, align);
+
+       return vfio_addr;
+}
+
+/*
+ * Unmap and release a buffer obtained from nt_dma_alloc().
+ *
+ * The unmap is requested for slot 0. A failing or unavailable unmap is only
+ * logged; the buffer and its descriptor are freed regardless. Passing NULL
+ * is a no-op.
+ */
+void nt_dma_free(struct nt_dma_s *vfio_addr)
+{
+       if (vfio_addr == NULL)
+               return;
+
+       NT_LOG(DBG, GENERAL, "VFIO DMA free addr=%" PRIX64 ", iova=%" PRIX64 ", size=%" PRIX64 "\n",
+               vfio_addr->addr, vfio_addr->iova, vfio_addr->size);
+
+       /* The callback table stays zeroed until nt_util_vfio_init() has run. */
+       if (vfio_cb.vfio_dma_unmap == NULL) {
+               NT_LOG(WRN, GENERAL, "VFIO DMA unmap callback is not registered\n");
+
+       } else {
+               int res = vfio_cb.vfio_dma_unmap(0, (void *)vfio_addr->addr, vfio_addr->iova,
+                               vfio_addr->size);
+
+               if (res != 0) {
+                       NT_LOG(WRN, GENERAL,
+                               "VFIO DMA free FAILED addr=%" PRIX64 ", iova=%" PRIX64
+                               ", size=%" PRIX64 "\n",
+                               vfio_addr->addr, vfio_addr->iova, vfio_addr->size);
+               }
+       }
+
+       rte_free((void *)(vfio_addr->addr));
+       rte_free(vfio_addr);
+}
-- 
2.43.0

Reply via email to