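Add VFIO support together with the DMA helpers it requires: nt_vfio_dma_map()/nt_vfio_dma_unmap() for IOVA mapping, and nt_dma_alloc()/nt_dma_free() for allocating and releasing DMA-mapped memory. The VFIO context is set up by nt_vfio_setup(), called from nthw_pci_dev_init() during ntnic ethdev startup, and released by nt_vfio_remove(), called from nthw_pci_dev_deinit().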
Signed-off-by: Serhii Iliushyk <sil-...@napatech.com> --- v10 * Use 8 spaces as indentation in meson --- drivers/net/ntnic/meson.build | 1 + drivers/net/ntnic/ntnic_ethdev.c | 22 +++ drivers/net/ntnic/ntnic_vfio.c | 235 +++++++++++++++++++++++++++++ drivers/net/ntnic/ntnic_vfio.h | 29 ++++ drivers/net/ntnic/ntutil/nt_util.c | 65 ++++++++ 5 files changed, 352 insertions(+) create mode 100644 drivers/net/ntnic/ntnic_vfio.c create mode 100644 drivers/net/ntnic/ntnic_vfio.h diff --git a/drivers/net/ntnic/meson.build b/drivers/net/ntnic/meson.build index 6f645320b9..deeb0aca09 100644 --- a/drivers/net/ntnic/meson.build +++ b/drivers/net/ntnic/meson.build @@ -18,5 +18,6 @@ includes = [ sources = files( 'ntlog/ntlog.c', 'ntutil/nt_util.c', + 'ntnic_vfio.c', 'ntnic_ethdev.c', ) diff --git a/drivers/net/ntnic/ntnic_ethdev.c b/drivers/net/ntnic/ntnic_ethdev.c index 02b55e2780..b838eb4d7a 100644 --- a/drivers/net/ntnic/ntnic_ethdev.c +++ b/drivers/net/ntnic/ntnic_ethdev.c @@ -8,10 +8,17 @@ #include <rte_bus_pci.h> #include <ethdev_pci.h> +#include <rte_eal.h> +#include <rte_dev.h> +#include <rte_vfio.h> + #include "ntlog.h" +#include "ntnic_vfio.h" #include "nt_util.h" +#define EXCEPTION_PATH_HID 0 + static const struct rte_pci_id nthw_pci_id_map[] = { { .vendor_id = 0, @@ -21,12 +28,24 @@ static const struct rte_pci_id nthw_pci_id_map[] = { static int nthw_pci_dev_init(struct rte_pci_device *pci_dev) { + nt_vfio_init(); + uint32_t n_port_mask = -1; /* All ports enabled by default */ int n_phy_ports; NT_LOG_DBGX(DEBUG, NTNIC, "Dev %s PF #%i Init : %02x:%02x:%i\n", pci_dev->name, pci_dev->addr.function, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function); + + /* Setup VFIO context */ + int vfio = nt_vfio_setup(pci_dev); + + if (vfio < 0) { + NT_LOG_DBGX(ERR, TNIC, "%s: vfio_setup error %d\n", + (pci_dev->name[0] ? 
pci_dev->name : "NA"), -1); + return -1; + } + n_phy_ports = 0; for (int n_intf_no = 0; n_intf_no < n_phy_ports; n_intf_no++) { @@ -67,6 +86,8 @@ static int nthw_pci_dev_deinit(struct rte_eth_dev *eth_dev __rte_unused) { NT_LOG_DBGX(DEBUG, NTNIC, "PCI device deinitialization\n"); + + nt_vfio_remove(EXCEPTION_PATH_HID); return 0; } @@ -131,3 +152,4 @@ static struct rte_pci_driver rte_nthw_pmd = { }; RTE_PMD_REGISTER_PCI(net_ntnic, rte_nthw_pmd); +RTE_PMD_REGISTER_KMOD_DEP(net_ntnic, "* vfio-pci"); diff --git a/drivers/net/ntnic/ntnic_vfio.c b/drivers/net/ntnic/ntnic_vfio.c new file mode 100644 index 0000000000..f4433152b7 --- /dev/null +++ b/drivers/net/ntnic/ntnic_vfio.c @@ -0,0 +1,235 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2023 Napatech A/S + */ + +#include <sys/ioctl.h> + +#include <rte_malloc.h> +#include <rte_memory.h> +#include <rte_vfio.h> +#include <rte_dev.h> +#include <rte_bus_pci.h> +#include <rte_spinlock.h> + +#include <ntlog.h> +#include <nt_util.h> +#include "ntnic_vfio.h" + +#define ONE_G_SIZE 0x40000000 +#define ONE_G_MASK (ONE_G_SIZE - 1) +#define START_VF_IOVA 0x220000000000 + +int +nt_vfio_vf_num(const struct rte_pci_device *pdev) +{ + return ((pdev->addr.devid & 0x1f) << 3) + ((pdev->addr.function) & 0x7); +} + +/* Internal API */ +struct vfio_dev { + int container_fd; + int group_fd; + int dev_fd; + uint64_t iova_addr; +}; + +static struct vfio_dev vfio_list[256]; + +static struct vfio_dev * +vfio_get(int vf_num) +{ + if (vf_num < 0 || vf_num > 255) + return NULL; + + return &vfio_list[vf_num]; +} + +/* External API */ +int +nt_vfio_setup(struct rte_pci_device *dev) +{ + char devname[RTE_DEV_NAME_MAX_LEN] = { 0 }; + int iommu_group_num; + int vf_num; + struct vfio_dev *vfio; + + NT_LOG(INF, NTNIC, "NT VFIO device setup %s\n", dev->name); + + vf_num = nt_vfio_vf_num(dev); + + vfio = vfio_get(vf_num); + + if (vfio == NULL) { + NT_LOG(ERR, NTNIC, "VFIO device setup failed. 
Illegal device id\n"); + return -1; + } + + vfio->dev_fd = -1; + vfio->group_fd = -1; + vfio->container_fd = -1; + vfio->iova_addr = START_VF_IOVA; + + rte_pci_device_name(&dev->addr, devname, RTE_DEV_NAME_MAX_LEN); + rte_vfio_get_group_num(rte_pci_get_sysfs_path(), devname, &iommu_group_num); + + if (vf_num == 0) { + /* use default container for pf0 */ + vfio->container_fd = RTE_VFIO_DEFAULT_CONTAINER_FD; + + } else { + vfio->container_fd = rte_vfio_container_create(); + + if (vfio->container_fd < 0) { + NT_LOG(ERR, NTNIC, + "VFIO device setup failed. VFIO container creation failed.\n"); + return -1; + } + } + + vfio->group_fd = rte_vfio_container_group_bind(vfio->container_fd, iommu_group_num); + + if (vfio->group_fd < 0) { + NT_LOG(ERR, NTNIC, + "VFIO device setup failed. VFIO container group bind failed.\n"); + goto err; + } + + if (vf_num > 0) { + if (rte_pci_map_device(dev)) { + NT_LOG(ERR, NTNIC, + "Map VFIO device failed. is the vfio-pci driver loaded?\n"); + goto err; + } + } + + vfio->dev_fd = rte_intr_dev_fd_get(dev->intr_handle); + + NT_LOG(DBG, NTNIC, + "%s: VFIO id=%d, dev_fd=%d, container_fd=%d, group_fd=%d, iommu_group_num=%d\n", + dev->name, vf_num, vfio->dev_fd, vfio->container_fd, vfio->group_fd, + iommu_group_num); + + return vf_num; + +err: + + if (vfio->container_fd != RTE_VFIO_DEFAULT_CONTAINER_FD) + rte_vfio_container_destroy(vfio->container_fd); + + return -1; +} + +int +nt_vfio_remove(int vf_num) +{ + struct vfio_dev *vfio; + + NT_LOG(DBG, NTNIC, "NT VFIO device remove VF=%d\n", vf_num); + + vfio = vfio_get(vf_num); + + if (!vfio) { + NT_LOG(ERR, NTNIC, "VFIO device remove failed. 
Illegal device id\n"); + return -1; + } + + rte_vfio_container_destroy(vfio->container_fd); + return 0; +} + +int +nt_vfio_dma_map(int vf_num, void *virt_addr, uint64_t *iova_addr, uint64_t size) +{ + uint64_t gp_virt_base; + uint64_t gp_offset; + + if (size == ONE_G_SIZE) { + gp_virt_base = (uint64_t)virt_addr & ~ONE_G_MASK; + gp_offset = (uint64_t)virt_addr & ONE_G_MASK; + + } else { + gp_virt_base = (uint64_t)virt_addr; + gp_offset = 0; + } + + struct vfio_dev *vfio; + + vfio = vfio_get(vf_num); + + if (vfio == NULL) { + NT_LOG(ERR, NTNIC, "VFIO MAP: VF number %d invalid\n", vf_num); + return -1; + } + + NT_LOG(DBG, NTNIC, + "VFIO MMAP VF=%d VirtAddr=%p HPA=%" PRIX64 " VirtBase=%" PRIX64 + " IOVA Addr=%" PRIX64 " size=%" PRIX64 "\n", + vf_num, virt_addr, rte_malloc_virt2iova(virt_addr), gp_virt_base, vfio->iova_addr, + size); + + int res = rte_vfio_container_dma_map(vfio->container_fd, gp_virt_base, vfio->iova_addr, + size); + + NT_LOG(DBG, NTNIC, "VFIO MMAP res %i, container_fd %i, vf_num %i\n", res, + vfio->container_fd, vf_num); + + if (res) { + NT_LOG(ERR, NTNIC, "rte_vfio_container_dma_map failed: res %d\n", res); + return -1; + } + + *iova_addr = vfio->iova_addr + gp_offset; + + vfio->iova_addr += ONE_G_SIZE; + + return 0; +} + +int +nt_vfio_dma_unmap(int vf_num, void *virt_addr, uint64_t iova_addr, uint64_t size) +{ + uint64_t gp_virt_base; + struct vfio_dev *vfio; + + if (size == ONE_G_SIZE) { + uint64_t gp_offset; + gp_virt_base = (uint64_t)virt_addr & ~ONE_G_MASK; + gp_offset = (uint64_t)virt_addr & ONE_G_MASK; + iova_addr -= gp_offset; + + } else { + gp_virt_base = (uint64_t)virt_addr; + } + + vfio = vfio_get(vf_num); + + if (vfio == NULL) { + NT_LOG(ERR, NTNIC, "VFIO UNMAP: VF number %d invalid\n", vf_num); + return -1; + } + + if (vfio->container_fd == -1) + return 0; + + int res = rte_vfio_container_dma_unmap(vfio->container_fd, gp_virt_base, iova_addr, size); + + if (res != 0) { + NT_LOG(ERR, NTNIC, + "VFIO UNMMAP FAILED! 
res %i, container_fd %i, vf_num %i, virt_base=%" PRIX64 + ", IOVA=%" PRIX64 ", size=%" PRIX64 "\n", + res, vfio->container_fd, vf_num, gp_virt_base, iova_addr, size); + return -1; + } + + return 0; +} + +void +nt_vfio_init(void) +{ + struct nt_util_vfio_impl s = { .vfio_dma_map = nt_vfio_dma_map, + .vfio_dma_unmap = nt_vfio_dma_unmap + }; + nt_util_vfio_init(&s); +} diff --git a/drivers/net/ntnic/ntnic_vfio.h b/drivers/net/ntnic/ntnic_vfio.h new file mode 100644 index 0000000000..69fef7923d --- /dev/null +++ b/drivers/net/ntnic/ntnic_vfio.h @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2023 Napatech A/S + */ + +#ifndef _NTNIC_VFIO_H_ +#define _NTNIC_VFIO_H_ + +#include <rte_dev.h> +#include <rte_bus_pci.h> +#include <ethdev_pci.h> + +void +nt_vfio_init(void); + +int +nt_vfio_setup(struct rte_pci_device *dev); +int +nt_vfio_remove(int vf_num); + +int +nt_vfio_dma_map(int vf_num, void *virt_addr, uint64_t *iova_addr, uint64_t size); +int +nt_vfio_dma_unmap(int vf_num, void *virt_addr, uint64_t iova_addr, uint64_t size); + +/* Find device (PF/VF) number from device address */ +int +nt_vfio_vf_num(const struct rte_pci_device *dev); +#endif /* _NTNIC_VFIO_H_ */ diff --git a/drivers/net/ntnic/ntutil/nt_util.c b/drivers/net/ntnic/ntutil/nt_util.c index 5395bf6993..53c39ef112 100644 --- a/drivers/net/ntnic/ntutil/nt_util.c +++ b/drivers/net/ntnic/ntutil/nt_util.c @@ -15,6 +15,8 @@ #include "ntlog.h" #include "nt_util.h" +static struct nt_util_vfio_impl vfio_cb; + /* uses usleep which schedules out the calling thread */ void nt_os_wait_usec(int val) { @@ -31,3 +33,66 @@ uint64_t nt_util_align_size(uint64_t size) { return 1 << rte_log2_u64(size); } + +void nt_util_vfio_init(struct nt_util_vfio_impl *impl) +{ + vfio_cb = *impl; +} + +struct nt_dma_s *nt_dma_alloc(uint64_t size, uint64_t align, int numa) +{ + int res; + struct nt_dma_s *vfio_addr; + + vfio_addr = rte_malloc(NULL, sizeof(struct nt_dma_s), 0); + + if (!vfio_addr) { + NT_LOG(ERR, 
GENERAL, "VFIO rte_malloc failed\n"); + return NULL; + } + + void *addr = rte_malloc_socket(NULL, size, align, numa); + + if (!addr) { + rte_free(vfio_addr); + NT_LOG(ERR, GENERAL, "VFIO rte_malloc_socket failed\n"); + return NULL; + } + + res = vfio_cb.vfio_dma_map(0, addr, &vfio_addr->iova, nt_util_align_size(size)); + + if (res != 0) { + rte_free(addr); + rte_free(vfio_addr); + NT_LOG(ERR, GENERAL, "VFIO nt_dma_map failed\n"); + return NULL; + } + + vfio_addr->addr = (uint64_t)addr; + vfio_addr->size = nt_util_align_size(size); + + NT_LOG(DBG, GENERAL, + "VFIO DMA alloc addr=%" PRIX64 ", iova=%" PRIX64 + ", size=%" PRIX64 "align=0x%" PRIX64 "\n", + vfio_addr->addr, vfio_addr->iova, vfio_addr->size, align); + + return vfio_addr; +} + +void nt_dma_free(struct nt_dma_s *vfio_addr) +{ + NT_LOG(DBG, GENERAL, "VFIO DMA free addr=%" PRIX64 ", iova=%" PRIX64 ", size=%" PRIX64 "\n", + vfio_addr->addr, vfio_addr->iova, vfio_addr->size); + + int res = vfio_cb.vfio_dma_unmap(0, (void *)vfio_addr->addr, vfio_addr->iova, + vfio_addr->size); + + if (res != 0) { + NT_LOG(WRN, GENERAL, + "VFIO DMA free FAILED addr=%" PRIX64 ", iova=%" PRIX64 ", size=%" PRIX64 "\n", + vfio_addr->addr, vfio_addr->iova, vfio_addr->size); + } + + rte_free((void *)(vfio_addr->addr)); + rte_free(vfio_addr); +} -- 2.43.0