Implementation of the PCI api, using pci-vfio.

Signed-off-by: Christophe Milard <christophe.mil...@linaro.org>
---
 platform/linux-generic/Makefile.am                 |   2 +
 .../linux-generic/include/odp_pci_vfio_internal.h  |  92 +++
 platform/linux-generic/odp_pci_vfio.c              | 873 +++++++++++++++++++++
 3 files changed, 967 insertions(+)
 create mode 100644 platform/linux-generic/include/odp_pci_vfio_internal.h
 create mode 100644 platform/linux-generic/odp_pci_vfio.c

diff --git a/platform/linux-generic/Makefile.am 
b/platform/linux-generic/Makefile.am
index 1e1015d..6a08811 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -108,6 +108,7 @@ noinst_HEADERS = \
                  ${srcdir}/include/odp_packet_socket.h \
                  ${srcdir}/include/odp_packet_tap.h \
                  ${srcdir}/include/odp_pci_internal.h \
+                 ${srcdir}/include/odp_pci_vfio_internal.h \
                  ${srcdir}/include/odp_pkt_queue_internal.h \
                  ${srcdir}/include/odp_pool_internal.h \
                  ${srcdir}/include/odp_queue_internal.h \
@@ -138,6 +139,7 @@ __LIB__libodp_la_SOURCES = \
                           odp_packet.c \
                           odp_packet_flags.c \
                           odp_packet_io.c \
+                          odp_pci_vfio.c \
                           pktio/io_ops.c \
                           pktio/pktio_common.c \
                           pktio/loop.c \
diff --git a/platform/linux-generic/include/odp_pci_vfio_internal.h 
b/platform/linux-generic/include/odp_pci_vfio_internal.h
new file mode 100644
index 0000000..a7c2fe1
--- /dev/null
+++ b/platform/linux-generic/include/odp_pci_vfio_internal.h
@@ -0,0 +1,92 @@
+/* Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+/**
+ * @file
+ *
+ * ODP pci vfio - implementation internal
+ */
+
+#ifndef ODP_PCI_VFIO_INTERNAL_H_
+#define ODP_PCI_VFIO_INTERNAL_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <odp/dma.h>
+#include <odp_dma_internal.h>
+#include <odp/pci.h>
+#include <odp_pci_internal.h>
+
+/**
+ * Initialise and map the PCI interface for the given pci address,
+ *
+ * @param pci_addr     pci address to the board that should be initialized and
+ *                     mapped, i.e. something like: "0000:23:00.0"
+ *
+ * @returns 0 a pci device on success, NULL on error.
+ */
+int _odp_pci_vfio_init(const char *pci_addr, pci_dev_t *dev);
+
+/**
+ * Release a PCI device
+ *
+ * @param pci_dev      The PCI device to be realeased as returned by
+ *                     _odp_pci_vfio_init()
+ */
+void _odp_pci_vfio_release(pci_dev_t *dev);
+
+/**
+ * map a DMA region fragment.
+ *
+ * @param dev          the pci device as returned by odp_pci_init
+ * @param region       the DMA region descriptor.
+ *
+ * @returns 0 on success or a negative value on error
+ */
+int _odp_pci_vfio_map_dma_region_f(pci_dev_t *dev, dma_map_t *region);
+
+/**
+ * unmap a DMA region fragment.
+ *
+ * @param dev          the pci device as returned by odp_pci_init
+ * @param region       the DMA region descriptor.
+ *
+ * @returns 0 on success or a negative value on error
+ */
+int _odp_pci_vfio_unmap_dma_region_f(pci_dev_t *dev, dma_map_t *region);
+
+/**
+ * Read the PCI config area
+ *
+ * @param dev          the pci device as returned by odp_pci_init
+ * @param buf          where to write the read data
+ * @param len          the number of bytes to read
+ * @param offs         the offset where read should start
+ *
+ * @returns the number of read bytes
+ */
+int _odp_pci_vfio_read_config(pci_dev_t *dev, void *buf, size_t len, int offs);
+
+/**
+ * Write the PCI config area
+ *
+ * @param dev          the pci device as returned by odp_pci_init
+ * @param buf          where to take the data to be written
+ * @param len          the number of bytes to written
+ * @param offs         the offset where data should be written
+ *
+ * @returns the number of written bytes
+ */
+int _odp_pci_vfio_write_config(pci_dev_t *dev, const void *buf,
+                              size_t len, int offs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ODP_PCI_VFIO_INTERNAL_H_ */
diff --git a/platform/linux-generic/odp_pci_vfio.c 
b/platform/linux-generic/odp_pci_vfio.c
new file mode 100644
index 0000000..adddbd8
--- /dev/null
+++ b/platform/linux-generic/odp_pci_vfio.c
@@ -0,0 +1,873 @@
+/* Copyright (c) 2015, Linaro Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <linux/vfio.h>
+#include <linux/limits.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <linux/pci_regs.h>
+#include <unistd.h>
+#include <odp/pci.h>
+#include <odp/shared_memory.h>
+#include <odp_debug_internal.h>
+#include <odp_pci_vfio_internal.h>
+#include <odp_align_internal.h>
+#include <odp/spinlock.h>
+
+#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
+#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
+#define VFIO_GROUP_FMT "/dev/vfio/%u"
+/* The vfio-pci driver harcodes map region n at address n<<40: */
+/* see VFIO_PCI_OFFSET_SHIFT in linux */
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t)x << 40ULL)
+
+#define        DIM(a)  (sizeof(a) / sizeof((a)[0]))
+
+#define MAX_PCI_VFIO_GRP        5
+#define MAX_PCI_VFIO_DEVICES    8
+#define MAX_PCI_VFIO_DMA_FRAGS   50
+
+/*
+ * pci_vfio uses file descriptors to access differents objects.
+ * Access to data structures from different linux threads are protected
+ * by the following spinlock
+ */
+odp_spinlock_t vfio_thread_spinlock;
+
+/**
+ * device implementation data for the VFIO implementation of the PCI interface
+ */
+typedef struct vfio_t {
+       int iommu_group_no;
+       int container_fd;
+       int group_fd;
+       int dev_fd;
+       int config_fd;
+} vfio_t;
+
+/**
+ * data common to all pci vfio instances of this linux thread
+ */
+typedef struct pci_vfio_data_t {
+       int container_fd;       /* odp uses one container */
+       int container_ref_count;
+       int iommu_type;         /* iommu type set for this container */
+       dma_map_t *dma_maps[MAX_PCI_VFIO_DMA_FRAGS]; /* mapped regions */
+       const char *dev_names[MAX_PCI_VFIO_DEVICES]; /*initialised devices */
+       struct {
+               int group_no;
+               int group_fd;
+               int refcount;
+       } groups[MAX_PCI_VFIO_GRP];
+} pci_vfio_data_t;
+
+static pci_vfio_data_t pci_vfio_data;
+
+/* set PCI bus mastering */
+static int pci_vfio_set_bus_master(int dev_fd)
+{
+       uint16_t reg;
+       int ret;
+
+       ret = pread64(dev_fd, &reg, sizeof(reg),
+                     VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+                     PCI_COMMAND);
+       if (ret != sizeof(reg)) {
+               ODP_ERR("Cannot read command from PCI config space!\n");
+               return -1;
+       }
+
+       /* set the master bit */
+       reg |= PCI_COMMAND_MASTER;
+
+       ret = pwrite64(dev_fd, &reg, sizeof(reg),
+                      VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
+                      PCI_COMMAND);
+
+       if (ret != sizeof(reg)) {
+               ODP_ERR("Cannot write command to PCI config space!\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * read pci device configuration space
+ */
+int _odp_pci_vfio_read_config(pci_dev_t *dev, void *buf, size_t len, int offs)
+{
+       vfio_t *vfio = (vfio_t *)dev->implementation_data;
+
+       return pread64(vfio->dev_fd, buf, len,
+                      VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+                      + offs);
+}
+
+/*
+ * write pci device configuration space
+ */
+int _odp_pci_vfio_write_config(pci_dev_t *dev,
+                              const void *buf, size_t len, int offs)
+{
+       vfio_t *vfio = (vfio_t *)dev->implementation_data;
+
+       return pwrite64(vfio->dev_fd, buf, len,
+                       VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX)
+                       + offs);
+}
+
+/*
+ * split string into tokens
+ */
+static int strsplit(char *string, int stringlen,
+                   char **tokens, int maxtokens, char delim)
+{
+       int i, tok = 0;
+       int tokstart = 1; /* first token is right at start of string */
+
+       if (string == NULL || tokens == NULL)
+               goto einval_error;
+
+       for (i = 0; i < stringlen; i++) {
+               if (string[i] == '\0' || tok >= maxtokens)
+                       break;
+               if (tokstart) {
+                       tokstart = 0;
+                       tokens[tok++] = &string[i];
+               }
+               if (string[i] == delim) {
+                       string[i] = '\0';
+                       tokstart = 1;
+               }
+       }
+       return tok;
+
+einval_error:
+       return -1;
+}
+
+/**
+ * initialize linux thread common data
+ */
+static void pci_vfio_data_init(void)
+{
+       int i;
+
+       odp_spinlock_lock(&vfio_thread_spinlock);
+
+       pci_vfio_data.container_fd = -1;
+       pci_vfio_data.container_ref_count = 0;
+       pci_vfio_data.iommu_type = -1;
+       for (i = 0; i < MAX_PCI_VFIO_GRP; i++) {
+               pci_vfio_data.groups[i].group_no = -1;
+               pci_vfio_data.groups[i].group_fd = -1;
+               pci_vfio_data.groups[i].refcount = 0;
+       }
+       /* set all DMA fragments as free for this container: */
+       for (i = 0; i < MAX_PCI_VFIO_DMA_FRAGS; i++)
+               pci_vfio_data.dma_maps[i] = NULL;
+
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+}
+
+/*
+ * open a vfio container.
+ * return: the container file descriptor (fd) or -1 on error
+ */
+static int pci_vfio_get_container_fd(void)
+{
+       int ret, vfio_container_fd;
+
+       /* initialise common vfio area  */
+       odp_spinlock_lock(&vfio_thread_spinlock);
+
+       /* if the file descriptor for the container already exists, return it */
+       if (pci_vfio_data.container_fd != -1) {
+               pci_vfio_data.container_ref_count++;
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return pci_vfio_data.container_fd;
+       }
+
+       /* try to open the vfio container: */
+       vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR);
+       if (vfio_container_fd < 0) {
+               ODP_ERR("cannot open VFIO container, "
+                       "error %i (%s)\n", errno, strerror(errno));
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return -1;
+       }
+
+       /* check the VFIO API version */
+       ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION);
+       if (ret != VFIO_API_VERSION) {
+               if (ret < 0)
+                       ODP_ERR("couldn't get VFIO API version, err=%i (%s)\n",
+                               errno, strerror(errno));
+               else
+                       ODP_ERR("unsupported VFIO API version!\n");
+               close(vfio_container_fd);
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return -1;
+       }
+
+       /* check if we support IOMMU type 1 */
+       ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
+       if (ret != 1) {
+               if (ret < 0)
+                       ODP_ERR("could not get IOMMU type, "
+                               "error %i (%s)\n", errno,
+                               strerror(errno));
+               else
+                       ODP_ERR("unsupported IOMMU type "
+                               "detected in VFIO\n");
+               close(vfio_container_fd);
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return -1;
+       }
+
+       pci_vfio_data.container_fd = vfio_container_fd;
+       pci_vfio_data.container_ref_count = 1;
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+       return vfio_container_fd;
+}
+
+static void pci_vfio_release_container_fd(int container_fd)
+{
+       odp_spinlock_lock(&vfio_thread_spinlock);
+
+       if (container_fd != pci_vfio_data.container_fd) {
+               ODP_ERR("Attempt to release an invalid container fd failed");
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return;
+       }
+
+       if (--pci_vfio_data.container_ref_count > 0) {
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return;
+       }
+
+       pci_vfio_data.container_ref_count = 0;
+       close(pci_vfio_data.container_fd);
+       pci_vfio_data.container_fd = -1;
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+}
+
+/*
+ * get IOMMU group number for a PCI device
+ * returns -1 for errors, 0 for non-existent group
+ */
+static int pci_vfio_get_group_no(const char *pci_addr)
+{
+       char linkname[PATH_MAX];
+       char filename[PATH_MAX];
+       char *tok[16], *group_tok, *end;
+       int ret, iommu_group_no;
+
+       memset(linkname, 0, sizeof(linkname));
+       memset(filename, 0, sizeof(filename));
+
+       /* try to find out IOMMU group for this device */
+       snprintf(linkname, sizeof(linkname),
+                SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr);
+
+       ret = readlink(linkname, filename, sizeof(filename));
+
+       /* if the link doesn't exist, no VFIO for us */
+       if (ret < 0) {
+               ODP_ERR("%s not managed by VFIO driver, skipping\n",
+                       pci_addr);
+               ODP_ERR("linkname was %s, error: %s\n",
+                       linkname, strerror(errno));
+               return -1;
+       }
+
+       ret = strsplit(filename, sizeof(filename), tok, DIM(tok), '/');
+
+       if (ret <= 0) {
+               ODP_ERR("%s cannot get IOMMU group\n", pci_addr);
+               return -1;
+       }
+
+       /* IOMMU group is always the last token */
+       errno = 0;
+       group_tok = tok[ret - 1];
+       end = group_tok;
+       iommu_group_no = strtol(group_tok, &end, 10);
+       if ((end != group_tok && *end != '\0') || errno != 0) {
+               ODP_ERR("%s error parsing IOMMU number!\n", pci_addr);
+               return -1;
+       }
+
+       return iommu_group_no;
+}
+
+/*
+ * open group fd
+ */
+static int pci_vfio_get_group_fd(int iommu_group_no)
+{
+       int vfio_group_fd;
+       char filename[PATH_MAX];
+       int index, free_index;
+
+       free_index = -1;
+
+       /* check if the group fd is already known:
+        * Also search for a free slot in the table, in case we need one */
+       odp_spinlock_lock(&vfio_thread_spinlock);
+       for (index = 0; index < MAX_PCI_VFIO_GRP; index++) {
+               if ((pci_vfio_data.groups[index].group_no == iommu_group_no) &&
+                   (pci_vfio_data.groups[index].refcount > 0)) {
+                       pci_vfio_data.groups[index].refcount++;
+                       odp_spinlock_unlock(&vfio_thread_spinlock);
+                       return pci_vfio_data.groups[index].group_fd;
+               }
+
+               if ((pci_vfio_data.groups[index].group_no == -1) &&
+                   (pci_vfio_data.groups[index].refcount == 0))
+                       free_index = index;
+       }
+
+       if (free_index == -1) {
+               ODP_ERR("No space available in vfio grp table "
+                       "(MAX_PCI_VFIO_GRP)");
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return -1;
+       }
+       snprintf(filename, sizeof(filename),
+                VFIO_GROUP_FMT, iommu_group_no);
+       vfio_group_fd = open(filename, O_RDWR);
+       if (vfio_group_fd < 0) {
+               /* if file not found, it's not an error */
+               if (errno != ENOENT) {
+                       ODP_ERR("Cannot open %s: %s\n", filename,
+                               strerror(errno));
+                       odp_spinlock_unlock(&vfio_thread_spinlock);
+                       return -1;
+               }
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return 0;
+       }
+
+       pci_vfio_data.groups[free_index].group_no = iommu_group_no;
+       pci_vfio_data.groups[free_index].refcount = 1;
+       pci_vfio_data.groups[free_index].group_fd = vfio_group_fd;
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+       return vfio_group_fd;
+}
+
+/*
+ * release group fd
+ */
+static void pci_vfio_release_group_fd(int vfio_group_fd)
+{
+       int index;
+
+       /* decrement the file descriptor refcount. close if last. */
+       odp_spinlock_lock(&vfio_thread_spinlock);
+       for (index = 0; index < MAX_PCI_VFIO_GRP; index++) {
+               if (pci_vfio_data.groups[index].group_fd == vfio_group_fd) {
+                       if (--pci_vfio_data.groups[index].refcount == 0) {
+                               pci_vfio_data.groups[index].group_no = -1;
+                               close(vfio_group_fd);
+                               odp_spinlock_unlock(&vfio_thread_spinlock);
+                               return;
+                       }
+               }
+       }
+       ODP_ERR("vfio: attempt to close a unreferenced group file descriptor");
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+}
+
+/*
+ * check vfio group viability
+ * A group is said to be viable if all its members (i.e. devices)
+ * are either bound to VFIO or not bound at all
+ */
+static int pci_vfio_check_group_viable(const char *pci_addr, int vfio_group_fd)
+{
+       int ret;
+       struct vfio_group_status group_status = {
+                       .argsz = sizeof(group_status)
+       };
+
+       /* check if the group is viable */
+       ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+       if (ret) {
+               ODP_ERR("%s can't get group status, error %i (%s)\n",
+                       pci_addr, errno, strerror(errno));
+               close(vfio_group_fd);
+               return -1;
+       } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+               ODP_ERR("%s VFIO group is not viable!\n", pci_addr);
+               close(vfio_group_fd);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * add group to container, if group does not have a container, already
+ */
+static int pci_vfio_add_group_to_container(const char *pci_addr,
+                                          int vfio_group_fd,
+                                          int vfio_container_fd)
+{
+       int ret;
+       struct vfio_group_status group_status = {
+                       .argsz = sizeof(group_status)
+       };
+
+       /* check if the group already has a container... */
+       ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+       if (ret) {
+               ODP_ERR("%s cannot get group status, error %i (%s)\n",
+                       pci_addr, errno, strerror(errno));
+               close(vfio_group_fd);
+               return -1;
+       }
+
+       /* if not, add the group to the container */
+       if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
+               ODP_ERR("Adding group into container ...\n");
+               ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
+                           &vfio_container_fd);
+               if (ret) {
+                       ODP_ERR("%s can't add VFIO group to container "
+                               "error %i (%s)\n",
+                               pci_addr, errno, strerror(errno));
+                       close(vfio_group_fd);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ *  set iommu type to VFIO_TYPE1_IOMMU
+ */
+static int pcio_vfio_set_mmu_type1(int vfio_container_fd)
+{
+       int ret;
+
+       odp_spinlock_lock(&vfio_thread_spinlock);
+       if (pci_vfio_data.iommu_type > 0) {
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return 0; /* already done */
+       }
+
+       ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
+                   VFIO_TYPE1_IOMMU);
+       if (ret) {
+               ODP_ERR("cannot set IOMMU type, "
+                               "error %i (%s)\n", errno, strerror(errno));
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return -1;
+       }
+
+       pci_vfio_data.iommu_type = VFIO_TYPE1_IOMMU;
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+       return 0;
+}
+
+/*
+ * returns true if the given DMA mapping fragment
+ * already exists for the container
+ */
+static bool dma_mapping_exists(dma_map_t *region_f)
+{
+       int i;
+       dma_map_t *map;
+
+       for (i = 0; i < MAX_PCI_VFIO_DMA_FRAGS; i++) {
+               map = pci_vfio_data.dma_maps[i];
+               if (map == NULL)
+                       continue;
+               if ((region_f->dma_addr == map->dma_addr) &&
+                   (region_f->size == map->size))
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * set up DMA mappings.
+ */
+int _odp_pci_vfio_map_dma_region_f(pci_dev_t *dev, dma_map_t *fragment)
+{
+       int ret;
+       int i, free_index;
+       vfio_t *vfio = (vfio_t *)dev->implementation_data;
+       struct vfio_iommu_type1_dma_map dma_map;
+
+       odp_spinlock_lock(&vfio_thread_spinlock);
+
+       /* search for free room for this new fragment: */
+       free_index = -1;
+       for (i = 0; i < MAX_PCI_VFIO_DMA_FRAGS; i++) {
+               if (pci_vfio_data.dma_maps[i] == NULL) {
+                       free_index = i;
+                       break;
+               }
+       }
+
+       if (free_index == -1) {
+               ODP_DBG("DMA MAPPING: No room left for new DMA mapping");
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return -1;
+       }
+
+       /*check that no equivalent fragment (mapping the same area) exist: */
+       if (dma_mapping_exists(fragment)) {
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return 0;
+       }
+
+       /* perform the DMA mapping */
+       memset(&dma_map, 0, sizeof(dma_map));
+       dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+       dma_map.size = ODP_PAGE_SIZE_ROUNDUP(fragment->size);
+       dma_map.iova = fragment->dma_addr;
+       dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+       dma_map.vaddr = (uint64_t)fragment->addr;
+       ODP_DBG("DMA MAPPING: iova addr: %lx to addr: %lx of size:%ld\n",
+               (uint64_t)dma_map.iova,
+               (uint64_t)dma_map.vaddr,
+               (uint64_t)dma_map.size);
+       ret = ioctl(vfio->container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+       if (ret) {
+               ODP_ERR("cannot set up DMA mapping, "
+                               "error %i (%s)\n", errno, strerror(errno));
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return -1;
+       }
+
+       /* remember this mapped fragment for this container */
+       pci_vfio_data.dma_maps[free_index] = fragment;
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+       return 0;
+}
+
+/*
+ * remove DMA mappings.
+ */
+int _odp_pci_vfio_unmap_dma_region_f(pci_dev_t *dev, dma_map_t *fragment)
+{
+       int i;
+       int ret;
+       vfio_t *vfio = (vfio_t *)dev->implementation_data;
+       struct vfio_iommu_type1_dma_unmap dma_unmap;
+
+       odp_spinlock_lock(&vfio_thread_spinlock);
+       /* search this fragment in the list of known fragments */
+       for (i = 0; i < MAX_PCI_VFIO_DMA_FRAGS; i++)
+               if (pci_vfio_data.dma_maps[i] == fragment)
+                       break;
+
+       /* fragment was never mapped: error! */
+       if ((i >= MAX_PCI_VFIO_DMA_FRAGS) ||
+           (pci_vfio_data.dma_maps[i] != fragment)) {
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return -1;
+       }
+
+       /* mark this fragment as deleteted */
+       pci_vfio_data.dma_maps[i] = NULL;
+
+       /*check that no equivalent fragment (mapping the same area) remains: */
+       if (dma_mapping_exists(fragment)) {
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return 0;
+       }
+
+       /* perform the DMA unmapping */
+       memset(&dma_unmap, 0, sizeof(dma_unmap));
+       dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+       dma_unmap.size = ODP_PAGE_SIZE_ROUNDUP(fragment->size);
+       dma_unmap.iova = fragment->dma_addr;
+       dma_unmap.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+       ODP_DBG("DMA UNMAPPING: iova addr: %lx of size:%ld\n",
+               (uint64_t)dma_unmap.iova,
+               (uint64_t)dma_unmap.size);
+       ret = ioctl(vfio->container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+       if (ret) {
+               ODP_ERR("cannot remove DMA mapping, "
+                       "error %i (%s)\n", errno, strerror(errno));
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               return -1;
+       }
+
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+       return 0;
+}
+
+/*
+ * get device file descriptor
+ */
+static int pci_vfio_get_device_fd(int vfio_group_fd, const char *pci_addr)
+{
+       int vfio_dev_fd;
+
+       /* get a file descriptor for the device */
+       vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, pci_addr);
+       if (vfio_dev_fd < 0) {
+               /* if we cannot get a device fd, this simply means that this
+                * particular port is not bound to VFIO
+                */
+               ODP_DBG("%s not managed by VFIO driver, skipping\n"
+                       "error %i (%s)\n", pci_addr, errno, strerror(errno));
+               return -1;
+       }
+
+       return vfio_dev_fd;
+}
+
+/*
+ * map a particular resource  from a device file
+ */
+static void *pci_map_resource(void *requested_addr, int fd,
+                             off_t offset, size_t size, int additional_flags)
+{
+       void *mapaddr;
+
+       /* Map the PCI memory resource of device */
+       mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE,
+                      MAP_SHARED | additional_flags, fd, offset);
+       if (mapaddr == MAP_FAILED) {
+               ODP_ERR("%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
+                       __func__, fd, requested_addr,
+                       (unsigned long)size, (unsigned long)offset,
+                       strerror(errno), mapaddr);
+       } else
+               ODP_DBG("%s(): mapped(%d, %p, 0x%lx, 0x%lx): %s (%p)\n",
+                       __func__, fd, requested_addr,
+                       (unsigned long)size, (unsigned long)offset,
+                       strerror(errno), mapaddr);
+
+       return mapaddr;
+}
+
+/*
+ * check if dev with pci_addr is already registered as initialised by this
+ * linux thread. If not, mark it as initialised (TAS)
+ * Return true if the pci address is not already registered, false otherwise.
+ */
+static bool tas_device(const char *pci_addr)
+{
+       int i;
+       int free_idx;
+
+       free_idx = -1;
+       odp_spinlock_lock(&vfio_thread_spinlock);
+       for (i = 0; i < MAX_PCI_VFIO_DEVICES; i++) {
+               if (pci_vfio_data.dev_names[i] == NULL) {
+                       free_idx = i;
+                       continue;
+               }
+
+               if (strncmp(pci_vfio_data.dev_names[i],
+                           pci_addr, _ODP_PCI_ADDR_LEN + 1) == 0) {
+                       odp_spinlock_unlock(&vfio_thread_spinlock);
+                       return false;
+               }
+       }
+
+       if (free_idx == -1) {
+               odp_spinlock_unlock(&vfio_thread_spinlock);
+               ODP_ERR("No space left for more vfio pci devices!");
+               return false; /* no space left -> prevent further init */
+       }
+
+       pci_vfio_data.dev_names[free_idx] = pci_addr;
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+       return true;
+}
+
+/*
+ * remove the pci_address from the list of vfio registered devices
+ */
+static void rm_device(const char *pci_addr)
+{
+       int i;
+
+       odp_spinlock_lock(&vfio_thread_spinlock);
+       for (i = 0; i < MAX_PCI_VFIO_DEVICES; i++) {
+               if (pci_vfio_data.dev_names[i] == NULL)
+                       continue;
+
+               if (strncmp(pci_vfio_data.dev_names[i],
+                           pci_addr, _ODP_PCI_ADDR_LEN + 1) == 0) {
+                       pci_vfio_data.dev_names[i] = NULL;
+                       odp_spinlock_unlock(&vfio_thread_spinlock);
+                       return;
+               }
+       }
+
+       odp_spinlock_unlock(&vfio_thread_spinlock);
+}
+
+/*
+ * device init
+ */
+int _odp_pci_vfio_init(const char *pci_addr, pci_dev_t *dev)
+{
+       int ret;
+       int i;
+       struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+       struct vfio_region_info reg = { .argsz = sizeof(reg) };
+       vfio_t *vfio;
+
+       /* initialize linux thread common data (only first time) */
+       pci_vfio_data_init();
+
+       /* prevent double init on same device */
+       if (!tas_device(pci_addr))
+               return -1;
+
+       /* allocate memory for vfio specific data and link it to dev: */
+       vfio = malloc(sizeof(vfio_t));
+       if (!vfio)
+               goto out;
+       dev->implementation_data = vfio;
+
+       strncpy(dev->pci_address, pci_addr, _ODP_PCI_ADDR_LEN);
+
+       /* open a vfio container: */
+       vfio->container_fd = pci_vfio_get_container_fd();
+       if (vfio->container_fd == -1)
+               goto out1;
+
+       /* get group number the target PCI_ADDRESS belongs to:*/
+       vfio->iommu_group_no = pci_vfio_get_group_no(pci_addr);
+       if ((vfio->iommu_group_no == -1) || (vfio->iommu_group_no == 0))
+               goto out1;
+
+       /* get the actual group fd */
+       vfio->group_fd = pci_vfio_get_group_fd(vfio->iommu_group_no);
+       if (vfio->group_fd < 0)
+               goto out1;
+
+       /* Test the group is viable */
+       if (pci_vfio_check_group_viable(pci_addr, vfio->group_fd) < 0)
+               goto out1;
+
+       /* Add the group to the container, if needed: */
+       if (pci_vfio_add_group_to_container(pci_addr, vfio->group_fd,
+                                           vfio->container_fd) < 0)
+               goto out1;
+
+       /* set mmu type 1: */
+       if (pcio_vfio_set_mmu_type1(vfio->container_fd) < 0)
+               goto out1;
+
+       /* Get a file descriptor for the device */
+       vfio->dev_fd = pci_vfio_get_device_fd(vfio->group_fd, pci_addr);
+       if (vfio->dev_fd < 0)
+               goto out1;
+
+       /* Get pci device information (number of regions and interrupts...) */
+       ret = ioctl(vfio->dev_fd, VFIO_DEVICE_GET_INFO, &device_info);
+       if (ret) {
+               ODP_ERR("%s cannot get device info, "
+                       "error %i (%s)\n", pci_addr, errno, strerror(errno));
+               close(vfio->dev_fd);
+               goto out1;
+       }
+       ODP_DBG("found %d regions in the device\n", device_info.num_regions);
+
+       /* map all the found BAR regions: */
+       for (i = VFIO_PCI_BAR0_REGION_INDEX;
+            i <= VFIO_PCI_BAR5_REGION_INDEX; i++) {
+               reg.index = i;
+               ret = ioctl(vfio->dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
+               if (ret) {
+                       ODP_ERR("%s cannot get device region info "
+                               "error %i (%s)\n", pci_addr, errno,
+                               strerror(errno));
+                       continue;
+               }
+               ODP_DBG("REGION %d: size=%ld, offset=%lx:\n",
+                       i, (long int)reg.size, (long int)reg.offset);
+
+               /* skip non-mmapable BARs */
+               if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
+                       ODP_DBG("found non mappable region %d in the device\n",
+                               reg.index);
+                       continue;
+               }
+
+               /* skip non-valid regions (size 0) */
+               if (!reg.size) {
+                       ODP_DBG("found zero-sized region %d in the device\n",
+                               reg.index);
+                       continue;
+               }
+
+               dev->bar_maps[i].addr = pci_map_resource(NULL, vfio->dev_fd,
+                                                        reg.offset,
+                                                        reg.size, 0);
+               dev->bar_maps[i].size = reg.size;
+       }
+
+       pci_vfio_set_bus_master(vfio->dev_fd);
+
+       /* device reset and go... */
+       ioctl(vfio->dev_fd, VFIO_DEVICE_RESET);
+
+       return 0;
+
+       /*FIXME: proper error handling with release of resource (close()) */
+out1:
+       free(vfio);
+out:
+       rm_device(pci_addr);
+       return -1;
+}
+
+/*
+ * device release
+ */
+void _odp_pci_vfio_release(pci_dev_t *dev)
+{
+       vfio_t *vfio = (vfio_t *)dev->implementation_data;
+
+       /*FIXME: check if DMA mapping still exists for the device
+         FIXME: or good enough to do that on final container closure...? */
+
+       /* release the config area file descriptor: */
+       close(vfio->config_fd);
+
+       /* release the device file descriptor: */
+       close(vfio->dev_fd);
+
+       /* release (if possible), the group file descriptor: */
+       pci_vfio_release_group_fd(vfio->group_fd);
+
+       /* release (if possible), the container file descriptor: */
+       pci_vfio_release_container_fd(vfio->container_fd);
+
+       /* release the vfio specific device data */
+       free(dev->implementation_data);
+
+       /* mark pci address as uninitialized: */
+       rm_device(dev->pci_address);
+}
-- 
2.1.4

_______________________________________________
lng-odp mailing list
lng-odp@lists.linaro.org
https://lists.linaro.org/mailman/listinfo/lng-odp

Reply via email to