[dpdk-dev] [PATCH 1/3] vfio: Added hot removal feature for vfio

2015-08-07 Thread Burakov, Anatoly
Hi Harpal,

> I think maintaining a ref count of groups will solve this problem. 

Yes, refcounting seems like the best way to solve this problem.

> Additionally, I have found a bug in existing design in case of multiple 
> devices of same group. 
> <...>
> Therefore, I will provide a fix for this as well in my next version.

That should probably be in a separate patch.

Thanks,
Anatoly


[dpdk-dev] [PATCH 1/3] vfio: Added hot removal feature for vfio

2015-08-04 Thread Harpal Singh
From: Harpal Singh 

This patch will add a new API i.e. pci_vfio_unmap_resource.
It will basically cleanup all the vfio resources allocated for a device.
cleanup includes :-
1) removing vfio_res from vfio_res_list
2) unmap mapped bars
3) close device fd
4) close group fd
5) free vfio_res

Signed-off-by: Harpal Singh 
---
 lib/librte_eal/common/include/rte_pci.h|  3 ++
 lib/librte_eal/linuxapp/eal/eal_pci.c  |  4 +-
 lib/librte_eal/linuxapp/eal/eal_pci_init.h |  1 +
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 60 ++
 4 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/lib/librte_eal/common/include/rte_pci.h 
b/lib/librte_eal/common/include/rte_pci.h
index 3fb2d3a..fa8c1a8 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -243,6 +243,9 @@ struct mapped_pci_resource {
struct rte_pci_addr pci_addr;
char path[PATH_MAX];
int nb_maps;
+   int vfio_group_fd;
+   int vfio_dev_fd;
+   int iommu_group_no;
struct pci_map maps[PCI_MAX_RESOURCE];
 };

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c 
b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 0e62f65..edabde5 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -158,7 +158,9 @@ pci_unmap_device(struct rte_pci_device *dev)
/* try unmapping the NIC resources using VFIO if it exists */
switch (dev->kdrv) {
case RTE_KDRV_VFIO:
-   RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n");
+#ifdef VFIO_PRESENT
+   pci_vfio_unmap_resource(dev);
+#endif
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h 
b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
index a17c708..9a42ea2 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -70,6 +70,7 @@ int pci_vfio_write_config(const struct rte_intr_handle 
*intr_handle,

 /* map VFIO resource prototype */
 int pci_vfio_map_resource(struct rte_pci_device *dev);
+int pci_vfio_unmap_resource(struct rte_pci_device *dev);
 int pci_vfio_get_group_fd(int iommu_group_fd);
 int pci_vfio_get_container_fd(void);

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 
b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 0e6c48a..8631653 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -550,6 +550,18 @@ clear_current_group(void)
 }


+static void
+clear_group_by_number(int iommu_group_no)
+{
+   int i;
+   for (i = 0; i < vfio_cfg.vfio_group_idx; i++)
+   if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
+   {
+   vfio_cfg.vfio_groups[i].group_no = 0;
+   vfio_cfg.vfio_groups[i].fd = -1;
+   }
+}
+
 /*
  * map the PCI resources of a PCI device in virtual memory (VFIO version).
  * primary and secondary processes follow almost exactly the same path
@@ -876,6 +888,9 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
ioctl(vfio_dev_fd, VFIO_DEVICE_RESET);
}

+   vfio_res->vfio_dev_fd = vfio_dev_fd;
+   vfio_res->vfio_group_fd = vfio_group_fd;
+   vfio_res->iommu_group_no = iommu_group_no;
if (internal_config.process_type == RTE_PROC_PRIMARY)
TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);

@@ -883,6 +898,51 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
 }

 int
+pci_vfio_unmap_resource(struct rte_pci_device *dev)
+{
+   int i;
+   struct mapped_pci_resource *vfio_res = NULL;
+   struct mapped_pci_res_list *vfio_res_list =
+   RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+
+   /// Remove TAILQ entry
+   if (internal_config.process_type == RTE_PROC_PRIMARY)
+   {
+   TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+   if (memcmp(&vfio_res->pci_addr, &dev->addr,
+  sizeof(dev->addr)))
+   continue;
+   TAILQ_REMOVE(vfio_res_list, vfio_res, next);
+   break;
+   }
+   }
+
+   if(!vfio_res)
+   return -1;
+
+   // /* unmap BARs */
+
+   for (i = 0; i < (int) vfio_res->nb_maps; i++)
+   {
+   /* skip non-mmapable BARs */
+   if (!vfio_res->maps[i].addr)
+   continue;
+   munmap(vfio_res->maps[i].addr, vfio_res->maps[i].size);
+   }
+
+
+   if(TAILQ_EMPTY(vfio_res_list))
+   vfio_cfg.vfio_container_has_dma = 0;
+   close(vfio_res->vfio_dev_fd);
+   close(vfio_res->vfio_group_fd);
+   clear_group_by_number(vfio_res->iommu_group_no);
+   if (internal_config.process_type == RTE_PROC_PRIMARY)
+   rte_free(vfio_res);
+   vfio_res = NULL;
+  

[dpdk-dev] [PATCH 1/3] vfio: Added hot removal feature for vfio

2015-08-04 Thread Burakov, Anatoly
Hi Harpal,

> This patch will add a new API i.e. pci_vfio_unmap_resource.
> It will basically cleanup all the vfio resources allocated for a device.
> cleanup includes :-
> 1) removing vfio_res from vfio_res_list
> 2) unmap mapped bars
> 3) close device fd
> 4) close group fd
> 5) free vfio_res

Do I understand it correctly that your code assumes that there is always a 
single PCI device per group? From what I understand, unless we're only using a 
single port from the NIC, this won't be the case. 

Also, extraneous comment symbols:

> + // /* unmap BARs */

Best regards,
Anatoly