Re: [PATCH v5 29/29] vfio: powerpc/spapr: Support Dynamic DMA windows

2015-03-10 Thread Alex Williamson
On Tue, 2015-03-10 at 01:07 +1100, Alexey Kardashevskiy wrote:
 This adds create/remove window ioctls to create and remove DMA windows.
 sPAPR defines a Dynamic DMA windows capability which allows
 para-virtualized guests to create additional DMA windows on a PCI bus.
 The existing linux kernels use this new window to map the entire guest
 memory and switch to the direct DMA operations saving time on map/unmap
 requests which would normally happen in a big amounts.
 
 This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and
 VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows.
 Up to 2 windows are supported now by the hardware and by this driver.
 
 This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional
 information such as a number of supported windows and maximum number
 levels of TCE tables.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
 Changes:
 v4:
 * moved code to tce_iommu_create_window()/tce_iommu_remove_window()
 helpers
 * added docs
 ---
  Documentation/vfio.txt  |  19 +
  arch/powerpc/include/asm/iommu.h|   2 +-
  drivers/vfio/vfio_iommu_spapr_tce.c | 165 
 +++-
  include/uapi/linux/vfio.h   |  24 +-
  4 files changed, 207 insertions(+), 3 deletions(-)
 
 diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
 index 791e85c..61ce393 100644
 --- a/Documentation/vfio.txt
 +++ b/Documentation/vfio.txt
 @@ -446,6 +446,25 @@ the memory block.
  The user space is not expected to call these often and the block descriptors
  are stored in a linked list in the kernel.
  
 +6) sPAPR specification allows guests to have an ddditional DMA window(s) on
 +a PCI bus with a variable page size. Two ioctls have been added to support
 +this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE.
 +The platform has to support the functionality or error will be returned to
 +the userspace. The existing hardware supports up to 2 DMA windows, one is
 +2GB long, uses 4K pages and called default 32bit window; the other can
 +be as big as entire RAM, use different page size, it is optional - guests
 +create those in run-time if the guest driver supports 64bit DMA.
 +
 +VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and
 +a number of TCE table levels (if a TCE table is going to be big enough and
 +the kernel may not be able to allocate enough of physicall contiguous 
 memory).
 +It creates a new window in the available slot and returns the bus address 
 where
 +the new window starts. Due to hardware limitation, the user space cannot 
 choose
 +the location of DMA windows.
 +
 +VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window
 +and removes it.
 +
  
 ---
  
  [1] VFIO was originally an acronym for Virtual Function I/O in its
 diff --git a/arch/powerpc/include/asm/iommu.h 
 b/arch/powerpc/include/asm/iommu.h
 index 04f72ac..de82b61 100644
 --- a/arch/powerpc/include/asm/iommu.h
 +++ b/arch/powerpc/include/asm/iommu.h
 @@ -138,7 +138,7 @@ extern void iommu_free_table(struct iommu_table *tbl, 
 const char *node_name);
  extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
   int nid);
  
 -#define IOMMU_TABLE_GROUP_MAX_TABLES 1
 +#define IOMMU_TABLE_GROUP_MAX_TABLES 2
  
  struct iommu_table_group;
  
 diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
 b/drivers/vfio/vfio_iommu_spapr_tce.c
 index 3a0b5fe..7aa4141b 100644
 --- a/drivers/vfio/vfio_iommu_spapr_tce.c
 +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
 @@ -96,6 +96,7 @@ struct tce_container {
   struct list_head mem_list;
   struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
   struct list_head group_list;
 + bool v2;
  };
  
  struct tce_iommu_group {
 @@ -333,6 +334,20 @@ static struct iommu_table *spapr_tce_find_table(
   return ret;
  }
  
 +static int spapr_tce_find_free_table(struct tce_container *container)
 +{
 + int i;
 +
 + for (i = 0; i  IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
 + struct iommu_table *tbl = container-tables[i];
 +
 + if (!tbl-it_size)
 + return i;
 + }
 +
 + return -1;


Why not use a real errno here?

 +}
 +
  static int tce_iommu_enable(struct tce_container *container)
  {
   int ret = 0;
 @@ -432,6 +447,8 @@ static void *tce_iommu_open(unsigned long arg)
   INIT_LIST_HEAD_RCU(container-mem_list);
   INIT_LIST_HEAD_RCU(container-group_list);
  
 + container-v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
 +

Ah, here v2 actually provides some enforced differentiation, right?  ...
oh wait, nobody ever uses this.
   return container;
  }
  
 @@ -605,11 +622,90 @@ static long tce_iommu_build(struct tce_container 
 *container,
   return ret;
  }
  
 +static long tce_iommu_create_window(struct tce_container *container,
 + __u32 page_shift, __u64 

[PATCH v5 29/29] vfio: powerpc/spapr: Support Dynamic DMA windows

2015-03-09 Thread Alexey Kardashevskiy
This adds create/remove window ioctls to create and remove DMA windows.
sPAPR defines a Dynamic DMA windows capability which allows
para-virtualized guests to create additional DMA windows on a PCI bus.
The existing linux kernels use this new window to map the entire guest
memory and switch to the direct DMA operations saving time on map/unmap
requests which would normally happen in a big amounts.

This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and
VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows.
Up to 2 windows are supported now by the hardware and by this driver.

This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional
information such as a number of supported windows and maximum number
levels of TCE tables.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
Changes:
v4:
* moved code to tce_iommu_create_window()/tce_iommu_remove_window()
helpers
* added docs
---
 Documentation/vfio.txt  |  19 +
 arch/powerpc/include/asm/iommu.h|   2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c | 165 +++-
 include/uapi/linux/vfio.h   |  24 +-
 4 files changed, 207 insertions(+), 3 deletions(-)

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 791e85c..61ce393 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -446,6 +446,25 @@ the memory block.
 The user space is not expected to call these often and the block descriptors
 are stored in a linked list in the kernel.
 
+6) sPAPR specification allows guests to have an ddditional DMA window(s) on
+a PCI bus with a variable page size. Two ioctls have been added to support
+this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE.
+The platform has to support the functionality or error will be returned to
+the userspace. The existing hardware supports up to 2 DMA windows, one is
+2GB long, uses 4K pages and called default 32bit window; the other can
+be as big as entire RAM, use different page size, it is optional - guests
+create those in run-time if the guest driver supports 64bit DMA.
+
+VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and
+a number of TCE table levels (if a TCE table is going to be big enough and
+the kernel may not be able to allocate enough of physicall contiguous memory).
+It creates a new window in the available slot and returns the bus address where
+the new window starts. Due to hardware limitation, the user space cannot choose
+the location of DMA windows.
+
+VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window
+and removes it.
+
 ---
 
 [1] VFIO was originally an acronym for Virtual Function I/O in its
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 04f72ac..de82b61 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -138,7 +138,7 @@ extern void iommu_free_table(struct iommu_table *tbl, const 
char *node_name);
 extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
int nid);
 
-#define IOMMU_TABLE_GROUP_MAX_TABLES   1
+#define IOMMU_TABLE_GROUP_MAX_TABLES   2
 
 struct iommu_table_group;
 
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
b/drivers/vfio/vfio_iommu_spapr_tce.c
index 3a0b5fe..7aa4141b 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -96,6 +96,7 @@ struct tce_container {
struct list_head mem_list;
struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES];
struct list_head group_list;
+   bool v2;
 };
 
 struct tce_iommu_group {
@@ -333,6 +334,20 @@ static struct iommu_table *spapr_tce_find_table(
return ret;
 }
 
+static int spapr_tce_find_free_table(struct tce_container *container)
+{
+   int i;
+
+   for (i = 0; i  IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+   struct iommu_table *tbl = container-tables[i];
+
+   if (!tbl-it_size)
+   return i;
+   }
+
+   return -1;
+}
+
 static int tce_iommu_enable(struct tce_container *container)
 {
int ret = 0;
@@ -432,6 +447,8 @@ static void *tce_iommu_open(unsigned long arg)
INIT_LIST_HEAD_RCU(container-mem_list);
INIT_LIST_HEAD_RCU(container-group_list);
 
+   container-v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
+
return container;
 }
 
@@ -605,11 +622,90 @@ static long tce_iommu_build(struct tce_container 
*container,
return ret;
 }
 
+static long tce_iommu_create_window(struct tce_container *container,
+   __u32 page_shift, __u64 window_size, __u32 levels,
+   __u64 *start_addr)
+{
+   struct iommu_table_group *table_group;
+   struct tce_iommu_group *tcegrp;
+   int num;
+   long ret;
+
+   num = spapr_tce_find_free_table(container);
+   if (num  0)
+   return -ENOSYS;
+
+