Re: [PATCH v5 29/29] vfio: powerpc/spapr: Support Dynamic DMA windows
On Tue, 2015-03-10 at 01:07 +1100, Alexey Kardashevskiy wrote: This adds create/remove window ioctls to create and remove DMA windows. sPAPR defines a Dynamic DMA windows capability which allows para-virtualized guests to create additional DMA windows on a PCI bus. The existing linux kernels use this new window to map the entire guest memory and switch to the direct DMA operations saving time on map/unmap requests which would normally happen in a big amounts. This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows. Up to 2 windows are supported now by the hardware and by this driver. This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional information such as a number of supported windows and maximum number levels of TCE tables. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- Changes: v4: * moved code to tce_iommu_create_window()/tce_iommu_remove_window() helpers * added docs --- Documentation/vfio.txt | 19 + arch/powerpc/include/asm/iommu.h| 2 +- drivers/vfio/vfio_iommu_spapr_tce.c | 165 +++- include/uapi/linux/vfio.h | 24 +- 4 files changed, 207 insertions(+), 3 deletions(-) diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 791e85c..61ce393 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -446,6 +446,25 @@ the memory block. The user space is not expected to call these often and the block descriptors are stored in a linked list in the kernel. +6) sPAPR specification allows guests to have an ddditional DMA window(s) on +a PCI bus with a variable page size. Two ioctls have been added to support +this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE. +The platform has to support the functionality or error will be returned to +the userspace. The existing hardware supports up to 2 DMA windows, one is +2GB long, uses 4K pages and called default 32bit window; the other can +be as big as entire RAM, use different page size, it is optional - guests +create those in run-time if the guest driver supports 64bit DMA. + +VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and +a number of TCE table levels (if a TCE table is going to be big enough and +the kernel may not be able to allocate enough of physicall contiguous memory). +It creates a new window in the available slot and returns the bus address where +the new window starts. Due to hardware limitation, the user space cannot choose +the location of DMA windows. + +VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window +and removes it. + --- [1] VFIO was originally an acronym for Virtual Function I/O in its diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 04f72ac..de82b61 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -138,7 +138,7 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); -#define IOMMU_TABLE_GROUP_MAX_TABLES 1 +#define IOMMU_TABLE_GROUP_MAX_TABLES 2 struct iommu_table_group; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 3a0b5fe..7aa4141b 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -96,6 +96,7 @@ struct tce_container { struct list_head mem_list; struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct list_head group_list; + bool v2; }; struct tce_iommu_group { @@ -333,6 +334,20 @@ static struct iommu_table *spapr_tce_find_table( return ret; } +static int spapr_tce_find_free_table(struct tce_container *container) +{ + int i; + + for (i = 0; i IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = container-tables[i]; + + if (!tbl-it_size) + return i; + } + + return -1; Why not use a real errno here? +} + static int tce_iommu_enable(struct tce_container *container) { int ret = 0; @@ -432,6 +447,8 @@ static void *tce_iommu_open(unsigned long arg) INIT_LIST_HEAD_RCU(container-mem_list); INIT_LIST_HEAD_RCU(container-group_list); + container-v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; + Ah, here v2 actually provides some enforced differentiation, right? ... oh wait, nobody ever uses this. return container; } @@ -605,11 +622,90 @@ static long tce_iommu_build(struct tce_container *container, return ret; } +static long tce_iommu_create_window(struct tce_container *container, + __u32 page_shift, __u64
[PATCH v5 29/29] vfio: powerpc/spapr: Support Dynamic DMA windows
This adds create/remove window ioctls to create and remove DMA windows. sPAPR defines a Dynamic DMA windows capability which allows para-virtualized guests to create additional DMA windows on a PCI bus. The existing linux kernels use this new window to map the entire guest memory and switch to the direct DMA operations saving time on map/unmap requests which would normally happen in a big amounts. This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows. Up to 2 windows are supported now by the hardware and by this driver. This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional information such as a number of supported windows and maximum number levels of TCE tables. Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru --- Changes: v4: * moved code to tce_iommu_create_window()/tce_iommu_remove_window() helpers * added docs --- Documentation/vfio.txt | 19 + arch/powerpc/include/asm/iommu.h| 2 +- drivers/vfio/vfio_iommu_spapr_tce.c | 165 +++- include/uapi/linux/vfio.h | 24 +- 4 files changed, 207 insertions(+), 3 deletions(-) diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 791e85c..61ce393 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -446,6 +446,25 @@ the memory block. The user space is not expected to call these often and the block descriptors are stored in a linked list in the kernel. +6) sPAPR specification allows guests to have an ddditional DMA window(s) on +a PCI bus with a variable page size. Two ioctls have been added to support +this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE. +The platform has to support the functionality or error will be returned to +the userspace. The existing hardware supports up to 2 DMA windows, one is +2GB long, uses 4K pages and called default 32bit window; the other can +be as big as entire RAM, use different page size, it is optional - guests +create those in run-time if the guest driver supports 64bit DMA. + +VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and +a number of TCE table levels (if a TCE table is going to be big enough and +the kernel may not be able to allocate enough of physicall contiguous memory). +It creates a new window in the available slot and returns the bus address where +the new window starts. Due to hardware limitation, the user space cannot choose +the location of DMA windows. + +VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window +and removes it. + --- [1] VFIO was originally an acronym for Virtual Function I/O in its diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 04f72ac..de82b61 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -138,7 +138,7 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name); extern struct iommu_table *iommu_init_table(struct iommu_table * tbl, int nid); -#define IOMMU_TABLE_GROUP_MAX_TABLES 1 +#define IOMMU_TABLE_GROUP_MAX_TABLES 2 struct iommu_table_group; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 3a0b5fe..7aa4141b 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -96,6 +96,7 @@ struct tce_container { struct list_head mem_list; struct iommu_table tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct list_head group_list; + bool v2; }; struct tce_iommu_group { @@ -333,6 +334,20 @@ static struct iommu_table *spapr_tce_find_table( return ret; } +static int spapr_tce_find_free_table(struct tce_container *container) +{ + int i; + + for (i = 0; i IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = container-tables[i]; + + if (!tbl-it_size) + return i; + } + + return -1; +} + static int tce_iommu_enable(struct tce_container *container) { int ret = 0; @@ -432,6 +447,8 @@ static void *tce_iommu_open(unsigned long arg) INIT_LIST_HEAD_RCU(container-mem_list); INIT_LIST_HEAD_RCU(container-group_list); + container-v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; + return container; } @@ -605,11 +622,90 @@ static long tce_iommu_build(struct tce_container *container, return ret; } +static long tce_iommu_create_window(struct tce_container *container, + __u32 page_shift, __u64 window_size, __u32 levels, + __u64 *start_addr) +{ + struct iommu_table_group *table_group; + struct tce_iommu_group *tcegrp; + int num; + long ret; + + num = spapr_tce_find_free_table(container); + if (num 0) + return -ENOSYS; + +