Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-12 Thread Alexey Kardashevskiy
On 12/07/12 13:11, Alex Williamson wrote:
 On Wed, 2012-07-11 at 12:54 +1000, Alexey Kardashevskiy wrote:
 On 11/07/12 02:55, Alex Williamson wrote:
 On Tue, 2012-07-10 at 15:51 +1000, Alexey Kardashevskiy wrote:
 The patch enables VFIO on POWER.

 It literally does the following:

 1. POWERPC IOMMU support (the kernel counterpart is required)

 2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.

 3. Added vfio_get_container_fd() to VFIO in order to initialize 1).

 4. Makefile fixed and is_vfio flag added into sPAPR PHB - required to
 distinguish VFIO's DMA context from the emulated one.

 WIth the pathes posted today a bit earlier, this patch fully supports
 VFIO what includes MSIX as well,


 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
  hw/ppc/Makefile.objs |3 ++
  hw/spapr.h   |4 +++
  hw/spapr_iommu.c |   87 
 ++
  hw/spapr_pci.c   |   23 -
  hw/spapr_pci.h   |2 ++
  hw/vfio_pci.c|   76 +--
  hw/vfio_pci.h|2 ++
  7 files changed, 193 insertions(+), 4 deletions(-)

 diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
 index f573a95..c46a049 100644
 --- a/hw/ppc/Makefile.objs
 +++ b/hw/ppc/Makefile.objs
 @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
  # Xilinx PPC peripherals
  obj-y += xilinx_ethlite.o
  
 +# VFIO PCI device assignment
 +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
 +
  obj-y := $(addprefix ../,$(obj-y))
 diff --git a/hw/spapr.h b/hw/spapr.h
 index b37f337..9dca704 100644
 --- a/hw/spapr.h
 +++ b/hw/spapr.h
 @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
 *propname,
  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
DMAContext *dma);
  
 +void spapr_vfio_init_dma(int fd, uint32_t liobn,
 + uint64_t *dma32_window_start,
 + uint64_t *dma32_window_size);
 +
  #endif /* !defined (__HW_SPAPR_H__) */
 diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
 index 50c288d..0a194e8 100644
 --- a/hw/spapr_iommu.c
 +++ b/hw/spapr_iommu.c
 @@ -16,6 +16,8 @@
   * You should have received a copy of the GNU Lesser General Public
   * License along with this library; if not, see 
 http://www.gnu.org/licenses/.
   */
 +#include sys/ioctl.h
 +
  #include hw.h
  #include kvm.h
  #include qdev.h
 @@ -23,6 +25,7 @@
  #include dma.h
  
  #include hw/spapr.h
 +#include hw/linux-vfio.h

 I really need to move this into linux-headers.

  
  #include libfdt.h
  
 @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, 
 target_ulong ioba, target_ulong tce)
  return 0;
  }
  
 +/*  API for POWERPC IOMMU  */
 +
 +#define POWERPC_IOMMU   2
 +
 +struct tce_iommu_info {
 +__u32 argsz;
 +__u32 dma32_window_start;
 +__u32 dma32_window_size;
 +};
 +
 +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
 +
 +struct tce_iommu_dma_map {
 +__u32 argsz;
 +__u64 va;
 +__u64 dmaaddr;
 +};
 +
 +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
 +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)

 I assume this would eventually go into the kernel vfio.h with a VFIO_
 prefix.  Add a flags field to the structures or it'll be hard to extend
 them later.


 We can always define another type of IOMMU :) But yes, I'll extend both map 
 and info structures.



 +typedef struct sPAPRVFIOTable {
 +int fd;
 +uint32_t liobn;
 +QLIST_ENTRY(sPAPRVFIOTable) list;
 +} sPAPRVFIOTable;
 +
 +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
 +
 +void spapr_vfio_init_dma(int fd, uint32_t liobn,
 + uint64_t *dma32_window_start,
 + uint64_t *dma32_window_size)
 +{
 +sPAPRVFIOTable *t;
 +struct tce_iommu_info info = { .argsz = sizeof(info) };
 +
 +if (ioctl(fd, POWERPC_IOMMU_GET_INFO, info)) {
 +fprintf(stderr, POWERPC_IOMMU_GET_INFO failed %d\n, errno);
 +return;
 +}
 +*dma32_window_start = info.dma32_window_start;
 +*dma32_window_size = info.dma32_window_size;
 +
 +t = g_malloc0(sizeof(*t));
 +t-fd = fd;
 +t-liobn = liobn;
 +
 +QLIST_INSERT_HEAD(vfio_tce_tables, t, list);
 +}
 +
 +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong 
 tce)
 +{
 +sPAPRVFIOTable *t;
 +struct tce_iommu_dma_map map = {
 +.argsz = sizeof(map),
 +.va = 0,
 +.dmaaddr = ioba,
 +};
 +
 +QLIST_FOREACH(t, vfio_tce_tables, list) {
 +if (t-liobn != liobn) {
 +continue;
 +}
 +if (tce) {
 +map.va = (uintptr_t)qemu_get_ram_ptr(tce  
 ~SPAPR_TCE_PAGE_MASK);
 +if (ioctl(t-fd, POWERPC_IOMMU_MAP_DMA, map)) {
 +fprintf(stderr, TCE_MAP_DMA: %d\n, errno);
 +return H_PARAMETER;
 +}
 +} else {
 +if 

Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-11 Thread Alex Williamson
On Wed, 2012-07-11 at 12:54 +1000, Alexey Kardashevskiy wrote:
 On 11/07/12 02:55, Alex Williamson wrote:
  On Tue, 2012-07-10 at 15:51 +1000, Alexey Kardashevskiy wrote:
  The patch enables VFIO on POWER.
 
  It literally does the following:
 
  1. POWERPC IOMMU support (the kernel counterpart is required)
 
  2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.
 
  3. Added vfio_get_container_fd() to VFIO in order to initialize 1).
 
  4. Makefile fixed and is_vfio flag added into sPAPR PHB - required to
  distinguish VFIO's DMA context from the emulated one.
 
  WIth the pathes posted today a bit earlier, this patch fully supports
  VFIO what includes MSIX as well,
 
 
  Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
  ---
   hw/ppc/Makefile.objs |3 ++
   hw/spapr.h   |4 +++
   hw/spapr_iommu.c |   87 
  ++
   hw/spapr_pci.c   |   23 -
   hw/spapr_pci.h   |2 ++
   hw/vfio_pci.c|   76 +--
   hw/vfio_pci.h|2 ++
   7 files changed, 193 insertions(+), 4 deletions(-)
 
  diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
  index f573a95..c46a049 100644
  --- a/hw/ppc/Makefile.objs
  +++ b/hw/ppc/Makefile.objs
  @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
   # Xilinx PPC peripherals
   obj-y += xilinx_ethlite.o
   
  +# VFIO PCI device assignment
  +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
  +
   obj-y := $(addprefix ../,$(obj-y))
  diff --git a/hw/spapr.h b/hw/spapr.h
  index b37f337..9dca704 100644
  --- a/hw/spapr.h
  +++ b/hw/spapr.h
  @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
  *propname,
   int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
 DMAContext *dma);
   
  +void spapr_vfio_init_dma(int fd, uint32_t liobn,
  + uint64_t *dma32_window_start,
  + uint64_t *dma32_window_size);
  +
   #endif /* !defined (__HW_SPAPR_H__) */
  diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
  index 50c288d..0a194e8 100644
  --- a/hw/spapr_iommu.c
  +++ b/hw/spapr_iommu.c
  @@ -16,6 +16,8 @@
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see 
  http://www.gnu.org/licenses/.
*/
  +#include sys/ioctl.h
  +
   #include hw.h
   #include kvm.h
   #include qdev.h
  @@ -23,6 +25,7 @@
   #include dma.h
   
   #include hw/spapr.h
  +#include hw/linux-vfio.h
  
  I really need to move this into linux-headers.
  
   
   #include libfdt.h
   
  @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, 
  target_ulong ioba, target_ulong tce)
   return 0;
   }
   
  +/*  API for POWERPC IOMMU  */
  +
  +#define POWERPC_IOMMU   2
  +
  +struct tce_iommu_info {
  +__u32 argsz;
  +__u32 dma32_window_start;
  +__u32 dma32_window_size;
  +};
  +
  +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
  +
  +struct tce_iommu_dma_map {
  +__u32 argsz;
  +__u64 va;
  +__u64 dmaaddr;
  +};
  +
  +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
  +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
  
  I assume this would eventually go into the kernel vfio.h with a VFIO_
  prefix.  Add a flags field to the structures or it'll be hard to extend
  them later.
 
 
 We can always define another type of IOMMU :) But yes, I'll extend both map 
 and info structures.
 
 
 
  +typedef struct sPAPRVFIOTable {
  +int fd;
  +uint32_t liobn;
  +QLIST_ENTRY(sPAPRVFIOTable) list;
  +} sPAPRVFIOTable;
  +
  +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
  +
  +void spapr_vfio_init_dma(int fd, uint32_t liobn,
  + uint64_t *dma32_window_start,
  + uint64_t *dma32_window_size)
  +{
  +sPAPRVFIOTable *t;
  +struct tce_iommu_info info = { .argsz = sizeof(info) };
  +
  +if (ioctl(fd, POWERPC_IOMMU_GET_INFO, info)) {
  +fprintf(stderr, POWERPC_IOMMU_GET_INFO failed %d\n, errno);
  +return;
  +}
  +*dma32_window_start = info.dma32_window_start;
  +*dma32_window_size = info.dma32_window_size;
  +
  +t = g_malloc0(sizeof(*t));
  +t-fd = fd;
  +t-liobn = liobn;
  +
  +QLIST_INSERT_HEAD(vfio_tce_tables, t, list);
  +}
  +
  +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong 
  tce)
  +{
  +sPAPRVFIOTable *t;
  +struct tce_iommu_dma_map map = {
  +.argsz = sizeof(map),
  +.va = 0,
  +.dmaaddr = ioba,
  +};
  +
  +QLIST_FOREACH(t, vfio_tce_tables, list) {
  +if (t-liobn != liobn) {
  +continue;
  +}
  +if (tce) {
  +map.va = (uintptr_t)qemu_get_ram_ptr(tce  
  ~SPAPR_TCE_PAGE_MASK);
  +if (ioctl(t-fd, POWERPC_IOMMU_MAP_DMA, map)) {
  +

Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alex Williamson
On Tue, 2012-07-10 at 15:51 +1000, Alexey Kardashevskiy wrote:
 The patch enables VFIO on POWER.
 
 It literally does the following:
 
 1. POWERPC IOMMU support (the kernel counterpart is required)
 
 2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.
 
 3. Added vfio_get_container_fd() to VFIO in order to initialize 1).
 
 4. Makefile fixed and is_vfio flag added into sPAPR PHB - required to
 distinguish VFIO's DMA context from the emulated one.
 
 WIth the pathes posted today a bit earlier, this patch fully supports
 VFIO what includes MSIX as well,
 
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
  hw/ppc/Makefile.objs |3 ++
  hw/spapr.h   |4 +++
  hw/spapr_iommu.c |   87 
 ++
  hw/spapr_pci.c   |   23 -
  hw/spapr_pci.h   |2 ++
  hw/vfio_pci.c|   76 +--
  hw/vfio_pci.h|2 ++
  7 files changed, 193 insertions(+), 4 deletions(-)
 
 diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
 index f573a95..c46a049 100644
 --- a/hw/ppc/Makefile.objs
 +++ b/hw/ppc/Makefile.objs
 @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
  # Xilinx PPC peripherals
  obj-y += xilinx_ethlite.o
  
 +# VFIO PCI device assignment
 +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
 +
  obj-y := $(addprefix ../,$(obj-y))
 diff --git a/hw/spapr.h b/hw/spapr.h
 index b37f337..9dca704 100644
 --- a/hw/spapr.h
 +++ b/hw/spapr.h
 @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
 *propname,
  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
DMAContext *dma);
  
 +void spapr_vfio_init_dma(int fd, uint32_t liobn,
 + uint64_t *dma32_window_start,
 + uint64_t *dma32_window_size);
 +
  #endif /* !defined (__HW_SPAPR_H__) */
 diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
 index 50c288d..0a194e8 100644
 --- a/hw/spapr_iommu.c
 +++ b/hw/spapr_iommu.c
 @@ -16,6 +16,8 @@
   * You should have received a copy of the GNU Lesser General Public
   * License along with this library; if not, see 
 http://www.gnu.org/licenses/.
   */
 +#include sys/ioctl.h
 +
  #include hw.h
  #include kvm.h
  #include qdev.h
 @@ -23,6 +25,7 @@
  #include dma.h
  
  #include hw/spapr.h
 +#include hw/linux-vfio.h

I really need to move this into linux-headers.

  
  #include libfdt.h
  
 @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, target_ulong 
 ioba, target_ulong tce)
  return 0;
  }
  
 +/*  API for POWERPC IOMMU  */
 +
 +#define POWERPC_IOMMU   2
 +
 +struct tce_iommu_info {
 +__u32 argsz;
 +__u32 dma32_window_start;
 +__u32 dma32_window_size;
 +};
 +
 +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
 +
 +struct tce_iommu_dma_map {
 +__u32 argsz;
 +__u64 va;
 +__u64 dmaaddr;
 +};
 +
 +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
 +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)

I assume this would eventually go into the kernel vfio.h with a VFIO_
prefix.  Add a flags field to the structures or it'll be hard to extend
them later.

 +typedef struct sPAPRVFIOTable {
 +int fd;
 +uint32_t liobn;
 +QLIST_ENTRY(sPAPRVFIOTable) list;
 +} sPAPRVFIOTable;
 +
 +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
 +
 +void spapr_vfio_init_dma(int fd, uint32_t liobn,
 + uint64_t *dma32_window_start,
 + uint64_t *dma32_window_size)
 +{
 +sPAPRVFIOTable *t;
 +struct tce_iommu_info info = { .argsz = sizeof(info) };
 +
 +if (ioctl(fd, POWERPC_IOMMU_GET_INFO, info)) {
 +fprintf(stderr, POWERPC_IOMMU_GET_INFO failed %d\n, errno);
 +return;
 +}
 +*dma32_window_start = info.dma32_window_start;
 +*dma32_window_size = info.dma32_window_size;
 +
 +t = g_malloc0(sizeof(*t));
 +t-fd = fd;
 +t-liobn = liobn;
 +
 +QLIST_INSERT_HEAD(vfio_tce_tables, t, list);
 +}
 +
 +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
 +{
 +sPAPRVFIOTable *t;
 +struct tce_iommu_dma_map map = {
 +.argsz = sizeof(map),
 +.va = 0,
 +.dmaaddr = ioba,
 +};
 +
 +QLIST_FOREACH(t, vfio_tce_tables, list) {
 +if (t-liobn != liobn) {
 +continue;
 +}
 +if (tce) {
 +map.va = (uintptr_t)qemu_get_ram_ptr(tce  ~SPAPR_TCE_PAGE_MASK);
 +if (ioctl(t-fd, POWERPC_IOMMU_MAP_DMA, map)) {
 +fprintf(stderr, TCE_MAP_DMA: %d\n, errno);
 +return H_PARAMETER;
 +}
 +} else {
 +if (ioctl(t-fd, POWERPC_IOMMU_UNMAP_DMA, map)) {
 +fprintf(stderr, TCE_UNMAP_DMA: %d\n, errno);
 +return H_PARAMETER;
 +}
 +}
 +return H_SUCCESS;
 +}
 +return H_CONTINUE; /* positive 

Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Tue, 2012-07-10 at 10:55 -0600, Alex Williamson wrote:
 
 I wish you could do this through a MemoryListener like we do on x86.
 

Can you elaborate ? TCE (iommu) manipulation on PAPR is done via
specific hypervisor calls, not sure what a MemoryListener would do
here ...

Cheers,
Ben.





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alex Williamson
On Wed, 2012-07-11 at 07:32 +1000, Benjamin Herrenschmidt wrote:
 On Tue, 2012-07-10 at 10:55 -0600, Alex Williamson wrote:
  
  I wish you could do this through a MemoryListener like we do on x86.
  
 
 Can you elaborate ? TCE (iommu) manipulation on PAPR is done via
 specific hypervisor calls, not sure what a MemoryListener would do
 here ...

Hmm, the guest directed iommu updates via hypercalls may not really fit
the MemoryListener model.  I'm just trying to think of ways to avoid
having an offshoot of vfio in the power code base by making use of
common abstraction layers.  If we got a region_add/del callback we could
potentially move the spapr map and unmap into vfio like we do for x86.
Thanks,

Alex




Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Tue, 2012-07-10 at 15:48 -0600, Alex Williamson wrote:
  specific hypervisor calls, not sure what a MemoryListener would do
  here ...
 
 Hmm, the guest directed iommu updates via hypercalls may not really fit
 the MemoryListener model.  I'm just trying to think of ways to avoid
 having an offshoot of vfio in the power code base by making use of
 common abstraction layers.  If we got a region_add/del callback we could
 potentially move the spapr map and unmap into vfio like we do for x86.
 Thanks, 

In the end we don't really want to use that anyway. map and unmap are
*extremely* performance sensitive in practice, so plan is to implement
the hypercall directly in the kernel KVM at a level where it won't even
go near generic code :-)

Basically, when the hypercall gets in, we take control in what we call
real mode on powerpc (MMU off, translation disabled), we have a window
to implement critical stuff like this before we context switch the MMU
to the host context (which on P7 is quite expensive).

This is where I want to go directly whack the TCE table as used by the
HW (provided the page has a good PTE of course), pretty much like we do
for populating the main MMU hash table.

So the map/unmap path will be entirely in arch specific code. The one
you see in Alexey code is basically only ever going to be used by
something like qemu full emulation...

Cheers,
Ben.





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Scott Wood
On 07/10/2012 12:51 AM, Alexey Kardashevskiy wrote:
 The patch enables VFIO on POWER.
 
 It literally does the following:
 
 1. POWERPC IOMMU support (the kernel counterpart is required)
[snip]
 +/*  API for POWERPC IOMMU  */
 +
 +#define POWERPC_IOMMU   2
 +
 +struct tce_iommu_info {
 +__u32 argsz;
 +__u32 dma32_window_start;
 +__u32 dma32_window_size;
 +};
 +
 +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)

Is there a more specific name that could be used for this?  Not all
PowerPC chips have the same kind of IOMMU.

-Scott




Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alexey Kardashevskiy
On 11/07/12 08:26, Scott Wood wrote:
 On 07/10/2012 12:51 AM, Alexey Kardashevskiy wrote:
 The patch enables VFIO on POWER.

 It literally does the following:

 1. POWERPC IOMMU support (the kernel counterpart is required)
 [snip]
 +/*  API for POWERPC IOMMU  */
 +
 +#define POWERPC_IOMMU   2
 +
 +struct tce_iommu_info {
 +__u32 argsz;
 +__u32 dma32_window_start;
 +__u32 dma32_window_size;
 +};
 +
 +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
 
 Is there a more specific name that could be used for this?  Not all
 PowerPC chips have the same kind of IOMMU.


Ben, is is SPAPR? BOOK3S?


-- 
Alexey





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Wed, 2012-07-11 at 09:55 +1000, Alexey Kardashevskiy wrote:
 On 11/07/12 08:26, Scott Wood wrote:
  On 07/10/2012 12:51 AM, Alexey Kardashevskiy wrote:
  The patch enables VFIO on POWER.
 
  It literally does the following:
 
  1. POWERPC IOMMU support (the kernel counterpart is required)
  [snip]
  +/*  API for POWERPC IOMMU  */
  +
  +#define POWERPC_IOMMU   2
  +
  +struct tce_iommu_info {
  +__u32 argsz;
  +__u32 dma32_window_start;
  +__u32 dma32_window_size;
  +};
  +
  +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
  
  Is there a more specific name that could be used for this?  Not all
  PowerPC chips have the same kind of IOMMU.

 
 Ben, is is SPAPR? BOOK3S?

So we have varieties of different iommus in the kernel indeed, we
probably want the info ioctl to reflect that. I would call this one
spapr_tce.

Also we will want a few other things, dunno if that's reflected here,
or whether the ioctl is easily extendable, but in the long run we will
need:

 - Ways to tell KVM about association between a liobn (logical bus
number as used in H_PUT_TCE) and an iommu so we can implement the real
mode H_PUT_TCE properly.

 - We will need some conduit to implement the DDW APIs (part of PAPR
allowing the guest to control the DMA windows, ie, by creating new
windows in 64-bit DMA space, with different page sizes etc). So you
may want to make it clear that the above provides information about the
base window specifically.

Cheers,
Ben.





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alexey Kardashevskiy
On 11/07/12 10:04, Benjamin Herrenschmidt wrote:
 On Wed, 2012-07-11 at 09:55 +1000, Alexey Kardashevskiy wrote:
 On 11/07/12 08:26, Scott Wood wrote:
 On 07/10/2012 12:51 AM, Alexey Kardashevskiy wrote:
 The patch enables VFIO on POWER.

 It literally does the following:

 1. POWERPC IOMMU support (the kernel counterpart is required)
 [snip]
 +/*  API for POWERPC IOMMU  */
 +
 +#define POWERPC_IOMMU   2
 +
 +struct tce_iommu_info {
 +__u32 argsz;
 +__u32 dma32_window_start;
 +__u32 dma32_window_size;
 +};
 +
 +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)

 Is there a more specific name that could be used for this?  Not all
 PowerPC chips have the same kind of IOMMU.
 

 Ben, is is SPAPR? BOOK3S?
 
 So we have varieties of different iommus in the kernel indeed, we
 probably want the info ioctl to reflect that. I would call this one
 spapr_tce.

VFIO provides such ioctl actually and it is it who returns POWERPC_IOMMU as an 
IOMMU type.

ok. SPAPR_TCE.


 Also we will want a few other things, dunno if that's reflected here,
 or whether the ioctl is easily extendable, but in the long run we will
 need:
 
  - Ways to tell KVM about association between a liobn (logical bus
 number as used in H_PUT_TCE) and an iommu so we can implement the real
 mode H_PUT_TCE properly.
 
  - We will need some conduit to implement the DDW APIs (part of PAPR
 allowing the guest to control the DMA windows, ie, by creating new
 windows in 64-bit DMA space, with different page sizes etc). So you
 may want to make it clear that the above provides information about the
 base window specifically.

So the current one would be SPAPR_TCE_32?


-- 
Alexey





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Wed, 2012-07-11 at 10:17 +1000, Alexey Kardashevskiy wrote:
 So the current one would be SPAPR_TCE_32?

No, the iommu type is SPAPR_TCE, but the *window* info you get here is
the 32-bit window. My thinking is add some versionning and a bunch of
reserved fields to that info struct so we can stick a few more things,
or at the very least add a flags field, so we can return if/when we
support DDW etc... on that iommu.

Cheers,
Ben.
 




Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alexey Kardashevskiy
On 11/07/12 02:55, Alex Williamson wrote:
 On Tue, 2012-07-10 at 15:51 +1000, Alexey Kardashevskiy wrote:
 The patch enables VFIO on POWER.

 It literally does the following:

 1. POWERPC IOMMU support (the kernel counterpart is required)

 2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.

 3. Added vfio_get_container_fd() to VFIO in order to initialize 1).

 4. Makefile fixed and is_vfio flag added into sPAPR PHB - required to
 distinguish VFIO's DMA context from the emulated one.

 WIth the pathes posted today a bit earlier, this patch fully supports
 VFIO what includes MSIX as well,


 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 ---
  hw/ppc/Makefile.objs |3 ++
  hw/spapr.h   |4 +++
  hw/spapr_iommu.c |   87 
 ++
  hw/spapr_pci.c   |   23 -
  hw/spapr_pci.h   |2 ++
  hw/vfio_pci.c|   76 +--
  hw/vfio_pci.h|2 ++
  7 files changed, 193 insertions(+), 4 deletions(-)

 diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
 index f573a95..c46a049 100644
 --- a/hw/ppc/Makefile.objs
 +++ b/hw/ppc/Makefile.objs
 @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
  # Xilinx PPC peripherals
  obj-y += xilinx_ethlite.o
  
 +# VFIO PCI device assignment
 +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
 +
  obj-y := $(addprefix ../,$(obj-y))
 diff --git a/hw/spapr.h b/hw/spapr.h
 index b37f337..9dca704 100644
 --- a/hw/spapr.h
 +++ b/hw/spapr.h
 @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
 *propname,
  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
DMAContext *dma);
  
 +void spapr_vfio_init_dma(int fd, uint32_t liobn,
 + uint64_t *dma32_window_start,
 + uint64_t *dma32_window_size);
 +
  #endif /* !defined (__HW_SPAPR_H__) */
 diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
 index 50c288d..0a194e8 100644
 --- a/hw/spapr_iommu.c
 +++ b/hw/spapr_iommu.c
 @@ -16,6 +16,8 @@
   * You should have received a copy of the GNU Lesser General Public
   * License along with this library; if not, see 
 http://www.gnu.org/licenses/.
   */
 +#include sys/ioctl.h
 +
  #include hw.h
  #include kvm.h
  #include qdev.h
 @@ -23,6 +25,7 @@
  #include dma.h
  
  #include hw/spapr.h
 +#include hw/linux-vfio.h
 
 I really need to move this into linux-headers.
 
  
  #include libfdt.h
  
 @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, target_ulong 
 ioba, target_ulong tce)
  return 0;
  }
  
 +/*  API for POWERPC IOMMU  */
 +
 +#define POWERPC_IOMMU   2
 +
 +struct tce_iommu_info {
 +__u32 argsz;
 +__u32 dma32_window_start;
 +__u32 dma32_window_size;
 +};
 +
 +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
 +
 +struct tce_iommu_dma_map {
 +__u32 argsz;
 +__u64 va;
 +__u64 dmaaddr;
 +};
 +
 +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
 +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
 
 I assume this would eventually go into the kernel vfio.h with a VFIO_
 prefix.  Add a flags field to the structures or it'll be hard to extend
 them later.


We can always define another type of IOMMU :) But yes, I'll extend both map and 
info structures.



 +typedef struct sPAPRVFIOTable {
 +int fd;
 +uint32_t liobn;
 +QLIST_ENTRY(sPAPRVFIOTable) list;
 +} sPAPRVFIOTable;
 +
 +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
 +
 +void spapr_vfio_init_dma(int fd, uint32_t liobn,
 + uint64_t *dma32_window_start,
 + uint64_t *dma32_window_size)
 +{
 +sPAPRVFIOTable *t;
 +struct tce_iommu_info info = { .argsz = sizeof(info) };
 +
 +if (ioctl(fd, POWERPC_IOMMU_GET_INFO, info)) {
 +fprintf(stderr, POWERPC_IOMMU_GET_INFO failed %d\n, errno);
 +return;
 +}
 +*dma32_window_start = info.dma32_window_start;
 +*dma32_window_size = info.dma32_window_size;
 +
 +t = g_malloc0(sizeof(*t));
 +t-fd = fd;
 +t-liobn = liobn;
 +
 +QLIST_INSERT_HEAD(vfio_tce_tables, t, list);
 +}
 +
 +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
 +{
 +sPAPRVFIOTable *t;
 +struct tce_iommu_dma_map map = {
 +.argsz = sizeof(map),
 +.va = 0,
 +.dmaaddr = ioba,
 +};
 +
 +QLIST_FOREACH(t, vfio_tce_tables, list) {
 +if (t-liobn != liobn) {
 +continue;
 +}
 +if (tce) {
 +map.va = (uintptr_t)qemu_get_ram_ptr(tce  
 ~SPAPR_TCE_PAGE_MASK);
 +if (ioctl(t-fd, POWERPC_IOMMU_MAP_DMA, map)) {
 +fprintf(stderr, TCE_MAP_DMA: %d\n, errno);
 +return H_PARAMETER;
 +}
 +} else {
 +if (ioctl(t-fd, POWERPC_IOMMU_UNMAP_DMA, map)) {
 +fprintf(stderr, TCE_UNMAP_DMA: %d\n, errno);
 

Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Wed, 2012-07-11 at 12:54 +1000, Alexey Kardashevskiy wrote:
  Why do you need this, aren't the extension checks sufficient for this to
  be a nop for you?
 
 
 It uses ioapic_remove_gsi_eoi_notifier() so it needs some #ifdef anyway. And 
 as we do not support
 kvm_irqchip_in_kernel(), there is no point in fixing it and I disabled it all.
 When we make eoi notifiers a platform independent, then yes, it will be nop.

In fact we have an internal experimental patch to move our
PIC emulation into the kernel but so far I have not managed
to make it fit in the existing irqchip stuff.

That irqchip interface is nasty. It's completely x86 centric,
and have tendrils all over the place, into the msi code, into
devices (virtio-pci.c) etc... in ways that are essentially unusable for
anything that looks a bit different.

IE. In urgent need of refactoring.

  -
  +#ifndef TARGET_PPC64
   vdev-msi_vectors[vector].virq = kvm_irqchip_add_msi_route(kvm_state, 
  msg);
   if (vdev-msi_vectors[vector].virq  0 || 
   kvm_irqchip_add_irqfd(kvm_state, fd,
  @@ -551,7 +575,11 @@ static int vfio_msix_vector_use(PCIDevice *pdev,
   qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL,
   vdev-msi_vectors[vector]);
   }
  -
  +#else
  +vdev-msi_vectors[vector].virq = -1;
  +qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL,
  +vdev-msi_vectors[vector]);
  +#endif
  
  This shouldn't be necessary once the abort is removed from
  kvm_irqchip_add_msi_route.  It'll be merged next time the kvm uq tree
  merges into qemu.

It must also return an irq number, what to chose in that case ? Ie. the
whole irqchip API is a trainwreck if you ask me :-) Very poorly thought
out.

 True, I just did not pick up your very last changes. Updating is always 
 painful, and now it is even
 worse then usual as pci_get_irq has been renamed to something else :) Will do 
 though.

 .../...

 Well I probably can add MemoryListener for the DMA window and move all 
 power-specific map/unmap code
 to VFIO but it does not look much better. I would rather prefer separating 
 IOMMU code from vfio_pci
 somehow (more or less as it is now for powerpc). While doing it, we could 
 think of the API to get
 this fd which we need anyway in order to setup the DMA window which is per 
 group (which QEMU does
 not understand) but not per device.

Right. What we really want is our own private iommu interface based on
the iommu type. Each iommu will have it's own quirks in that regard
anyways.

Cheers,
Ben.

 
 
  Alex
  
  diff --git a/hw/vfio_pci.h b/hw/vfio_pci.h
  index 226607c..0d13341 100644
  --- a/hw/vfio_pci.h
  +++ b/hw/vfio_pci.h
  @@ -105,4 +105,6 @@ typedef struct VFIOGroup {
   #define VFIO_FLAG_IOMMU_SHARED_BIT 0
   #define VFIO_FLAG_IOMMU_SHARED (1U  VFIO_FLAG_UIOMMU_SHARED_BIT)
   
  +int vfio_get_container_fd(struct PCIBus *pbus);
  +
   #endif /* __VFIO_H__ */
  
  
  
 
 
 -- 
 Alexey
 
 
 




[Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-09 Thread Alexey Kardashevskiy
The patch enables VFIO on POWER.

It literally does the following:

1. POWERPC IOMMU support (the kernel counterpart is required)

2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.

3. Added vfio_get_container_fd() to VFIO in order to initialize 1).

4. Makefile fixed and is_vfio flag added into sPAPR PHB - required to
distinguish VFIO's DMA context from the emulated one.

WIth the pathes posted today a bit earlier, this patch fully supports
VFIO what includes MSIX as well,


Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
---
 hw/ppc/Makefile.objs |3 ++
 hw/spapr.h   |4 +++
 hw/spapr_iommu.c |   87 ++
 hw/spapr_pci.c   |   23 -
 hw/spapr_pci.h   |2 ++
 hw/vfio_pci.c|   76 +--
 hw/vfio_pci.h|2 ++
 7 files changed, 193 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index f573a95..c46a049 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
 # Xilinx PPC peripherals
 obj-y += xilinx_ethlite.o
 
+# VFIO PCI device assignment
+obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
+
 obj-y := $(addprefix ../,$(obj-y))
diff --git a/hw/spapr.h b/hw/spapr.h
index b37f337..9dca704 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
*propname,
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
   DMAContext *dma);
 
+void spapr_vfio_init_dma(int fd, uint32_t liobn,
+ uint64_t *dma32_window_start,
+ uint64_t *dma32_window_size);
+
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 50c288d..0a194e8 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -16,6 +16,8 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see http://www.gnu.org/licenses/.
  */
+#include sys/ioctl.h
+
 #include hw.h
 #include kvm.h
 #include qdev.h
@@ -23,6 +25,7 @@
 #include dma.h
 
 #include hw/spapr.h
+#include hw/linux-vfio.h
 
 #include libfdt.h
 
@@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, target_ulong 
ioba, target_ulong tce)
 return 0;
 }
 
+/*  API for POWERPC IOMMU  */
+
+#define POWERPC_IOMMU   2
+
+struct tce_iommu_info {
+__u32 argsz;
+__u32 dma32_window_start;
+__u32 dma32_window_size;
+};
+
+#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+struct tce_iommu_dma_map {
+__u32 argsz;
+__u64 va;
+__u64 dmaaddr;
+};
+
+#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
+#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
+
+typedef struct sPAPRVFIOTable {
+int fd;
+uint32_t liobn;
+QLIST_ENTRY(sPAPRVFIOTable) list;
+} sPAPRVFIOTable;
+
+QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
+
+void spapr_vfio_init_dma(int fd, uint32_t liobn,
+ uint64_t *dma32_window_start,
+ uint64_t *dma32_window_size)
+{
+sPAPRVFIOTable *t;
+struct tce_iommu_info info = { .argsz = sizeof(info) };
+
+if (ioctl(fd, POWERPC_IOMMU_GET_INFO, info)) {
+fprintf(stderr, POWERPC_IOMMU_GET_INFO failed %d\n, errno);
+return;
+}
+*dma32_window_start = info.dma32_window_start;
+*dma32_window_size = info.dma32_window_size;
+
+t = g_malloc0(sizeof(*t));
+t-fd = fd;
+t-liobn = liobn;
+
+QLIST_INSERT_HEAD(vfio_tce_tables, t, list);
+}
+
+static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
+{
+sPAPRVFIOTable *t;
+struct tce_iommu_dma_map map = {
+.argsz = sizeof(map),
+.va = 0,
+.dmaaddr = ioba,
+};
+
+QLIST_FOREACH(t, vfio_tce_tables, list) {
+if (t-liobn != liobn) {
+continue;
+}
+if (tce) {
+map.va = (uintptr_t)qemu_get_ram_ptr(tce  ~SPAPR_TCE_PAGE_MASK);
+if (ioctl(t-fd, POWERPC_IOMMU_MAP_DMA, map)) {
+fprintf(stderr, TCE_MAP_DMA: %d\n, errno);
+return H_PARAMETER;
+}
+} else {
+if (ioctl(t-fd, POWERPC_IOMMU_UNMAP_DMA, map)) {
+fprintf(stderr, TCE_UNMAP_DMA: %d\n, errno);
+return H_PARAMETER;
+}
+}
+return H_SUCCESS;
+}
+return H_CONTINUE; /* positive non-zero value */
+}
+
 static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
   target_ulong opcode, target_ulong *args)
 {
@@ -203,6 +286,10 @@ static target_ulong h_put_tce(CPUPPCState *env, 
sPAPREnvironment *spapr,
 if (0 = ret) {
 return ret ? H_PARAMETER : H_SUCCESS;
 }
+ret = put_tce_vfio(liobn, ioba, tce);
+if (0 = ret) {
+return ret ?