Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-12 Thread Alexey Kardashevskiy
On 12/07/12 13:11, Alex Williamson wrote:
> On Wed, 2012-07-11 at 12:54 +1000, Alexey Kardashevskiy wrote:
>> On 11/07/12 02:55, Alex Williamson wrote:
>>> On Tue, 2012-07-10 at 15:51 +1000, Alexey Kardashevskiy wrote:
 The patch enables VFIO on POWER.

 It literally does the following:

 1. POWERPC IOMMU support (the kernel counterpart is required)

 2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.

 3. Added vfio_get_container_fd() to VFIO in order to initialize 1).

 4. Makefile fixed and "is_vfio" flag added into sPAPR PHB - required to
 distinguish VFIO's DMA context from the emulated one.

 WIth the pathes posted today a bit earlier, this patch fully supports
 VFIO what includes MSIX as well,


 Signed-off-by: Alexey Kardashevskiy 
 ---
  hw/ppc/Makefile.objs |3 ++
  hw/spapr.h   |4 +++
  hw/spapr_iommu.c |   87 
 ++
  hw/spapr_pci.c   |   23 -
  hw/spapr_pci.h   |2 ++
  hw/vfio_pci.c|   76 +--
  hw/vfio_pci.h|2 ++
  7 files changed, 193 insertions(+), 4 deletions(-)

 diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
 index f573a95..c46a049 100644
 --- a/hw/ppc/Makefile.objs
 +++ b/hw/ppc/Makefile.objs
 @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
  # Xilinx PPC peripherals
  obj-y += xilinx_ethlite.o
  
 +# VFIO PCI device assignment
 +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
 +
  obj-y := $(addprefix ../,$(obj-y))
 diff --git a/hw/spapr.h b/hw/spapr.h
 index b37f337..9dca704 100644
 --- a/hw/spapr.h
 +++ b/hw/spapr.h
 @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
 *propname,
  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
DMAContext *dma);
  
 +void spapr_vfio_init_dma(int fd, uint32_t liobn,
 + uint64_t *dma32_window_start,
 + uint64_t *dma32_window_size);
 +
  #endif /* !defined (__HW_SPAPR_H__) */
 diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
 index 50c288d..0a194e8 100644
 --- a/hw/spapr_iommu.c
 +++ b/hw/spapr_iommu.c
 @@ -16,6 +16,8 @@
   * You should have received a copy of the GNU Lesser General Public
   * License along with this library; if not, see 
 .
   */
 +#include 
 +
  #include "hw.h"
  #include "kvm.h"
  #include "qdev.h"
 @@ -23,6 +25,7 @@
  #include "dma.h"
  
  #include "hw/spapr.h"
 +#include "hw/linux-vfio.h"
>>>
>>> I really need to move this into linux-headers.
>>>
  
  #include 
  
 @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, 
 target_ulong ioba, target_ulong tce)
  return 0;
  }
  
 +/*  API for POWERPC IOMMU  */
 +
 +#define POWERPC_IOMMU   2
 +
 +struct tce_iommu_info {
 +__u32 argsz;
 +__u32 dma32_window_start;
 +__u32 dma32_window_size;
 +};
 +
 +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
 +
 +struct tce_iommu_dma_map {
 +__u32 argsz;
 +__u64 va;
 +__u64 dmaaddr;
 +};
 +
 +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
 +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
>>>
>>> I assume this would eventually go into the kernel vfio.h with a VFIO_
>>> prefix.  Add a flags field to the structures or it'll be hard to extend
>>> them later.
>>
>>
>> We can always define another type of IOMMU :) But yes, I'll extend both map 
>> and info structures.
>>
>>
>>
 +typedef struct sPAPRVFIOTable {
 +int fd;
 +uint32_t liobn;
 +QLIST_ENTRY(sPAPRVFIOTable) list;
 +} sPAPRVFIOTable;
 +
 +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
 +
 +void spapr_vfio_init_dma(int fd, uint32_t liobn,
 + uint64_t *dma32_window_start,
 + uint64_t *dma32_window_size)
 +{
 +sPAPRVFIOTable *t;
 +struct tce_iommu_info info = { .argsz = sizeof(info) };
 +
 +if (ioctl(fd, POWERPC_IOMMU_GET_INFO, &info)) {
 +fprintf(stderr, "POWERPC_IOMMU_GET_INFO failed %d\n", errno);
 +return;
 +}
 +*dma32_window_start = info.dma32_window_start;
 +*dma32_window_size = info.dma32_window_size;
 +
 +t = g_malloc0(sizeof(*t));
 +t->fd = fd;
 +t->liobn = liobn;
 +
 +QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
 +}
 +
 +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong 
 tce)
 +{
 +sPAPRVFIOTabl

Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-11 Thread Alex Williamson
On Wed, 2012-07-11 at 12:54 +1000, Alexey Kardashevskiy wrote:
> On 11/07/12 02:55, Alex Williamson wrote:
> > On Tue, 2012-07-10 at 15:51 +1000, Alexey Kardashevskiy wrote:
> >> The patch enables VFIO on POWER.
> >>
> >> It literally does the following:
> >>
> >> 1. POWERPC IOMMU support (the kernel counterpart is required)
> >>
> >> 2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.
> >>
> >> 3. Added vfio_get_container_fd() to VFIO in order to initialize 1).
> >>
> >> 4. Makefile fixed and "is_vfio" flag added into sPAPR PHB - required to
> >> distinguish VFIO's DMA context from the emulated one.
> >>
> >> WIth the pathes posted today a bit earlier, this patch fully supports
> >> VFIO what includes MSIX as well,
> >>
> >>
> >> Signed-off-by: Alexey Kardashevskiy 
> >> ---
> >>  hw/ppc/Makefile.objs |3 ++
> >>  hw/spapr.h   |4 +++
> >>  hw/spapr_iommu.c |   87 
> >> ++
> >>  hw/spapr_pci.c   |   23 -
> >>  hw/spapr_pci.h   |2 ++
> >>  hw/vfio_pci.c|   76 +--
> >>  hw/vfio_pci.h|2 ++
> >>  7 files changed, 193 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> >> index f573a95..c46a049 100644
> >> --- a/hw/ppc/Makefile.objs
> >> +++ b/hw/ppc/Makefile.objs
> >> @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
> >>  # Xilinx PPC peripherals
> >>  obj-y += xilinx_ethlite.o
> >>  
> >> +# VFIO PCI device assignment
> >> +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
> >> +
> >>  obj-y := $(addprefix ../,$(obj-y))
> >> diff --git a/hw/spapr.h b/hw/spapr.h
> >> index b37f337..9dca704 100644
> >> --- a/hw/spapr.h
> >> +++ b/hw/spapr.h
> >> @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
> >> *propname,
> >>  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
> >>DMAContext *dma);
> >>  
> >> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
> >> + uint64_t *dma32_window_start,
> >> + uint64_t *dma32_window_size);
> >> +
> >>  #endif /* !defined (__HW_SPAPR_H__) */
> >> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
> >> index 50c288d..0a194e8 100644
> >> --- a/hw/spapr_iommu.c
> >> +++ b/hw/spapr_iommu.c
> >> @@ -16,6 +16,8 @@
> >>   * You should have received a copy of the GNU Lesser General Public
> >>   * License along with this library; if not, see 
> >> .
> >>   */
> >> +#include 
> >> +
> >>  #include "hw.h"
> >>  #include "kvm.h"
> >>  #include "qdev.h"
> >> @@ -23,6 +25,7 @@
> >>  #include "dma.h"
> >>  
> >>  #include "hw/spapr.h"
> >> +#include "hw/linux-vfio.h"
> > 
> > I really need to move this into linux-headers.
> > 
> >>  
> >>  #include 
> >>  
> >> @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, 
> >> target_ulong ioba, target_ulong tce)
> >>  return 0;
> >>  }
> >>  
> >> +/*  API for POWERPC IOMMU  */
> >> +
> >> +#define POWERPC_IOMMU   2
> >> +
> >> +struct tce_iommu_info {
> >> +__u32 argsz;
> >> +__u32 dma32_window_start;
> >> +__u32 dma32_window_size;
> >> +};
> >> +
> >> +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> >> +
> >> +struct tce_iommu_dma_map {
> >> +__u32 argsz;
> >> +__u64 va;
> >> +__u64 dmaaddr;
> >> +};
> >> +
> >> +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
> >> +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
> > 
> > I assume this would eventually go into the kernel vfio.h with a VFIO_
> > prefix.  Add a flags field to the structures or it'll be hard to extend
> > them later.
> 
> 
> We can always define another type of IOMMU :) But yes, I'll extend both map 
> and info structures.
> 
> 
> 
> >> +typedef struct sPAPRVFIOTable {
> >> +int fd;
> >> +uint32_t liobn;
> >> +QLIST_ENTRY(sPAPRVFIOTable) list;
> >> +} sPAPRVFIOTable;
> >> +
> >> +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
> >> +
> >> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
> >> + uint64_t *dma32_window_start,
> >> + uint64_t *dma32_window_size)
> >> +{
> >> +sPAPRVFIOTable *t;
> >> +struct tce_iommu_info info = { .argsz = sizeof(info) };
> >> +
> >> +if (ioctl(fd, POWERPC_IOMMU_GET_INFO, &info)) {
> >> +fprintf(stderr, "POWERPC_IOMMU_GET_INFO failed %d\n", errno);
> >> +return;
> >> +}
> >> +*dma32_window_start = info.dma32_window_start;
> >> +*dma32_window_size = info.dma32_window_size;
> >> +
> >> +t = g_malloc0(sizeof(*t));
> >> +t->fd = fd;
> >> +t->liobn = liobn;
> >> +
> >> +QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
> >> +}
> >> +
> >> +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong 
> >> tce)
> >> +{
> >> +sPAPRVFIOTable *t;
> >> +struct tce_iommu_dma_map map

Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Wed, 2012-07-11 at 12:54 +1000, Alexey Kardashevskiy wrote:
> > Why do you need this, aren't the extension checks sufficient for this to
> > be a nop for you?
> 
> 
> It uses ioapic_remove_gsi_eoi_notifier() so it needs some #ifdef anyway. And 
> as we do not support
> kvm_irqchip_in_kernel(), there is no point in fixing it and I disabled it all.
> When we make eoi notifiers a platform independent, then yes, it will be nop.

In fact we have an internal experimental patch to move our
PIC emulation into the kernel but so far I have not managed
to make it fit in the existing irqchip stuff.

That irqchip interface is nasty. It's completely x86 centric,
and have tendrils all over the place, into the msi code, into
devices (virtio-pci.c) etc... in ways that are essentially unusable for
anything that looks a bit different.

IE. In urgent need of refactoring.

> >> -
> >> +#ifndef TARGET_PPC64
> >>  vdev->msi_vectors[vector].virq = kvm_irqchip_add_msi_route(kvm_state, 
> >> msg);
> >>  if (vdev->msi_vectors[vector].virq < 0 || 
> >>  kvm_irqchip_add_irqfd(kvm_state, fd,
> >> @@ -551,7 +575,11 @@ static int vfio_msix_vector_use(PCIDevice *pdev,
> >>  qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL,
> >>  &vdev->msi_vectors[vector]);
> >>  }
> >> -
> >> +#else
> >> +vdev->msi_vectors[vector].virq = -1;
> >> +qemu_set_fd_handler(fd, vfio_msi_interrupt, NULL,
> >> +&vdev->msi_vectors[vector]);
> >> +#endif
> > 
> > This shouldn't be necessary once the abort is removed from
> > kvm_irqchip_add_msi_route.  It'll be merged next time the kvm uq tree
> > merges into qemu.

It must also return an irq number, what to chose in that case ? Ie. the
whole irqchip API is a trainwreck if you ask me :-) Very poorly thought
out.

> True, I just did not pick up your very last changes. Updating is always 
> painful, and now it is even
> worse then usual as pci_get_irq has been renamed to something else :) Will do 
> though.

 .../...

> Well I probably can add MemoryListener for the DMA window and move all 
> power-specific map/unmap code
> to VFIO but it does not look much better. I would rather prefer separating 
> IOMMU code from vfio_pci
> somehow (more or less as it is now for powerpc). While doing it, we could 
> think of the API to get
> this fd which we need anyway in order to setup the DMA window which is per 
> group (which QEMU does
> not understand) but not per device.

Right. What we really want is our own private iommu interface based on
the iommu type. Each iommu will have it's own "quirks" in that regard
anyways.

Cheers,
Ben.

> 
> 
> > Alex
> > 
> >> diff --git a/hw/vfio_pci.h b/hw/vfio_pci.h
> >> index 226607c..0d13341 100644
> >> --- a/hw/vfio_pci.h
> >> +++ b/hw/vfio_pci.h
> >> @@ -105,4 +105,6 @@ typedef struct VFIOGroup {
> >>  #define VFIO_FLAG_IOMMU_SHARED_BIT 0
> >>  #define VFIO_FLAG_IOMMU_SHARED (1U << VFIO_FLAG_UIOMMU_SHARED_BIT)
> >>  
> >> +int vfio_get_container_fd(struct PCIBus *pbus);
> >> +
> >>  #endif /* __VFIO_H__ */
> > 
> > 
> > 
> 
> 
> -- 
> Alexey
> 
> 
> 




Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alexey Kardashevskiy
On 11/07/12 02:55, Alex Williamson wrote:
> On Tue, 2012-07-10 at 15:51 +1000, Alexey Kardashevskiy wrote:
>> The patch enables VFIO on POWER.
>>
>> It literally does the following:
>>
>> 1. POWERPC IOMMU support (the kernel counterpart is required)
>>
>> 2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.
>>
>> 3. Added vfio_get_container_fd() to VFIO in order to initialize 1).
>>
>> 4. Makefile fixed and "is_vfio" flag added into sPAPR PHB - required to
>> distinguish VFIO's DMA context from the emulated one.
>>
>> WIth the pathes posted today a bit earlier, this patch fully supports
>> VFIO what includes MSIX as well,
>>
>>
>> Signed-off-by: Alexey Kardashevskiy 
>> ---
>>  hw/ppc/Makefile.objs |3 ++
>>  hw/spapr.h   |4 +++
>>  hw/spapr_iommu.c |   87 
>> ++
>>  hw/spapr_pci.c   |   23 -
>>  hw/spapr_pci.h   |2 ++
>>  hw/vfio_pci.c|   76 +--
>>  hw/vfio_pci.h|2 ++
>>  7 files changed, 193 insertions(+), 4 deletions(-)
>>
>> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
>> index f573a95..c46a049 100644
>> --- a/hw/ppc/Makefile.objs
>> +++ b/hw/ppc/Makefile.objs
>> @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
>>  # Xilinx PPC peripherals
>>  obj-y += xilinx_ethlite.o
>>  
>> +# VFIO PCI device assignment
>> +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
>> +
>>  obj-y := $(addprefix ../,$(obj-y))
>> diff --git a/hw/spapr.h b/hw/spapr.h
>> index b37f337..9dca704 100644
>> --- a/hw/spapr.h
>> +++ b/hw/spapr.h
>> @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
>> *propname,
>>  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
>>DMAContext *dma);
>>  
>> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
>> + uint64_t *dma32_window_start,
>> + uint64_t *dma32_window_size);
>> +
>>  #endif /* !defined (__HW_SPAPR_H__) */
>> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
>> index 50c288d..0a194e8 100644
>> --- a/hw/spapr_iommu.c
>> +++ b/hw/spapr_iommu.c
>> @@ -16,6 +16,8 @@
>>   * You should have received a copy of the GNU Lesser General Public
>>   * License along with this library; if not, see 
>> .
>>   */
>> +#include 
>> +
>>  #include "hw.h"
>>  #include "kvm.h"
>>  #include "qdev.h"
>> @@ -23,6 +25,7 @@
>>  #include "dma.h"
>>  
>>  #include "hw/spapr.h"
>> +#include "hw/linux-vfio.h"
> 
> I really need to move this into linux-headers.
> 
>>  
>>  #include 
>>  
>> @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, target_ulong 
>> ioba, target_ulong tce)
>>  return 0;
>>  }
>>  
>> +/*  API for POWERPC IOMMU  */
>> +
>> +#define POWERPC_IOMMU   2
>> +
>> +struct tce_iommu_info {
>> +__u32 argsz;
>> +__u32 dma32_window_start;
>> +__u32 dma32_window_size;
>> +};
>> +
>> +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
>> +
>> +struct tce_iommu_dma_map {
>> +__u32 argsz;
>> +__u64 va;
>> +__u64 dmaaddr;
>> +};
>> +
>> +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
>> +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
> 
> I assume this would eventually go into the kernel vfio.h with a VFIO_
> prefix.  Add a flags field to the structures or it'll be hard to extend
> them later.


We can always define another type of IOMMU :) But yes, I'll extend both map and 
info structures.



>> +typedef struct sPAPRVFIOTable {
>> +int fd;
>> +uint32_t liobn;
>> +QLIST_ENTRY(sPAPRVFIOTable) list;
>> +} sPAPRVFIOTable;
>> +
>> +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
>> +
>> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
>> + uint64_t *dma32_window_start,
>> + uint64_t *dma32_window_size)
>> +{
>> +sPAPRVFIOTable *t;
>> +struct tce_iommu_info info = { .argsz = sizeof(info) };
>> +
>> +if (ioctl(fd, POWERPC_IOMMU_GET_INFO, &info)) {
>> +fprintf(stderr, "POWERPC_IOMMU_GET_INFO failed %d\n", errno);
>> +return;
>> +}
>> +*dma32_window_start = info.dma32_window_start;
>> +*dma32_window_size = info.dma32_window_size;
>> +
>> +t = g_malloc0(sizeof(*t));
>> +t->fd = fd;
>> +t->liobn = liobn;
>> +
>> +QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
>> +}
>> +
>> +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
>> +{
>> +sPAPRVFIOTable *t;
>> +struct tce_iommu_dma_map map = {
>> +.argsz = sizeof(map),
>> +.va = 0,
>> +.dmaaddr = ioba,
>> +};
>> +
>> +QLIST_FOREACH(t, &vfio_tce_tables, list) {
>> +if (t->liobn != liobn) {
>> +continue;
>> +}
>> +if (tce) {
>> +map.va = (uintptr_t)qemu_get_ram_ptr(tce & 
>> ~SPAPR_TCE_PAGE_MASK);
>> +if (ioctl(t->

Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Wed, 2012-07-11 at 10:17 +1000, Alexey Kardashevskiy wrote:
> So the current one would be SPAPR_TCE_32?

No, the iommu type is SPAPR_TCE, but the *window* info you get here is
the 32-bit window. My thinking is add some versionning and a bunch of
reserved fields to that info struct so we can stick a few more things,
or at the very least add a flags field, so we can return if/when we
support DDW etc... on that iommu.

Cheers,
Ben.
 




Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alexey Kardashevskiy
On 11/07/12 10:04, Benjamin Herrenschmidt wrote:
> On Wed, 2012-07-11 at 09:55 +1000, Alexey Kardashevskiy wrote:
>> On 11/07/12 08:26, Scott Wood wrote:
>>> On 07/10/2012 12:51 AM, Alexey Kardashevskiy wrote:
 The patch enables VFIO on POWER.

 It literally does the following:

 1. POWERPC IOMMU support (the kernel counterpart is required)
>>> [snip]
 +/*  API for POWERPC IOMMU  */
 +
 +#define POWERPC_IOMMU   2
 +
 +struct tce_iommu_info {
 +__u32 argsz;
 +__u32 dma32_window_start;
 +__u32 dma32_window_size;
 +};
 +
 +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
>>>
>>> Is there a more specific name that could be used for this?  Not all
>>> PowerPC chips have the same kind of IOMMU.
> 
>>
>> Ben, is is SPAPR? BOOK3S?
> 
> So we have varieties of different iommus in the kernel indeed, we
> probably want the info ioctl to reflect that. I would call this one
> spapr_tce.

VFIO provides such ioctl actually and it is it who returns POWERPC_IOMMU as an 
IOMMU type.

ok. SPAPR_TCE.


> Also we will want a few other things, dunno if that's reflected here,
> or whether the ioctl is easily extendable, but in the long run we will
> need:
> 
>  - Ways to tell KVM about association between a liobn (logical bus
> number as used in H_PUT_TCE) and an iommu so we can implement the real
> mode H_PUT_TCE properly.
> 
>  - We will need some conduit to implement the "DDW" APIs (part of PAPR
> allowing the guest to control the DMA windows, ie, by creating new
> windows in 64-bit DMA space, with different page sizes etc). So you
> may want to make it clear that the above provides information about the
> "base window" specifically.

So the current one would be SPAPR_TCE_32?


-- 
Alexey





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Wed, 2012-07-11 at 09:55 +1000, Alexey Kardashevskiy wrote:
> On 11/07/12 08:26, Scott Wood wrote:
> > On 07/10/2012 12:51 AM, Alexey Kardashevskiy wrote:
> >> The patch enables VFIO on POWER.
> >>
> >> It literally does the following:
> >>
> >> 1. POWERPC IOMMU support (the kernel counterpart is required)
> > [snip]
> >> +/*  API for POWERPC IOMMU  */
> >> +
> >> +#define POWERPC_IOMMU   2
> >> +
> >> +struct tce_iommu_info {
> >> +__u32 argsz;
> >> +__u32 dma32_window_start;
> >> +__u32 dma32_window_size;
> >> +};
> >> +
> >> +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> > 
> > Is there a more specific name that could be used for this?  Not all
> > PowerPC chips have the same kind of IOMMU.

> 
> Ben, is is SPAPR? BOOK3S?

So we have varieties of different iommus in the kernel indeed, we
probably want the info ioctl to reflect that. I would call this one
spapr_tce.

Also we will want a few other things, dunno if that's reflected here,
or whether the ioctl is easily extendable, but in the long run we will
need:

 - Ways to tell KVM about association between a liobn (logical bus
number as used in H_PUT_TCE) and an iommu so we can implement the real
mode H_PUT_TCE properly.

 - We will need some conduit to implement the "DDW" APIs (part of PAPR
allowing the guest to control the DMA windows, ie, by creating new
windows in 64-bit DMA space, with different page sizes etc). So you
may want to make it clear that the above provides information about the
"base window" specifically.

Cheers,
Ben.





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alexey Kardashevskiy
On 11/07/12 08:26, Scott Wood wrote:
> On 07/10/2012 12:51 AM, Alexey Kardashevskiy wrote:
>> The patch enables VFIO on POWER.
>>
>> It literally does the following:
>>
>> 1. POWERPC IOMMU support (the kernel counterpart is required)
> [snip]
>> +/*  API for POWERPC IOMMU  */
>> +
>> +#define POWERPC_IOMMU   2
>> +
>> +struct tce_iommu_info {
>> +__u32 argsz;
>> +__u32 dma32_window_start;
>> +__u32 dma32_window_size;
>> +};
>> +
>> +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> 
> Is there a more specific name that could be used for this?  Not all
> PowerPC chips have the same kind of IOMMU.


Ben, is is SPAPR? BOOK3S?


-- 
Alexey





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Scott Wood
On 07/10/2012 12:51 AM, Alexey Kardashevskiy wrote:
> The patch enables VFIO on POWER.
> 
> It literally does the following:
> 
> 1. POWERPC IOMMU support (the kernel counterpart is required)
[snip]
> +/*  API for POWERPC IOMMU  */
> +
> +#define POWERPC_IOMMU   2
> +
> +struct tce_iommu_info {
> +__u32 argsz;
> +__u32 dma32_window_start;
> +__u32 dma32_window_size;
> +};
> +
> +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)

Is there a more specific name that could be used for this?  Not all
PowerPC chips have the same kind of IOMMU.

-Scott




Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Tue, 2012-07-10 at 15:48 -0600, Alex Williamson wrote:
> > specific hypervisor calls, not sure what a MemoryListener would do
> > here ...
> 
> Hmm, the guest directed iommu updates via hypercalls may not really fit
> the MemoryListener model.  I'm just trying to think of ways to avoid
> having an offshoot of vfio in the power code base by making use of
> common abstraction layers.  If we got a region_add/del callback we could
> potentially move the spapr map and unmap into vfio like we do for x86.
> Thanks, 

In the end we don't really want to use that anyway. map and unmap are
*extremely* performance sensitive in practice, so plan is to implement
the hypercall directly in the kernel KVM at a level where it won't even
go near generic code :-)

Basically, when the hypercall gets in, we take control in what we call
"real mode" on powerpc (MMU off, translation disabled), we have a window
to implement critical stuff like this before we context switch the MMU
to the host context (which on P7 is quite expensive).

This is where I want to go directly whack the TCE table as used by the
HW (provided the page has a good PTE of course), pretty much like we do
for populating the main MMU hash table.

So the map/unmap path will be entirely in arch specific code. The one
you see in Alexey code is basically only ever going to be used by
something like qemu full emulation...

Cheers,
Ben.





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alex Williamson
On Wed, 2012-07-11 at 07:32 +1000, Benjamin Herrenschmidt wrote:
> On Tue, 2012-07-10 at 10:55 -0600, Alex Williamson wrote:
> > 
> > I wish you could do this through a MemoryListener like we do on x86.
> > 
> 
> Can you elaborate ? TCE (iommu) manipulation on PAPR is done via
> specific hypervisor calls, not sure what a MemoryListener would do
> here ...

Hmm, the guest directed iommu updates via hypercalls may not really fit
the MemoryListener model.  I'm just trying to think of ways to avoid
having an offshoot of vfio in the power code base by making use of
common abstraction layers.  If we got a region_add/del callback we could
potentially move the spapr map and unmap into vfio like we do for x86.
Thanks,

Alex




Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Benjamin Herrenschmidt
On Tue, 2012-07-10 at 10:55 -0600, Alex Williamson wrote:
> 
> I wish you could do this through a MemoryListener like we do on x86.
> 

Can you elaborate ? TCE (iommu) manipulation on PAPR is done via
specific hypervisor calls, not sure what a MemoryListener would do
here ...

Cheers,
Ben.





Re: [Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-10 Thread Alex Williamson
On Tue, 2012-07-10 at 15:51 +1000, Alexey Kardashevskiy wrote:
> The patch enables VFIO on POWER.
> 
> It literally does the following:
> 
> 1. POWERPC IOMMU support (the kernel counterpart is required)
> 
> 2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.
> 
> 3. Added vfio_get_container_fd() to VFIO in order to initialize 1).
> 
> 4. Makefile fixed and "is_vfio" flag added into sPAPR PHB - required to
> distinguish VFIO's DMA context from the emulated one.
> 
> WIth the pathes posted today a bit earlier, this patch fully supports
> VFIO what includes MSIX as well,
> 
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
>  hw/ppc/Makefile.objs |3 ++
>  hw/spapr.h   |4 +++
>  hw/spapr_iommu.c |   87 
> ++
>  hw/spapr_pci.c   |   23 -
>  hw/spapr_pci.h   |2 ++
>  hw/vfio_pci.c|   76 +--
>  hw/vfio_pci.h|2 ++
>  7 files changed, 193 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index f573a95..c46a049 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
>  # Xilinx PPC peripherals
>  obj-y += xilinx_ethlite.o
>  
> +# VFIO PCI device assignment
> +obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
> +
>  obj-y := $(addprefix ../,$(obj-y))
> diff --git a/hw/spapr.h b/hw/spapr.h
> index b37f337..9dca704 100644
> --- a/hw/spapr.h
> +++ b/hw/spapr.h
> @@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
> *propname,
>  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
>DMAContext *dma);
>  
> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
> + uint64_t *dma32_window_start,
> + uint64_t *dma32_window_size);
> +
>  #endif /* !defined (__HW_SPAPR_H__) */
> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
> index 50c288d..0a194e8 100644
> --- a/hw/spapr_iommu.c
> +++ b/hw/spapr_iommu.c
> @@ -16,6 +16,8 @@
>   * You should have received a copy of the GNU Lesser General Public
>   * License along with this library; if not, see 
> .
>   */
> +#include 
> +
>  #include "hw.h"
>  #include "kvm.h"
>  #include "qdev.h"
> @@ -23,6 +25,7 @@
>  #include "dma.h"
>  
>  #include "hw/spapr.h"
> +#include "hw/linux-vfio.h"

I really need to move this into linux-headers.

>  
>  #include 
>  
> @@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, target_ulong 
> ioba, target_ulong tce)
>  return 0;
>  }
>  
> +/*  API for POWERPC IOMMU  */
> +
> +#define POWERPC_IOMMU   2
> +
> +struct tce_iommu_info {
> +__u32 argsz;
> +__u32 dma32_window_start;
> +__u32 dma32_window_size;
> +};
> +
> +#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
> +
> +struct tce_iommu_dma_map {
> +__u32 argsz;
> +__u64 va;
> +__u64 dmaaddr;
> +};
> +
> +#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
> +#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)

I assume this would eventually go into the kernel vfio.h with a VFIO_
prefix.  Add a flags field to the structures or it'll be hard to extend
them later.

> +typedef struct sPAPRVFIOTable {
> +int fd;
> +uint32_t liobn;
> +QLIST_ENTRY(sPAPRVFIOTable) list;
> +} sPAPRVFIOTable;
> +
> +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
> +
> +void spapr_vfio_init_dma(int fd, uint32_t liobn,
> + uint64_t *dma32_window_start,
> + uint64_t *dma32_window_size)
> +{
> +sPAPRVFIOTable *t;
> +struct tce_iommu_info info = { .argsz = sizeof(info) };
> +
> +if (ioctl(fd, POWERPC_IOMMU_GET_INFO, &info)) {
> +fprintf(stderr, "POWERPC_IOMMU_GET_INFO failed %d\n", errno);
> +return;
> +}
> +*dma32_window_start = info.dma32_window_start;
> +*dma32_window_size = info.dma32_window_size;
> +
> +t = g_malloc0(sizeof(*t));
> +t->fd = fd;
> +t->liobn = liobn;
> +
> +QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
> +}
> +
> +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
> +{
> +sPAPRVFIOTable *t;
> +struct tce_iommu_dma_map map = {
> +.argsz = sizeof(map),
> +.va = 0,
> +.dmaaddr = ioba,
> +};
> +
> +QLIST_FOREACH(t, &vfio_tce_tables, list) {
> +if (t->liobn != liobn) {
> +continue;
> +}
> +if (tce) {
> +map.va = (uintptr_t)qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK);
> +if (ioctl(t->fd, POWERPC_IOMMU_MAP_DMA, &map)) {
> +fprintf(stderr, "TCE_MAP_DMA: %d\n", errno);
> +return H_PARAMETER;
> +}
> +} else {
> +if (ioctl(t->fd, POWERPC_IOMMU_UNMAP_DMA, &map)) {
> +fprintf(stderr, "TCE_UNMAP_DMA: %d\n", er

[Qemu-devel] [PATCH 2/2] vfio-powerpc: added VFIO support

2012-07-09 Thread Alexey Kardashevskiy
The patch enables VFIO on POWER.

It literally does the following:

1. POWERPC IOMMU support (the kernel counterpart is required)

2. Added #ifdef TARGET_PPC64 for EOI handlers initialisation.

3. Added vfio_get_container_fd() to VFIO in order to initialize 1).

4. Makefile fixed and "is_vfio" flag added into sPAPR PHB - required to
distinguish VFIO's DMA context from the emulated one.

WIth the pathes posted today a bit earlier, this patch fully supports
VFIO what includes MSIX as well,


Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/Makefile.objs |3 ++
 hw/spapr.h   |4 +++
 hw/spapr_iommu.c |   87 ++
 hw/spapr_pci.c   |   23 -
 hw/spapr_pci.h   |2 ++
 hw/vfio_pci.c|   76 +--
 hw/vfio_pci.h|2 ++
 7 files changed, 193 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index f573a95..c46a049 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -25,4 +25,7 @@ obj-$(CONFIG_FDT) += ../device_tree.o
 # Xilinx PPC peripherals
 obj-y += xilinx_ethlite.o
 
+# VFIO PCI device assignment
+obj-$(CONFIG_VFIO_PCI) += vfio_pci.o
+
 obj-y := $(addprefix ../,$(obj-y))
diff --git a/hw/spapr.h b/hw/spapr.h
index b37f337..9dca704 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -340,4 +340,8 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
*propname,
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
   DMAContext *dma);
 
+void spapr_vfio_init_dma(int fd, uint32_t liobn,
+ uint64_t *dma32_window_start,
+ uint64_t *dma32_window_size);
+
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 50c288d..0a194e8 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -16,6 +16,8 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, see .
  */
+#include 
+
 #include "hw.h"
 #include "kvm.h"
 #include "qdev.h"
@@ -23,6 +25,7 @@
 #include "dma.h"
 
 #include "hw/spapr.h"
+#include "hw/linux-vfio.h"
 
 #include 
 
@@ -183,6 +186,86 @@ static int put_tce_emu(target_ulong liobn, target_ulong 
ioba, target_ulong tce)
 return 0;
 }
 
+/*  API for POWERPC IOMMU  */
+
+#define POWERPC_IOMMU   2
+
+struct tce_iommu_info {
+__u32 argsz;
+__u32 dma32_window_start;
+__u32 dma32_window_size;
+};
+
+#define POWERPC_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+struct tce_iommu_dma_map {
+__u32 argsz;
+__u64 va;
+__u64 dmaaddr;
+};
+
+#define POWERPC_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
+#define POWERPC_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
+
+typedef struct sPAPRVFIOTable {
+int fd;
+uint32_t liobn;
+QLIST_ENTRY(sPAPRVFIOTable) list;
+} sPAPRVFIOTable;
+
+QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
+
+void spapr_vfio_init_dma(int fd, uint32_t liobn,
+ uint64_t *dma32_window_start,
+ uint64_t *dma32_window_size)
+{
+sPAPRVFIOTable *t;
+struct tce_iommu_info info = { .argsz = sizeof(info) };
+
+if (ioctl(fd, POWERPC_IOMMU_GET_INFO, &info)) {
+fprintf(stderr, "POWERPC_IOMMU_GET_INFO failed %d\n", errno);
+return;
+}
+*dma32_window_start = info.dma32_window_start;
+*dma32_window_size = info.dma32_window_size;
+
+t = g_malloc0(sizeof(*t));
+t->fd = fd;
+t->liobn = liobn;
+
+QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
+}
+
+static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong tce)
+{
+sPAPRVFIOTable *t;
+struct tce_iommu_dma_map map = {
+.argsz = sizeof(map),
+.va = 0,
+.dmaaddr = ioba,
+};
+
+QLIST_FOREACH(t, &vfio_tce_tables, list) {
+if (t->liobn != liobn) {
+continue;
+}
+if (tce) {
+map.va = (uintptr_t)qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK);
+if (ioctl(t->fd, POWERPC_IOMMU_MAP_DMA, &map)) {
+fprintf(stderr, "TCE_MAP_DMA: %d\n", errno);
+return H_PARAMETER;
+}
+} else {
+if (ioctl(t->fd, POWERPC_IOMMU_UNMAP_DMA, &map)) {
+fprintf(stderr, "TCE_UNMAP_DMA: %d\n", errno);
+return H_PARAMETER;
+}
+}
+return H_SUCCESS;
+}
+return H_CONTINUE; /* positive non-zero value */
+}
+
 static target_ulong h_put_tce(CPUPPCState *env, sPAPREnvironment *spapr,
   target_ulong opcode, target_ulong *args)
 {
@@ -203,6 +286,10 @@ static target_ulong h_put_tce(CPUPPCState *env, 
sPAPREnvironment *spapr,
 if (0 >= ret) {
 return ret ? H_PARAMETER : H_SUCCESS;
 }
+ret = put_tce_vfio(liobn, ioba, tce);
+if (0 >= ret) {
+return ret ? H_PARAM