Re: [Qemu-devel] [PATCH qemu v14 18/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-03-30 Thread David Gibson
On Thu, Mar 24, 2016 at 01:32:48PM +1100, Alexey Kardashevskiy wrote:
> On 03/23/2016 05:11 PM, David Gibson wrote:
> >On Wed, Mar 23, 2016 at 02:28:01PM +1100, Alexey Kardashevskiy wrote:
> >>On 03/23/2016 01:13 PM, David Gibson wrote:
> >>>On Mon, Mar 21, 2016 at 06:47:06PM +1100, Alexey Kardashevskiy wrote:
> This adds support for Dynamic DMA Windows (DDW) option defined by
> the SPAPR specification which allows to have additional DMA window(s)
> 
> This implements DDW for emulated and VFIO devices.
> This reserves RTAS token numbers for DDW calls.
> 
> This changes the TCE table migration descriptor to support dynamic
> tables as from now on, PHB will create as many stub TCE table objects
> as PHB can possibly support but not all of them might be initialized at
> the time of migration because DDW might or might not be requested by
> the guest.
> 
> The "ddw" property is enabled by default on a PHB but for compatibility
> the pseries-2.5 machine and older disable it.
> 
> This implements DDW for VFIO. The host kernel support is required.
> This adds a "levels" property to PHB to control the number of levels
> in the actual TCE table allocated by the host kernel, 0 is the default
> value to tell QEMU to calculate the correct value. Current hardware
> supports up to 5 levels.
> 
> The existing linux guests try creating one additional huge DMA window
> with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
> the guest switches to dma_direct_ops and never calls TCE hypercalls
> (H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
> and not waste time on map/unmap later. This adds a "dma64_win_addr"
> property which is a bus address for the 64bit window and by default
> set to 0x800... as this is what the modern POWER8 hardware
> uses and this allows having emulated and VFIO devices on the same bus.
> 
> This adds 4 RTAS handlers:
> * ibm,query-pe-dma-window
> * ibm,create-pe-dma-window
> * ibm,remove-pe-dma-window
> * ibm,reset-pe-dma-window
> These are registered from type_init() callback.
> 
> These RTAS handlers are implemented in a separate file to avoid polluting
> spapr_iommu.c with PCI.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
>   hw/ppc/Makefile.objs|   1 +
>   hw/ppc/spapr.c  |   7 +-
>   hw/ppc/spapr_pci.c  |  73 ---
>   hw/ppc/spapr_rtas_ddw.c | 300 
>  
>   hw/vfio/common.c|   5 -
>   include/hw/pci-host/spapr.h |  13 ++
>   include/hw/ppc/spapr.h  |  16 ++-
>   trace-events|   4 +
>   8 files changed, 395 insertions(+), 24 deletions(-)
>   create mode 100644 hw/ppc/spapr_rtas_ddw.c
> 
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index c1ffc77..986b36f 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o 
> spapr_drc.o spapr_rng.o
>   ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>   obj-y += spapr_pci_vfio.o
>   endif
> +obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
>   # PowerPC 4xx boards
>   obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
>   obj-y += ppc4xx_pci.o
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d0bb423..ef4c637 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2362,7 +2362,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
>    * pseries-2.5
>    */
>   #define SPAPR_COMPAT_2_5 \
> -HW_COMPAT_2_5
> +HW_COMPAT_2_5 \
> +{\
> +.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
> +.property = "ddw",\
> +.value= stringify(off),\
> +},
> 
>   static void spapr_machine_2_5_instance_options(MachineState *machine)
>   {
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index af99a36..3bb294a 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -803,12 +803,12 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState 
> *sphb, PCIDevice *pdev)
>   return buf;
>   }
> 
> -static void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
> -   uint32_t liobn,
> -   uint32_t page_shift,
> -   uint64_t window_addr,
> -   uint64_t window_size,
> -   Error **errp)
> +void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
> + uint32_t liobn,
> + uint32_t page_shift,
> +  

Re: [Qemu-devel] [PATCH qemu v14 18/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-03-28 Thread Alexey Kardashevskiy

On 03/29/2016 04:22 PM, David Gibson wrote:

On Thu, Mar 24, 2016 at 01:32:48PM +1100, Alexey Kardashevskiy wrote:

On 03/23/2016 05:11 PM, David Gibson wrote:

On Wed, Mar 23, 2016 at 02:28:01PM +1100, Alexey Kardashevskiy wrote:

On 03/23/2016 01:13 PM, David Gibson wrote:

On Mon, Mar 21, 2016 at 06:47:06PM +1100, Alexey Kardashevskiy wrote:

This adds support for Dynamic DMA Windows (DDW) option defined by
the SPAPR specification which allows to have additional DMA window(s)

This implements DDW for emulated and VFIO devices.
This reserves RTAS token numbers for DDW calls.

This changes the TCE table migration descriptor to support dynamic
tables as from now on, PHB will create as many stub TCE table objects
as PHB can possibly support but not all of them might be initialized at
the time of migration because DDW might or might not be requested by
the guest.

The "ddw" property is enabled by default on a PHB but for compatibility
the pseries-2.5 machine and older disable it.

This implements DDW for VFIO. The host kernel support is required.
This adds a "levels" property to PHB to control the number of levels
in the actual TCE table allocated by the host kernel, 0 is the default
value to tell QEMU to calculate the correct value. Current hardware
supports up to 5 levels.

The existing linux guests try creating one additional huge DMA window
with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
the guest switches to dma_direct_ops and never calls TCE hypercalls
(H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
and not waste time on map/unmap later. This adds a "dma64_win_addr"
property which is a bus address for the 64bit window and by default
set to 0x800... as this is what the modern POWER8 hardware
uses and this allows having emulated and VFIO devices on the same bus.

This adds 4 RTAS handlers:
* ibm,query-pe-dma-window
* ibm,create-pe-dma-window
* ibm,remove-pe-dma-window
* ibm,reset-pe-dma-window
These are registered from type_init() callback.

These RTAS handlers are implemented in a separate file to avoid polluting
spapr_iommu.c with PCI.

Signed-off-by: Alexey Kardashevskiy 
---
  hw/ppc/Makefile.objs|   1 +
  hw/ppc/spapr.c  |   7 +-
  hw/ppc/spapr_pci.c  |  73 ---
  hw/ppc/spapr_rtas_ddw.c | 300 
  hw/vfio/common.c|   5 -
  include/hw/pci-host/spapr.h |  13 ++
  include/hw/ppc/spapr.h  |  16 ++-
  trace-events|   4 +
  8 files changed, 395 insertions(+), 24 deletions(-)
  create mode 100644 hw/ppc/spapr_rtas_ddw.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c1ffc77..986b36f 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
spapr_rng.o
  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
  obj-y += spapr_pci_vfio.o
  endif
+obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
  # PowerPC 4xx boards
  obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
  obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d0bb423..ef4c637 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2362,7 +2362,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
   * pseries-2.5
   */
  #define SPAPR_COMPAT_2_5 \
-HW_COMPAT_2_5
+HW_COMPAT_2_5 \
+{\
+.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
+.property = "ddw",\
+.value= stringify(off),\
+},

  static void spapr_machine_2_5_instance_options(MachineState *machine)
  {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index af99a36..3bb294a 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -803,12 +803,12 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, 
PCIDevice *pdev)
  return buf;
  }

-static void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
-   uint32_t liobn,
-   uint32_t page_shift,
-   uint64_t window_addr,
-   uint64_t window_size,
-   Error **errp)
+void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
+ uint32_t liobn,
+ uint32_t page_shift,
+ uint64_t window_addr,
+ uint64_t window_size,
+ Error **errp)
  {
  sPAPRTCETable *tcet;
  uint32_t nb_table = window_size >> page_shift;
@@ -825,10 +825,16 @@ static void spapr_phb_dma_window_enable(sPAPRPHBState 
*sphb,
  return;
  }

+if (SPAPR_PCI_DMA_WINDOW_NUM(liobn) && !sphb->ddw_enabled) {
+error_setg(errp,
+   "Attempt to use second window when DDW is disabled on PHB");
+return;
+}


This should never happen unless something 

Re: [Qemu-devel] [PATCH qemu v14 18/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-03-28 Thread David Gibson
On Thu, Mar 24, 2016 at 01:32:48PM +1100, Alexey Kardashevskiy wrote:
> On 03/23/2016 05:11 PM, David Gibson wrote:
> >On Wed, Mar 23, 2016 at 02:28:01PM +1100, Alexey Kardashevskiy wrote:
> >>On 03/23/2016 01:13 PM, David Gibson wrote:
> >>>On Mon, Mar 21, 2016 at 06:47:06PM +1100, Alexey Kardashevskiy wrote:
> This adds support for Dynamic DMA Windows (DDW) option defined by
> the SPAPR specification which allows to have additional DMA window(s)
> 
> This implements DDW for emulated and VFIO devices.
> This reserves RTAS token numbers for DDW calls.
> 
> This changes the TCE table migration descriptor to support dynamic
> tables as from now on, PHB will create as many stub TCE table objects
> as PHB can possibly support but not all of them might be initialized at
> the time of migration because DDW might or might not be requested by
> the guest.
> 
> The "ddw" property is enabled by default on a PHB but for compatibility
> the pseries-2.5 machine and older disable it.
> 
> This implements DDW for VFIO. The host kernel support is required.
> This adds a "levels" property to PHB to control the number of levels
> in the actual TCE table allocated by the host kernel, 0 is the default
> value to tell QEMU to calculate the correct value. Current hardware
> supports up to 5 levels.
> 
> The existing linux guests try creating one additional huge DMA window
> with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
> the guest switches to dma_direct_ops and never calls TCE hypercalls
> (H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
> and not waste time on map/unmap later. This adds a "dma64_win_addr"
> property which is a bus address for the 64bit window and by default
> set to 0x800... as this is what the modern POWER8 hardware
> uses and this allows having emulated and VFIO devices on the same bus.
> 
> This adds 4 RTAS handlers:
> * ibm,query-pe-dma-window
> * ibm,create-pe-dma-window
> * ibm,remove-pe-dma-window
> * ibm,reset-pe-dma-window
> These are registered from type_init() callback.
> 
> These RTAS handlers are implemented in a separate file to avoid polluting
> spapr_iommu.c with PCI.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
>   hw/ppc/Makefile.objs|   1 +
>   hw/ppc/spapr.c  |   7 +-
>   hw/ppc/spapr_pci.c  |  73 ---
>   hw/ppc/spapr_rtas_ddw.c | 300 
>  
>   hw/vfio/common.c|   5 -
>   include/hw/pci-host/spapr.h |  13 ++
>   include/hw/ppc/spapr.h  |  16 ++-
>   trace-events|   4 +
>   8 files changed, 395 insertions(+), 24 deletions(-)
>   create mode 100644 hw/ppc/spapr_rtas_ddw.c
> 
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index c1ffc77..986b36f 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o 
> spapr_drc.o spapr_rng.o
>   ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>   obj-y += spapr_pci_vfio.o
>   endif
> +obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
>   # PowerPC 4xx boards
>   obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
>   obj-y += ppc4xx_pci.o
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d0bb423..ef4c637 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2362,7 +2362,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
>    * pseries-2.5
>    */
>   #define SPAPR_COMPAT_2_5 \
> -HW_COMPAT_2_5
> +HW_COMPAT_2_5 \
> +{\
> +.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
> +.property = "ddw",\
> +.value= stringify(off),\
> +},
> 
>   static void spapr_machine_2_5_instance_options(MachineState *machine)
>   {
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index af99a36..3bb294a 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -803,12 +803,12 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState 
> *sphb, PCIDevice *pdev)
>   return buf;
>   }
> 
> -static void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
> -   uint32_t liobn,
> -   uint32_t page_shift,
> -   uint64_t window_addr,
> -   uint64_t window_size,
> -   Error **errp)
> +void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
> + uint32_t liobn,
> + uint32_t page_shift,
> +  

Re: [Qemu-devel] [PATCH qemu v14 18/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-03-23 Thread Alexey Kardashevskiy

On 03/23/2016 05:11 PM, David Gibson wrote:

On Wed, Mar 23, 2016 at 02:28:01PM +1100, Alexey Kardashevskiy wrote:

On 03/23/2016 01:13 PM, David Gibson wrote:

On Mon, Mar 21, 2016 at 06:47:06PM +1100, Alexey Kardashevskiy wrote:

This adds support for Dynamic DMA Windows (DDW) option defined by
the SPAPR specification which allows to have additional DMA window(s)

This implements DDW for emulated and VFIO devices.
This reserves RTAS token numbers for DDW calls.

This changes the TCE table migration descriptor to support dynamic
tables as from now on, PHB will create as many stub TCE table objects
as PHB can possibly support but not all of them might be initialized at
the time of migration because DDW might or might not be requested by
the guest.

The "ddw" property is enabled by default on a PHB but for compatibility
the pseries-2.5 machine and older disable it.

This implements DDW for VFIO. The host kernel support is required.
This adds a "levels" property to PHB to control the number of levels
in the actual TCE table allocated by the host kernel, 0 is the default
value to tell QEMU to calculate the correct value. Current hardware
supports up to 5 levels.

The existing linux guests try creating one additional huge DMA window
with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
the guest switches to dma_direct_ops and never calls TCE hypercalls
(H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
and not waste time on map/unmap later. This adds a "dma64_win_addr"
property which is a bus address for the 64bit window and by default
set to 0x800... as this is what the modern POWER8 hardware
uses and this allows having emulated and VFIO devices on the same bus.

This adds 4 RTAS handlers:
* ibm,query-pe-dma-window
* ibm,create-pe-dma-window
* ibm,remove-pe-dma-window
* ibm,reset-pe-dma-window
These are registered from type_init() callback.

These RTAS handlers are implemented in a separate file to avoid polluting
spapr_iommu.c with PCI.

Signed-off-by: Alexey Kardashevskiy 
---
  hw/ppc/Makefile.objs|   1 +
  hw/ppc/spapr.c  |   7 +-
  hw/ppc/spapr_pci.c  |  73 ---
  hw/ppc/spapr_rtas_ddw.c | 300 
  hw/vfio/common.c|   5 -
  include/hw/pci-host/spapr.h |  13 ++
  include/hw/ppc/spapr.h  |  16 ++-
  trace-events|   4 +
  8 files changed, 395 insertions(+), 24 deletions(-)
  create mode 100644 hw/ppc/spapr_rtas_ddw.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c1ffc77..986b36f 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
spapr_rng.o
  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
  obj-y += spapr_pci_vfio.o
  endif
+obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
  # PowerPC 4xx boards
  obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
  obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d0bb423..ef4c637 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2362,7 +2362,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
   * pseries-2.5
   */
  #define SPAPR_COMPAT_2_5 \
-HW_COMPAT_2_5
+HW_COMPAT_2_5 \
+{\
+.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
+.property = "ddw",\
+.value= stringify(off),\
+},

  static void spapr_machine_2_5_instance_options(MachineState *machine)
  {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index af99a36..3bb294a 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -803,12 +803,12 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, 
PCIDevice *pdev)
  return buf;
  }

-static void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
-   uint32_t liobn,
-   uint32_t page_shift,
-   uint64_t window_addr,
-   uint64_t window_size,
-   Error **errp)
+void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
+ uint32_t liobn,
+ uint32_t page_shift,
+ uint64_t window_addr,
+ uint64_t window_size,
+ Error **errp)
  {
  sPAPRTCETable *tcet;
  uint32_t nb_table = window_size >> page_shift;
@@ -825,10 +825,16 @@ static void spapr_phb_dma_window_enable(sPAPRPHBState 
*sphb,
  return;
  }

+if (SPAPR_PCI_DMA_WINDOW_NUM(liobn) && !sphb->ddw_enabled) {
+error_setg(errp,
+   "Attempt to use second window when DDW is disabled on PHB");
+return;
+}


This should never happen unless something is wrong with the tests in
the RTAS functions, yes?  In which case it should probably be an
assert().


This should 

Re: [Qemu-devel] [PATCH qemu v14 18/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-03-22 Thread David Gibson
On Wed, Mar 23, 2016 at 02:28:01PM +1100, Alexey Kardashevskiy wrote:
> On 03/23/2016 01:13 PM, David Gibson wrote:
> >On Mon, Mar 21, 2016 at 06:47:06PM +1100, Alexey Kardashevskiy wrote:
> >>This adds support for Dynamic DMA Windows (DDW) option defined by
> >>the SPAPR specification which allows to have additional DMA window(s)
> >>
> >>This implements DDW for emulated and VFIO devices.
> >>This reserves RTAS token numbers for DDW calls.
> >>
> >>This changes the TCE table migration descriptor to support dynamic
> >>tables as from now on, PHB will create as many stub TCE table objects
> >>as PHB can possibly support but not all of them might be initialized at
> >>the time of migration because DDW might or might not be requested by
> >>the guest.
> >>
> >>The "ddw" property is enabled by default on a PHB but for compatibility
> >>the pseries-2.5 machine and older disable it.
> >>
> >>This implements DDW for VFIO. The host kernel support is required.
> >>This adds a "levels" property to PHB to control the number of levels
> >>in the actual TCE table allocated by the host kernel, 0 is the default
> >>value to tell QEMU to calculate the correct value. Current hardware
> >>supports up to 5 levels.
> >>
> >>The existing linux guests try creating one additional huge DMA window
> >>with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
> >>the guest switches to dma_direct_ops and never calls TCE hypercalls
> >>(H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
> >>and not waste time on map/unmap later. This adds a "dma64_win_addr"
> >>property which is a bus address for the 64bit window and by default
> >>set to 0x800... as this is what the modern POWER8 hardware
> >>uses and this allows having emulated and VFIO devices on the same bus.
> >>
> >>This adds 4 RTAS handlers:
> >>* ibm,query-pe-dma-window
> >>* ibm,create-pe-dma-window
> >>* ibm,remove-pe-dma-window
> >>* ibm,reset-pe-dma-window
> >>These are registered from type_init() callback.
> >>
> >>These RTAS handlers are implemented in a separate file to avoid polluting
> >>spapr_iommu.c with PCI.
> >>
> >>Signed-off-by: Alexey Kardashevskiy 
> >>---
> >>  hw/ppc/Makefile.objs|   1 +
> >>  hw/ppc/spapr.c  |   7 +-
> >>  hw/ppc/spapr_pci.c  |  73 ---
> >>  hw/ppc/spapr_rtas_ddw.c | 300 
> >> 
> >>  hw/vfio/common.c|   5 -
> >>  include/hw/pci-host/spapr.h |  13 ++
> >>  include/hw/ppc/spapr.h  |  16 ++-
> >>  trace-events|   4 +
> >>  8 files changed, 395 insertions(+), 24 deletions(-)
> >>  create mode 100644 hw/ppc/spapr_rtas_ddw.c
> >>
> >>diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> >>index c1ffc77..986b36f 100644
> >>--- a/hw/ppc/Makefile.objs
> >>+++ b/hw/ppc/Makefile.objs
> >>@@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o 
> >>spapr_drc.o spapr_rng.o
> >>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
> >>  obj-y += spapr_pci_vfio.o
> >>  endif
> >>+obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
> >>  # PowerPC 4xx boards
> >>  obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
> >>  obj-y += ppc4xx_pci.o
> >>diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>index d0bb423..ef4c637 100644
> >>--- a/hw/ppc/spapr.c
> >>+++ b/hw/ppc/spapr.c
> >>@@ -2362,7 +2362,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
> >>   * pseries-2.5
> >>   */
> >>  #define SPAPR_COMPAT_2_5 \
> >>-HW_COMPAT_2_5
> >>+HW_COMPAT_2_5 \
> >>+{\
> >>+.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
> >>+.property = "ddw",\
> >>+.value= stringify(off),\
> >>+},
> >>
> >>  static void spapr_machine_2_5_instance_options(MachineState *machine)
> >>  {
> >>diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> >>index af99a36..3bb294a 100644
> >>--- a/hw/ppc/spapr_pci.c
> >>+++ b/hw/ppc/spapr_pci.c
> >>@@ -803,12 +803,12 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState 
> >>*sphb, PCIDevice *pdev)
> >>  return buf;
> >>  }
> >>
> >>-static void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
> >>-   uint32_t liobn,
> >>-   uint32_t page_shift,
> >>-   uint64_t window_addr,
> >>-   uint64_t window_size,
> >>-   Error **errp)
> >>+void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
> >>+ uint32_t liobn,
> >>+ uint32_t page_shift,
> >>+ uint64_t window_addr,
> >>+ uint64_t window_size,
> >>+ Error **errp)
> >>  {
> >>  sPAPRTCETable *tcet;
> >>  uint32_t nb_table = window_size >> page_shift;
> >>@@ -825,10 +825,16 @@ static void spapr_phb_dma_window_enable(sPAPRPHB

Re: [Qemu-devel] [PATCH qemu v14 18/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-03-22 Thread Alexey Kardashevskiy

On 03/23/2016 01:13 PM, David Gibson wrote:

On Mon, Mar 21, 2016 at 06:47:06PM +1100, Alexey Kardashevskiy wrote:

This adds support for Dynamic DMA Windows (DDW) option defined by
the SPAPR specification which allows to have additional DMA window(s)

This implements DDW for emulated and VFIO devices.
This reserves RTAS token numbers for DDW calls.

This changes the TCE table migration descriptor to support dynamic
tables as from now on, PHB will create as many stub TCE table objects
as PHB can possibly support but not all of them might be initialized at
the time of migration because DDW might or might not be requested by
the guest.

The "ddw" property is enabled by default on a PHB but for compatibility
the pseries-2.5 machine and older disable it.

This implements DDW for VFIO. The host kernel support is required.
This adds a "levels" property to PHB to control the number of levels
in the actual TCE table allocated by the host kernel, 0 is the default
value to tell QEMU to calculate the correct value. Current hardware
supports up to 5 levels.

The existing linux guests try creating one additional huge DMA window
with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
the guest switches to dma_direct_ops and never calls TCE hypercalls
(H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
and not waste time on map/unmap later. This adds a "dma64_win_addr"
property which is a bus address for the 64bit window and by default
set to 0x800... as this is what the modern POWER8 hardware
uses and this allows having emulated and VFIO devices on the same bus.

This adds 4 RTAS handlers:
* ibm,query-pe-dma-window
* ibm,create-pe-dma-window
* ibm,remove-pe-dma-window
* ibm,reset-pe-dma-window
These are registered from type_init() callback.

These RTAS handlers are implemented in a separate file to avoid polluting
spapr_iommu.c with PCI.

Signed-off-by: Alexey Kardashevskiy 
---
  hw/ppc/Makefile.objs|   1 +
  hw/ppc/spapr.c  |   7 +-
  hw/ppc/spapr_pci.c  |  73 ---
  hw/ppc/spapr_rtas_ddw.c | 300 
  hw/vfio/common.c|   5 -
  include/hw/pci-host/spapr.h |  13 ++
  include/hw/ppc/spapr.h  |  16 ++-
  trace-events|   4 +
  8 files changed, 395 insertions(+), 24 deletions(-)
  create mode 100644 hw/ppc/spapr_rtas_ddw.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c1ffc77..986b36f 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
spapr_rng.o
  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
  obj-y += spapr_pci_vfio.o
  endif
+obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
  # PowerPC 4xx boards
  obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
  obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d0bb423..ef4c637 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2362,7 +2362,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
   * pseries-2.5
   */
  #define SPAPR_COMPAT_2_5 \
-HW_COMPAT_2_5
+HW_COMPAT_2_5 \
+{\
+.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
+.property = "ddw",\
+.value= stringify(off),\
+},

  static void spapr_machine_2_5_instance_options(MachineState *machine)
  {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index af99a36..3bb294a 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -803,12 +803,12 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, 
PCIDevice *pdev)
  return buf;
  }

-static void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
-   uint32_t liobn,
-   uint32_t page_shift,
-   uint64_t window_addr,
-   uint64_t window_size,
-   Error **errp)
+void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
+ uint32_t liobn,
+ uint32_t page_shift,
+ uint64_t window_addr,
+ uint64_t window_size,
+ Error **errp)
  {
  sPAPRTCETable *tcet;
  uint32_t nb_table = window_size >> page_shift;
@@ -825,10 +825,16 @@ static void spapr_phb_dma_window_enable(sPAPRPHBState 
*sphb,
  return;
  }

+if (SPAPR_PCI_DMA_WINDOW_NUM(liobn) && !sphb->ddw_enabled) {
+error_setg(errp,
+   "Attempt to use second window when DDW is disabled on PHB");
+return;
+}


This should never happen unless something is wrong with the tests in
the RTAS functions, yes?  In which case it should probably be an
assert().


This should not. But this is called from the RTAS caller so I'd really like 
to have a message rather than assert() if that cond

Re: [Qemu-devel] [PATCH qemu v14 18/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-03-22 Thread David Gibson
On Mon, Mar 21, 2016 at 06:47:06PM +1100, Alexey Kardashevskiy wrote:
> This adds support for Dynamic DMA Windows (DDW) option defined by
> the SPAPR specification which allows to have additional DMA window(s)
> 
> This implements DDW for emulated and VFIO devices.
> This reserves RTAS token numbers for DDW calls.
> 
> This changes the TCE table migration descriptor to support dynamic
> tables as from now on, PHB will create as many stub TCE table objects
> as PHB can possibly support but not all of them might be initialized at
> the time of migration because DDW might or might not be requested by
> the guest.
> 
> The "ddw" property is enabled by default on a PHB but for compatibility
> the pseries-2.5 machine and older disable it.
> 
> This implements DDW for VFIO. The host kernel support is required.
> This adds a "levels" property to PHB to control the number of levels
> in the actual TCE table allocated by the host kernel, 0 is the default
> value to tell QEMU to calculate the correct value. Current hardware
> supports up to 5 levels.
> 
> The existing linux guests try creating one additional huge DMA window
> with 64K or 16MB pages and map the entire guest RAM to. If succeeded,
> the guest switches to dma_direct_ops and never calls TCE hypercalls
> (H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
> and not waste time on map/unmap later. This adds a "dma64_win_addr"
> property which is a bus address for the 64bit window and by default
> set to 0x800... as this is what the modern POWER8 hardware
> uses and this allows having emulated and VFIO devices on the same bus.
> 
> This adds 4 RTAS handlers:
> * ibm,query-pe-dma-window
> * ibm,create-pe-dma-window
> * ibm,remove-pe-dma-window
> * ibm,reset-pe-dma-window
> These are registered from type_init() callback.
> 
> These RTAS handlers are implemented in a separate file to avoid polluting
> spapr_iommu.c with PCI.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
>  hw/ppc/Makefile.objs|   1 +
>  hw/ppc/spapr.c  |   7 +-
>  hw/ppc/spapr_pci.c  |  73 ---
>  hw/ppc/spapr_rtas_ddw.c | 300 
> 
>  hw/vfio/common.c|   5 -
>  include/hw/pci-host/spapr.h |  13 ++
>  include/hw/ppc/spapr.h  |  16 ++-
>  trace-events|   4 +
>  8 files changed, 395 insertions(+), 24 deletions(-)
>  create mode 100644 hw/ppc/spapr_rtas_ddw.c
> 
> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> index c1ffc77..986b36f 100644
> --- a/hw/ppc/Makefile.objs
> +++ b/hw/ppc/Makefile.objs
> @@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
> spapr_rng.o
>  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
>  obj-y += spapr_pci_vfio.o
>  endif
> +obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
>  # PowerPC 4xx boards
>  obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
>  obj-y += ppc4xx_pci.o
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index d0bb423..ef4c637 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2362,7 +2362,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
>   * pseries-2.5
>   */
>  #define SPAPR_COMPAT_2_5 \
> -HW_COMPAT_2_5
> +HW_COMPAT_2_5 \
> +{\
> +.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
> +.property = "ddw",\
> +.value= stringify(off),\
> +},
>  
>  static void spapr_machine_2_5_instance_options(MachineState *machine)
>  {
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index af99a36..3bb294a 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -803,12 +803,12 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState 
> *sphb, PCIDevice *pdev)
>  return buf;
>  }
>  
> -static void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
> -   uint32_t liobn,
> -   uint32_t page_shift,
> -   uint64_t window_addr,
> -   uint64_t window_size,
> -   Error **errp)
> +void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
> + uint32_t liobn,
> + uint32_t page_shift,
> + uint64_t window_addr,
> + uint64_t window_size,
> + Error **errp)
>  {
>  sPAPRTCETable *tcet;
>  uint32_t nb_table = window_size >> page_shift;
> @@ -825,10 +825,16 @@ static void spapr_phb_dma_window_enable(sPAPRPHBState 
> *sphb,
>  return;
>  }
>  
> +if (SPAPR_PCI_DMA_WINDOW_NUM(liobn) && !sphb->ddw_enabled) {
> +error_setg(errp,
> +   "Attempt to use second window when DDW is disabled on 
> PHB");
> +return;
> +}

This should never happen unless something is wrong with the tests in
the RTAS functions, yes?  In which c

[Qemu-devel] [PATCH qemu v14 18/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-03-21 Thread Alexey Kardashevskiy
This adds support for the Dynamic DMA Windows (DDW) option defined by
the SPAPR specification, which allows having additional DMA window(s).

This implements DDW for emulated and VFIO devices.
This reserves RTAS token numbers for DDW calls.

This changes the TCE table migration descriptor to support dynamic
tables: from now on, the PHB will create as many stub TCE table objects
as it can possibly support, but not all of them might be initialized at
the time of migration because DDW might or might not be requested by
the guest.

The "ddw" property is enabled by default on a PHB but for compatibility
the pseries-2.5 machine and older disable it.

This implements DDW for VFIO. The host kernel support is required.
This adds a "levels" property to PHB to control the number of levels
in the actual TCE table allocated by the host kernel, 0 is the default
value to tell QEMU to calculate the correct value. Current hardware
supports up to 5 levels.

Existing Linux guests try to create one additional huge DMA window
with 64K or 16MB pages and map the entire guest RAM to it. If this succeeds,
the guest switches to dma_direct_ops and never calls TCE hypercalls
(H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
and not waste time on map/unmap later. This adds a "dma64_win_addr"
property which is a bus address for the 64bit window and by default
set to 0x800... as this is what the modern POWER8 hardware
uses and this allows having emulated and VFIO devices on the same bus.

This adds 4 RTAS handlers:
* ibm,query-pe-dma-window
* ibm,create-pe-dma-window
* ibm,remove-pe-dma-window
* ibm,reset-pe-dma-window
These are registered from type_init() callback.

These RTAS handlers are implemented in a separate file to avoid polluting
spapr_iommu.c with PCI-specific code.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/Makefile.objs|   1 +
 hw/ppc/spapr.c  |   7 +-
 hw/ppc/spapr_pci.c  |  73 ---
 hw/ppc/spapr_rtas_ddw.c | 300 
 hw/vfio/common.c|   5 -
 include/hw/pci-host/spapr.h |  13 ++
 include/hw/ppc/spapr.h  |  16 ++-
 trace-events|   4 +
 8 files changed, 395 insertions(+), 24 deletions(-)
 create mode 100644 hw/ppc/spapr_rtas_ddw.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c1ffc77..986b36f 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
spapr_rng.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
+obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
 # PowerPC 4xx boards
 obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
 obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index d0bb423..ef4c637 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2362,7 +2362,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
  * pseries-2.5
  */
 #define SPAPR_COMPAT_2_5 \
-HW_COMPAT_2_5
+HW_COMPAT_2_5 \
+{\
+.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
+.property = "ddw",\
+.value= stringify(off),\
+},
 
 static void spapr_machine_2_5_instance_options(MachineState *machine)
 {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index af99a36..3bb294a 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -803,12 +803,12 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, 
PCIDevice *pdev)
 return buf;
 }
 
-static void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
-   uint32_t liobn,
-   uint32_t page_shift,
-   uint64_t window_addr,
-   uint64_t window_size,
-   Error **errp)
+void spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
+ uint32_t liobn,
+ uint32_t page_shift,
+ uint64_t window_addr,
+ uint64_t window_size,
+ Error **errp)
 {
 sPAPRTCETable *tcet;
 uint32_t nb_table = window_size >> page_shift;
@@ -825,10 +825,16 @@ static void spapr_phb_dma_window_enable(sPAPRPHBState 
*sphb,
 return;
 }
 
+if (SPAPR_PCI_DMA_WINDOW_NUM(liobn) && !sphb->ddw_enabled) {
+error_setg(errp,
+   "Attempt to use second window when DDW is disabled on PHB");
+return;
+}
+
 spapr_tce_table_enable(tcet, page_shift, window_addr, nb_table);
 }
 
-static int spapr_phb_dma_window_disable(sPAPRPHBState *sphb, uint32_t liobn)
+int spapr_phb_dma_window_disable(sPAPRPHBState *sphb, uint32_t liobn)
 {
 sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
 
@@ -1492,14 +1498,18 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 }
 
 /* DMA setup */
-tcet =