Re: [Qemu-devel] [PATCHv3 6/7] spapr_pci: Add a 64-bit MMIO window

2016-10-13 Thread Laurent Vivier


On 13/10/2016 01:57, David Gibson wrote:
> On real hardware, and under pHyp, the PCI host bridges on Power machines
> typically advertise two outbound MMIO windows from the guest's physical
> memory space to PCI memory space:
>   - A 32-bit window which maps onto 2GiB..4GiB in the PCI address space
>   - A 64-bit window which maps onto a large region somewhere high in PCI
> address space (traditionally this used an identity mapping from guest
> physical address to PCI address, but that's not always the case)
> 
> The qemu implementation in spapr-pci-host-bridge, however, only supports a
> single outbound MMIO window, however.  At least some Linux versions expect
> the two windows however, so we arranged this window to map onto the PCI
> memory space from 2 GiB..~64 GiB, then advertised it as two contiguous
> windows, the "32-bit" window from 2G..4G and the "64-bit" window from
> 4G..~64G.
> 
> This approach means, however, that the 64G window is not naturally aligned.
> In turn this limits the size of the largest BAR we can map (which does have
> to be naturally aligned) to roughly half of the total window.  With some
> large nVidia GPGPU cards which have huge memory BARs, this is starting to
> be a problem.
> 
> This patch adds true support for separate 32-bit and 64-bit outbound MMIO
> windows to the spapr-pci-host-bridge implementation, each of which can
> be independently configured.  The 32-bit window always maps to 2G.. in PCI
> space, but the PCI address of the 64-bit window can be configured (it
> defaults to the same as the guest physical address).
> 
> So as not to break possible existing configurations, as long as a 64-bit
> window is not specified, a large single window can be specified.  This
> will appear the same way to the guest as the old approach, although it's
> now implemented by two contiguous memory regions rather than a single one.
> 
> For now, this only adds the possibility of 64-bit windows.  The default
> configuration still uses the legacy mode.
> 
> Signed-off-by: David Gibson 

Reviewed-by: Laurent Vivier 

> ---
>  hw/ppc/spapr.c  | 10 +--
>  hw/ppc/spapr_pci.c  | 70 
> -
>  include/hw/pci-host/spapr.h |  8 --
>  include/hw/ppc/spapr.h  |  3 +-
>  4 files changed, 72 insertions(+), 19 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index e6b110d..8db3657 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2371,7 +2371,8 @@ static HotpluggableCPUList 
> *spapr_query_hotpluggable_cpus(MachineState *machine)
>  }
>  
>  static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
> -uint64_t *buid, hwaddr *pio, hwaddr *mmio,
> +uint64_t *buid, hwaddr *pio,
> +hwaddr *mmio32, hwaddr *mmio64,
>  unsigned n_dma, uint32_t *liobns, Error 
> **errp)
>  {
>  const uint64_t base_buid = 0x8002000ULL;
> @@ -2409,7 +2410,12 @@ static void spapr_phb_placement(sPAPRMachineState 
> *spapr, uint32_t index,
>  
>  phb_base = phb0_base + index * phb_spacing;
>  *pio = phb_base + pio_offset;
> -*mmio = phb_base + mmio_offset;
> +*mmio32 = phb_base + mmio_offset;
> +/*
> + * We don't set the 64-bit MMIO window, relying on the PHB's
> + * fallback behaviour of automatically splitting a large "32-bit"
> + * window into contiguous 32-bit and 64-bit windows
> + */
>  }
>  
>  static void spapr_machine_class_init(ObjectClass *oc, void *data)
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 0e6cf4d..31ca6fa 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1317,14 +1317,16 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> **errp)
>  if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
> (uint32_t)-1)
>  || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
>  || (sphb->mem_win_addr != (hwaddr)-1)
> +|| (sphb->mem64_win_addr != (hwaddr)-1)
>  || (sphb->io_win_addr != (hwaddr)-1)) {
>  error_setg(errp, "Either \"index\" or other parameters must"
> " be specified for PAPR PHB, not both");
>  return;
>  }
>  
> -smc->phb_placement(spapr, sphb->index, &sphb->buid,
> -   &sphb->io_win_addr, &sphb->mem_win_addr,
> +smc->phb_placement(spapr, sphb->index,
> +   &sphb->buid, &sphb->io_win_addr,
> +   &sphb->mem_win_addr, &sphb->mem64_win_addr,
> windows_supported, sphb->dma_liobn, &local_err);
>  if (local_err) {
>  error_propagate(errp, local_err);
> @@ -1353,6 +1355,38 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> **errp)
>  return;
>  }
>  
> +if (sphb->mem64_win_size != 0) {
> +  

[Qemu-devel] [PATCHv3 6/7] spapr_pci: Add a 64-bit MMIO window

2016-10-12 Thread David Gibson
On real hardware, and under pHyp, the PCI host bridges on Power machines
typically advertise two outbound MMIO windows from the guest's physical
memory space to PCI memory space:
  - A 32-bit window which maps onto 2GiB..4GiB in the PCI address space
  - A 64-bit window which maps onto a large region somewhere high in PCI
address space (traditionally this used an identity mapping from guest
physical address to PCI address, but that's not always the case)

The qemu implementation in spapr-pci-host-bridge, however, only supports a
single outbound MMIO window, however.  At least some Linux versions expect
the two windows however, so we arranged this window to map onto the PCI
memory space from 2 GiB..~64 GiB, then advertised it as two contiguous
windows, the "32-bit" window from 2G..4G and the "64-bit" window from
4G..~64G.

This approach means, however, that the 64G window is not naturally aligned.
In turn this limits the size of the largest BAR we can map (which does have
to be naturally aligned) to roughly half of the total window.  With some
large nVidia GPGPU cards which have huge memory BARs, this is starting to
be a problem.

This patch adds true support for separate 32-bit and 64-bit outbound MMIO
windows to the spapr-pci-host-bridge implementation, each of which can
be independently configured.  The 32-bit window always maps to 2G.. in PCI
space, but the PCI address of the 64-bit window can be configured (it
defaults to the same as the guest physical address).

So as not to break possible existing configurations, as long as a 64-bit
window is not specified, a large single window can be specified.  This
will appear the same way to the guest as the old approach, although it's
now implemented by two contiguous memory regions rather than a single one.

For now, this only adds the possibility of 64-bit windows.  The default
configuration still uses the legacy mode.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 10 +--
 hw/ppc/spapr_pci.c  | 70 -
 include/hw/pci-host/spapr.h |  8 --
 include/hw/ppc/spapr.h  |  3 +-
 4 files changed, 72 insertions(+), 19 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e6b110d..8db3657 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2371,7 +2371,8 @@ static HotpluggableCPUList 
*spapr_query_hotpluggable_cpus(MachineState *machine)
 }
 
 static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
-uint64_t *buid, hwaddr *pio, hwaddr *mmio,
+uint64_t *buid, hwaddr *pio,
+hwaddr *mmio32, hwaddr *mmio64,
 unsigned n_dma, uint32_t *liobns, Error **errp)
 {
 const uint64_t base_buid = 0x8002000ULL;
@@ -2409,7 +2410,12 @@ static void spapr_phb_placement(sPAPRMachineState 
*spapr, uint32_t index,
 
 phb_base = phb0_base + index * phb_spacing;
 *pio = phb_base + pio_offset;
-*mmio = phb_base + mmio_offset;
+*mmio32 = phb_base + mmio_offset;
+/*
+ * We don't set the 64-bit MMIO window, relying on the PHB's
+ * fallback behaviour of automatically splitting a large "32-bit"
+ * window into contiguous 32-bit and 64-bit windows
+ */
 }
 
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 0e6cf4d..31ca6fa 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1317,14 +1317,16 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
(uint32_t)-1)
 || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
 || (sphb->mem_win_addr != (hwaddr)-1)
+|| (sphb->mem64_win_addr != (hwaddr)-1)
 || (sphb->io_win_addr != (hwaddr)-1)) {
 error_setg(errp, "Either \"index\" or other parameters must"
" be specified for PAPR PHB, not both");
 return;
 }
 
-smc->phb_placement(spapr, sphb->index, &sphb->buid,
-   &sphb->io_win_addr, &sphb->mem_win_addr,
+smc->phb_placement(spapr, sphb->index,
+   &sphb->buid, &sphb->io_win_addr,
+   &sphb->mem_win_addr, &sphb->mem64_win_addr,
windows_supported, sphb->dma_liobn, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
@@ -1353,6 +1355,38 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
+if (sphb->mem64_win_size != 0) {
+if (sphb->mem64_win_addr == (hwaddr)-1) {
+error_setg(errp,
+   "64-bit memory window address not specified for PHB");
+return;
+}
+
+if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
+error_setg(errp, "32-bit mem