Re: [Qemu-devel] [PATCHv3 4/7] spapr_pci: Delegate placement of PCI host bridges to machine type

2016-10-13 Thread Laurent Vivier


On 13/10/2016 01:57, David Gibson wrote:
> The 'spapr-pci-host-bridge' represents the virtual PCI host bridge (PHB)
> for a PAPR guest.  Unlike on x86, it's routine on Power (both bare metal
> and PAPR guests) to have numerous independent PHBs, each controlling a
> separate PCI domain.
> 
> There are two ways of configuring the spapr-pci-host-bridge device: first
> it can be done fully manually, specifying the locations and sizes of all
> the IO windows.  This gives the most control, but is very awkward with 6
> mandatory parameters.  Alternatively just an "index" can be specified
> which essentially selects from an array of predefined PHB locations.
> The PHB at index 0 is automatically created as the default PHB.
> 
> The current set of default locations causes some problems for guests with
> large RAM (> 1 TiB) or PCI devices with very large BARs (e.g. big nVidia
> GPGPU cards via VFIO).  Obviously, for migration we can only change the
> locations on a new machine type, however.
> 
> This is awkward, because the placement is currently decided within the
> spapr-pci-host-bridge code, so it breaks abstraction to look inside the
> machine type version.
> 
> So, this patch delegates the "default mode" PHB placement from the
> spapr-pci-host-bridge device back to the machine type via a public method
> in sPAPRMachineClass.  It's still a bit ugly, but it's about the best we
> can do.
> 
> For now, this just changes where the calculation is done.  It doesn't
> change the actual location of the host bridges, or any other behaviour.
> 
> Signed-off-by: David Gibson 

Reviewed-by: Laurent Vivier 

> ---
>  hw/ppc/spapr.c  | 31 +++
>  hw/ppc/spapr_pci.c  | 21 +++--
>  include/hw/pci-host/spapr.h | 11 +--
>  include/hw/ppc/spapr.h  |  3 +++
>  4 files changed, 42 insertions(+), 24 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 03e3803..cb9da96 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2370,6 +2370,36 @@ static HotpluggableCPUList 
> *spapr_query_hotpluggable_cpus(MachineState *machine)
>  return head;
>  }
>  
> +static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
> +uint64_t *buid, hwaddr *pio, hwaddr *mmio,
> +unsigned n_dma, uint32_t *liobns, Error 
> **errp)
> +{
> +const uint64_t base_buid = 0x8002000ULL;
> +const hwaddr phb0_base = 0x100ULL; /* 1 TiB */
> +const hwaddr phb_spacing = 0x10ULL; /* 64 GiB */
> +const hwaddr mmio_offset = 0xa000; /* 2 GiB + 512 MiB */
> +const hwaddr pio_offset = 0x8000; /* 2 GiB */
> +const uint32_t max_index = 255;
> +
> +hwaddr phb_base;
> +int i;
> +
> +if (index > max_index) {
> +error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
> +   max_index);
> +return;
> +}
> +
> +*buid = base_buid + index;
> +for (i = 0; i < n_dma; ++i) {
> +liobns[i] = SPAPR_PCI_LIOBN(index, i);
> +}
> +
> +phb_base = phb0_base + index * phb_spacing;
> +*pio = phb_base + pio_offset;
> +*mmio = phb_base + mmio_offset;
> +}
> +
>  static void spapr_machine_class_init(ObjectClass *oc, void *data)
>  {
>  MachineClass *mc = MACHINE_CLASS(oc);
> @@ -2406,6 +2436,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
> void *data)
>  mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
>  fwc->get_dev_path = spapr_get_fw_dev_path;
>  nc->nmi_monitor_handler = spapr_nmi;
> +smc->phb_placement = spapr_phb_placement;
>  }
>  
>  static const TypeInfo spapr_machine_info = {
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 4f00865..0e6cf4d 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1311,7 +1311,8 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> **errp)
>  sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
>  
>  if (sphb->index != (uint32_t)-1) {
> -hwaddr windows_base;
> +sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
> +Error *local_err = NULL;
>  
>  if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
> (uint32_t)-1)
>  || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
> @@ -1322,21 +1323,13 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> **errp)
>  return;
>  }
>  
> -if (sphb->index > SPAPR_PCI_MAX_INDEX) {
> -error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
> -   SPAPR_PCI_MAX_INDEX);
> +smc->phb_placement(spapr, sphb->index, >buid,
> +   >io_win_addr, >mem_win_addr,
> +   windows_supported, sphb->dma_liobn, _err);
> +if (local_err) {
> +error_propagate(errp, local_err);
>

[Qemu-devel] [PATCHv3 4/7] spapr_pci: Delegate placement of PCI host bridges to machine type

2016-10-12 Thread David Gibson
The 'spapr-pci-host-bridge' represents the virtual PCI host bridge (PHB)
for a PAPR guest.  Unlike on x86, it's routine on Power (both bare metal
and PAPR guests) to have numerous independent PHBs, each controlling a
separate PCI domain.

There are two ways of configuring the spapr-pci-host-bridge device: first
it can be done fully manually, specifying the locations and sizes of all
the IO windows.  This gives the most control, but is very awkward with 6
mandatory parameters.  Alternatively just an "index" can be specified
which essentially selects from an array of predefined PHB locations.
The PHB at index 0 is automatically created as the default PHB.

The current set of default locations causes some problems for guests with
large RAM (> 1 TiB) or PCI devices with very large BARs (e.g. big nVidia
GPGPU cards via VFIO).  Obviously, for migration we can only change the
locations on a new machine type, however.

This is awkward, because the placement is currently decided within the
spapr-pci-host-bridge code, so it breaks abstraction to look inside the
machine type version.

So, this patch delegates the "default mode" PHB placement from the
spapr-pci-host-bridge device back to the machine type via a public method
in sPAPRMachineClass.  It's still a bit ugly, but it's about the best we
can do.

For now, this just changes where the calculation is done.  It doesn't
change the actual location of the host bridges, or any other behaviour.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 31 +++
 hw/ppc/spapr_pci.c  | 21 +++--
 include/hw/pci-host/spapr.h | 11 +--
 include/hw/ppc/spapr.h  |  3 +++
 4 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 03e3803..cb9da96 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2370,6 +2370,36 @@ static HotpluggableCPUList 
*spapr_query_hotpluggable_cpus(MachineState *machine)
 return head;
 }
 
+static void spapr_phb_placement(sPAPRMachineState *spapr, uint32_t index,
+uint64_t *buid, hwaddr *pio, hwaddr *mmio,
+unsigned n_dma, uint32_t *liobns, Error **errp)
+{
+const uint64_t base_buid = 0x8002000ULL;
+const hwaddr phb0_base = 0x100ULL; /* 1 TiB */
+const hwaddr phb_spacing = 0x10ULL; /* 64 GiB */
+const hwaddr mmio_offset = 0xa000; /* 2 GiB + 512 MiB */
+const hwaddr pio_offset = 0x8000; /* 2 GiB */
+const uint32_t max_index = 255;
+
+hwaddr phb_base;
+int i;
+
+if (index > max_index) {
+error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
+   max_index);
+return;
+}
+
+*buid = base_buid + index;
+for (i = 0; i < n_dma; ++i) {
+liobns[i] = SPAPR_PCI_LIOBN(index, i);
+}
+
+phb_base = phb0_base + index * phb_spacing;
+*pio = phb_base + pio_offset;
+*mmio = phb_base + mmio_offset;
+}
+
 static void spapr_machine_class_init(ObjectClass *oc, void *data)
 {
 MachineClass *mc = MACHINE_CLASS(oc);
@@ -2406,6 +2436,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
void *data)
 mc->query_hotpluggable_cpus = spapr_query_hotpluggable_cpus;
 fwc->get_dev_path = spapr_get_fw_dev_path;
 nc->nmi_monitor_handler = spapr_nmi;
+smc->phb_placement = spapr_phb_placement;
 }
 
 static const TypeInfo spapr_machine_info = {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 4f00865..0e6cf4d 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1311,7 +1311,8 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 sphb->ddw_enabled ? SPAPR_PCI_DMA_MAX_WINDOWS : 1;
 
 if (sphb->index != (uint32_t)-1) {
-hwaddr windows_base;
+sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+Error *local_err = NULL;
 
 if ((sphb->buid != (uint64_t)-1) || (sphb->dma_liobn[0] != 
(uint32_t)-1)
 || (sphb->dma_liobn[1] != (uint32_t)-1 && windows_supported == 2)
@@ -1322,21 +1323,13 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-if (sphb->index > SPAPR_PCI_MAX_INDEX) {
-error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
-   SPAPR_PCI_MAX_INDEX);
+smc->phb_placement(spapr, sphb->index, >buid,
+   >io_win_addr, >mem_win_addr,
+   windows_supported, sphb->dma_liobn, _err);
+if (local_err) {
+error_propagate(errp, local_err);
 return;
 }
-
-sphb->buid = SPAPR_PCI_BASE_BUID + sphb->index;
-for (i = 0; i < windows_supported; ++i) {
-sphb->dma_liobn[i] = SPAPR_PCI_LIOBN(sphb->index, i);
-}
-
-windows_base = SPAPR_PCI_WINDOW_BASE
-+ sphb->index * SPAPR_PCI_WINDOW_SPACING;
-