Re: [Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling

2012-04-16 Thread Benjamin Herrenschmidt
On Sun, 2012-04-15 at 15:26 +0300, Michael S. Tsirkin wrote:
 
   Plus this scheme is basically just better - it won't force
  all the functions of a multi-function device to share an interrupt.
 
 Only if some functions use pin != INTA. Maybe it's true for
 pseries? On the pc most of them use INTA. 

Ah ? Most device I've seen with secondary functions use a different
pin... (not to be confused with PC chipset built-in devices that is, but
we aren't emulating any of these here anyway).

Cheers,
Ben.





Re: [Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling

2012-04-15 Thread Michael S. Tsirkin
On Mon, Apr 02, 2012 at 02:17:36PM +1000, David Gibson wrote:
 Currently the pseries PCI code uses a somewhat strange scheme of PCI irq
 allocation - one per slot up to a maximum that's greater than the usual 4.
 This scheme more or less worked, because we were able to tell the guest the
 irq mapping in the device tree, however it's nonstandard and may break
 assumptions in the future.  Worse, the array used to construct the dev
 tree interrupt map was mis-sized, we got away with it only because it
 happened that our SPAPR_PCI_NUM_LSI value was greater than 7.
 
 This patch changes the pseries PCI code to use the normal PCI interrupt
 swizzling scheme instead,

I don't see anything wrong with the code - I assume someone
who applies this knows about real hardware and can check that
it behaves as emulated here.

But I don't remember any standard scheme except for bridge devices,
and this isn't one, is it?  Care to clarify?

 and corrects allocation of the irq map.
 
 Cc: Michael S. Tsirkin m...@redhat.com
 
 Signed-off-by: David Gibson da...@gibson.dropbear.id.au
 ---
  hw/spapr_pci.c |   49 -
  hw/spapr_pci.h |5 ++---
  2 files changed, 30 insertions(+), 24 deletions(-)
 
 diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
 index 1cf84e7..b8a0313 100644
 --- a/hw/spapr_pci.c
 +++ b/hw/spapr_pci.c
 @@ -198,16 +198,20 @@ static void rtas_write_pci_config(sPAPREnvironment 
 *spapr,
  finish_write_pci_config(spapr, 0, addr, size, val, rets);
  }
  
 +static int pci_swizzle(int slot, int pin)
 +{
 +return (slot + pin) % PCI_NUM_PINS;
 +}
 +

Rename pci_spapr_swizzle pls. Or just open-code.

  static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
  {
  /*
   * Here we need to convert pci_dev + irq_num to some unique value
 - * which is less than number of IRQs on the specific bus (now it
 - * is 16).  At the moment irq_num == device_id (number of the
 - * slot?)
 - * FIXME: we should swizzle in fn and irq_num
 + * which is less than number of IRQs on the specific bus (4).  We
 + * use standard PCI swizzling, that is (slot number + pin number)
 + * % 4.
   */
 -return (pci_dev-devfn  3) % SPAPR_PCI_NUM_LSI;
 +return pci_swizzle(PCI_SLOT(pci_dev-devfn), irq_num);
  }
  
  static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
 @@ -304,13 +308,13 @@ static int spapr_phb_init(SysBusDevice *s)
 phb-busname ? phb-busname : phb-dtbusname,
 pci_spapr_set_irq, pci_spapr_map_irq, phb,
 phb-memspace, phb-iospace,
 -   PCI_DEVFN(0, 0), SPAPR_PCI_NUM_LSI);
 +   PCI_DEVFN(0, 0), PCI_NUM_PINS);
  phb-host_state.bus = bus;
  
  QLIST_INSERT_HEAD(spapr-phbs, phb, list);
  
  /* Initialize the LSI table */
 -for (i = 0; i  SPAPR_PCI_NUM_LSI; i++) {
 +for (i = 0; i  PCI_NUM_PINS; i++) {
  qemu_irq qirq;
  uint32_t num;
  
 @@ -392,8 +396,7 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
 uint32_t xics_phandle,
 void *fdt)
  {
 -PCIBus *bus = phb-host_state.bus;
 -int bus_off, i;
 +int bus_off, i, j;
  char nodename[256];
  uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
  struct {
 @@ -415,8 +418,8 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
  };
  uint64_t bus_reg[] = { cpu_to_be64(phb-buid), 0 };
  uint32_t interrupt_map_mask[] = {
 -cpu_to_be32(b_d(-1)|b_fff(0)), 0x0, 0x0, 0x0};
 -uint32_t interrupt_map[bus-nirq][7];
 +cpu_to_be32(b_d(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
 +uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
  
  /* Start populating the FDT */
  sprintf(nodename, pci@% PRIx64, phb-buid);
 @@ -450,19 +453,23 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
   */
  _FDT(fdt_setprop(fdt, bus_off, interrupt-map-mask,
   interrupt_map_mask, sizeof(interrupt_map_mask)));
 -for (i = 0; i  7; i++) {
 -uint32_t *irqmap = interrupt_map[i];
 -irqmap[0] = cpu_to_be32(b_d(i)|b_fff(0));
 -irqmap[1] = 0;
 -irqmap[2] = 0;
 -irqmap[3] = 0;
 -irqmap[4] = cpu_to_be32(xics_phandle);
 -irqmap[5] = cpu_to_be32(phb-lsi_table[i % 
 SPAPR_PCI_NUM_LSI].dt_irq);
 -irqmap[6] = cpu_to_be32(0x8);
 +for (i = 0; i  PCI_SLOT_MAX; i++) {
 +for (j = 0; j  PCI_NUM_PINS; j++) {
 +uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
 +int lsi_num = pci_swizzle(i, j);
 +
 +irqmap[0] = cpu_to_be32(b_d(i)|b_fff(0));
 +irqmap[1] = 0;
 +irqmap[2] = 0;
 +irqmap[3] = cpu_to_be32(j+1);
 +irqmap[4] = cpu_to_be32(xics_phandle);
 +irqmap[5] = 

Re: [Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling

2012-04-15 Thread David Gibson
On Sun, Apr 15, 2012 at 01:03:57PM +0300, Michael S. Tsirkin wrote:
 On Mon, Apr 02, 2012 at 02:17:36PM +1000, David Gibson wrote:
  Currently the pseries PCI code uses a somewhat strange scheme of PCI irq
  allocation - one per slot up to a maximum that's greater than the usual 4.
  This scheme more or less worked, because we were able to tell the guest the
  irq mapping in the device tree, however it's nonstandard and may break
  assumptions in the future.  Worse, the array used to construct the dev
  tree interrupt map was mis-sized, we got away with it only because it
  happened that our SPAPR_PCI_NUM_LSI value was greater than 7.
  
  This patch changes the pseries PCI code to use the normal PCI interrupt
  swizzling scheme instead,
 
 I don't see anything wrong with the code - I assume someone
 who applies this knows about real hardware and can check that
 it behaves as emulated here.

Because the device tree lets us advertise to the guest precisely how
the interrupts are mapped, it doesn't really matter whether we behave
identically to real hardware (the PAPR platform we're emulating here
is already a para-virtualized environment running under a hypervisor,
so real isn't a particularly clear concept).  I'm not sure if all
pseries machines (and/or hypervisor versions) are the same in this
regard, but the new scheme is certainly in use.  As long as the device
tree has the correct information, really any interrupt mapping scheme
is compliant with PAPR.

 But I don't remember any standard scheme except for bridge devices,
 and this isn't one, is it?  Care to clarify?

Well, the standard swizzling scheme is for p2p bridges, whereas this
is a host bridge, but there's no reason not to use the same scheme for
both.  This will become more important when we implement pass-through.
On pSeries the unit of passthrough can be either a host bridge or a
p2p bridge, and having the same swizzling scheme will make life
easier.  Plus this scheme is basically just better - it won't force
all the functions of a multi-function device to share an interrupt.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson



Re: [Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling

2012-04-15 Thread Michael S. Tsirkin
On Sun, Apr 15, 2012 at 09:47:47PM +1000, David Gibson wrote:
 On Sun, Apr 15, 2012 at 01:03:57PM +0300, Michael S. Tsirkin wrote:
  On Mon, Apr 02, 2012 at 02:17:36PM +1000, David Gibson wrote:
   Currently the pseries PCI code uses a somewhat strange scheme of PCI irq
   allocation - one per slot up to a maximum that's greater than the usual 4.
   This scheme more or less worked, because we were able to tell the guest 
   the
   irq mapping in the device tree, however it's nonstandard and may break
   assumptions in the future.  Worse, the array used to construct the dev
   tree interrupt map was mis-sized, we got away with it only because it
   happened that our SPAPR_PCI_NUM_LSI value was greater than 7.
   
   This patch changes the pseries PCI code to use the normal PCI interrupt
   swizzling scheme instead,
  
  I don't see anything wrong with the code - I assume someone
  who applies this knows about real hardware and can check that
  it behaves as emulated here.
 
 Because the device tree lets us advertise to the guest precisely how
 the interrupts are mapped, it doesn't really matter whether we behave
 identically to real hardware (the PAPR platform we're emulating here
 is already a para-virtualized environment running under a hypervisor,
 so real isn't a particularly clear concept).  I'm not sure if all
 pseries machines (and/or hypervisor versions) are the same in this
 regard, but the new scheme is certainly in use.  As long as the device
 tree has the correct information, really any interrupt mapping scheme
 is compliant with PAPR.

So no need to check that then :)

  But I don't remember any standard scheme except for bridge devices,
  and this isn't one, is it?  Care to clarify?
 
 Well, the standard swizzling scheme is for p2p bridges, whereas this
 is a host bridge, but there's no reason not to use the same scheme for
 both.  This will become more important when we implement pass-through.
 On pSeries the unit of passthrough can be either a host bridge or a
 p2p bridge, and having the same swizzling scheme will make life
 easier.

So the comment 'use standard PCI swizzling' misleads in implying the
motivation is the standard. Better remove it, or replace with real motivation.

  Plus this scheme is basically just better - it won't force
 all the functions of a multi-function device to share an interrupt.

Only if some functions use pin != INTA. Maybe it's true for
pseries? On the pc most of them use INTA.

 -- 
 David Gibson  | I'll have my music baroque, and my code
 david AT gibson.dropbear.id.au| minimalist, thank you.  NOT _the_ 
 _other_
   | _way_ _around_!
 http://www.ozlabs.org/~dgibson



[Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling

2012-04-01 Thread David Gibson
Currently the pseries PCI code uses a somewhat strange scheme of PCI irq
allocation - one per slot up to a maximum that's greater than the usual 4.
This scheme more or less worked, because we were able to tell the guest the
irq mapping in the device tree, however it's nonstandard and may break
assumptions in the future.  Worse, the array used to construct the dev
tree interrupt map was mis-sized, we got away with it only because it
happened that our SPAPR_PCI_NUM_LSI value was greater than 7.

This patch changes the pseries PCI code to use the normal PCI interrupt
swizzling scheme instead, and corrects allocation of the irq map.

Cc: Michael S. Tsirkin m...@redhat.com

Signed-off-by: David Gibson da...@gibson.dropbear.id.au
---
 hw/spapr_pci.c |   49 -
 hw/spapr_pci.h |5 ++---
 2 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 1cf84e7..b8a0313 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -198,16 +198,20 @@ static void rtas_write_pci_config(sPAPREnvironment *spapr,
 finish_write_pci_config(spapr, 0, addr, size, val, rets);
 }
 
+static int pci_swizzle(int slot, int pin)
+{
+return (slot + pin) % PCI_NUM_PINS;
+}
+
 static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
 {
 /*
  * Here we need to convert pci_dev + irq_num to some unique value
- * which is less than number of IRQs on the specific bus (now it
- * is 16).  At the moment irq_num == device_id (number of the
- * slot?)
- * FIXME: we should swizzle in fn and irq_num
+ * which is less than number of IRQs on the specific bus (4).  We
+ * use standard PCI swizzling, that is (slot number + pin number)
+ * % 4.
  */
-return (pci_dev-devfn  3) % SPAPR_PCI_NUM_LSI;
+return pci_swizzle(PCI_SLOT(pci_dev-devfn), irq_num);
 }
 
 static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
@@ -304,13 +308,13 @@ static int spapr_phb_init(SysBusDevice *s)
phb-busname ? phb-busname : phb-dtbusname,
pci_spapr_set_irq, pci_spapr_map_irq, phb,
phb-memspace, phb-iospace,
-   PCI_DEVFN(0, 0), SPAPR_PCI_NUM_LSI);
+   PCI_DEVFN(0, 0), PCI_NUM_PINS);
 phb-host_state.bus = bus;
 
 QLIST_INSERT_HEAD(spapr-phbs, phb, list);
 
 /* Initialize the LSI table */
-for (i = 0; i  SPAPR_PCI_NUM_LSI; i++) {
+for (i = 0; i  PCI_NUM_PINS; i++) {
 qemu_irq qirq;
 uint32_t num;
 
@@ -392,8 +396,7 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
uint32_t xics_phandle,
void *fdt)
 {
-PCIBus *bus = phb-host_state.bus;
-int bus_off, i;
+int bus_off, i, j;
 char nodename[256];
 uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
 struct {
@@ -415,8 +418,8 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
 };
 uint64_t bus_reg[] = { cpu_to_be64(phb-buid), 0 };
 uint32_t interrupt_map_mask[] = {
-cpu_to_be32(b_d(-1)|b_fff(0)), 0x0, 0x0, 0x0};
-uint32_t interrupt_map[bus-nirq][7];
+cpu_to_be32(b_d(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
+uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
 
 /* Start populating the FDT */
 sprintf(nodename, pci@% PRIx64, phb-buid);
@@ -450,19 +453,23 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb,
  */
 _FDT(fdt_setprop(fdt, bus_off, interrupt-map-mask,
  interrupt_map_mask, sizeof(interrupt_map_mask)));
-for (i = 0; i  7; i++) {
-uint32_t *irqmap = interrupt_map[i];
-irqmap[0] = cpu_to_be32(b_d(i)|b_fff(0));
-irqmap[1] = 0;
-irqmap[2] = 0;
-irqmap[3] = 0;
-irqmap[4] = cpu_to_be32(xics_phandle);
-irqmap[5] = cpu_to_be32(phb-lsi_table[i % SPAPR_PCI_NUM_LSI].dt_irq);
-irqmap[6] = cpu_to_be32(0x8);
+for (i = 0; i  PCI_SLOT_MAX; i++) {
+for (j = 0; j  PCI_NUM_PINS; j++) {
+uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
+int lsi_num = pci_swizzle(i, j);
+
+irqmap[0] = cpu_to_be32(b_d(i)|b_fff(0));
+irqmap[1] = 0;
+irqmap[2] = 0;
+irqmap[3] = cpu_to_be32(j+1);
+irqmap[4] = cpu_to_be32(xics_phandle);
+irqmap[5] = cpu_to_be32(phb-lsi_table[lsi_num].dt_irq);
+irqmap[6] = cpu_to_be32(0x8);
+}
 }
 /* Write interrupt map */
 _FDT(fdt_setprop(fdt, bus_off, interrupt-map, interrupt_map,
- 7 * sizeof(interrupt_map[0])));
+ sizeof(interrupt_map)));
 
 return 0;
 }
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index 039f85b..f54c2e8 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -23,11 +23,10 @@
 #if !defined(__HW_SPAPR_PCI_H__)
 #define __HW_SPAPR_PCI_H__
 
+#include