Re: [Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling
On Sun, 2012-04-15 at 15:26 +0300, Michael S. Tsirkin wrote: Plus this scheme is basically just better - it won't force all the functions of a multi-function device to share an interrupt. Only if some functions use pin != INTA. Maybe it's true for pseries? On the pc most of them use INTA. Ah ? Most device I've seen with secondary functions use a different pin... (not to be confused with PC chipset built-in devices that is, but we aren't emulating any of these here anyway). Cheers, Ben.
Re: [Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling
On Mon, Apr 02, 2012 at 02:17:36PM +1000, David Gibson wrote: Currently the pseries PCI code uses a somewhat strange scheme of PCI irq allocation - one per slot up to a maximum that's greater than the usual 4. This scheme more or less worked, because we were able to tell the guest the irq mapping in the device tree, however it's nonstandard and may break assumptions in the future. Worse, the array used to construct the dev tree interrupt map was mis-sized, we got away with it only because it happened that our SPAPR_PCI_NUM_LSI value was greater than 7. This patch changes the pseries PCI code to use the normal PCI interrupt swizzling scheme instead, I don't see anything wrong with the code - I assume someone who applies this knows about real hardware and can check that it behaves as emulated here. But I don't remember any standard scheme except for bridge devices, and this isn't one, is it? Care to clarify? and corrects allocation of the irq map. Cc: Michael S. Tsirkin m...@redhat.com Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/spapr_pci.c | 49 - hw/spapr_pci.h |5 ++--- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c index 1cf84e7..b8a0313 100644 --- a/hw/spapr_pci.c +++ b/hw/spapr_pci.c @@ -198,16 +198,20 @@ static void rtas_write_pci_config(sPAPREnvironment *spapr, finish_write_pci_config(spapr, 0, addr, size, val, rets); } +static int pci_swizzle(int slot, int pin) +{ +return (slot + pin) % PCI_NUM_PINS; +} + Rename pci_spapr_swizzle pls. Or just open-code. static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num) { /* * Here we need to convert pci_dev + irq_num to some unique value - * which is less than number of IRQs on the specific bus (now it - * is 16). At the moment irq_num == device_id (number of the - * slot?) - * FIXME: we should swizzle in fn and irq_num + * which is less than number of IRQs on the specific bus (4). We + * use standard PCI swizzling, that is (slot number + pin number) + * % 4. */ -return (pci_dev-devfn 3) % SPAPR_PCI_NUM_LSI; +return pci_swizzle(PCI_SLOT(pci_dev-devfn), irq_num); } static void pci_spapr_set_irq(void *opaque, int irq_num, int level) @@ -304,13 +308,13 @@ static int spapr_phb_init(SysBusDevice *s) phb-busname ? phb-busname : phb-dtbusname, pci_spapr_set_irq, pci_spapr_map_irq, phb, phb-memspace, phb-iospace, - PCI_DEVFN(0, 0), SPAPR_PCI_NUM_LSI); + PCI_DEVFN(0, 0), PCI_NUM_PINS); phb-host_state.bus = bus; QLIST_INSERT_HEAD(spapr-phbs, phb, list); /* Initialize the LSI table */ -for (i = 0; i SPAPR_PCI_NUM_LSI; i++) { +for (i = 0; i PCI_NUM_PINS; i++) { qemu_irq qirq; uint32_t num; @@ -392,8 +396,7 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) { -PCIBus *bus = phb-host_state.bus; -int bus_off, i; +int bus_off, i, j; char nodename[256]; uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; struct { @@ -415,8 +418,8 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb, }; uint64_t bus_reg[] = { cpu_to_be64(phb-buid), 0 }; uint32_t interrupt_map_mask[] = { -cpu_to_be32(b_d(-1)|b_fff(0)), 0x0, 0x0, 0x0}; -uint32_t interrupt_map[bus-nirq][7]; +cpu_to_be32(b_d(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; +uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7]; /* Start populating the FDT */ sprintf(nodename, pci@% PRIx64, phb-buid); @@ -450,19 +453,23 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb, */ _FDT(fdt_setprop(fdt, bus_off, interrupt-map-mask, interrupt_map_mask, sizeof(interrupt_map_mask))); -for (i = 0; i 7; i++) { -uint32_t *irqmap = interrupt_map[i]; -irqmap[0] = cpu_to_be32(b_d(i)|b_fff(0)); -irqmap[1] = 0; -irqmap[2] = 0; -irqmap[3] = 0; -irqmap[4] = cpu_to_be32(xics_phandle); -irqmap[5] = cpu_to_be32(phb-lsi_table[i % SPAPR_PCI_NUM_LSI].dt_irq); -irqmap[6] = cpu_to_be32(0x8); +for (i = 0; i PCI_SLOT_MAX; i++) { +for (j = 0; j PCI_NUM_PINS; j++) { +uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j]; +int lsi_num = pci_swizzle(i, j); + +irqmap[0] = cpu_to_be32(b_d(i)|b_fff(0)); +irqmap[1] = 0; +irqmap[2] = 0; +irqmap[3] = cpu_to_be32(j+1); +irqmap[4] = cpu_to_be32(xics_phandle); +irqmap[5] =
Re: [Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling
On Sun, Apr 15, 2012 at 01:03:57PM +0300, Michael S. Tsirkin wrote: On Mon, Apr 02, 2012 at 02:17:36PM +1000, David Gibson wrote: Currently the pseries PCI code uses a somewhat strange scheme of PCI irq allocation - one per slot up to a maximum that's greater than the usual 4. This scheme more or less worked, because we were able to tell the guest the irq mapping in the device tree, however it's nonstandard and may break assumptions in the future. Worse, the array used to construct the dev tree interrupt map was mis-sized, we got away with it only because it happened that our SPAPR_PCI_NUM_LSI value was greater than 7. This patch changes the pseries PCI code to use the normal PCI interrupt swizzling scheme instead, I don't see anything wrong with the code - I assume someone who applies this knows about real hardware and can check that it behaves as emulated here. Because the device tree lets us advertise to the guest precisely how the interrupts are mapped, it doesn't really matter whether we behave identically to real hardware (the PAPR platform we're emulating here is already a para-virtualized environment running under a hypervisor, so real isn't a particularly clear concept). I'm not sure if all pseries machines (and/or hypervisor versions) are the same in this regard, but the new scheme is certainly in use. As long as the device tree has the correct information, really any interrupt mapping scheme is compliant with PAPR. But I don't remember any standard scheme except for bridge devices, and this isn't one, is it? Care to clarify? Well, the standard swizzling scheme is for p2p bridges, whereas this is a host bridge, but there's no reason not to use the same scheme for both. This will become more important when we implement pass-through. On pSeries the unit of passthrough can be either a host bridge or a p2p bridge, and having the same swizzling scheme will make life easier. Plus this scheme is basically just better - it won't force all the functions of a multi-function device to share an interrupt. -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
Re: [Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling
On Sun, Apr 15, 2012 at 09:47:47PM +1000, David Gibson wrote: On Sun, Apr 15, 2012 at 01:03:57PM +0300, Michael S. Tsirkin wrote: On Mon, Apr 02, 2012 at 02:17:36PM +1000, David Gibson wrote: Currently the pseries PCI code uses a somewhat strange scheme of PCI irq allocation - one per slot up to a maximum that's greater than the usual 4. This scheme more or less worked, because we were able to tell the guest the irq mapping in the device tree, however it's nonstandard and may break assumptions in the future. Worse, the array used to construct the dev tree interrupt map was mis-sized, we got away with it only because it happened that our SPAPR_PCI_NUM_LSI value was greater than 7. This patch changes the pseries PCI code to use the normal PCI interrupt swizzling scheme instead, I don't see anything wrong with the code - I assume someone who applies this knows about real hardware and can check that it behaves as emulated here. Because the device tree lets us advertise to the guest precisely how the interrupts are mapped, it doesn't really matter whether we behave identically to real hardware (the PAPR platform we're emulating here is already a para-virtualized environment running under a hypervisor, so real isn't a particularly clear concept). I'm not sure if all pseries machines (and/or hypervisor versions) are the same in this regard, but the new scheme is certainly in use. As long as the device tree has the correct information, really any interrupt mapping scheme is compliant with PAPR. So no need to check that then :) But I don't remember any standard scheme except for bridge devices, and this isn't one, is it? Care to clarify? Well, the standard swizzling scheme is for p2p bridges, whereas this is a host bridge, but there's no reason not to use the same scheme for both. This will become more important when we implement pass-through. On pSeries the unit of passthrough can be either a host bridge or a p2p bridge, and having the same swizzling scheme will make life easier. So the comment 'use standard PCI swizzling' misleads in implying the motivation is the standard. Better remove it, or replace with real motivation. Plus this scheme is basically just better - it won't force all the functions of a multi-function device to share an interrupt. Only if some functions use pin != INTA. Maybe it's true for pseries? On the pc most of them use INTA. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au| minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
[Qemu-devel] [PATCH 2/3] pseries: Use more conventional PCI interrupt swizzling
Currently the pseries PCI code uses a somewhat strange scheme of PCI irq allocation - one per slot up to a maximum that's greater than the usual 4. This scheme more or less worked, because we were able to tell the guest the irq mapping in the device tree, however it's nonstandard and may break assumptions in the future. Worse, the array used to construct the dev tree interrupt map was mis-sized, we got away with it only because it happened that our SPAPR_PCI_NUM_LSI value was greater than 7. This patch changes the pseries PCI code to use the normal PCI interrupt swizzling scheme instead, and corrects allocation of the irq map. Cc: Michael S. Tsirkin m...@redhat.com Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/spapr_pci.c | 49 - hw/spapr_pci.h |5 ++--- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c index 1cf84e7..b8a0313 100644 --- a/hw/spapr_pci.c +++ b/hw/spapr_pci.c @@ -198,16 +198,20 @@ static void rtas_write_pci_config(sPAPREnvironment *spapr, finish_write_pci_config(spapr, 0, addr, size, val, rets); } +static int pci_swizzle(int slot, int pin) +{ +return (slot + pin) % PCI_NUM_PINS; +} + static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num) { /* * Here we need to convert pci_dev + irq_num to some unique value - * which is less than number of IRQs on the specific bus (now it - * is 16). At the moment irq_num == device_id (number of the - * slot?) - * FIXME: we should swizzle in fn and irq_num + * which is less than number of IRQs on the specific bus (4). We + * use standard PCI swizzling, that is (slot number + pin number) + * % 4. */ -return (pci_dev-devfn 3) % SPAPR_PCI_NUM_LSI; +return pci_swizzle(PCI_SLOT(pci_dev-devfn), irq_num); } static void pci_spapr_set_irq(void *opaque, int irq_num, int level) @@ -304,13 +308,13 @@ static int spapr_phb_init(SysBusDevice *s) phb-busname ? phb-busname : phb-dtbusname, pci_spapr_set_irq, pci_spapr_map_irq, phb, phb-memspace, phb-iospace, - PCI_DEVFN(0, 0), SPAPR_PCI_NUM_LSI); + PCI_DEVFN(0, 0), PCI_NUM_PINS); phb-host_state.bus = bus; QLIST_INSERT_HEAD(spapr-phbs, phb, list); /* Initialize the LSI table */ -for (i = 0; i SPAPR_PCI_NUM_LSI; i++) { +for (i = 0; i PCI_NUM_PINS; i++) { qemu_irq qirq; uint32_t num; @@ -392,8 +396,7 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb, uint32_t xics_phandle, void *fdt) { -PCIBus *bus = phb-host_state.bus; -int bus_off, i; +int bus_off, i, j; char nodename[256]; uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; struct { @@ -415,8 +418,8 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb, }; uint64_t bus_reg[] = { cpu_to_be64(phb-buid), 0 }; uint32_t interrupt_map_mask[] = { -cpu_to_be32(b_d(-1)|b_fff(0)), 0x0, 0x0, 0x0}; -uint32_t interrupt_map[bus-nirq][7]; +cpu_to_be32(b_d(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)}; +uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7]; /* Start populating the FDT */ sprintf(nodename, pci@% PRIx64, phb-buid); @@ -450,19 +453,23 @@ int spapr_populate_pci_devices(sPAPRPHBState *phb, */ _FDT(fdt_setprop(fdt, bus_off, interrupt-map-mask, interrupt_map_mask, sizeof(interrupt_map_mask))); -for (i = 0; i 7; i++) { -uint32_t *irqmap = interrupt_map[i]; -irqmap[0] = cpu_to_be32(b_d(i)|b_fff(0)); -irqmap[1] = 0; -irqmap[2] = 0; -irqmap[3] = 0; -irqmap[4] = cpu_to_be32(xics_phandle); -irqmap[5] = cpu_to_be32(phb-lsi_table[i % SPAPR_PCI_NUM_LSI].dt_irq); -irqmap[6] = cpu_to_be32(0x8); +for (i = 0; i PCI_SLOT_MAX; i++) { +for (j = 0; j PCI_NUM_PINS; j++) { +uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j]; +int lsi_num = pci_swizzle(i, j); + +irqmap[0] = cpu_to_be32(b_d(i)|b_fff(0)); +irqmap[1] = 0; +irqmap[2] = 0; +irqmap[3] = cpu_to_be32(j+1); +irqmap[4] = cpu_to_be32(xics_phandle); +irqmap[5] = cpu_to_be32(phb-lsi_table[lsi_num].dt_irq); +irqmap[6] = cpu_to_be32(0x8); +} } /* Write interrupt map */ _FDT(fdt_setprop(fdt, bus_off, interrupt-map, interrupt_map, - 7 * sizeof(interrupt_map[0]))); + sizeof(interrupt_map))); return 0; } diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h index 039f85b..f54c2e8 100644 --- a/hw/spapr_pci.h +++ b/hw/spapr_pci.h @@ -23,11 +23,10 @@ #if !defined(__HW_SPAPR_PCI_H__) #define __HW_SPAPR_PCI_H__ +#include