On Wed, May 30, 2012 at 02:11:58PM +0200, Jan Kiszka wrote: > On 2012-05-21 23:03, Michael S. Tsirkin wrote: > > On Mon, May 21, 2012 at 05:35:34PM -0300, Jan Kiszka wrote: > >> On 2012-05-21 16:05, Michael S. Tsirkin wrote: > >>> On Mon, May 21, 2012 at 10:13:47AM -0300, Jan Kiszka wrote: > >>>> @@ -386,6 +387,14 @@ static void piix3_set_irq(void *opaque, int pirq, > >>>> int level) > >>>> piix3_set_irq_level(piix3, pirq, level); > >>>> } > >>>> > >>>> +static int piix3_map_host_irq(void *opaque, int pci_intx) > >>>> +{ > >>>> + PIIX3State *piix3 = opaque; > >>>> + int host_irq = piix3->dev.config[PIIX_PIRQC + pci_intx]; > >>>> + > >>>> + return host_irq < PIIX_NUM_PIC_IRQS ? host_irq : -1; > >>>> +} > >>>> + > >>>> /* irq routing is changed. so rebuild bitmap */ > >>>> static void piix3_update_irq_levels(PIIX3State *piix3) > >>>> { > >>> > >>> > >>> So, instead of special API just for assignment, > >>> I would like to see map_irq in piix being reworked > >>> to take dev config into account. > >>> I think piix is almost unique in this but need to check, > >> > >> Maybe it is the only host bridge with routing that we have in QEMU right > >> now, but it is surely not unique (e.g. all later Intel chipsets support > >> this). > > > > Yes. APIs for this should be in place. Just saying > > instead of failing we can easily just make it work > > if there are no remappings. > > > >>> if not fix other host buses that are programmable. > >>> PCI bridges are all fixed routing. > >>> > >>> Then we can drop set_irq callback. > >> > >> set_irq may do more than IRQ routing. It may also flip polarity (see > >> bonito.c). > > > > In that case host_map_irq is not a good API? > > Need to investigate. > > > >> I guess we need to analyze the requirements of all in-tree > >> host bridges first. > > > > Yes. > > > >>> > >>> Finally all devices can cache the irq#, > >>> and piix would scan devices behind it > >>> and update the irq. > >>> > >>> Assignment then just needs a notifier, since > >>> it owns the device just a pointer in device is > >>> enough. > >> > >> I cannot completely follow your ideas here yet. Do you want to pass the > >> mapping information along the notifier, or why "just ... a notifier"? > > > > > > Each device would resolve IRQs that it uses. > > > > > >>> > >>> Could you look at doing this please? > >>> If no I can give it a stub. > >>> > >> > >> Unless we can converge over a final API quickly, I would suggest to base > >> these refactorings on top of what I propose here. We will have to touch > >> all host bridges more invasively for this anyway. If you can explain to > >> me how simple the refactoring can be (but I'm a bit skeptical ;) ), I > >> could do this, otherwise I would prefer to focus on the device > >> assignment topic which provide some more nuts. > >> > >> Jan > > > > Yes it's simple. I'll post in a couple of days (lots of > > meetings tomorrow). I think you'll > > see it's easier and cleaner. > > I looked into this again and it appears to me that, in fact, more work > is required to bypass the current interrupt routing on delivery: > > The PIIX2 tracks the interrupt level of each device on its bus with the > help of PCIBus::irq_count. On routing changes via its config space, > those levels are currently used to generate the new host IRQ states. > But, once we bypass that level state tracking, we need to do something > else, and that better for _all_ devices: clear all outputs and let the > devices issue an update. The assigned device could provide this based on > the INTx status bit, for all others we need to track the level generically. > > Did you start working on this topic already? > > Jan
I will need a bit of time to understand what you are saying above. Unfortunately I got distracted by various end of quarter activities. Below's as far as I got - hopefully this explains what I had in mind: for deep hierarchies this will save a bus scan so I think this makes sense even in the absense of kvm irqchip. Warning: completely untested and known to be incomplete. Please judge whether this is going in a direction that is helpful for your efforts. If you respond in the positive, I hope to be able to get back to this next week. Signed-off-by: Michael S. Tsirkin <m...@redhat.com> diff --git a/hw/pci.c b/hw/pci.c index c1ebdde..313abe1 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -112,18 +112,33 @@ static inline void pci_set_irq_state(PCIDevice *d, int irq_num, int level) d->irq_state |= level << irq_num; } -static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) +static void pci_update_irq_maps_for_device(PCIBus *unused, PCIDevice *dev) { PCIBus *bus; - for (;;) { - bus = pci_dev->bus; - irq_num = bus->map_irq(pci_dev, irq_num); - if (bus->set_irq) - break; - pci_dev = bus->parent_dev; + PCIDevice *pci_dev; + int i, irq_num; + for (i = 0; i < PCI_NUM_PINS; ++i) { + pci_dev = dev; + irq_num = i; + for (;;) { + bus = pci_dev->bus; + irq_num = bus->map_irq(pci_dev, irq_num); + if (bus->set_irq) + break; + pci_dev = bus->parent_dev; + } + dev->irq_num[i] = irq_num; + bus->irq_count[i] += pci_irq_state(dev, i); + dev->root_bus = bus; } - bus->irq_count[irq_num] += change; - bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); +} + +static void pci_change_irq_level(PCIDevice *pci_dev, int irq_num, int change) +{ + PCIBus *bus = pci_dev->root_bus; + int i = pci_dev->irq_num[irq_num]; + bus->irq_count[i] += change; + bus->set_irq(bus->irq_opaque, i, bus->irq_count[i] != 0); } int pci_bus_get_irq_level(PCIBus *bus, int irq_num) @@ -805,6 +820,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus, bus->devices[devfn] = pci_dev; pci_dev->irq = qemu_allocate_irqs(pci_set_irq, pci_dev, PCI_NUM_PINS); pci_dev->version_id = 2; /* Current pci device vmstate version */ + pci_update_irq_maps_for_device(NULL, pci_dev); return pci_dev; } @@ -1140,6 +1156,18 @@ static void pci_for_each_device_under_bus(PCIBus *bus, } } +/* Update per-device IRQ mappings after bus mappings changed. */ +void pci_update_irq_maps(PCIBus *bus) +{ + int i; + /* FIXME: this only scans one level. + * Need to scan child buses recursively. */ + for (i = 0; i <= bus->nirq; ++i) + bus->irq_count[i] = 0; + pci_for_each_device_under_bus(bus, pci_update_irq_maps_for_device); +} + + void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *b, PCIDevice *d)) { diff --git a/hw/pci.h b/hw/pci.h index 8d0aa49..4102c44 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -207,6 +207,12 @@ struct PCIDevice { /* Current IRQ levels. Used internally by the generic PCI code. */ uint8_t irq_state; + /* The root bus. Used internally by the generic PCI code. */ + PCIBus *root_bus; + + /* IRQ num at the root bus. Used internally by the generic PCI code. */ + int irq_num[PCI_NUM_PINS]; + /* Capability bits */ uint32_t cap_present; @@ -305,6 +311,7 @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model, const char *default_devaddr); int pci_bus_num(PCIBus *s); void pci_for_each_device(PCIBus *bus, int bus_num, void (*fn)(PCIBus *bus, PCIDevice *d)); +void pci_update_irq_maps(PCIBus *bus); PCIBus *pci_find_root_bus(int domain); int pci_find_domain(const PCIBus *bus); PCIDevice *pci_find_device(PCIBus *bus, int bus_num, uint8_t devfn); diff --git a/hw/piix_pci.c b/hw/piix_pci.c index 09e84f5..ac71294 100644 --- a/hw/piix_pci.c +++ b/hw/piix_pci.c @@ -391,6 +391,7 @@ static void piix3_update_irq_levels(PIIX3State *piix3) { int pirq; + pci_update_irq_maps(piix3->dev.bus); piix3->pic_levels = 0; for (pirq = 0; pirq < PIIX_NUM_PIRQS; pirq++) { piix3_set_irq_level(piix3, pirq,