On Fri, 2013-06-21 at 00:08 +1000, Alexey Kardashevskiy wrote: > At the moment QEMU creates a route for every MSI IRQ. > > Now we are about to add IRQFD support on PPC64-pseries platform. > pSeries already has in-kernel emulated interrupt controller with > 8192 IRQs. Also, pSeries PHB already supports MSIMessage to IRQ > mapping as a part of PAPR requirements for MSI/MSIX guests. > Specifically, the pSeries guest does not touch MSIMessage's at > all, instead it uses rtas_ibm_change_msi and rtas_ibm_query_interrupt_source > rtas calls to do the mapping. > > Therefore we do not really need more routing than we got already. > The patch introduces the infrastructure to enable direct IRQ mapping. > > Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> > > --- > > The patch is raw and ugly indeed, I made it only to demonstrate > the idea and see if it has right to live or not. > > For some reason which I do not really understand (limited GSI numbers?) > the existing code always adds routing and I do not see why we would need it.
It's an IOAPIC, a pin gets toggled from the device and an MSI message gets written to the CPU. So the route allocates and programs the pin->MSI, then we tell it what notifier triggers that pin. On x86 the MSI vector doesn't encode any information about the device sending the MSI, here you seem to be able to figure out the device and vector space number from the address. Then your pin to MSI is effectively fixed. So why isn't this just your kvm_irqchip_add_msi_route function? On pSeries it's a lookup, on x86 it's a allocate and program. What does kvm_irqchip_add_msi_route do on pSeries today? Thanks, Alex > --- > hw/misc/vfio.c | 11 +++++++++-- > hw/pci/pci.c | 13 +++++++++++++ > hw/ppc/spapr_pci.c | 13 +++++++++++++ > hw/virtio/virtio-pci.c | 26 ++++++++++++++++++++------ > include/hw/pci/pci.h | 4 ++++ > include/hw/pci/pci_bus.h | 1 + > 6 files changed, 60 insertions(+), 8 deletions(-) > > diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c > index 14aac04..2d9eef7 100644 > --- a/hw/misc/vfio.c > +++ b/hw/misc/vfio.c > @@ -639,7 +639,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, > unsigned int nr, > * Attempt to enable route through KVM irqchip, > * default to userspace handling if unavailable. > */ > - vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1; > + > + vector->virq = msg ? pci_bus_map_msi(vdev->pdev.bus, *msg) : -1; > + if (vector->virq < 0) { > + vector->virq = msg ? kvm_irqchip_add_msi_route(kvm_state, *msg) : -1; > + } > if (vector->virq < 0 || > kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt, > vector->virq) < 0) { > @@ -807,7 +811,10 @@ retry: > * Attempt to enable route through KVM irqchip, > * default to userspace handling if unavailable. > */ > - vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg); > + vector->virq = pci_bus_map_msi(vdev->pdev.bus, msg); > + if (vector->virq < 0) { > + vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg); > + } > if (vector->virq < 0 || > kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt, > vector->virq) < 0) { > diff --git a/hw/pci/pci.c b/hw/pci/pci.c > index a976e46..a9875e9 100644 > --- a/hw/pci/pci.c > +++ b/hw/pci/pci.c > @@ -1254,6 +1254,19 @@ void pci_device_set_intx_routing_notifier(PCIDevice > *dev, > dev->intx_routing_notifier = notifier; > } > > +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn) > +{ > + bus->map_msi = map_msi_fn; > +} > + > +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg) > +{ > + if (bus->map_msi) { > + return bus->map_msi(bus, msg); > + } > + return -1; > +} > + > /* > * PCI-to-PCI bridge specification > * 9.1: Interrupt routing. Table 9-1 > diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c > index 80408c9..9ef9a29 100644 > --- a/hw/ppc/spapr_pci.c > +++ b/hw/ppc/spapr_pci.c > @@ -500,6 +500,18 @@ static void spapr_msi_write(void *opaque, hwaddr addr, > qemu_irq_pulse(xics_get_qirq(spapr->icp, irq)); > } > > +static int spapr_msi_get_irq(PCIBus *bus, MSIMessage msg) > +{ > + DeviceState *par = bus->qbus.parent; > + sPAPRPHBState *sphb = (sPAPRPHBState *) par; > + unsigned long addr = msg.address - sphb->msi_win_addr; > + int ndev = addr >> 16; > + int vec = ((addr & 0xFFFF) >> 2) | msg.data; > + uint32_t irq = sphb->msi_table[ndev].irq + vec; > + > + return (int)irq; > +} > + > static const MemoryRegionOps spapr_msi_ops = { > /* There is no .read as the read result is undefined by PCI spec */ > .read = NULL, > @@ -664,6 +676,7 @@ static int _spapr_phb_init(SysBusDevice *s) > > sphb->lsi_table[i].irq = irq; > } > + pci_bus_set_map_msi_fn(bus, spapr_msi_get_irq); > > return 0; > } > diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c > index d309416..587f53e 100644 > --- a/hw/virtio/virtio-pci.c > +++ b/hw/virtio/virtio-pci.c > @@ -472,6 +472,8 @@ static unsigned virtio_pci_get_features(DeviceState *d) > return proxy->host_features; > } > > +extern int spapr_msi_get_irq(PCIBus *bus, MSIMessage *msg); > + > static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, > unsigned int queue_no, > unsigned int vector, > @@ -481,7 +483,10 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy > *proxy, > int ret; > > if (irqfd->users == 0) { > - ret = kvm_irqchip_add_msi_route(kvm_state, msg); > + ret = pci_bus_map_msi(proxy->pci_dev.bus, msg); > + if (ret < 0) { > + ret = kvm_irqchip_add_msi_route(kvm_state, msg); > + } > if (ret < 0) { > return ret; > } > @@ -609,14 +614,23 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy > *proxy, > VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no); > EventNotifier *n = virtio_queue_get_guest_notifier(vq); > VirtIOIRQFD *irqfd; > - int ret = 0; > + int ret = 0, tmp; > > if (proxy->vector_irqfd) { > irqfd = &proxy->vector_irqfd[vector]; > - if (irqfd->msg.data != msg.data || irqfd->msg.address != > msg.address) { > - ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg); > - if (ret < 0) { > - return ret; > + > + tmp = pci_bus_map_msi(proxy->pci_dev.bus, msg); > + if (tmp >= 0) { > + if (irqfd->virq != tmp) { > + fprintf(stderr, "FIXME: MSI(-X) vector has changed from %X > to %x\n", > + irqfd->virq, tmp); > + } > + } else { > + if (irqfd->msg.data != msg.data || irqfd->msg.address != > msg.address) { > + ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, > msg); > + if (ret < 0) { > + return ret; > + } > } > } > } > diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h > index 8797802..632739a 100644 > --- a/include/hw/pci/pci.h > +++ b/include/hw/pci/pci.h > @@ -332,6 +332,7 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev); > typedef void (*pci_set_irq_fn)(void *opaque, int irq_num, int level); > typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num); > typedef PCIINTxRoute (*pci_route_irq_fn)(void *opaque, int pin); > +typedef int (*pci_map_msi_fn)(PCIBus *bus, MSIMessage msg); > > typedef enum { > PCI_HOTPLUG_DISABLED, > @@ -375,6 +376,9 @@ bool pci_intx_route_changed(PCIINTxRoute *old, > PCIINTxRoute *new); > void pci_bus_fire_intx_routing_notifier(PCIBus *bus); > void pci_device_set_intx_routing_notifier(PCIDevice *dev, > PCIINTxRoutingNotifier notifier); > +void pci_bus_set_map_msi_fn(PCIBus *bus, pci_map_msi_fn map_msi_fn); > +int pci_bus_map_msi(PCIBus *bus, MSIMessage msg); > + > void pci_device_reset(PCIDevice *dev); > void pci_bus_reset(PCIBus *bus); > > diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h > index 66762f6..81efd2b 100644 > --- a/include/hw/pci/pci_bus.h > +++ b/include/hw/pci/pci_bus.h > @@ -16,6 +16,7 @@ struct PCIBus { > pci_set_irq_fn set_irq; > pci_map_irq_fn map_irq; > pci_route_irq_fn route_intx_to_irq; > + pci_map_msi_fn map_msi; > pci_hotplug_fn hotplug; > DeviceState *hotplug_qdev; > void *irq_opaque;