On Fri, Aug 24, 2012 at 08:19:50AM +0200, Jan Kiszka wrote: > From: Jan Kiszka <jan.kis...@siemens.com> > > This optimization was once used in qemu-kvm to keep KVM route usage low. > But now we solved that problem via lazy updates. It also tried to handle > the case of vectors shared between different sources of the same device. > However, this never really worked and will have to be addressed > differently anyway. So drop this obsolete interface. > > We still need interfaces to clear pending vectors though. Provide > msix_clear_vector and msix_clear_all_vectors for this. > > This also unbreaks MSI-X after reset for ivshmem and megasas as device > models can't easily mark their vectors used afterward (megasas didn't > even try). > > Signed-off-by: Jan Kiszka <jan.kis...@siemens.com> > --- > > This patch has been posted some moons again, and we had a discussion > about the impact on the existing users. At that time, MSI-X refactoring > for KVM support was not yet merged. Now it is, and it should be better > much clearer that vector usage tracking has no business with that > feature. > > hw/ivshmem.c | 20 ------------------- > hw/megasas.c | 4 --- > hw/msix.c | 57 ++++++++++++------------------------------------------ > hw/msix.h | 5 +-- > hw/pci.h | 2 - > hw/virtio-pci.c | 20 +++++++----------- > 6 files changed, 23 insertions(+), 85 deletions(-) > > diff --git a/hw/ivshmem.c b/hw/ivshmem.c > index 47f2a16..5ffff48 100644 > --- a/hw/ivshmem.c > +++ b/hw/ivshmem.c > @@ -523,28 +523,11 @@ static void ivshmem_read(void *opaque, const uint8_t * > buf, int flags) > return; > } > > -/* Select the MSI-X vectors used by device. > - * ivshmem maps events to vectors statically, so > - * we just enable all vectors on init and after reset. */ > -static void ivshmem_use_msix(IVShmemState * s) > -{ > - int i; > - > - if (!msix_present(&s->dev)) { > - return; > - } > - > - for (i = 0; i < s->vectors; i++) { > - msix_vector_use(&s->dev, i); > - } > -} > - > static void ivshmem_reset(DeviceState *d) > { > IVShmemState *s = DO_UPCAST(IVShmemState, dev.qdev, d); > > s->intrstatus = 0; > - ivshmem_use_msix(s); > return; > } > > @@ -586,8 +569,6 @@ static void ivshmem_setup_msi(IVShmemState * s) > > /* allocate QEMU char devices for receiving interrupts */ > s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry)); > - > - ivshmem_use_msix(s); > } > > static void ivshmem_save(QEMUFile* f, void *opaque) > @@ -629,7 +610,6 @@ static int ivshmem_load(QEMUFile* f, void *opaque, int > version_id) > > if (ivshmem_has_feature(proxy, IVSHMEM_MSI)) { > msix_load(&proxy->dev, f); > - ivshmem_use_msix(proxy); > } else { > proxy->intrstatus = qemu_get_be32(f); > proxy->intrmask = qemu_get_be32(f); > diff --git a/hw/megasas.c b/hw/megasas.c > index c35a15d..4766117 100644 > --- a/hw/megasas.c > +++ b/hw/megasas.c > @@ -2121,10 +2121,6 @@ static int megasas_scsi_init(PCIDevice *dev) > pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_IO, &s->port_io); > pci_register_bar(&s->dev, 3, bar_type, &s->queue_io); > > - if (megasas_use_msix(s)) { > - msix_vector_use(&s->dev, 0); > - } > - > if (!s->sas_addr) { > s->sas_addr = ((NAA_LOCALLY_ASSIGNED_ID << 24) | > IEEE_COMPANY_LOCALLY_ASSIGNED) << 36; > diff --git a/hw/msix.c b/hw/msix.c > index aea340b..163ce4c 100644 > --- a/hw/msix.c > +++ b/hw/msix.c > @@ -273,7 +273,6 @@ int msix_init(struct PCIDevice *dev, unsigned short > nentries, > > dev->msix_table = g_malloc0(table_size); > dev->msix_pba = g_malloc0(pba_size); > - dev->msix_entry_used = g_malloc0(nentries * sizeof > *dev->msix_entry_used); > > msix_mask_all(dev, nentries); > > @@ -326,16 +325,6 @@ int msix_init_exclusive_bar(PCIDevice *dev, unsigned > short nentries, > return 0; > } > > -static void msix_free_irq_entries(PCIDevice *dev) > -{ > - int vector; > - > - for (vector = 0; vector < dev->msix_entries_nr; ++vector) { > - dev->msix_entry_used[vector] = 0; > - msix_clr_pending(dev, vector); > - } > -} > - > /* Clean up resources for the device. */ > void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion > *pba_bar) > { > @@ -344,7 +333,6 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, > MemoryRegion *pba_bar) > } > pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH); > dev->msix_cap = 0; > - msix_free_irq_entries(dev); > dev->msix_entries_nr = 0; > memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio); > memory_region_destroy(&dev->msix_pba_mmio); > @@ -354,8 +342,6 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, > MemoryRegion *pba_bar) > memory_region_destroy(&dev->msix_table_mmio); > g_free(dev->msix_table); > dev->msix_table = NULL; > - g_free(dev->msix_entry_used); > - dev->msix_entry_used = NULL; > dev->cap_present &= ~QEMU_PCI_CAP_MSIX; > return; > } > @@ -390,7 +376,6 @@ void msix_load(PCIDevice *dev, QEMUFile *f) > return; > } > > - msix_free_irq_entries(dev); > qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE); > qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8); > msix_update_function_masked(dev); > @@ -419,8 +404,9 @@ void msix_notify(PCIDevice *dev, unsigned vector) > { > MSIMessage msg; > > - if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) > + if (vector >= dev->msix_entries_nr) { > return; > + } > if (msix_is_masked(dev, vector)) { > msix_set_pending(dev, vector); > return; > @@ -436,7 +422,7 @@ void msix_reset(PCIDevice *dev) > if (!msix_present(dev)) { > return; > } > - msix_free_irq_entries(dev); > + msix_clear_all_vectors(dev); > dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &= > ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET]; > memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE); > @@ -444,41 +430,24 @@ void msix_reset(PCIDevice *dev) > msix_mask_all(dev, dev->msix_entries_nr); > } > > -/* PCI spec suggests that devices make it possible for software to configure > - * less vectors than supported by the device, but does not specify a standard > - * mechanism for devices to do so. > - * > - * We support this by asking devices to declare vectors software is going to > - * actually use, and checking this on the notification path. Devices that > - * don't want to follow the spec suggestion can declare all vectors as used. > */ > - > -/* Mark vector as used. */ > -int msix_vector_use(PCIDevice *dev, unsigned vector) > +/* Clear pending vector. */ > +void msix_clear_vector(PCIDevice *dev, unsigned vector) > { > - if (vector >= dev->msix_entries_nr) > - return -EINVAL; > - dev->msix_entry_used[vector]++; > - return 0; > -} > -
I keep thinking we should instead call vector notifier here, so that virtio can detect and handle cases where kvm_virtio_pci_vq_vector_use fails. ATM it just asserts. Maybe I am wrong. Let's delay this decision until 1.3. > -/* Mark vector as unused. */ > -void msix_vector_unuse(PCIDevice *dev, unsigned vector) > -{ > - if (vector >= dev->msix_entries_nr || !dev->msix_entry_used[vector]) { > - return; > - } > - if (--dev->msix_entry_used[vector]) { > - return; > + if (msix_present(dev) && vector < dev->msix_entries_nr) { > + msix_clr_pending(dev, vector); > } > - msix_clr_pending(dev, vector); > } > > -void msix_unuse_all_vectors(PCIDevice *dev) > +void msix_clear_all_vectors(PCIDevice *dev) > { > + unsigned int vector; > + > if (!msix_present(dev)) { > return; > } > - msix_free_irq_entries(dev); > + for (vector = 0; vector < dev->msix_entries_nr; ++vector) { > + msix_clr_pending(dev, vector); > + } > } > > unsigned int msix_nr_vectors_allocated(const PCIDevice *dev) > diff --git a/hw/msix.h b/hw/msix.h > index 15211cb..5c2512a 100644 > --- a/hw/msix.h > +++ b/hw/msix.h > @@ -26,9 +26,8 @@ void msix_load(PCIDevice *dev, QEMUFile *f); > int msix_enabled(PCIDevice *dev); > int msix_present(PCIDevice *dev); > > -int msix_vector_use(PCIDevice *dev, unsigned vector); > -void msix_vector_unuse(PCIDevice *dev, unsigned vector); > -void msix_unuse_all_vectors(PCIDevice *dev); > +void msix_clear_vector(PCIDevice *dev, unsigned vector); > +void msix_clear_all_vectors(PCIDevice *dev); > > void msix_notify(PCIDevice *dev, unsigned vector); > > diff --git a/hw/pci.h b/hw/pci.h > index 4b6ab3d..1b07450 100644 > --- a/hw/pci.h > +++ b/hw/pci.h > @@ -240,8 +240,6 @@ struct PCIDevice { > /* Memory Regions for MSIX table and pending bit entries. */ > MemoryRegion msix_table_mmio; > MemoryRegion msix_pba_mmio; > - /* Reference-count for entries actually in use by driver. */ > - unsigned *msix_entry_used; > /* MSIX function mask set or MSIX disabled */ > bool msix_function_masked; > /* Version id needed for VMState */ > diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c > index 2a3d86f..f8ce01d 100644 > --- a/hw/virtio-pci.c > +++ b/hw/virtio-pci.c > @@ -137,9 +137,6 @@ static int virtio_pci_load_config(void * opaque, QEMUFile > *f) > } else { > proxy->vdev->config_vector = VIRTIO_NO_VECTOR; > } > - if (proxy->vdev->config_vector != VIRTIO_NO_VECTOR) { > - return msix_vector_use(&proxy->pci_dev, proxy->vdev->config_vector); > - } > return 0; > } > > @@ -153,9 +150,6 @@ static int virtio_pci_load_queue(void * opaque, int n, > QEMUFile *f) > vector = VIRTIO_NO_VECTOR; > } > virtio_queue_set_vector(proxy->vdev, n, vector); > - if (vector != VIRTIO_NO_VECTOR) { > - return msix_vector_use(&proxy->pci_dev, vector); > - } > return 0; > } > > @@ -268,7 +262,7 @@ static void virtio_ioport_write(void *opaque, uint32_t > addr, uint32_t val) > if (pa == 0) { > virtio_pci_stop_ioeventfd(proxy); > virtio_reset(proxy->vdev); > - msix_unuse_all_vectors(&proxy->pci_dev); > + msix_clear_all_vectors(&proxy->pci_dev); > } > else > virtio_queue_set_addr(vdev, vdev->queue_sel, pa); > @@ -295,7 +289,7 @@ static void virtio_ioport_write(void *opaque, uint32_t > addr, uint32_t val) > > if (vdev->status == 0) { > virtio_reset(proxy->vdev); > - msix_unuse_all_vectors(&proxy->pci_dev); > + msix_clear_all_vectors(&proxy->pci_dev); > } > > /* Linux before 2.6.34 sets the device as OK without enabling > @@ -307,18 +301,20 @@ static void virtio_ioport_write(void *opaque, uint32_t > addr, uint32_t val) > } > break; > case VIRTIO_MSI_CONFIG_VECTOR: > - msix_vector_unuse(&proxy->pci_dev, vdev->config_vector); > + msix_clear_vector(&proxy->pci_dev, vdev->config_vector); > /* Make it possible for guest to discover an error took place. */ > - if (msix_vector_use(&proxy->pci_dev, val) < 0) > + if (val >= vdev->nvectors) { > val = VIRTIO_NO_VECTOR; > + } > vdev->config_vector = val; > break; > case VIRTIO_MSI_QUEUE_VECTOR: > - msix_vector_unuse(&proxy->pci_dev, > + msix_clear_vector(&proxy->pci_dev, > virtio_queue_vector(vdev, vdev->queue_sel)); > /* Make it possible for guest to discover an error took place. */ > - if (msix_vector_use(&proxy->pci_dev, val) < 0) > + if (val >= vdev->nvectors) { > val = VIRTIO_NO_VECTOR; > + } > virtio_queue_set_vector(vdev, vdev->queue_sel, val); > break; > default: > -- > 1.7.3.4