Re: [Qemu-devel] [PATCH 3/5] vfio/quirks: ioeventfd quirk acceleration

2018-03-06 Thread Peter Xu
On Wed, Feb 28, 2018 at 01:45:54PM -0700, Alex Williamson wrote:
> The NVIDIA BAR0 quirks virtualize the PCI config space mirrors found
> in device MMIO space.  Normally PCI config space is considered a slow
> path and further optimization is unnecessary, however NVIDIA uses a
> register here to enable the MSI interrupt to re-trigger.  Exiting to
> QEMU for this MSI-ACK handling can therefore rate limit our interrupt
> handling.  Fortunately the MSI-ACK write is easily detected since the
> quirk MemoryRegion otherwise has very few accesses, so simply looking
> for consecutive writes with the same data is sufficient, in this case
> 10 consecutive writes with the same data and size is arbitrarily
> chosen.  We configure the KVM ioeventfd with data match, so there's
> no risk of triggering for the wrong data or size, but we do risk that
> pathological driver behavior might consume all of QEMU's file
> descriptors, so we cap ourselves to 10 ioeventfds for this purpose.
> 
> In support of the above, generic ioeventfd infrastructure is added
> for vfio quirks.  This automatically initializes an ioeventfd list
> per quirk, disables and frees ioeventfds on exit, and allows
> ioeventfds marked as dynamic to be dropped on device reset.  The
> rationale for this latter feature is that useful ioeventfds may
> depend on specific driver behavior and since we necessarily place a
> cap on our use of ioeventfds, a machine reset is a reasonable point
> at which to assume a new driver and re-profile.
> 
> Signed-off-by: Alex Williamson 

I don't know when will there be non-dynamic vfio-ioeventfds, but it
looks fine at least to me even if all of them are dynamic now:

Reviewed-by: Peter Xu 

-- 
Peter Xu



[Qemu-devel] [PATCH 3/5] vfio/quirks: ioeventfd quirk acceleration

2018-02-28 Thread Alex Williamson
The NVIDIA BAR0 quirks virtualize the PCI config space mirrors found
in device MMIO space.  Normally PCI config space is considered a slow
path and further optimization is unnecessary, however NVIDIA uses a
register here to enable the MSI interrupt to re-trigger.  Exiting to
QEMU for this MSI-ACK handling can therefore rate limit our interrupt
handling.  Fortunately the MSI-ACK write is easily detected since the
quirk MemoryRegion otherwise has very few accesses, so simply looking
for consecutive writes with the same data is sufficient, in this case
10 consecutive writes with the same data and size is arbitrarily
chosen.  We configure the KVM ioeventfd with data match, so there's
no risk of triggering for the wrong data or size, but we do risk that
pathological driver behavior might consume all of QEMU's file
descriptors, so we cap ourselves to 10 ioeventfds for this purpose.

In support of the above, generic ioeventfd infrastructure is added
for vfio quirks.  This automatically initializes an ioeventfd list
per quirk, disables and frees ioeventfds on exit, and allows
ioeventfds marked as dynamic to be dropped on device reset.  The
rationale for this latter feature is that useful ioeventfds may
depend on specific driver behavior and since we necessarily place a
cap on our use of ioeventfds, a machine reset is a reasonable point
at which to assume a new driver and re-profile.

Signed-off-by: Alex Williamson 
---
 hw/vfio/pci-quirks.c |  159 +-
 hw/vfio/pci.h|   14 
 2 files changed, 171 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index f0947cbf152f..e01e2f0f69df 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -12,6 +12,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
 #include "qemu/range.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
@@ -202,6 +203,7 @@ typedef struct VFIOConfigMirrorQuirk {
 uint32_t offset;
 uint8_t bar;
 MemoryRegion *mem;
+uint8_t data[];
 } VFIOConfigMirrorQuirk;
 
 static uint64_t vfio_generic_quirk_mirror_read(void *opaque,
@@ -278,12 +280,84 @@ static const MemoryRegionOps vfio_ati_3c3_quirk = {
 static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
 {
 VFIOQuirk *quirk = g_new0(VFIOQuirk, 1);
+QLIST_INIT(&quirk->ioeventfds);
 quirk->mem = g_new0(MemoryRegion, nr_mem);
 quirk->nr_mem = nr_mem;
 
 return quirk;
 }
 
+static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
+{
+QLIST_REMOVE(ioeventfd, next);
+memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
+  ioeventfd->match_data, ioeventfd->data,
+  &ioeventfd->e);
+qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), NULL, NULL, 
NULL);
+event_notifier_cleanup(&ioeventfd->e);
+g_free(ioeventfd);
+}
+
+static void vfio_drop_dynamic_eventfds(VFIOPCIDevice *vdev, VFIOQuirk *quirk)
+{
+VFIOIOEventFD *ioeventfd, *tmp;
+
+QLIST_FOREACH_SAFE(ioeventfd, &quirk->ioeventfds, next, tmp) {
+if (ioeventfd->dynamic) {
+vfio_ioeventfd_exit(ioeventfd);
+}
+}
+}
+
+static void vfio_ioeventfd_handler(void *opaque)
+{
+VFIOIOEventFD *ioeventfd = opaque;
+
+if (event_notifier_test_and_clear(&ioeventfd->e)) {
+vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
+  ioeventfd->data, ioeventfd->size);
+}
+}
+
+static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
+  MemoryRegion *mr, hwaddr addr,
+  unsigned size, uint64_t data,
+  VFIORegion *region,
+  hwaddr region_addr, bool dynamic)
+{
+VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd));
+
+if (event_notifier_init(&ioeventfd->e, 0)) {
+g_free(ioeventfd);
+return NULL;
+}
+
+/*
+ * MemoryRegion and relative offset, plus additional ioeventfd setup
+ * parameters for configuring and later tearing down KVM ioeventfd.
+ */
+ioeventfd->mr = mr;
+ioeventfd->addr = addr;
+ioeventfd->size = size;
+ioeventfd->data = data;
+ioeventfd->match_data = true;
+ioeventfd->dynamic = dynamic;
+/*
+ * VFIORegion and relative offset for implementing the userspace
+ * handler.  data & size fields shared for both uses.
+ */
+ioeventfd->region = region;
+ioeventfd->region_addr = region_addr;
+
+qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+vfio_ioeventfd_handler, NULL, ioeventfd);
+memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
+  ioeventfd->size, ioeventfd->match_data,
+  ioeventfd->data, &ioeventfd->e);
+
+return ioeventfd;
+}
+
 static void vfio_vga_