On 26/03/19 14:18, Stefan Hajnoczi wrote:
> Hi Sergio,
> Here are the forgotten event loop optimizations I mentioned:
> 
>   https://github.com/stefanha/qemu/commits/event-loop-optimizations
> 
> The goal was to eliminate or reorder syscalls so that useful work (like
> executing BHs) occurs as soon as possible after an event is detected.
> 
> I remember that these optimizations only shave off a handful of
> microseconds, so they aren't a huge win.  They do become attractive on
> fast SSDs with <10us read/write latency.
> 
> These optimizations are aggressive and there is a possibility of
> introducing regressions.
> 
> If you have time to pick up this work, try benchmarking each commit
> individually so that performance changes can be attributed to each one.
> There's no need to send them together in a single patch series; the
> changes are quite independent.

I reviewed the patches now:

- qemu_bh_schedule_nested should not be necessary, since we already have
ctx->notify_me to avoid the event_notifier_set call.  However, it is
possible to avoid the smp_mb at the beginning of aio_notify, since the
atomic_xchg in qemu_bh_schedule already implies it.  Maybe add a "static
void aio_notify__after_smp_mb"?
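For concreteness, a rough sketch of what that split could look like
(untested; the helper name is just a suggestion and the field names follow
the current util/async.c):

/* Callers that already executed a full barrier (e.g. the atomic_xchg() in
 * qemu_bh_schedule()) can skip the redundant smp_mb() and call this helper
 * directly.
 */
static void aio_notify__after_smp_mb(AioContext *ctx)
{
    if (ctx->notify_me) {
        event_notifier_set(&ctx->notifier);
        atomic_mb_set(&ctx->notified, true);
    }
}

void aio_notify(AioContext *ctx)
{
    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs with
     * the barriers in aio_ctx_prepare() and aio_poll().
     */
    smp_mb();
    aio_notify__after_smp_mb(ctx);
}

qemu_bh_schedule would then call aio_notify__after_smp_mb right after its
atomic_xchg(&bh->scheduled, 1).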

- another possible optimization for latency is to delay 
event_notifier_test_and_clear until the very last moment:

diff --git a/block/linux-aio.c b/block/linux-aio.c
index d4b61fb251..5c80ceb85f 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -200,15 +207,16 @@ io_getevents_advance_and_peek(io_context_t ctx,
  * can be called again in a nested event loop.  When there are no events left
  * to complete the BH is being canceled.
  */
-static void qemu_laio_process_completions(LinuxAioState *s)
+static void qemu_laio_process_completions(LinuxAioState *s, bool clear)
 {
     struct io_event *events;
 
     /* Reschedule so nested event loops see currently pending completions */
     qemu_bh_schedule(s->completion_bh);
 
-    while ((s->event_max = io_getevents_advance_and_peek(s->ctx, &events,
-                                                         s->event_idx))) {
+    do {
+        s->event_max = io_getevents_advance_and_peek(s->ctx, &events,
+                                                     s->event_idx);
         for (s->event_idx = 0; s->event_idx < s->event_max; ) {
             struct iocb *iocb = events[s->event_idx].obj;
             struct qemu_laiocb *laiocb =
@@ -221,21 +229,30 @@ static void qemu_laio_process_completions(LinuxAioState *s)
             s->event_idx++;
             qemu_laio_process_completion(laiocb);
         }
-    }
+
+        if (!s->event_max && clear) {
+            /* Clearing the eventfd is expensive, only do it once.  */
+            clear = false;
+            event_notifier_test_and_clear(&s->e);
+            /* Check one last time after the EventNotifier's trailing edge.  */
+            s->event_max = io_getevents_peek(s->ctx, &events);
+        }
+    } while (s->event_max);
 
     qemu_bh_cancel(s->completion_bh);
 
     /* If we are nested we have to notify the level above that we are done
      * by setting event_max to zero, upper level will then jump out of it's
-     * own `for` loop.  If we are the last all counters droped to zero. */
+     * own `for` loop.  If we are the last all counters dropped to zero. */
     s->event_max = 0;
     s->event_idx = 0;
 }
 
-static void qemu_laio_process_completions_and_submit(LinuxAioState *s)
+static void qemu_laio_process_completions_and_submit(LinuxAioState *s,
+                                                     bool clear)
 {
     aio_context_acquire(s->aio_context);
-    qemu_laio_process_completions(s);
+    qemu_laio_process_completions(s, clear);
 
     if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
         ioq_submit(s);
@@ -247,16 +264,14 @@ static void qemu_laio_completion_bh(void *opaque)
 {
     LinuxAioState *s = opaque;
 
-    qemu_laio_process_completions_and_submit(s);
+    qemu_laio_process_completions_and_submit(s, true);
 }
 
 static void qemu_laio_completion_cb(EventNotifier *e)
 {
     LinuxAioState *s = container_of(e, LinuxAioState, e);
 
-    if (event_notifier_test_and_clear(&s->e)) {
-        qemu_laio_process_completions_and_submit(s);
-    }
+    qemu_laio_process_completions_and_submit(s, true);
 }
 
 static bool qemu_laio_poll_cb(void *opaque)
@@ -269,7 +284,7 @@ static bool qemu_laio_poll_cb(void *opaque)
         return false;
     }
 
-    qemu_laio_process_completions_and_submit(s);
+    qemu_laio_process_completions_and_submit(s, false);
     return true;
 }
 
- but actually (and as a precursor to using IOCB_CMD_POLL) it should be
possible to have just one LinuxAioState per AioContext, which could then
simply share the AioContext's EventNotifier.  That would remove the need
for event_notifier_test_and_clear in linux-aio.c altogether.
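A very rough sketch of the direction, untested: with the per-AioContext
LinuxAioState (as returned by aio_setup_linux_aio()), the submitted iocbs
could signal the AioContext's own EventNotifier instead of a private s->e.
The event loop's aio_notify_accept() then clears the eventfd, so linux-aio.c
would not need event_notifier_test_and_clear() at all; completion processing
would be driven by s->completion_bh rather than a dedicated notifier handler.
The helper below is hypothetical, and direct access to ctx->notifier is an
assumption:

static void laio_iocb_use_ctx_notifier(LinuxAioState *s, struct iocb *iocb)
{
    AioContext *ctx = s->aio_context;

    /* Wake up the event loop directly; aio_notify_accept() will clear
     * the eventfd on the next iteration.
     */
    io_set_eventfd(iocb, event_notifier_get_fd(&ctx->notifier));
}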

Paolo
