event_notifier_test_and_clear must be called before processing events. Otherwise, an aio_poll could "eat" the notification before the main I/O thread invokes ppoll(). The main I/O thread then never wakes up. This is an example of what could happen:
i/o thread vcpu thread worker thread --------------------------------------------------------------------- lock_iothread notify_me = 1 ... unlock_iothread bh->scheduled = 1 event_notifier_set lock_iothread notify_me = 3 ppoll notify_me = 1 aio_dispatch aio_bh_poll thread_pool_completion_bh bh->scheduled = 1 event_notifier_set node->io_read(node->opaque) event_notifier_test_and_clear ppoll *** hang *** "Tracing" with qemu_clock_get_ns shows pretty much the same behavior as in the previous bug, so there are no new tricks here---just stare more at the code until it is apparent. One could also use a formal model, of course. The included one shows this with three processes: notifier corresponds to a QEMU thread pool worker, temporary_waiter to a VCPU thread that invokes aio_poll(), waiter to the main I/O thread. I would be happy to say that the formal model found the bug for me, but actually I wrote it after the fact. This patch is a bit of a big hammer. The next one optimizes it, with help (this time for real rather than a posteriori :)) from another, similar formal model. Reported-by: Richard W. M. Jones <rjo...@redhat.com> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- aio-posix.c | 2 + aio-win32.c | 7 ++- async.c | 8 ++- docs/aio_notify_bug.promela | 140 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 153 insertions(+), 4 deletions(-) create mode 100644 docs/aio_notify_bug.promela diff --git a/aio-posix.c b/aio-posix.c index 249889f..5c8b266 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -276,6 +276,8 @@ bool aio_poll(AioContext *ctx, bool blocking) aio_context_acquire(ctx); } + event_notifier_test_and_clear(&ctx->notifier); + /* if we have any readable fds, dispatch event */ if (ret > 0) { for (i = 0; i < npfd; i++) { diff --git a/aio-win32.c b/aio-win32.c index ea655b0..7afc999 100644 --- a/aio-win32.c +++ b/aio-win32.c @@ -337,10 +337,11 @@ bool aio_poll(AioContext *ctx, bool blocking) aio_context_acquire(ctx); } - if (first && aio_bh_poll(ctx)) { - progress = true; + if (first) { + event_notifier_test_and_clear(&ctx->notifier); + progress |= aio_bh_poll(ctx); + first = false; } - first = false; /* if we have any signaled events, dispatch event */ event = NULL; diff --git a/async.c b/async.c index a232192..d625e8a 100644 --- a/async.c +++ b/async.c @@ -203,6 +203,8 @@ aio_ctx_check(GSource *source) QEMUBH *bh; atomic_and(&ctx->notify_me, ~1); + event_notifier_test_and_clear(&ctx->notifier); + for (bh = ctx->first_bh; bh; bh = bh->next) { if (!bh->deleted && bh->scheduled) { return true; @@ -279,6 +281,10 @@ static void aio_rfifolock_cb(void *opaque) aio_notify(opaque); } +static void event_notifier_dummy_cb(EventNotifier *e) +{ +} + AioContext *aio_context_new(Error **errp) { int ret; @@ -293,7 +299,7 @@ AioContext *aio_context_new(Error **errp) g_source_set_can_recurse(&ctx->source, true); aio_set_event_notifier(ctx, &ctx->notifier, (EventNotifierHandler *) - event_notifier_test_and_clear); + event_notifier_dummy_cb); ctx->thread_pool = NULL; qemu_mutex_init(&ctx->bh_lock); rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx); diff --git a/docs/aio_notify_bug.promela b/docs/aio_notify_bug.promela new file mode 100644 index 0000000..b3bfca1 --- /dev/null +++ b/docs/aio_notify_bug.promela @@ -0,0 +1,140 @@ +/* + * This model describes a bug in aio_notify. If ctx->notifier is + * cleared too late, a wakeup could be lost. + * + * Author: Paolo Bonzini <pbonz...@redhat.com> + * + * This file is in the public domain. If you really want a license, + * the WTFPL will do. + * + * To verify the buggy version: + * spin -a -DBUG docs/aio_notify_bug.promela + * gcc -O2 pan.c + * ./a.out -a -f + * + * To verify the fixed version: + * spin -a docs/aio_notify_bug.promela + * gcc -O2 pan.c + * ./a.out -a -f + * + * Add -DCHECK_REQ to test an alternative invariant and the + * "notify_me" optimization. + */ + +int notify_me; +bool event; +bool req; +bool notifier_done; + +#ifdef CHECK_REQ +#define USE_NOTIFY_ME 1 +#else +#define USE_NOTIFY_ME 0 +#endif + +active proctype notifier() +{ + do + :: true -> { + req = 1; + if + :: !USE_NOTIFY_ME || notify_me -> event = 1; + :: else -> skip; + fi + } + :: true -> break; + od; + notifier_done = 1; +} + +#ifdef BUG +#define AIO_POLL \ + notify_me++; \ + if \ + :: !req -> { \ + if \ + :: event -> skip; \ + fi; \ + } \ + :: else -> skip; \ + fi; \ + notify_me--; \ + \ + req = 0; \ + event = 0; +#else +#define AIO_POLL \ + notify_me++; \ + if \ + :: !req -> { \ + if \ + :: event -> skip; \ + fi; \ + } \ + :: else -> skip; \ + fi; \ + notify_me--; \ + \ + event = 0; \ + req = 0; +#endif + +active proctype waiter() +{ + do + :: true -> AIO_POLL; + od; +} + +/* Same as waiter(), but disappears after a while. */ +active proctype temporary_waiter() +{ + do + :: true -> AIO_POLL; + :: true -> break; + od; +} + +#ifdef CHECK_REQ +never { + do + :: req -> goto accept_if_req_not_eventually_false; + :: true -> skip; + od; + +accept_if_req_not_eventually_false: + if + :: req -> goto accept_if_req_not_eventually_false; + fi; + assert(0); +} + +#else +/* There must be infinitely many transitions of event as long + * as the notifier does not exit. + * + * If event stayed always true, the waiters would be busy looping. + * If event stayed always false, the waiters would be sleeping + * forever. + */ +never { + do + :: !event -> goto accept_if_event_not_eventually_true; + :: event -> goto accept_if_event_not_eventually_false; + :: true -> skip; + od; + +accept_if_event_not_eventually_true: + if + :: !event && notifier_done -> do :: true -> skip; od; + :: !event && !notifier_done -> goto accept_if_event_not_eventually_true; + fi; + assert(0); + +accept_if_event_not_eventually_false: + if + :: event -> goto accept_if_event_not_eventually_false; + fi; + assert(0); +} +#endif -- 2.4.3