On Thu, May 13, 2021 at 11:04:37PM +0200, Paolo Bonzini wrote:
> On 12/05/21 09:15, Vladimir Sementsov-Ogievskiy wrote:
> > > >
> > >
> > > I don't understand. Why doesn't aio_co_enter go through the ctx !=
> > > qemu_get_current_aio_context() branch and just do aio_co_schedule?
> > > That was at least the idea behind aio_co_wake and aio_co_enter.
> > >
> >
> > Because ctx is exactly qemu_get_current_aio_context(), as we are not in
> > iothread but in nbd connection thread. So,
> > qemu_get_current_aio_context() returns qemu_aio_context.
>
> So the problem is that threads other than the main thread and
> the I/O thread should not return qemu_aio_context. The vCPU thread
> may need to return it when running with BQL taken, though.
I'm not sure this is the only case. AFAICS your patch has basically the same effect as Vladimir's patch "util/async: aio_co_enter(): do aio_co_schedule in general case" (https://lore.kernel.org/qemu-devel/20210408140827.332915-4-vsementsov@virtuozzo.com/). That one was found to break e.g. aio=threads cases. I guessed it implicitly relied upon aio_co_enter() acquiring the aio_context but we didn't dig further to pinpoint the exact scenario. Roman. > Something like this (untested): > > diff --git a/include/block/aio.h b/include/block/aio.h > index 5f342267d5..10fcae1515 100644 > --- a/include/block/aio.h > +++ b/include/block/aio.h > @@ -691,10 +691,13 @@ void aio_co_enter(AioContext *ctx, struct Coroutine > *co); > * Return the AioContext whose event loop runs in the current thread. > * > * If called from an IOThread this will be the IOThread's AioContext. If > - * called from another thread it will be the main loop AioContext. > + * called from the main thread or with the "big QEMU lock" taken it > + * will be the main loop AioContext. > */ > AioContext *qemu_get_current_aio_context(void); > +void qemu_set_current_aio_context(AioContext *ctx); > + > /** > * aio_context_setup: > * @ctx: the aio context > diff --git a/iothread.c b/iothread.c > index 7f086387be..22b967e77c 100644 > --- a/iothread.c > +++ b/iothread.c > @@ -39,11 +39,23 @@ DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD, > #define IOTHREAD_POLL_MAX_NS_DEFAULT 0ULL > #endif > -static __thread IOThread *my_iothread; > +static __thread AioContext *my_aiocontext; > + > +void qemu_set_current_aio_context(AioContext *ctx) > +{ > + assert(!my_aiocontext); > + my_aiocontext = ctx; > +} > AioContext *qemu_get_current_aio_context(void) > { > - return my_iothread ? 
my_iothread->ctx : qemu_get_aio_context(); > + if (my_aiocontext) { > + return my_aiocontext; > + } > + if (qemu_mutex_iothread_locked()) { > + return qemu_get_aio_context(); > + } > + return NULL; > } > static void *iothread_run(void *opaque) > @@ -56,7 +68,7 @@ static void *iothread_run(void *opaque) > * in this new thread uses glib. > */ > g_main_context_push_thread_default(iothread->worker_context); > - my_iothread = iothread; > + qemu_set_current_aio_context(iothread->ctx); > iothread->thread_id = qemu_get_thread_id(); > qemu_sem_post(&iothread->init_done_sem); > diff --git a/stubs/iothread.c b/stubs/iothread.c > index 8cc9e28c55..25ff398894 100644 > --- a/stubs/iothread.c > +++ b/stubs/iothread.c > @@ -6,3 +6,7 @@ AioContext *qemu_get_current_aio_context(void) > { > return qemu_get_aio_context(); > } > + > +void qemu_set_current_aio_context(AioContext *ctx) > +{ > +} > diff --git a/tests/unit/iothread.c b/tests/unit/iothread.c > index afde12b4ef..cab38b3da8 100644 > --- a/tests/unit/iothread.c > +++ b/tests/unit/iothread.c > @@ -30,13 +30,26 @@ struct IOThread { > bool stopping; > }; > -static __thread IOThread *my_iothread; > +static __thread AioContext *my_aiocontext; > + > +void qemu_set_current_aio_context(AioContext *ctx) > +{ > + assert(!my_aiocontext); > + my_aiocontext = ctx; > +} > AioContext *qemu_get_current_aio_context(void) > { > - return my_iothread ? 
my_iothread->ctx : qemu_get_aio_context(); > + if (my_aiocontext) { > + return my_aiocontext; > + } > + if (qemu_mutex_iothread_locked()) { > + return qemu_get_aio_context(); > + } > + return NULL; > } > + > static void iothread_init_gcontext(IOThread *iothread) > { > GSource *source; > @@ -54,7 +67,7 @@ static void *iothread_run(void *opaque) > rcu_register_thread(); > - my_iothread = iothread; > + qemu_set_current_aio_context(iothread->ctx); > qemu_mutex_lock(&iothread->init_done_lock); > iothread->ctx = aio_context_new(&error_abort); > diff --git a/util/main-loop.c b/util/main-loop.c > index d9c55df6f5..4ae5b23e99 100644 > --- a/util/main-loop.c > +++ b/util/main-loop.c > @@ -170,6 +170,7 @@ int qemu_init_main_loop(Error **errp) > if (!qemu_aio_context) { > return -EMFILE; > } > + qemu_set_current_aio_context(qemu_aio_context); > qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL); > gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); > src = aio_get_g_source(qemu_aio_context); > > If it works for you, I can post it as a formal patch. > > Paolo >