Re: [PATCH] async: the main AioContext is only "current" if under the BQL

2021-06-09 Thread Paolo Bonzini

On 09/06/21 13:40, Vladimir Sementsov-Ogievskiy wrote:



And in gdb it all looks like aio_co_wake() in my own separate thread 
leads to coroutine execution exactly in my own thread. So it doesn't 
deadlock on trying to acquire the context; instead it somehow enters 
the coroutine.  And then it deadlocks because the called coroutine tries to 
lock the mutex that was already locked before (in the code that thinks 
that aio_co_wake() will only schedule the coroutine).


I'll dig into it a bit more.




Aha, that's because qemu_mutex_iothread_locked() from 
stubs/iothread-lock.c is used, which always returns true.


Ok, you can change it to always return false with this patch, which is 
nicer: it means we have less special casing going on in the tools, and 
it matches the fact that there are no vCPU threads.


Paolo




Re: [PATCH] async: the main AioContext is only "current" if under the BQL

2021-06-09 Thread Vladimir Sementsov-Ogievskiy

09.06.2021 14:32, Vladimir Sementsov-Ogievskiy wrote:

09.06.2021 13:53, Paolo Bonzini wrote:

If we want to wake up a coroutine from a worker thread, aio_co_wake()
currently does not work.  In that scenario, aio_co_wake() calls
aio_co_enter(), but there is no current AioContext and therefore
qemu_get_current_aio_context() returns the main thread's AioContext.  aio_co_wake()
then attempts to call aio_context_acquire() instead of going through
aio_co_schedule().

The default case of qemu_get_current_aio_context() was added to cover
synchronous I/O started from the vCPU thread, but the main and vCPU
threads are quite different.  The main thread is an I/O thread itself,
only running a more complicated event loop; the vCPU thread instead
is essentially a worker thread that occasionally calls
qemu_mutex_lock_iothread().  It is only in those critical sections
that it acts as if it were the home thread of the main AioContext.

Therefore, this patch detaches qemu_get_current_aio_context() from
iothreads, which is a useless complication.  The AioContext pointer
is stored directly in the thread-local variable, including for the
main loop.  Worker threads (including vCPU threads) optionally behave
as temporary home threads if they have taken the big QEMU lock,
but if that is not the case they will always schedule coroutines
on remote threads via aio_co_schedule().

Reported-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Paolo Bonzini 
---
  include/block/aio.h   |  5 -
  iothread.c    |  9 +
  stubs/iothread.c  |  8 
  stubs/meson.build |  1 -
  tests/unit/iothread.c |  9 +
  util/async.c  | 20 
  util/main-loop.c  |  1 +
  7 files changed, 27 insertions(+), 26 deletions(-)
  delete mode 100644 stubs/iothread.c

diff --git a/include/block/aio.h b/include/block/aio.h
index 5f342267d5..10fcae1515 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -691,10 +691,13 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
   * Return the AioContext whose event loop runs in the current thread.
   *
   * If called from an IOThread this will be the IOThread's AioContext.  If
- * called from another thread it will be the main loop AioContext.
+ * called from the main thread or with the "big QEMU lock" taken it
+ * will be the main loop AioContext.
   */
  AioContext *qemu_get_current_aio_context(void);
+void qemu_set_current_aio_context(AioContext *ctx);
+
  /**
   * aio_context_setup:
   * @ctx: the aio context
diff --git a/iothread.c b/iothread.c
index 7f086387be..2c5ccd7367 100644
--- a/iothread.c
+++ b/iothread.c
@@ -39,13 +39,6 @@ DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD,
  #define IOTHREAD_POLL_MAX_NS_DEFAULT 0ULL
  #endif
-static __thread IOThread *my_iothread;
-
-AioContext *qemu_get_current_aio_context(void)
-{
-    return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
-}
-
  static void *iothread_run(void *opaque)
  {
  IOThread *iothread = opaque;
@@ -56,7 +49,7 @@ static void *iothread_run(void *opaque)
   * in this new thread uses glib.
   */
  g_main_context_push_thread_default(iothread->worker_context);
-    my_iothread = iothread;
+    qemu_set_current_aio_context(iothread->ctx);
  iothread->thread_id = qemu_get_thread_id();
  qemu_sem_post(&iothread->init_done_sem);
diff --git a/stubs/iothread.c b/stubs/iothread.c
deleted file mode 100644
index 8cc9e28c55..0000000000
--- a/stubs/iothread.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "qemu/osdep.h"
-#include "block/aio.h"
-#include "qemu/main-loop.h"
-
-AioContext *qemu_get_current_aio_context(void)
-{
-    return qemu_get_aio_context();
-}
diff --git a/stubs/meson.build b/stubs/meson.build
index 65c22c0568..4993797f05 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -16,7 +16,6 @@ stub_ss.add(files('fw_cfg.c'))
  stub_ss.add(files('gdbstub.c'))
  stub_ss.add(files('get-vm-name.c'))
  stub_ss.add(when: 'CONFIG_LINUX_IO_URING', if_true: files('io_uring.c'))
-stub_ss.add(files('iothread.c'))
  stub_ss.add(files('iothread-lock.c'))
  stub_ss.add(files('isa-bus.c'))
  stub_ss.add(files('is-daemonized.c'))
diff --git a/tests/unit/iothread.c b/tests/unit/iothread.c
index afde12b4ef..f9b0791084 100644
--- a/tests/unit/iothread.c
+++ b/tests/unit/iothread.c
@@ -30,13 +30,6 @@ struct IOThread {
  bool stopping;
  };
-static __thread IOThread *my_iothread;
-
-AioContext *qemu_get_current_aio_context(void)
-{
-    return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
-}
-
  static void iothread_init_gcontext(IOThread *iothread)
  {
  GSource *source;
@@ -54,9 +47,9 @@ static void *iothread_run(void *opaque)
  rcu_register_thread();
-    my_iothread = iothread;
  qemu_mutex_lock(&iothread->init_done_lock);
  iothread->ctx = aio_context_new(&error_abort);
+    qemu_set_current_aio_context(iothread->ctx);
  /*
   * We must connect the ctx to a GMainContext, because in older versions
diff --git a/util/async.c b/util/async.c
index 

Re: [PATCH] async: the main AioContext is only "current" if under the BQL

2021-06-09 Thread Vladimir Sementsov-Ogievskiy

09.06.2021 13:53, Paolo Bonzini wrote:

If we want to wake up a coroutine from a worker thread, aio_co_wake()
currently does not work.  In that scenario, aio_co_wake() calls
aio_co_enter(), but there is no current AioContext and therefore
qemu_get_current_aio_context() returns the main thread's AioContext.  aio_co_wake()
then attempts to call aio_context_acquire() instead of going through
aio_co_schedule().

The default case of qemu_get_current_aio_context() was added to cover
synchronous I/O started from the vCPU thread, but the main and vCPU
threads are quite different.  The main thread is an I/O thread itself,
only running a more complicated event loop; the vCPU thread instead
is essentially a worker thread that occasionally calls
qemu_mutex_lock_iothread().  It is only in those critical sections
that it acts as if it were the home thread of the main AioContext.

Therefore, this patch detaches qemu_get_current_aio_context() from
iothreads, which is a useless complication.  The AioContext pointer
is stored directly in the thread-local variable, including for the
main loop.  Worker threads (including vCPU threads) optionally behave
as temporary home threads if they have taken the big QEMU lock,
but if that is not the case they will always schedule coroutines
on remote threads via aio_co_schedule().

Reported-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Paolo Bonzini 
---
  include/block/aio.h   |  5 -
  iothread.c|  9 +
  stubs/iothread.c  |  8 
  stubs/meson.build |  1 -
  tests/unit/iothread.c |  9 +
  util/async.c  | 20 
  util/main-loop.c  |  1 +
  7 files changed, 27 insertions(+), 26 deletions(-)
  delete mode 100644 stubs/iothread.c

diff --git a/include/block/aio.h b/include/block/aio.h
index 5f342267d5..10fcae1515 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -691,10 +691,13 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
   * Return the AioContext whose event loop runs in the current thread.
   *
   * If called from an IOThread this will be the IOThread's AioContext.  If
- * called from another thread it will be the main loop AioContext.
+ * called from the main thread or with the "big QEMU lock" taken it
+ * will be the main loop AioContext.
   */
  AioContext *qemu_get_current_aio_context(void);
  
+void qemu_set_current_aio_context(AioContext *ctx);

+
  /**
   * aio_context_setup:
   * @ctx: the aio context
diff --git a/iothread.c b/iothread.c
index 7f086387be..2c5ccd7367 100644
--- a/iothread.c
+++ b/iothread.c
@@ -39,13 +39,6 @@ DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD,
  #define IOTHREAD_POLL_MAX_NS_DEFAULT 0ULL
  #endif
  
-static __thread IOThread *my_iothread;

-
-AioContext *qemu_get_current_aio_context(void)
-{
-return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
-}
-
  static void *iothread_run(void *opaque)
  {
  IOThread *iothread = opaque;
@@ -56,7 +49,7 @@ static void *iothread_run(void *opaque)
   * in this new thread uses glib.
   */
  g_main_context_push_thread_default(iothread->worker_context);
-my_iothread = iothread;
+qemu_set_current_aio_context(iothread->ctx);
  iothread->thread_id = qemu_get_thread_id();
  qemu_sem_post(&iothread->init_done_sem);
  
diff --git a/stubs/iothread.c b/stubs/iothread.c

deleted file mode 100644
index 8cc9e28c55..0000000000
--- a/stubs/iothread.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "qemu/osdep.h"
-#include "block/aio.h"
-#include "qemu/main-loop.h"
-
-AioContext *qemu_get_current_aio_context(void)
-{
-return qemu_get_aio_context();
-}
diff --git a/stubs/meson.build b/stubs/meson.build
index 65c22c0568..4993797f05 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -16,7 +16,6 @@ stub_ss.add(files('fw_cfg.c'))
  stub_ss.add(files('gdbstub.c'))
  stub_ss.add(files('get-vm-name.c'))
  stub_ss.add(when: 'CONFIG_LINUX_IO_URING', if_true: files('io_uring.c'))
-stub_ss.add(files('iothread.c'))
  stub_ss.add(files('iothread-lock.c'))
  stub_ss.add(files('isa-bus.c'))
  stub_ss.add(files('is-daemonized.c'))
diff --git a/tests/unit/iothread.c b/tests/unit/iothread.c
index afde12b4ef..f9b0791084 100644
--- a/tests/unit/iothread.c
+++ b/tests/unit/iothread.c
@@ -30,13 +30,6 @@ struct IOThread {
  bool stopping;
  };
  
-static __thread IOThread *my_iothread;

-
-AioContext *qemu_get_current_aio_context(void)
-{
-return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
-}
-
  static void iothread_init_gcontext(IOThread *iothread)
  {
  GSource *source;
@@ -54,9 +47,9 @@ static void *iothread_run(void *opaque)
  
  rcu_register_thread();
  
-my_iothread = iothread;

  qemu_mutex_lock(&iothread->init_done_lock);
  iothread->ctx = aio_context_new(&error_abort);
+qemu_set_current_aio_context(iothread->ctx);
  
  /*

   * We must connect the ctx to a GMainContext, because in older versions
diff --git a/util/async.c b/util/async.c
index 674dbefb7c..5d9b7cc1eb 100644
--- 

[PATCH] async: the main AioContext is only "current" if under the BQL

2021-06-09 Thread Paolo Bonzini
If we want to wake up a coroutine from a worker thread, aio_co_wake()
currently does not work.  In that scenario, aio_co_wake() calls
aio_co_enter(), but there is no current AioContext and therefore
qemu_get_current_aio_context() returns the main thread's AioContext.  aio_co_wake()
then attempts to call aio_context_acquire() instead of going through
aio_co_schedule().

The default case of qemu_get_current_aio_context() was added to cover
synchronous I/O started from the vCPU thread, but the main and vCPU
threads are quite different.  The main thread is an I/O thread itself,
only running a more complicated event loop; the vCPU thread instead
is essentially a worker thread that occasionally calls
qemu_mutex_lock_iothread().  It is only in those critical sections
that it acts as if it were the home thread of the main AioContext.

Therefore, this patch detaches qemu_get_current_aio_context() from
iothreads, which is a useless complication.  The AioContext pointer
is stored directly in the thread-local variable, including for the
main loop.  Worker threads (including vCPU threads) optionally behave
as temporary home threads if they have taken the big QEMU lock,
but if that is not the case they will always schedule coroutines
on remote threads via aio_co_schedule().

Reported-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Paolo Bonzini 
---
 include/block/aio.h   |  5 -
 iothread.c|  9 +
 stubs/iothread.c  |  8 
 stubs/meson.build |  1 -
 tests/unit/iothread.c |  9 +
 util/async.c  | 20 
 util/main-loop.c  |  1 +
 7 files changed, 27 insertions(+), 26 deletions(-)
 delete mode 100644 stubs/iothread.c

diff --git a/include/block/aio.h b/include/block/aio.h
index 5f342267d5..10fcae1515 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -691,10 +691,13 @@ void aio_co_enter(AioContext *ctx, struct Coroutine *co);
  * Return the AioContext whose event loop runs in the current thread.
  *
  * If called from an IOThread this will be the IOThread's AioContext.  If
- * called from another thread it will be the main loop AioContext.
+ * called from the main thread or with the "big QEMU lock" taken it
+ * will be the main loop AioContext.
  */
 AioContext *qemu_get_current_aio_context(void);
 
+void qemu_set_current_aio_context(AioContext *ctx);
+
 /**
  * aio_context_setup:
  * @ctx: the aio context
diff --git a/iothread.c b/iothread.c
index 7f086387be..2c5ccd7367 100644
--- a/iothread.c
+++ b/iothread.c
@@ -39,13 +39,6 @@ DECLARE_CLASS_CHECKERS(IOThreadClass, IOTHREAD,
 #define IOTHREAD_POLL_MAX_NS_DEFAULT 0ULL
 #endif
 
-static __thread IOThread *my_iothread;
-
-AioContext *qemu_get_current_aio_context(void)
-{
-return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
-}
-
 static void *iothread_run(void *opaque)
 {
 IOThread *iothread = opaque;
@@ -56,7 +49,7 @@ static void *iothread_run(void *opaque)
  * in this new thread uses glib.
  */
 g_main_context_push_thread_default(iothread->worker_context);
-my_iothread = iothread;
+qemu_set_current_aio_context(iothread->ctx);
 iothread->thread_id = qemu_get_thread_id();
 qemu_sem_post(&iothread->init_done_sem);
 
diff --git a/stubs/iothread.c b/stubs/iothread.c
deleted file mode 100644
index 8cc9e28c55..0000000000
--- a/stubs/iothread.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "qemu/osdep.h"
-#include "block/aio.h"
-#include "qemu/main-loop.h"
-
-AioContext *qemu_get_current_aio_context(void)
-{
-return qemu_get_aio_context();
-}
diff --git a/stubs/meson.build b/stubs/meson.build
index 65c22c0568..4993797f05 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -16,7 +16,6 @@ stub_ss.add(files('fw_cfg.c'))
 stub_ss.add(files('gdbstub.c'))
 stub_ss.add(files('get-vm-name.c'))
 stub_ss.add(when: 'CONFIG_LINUX_IO_URING', if_true: files('io_uring.c'))
-stub_ss.add(files('iothread.c'))
 stub_ss.add(files('iothread-lock.c'))
 stub_ss.add(files('isa-bus.c'))
 stub_ss.add(files('is-daemonized.c'))
diff --git a/tests/unit/iothread.c b/tests/unit/iothread.c
index afde12b4ef..f9b0791084 100644
--- a/tests/unit/iothread.c
+++ b/tests/unit/iothread.c
@@ -30,13 +30,6 @@ struct IOThread {
 bool stopping;
 };
 
-static __thread IOThread *my_iothread;
-
-AioContext *qemu_get_current_aio_context(void)
-{
-return my_iothread ? my_iothread->ctx : qemu_get_aio_context();
-}
-
 static void iothread_init_gcontext(IOThread *iothread)
 {
 GSource *source;
@@ -54,9 +47,9 @@ static void *iothread_run(void *opaque)
 
 rcu_register_thread();
 
-my_iothread = iothread;
 qemu_mutex_lock(&iothread->init_done_lock);
 iothread->ctx = aio_context_new(&error_abort);
+qemu_set_current_aio_context(iothread->ctx);
 
 /*
  * We must connect the ctx to a GMainContext, because in older versions
diff --git a/util/async.c b/util/async.c
index 674dbefb7c..5d9b7cc1eb 100644
--- a/util/async.c
+++ b/util/async.c
@@ -649,3 +649,23 @@ void aio_context_release(AioContext *ctx)
 {