On 7/11/24 11:51, Sergey Dyasli wrote:
After 038b4217884c ("Revert "chardev: use a child source for qio input
source"") we've been observing the "iwp->src == NULL" assertion
triggering periodically during the initial capabilities querying by
libvirtd. One of possible backtraces:

Hi Sergey,

thanks for the analysis!

I noticed however that this comment is really old; it was added from commit 2b316774f60 ("qemu-char: do not operate on sources from finalize callbacks", 2013-04-22):

    /* Due to a glib bug, removing the last reference to a source
     * inside a finalize callback causes recursive locking (and a
     * deadlock).  This is not a problem inside other callbacks,
     * including dispatch callbacks, so we call io_remove_watch_poll
     * to remove this source.  At this point, iwp->src must
     * be NULL, or we would leak it.
     *
     * This would be solved much more elegantly by child sources,
     * but we support older glib versions that do not have them.
     */

and the original mailing list message points to a problem on RHEL6 and Wheezy, which were both relatively old in 2013. And in fact the issue with finalize had been fixed in glib in 2010:

    commit b358202856682e5cdefb0b4b8aaed3a45d9a85fa
    Author: Dan Winship <d...@gnome.org>
    Date:   Sat Nov 6 09:35:25 2010 -0400

    gmain: move finalization of GSource outside of context lock

    This avoids ugly deadlock situations such as in
    https://bugzilla.gnome.org/show_bug.cgi?id=586432

    https://bugzilla.gnome.org/show_bug.cgi?id=626702

    https://bugzilla.gnome.org/show_bug.cgi?id=634239

diff --git a/glib/gmain.c b/glib/gmain.c
index b182c6607..301adb0a7 100644
--- a/glib/gmain.c
+++ b/glib/gmain.c
@@ -1520,7 +1520,13 @@ g_source_unref_internal (GSource      *source,
        g_source_list_remove (source, context);

       if (source->source_funcs->finalize)
-       source->source_funcs->finalize (source);
+       {
+         if (context)
+           UNLOCK_CONTEXT (context);
+         source->source_funcs->finalize (source);
+         if (context)
+           LOCK_CONTEXT (context);
+       }

       g_free (source->name);
       source->name = NULL;

So I think we should just revert commit 2b316774f60, which is not hard to do (if it works) even if the code has since moved from qemu-char.c to chardev/char-io.c.

Thanks,

Paolo

Thread 1 (Thread 0x7f16cd4f0700 (LWP 43858)):
0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
1  0x00007f16c6c21e65 in __GI_abort () at abort.c:79
2  0x00007f16c6c21d39 in __assert_fail_base  at assert.c:92
3  0x00007f16c6c46e86 in __GI___assert_fail (assertion=assertion@entry=0x562e9bcdaadd "iwp->src == NULL", 
file=file@entry=0x562e9bcdaac8 "../chardev/char-io.c", line=line@entry=99, 
function=function@entry=0x562e9bcdab10 <__PRETTY_FUNCTION__.20549> "io_watch_poll_finalize") at 
assert.c:101
4  0x0000562e9ba20c2c in io_watch_poll_finalize (source=<optimized out>) at 
../chardev/char-io.c:99
5  io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:88
6  0x00007f16c904aae0 in g_source_unref_internal () from /lib64/libglib-2.0.so.0
7  0x00007f16c904baf9 in g_source_destroy_internal () from 
/lib64/libglib-2.0.so.0
8  0x0000562e9ba20db0 in io_remove_watch_poll (source=0x562e9d6720b0) at 
../chardev/char-io.c:147
9  remove_fd_in_watch (chr=chr@entry=0x562e9d5f3800) at ../chardev/char-io.c:153
10 0x0000562e9ba23ffb in update_ioc_handlers (s=0x562e9d5f3800) at 
../chardev/char-socket.c:592
11 0x0000562e9ba2072f in qemu_chr_fe_set_handlers_full at 
../chardev/char-fe.c:279
12 0x0000562e9ba207a9 in qemu_chr_fe_set_handlers at ../chardev/char-fe.c:304
13 0x0000562e9ba2ca75 in monitor_qmp_setup_handlers_bh (opaque=0x562e9d4c2c60) 
at ../monitor/qmp.c:509
14 0x0000562e9bb6222e in aio_bh_poll (ctx=ctx@entry=0x562e9d4c2f20) at 
../util/async.c:216
15 0x0000562e9bb4de0a in aio_poll (ctx=0x562e9d4c2f20, 
blocking=blocking@entry=true) at ../util/aio-posix.c:722
16 0x0000562e9b99dfaa in iothread_run (opaque=0x562e9d4c26f0) at 
../iothread.c:63
17 0x0000562e9bb505a4 in qemu_thread_start (args=0x562e9d4c7ea0) at 
../util/qemu-thread-posix.c:543
18 0x00007f16c70081ca in start_thread (arg=<optimized out>) at 
pthread_create.c:479
19 0x00007f16c6c398d3 in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:95

io_remove_watch_poll(), which makes sure that iwp->src is NULL, calls
g_source_destroy() which finds that iwp->src is not NULL in the finalize
callback. This can only happen if another thread has managed to trigger
io_watch_poll_prepare() callback in the meantime.

Introduce a mutex and a boolean variable to prevent other threads
creating a watch in io_watch_poll_prepare() in case that the IOWatchPoll
itself is about to get destroyed.

Signed-off-by: Sergey Dyasli <sergey.dya...@nutanix.com>
---
  chardev/char-io.c | 21 +++++++++++++++++++++
  1 file changed, 21 insertions(+)

diff --git a/chardev/char-io.c b/chardev/char-io.c
index dab77b112e35..b1edccf0cc85 100644
--- a/chardev/char-io.c
+++ b/chardev/char-io.c
@@ -34,6 +34,9 @@ typedef struct IOWatchPoll {
      GSourceFunc fd_read;
      void *opaque;
      GMainContext *context;
+
+    QemuMutex mut;
+    bool dead;
  } IOWatchPoll;
static IOWatchPoll *io_watch_poll_from_source(GSource *source)
@@ -62,10 +65,20 @@ static gboolean io_watch_poll_prepare(GSource *source,
       * more data.
       */
      if (now_active) {
+        qemu_mutex_lock(&iwp->mut);
+
+        /* Don't create a watch if we are about to be destroyed. */
+        if (iwp->dead) {
+            qemu_mutex_unlock(&iwp->mut);
+            return FALSE;
+        }
+
          iwp->src = qio_channel_create_watch(
              iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL);
          g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL);
          g_source_attach(iwp->src, iwp->context);
+
+        qemu_mutex_unlock(&iwp->mut);
      } else {
          g_source_destroy(iwp->src);
          g_source_unref(iwp->src);
@@ -97,6 +110,7 @@ static void io_watch_poll_finalize(GSource *source)
       */
      IOWatchPoll *iwp = io_watch_poll_from_source(source);
      assert(iwp->src == NULL);
+    qemu_mutex_destroy(&iwp->mut);
  }
static GSourceFuncs io_watch_poll_funcs = {
@@ -124,6 +138,8 @@ GSource *io_add_watch_poll(Chardev *chr,
      iwp->fd_read = (GSourceFunc) fd_read;
      iwp->src = NULL;
      iwp->context = context;
+    qemu_mutex_init(&iwp->mut);
+    iwp->dead = false;
name = g_strdup_printf("chardev-iowatch-%s", chr->label);
      g_source_set_name((GSource *)iwp, name);
@@ -139,11 +155,16 @@ static void io_remove_watch_poll(GSource *source)
      IOWatchPoll *iwp;
iwp = io_watch_poll_from_source(source);
+
+    qemu_mutex_lock(&iwp->mut);
+    iwp->dead = true;
      if (iwp->src) {
          g_source_destroy(iwp->src);
          g_source_unref(iwp->src);
          iwp->src = NULL;
      }
+    qemu_mutex_unlock(&iwp->mut);
+
      g_source_destroy(&iwp->parent);
  }


Reply via email to