[Qemu-devel] [PULL for-3.0 1/1] trace/simple: fix hang in child after fork(2)

2018-07-24 Thread Stefan Hajnoczi
The simple trace backend spawns a write-out thread which is used to
asynchronously flush the in-memory ring buffer to disk.

fork(2) does not clone all threads, only the thread that invoked
fork(2).  As a result there is no write-out thread in the child process!

This causes a hang during shutdown when atexit(3) handler installed by
the simple trace backend waits for the non-existent write-out thread.

This patch uses pthread_atfork(3) to terminate the write-out thread
before fork and restart it in both the parent and child after fork.
This solves a hang in qemu-iotests 147 due to qemu-nbd --fork usage.

Reported-by: Cornelia Huck 
Tested-by: Cornelia Huck 
Signed-off-by: Stefan Hajnoczi 
Message-id: 20180717101944.11691-1-stefa...@redhat.com
Suggested-by: Paolo Bonzini 
Signed-off-by: Stefan Hajnoczi 
---
 trace/simple.c | 80 --
 1 file changed, 71 insertions(+), 9 deletions(-)

diff --git a/trace/simple.c b/trace/simple.c
index 701dec639c..a4300b6ff1 100644
--- a/trace/simple.c
+++ b/trace/simple.c
@@ -39,9 +39,11 @@
 static GMutex trace_lock;
 static GCond trace_available_cond;
 static GCond trace_empty_cond;
+static GThread *trace_writeout_thread;
 
 static bool trace_available;
 static bool trace_writeout_enabled;
+static bool trace_writeout_running;
 
 enum {
 TRACE_BUF_LEN = 4096 * 64,
@@ -142,15 +144,34 @@ static void flush_trace_file(bool wait)
 g_mutex_unlock(&trace_lock);
 }
 
-static void wait_for_trace_records_available(void)
+/**
+ * Wait to be kicked by flush_trace_file()
+ *
+ * Returns: true if the writeout thread should continue
+ *  false if the writeout thread should terminate
+ */
+static bool wait_for_trace_records_available(void)
 {
+bool running;
+
 g_mutex_lock(&trace_lock);
-while (!(trace_available && trace_writeout_enabled)) {
+for (;;) {
+running = trace_writeout_running;
+if (!running) {
+break;
+}
+
+if (trace_available && trace_writeout_enabled) {
+break;
+}
+
 g_cond_signal(&trace_empty_cond);
 g_cond_wait(&trace_available_cond, &trace_lock);
 }
 trace_available = false;
 g_mutex_unlock(&trace_lock);
+
+return running;
 }
 
 static gpointer writeout_thread(gpointer opaque)
@@ -165,9 +186,7 @@ static gpointer writeout_thread(gpointer opaque)
 size_t unused __attribute__ ((unused));
 uint64_t type = TRACE_RECORD_TYPE_EVENT;
 
-for (;;) {
-wait_for_trace_records_available();
-
+while (wait_for_trace_records_available()) {
 if (g_atomic_int_get(&dropped_events)) {
 dropped.rec.event = DROPPED_EVENT_ID,
 dropped.rec.timestamp_ns = get_clock();
@@ -398,18 +417,61 @@ static GThread *trace_thread_create(GThreadFunc fn)
 return thread;
 }
 
+#ifndef _WIN32
+static void stop_writeout_thread(void)
+{
+g_mutex_lock(&trace_lock);
+trace_writeout_running = false;
+g_cond_signal(&trace_available_cond);
+g_mutex_unlock(&trace_lock);
+
+g_thread_join(trace_writeout_thread);
+trace_writeout_thread = NULL;
+
+/* Hold trace_lock across fork!  Since threads aren't cloned by fork() the
+ * mutex would be held in the child process and cause a deadlock.
+ * Acquiring the mutex here prevents other threads from being in a
+ * trace_lock critical region when fork() occurs.
+ */
+g_mutex_lock(&trace_lock);
+}
+
+static void restart_writeout_thread(void)
+{
+trace_writeout_running = true;
+trace_writeout_thread = trace_thread_create(writeout_thread);
+if (!trace_writeout_thread) {
+warn_report("unable to initialize simple trace backend");
+}
+
+/* This relies on undefined behavior in the fork() child (it's fine in the
+ * fork() parent).  g_mutex_unlock() on a mutex acquired by another thread
+ * is undefined (see glib documentation).
+ */
+g_mutex_unlock(&trace_lock);
+}
+#endif /* !_WIN32 */
+
 bool st_init(void)
 {
-GThread *thread;
-
 trace_pid = getpid();
+trace_writeout_running = true;
 
-thread = trace_thread_create(writeout_thread);
-if (!thread) {
+trace_writeout_thread = trace_thread_create(writeout_thread);
+if (!trace_writeout_thread) {
 warn_report("unable to initialize simple trace backend");
 return false;
 }
 
+#ifndef _WIN32
+/* Terminate writeout thread across fork and restart it in parent and
+ * child afterwards.
+ */
+pthread_atfork(stop_writeout_thread,
+   restart_writeout_thread,
+   restart_writeout_thread);
+#endif
+
 atexit(st_flush_trace_buffer);
 return true;
 }
-- 
2.17.1




Re: [Qemu-devel] [PULL for-3.0 1/1] trace/simple: fix hang in child after fork(2)

2018-07-26 Thread Stefan Hajnoczi
On Tue, Jul 24, 2018 at 03:41:01PM +0100, Daniel P. Berrangé wrote:
> On Tue, Jul 24, 2018 at 03:35:51PM +0100, Daniel P. Berrangé wrote:
> > On Tue, Jul 24, 2018 at 03:25:04PM +0100, Stefan Hajnoczi wrote:
> > > The simple trace backend spawns a write-out thread which is used to
> > > asynchronously flush the in-memory ring buffer to disk.
> > > 
> > > fork(2) does not clone all threads, only the thread that invoked
> > > fork(2).  As a result there is no write-out thread in the child process!
> > > 
> > > This causes a hang during shutdown when atexit(3) handler installed by
> > > the simple trace backend waits for the non-existent write-out thread.
> > > 
> > > This patch uses pthread_atfork(3) to terminate the write-out thread
> > > before fork and restart it in both the parent and child after fork.
> > > This solves a hang in qemu-iotests 147 due to qemu-nbd --fork usage.
> > 
> > I'm not convinced this is safe, as it looks like it has a window in
> > which both the parent and child processes will be doing write-out to
> > the same file.
> > 
> > In particular in the main QEMU system emulators it means that any
> > time we fork() in QEMU, eg for spawning commands with migration
> > exec: URI, or TAP devuce ifup scripts, etc, we'll be starting a
> > write-out thread in the child.
> 
> I'd be more inclined to have the pthread_atfork() handle simply terminate
> the tracing process, reversing all effects of trace_init_backends(). Then
> after qemu-nbd has called fork(), it can simply call trace_init_backends()
> explicitly to start it running again. This avoids unecessarily starting
> tracing in child processes that are not requiring/expecting it.

Good point.

NACK

Stefan


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PULL for-3.0 1/1] trace/simple: fix hang in child after fork(2)

2018-07-24 Thread Daniel P . Berrangé
On Tue, Jul 24, 2018 at 03:25:04PM +0100, Stefan Hajnoczi wrote:
> The simple trace backend spawns a write-out thread which is used to
> asynchronously flush the in-memory ring buffer to disk.
> 
> fork(2) does not clone all threads, only the thread that invoked
> fork(2).  As a result there is no write-out thread in the child process!
> 
> This causes a hang during shutdown when atexit(3) handler installed by
> the simple trace backend waits for the non-existent write-out thread.
> 
> This patch uses pthread_atfork(3) to terminate the write-out thread
> before fork and restart it in both the parent and child after fork.
> This solves a hang in qemu-iotests 147 due to qemu-nbd --fork usage.

I'm not convinced this is safe, as it looks like it has a window in
which both the parent and child processes will be doing write-out to
the same file.

In particular in the main QEMU system emulators it means that any
time we fork() in QEMU, eg for spawning commands with migration
exec: URI, or TAP devuce ifup scripts, etc, we'll be starting a
write-out thread in the child. 


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [Qemu-devel] [PULL for-3.0 1/1] trace/simple: fix hang in child after fork(2)

2018-07-24 Thread Daniel P . Berrangé
On Tue, Jul 24, 2018 at 03:35:51PM +0100, Daniel P. Berrangé wrote:
> On Tue, Jul 24, 2018 at 03:25:04PM +0100, Stefan Hajnoczi wrote:
> > The simple trace backend spawns a write-out thread which is used to
> > asynchronously flush the in-memory ring buffer to disk.
> > 
> > fork(2) does not clone all threads, only the thread that invoked
> > fork(2).  As a result there is no write-out thread in the child process!
> > 
> > This causes a hang during shutdown when atexit(3) handler installed by
> > the simple trace backend waits for the non-existent write-out thread.
> > 
> > This patch uses pthread_atfork(3) to terminate the write-out thread
> > before fork and restart it in both the parent and child after fork.
> > This solves a hang in qemu-iotests 147 due to qemu-nbd --fork usage.
> 
> I'm not convinced this is safe, as it looks like it has a window in
> which both the parent and child processes will be doing write-out to
> the same file.
> 
> In particular in the main QEMU system emulators it means that any
> time we fork() in QEMU, eg for spawning commands with migration
> exec: URI, or TAP devuce ifup scripts, etc, we'll be starting a
> write-out thread in the child.

I'd be more inclined to have the pthread_atfork() handle simply terminate
the tracing process, reversing all effects of trace_init_backends(). Then
after qemu-nbd has called fork(), it can simply call trace_init_backends()
explicitly to start it running again. This avoids unecessarily starting
tracing in child processes that are not requiring/expecting it.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|



Re: [Qemu-devel] [PULL for-3.0 1/1] trace/simple: fix hang in child after fork(2)

2018-07-24 Thread Eric Blake

On 07/24/2018 09:25 AM, Stefan Hajnoczi wrote:

The simple trace backend spawns a write-out thread which is used to
asynchronously flush the in-memory ring buffer to disk.

fork(2) does not clone all threads, only the thread that invoked
fork(2).  As a result there is no write-out thread in the child process!

This causes a hang during shutdown when atexit(3) handler installed by
the simple trace backend waits for the non-existent write-out thread.

This patch uses pthread_atfork(3) to terminate the write-out thread
before fork and restart it in both the parent and child after fork.
This solves a hang in qemu-iotests 147 due to qemu-nbd --fork usage.

Reported-by: Cornelia Huck 
Tested-by: Cornelia Huck 
Signed-off-by: Stefan Hajnoczi 
Message-id: 20180717101944.11691-1-stefa...@redhat.com
Suggested-by: Paolo Bonzini 
Signed-off-by: Stefan Hajnoczi 
---



+static void restart_writeout_thread(void)
+{
+trace_writeout_running = true;
+trace_writeout_thread = trace_thread_create(writeout_thread);
+if (!trace_writeout_thread) {
+warn_report("unable to initialize simple trace backend");
+}
+
+/* This relies on undefined behavior in the fork() child (it's fine in the
+ * fork() parent).  g_mutex_unlock() on a mutex acquired by another thread
+ * is undefined (see glib documentation).
+ */
+g_mutex_unlock(&trace_lock);


Dan's point about stopping tracing prior to fork, then restarting it 
from scratch in both the parent and in specific children, would also get 
rid of this risky non-portable behavior of trying to manipulate a mutex 
acquired by the parent process' thread.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org