[Qemu-devel] [PULL 51/57] Start up a postcopy/listener thread ready for incoming page data

2015-11-09 Thread Juan Quintela
From: "Dr. David Alan Gilbert" 

The loading of a device state (during postcopy) may access guest
memory that's still on the source machine and thus might need
a page fill; split off a separate thread that handles the incoming
page data so that the original incoming migration code can finish
off the device data.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  4 +++
 migration/migration.c         |  6 ++++
 migration/savevm.c            | 79 +++++++++++++++++++++++++++++++++++++++++-
 trace-events                  |  2 ++
 4 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 329d535..fd018b7 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -93,6 +93,10 @@ struct MigrationIncomingState {
 QemuThread fault_thread;
 QemuSemaphore  fault_thread_sem;

+bool   have_listen_thread;
+QemuThread listen_thread;
+QemuSemaphore  listen_thread_sem;
+
 /* For the kernel to send us notifications */
 int   userfault_fd;
 /* To tell the fault_thread to quit */
diff --git a/migration/migration.c b/migration/migration.c
index a12ba15..180103a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1441,6 +1441,12 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
 goto fail;
 }

+/*
+ * Make sure the receiver can get incoming pages before we send the rest
+ * of the state
+ */
+qemu_savevm_send_postcopy_listen(fb);
+
 qemu_savevm_state_complete_precopy(fb);
 qemu_savevm_send_ping(fb, 3);

diff --git a/migration/savevm.c b/migration/savevm.c
index 9794945..51c3fb1 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1380,6 +1380,65 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
 return 0;
 }

+/*
+ * Triggered by a postcopy_listen command; this thread takes over reading
+ * the input stream, leaving the main thread free to carry on loading the rest
+ * of the device state (from RAM).
+ * (TODO:This could do with being in a postcopy file - but there again it's
+ * just another input loop, not that postcopy specific)
+ */
+static void *postcopy_ram_listen_thread(void *opaque)
+{
+QEMUFile *f = opaque;
+MigrationIncomingState *mis = migration_incoming_get_current();
+int load_res;
+
+qemu_sem_post(&mis->listen_thread_sem);
+trace_postcopy_ram_listen_thread_start();
+
+/*
+ * Because we're a thread and not a coroutine we can't yield
+ * in qemu_file, and thus we must be blocking now.
+ */
+qemu_file_set_blocking(f, true);
+load_res = qemu_loadvm_state_main(f, mis);
+/* And non-blocking again so we don't block in any cleanup */
+qemu_file_set_blocking(f, false);
+
+trace_postcopy_ram_listen_thread_exit();
+if (load_res < 0) {
+error_report("%s: loadvm failed: %d", __func__, load_res);
+qemu_file_set_error(f, load_res);
+} else {
+/*
+ * This looks good, but it's possible that the device loading in the
+ * main thread hasn't finished yet, and so we might not be in 'RUN'
+ * state yet; wait for the end of the main thread.
+ */
+qemu_event_wait(&mis->main_thread_load_event);
+}
+postcopy_ram_incoming_cleanup(mis);
+/*
+ * If everything has worked fine, then the main thread has waited
+ * for us to start, and we're the last use of the mis.
+ * (If something broke then qemu will have to exit anyway since it's
+ * got a bad migration state).
+ */
+migration_incoming_state_destroy();
+
+if (load_res < 0) {
+/*
+ * If something went wrong then we have a bad state so exit;
+ * depending how far we got it might be possible at this point
+ * to leave the guest running and fire MCEs for pages that never
+ * arrived as a desperate recovery step.
+ */
+exit(EXIT_FAILURE);
+}
+
+return NULL;
+}
+
 /* After this message we must be able to immediately receive postcopy data */
 static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 {
@@ -1399,7 +1458,20 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 return -1;
 }

-/* TODO start up the postcopy listening thread */
+if (mis->have_listen_thread) {
+error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
+return -1;
+}
+
+mis->have_listen_thread = true;
+/* Start up the listening thread and wait for it to signal ready */
+qemu_sem_init(&mis->listen_thread_sem, 0);
+qemu_thread_create(&mis->listen_thread, "postcopy/listen",
+   postcopy_ram_listen_thread, mis->from_src_file,
+   QEMU_THREAD_JOINABLE);
+qemu_sem_wait(&mis->listen_thread_sem);
+qemu_sem_destro

[Qemu-devel] [PULL 51/57] Start up a postcopy/listener thread ready for incoming page data

2015-11-10 Thread Juan Quintela
From: "Dr. David Alan Gilbert" 

The loading of a device state (during postcopy) may access guest
memory that's still on the source machine and thus might need
a page fill; split off a separate thread that handles the incoming
page data so that the original incoming migration code can finish
off the device data.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |  4 +++
 migration/migration.c         |  6 ++++
 migration/savevm.c            | 79 +++++++++++++++++++++++++++++++++++++++++-
 trace-events                  |  2 ++
 4 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 329d535..fd018b7 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -93,6 +93,10 @@ struct MigrationIncomingState {
 QemuThread fault_thread;
 QemuSemaphore  fault_thread_sem;

+bool   have_listen_thread;
+QemuThread listen_thread;
+QemuSemaphore  listen_thread_sem;
+
 /* For the kernel to send us notifications */
 int   userfault_fd;
 /* To tell the fault_thread to quit */
diff --git a/migration/migration.c b/migration/migration.c
index 38d64ea..db3d2dd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1441,6 +1441,12 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
 goto fail;
 }

+/*
+ * Make sure the receiver can get incoming pages before we send the rest
+ * of the state
+ */
+qemu_savevm_send_postcopy_listen(fb);
+
 qemu_savevm_state_complete_precopy(fb);
 qemu_savevm_send_ping(fb, 3);

diff --git a/migration/savevm.c b/migration/savevm.c
index 308b7d1..6ef9e62 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1380,6 +1380,65 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
 return 0;
 }

+/*
+ * Triggered by a postcopy_listen command; this thread takes over reading
+ * the input stream, leaving the main thread free to carry on loading the rest
+ * of the device state (from RAM).
+ * (TODO:This could do with being in a postcopy file - but there again it's
+ * just another input loop, not that postcopy specific)
+ */
+static void *postcopy_ram_listen_thread(void *opaque)
+{
+QEMUFile *f = opaque;
+MigrationIncomingState *mis = migration_incoming_get_current();
+int load_res;
+
+qemu_sem_post(&mis->listen_thread_sem);
+trace_postcopy_ram_listen_thread_start();
+
+/*
+ * Because we're a thread and not a coroutine we can't yield
+ * in qemu_file, and thus we must be blocking now.
+ */
+qemu_file_set_blocking(f, true);
+load_res = qemu_loadvm_state_main(f, mis);
+/* And non-blocking again so we don't block in any cleanup */
+qemu_file_set_blocking(f, false);
+
+trace_postcopy_ram_listen_thread_exit();
+if (load_res < 0) {
+error_report("%s: loadvm failed: %d", __func__, load_res);
+qemu_file_set_error(f, load_res);
+} else {
+/*
+ * This looks good, but it's possible that the device loading in the
+ * main thread hasn't finished yet, and so we might not be in 'RUN'
+ * state yet; wait for the end of the main thread.
+ */
+qemu_event_wait(&mis->main_thread_load_event);
+}
+postcopy_ram_incoming_cleanup(mis);
+/*
+ * If everything has worked fine, then the main thread has waited
+ * for us to start, and we're the last use of the mis.
+ * (If something broke then qemu will have to exit anyway since it's
+ * got a bad migration state).
+ */
+migration_incoming_state_destroy();
+
+if (load_res < 0) {
+/*
+ * If something went wrong then we have a bad state so exit;
+ * depending how far we got it might be possible at this point
+ * to leave the guest running and fire MCEs for pages that never
+ * arrived as a desperate recovery step.
+ */
+exit(EXIT_FAILURE);
+}
+
+return NULL;
+}
+
 /* After this message we must be able to immediately receive postcopy data */
 static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 {
@@ -1399,7 +1458,20 @@ static int loadvm_postcopy_handle_listen(MigrationIncomingState *mis)
 return -1;
 }

-/* TODO start up the postcopy listening thread */
+if (mis->have_listen_thread) {
+error_report("CMD_POSTCOPY_RAM_LISTEN already has a listen thread");
+return -1;
+}
+
+mis->have_listen_thread = true;
+/* Start up the listening thread and wait for it to signal ready */
+qemu_sem_init(&mis->listen_thread_sem, 0);
+qemu_thread_create(&mis->listen_thread, "postcopy/listen",
+   postcopy_ram_listen_thread, mis->from_src_file,
+   QEMU_THREAD_JOINABLE);
+qemu_sem_wait(&mis->listen_thread_sem);
+qemu_sem_destro