[Qemu-devel] [PULL 38/57] Postcopy: Postcopy startup in migration thread

2015-11-09 Thread Juan Quintela
From: "Dr. David Alan Gilbert" 

Rework the migration thread to set up and start postcopy.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |   3 +
 migration/migration.c | 174 --
 trace-events  |   4 +
 3 files changed, 174 insertions(+), 7 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 2ad0d2b..ff13ff2 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -135,6 +135,9 @@ struct MigrationState

 /* Flag set once the migration has been asked to enter postcopy */
 bool start_postcopy;
+
+/* Flag set once the migration thread is running (and needs joining) */
+bool migration_thread_running;
 };

 void process_incoming_migration(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index de14359..e68bc43 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -745,7 +745,10 @@ static void migrate_fd_cleanup(void *opaque)
 if (s->file) {
 trace_migrate_fd_cleanup();
 qemu_mutex_unlock_iothread();
-qemu_thread_join(&s->thread);
+if (s->migration_thread_running) {
+qemu_thread_join(&s->thread);
+s->migration_thread_running = false;
+}
 qemu_mutex_lock_iothread();

 migrate_compress_threads_join();
@@ -1238,7 +1241,6 @@ out:
 return NULL;
 }

-__attribute__ (( unused )) /* Until later in patch series */
 static int open_return_path_on_source(MigrationState *ms)
 {

@@ -1279,6 +1281,109 @@ static int await_return_path_close_on_source(MigrationState *ms)
 return ms->rp_state.error;
 }

+/*
+ * Switch from normal iteration to postcopy
+ * Returns non-0 on error
+ */
+static int postcopy_start(MigrationState *ms, bool *old_vm_running)
+{
+int ret;
+const QEMUSizedBuffer *qsb;
+int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+migrate_set_state(ms, MIGRATION_STATUS_ACTIVE,
+  MIGRATION_STATUS_POSTCOPY_ACTIVE);
+
+trace_postcopy_start();
+qemu_mutex_lock_iothread();
+trace_postcopy_start_set_run();
+
+qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
+*old_vm_running = runstate_is_running();
+global_state_store();
+ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+
+if (ret < 0) {
+goto fail;
+}
+
+/*
+ * In the FINISH_MIGRATE run state and with the iothread lock held
+ * everything should be quiet, but we've potentially still got dirty
+ * pages and we need to tell the destination to throw away any pages
+ * it's already received that are dirty
+ */
+if (ram_postcopy_send_discard_bitmap(ms)) {
+error_report("postcopy send discard bitmap failed");
+goto fail;
+}
+
+/*
+ * send rest of state - note things that are doing postcopy
+ * will notice we're in POSTCOPY_ACTIVE and not actually
+ * wrap their state up here
+ */
+qemu_file_set_rate_limit(ms->file, INT64_MAX);
+/* Ping just for debugging, helps line traces up */
+qemu_savevm_send_ping(ms->file, 2);
+
+/*
+ * While loading the device state we may trigger page transfer
+ * requests and the fd must be free to process those, and thus
+ * the destination must read the whole device state off the fd before
+ * it starts processing it.  Unfortunately the ad-hoc migration format
+ * doesn't allow the destination to know the size to read without fully
+ * parsing it through each device's load-state code (especially the open
+ * coded devices that use get/put).
+ * So we wrap the device state up in a package with a length at the start;
+ * to do this we use a qemu_buf to hold the whole of the device state.
+ */
+QEMUFile *fb = qemu_bufopen("w", NULL);
+if (!fb) {
+error_report("Failed to create buffered file");
+goto fail;
+}
+
+qemu_savevm_state_complete_precopy(fb);
+qemu_savevm_send_ping(fb, 3);
+
+qemu_savevm_send_postcopy_run(fb);
+
+/* <><> end of stuff going into the package */
+qsb = qemu_buf_get(fb);
+
+/* Now send that blob */
+if (qemu_savevm_send_packaged(ms->file, qsb)) {
+goto fail_closefb;
+}
+qemu_fclose(fb);
+ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
+
+qemu_mutex_unlock_iothread();
+
+/*
+ * Although this ping is just for debug, it could potentially be
+ * used for getting a better measurement of downtime at the source.
+ */
+qemu_savevm_send_ping(ms->file, 4);
+
+ret = qemu_file_get_error(ms->file);
+if (ret) {
+error_report("postcopy_start: Migration stream errored");
+migrate_set_state(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+  MIGRATION_STATUS_FAILED);
+}
+
+return ret;
+
+fail_closefb:
+qem

[Qemu-devel] [PULL 38/57] Postcopy: Postcopy startup in migration thread

2015-11-10 Thread Juan Quintela
From: "Dr. David Alan Gilbert" 

Rework the migration thread to set up and start postcopy.

Signed-off-by: Dr. David Alan Gilbert 
Reviewed-by: Amit Shah 
Reviewed-by: Juan Quintela 
Signed-off-by: Juan Quintela 
---
 include/migration/migration.h |   3 +
 migration/migration.c | 174 --
 trace-events  |   4 +
 3 files changed, 174 insertions(+), 7 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 2ad0d2b..ff13ff2 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -135,6 +135,9 @@ struct MigrationState

 /* Flag set once the migration has been asked to enter postcopy */
 bool start_postcopy;
+
+/* Flag set once the migration thread is running (and needs joining) */
+bool migration_thread_running;
 };

 void process_incoming_migration(QEMUFile *f);
diff --git a/migration/migration.c b/migration/migration.c
index afc863a..064986b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -745,7 +745,10 @@ static void migrate_fd_cleanup(void *opaque)
 if (s->file) {
 trace_migrate_fd_cleanup();
 qemu_mutex_unlock_iothread();
-qemu_thread_join(&s->thread);
+if (s->migration_thread_running) {
+qemu_thread_join(&s->thread);
+s->migration_thread_running = false;
+}
 qemu_mutex_lock_iothread();

 migrate_compress_threads_join();
@@ -1238,7 +1241,6 @@ out:
 return NULL;
 }

-__attribute__ (( unused )) /* Until later in patch series */
 static int open_return_path_on_source(MigrationState *ms)
 {

@@ -1279,6 +1281,109 @@ static int await_return_path_close_on_source(MigrationState *ms)
 return ms->rp_state.error;
 }

+/*
+ * Switch from normal iteration to postcopy
+ * Returns non-0 on error
+ */
+static int postcopy_start(MigrationState *ms, bool *old_vm_running)
+{
+int ret;
+const QEMUSizedBuffer *qsb;
+int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+migrate_set_state(ms, MIGRATION_STATUS_ACTIVE,
+  MIGRATION_STATUS_POSTCOPY_ACTIVE);
+
+trace_postcopy_start();
+qemu_mutex_lock_iothread();
+trace_postcopy_start_set_run();
+
+qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
+*old_vm_running = runstate_is_running();
+global_state_store();
+ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+
+if (ret < 0) {
+goto fail;
+}
+
+/*
+ * In the FINISH_MIGRATE run state and with the iothread lock held
+ * everything should be quiet, but we've potentially still got dirty
+ * pages and we need to tell the destination to throw away any pages
+ * it's already received that are dirty
+ */
+if (ram_postcopy_send_discard_bitmap(ms)) {
+error_report("postcopy send discard bitmap failed");
+goto fail;
+}
+
+/*
+ * send rest of state - note things that are doing postcopy
+ * will notice we're in POSTCOPY_ACTIVE and not actually
+ * wrap their state up here
+ */
+qemu_file_set_rate_limit(ms->file, INT64_MAX);
+/* Ping just for debugging, helps line traces up */
+qemu_savevm_send_ping(ms->file, 2);
+
+/*
+ * While loading the device state we may trigger page transfer
+ * requests and the fd must be free to process those, and thus
+ * the destination must read the whole device state off the fd before
+ * it starts processing it.  Unfortunately the ad-hoc migration format
+ * doesn't allow the destination to know the size to read without fully
+ * parsing it through each device's load-state code (especially the open
+ * coded devices that use get/put).
+ * So we wrap the device state up in a package with a length at the start;
+ * to do this we use a qemu_buf to hold the whole of the device state.
+ */
+QEMUFile *fb = qemu_bufopen("w", NULL);
+if (!fb) {
+error_report("Failed to create buffered file");
+goto fail;
+}
+
+qemu_savevm_state_complete_precopy(fb);
+qemu_savevm_send_ping(fb, 3);
+
+qemu_savevm_send_postcopy_run(fb);
+
+/* <><> end of stuff going into the package */
+qsb = qemu_buf_get(fb);
+
+/* Now send that blob */
+if (qemu_savevm_send_packaged(ms->file, qsb)) {
+goto fail_closefb;
+}
+qemu_fclose(fb);
+ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
+
+qemu_mutex_unlock_iothread();
+
+/*
+ * Although this ping is just for debug, it could potentially be
+ * used for getting a better measurement of downtime at the source.
+ */
+qemu_savevm_send_ping(ms->file, 4);
+
+ret = qemu_file_get_error(ms->file);
+if (ret) {
+error_report("postcopy_start: Migration stream errored");
+migrate_set_state(ms, MIGRATION_STATUS_POSTCOPY_ACTIVE,
+  MIGRATION_STATUS_FAILED);
+}
+
+return ret;
+
+fail_closefb:
+qem