[Qemu-devel] [RFC 29/29] migration: reset migrate thread vars when resumed

Peter Xu Fri, 28 Jul 2017 01:31:33 -0700

First, the MigThrError enumeration is introduced to better describe the
result of migration_detect_error(). This lets migration_thread() know
whether a recovery has happened.


Then, if a recovery is detected, migration_thread() resets its local
variables (initial_time and initial_bytes) before continuing the migration.

Signed-off-by: Peter Xu <pet...@redhat.com>
---
 migration/migration.c | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index ecebe30..439bc22 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2159,6 +2159,15 @@ static bool postcopy_should_start(MigrationState *s)
     return atomic_read(&s->start_postcopy) || s->start_postcopy_fast;
 }
 
+typedef enum MigThrError {
+    /* No error detected */
+    MIG_THR_ERR_NONE = 0,
+    /* Detected error, but resumed successfully */
+    MIG_THR_ERR_RECOVERED = 1,
+    /* Detected fatal error, need to exit */
+    MIG_THR_ERR_FATAL = 2,
+} MigThrError;
+
 static int postcopy_resume_handshake(MigrationState *s)
 {
     qemu_mutex_lock(&s->resume_lock);
@@ -2209,10 +2218,10 @@ static int postcopy_do_resume(MigrationState *s)
 
 /*
  * We don't return until we are in a safe state to continue current
- * postcopy migration.  Returns true to continue the migration, or
- * false to terminate current migration.
+ * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
+ * MIG_THR_ERR_FATAL if an unrecoverable failure happened.
  */
-static bool postcopy_pause(MigrationState *s)
+static MigThrError postcopy_pause(MigrationState *s)
 {
     assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
 
@@ -2247,7 +2256,7 @@ do_pause:
         if (postcopy_do_resume(s) == 0) {
             /* Let's continue! */
             trace_postcopy_pause_continued();
-            return true;
+            return MIG_THR_ERR_RECOVERED;
         } else {
             /*
              * Something wrong happened during the recovery, let's
@@ -2258,12 +2267,11 @@ do_pause:
         }
     } else {
         /* This is not right... Time to quit. */
-        return false;
+        return MIG_THR_ERR_FATAL;
     }
 }
 
-/* Return true if we want to stop the migration, otherwise false. */
-static bool migration_detect_error(MigrationState *s)
+static MigThrError migration_detect_error(MigrationState *s)
 {
     int ret;
 
@@ -2272,7 +2280,7 @@ static bool migration_detect_error(MigrationState *s)
 
     if (!ret) {
         /* Everything is fine */
-        return false;
+        return MIG_THR_ERR_NONE;
     }
 
     if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
@@ -2281,7 +2289,7 @@ static bool migration_detect_error(MigrationState *s)
          * while. After that, it can be continued by a
          * recovery phase.
          */
-        return !postcopy_pause(s);
+        return postcopy_pause(s);
     } else {
         /*
          * For precopy (or postcopy with error outside IO), we fail
@@ -2291,7 +2299,7 @@ static bool migration_detect_error(MigrationState *s)
         trace_migration_thread_file_err();
 
         /* Time to stop the migration, now. */
-        return true;
+        return MIG_THR_ERR_FATAL;
     }
 }
 
@@ -2319,6 +2327,7 @@ static void *migration_thread(void *opaque)
     /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
     enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
     bool enable_colo = migrate_colo_enabled();
+    MigThrError thr_error;
 
     rcu_register_thread();
 
@@ -2395,8 +2404,17 @@ static void *migration_thread(void *opaque)
          * Try to detect any kind of failures, and see whether we
          * should stop the migration now.
          */
-        if (migration_detect_error(s)) {
+        thr_error = migration_detect_error(s);
+        if (thr_error == MIG_THR_ERR_FATAL) {
+            /* Stop migration */
             break;
+        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
+            /*
+             * Just recovered from e.g. a network failure, so reset
+             * all the local variables.
+             */
+            initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+            initial_bytes = 0;
         }
 
         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-- 
2.7.4

[Qemu-devel] [RFC 29/29] migration: reset migrate thread vars when resumed

Reply via email to