For CPR-style migration, we need the VM to stop earlier on, before
cpr_state_save() fires.  That is to prevent the race between source I/O
and target device init, as well as make sure VHOST_RESET_OWNER and
VHOST_SET_OWNER are fired in the right order (cpr-transfer case).  Thus,
for CPR case migration, let's move the stop to qmp_migrate(), directly
before launching cpr_state_save().

This patch is a rework of the change originally proposed by Steve and
Ben at [0].

[0] 
https://lore.kernel.org/qemu-devel/[email protected]

Originally-by: Steve Sistare <[email protected]>
Originally-by: Ben Chaney <[email protected]>
Signed-off-by: Andrey Drobyshev <[email protected]>
---
 migration/migration.c | 73 +++++++++++++++++++++++++++++++++----------
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index 3c70467d314..c02f1da8b0e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1380,10 +1380,13 @@ static void migration_cleanup(MigrationState *s)
 
     /*
      * FAILED notification should have already happened.  Notify DONE if
-     * migration completed successfully.
+     * migration completed successfully.  On a failed/canceled path,
+     * resume VM for the CPR case.
      */
     if (!migration_has_failed(s)) {
         migration_call_notifiers(MIG_EVENT_DONE, NULL);
+    } else if (migrate_mode_is_cpr()) {
+        vm_resume(s->vm_old_state);
     }
 
     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
@@ -2130,6 +2133,45 @@ void qmp_migrate(const char *uri, bool has_channels,
      */
     Error *local_err = NULL;
 
+    /*
+     * CPR-transfer  ordering:
+     *
+     *   SOURCE                              TARGET
+     *   ------                              ------
+     *                                       cpr_state_load() blocks
+     *   |                                        |
+     *   |  1. migration_stop_vm()                |
+     *   |     VM stopped, devices quiesced       |
+     *   |                                        | Waiting for
+     *   |  2. notifiers (SETUP)                  | FDs from source
+     *   |     vhost_reset_owner() releases       |
+     *   |     device ownership                   |
+     *   |                                        |
+     *   |  3. cpr_state_save() ---- FDs -------> |
+     *   |                                        |
+     *   v                                        v
+     *   postmigrate                         Device init begins
+     *                                       - cpr_find_fd()
+     *                                       - vhost_dev_init()
+     *                                       - VHOST_SET_OWNER
+     *
+     * Step 3 is the synchronization/cut-over point. Target proceeds 
immediately
+     * upon receiving FDs, so steps 1-2 must complete otherwise:
+     * - Target's VHOST_SET_OWNER fails with -EBUSY (source still owns)
+     * - Race between source I/O and target device init
+     *
+     *  We stop the VM early (before FD transfer) to prevent this race.
+     *  Unlike regular migration, CPR-transfer passes memory via FD (memfd)
+     *  rather than copying RAM, so early VM stop should have minimal downtime.
+     */
+    if (migrate_mode_is_cpr()) {
+        int ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
+        if (ret < 0) {
+            error_setg(&local_err, "migration_stop_vm failed, error %d", -ret);
+            goto out;
+        }
+    }
+
     /* Notify before starting migration thread and before starting CPR */
     if (migration_call_notifiers(MIG_EVENT_SETUP, &local_err)) {
         goto out;
@@ -3482,13 +3524,19 @@ static void migration_iteration_finish(MigrationState 
*s)
          */
         migration_call_notifiers(MIG_EVENT_FAILED, NULL);
 
-        if (runstate_is_live(s->vm_old_state)) {
-            if (!runstate_check(RUN_STATE_SHUTDOWN)) {
-                vm_start();
-            }
-        } else {
-            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
-                runstate_set(s->vm_old_state);
+        /*
+         * For cpr the VM resume on failure is centralized in
+         * migration_cleanup(), so don't resume here as well.
+         */
+        if (!migrate_mode_is_cpr()) {
+            if (runstate_is_live(s->vm_old_state)) {
+                if (!runstate_check(RUN_STATE_SHUTDOWN)) {
+                    vm_start();
+                }
+            } else {
+                if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+                    runstate_set(s->vm_old_state);
+                }
             }
         }
         break;
@@ -3903,7 +3951,6 @@ void migration_start_outgoing(MigrationState *s)
     Error *local_err = NULL;
     uint64_t rate_limit;
     bool resume = (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP);
-    int ret;
 
     if (resume) {
         /* This is a resumed migration */
@@ -3944,14 +3991,6 @@ void migration_start_outgoing(MigrationState *s)
         return;
     }
 
-    if (migrate_mode_is_cpr()) {
-        ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
-        if (ret < 0) {
-            error_setg(&local_err, "migration_stop_vm failed, error %d", -ret);
-            goto fail;
-        }
-    }
-
     /*
      * Take a refcount to make sure the migration object won't get freed by
      * the main thread already in migration_shutdown().
-- 
2.47.1


Reply via email to