When both the multifd and postcopy-ram capabilities are enabled and
migrate-start-postcopy is issued, the migration finishes sending the
memory pages and then QEMU crashes with the following error:

qemu-system-x86_64: ../util/yank.c:107: yank_unregister_instance: Assertion
`QLIST_EMPTY(&entry->yankfns)' failed.

This happens because every multifd channel registers a yank function with
yank_register_function(), but none of them gets to unregister it before
MIGRATION_YANK_INSTANCE is unregistered, so the assertion fails.

Fix that by adding a migration_load_cleanup() helper, which wraps
multifd_load_cleanup() and reports any error, and calling it in
postcopy_ram_listen_thread() before MIGRATION_YANK_INSTANCE is unregistered.

Fixes: b5eea99ec2 ("migration: Add yank feature")
Reported-by: Li Xiaohui <xiao...@redhat.com>
Signed-off-by: Leonardo Bras <leob...@redhat.com>
---
 migration/migration.h |  1 +
 migration/migration.c | 18 +++++++++++++-----
 migration/savevm.c    |  2 ++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/migration/migration.h b/migration/migration.h
index cdad8aceaa..240f64efb0 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -473,6 +473,7 @@ void migration_make_urgent_request(void);
 void migration_consume_urgent_request(void);
 bool migration_rate_limit(void);
 void migration_cancel(const Error *error);
+bool migration_load_cleanup(void);
 
 void populate_vfio_info(MigrationInfo *info);
 void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page);
diff --git a/migration/migration.c b/migration/migration.c
index 739bb683f3..4f363b2a95 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -486,6 +486,17 @@ void migrate_add_address(SocketAddress *address)
                       QAPI_CLONE(SocketAddress, address));
 }
 
+bool migration_load_cleanup(void)
+{
+    Error *local_err = NULL;
+
+    if (multifd_load_cleanup(&local_err)) {
+        error_report_err(local_err);
+        return true;
+    }
+    return false;
+}
+
 static void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
     const char *p = NULL;
@@ -540,8 +551,7 @@ static void process_incoming_migration_bh(void *opaque)
      */
     qemu_announce_self(&mis->announce_timer, migrate_announce_params());
 
-    if (multifd_load_cleanup(&local_err) != 0) {
-        error_report_err(local_err);
+    if (migration_load_cleanup()) {
         autostart = false;
     }
     /* If global state section was not received or we are in running
@@ -646,9 +656,7 @@ fail:
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_FAILED);
     qemu_fclose(mis->from_src_file);
-    if (multifd_load_cleanup(&local_err) != 0) {
-        error_report_err(local_err);
-    }
+    migration_load_cleanup();
     exit(EXIT_FAILURE);
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index a0cdb714f7..250caff7f4 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1889,6 +1889,8 @@ static void *postcopy_ram_listen_thread(void *opaque)
         exit(EXIT_FAILURE);
     }
 
+    migration_load_cleanup();
+
     migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                                    MIGRATION_STATUS_COMPLETED);
     /*
-- 
2.38.1


Reply via email to