Migration causes a number of events that need to go into the replay trace, such as VM state transitions. The replay_mutex lock needs to be held for these.
The simplest approach seems to be to just take it up front when taking the BQL. Signed-off-by: Nicholas Piggin <npig...@gmail.com> --- migration/migration.h | 2 -- migration/migration.c | 11 ++++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/migration/migration.h b/migration/migration.h index f2c8b8f286..0621479a4e 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -543,6 +543,4 @@ int migration_rp_wait(MigrationState *s); */ void migration_rp_kick(MigrationState *s); -int migration_stop_vm(RunState state); - #endif diff --git a/migration/migration.c b/migration/migration.c index 2e794db75c..80a5ce17d1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -24,6 +24,7 @@ #include "socket.h" #include "sysemu/runstate.h" #include "sysemu/sysemu.h" +#include "sysemu/replay.h" #include "sysemu/cpu-throttle.h" #include "rdma.h" #include "ram.h" @@ -162,7 +163,7 @@ static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp) return (a > b) - (a < b); } -int migration_stop_vm(RunState state) +static int migration_stop_vm(RunState state) { int ret = vm_stop_force_state(state); @@ -2433,6 +2434,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) } trace_postcopy_start(); + replay_mutex_lock(); bql_lock(); trace_postcopy_start_set_run(); @@ -2542,6 +2544,7 @@ static int postcopy_start(MigrationState *ms, Error **errp) migration_downtime_end(ms); bql_unlock(); + replay_mutex_unlock(); if (migrate_postcopy_ram()) { /* @@ -2583,6 +2586,7 @@ fail: } } bql_unlock(); + replay_mutex_unlock(); return -1; } @@ -2634,6 +2638,7 @@ static int migration_completion_precopy(MigrationState *s, { int ret; + replay_mutex_lock(); bql_lock(); migration_downtime_start(s); @@ -2662,6 +2667,7 @@ static int migration_completion_precopy(MigrationState *s, s->block_inactive); out_unlock: bql_unlock(); + replay_mutex_unlock(); return ret; } @@ -3485,6 +3491,7 @@ static void *bg_migration_thread(void *opaque)
trace_migration_thread_setup_complete(); migration_downtime_start(s); + replay_mutex_lock(); bql_lock(); s->vm_old_state = runstate_get(); @@ -3522,6 +3529,7 @@ static void *bg_migration_thread(void *opaque) */ migration_bh_schedule(bg_migration_vm_start_bh, s); bql_unlock(); + replay_mutex_unlock(); while (migration_is_active(s)) { MigIterateState iter_state = bg_migration_iteration_run(s); @@ -3551,6 +3559,7 @@ fail: migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); bql_unlock(); + replay_mutex_unlock(); } bg_migration_iteration_finish(s); -- 2.42.0