* Vladimir Sementsov-Ogievskiy (vsement...@virtuozzo.com) wrote: > First, if only bitmaps postcopy enabled (not ram postcopy) > postcopy_pause_incoming crashes on assertion assert(mis->to_src_file). > > And anyway, bitmaps postcopy is not prepared to be somehow recovered. > The original idea instead is that if bitmaps postcopy failed, we just > loss some bitmaps, which is not critical. So, on failure we just need > to remove unfinished bitmaps and guest should continue execution on > destination. > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > --- > migration/savevm.c | 37 ++++++++++++++++++++++++++++++++----- > 1 file changed, 32 insertions(+), 5 deletions(-) > > diff --git a/migration/savevm.c b/migration/savevm.c > index 1d4220ece8..7e9dd58ccb 100644 > --- a/migration/savevm.c > +++ b/migration/savevm.c > @@ -1812,6 +1812,9 @@ static void *postcopy_ram_listen_thread(void *opaque) > MigrationIncomingState *mis = migration_incoming_get_current(); > QEMUFile *f = mis->from_src_file; > int load_res; > + MigrationState *migr = migrate_get_current(); > + > + object_ref(OBJECT(migr)); > > migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, > MIGRATION_STATUS_POSTCOPY_ACTIVE); > @@ -1838,11 +1841,24 @@ static void *postcopy_ram_listen_thread(void *opaque) > > trace_postcopy_ram_listen_thread_exit(); > if (load_res < 0) { > - error_report("%s: loadvm failed: %d", __func__, load_res); > qemu_file_set_error(f, load_res); > - migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, > - MIGRATION_STATUS_FAILED); > - } else { > + dirty_bitmap_mig_cancel_incoming(); > + if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING && > + !migrate_postcopy_ram() && migrate_dirty_bitmaps()) > + { > + error_report("%s: loadvm failed during postcopy: %d. All state > is " > + "migrated except for dirty bitmaps. Some dirty " > + "bitmaps may be lost, and present migrated dirty " > + "bitmaps are correctly migrated and valid.", > + __func__, load_res); > + load_res = 0; /* prevent further exit() */ > + } else { > + error_report("%s: loadvm failed: %d", __func__, load_res); > + migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE, > + MIGRATION_STATUS_FAILED); > + } > + } > + if (load_res >= 0) { > /* > * This looks good, but it's possible that the device loading in the > * main thread hasn't finished yet, and so we might not be in 'RUN' > @@ -1878,6 +1894,8 @@ static void *postcopy_ram_listen_thread(void *opaque) > mis->have_listen_thread = false; > postcopy_state_set(POSTCOPY_INCOMING_END); > > + object_unref(OBJECT(migr)); > + > return NULL; > } > > @@ -2429,6 +2447,8 @@ static bool > postcopy_pause_incoming(MigrationIncomingState *mis) > { > trace_postcopy_pause_incoming(); > > + assert(migrate_postcopy_ram()); > + > /* Clear the triggered bit to allow one recovery */ > mis->postcopy_recover_triggered = false; > > @@ -2513,15 +2533,22 @@ out: > if (ret < 0) { > qemu_file_set_error(f, ret); > > + /* Cancel bitmaps incoming regardless of recovery */ > + dirty_bitmap_mig_cancel_incoming(); > + > /* > * If we are during an active postcopy, then we pause instead > * of bail out to at least keep the VM's dirty data. Note > * that POSTCOPY_INCOMING_LISTENING stage is still not enough, > * during which we're still receiving device states and we > * still haven't yet started the VM on destination. > + * > + * Only RAM postcopy supports recovery. Still, if RAM postcopy is > + * enabled, canceled bitmaps postcopy will not affect RAM postcopy > + * recovering. > */ > if (postcopy_state_get() == POSTCOPY_INCOMING_RUNNING && > - postcopy_pause_incoming(mis)) { > + migrate_postcopy_ram() && postcopy_pause_incoming(mis)) { > /* Reset f to point to the newly created channel */ > f = mis->from_src_file; > goto retry; > -- > 2.21.0 > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK