* Christian Borntraeger (borntrae...@de.ibm.com) wrote: > Am 07.07.2015 um 15:08 schrieb Juan Quintela: > > This includes a new section that for now just stores the current qemu state. > > > > Right now, there are only one way to control what is the state of the > > target after migration. > > > > - If you run the target qemu with -S, it would start stopped. > > - If you run the target qemu without -S, it would run just after migration > > finishes. > > > > The problem here is what happens if we start the target without -S and > > there happens one error during migration that puts current state as > > -EIO. Migration would ends (notice that the error happend doing block > > IO, network IO, i.e. nothing related with migration), and when > > migration finish, we would just "continue" running on destination, > > probably hanging the guest/corruption data, whatever. > > > > Signed-off-by: Juan Quintela <quint...@redhat.com> > > Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > > This is bisected to cause a regression on s390. > > A guest restarts (booting) after managedsave/start instead of continuing. > > Do you have any idea what might be wrong?
I'd add some debug to the pre_save and post_load to see what state value is being saved/restored. Also, does that regression happen when doing the save/restore using the same/latest git, or is it a load from an older version? Dave > > > --- > > include/migration/migration.h | 1 + > > migration/migration.c | 105 > > +++++++++++++++++++++++++++++++++++++++--- > > trace-events | 3 ++ > > vl.c | 1 + > > 4 files changed, 103 insertions(+), 7 deletions(-) > > > > diff --git a/include/migration/migration.h b/include/migration/migration.h > > index afba233..86df6cc 100644 > > --- a/include/migration/migration.h > > +++ b/include/migration/migration.h > > @@ -197,4 +197,5 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t > > block_offset, > > > > void ram_mig_init(void); > > void savevm_skip_section_footers(void); > > +void register_global_state(void); > > #endif > > diff --git a/migration/migration.c b/migration/migration.c > > index c6ac08a..5e436f7 100644 > > --- a/migration/migration.c > > +++ b/migration/migration.c > > @@ -26,6 +26,7 @@ > > #include "qemu/thread.h" > > #include "qmp-commands.h" > > #include "trace.h" > > +#include "qapi/util.h" > > > > #define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ > > > > @@ -97,6 +98,83 @@ void migration_incoming_state_destroy(void) > > mis_current = NULL; > > } > > > > + > > +typedef struct { > > + uint32_t size; > > + uint8_t runstate[100]; > > +} GlobalState; > > + > > +static GlobalState global_state; > > + > > +static int global_state_store(void) > > +{ > > + if (!runstate_store((char *)global_state.runstate, > > + sizeof(global_state.runstate))) { > > + error_report("runstate name too big: %s", global_state.runstate); > > + trace_migrate_state_too_big(); > > + return -EINVAL; > > + } > > + return 0; > > +} > > + > > +static char *global_state_get_runstate(void) > > +{ > > + return (char *)global_state.runstate; > > +} > > + > > +static int global_state_post_load(void *opaque, int version_id) > > +{ > > + GlobalState *s = opaque; > > + int ret = 0; > > + char *runstate = (char *)s->runstate; > > + > > + trace_migrate_global_state_post_load(runstate); > > + > > + if (strcmp(runstate, "running") != 0) { > > + Error *local_err = NULL; > > + int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX, > > + -1, &local_err); > > + > > + if (r == -1) { > > + if (local_err) { > > + error_report_err(local_err); > > + } > > + return -EINVAL; > > + } > > + ret = vm_stop_force_state(r); > > + } > > + > > + return ret; > > +} > > + > > +static void global_state_pre_save(void *opaque) > > +{ > > + GlobalState *s = opaque; > > + > > + trace_migrate_global_state_pre_save((char *)s->runstate); > > + s->size = strlen((char *)s->runstate) + 1; > > +} > > + > > +static const VMStateDescription vmstate_globalstate = { > > + .name = "globalstate", > > + .version_id = 1, > > + .minimum_version_id = 1, > > + .post_load = global_state_post_load, > > + .pre_save = global_state_pre_save, > > + .fields = (VMStateField[]) { > > + VMSTATE_UINT32(size, GlobalState), > > + VMSTATE_BUFFER(runstate, GlobalState), > > + VMSTATE_END_OF_LIST() > > + }, > > +}; > > + > > +void register_global_state(void) > > +{ > > + /* We would use it independently that we receive it */ > > + strcpy((char *)&global_state.runstate, ""); > > + vmstate_register(NULL, 0, &vmstate_globalstate, &global_state); > > +} > > + > > /* > > * Called on -incoming with a defer: uri. > > * The migration can be started later after any parameters have been > > @@ -164,10 +242,20 @@ static void process_incoming_migration_co(void > > *opaque) > > exit(EXIT_FAILURE); > > } > > > > - if (autostart) { > > + /* runstate == "" means that we haven't received it through the > > + * wire, so we obey autostart. runstate == runing means that we > > + * need to run it, we need to make sure that we do it after > > + * everything else has finished. Every other state change is done > > + * at the post_load function */ > > + > > + if (strcmp(global_state_get_runstate(), "running") == 0) { > > vm_start(); > > - } else { > > - runstate_set(RUN_STATE_PAUSED); > > + } else if (strcmp(global_state_get_runstate(), "") == 0) { > > + if (autostart) { > > + vm_start(); > > + } else { > > + runstate_set(RUN_STATE_PAUSED); > > + } > > } > > migrate_decompress_threads_join(); > > } > > @@ -793,10 +881,13 @@ static void *migration_thread(void *opaque) > > qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); > > old_vm_running = runstate_is_running(); > > > > - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); > > - if (ret >= 0) { > > - qemu_file_set_rate_limit(s->file, INT64_MAX); > > - qemu_savevm_state_complete(s->file); > > + ret = global_state_store(); > > + if (!ret) { > > + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); > > + if (ret >= 0) { > > + qemu_file_set_rate_limit(s->file, INT64_MAX); > > + qemu_savevm_state_complete(s->file); > > + } > > } > > qemu_mutex_unlock_iothread(); > > > > diff --git a/trace-events b/trace-events > > index a38dd2e..c0111d0 100644 > > --- a/trace-events > > +++ b/trace-events > > @@ -1403,6 +1403,9 @@ migrate_fd_error(void) "" > > migrate_fd_cancel(void) "" > > migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max > > %" PRIu64 > > migrate_transferred(uint64_t tranferred, uint64_t time_spent, double > > bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " > > bandwidth %g max_size %" PRId64 > > +migrate_state_too_big(void) "" > > +migrate_global_state_post_load(const char *state) "loaded state: %s" > > +migrate_global_state_pre_save(const char *state) "saved state: %s" > > > > # migration/rdma.c > > qemu_rdma_accept_incoming_migration(void) "" > > diff --git a/vl.c b/vl.c > > index 19a8737..cfa6133 100644 > > --- a/vl.c > > +++ b/vl.c > > @@ -4624,6 +4624,7 @@ int main(int argc, char **argv, char **envp) > > return 0; > > } > > > > + register_global_state(); > > if (incoming) { > > Error *local_err = NULL; > > qemu_start_incoming_migration(incoming, &local_err); > > > -- Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK