On Thu, Feb 08, 2024 at 10:54:03AM -0800, Steve Sistare wrote:
> When migration for cpr is initiated, stop the vm and set state
> RUN_STATE_FINISH_MIGRATE before ram is saved.  This eliminates the
> possibility of ram and device state being out of sync, and guarantees
> that a guest in the suspended state remains suspended, because qmp_cont
> rejects a cont command in the RUN_STATE_FINISH_MIGRATE state.
> 
> Signed-off-by: Steve Sistare <steven.sist...@oracle.com>
> ---
>  include/migration/misc.h |  1 +
>  migration/migration.c    | 32 +++++++++++++++++++++++++-------
>  2 files changed, 26 insertions(+), 7 deletions(-)
> 
> diff --git a/include/migration/misc.h b/include/migration/misc.h
> index 6dc234b..54c99a3 100644
> --- a/include/migration/misc.h
> +++ b/include/migration/misc.h
> @@ -60,6 +60,7 @@ void migration_object_init(void);
>  void migration_shutdown(void);
>  bool migration_is_idle(void);
>  bool migration_is_active(MigrationState *);
> +bool migrate_mode_is_cpr(MigrationState *);
>  
>  typedef enum MigrationEventType {
>      MIG_EVENT_PRECOPY_SETUP,
> diff --git a/migration/migration.c b/migration/migration.c
> index d1fce9e..fc5c587 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1603,6 +1603,11 @@ bool migration_is_active(MigrationState *s)
>              s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
>  }
>  
> +bool migrate_mode_is_cpr(MigrationState *s)
> +{
> +    return s->parameters.mode == MIG_MODE_CPR_REBOOT;
> +}
> +
>  int migrate_init(MigrationState *s, Error **errp)
>  {
>      int ret;
> @@ -2651,13 +2656,14 @@ static int migration_completion_precopy(MigrationState *s,
>      bql_lock();
>      migration_downtime_start(s);
>  
> -    s->vm_old_state = runstate_get();
> -    global_state_store();
> -
> -    ret = migration_stop_vm(RUN_STATE_FINISH_MIGRATE);
> -    trace_migration_completion_vm_stop(ret);
> -    if (ret < 0) {
> -        goto out_unlock;
> +    if (!migrate_mode_is_cpr(s)) {
> +        s->vm_old_state = runstate_get();
> +        global_state_store();
> +        ret = migration_stop_vm(RUN_STATE_FINISH_MIGRATE);
> +        trace_migration_completion_vm_stop(ret);
> +        if (ret < 0) {
> +            goto out_unlock;
> +        }
>      }
>  
>      ret = migration_maybe_pause(s, current_active_state,
> @@ -3576,6 +3582,7 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>      Error *local_err = NULL;
>      uint64_t rate_limit;
>      bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
> +    int ret;
>  
>      /*
>       * If there's a previous error, free it and prepare for another one.
> @@ -3651,6 +3658,17 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
>          goto fail;
>      }
>  
> +    if (migrate_mode_is_cpr(s)) {
> +        s->vm_old_state = runstate_get();
> +        global_state_store();
> +        ret = migration_stop_vm(RUN_STATE_FINISH_MIGRATE);
> +        trace_migration_completion_vm_stop(ret);
> +        if (ret < 0) {
> +            error_setg(&local_err, "migration_stop_vm failed, error %d", -ret);
> +            goto fail;
> +        }
> +    }

Could we have a helper function for the shared code?

How about postcopy?  I know it's nonsense to enable postcopy for cpr, but
IIUC we don't yet forbid a user from doing so.  Maybe we should?

> +
>      if (migrate_background_snapshot()) {
>          qemu_thread_create(&s->thread, "bg_snapshot",
>                  bg_migration_thread, s, QEMU_THREAD_JOINABLE);
> -- 
> 1.8.3.1
> 

-- 
Peter Xu


Reply via email to