On Fri, Dec 26, 2025 at 06:19:14PM -0300, Fabiano Rosas wrote:
> Whenever an error occurs between migrate_init() and the start of
> migration_thread, do cleanup immediately after.
> 
> This allows the special casing for resume to be removed from
> migration_connect(), that check is now done at
> migration_connect_error_propagate() which already had a case for
> resume.
> 
> Signed-off-by: Fabiano Rosas <[email protected]>

Didn't spot anything wrong,

Reviewed-by: Peter Xu <[email protected]>

One nitpick below,

> ---
>  migration/migration.c | 42 +++++++++++++++++++++++++++---------------
>  1 file changed, 27 insertions(+), 15 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index 0f1644b276..a66b2d7aaf 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1576,15 +1576,21 @@ static void 
> migration_connect_error_propagate(MigrationState *s, Error *error)
>  {
>      MigrationStatus current = s->state;
>      MigrationStatus next = MIGRATION_STATUS_NONE;
> +    bool resume = false;
>  
>      switch (current) {
>      case MIGRATION_STATUS_SETUP:
>          next = MIGRATION_STATUS_FAILED;
>          break;
>  
> +    case MIGRATION_STATUS_POSTCOPY_PAUSED:
> +        resume = true;
> +        break;
> +
>      case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
>          /* Never fail a postcopy migration; switch back to PAUSED instead */
>          next = MIGRATION_STATUS_POSTCOPY_PAUSED;
> +        resume = true;
>          break;
>  
>      case MIGRATION_STATUS_CANCELLING:
> @@ -1609,6 +1615,10 @@ static void 
> migration_connect_error_propagate(MigrationState *s, Error *error)
>      }
>  
>      migrate_error_propagate(s, error);
> +
> +    if (!resume) {
> +        migration_cleanup(s);
> +    }
>  }
>  
>  void migration_cancel(void)
> @@ -2209,12 +2219,19 @@ static gboolean qmp_migrate_finish_cb(QIOChannel 
> *channel,
>                                        GIOCondition cond,
>                                        void *opaque)
>  {
> +    MigrationState *s = migrate_get_current();
>      MigrationAddress *addr = opaque;
> +    Error *local_err = NULL;
> +
> +    qmp_migrate_finish(addr, &local_err);
> +
> +    if (local_err) {
> +        migration_connect_error_propagate(s, local_err);
> +    }
>  
> -    qmp_migrate_finish(addr, NULL);
>  
>      cpr_state_close();
> -    migrate_hup_delete(migrate_get_current());
> +    migrate_hup_delete(s);

IMHO we should drop these two lines.  For error cases, now they'll be done
in migration_cleanup() above.  Actually for success, it's the same, but in
the cleanup BH.

Maybe there're other cases where we can clean the code a bit on cpr;
there're codes that always does "if (xxx)" and calling them all over the
places, so it's easy to write such code when drafting a feature, but hard
to maintain, because it'll be obscure when it'll really trigger, like this
one.  We can leave the rest for later if there're applicable similar
cleanups.

>      qapi_free_MigrationAddress(addr);
>      return G_SOURCE_REMOVE;
>  }
> @@ -2223,7 +2240,6 @@ void qmp_migrate(const char *uri, bool has_channels,
>                   MigrationChannelList *channels, bool has_detach, bool 
> detach,
>                   bool has_resume, bool resume, Error **errp)
>  {
> -    Error *local_err = NULL;
>      MigrationState *s = migrate_get_current();
>      g_autoptr(MigrationChannel) channel = NULL;
>      MigrationAddress *addr = NULL;
> @@ -2280,6 +2296,13 @@ void qmp_migrate(const char *uri, bool has_channels,
>          return;
>      }
>  
> +    /*
> +     * The migrate_prepare() above calls migrate_init(). From this
> +     * point on, until the end of migration, make sure any failures
> +     * eventually result in a call to migration_cleanup().
> +     */
> +    Error *local_err = NULL;
> +
>      if (!cpr_state_save(cpr_channel, &local_err)) {
>          goto out;
>      }
> @@ -2299,12 +2322,11 @@ void qmp_migrate(const char *uri, bool has_channels,
>                          QAPI_CLONE(MigrationAddress, addr));
>  
>      } else {
> -        qmp_migrate_finish(addr, errp);
> +        qmp_migrate_finish(addr, &local_err);
>      }
>  
>  out:
>      if (local_err) {
> -        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
>          migration_connect_error_propagate(s, error_copy(local_err));
>          error_propagate(errp, local_err);
>      }
> @@ -2335,12 +2357,6 @@ static void qmp_migrate_finish(MigrationAddress *addr, 
> Error **errp)
>      } else {
>          error_setg(&local_err, "uri is not a valid migration protocol");
>      }
> -
> -    if (local_err) {
> -        migration_connect_error_propagate(s, error_copy(local_err));
> -        error_propagate(errp, local_err);
> -        return;
> -    }
>  }
>  
>  void qmp_migrate_cancel(Error **errp)
> @@ -4027,9 +4043,6 @@ void migration_connect(MigrationState *s, Error 
> *error_in)
>      s->expected_downtime = migrate_downtime_limit();
>      if (error_in) {
>          migration_connect_error_propagate(s, error_in);
> -        if (!resume) {
> -            migration_cleanup(s);
> -        }
>          if (s->error) {
>              error_report_err(error_copy(s->error));
>          }
> @@ -4108,7 +4121,6 @@ void migration_connect(MigrationState *s, Error 
> *error_in)
>  
>  fail:
>      migration_connect_error_propagate(s, local_err);
> -    migration_cleanup(s);
>      if (s->error) {
>          error_report_err(error_copy(s->error));
>      }
> -- 
> 2.51.0
> 

-- 
Peter Xu


Reply via email to