Peter Xu <[email protected]> writes:
> On Fri, Dec 26, 2025 at 06:19:14PM -0300, Fabiano Rosas wrote:
>> Whenever an error occurs between migrate_init() and the start of
>> migration_thread, do cleanup immediately after.
>>
>> This allows the special casing for resume to be removed from
>> migration_connect(), that check is now done at
>> migration_connect_error_propagate() which already had a case for
>> resume.
>>
>> Signed-off-by: Fabiano Rosas <[email protected]>
>
> Didn't spot anything wrong,
>
> Reviewed-by: Peter Xu <[email protected]>
>
> One nitpick below,
>
>> ---
>> migration/migration.c | 42 +++++++++++++++++++++++++++---------------
>> 1 file changed, 27 insertions(+), 15 deletions(-)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 0f1644b276..a66b2d7aaf 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -1576,15 +1576,21 @@ static void
>> migration_connect_error_propagate(MigrationState *s, Error *error)
>> {
>> MigrationStatus current = s->state;
>> MigrationStatus next = MIGRATION_STATUS_NONE;
>> + bool resume = false;
>>
>> switch (current) {
>> case MIGRATION_STATUS_SETUP:
>> next = MIGRATION_STATUS_FAILED;
>> break;
>>
>> + case MIGRATION_STATUS_POSTCOPY_PAUSED:
>> + resume = true;
>> + break;
>> +
>> case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
>> /* Never fail a postcopy migration; switch back to PAUSED instead */
>> next = MIGRATION_STATUS_POSTCOPY_PAUSED;
>> + resume = true;
>> break;
>>
>> case MIGRATION_STATUS_CANCELLING:
>> @@ -1609,6 +1615,10 @@ static void
>> migration_connect_error_propagate(MigrationState *s, Error *error)
>> }
>>
>> migrate_error_propagate(s, error);
>> +
>> + if (!resume) {
>> + migration_cleanup(s);
>> + }
>> }
>>
>> void migration_cancel(void)
>> @@ -2209,12 +2219,19 @@ static gboolean qmp_migrate_finish_cb(QIOChannel
>> *channel,
>> GIOCondition cond,
>> void *opaque)
>> {
>> + MigrationState *s = migrate_get_current();
>> MigrationAddress *addr = opaque;
>> + Error *local_err = NULL;
>> +
>> + qmp_migrate_finish(addr, &local_err);
>> +
>> + if (local_err) {
>> + migration_connect_error_propagate(s, local_err);
>> + }
>>
>> - qmp_migrate_finish(addr, NULL);
>>
>> cpr_state_close();
>> - migrate_hup_delete(migrate_get_current());
>> + migrate_hup_delete(s);
>
> IMHO we should drop these two lines. For error cases, now they'll be done
> in migration_cleanup() above. Actually for success, it's the same, but in
> the cleanup BH.
>
Hmm that would be good, I'll look into it.
> Maybe there're other cases where we can clean the code a bit on cpr;
> there're codes that always does "if (xxx)" and calling them all over the
> places, so it's easy to write such code when drafting a feature, but hard
> to maintain, because it'll be obscure when it'll really trigger, like this
> one. We can leave the rest for later if there're applicable similar
> cleanups.
>
>> qapi_free_MigrationAddress(addr);
>> return G_SOURCE_REMOVE;
>> }
>> @@ -2223,7 +2240,6 @@ void qmp_migrate(const char *uri, bool has_channels,
>> MigrationChannelList *channels, bool has_detach, bool
>> detach,
>> bool has_resume, bool resume, Error **errp)
>> {
>> - Error *local_err = NULL;
>> MigrationState *s = migrate_get_current();
>> g_autoptr(MigrationChannel) channel = NULL;
>> MigrationAddress *addr = NULL;
>> @@ -2280,6 +2296,13 @@ void qmp_migrate(const char *uri, bool has_channels,
>> return;
>> }
>>
>> + /*
>> + * The migrate_prepare() above calls migrate_init(). From this
>> + * point on, until the end of migration, make sure any failures
>> + * eventually result in a call to migration_cleanup().
>> + */
>> + Error *local_err = NULL;
>> +
>> if (!cpr_state_save(cpr_channel, &local_err)) {
>> goto out;
>> }
>> @@ -2299,12 +2322,11 @@ void qmp_migrate(const char *uri, bool has_channels,
>> QAPI_CLONE(MigrationAddress, addr));
>>
>> } else {
>> - qmp_migrate_finish(addr, errp);
>> + qmp_migrate_finish(addr, &local_err);
>> }
>>
>> out:
>> if (local_err) {
>> - yank_unregister_instance(MIGRATION_YANK_INSTANCE);
>> migration_connect_error_propagate(s, error_copy(local_err));
>> error_propagate(errp, local_err);
>> }
>> @@ -2335,12 +2357,6 @@ static void qmp_migrate_finish(MigrationAddress
>> *addr, Error **errp)
>> } else {
>> error_setg(&local_err, "uri is not a valid migration protocol");
>> }
>> -
>> - if (local_err) {
>> - migration_connect_error_propagate(s, error_copy(local_err));
>> - error_propagate(errp, local_err);
>> - return;
>> - }
>> }
>>
>> void qmp_migrate_cancel(Error **errp)
>> @@ -4027,9 +4043,6 @@ void migration_connect(MigrationState *s, Error
>> *error_in)
>> s->expected_downtime = migrate_downtime_limit();
>> if (error_in) {
>> migration_connect_error_propagate(s, error_in);
>> - if (!resume) {
>> - migration_cleanup(s);
>> - }
>> if (s->error) {
>> error_report_err(error_copy(s->error));
>> }
>> @@ -4108,7 +4121,6 @@ void migration_connect(MigrationState *s, Error
>> *error_in)
>>
>> fail:
>> migration_connect_error_propagate(s, local_err);
>> - migration_cleanup(s);
>> if (s->error) {
>> error_report_err(error_copy(s->error));
>> }
>> --
>> 2.51.0
>>