Peter Xu <[email protected]> writes:
> On Mon, Dec 02, 2024 at 07:01:36PM -0300, Fabiano Rosas wrote:
>> If the destination side fails at migration_ioc_process_incoming()
>> before starting the coroutine, it will report the error but QEMU will
>> not exit.
>>
>> Set the migration state to FAILED and exit the process if
>> exit-on-error allows.
>>
>> CC: Thomas Huth <[email protected]>
>> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2633
>> Reported-by: Daniel P. Berrangé <[email protected]>
>> Signed-off-by: Fabiano Rosas <[email protected]>
>
> (I skipped the postcopy patches as of now, until we finish the discussion
> in patch 2)
>
>> ---
>> migration/channel.c | 11 ++++++-----
>> migration/migration.c | 31 ++++++++++++++++++-------------
>> migration/migration.h | 2 +-
>> 3 files changed, 25 insertions(+), 19 deletions(-)
>>
>> diff --git a/migration/channel.c b/migration/channel.c
>> index f9de064f3b..6d7f9172d8 100644
>> --- a/migration/channel.c
>> +++ b/migration/channel.c
>> @@ -40,13 +40,14 @@ void migration_channel_process_incoming(QIOChannel *ioc)
>>
>> if (migrate_channel_requires_tls_upgrade(ioc)) {
>> migration_tls_channel_process_incoming(s, ioc, &local_err);
>> +
>> + if (local_err) {
>> + error_report_err(local_err);
>> + }
>
> What if tls processing failed here, do we have similar issue that qemu will
> stall? Do we want to cover that too?
>
Hmm, I think I confused this with the multifd_channel_connect() stuff
where the code is reentrant and we take the "regular" path when called a
second time. I need to look closer into this part.
>> +
>> } else {
>> migration_ioc_register_yank(ioc);
>> - migration_ioc_process_incoming(ioc, &local_err);
>> - }
>> -
>> - if (local_err) {
>> - error_report_err(local_err);
>> + migration_ioc_process_incoming(ioc);
>> }
>> }
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 8a61cc26d7..cd88ebc875 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -943,7 +943,7 @@ static bool migration_should_start_incoming(bool
>> main_channel)
>> return true;
>> }
>>
>> -void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>> +void migration_ioc_process_incoming(QIOChannel *ioc)
>> {
>> MigrationIncomingState *mis = migration_incoming_get_current();
>> Error *local_err = NULL;
>> @@ -966,10 +966,9 @@ void migration_ioc_process_incoming(QIOChannel *ioc,
>> Error **errp)
>> * issue is not possible.
>> */
>> ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
>> - sizeof(channel_magic), errp);
>> -
>> + sizeof(channel_magic),
>> &local_err);
>> if (ret != 0) {
>> - return;
>> + goto err;
>> }
>>
>> default_channel = (channel_magic ==
>> cpu_to_be32(QEMU_VM_FILE_MAGIC));
>> @@ -977,8 +976,8 @@ void migration_ioc_process_incoming(QIOChannel *ioc,
>> Error **errp)
>> default_channel = !mis->from_src_file;
>> }
>>
>> - if (multifd_recv_setup(errp) != 0) {
>> - return;
>> + if (multifd_recv_setup(&local_err) != 0) {
>> + goto err;
>> }
>>
>> if (default_channel) {
>> @@ -995,18 +994,24 @@ void migration_ioc_process_incoming(QIOChannel *ioc,
>> Error **errp)
>> postcopy_preempt_new_channel(mis, f);
>> }
>> if (local_err) {
>> - error_propagate(errp, local_err);
>> - return;
>> + goto err;
>> }
>> }
>>
>> - if (migration_should_start_incoming(default_channel)) {
>> - /* If it's a recovery, we're done */
>> - if (postcopy_try_recover()) {
>> - return;
>> - }
>> + if (migration_should_start_incoming(default_channel) &&
>> + !postcopy_try_recover()) {
>> migration_incoming_process();
>> }
>> +
>> + return;
>> +
>> +err:
>> + error_report_err(local_err);
>> + migrate_set_state(&mis->state, MIGRATION_STATUS_SETUP,
>> + MIGRATION_STATUS_FAILED);
>> + if (mis->exit_on_error) {
>> + exit(EXIT_FAILURE);
>> + }
>> }
>>
>> /**
>> diff --git a/migration/migration.h b/migration/migration.h
>> index 0956e9274b..c367e5ea40 100644
>> --- a/migration/migration.h
>> +++ b/migration/migration.h
>> @@ -477,7 +477,7 @@ void migrate_set_state(MigrationStatus *state,
>> MigrationStatus old_state,
>> MigrationStatus new_state);
>>
>> void migration_fd_process_incoming(QEMUFile *f);
>> -void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp);
>> +void migration_ioc_process_incoming(QIOChannel *ioc);
>> void migration_incoming_process(void);
>>
>> bool migration_has_all_channels(void);
>> --
>> 2.35.3
>>