Prasad Pandit <[email protected]> writes:

> Hi,
>
> On Fri, 9 Jan 2026 at 18:12, Fabiano Rosas <[email protected]> wrote:
>> cpr_set_incoming_mode() is only called on the target side, so
>> migrate_mode() on the source side is the same as s->parameters.mode.
>
> * This message is a little confusing, as the connection between
> cpr_set_incoming_mode() and migrate_mode() is not evident. IIUC,
> during CPR migration mode is not set explicitly on the source side, so
> cpr_get_incoming_mode() returns none, so migrate_mode() sets it to
> s->parameters.mode and returns it. Ideally it should be set to
> s->parameters.mode.  /* I wonder why we need such differention between
> incoming and outgoing modes. */
>

Good question. I had to go check. It's subtle and not properly
documented: cpr_state_load() is called before current_migration
exists. There is no parameters.mode at that point.

>> Use the function to reduce explicit access to s->parameters, we have
>> options.c for that.
>>
>> Cc: Mark Kanda <[email protected]>
>> Cc: Ben Chaney <[email protected]>
>> Reviewed-by: Peter Xu <[email protected]>
>> Signed-off-by: Fabiano Rosas <[email protected]>
>> ---
>>  migration/cpr-exec.c  |  2 +-
>>  migration/migration.c | 27 +++++++++++++--------------
>>  migration/migration.h |  5 ++---
>>  3 files changed, 16 insertions(+), 18 deletions(-)
>>
>> diff --git a/migration/cpr-exec.c b/migration/cpr-exec.c
>> index da287d8031..e315a30f92 100644
>> --- a/migration/cpr-exec.c
>> +++ b/migration/cpr-exec.c
>> @@ -164,7 +164,7 @@ static void cpr_exec_cb(void *opaque)
>>      err = NULL;
>>
>>      /* Note, we can go from state COMPLETED to FAILED */
>
>  /* Going from COMPLETED -> FAILED says something not right here,
> maybe we are prematurely moving to COMPLETED state. */
>

I agree there's a dissonance, but there is no "after" to counterpart
this "prematurely".

cpr-exec happens after migration completes, it needs to receive the
MIG_EVENT_PRECOPY_DONE completion notifier. But then there's a slight
chance that the execvp() may fail.

If it fails, then migration is naturally FAILED and the
MIG_EVENT_PRECOPY_FAILED notifiers are called below.

If it succeeds, then it's another process image, s->state is no more.

There might also be considerations from the management layer side in
seeing the completion event and from the device migration code in seeing
the completion notifier, I'm not sure.

>> -    migration_call_notifiers(s, MIG_EVENT_PRECOPY_FAILED, NULL);
>> +    migration_call_notifiers(MIG_EVENT_PRECOPY_FAILED, NULL);
>>
>>      if (!migration_block_activate(&err)) {
>>          /* error was already reported */
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 4af5baad59..388e0be5a2 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -1534,7 +1534,7 @@ static void migration_cleanup(MigrationState *s)
>>      }
>>      type = migration_has_failed(s) ? MIG_EVENT_PRECOPY_FAILED :
>>                                       MIG_EVENT_PRECOPY_DONE;
>> -    migration_call_notifiers(s, type, NULL);
>> +    migration_call_notifiers(type, NULL);
>>      yank_unregister_instance(MIGRATION_YANK_INSTANCE);
>>  }
>>
>> @@ -1696,10 +1696,9 @@ void migration_remove_notifier(NotifierWithReturn 
>> *notify)
>>      }
>>  }
>>
>> -int migration_call_notifiers(MigrationState *s, MigrationEventType type,
>> -                             Error **errp)
>> +int migration_call_notifiers(MigrationEventType type, Error **errp)
>>  {
>> -    MigMode mode = s->parameters.mode;
>> +    MigMode mode = migrate_mode();
>>      MigrationEvent e;
>>      NotifierWithReturn *notifier;
>>      GSList *elem, *next;
>> @@ -1780,9 +1779,9 @@ bool migration_thread_is_self(void)
>>      return qemu_thread_is_self(&s->thread);
>>  }
>>
>> -bool migrate_mode_is_cpr(MigrationState *s)
>> +bool migrate_mode_is_cpr(void)
>>  {
>> -    MigMode mode = s->parameters.mode;
>> +    MigMode mode = migrate_mode();
>>      return mode == MIG_MODE_CPR_REBOOT ||
>>             mode == MIG_MODE_CPR_TRANSFER ||
>>             mode == MIG_MODE_CPR_EXEC;
>> @@ -2136,7 +2135,7 @@ static bool migrate_prepare(MigrationState *s, bool 
>> resume, Error **errp)
>>          }
>>      }
>>
>> -    if (migrate_mode_is_cpr(s)) {
>> +    if (migrate_mode_is_cpr()) {
>>          const char *conflict = NULL;
>>
>>          if (migrate_postcopy()) {
>> @@ -2252,7 +2251,7 @@ void qmp_migrate(const char *uri, bool has_channels,
>>          return;
>>      }
>>
>> -    if (s->parameters.mode == MIG_MODE_CPR_TRANSFER && !cpr_channel) {
>> +    if (migrate_mode() == MIG_MODE_CPR_TRANSFER && !cpr_channel) {
>>          error_setg(errp, "missing 'cpr' migration channel");
>>          return;
>>      }
>> @@ -2277,7 +2276,7 @@ void qmp_migrate(const char *uri, bool has_channels,
>>       * in which case the target will not listen for the incoming migration
>>       * connection, so qmp_migrate_finish will fail to connect, and then 
>> recover.
>>       */
>> -    if (s->parameters.mode == MIG_MODE_CPR_TRANSFER) {
>> +    if (migrate_mode() == MIG_MODE_CPR_TRANSFER) {
>
> * Shouldn't we use migrate_mode_is_cpr() to avoid this comparison?
>

There are other CPR modes that I don't think we want to match here, only
cpr-transfer.

>>          migrate_hup_add(s, cpr_state_ioc(), 
>> (GSourceFunc)qmp_migrate_finish_cb,
>>                          QAPI_CLONE(MigrationAddress, addr));
>>
>> @@ -2852,7 +2851,7 @@ static int postcopy_start(MigrationState *ms, Error 
>> **errp)
>>       * at the transition to postcopy and after the device state; in 
>> particular
>>       * spice needs to trigger a transition now
>>       */
>> -    migration_call_notifiers(ms, MIG_EVENT_PRECOPY_DONE, NULL);
>> +    migration_call_notifiers(MIG_EVENT_PRECOPY_DONE, NULL);
>>
>>      migration_downtime_end(ms);
>>
>> @@ -2901,7 +2900,7 @@ fail:
>>          migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
>>      }
>>      migration_block_activate(NULL);
>> -    migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL);
>> +    migration_call_notifiers(MIG_EVENT_PRECOPY_FAILED, NULL);
>>      bql_unlock();
>>      return -1;
>>  }
>> @@ -3003,7 +3002,7 @@ static int migration_completion_precopy(MigrationState 
>> *s)
>>
>>      bql_lock();
>>
>> -    if (!migrate_mode_is_cpr(s)) {
>> +    if (!migrate_mode_is_cpr()) {
>>          ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
>>          if (ret < 0) {
>>              goto out_unlock;
>> @@ -4049,7 +4048,7 @@ void migration_connect(MigrationState *s, Error 
>> *error_in)
>>          rate_limit = migrate_max_bandwidth();
>>
>>          /* Notify before starting migration thread */
>> -        if (migration_call_notifiers(s, MIG_EVENT_PRECOPY_SETUP, 
>> &local_err)) {
>> +        if (migration_call_notifiers(MIG_EVENT_PRECOPY_SETUP, &local_err)) {
>>              goto fail;
>>          }
>>      }
>> @@ -4085,7 +4084,7 @@ void migration_connect(MigrationState *s, Error 
>> *error_in)
>>          return;
>>      }
>>
>> -    if (migrate_mode_is_cpr(s)) {
>> +    if (migrate_mode_is_cpr()) {
>>          ret = migration_stop_vm(s, RUN_STATE_FINISH_MIGRATE);
>>          if (ret < 0) {
>>              error_setg(&local_err, "migration_stop_vm failed, error %d", 
>> -ret);
>> diff --git a/migration/migration.h b/migration/migration.h
>> index ccc4e536a5..8b55d4741a 100644
>> --- a/migration/migration.h
>> +++ b/migration/migration.h
>> @@ -538,8 +538,7 @@ bool migrate_has_error(MigrationState *s);
>>
>>  void migration_connect(MigrationState *s, Error *error_in);
>>
>> -int migration_call_notifiers(MigrationState *s, MigrationEventType type,
>> -                             Error **errp);
>> +int migration_call_notifiers(MigrationEventType type, Error **errp);
>>
>>  int migrate_init(MigrationState *s, Error **errp);
>>  bool migration_is_blocked(Error **errp);
>> @@ -548,7 +547,7 @@ bool migration_in_postcopy(void);
>>  bool migration_postcopy_is_alive(MigrationStatus state);
>>  MigrationState *migrate_get_current(void);
>>  bool migration_has_failed(MigrationState *);
>> -bool migrate_mode_is_cpr(MigrationState *);
>> +bool migrate_mode_is_cpr(void);
>>
>>  uint64_t ram_get_total_transferred_pages(void);
>>
>> --
>
> * Otherwise change looks okay. Needs minor fix above.
> Reviewed-by: Prasad Pandit <[email protected]>
>
> Thank you.
> ---
>    - Prasad

Reply via email to