Prasad Pandit <[email protected]> writes:

> From: Prasad Pandit <[email protected]>
>
> When migration connection is broken, the QEMU and libvirtd(8)
> process on the source side receive TCP connection reset
> notification. QEMU sets the migration status to FAILED and
> proceeds to migration_cleanup(). Meanwhile, Libvirtd(8) sends
> a QMP command to migrate_set_capabilities().
>
> The migration_cleanup() and qmp_migrate_set_capabilities()
> calls race with each other. When the latter is invoked first,
> since the migration is not running (FAILED), migration
> capabilities are reset to false, so during migration_cleanup()
> the QEMU process crashes with assertion failure.
>
>   Stack trace of thread 255014:
>    #0  __pthread_kill_implementation (libc.so.6 + 0x822e8)
>    #1  raise (libc.so.6 + 0x3a73c)
>    #2  abort (libc.so.6 + 0x27034)
>    #3  __assert_fail_base (libc.so.6 + 0x34090)
>    #4  __assert_fail (libc.so.6 + 0x34100)
>    #5  yank_unregister_instance (qemu-kvm + 0x8b8280)
>    #6  migrate_fd_cleanup (qemu-kvm + 0x3c6308)
>    #7  migration_bh_dispatch_bh (qemu-kvm + 0x3c2144)
>    #8  aio_bh_poll (qemu-kvm + 0x8ba358)
>    #9  aio_dispatch (qemu-kvm + 0x8a0ab4)
>    #10 aio_ctx_dispatch (qemu-kvm + 0x8bb180)
>

This doesn't look like it's caused by set-capabilities. Seems like:

qmp_migrate()               
-> migrate_init()           
   s->to_dst_file = NULL;
bql_unlock() somewhere
                            bql_lock()
                            migration_cleanup()
                            tmp = s->to_dst_file;
                            if (tmp) {
                                migration_ioc_unregister_yank_from_file(tmp);
                            ...
                            yank_unregister_instance()
                            *sees the function was not unregistered*

Please clarify, there might be some other bug lurking around, such as a
locking issue with qemu_file_lock or even the BQL.

Also, if possible provide an upstream backtrace, or at least mention the
code path based on upstream code. It's ok to keep the downstream
backtrace, but point that out in the commit message.

> Introduce a new migration status FAILING and use it as an
> interim status when an error occurs. Once migration_cleanup()
> is done, it sets the migration status to FAILED. This helps
> to avoid the above race condition and ensuing failure.

I'm fine with the general idea:

i) FAILED and CANCELLED are terminal states. It makes sense to not have
work happen after they're set.

ii) Using an intermediate state, assuming locking/atomic are correct is
a suitable fix for the issue.

iii) Using a FAILING status seems appropriate.

However,

It would be great if we could stop exposing implementation details via
QAPI. Does the user really need to see events for CANCELLING and
FAILING?

It would probably be easier if we kept MigrationStatus as QAPI only and
used a separate mechanism to track the internal states.

That said, we could merge this as is to fix the bug and think about that
later.

>
> Interim status FAILING is set wherever the execution moves
> towards migration_cleanup() directly OR via:
>
>   migration_iteration_finish  bg_migration_iteration_finish
>   -> migration_bh_schedule    -> migration_bh_schedule
>    -> migration_cleanup_bh     -> migration_cleanup_bh
>     -> migration_cleanup        -> migration_cleanup
>      -> FAILED                   -> FAILED
>
> The migration status finally moves to FAILED and reports an
> appropriate error to the user.
>
> Signed-off-by: Prasad Pandit <[email protected]>
> ---
>  migration/migration.c                 | 33 +++++++++++++++------------
>  migration/multifd.c                   |  4 ++--
>  qapi/migration.json                   |  8 ++++---
>  tests/qtest/migration/migration-qmp.c |  3 ++-
>  tests/qtest/migration/precopy-tests.c |  5 ++--
>  5 files changed, 31 insertions(+), 22 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index b316ee01ab..5c32bc8fe5 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1216,6 +1216,7 @@ bool migration_is_running(void)
>      case MIGRATION_STATUS_DEVICE:
>      case MIGRATION_STATUS_WAIT_UNPLUG:
>      case MIGRATION_STATUS_CANCELLING:
> +    case MIGRATION_STATUS_FAILING:
>      case MIGRATION_STATUS_COLO:
>          return true;
>      default:
> @@ -1351,6 +1352,7 @@ static void fill_source_migration_info(MigrationInfo 
> *info)
>          break;
>      case MIGRATION_STATUS_ACTIVE:
>      case MIGRATION_STATUS_CANCELLING:
> +    case MIGRATION_STATUS_FAILING:
>      case MIGRATION_STATUS_POSTCOPY_DEVICE:
>      case MIGRATION_STATUS_POSTCOPY_ACTIVE:
>      case MIGRATION_STATUS_PRE_SWITCHOVER:
> @@ -1409,6 +1411,7 @@ static void 
> fill_destination_migration_info(MigrationInfo *info)
>      case MIGRATION_STATUS_POSTCOPY_ACTIVE:
>      case MIGRATION_STATUS_POSTCOPY_PAUSED:
>      case MIGRATION_STATUS_POSTCOPY_RECOVER:
> +    case MIGRATION_STATUS_FAILING:
>      case MIGRATION_STATUS_FAILED:
>      case MIGRATION_STATUS_COLO:
>          info->has_status = true;
> @@ -1531,6 +1534,9 @@ static void migration_cleanup(MigrationState *s)
>      if (s->state == MIGRATION_STATUS_CANCELLING) {
>          migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
>                            MIGRATION_STATUS_CANCELLED);
> +    } else if (s->state == MIGRATION_STATUS_FAILING) {
> +        migrate_set_state(&s->state, MIGRATION_STATUS_FAILING,
> +                          MIGRATION_STATUS_FAILED);
>      }
>  
>      if (s->error) {
> @@ -1584,7 +1590,7 @@ static void migration_connect_set_error(MigrationState 
> *s, const Error *error)
>  
>      switch (current) {
>      case MIGRATION_STATUS_SETUP:
> -        next = MIGRATION_STATUS_FAILED;
> +        next = MIGRATION_STATUS_FAILING;
>          break;
>      case MIGRATION_STATUS_POSTCOPY_RECOVER_SETUP:
>          /* Never fail a postcopy migration; switch back to PAUSED instead */
> @@ -1728,6 +1734,7 @@ bool migration_has_failed(MigrationState *s)
>  {
>      return (s->state == MIGRATION_STATUS_CANCELLING ||
>              s->state == MIGRATION_STATUS_CANCELLED ||
> +            s->state == MIGRATION_STATUS_FAILING ||
>              s->state == MIGRATION_STATUS_FAILED);
>  }
>  
> @@ -2744,7 +2751,7 @@ static int postcopy_start(MigrationState *ms, Error 
> **errp)
>          if (postcopy_preempt_establish_channel(ms)) {
>              if (ms->state != MIGRATION_STATUS_CANCELLING) {
>                  migrate_set_state(&ms->state, ms->state,
> -                                  MIGRATION_STATUS_FAILED);
> +                                  MIGRATION_STATUS_FAILING);
>              }
>              error_setg(errp, "%s: Failed to establish preempt channel",
>                         __func__);
> @@ -2907,7 +2914,7 @@ fail_closefb:
>      qemu_fclose(fb);
>  fail:
>      if (ms->state != MIGRATION_STATUS_CANCELLING) {
> -        migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
> +        migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILING);
>      }

This is a good example where having MigrationStatus makes the code more
complicated. This could be exiting=true, running=false, etc. It
shouldn't matter why this routine failed. If we reach
migration_cleanup() and, at the very end, state is CANCELLING, then we
know the cancel command has caused this and set the state to
CANCELLED. If the state was something else, then it's unintended and we
set FAILED.

>      migration_block_activate(NULL);
>      migration_call_notifiers(ms, MIG_EVENT_PRECOPY_FAILED, NULL);
> @@ -3102,7 +3109,7 @@ fail:
>      }
>  
>      if (s->state != MIGRATION_STATUS_CANCELLING) {
> -        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> +        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILING);
>      }
>  }
>  
> @@ -3139,7 +3146,7 @@ static void bg_migration_completion(MigrationState *s)
>  
>  fail:
>      migrate_set_state(&s->state, current_active_state,
> -                      MIGRATION_STATUS_FAILED);
> +                      MIGRATION_STATUS_FAILING);
>  }
>  
>  typedef enum MigThrError {
> @@ -3341,7 +3348,7 @@ static MigThrError 
> migration_detect_error(MigrationState *s)
>           * For precopy (or postcopy with error outside IO, or before dest
>           * starts), we fail with no time.
>           */
> -        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
> +        migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILING);
>          trace_migration_thread_file_err();
>  
>          /* Time to stop the migration, now. */
> @@ -3572,7 +3579,7 @@ static void migration_iteration_finish(MigrationState 
> *s)
>          migrate_start_colo_process(s);
>          s->vm_old_state = RUN_STATE_RUNNING;
>          /* Fallthrough */
> -    case MIGRATION_STATUS_FAILED:
> +    case MIGRATION_STATUS_FAILING:
>      case MIGRATION_STATUS_CANCELLED:
>      case MIGRATION_STATUS_CANCELLING:
>          if (!migration_block_activate(&local_err)) {
> @@ -3631,7 +3638,7 @@ static void 
> bg_migration_iteration_finish(MigrationState *s)
>      switch (s->state) {
>      case MIGRATION_STATUS_COMPLETED:
>      case MIGRATION_STATUS_ACTIVE:
> -    case MIGRATION_STATUS_FAILED:
> +    case MIGRATION_STATUS_FAILING:
>      case MIGRATION_STATUS_CANCELLED:
>      case MIGRATION_STATUS_CANCELLING:
>          break;
> @@ -3821,7 +3828,7 @@ static void *migration_thread(void *opaque)
>          migrate_set_error(s, local_err);
>          error_free(local_err);
>          migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> -                          MIGRATION_STATUS_FAILED);
> +                          MIGRATION_STATUS_FAILING);
>          goto out;
>      }
>  
> @@ -3945,8 +3952,6 @@ static void *bg_migration_thread(void *opaque)
>      if (ret) {
>          migrate_set_error(s, local_err);
>          error_free(local_err);
> -        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> -                          MIGRATION_STATUS_FAILED);
>          goto fail_setup;
>      }
>  
> @@ -4008,12 +4013,12 @@ static void *bg_migration_thread(void *opaque)
>  
>  fail:
>      if (early_fail) {
> -        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> -                MIGRATION_STATUS_FAILED);
>          bql_unlock();
>      }
>  
>  fail_setup:
> +    migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
> +                        MIGRATION_STATUS_FAILING);
>      bg_migration_iteration_finish(s);
>  
>      qemu_fclose(fb);
> @@ -4128,7 +4133,7 @@ void migration_connect(MigrationState *s, Error 
> *error_in)
>  fail:
>      migrate_set_error(s, local_err);
>      if (s->state != MIGRATION_STATUS_CANCELLING) {
> -        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
> +        migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILING);
>      }
>      error_report_err(local_err);
>      migration_cleanup(s);
> diff --git a/migration/multifd.c b/migration/multifd.c
> index 3203dc98e1..970633474e 100644
> --- a/migration/multifd.c
> +++ b/migration/multifd.c
> @@ -434,7 +434,7 @@ static void multifd_send_set_error(Error *err)
>              s->state == MIGRATION_STATUS_DEVICE ||
>              s->state == MIGRATION_STATUS_ACTIVE) {
>              migrate_set_state(&s->state, s->state,
> -                              MIGRATION_STATUS_FAILED);
> +                              MIGRATION_STATUS_FAILING);
>          }
>      }
>  }
> @@ -1001,7 +1001,7 @@ bool multifd_send_setup(void)
>  
>  err:
>      migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
> -                      MIGRATION_STATUS_FAILED);
> +                      MIGRATION_STATUS_FAILING);
>      return false;
>  }
>  
> diff --git a/qapi/migration.json b/qapi/migration.json
> index cf023bd29d..85f4961781 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -158,7 +158,9 @@
>  #
>  # @completed: migration is finished.
>  #
> -# @failed: some error occurred during migration process.
> +# @failing: error occurred during migration, clean-up underway.

"wait a moment, we're picking up the pieces" =D

> +#
> +# @failed: error occurred during migration, clean-up done.
>  #
>  # @colo: VM is in the process of fault tolerance, VM can not get into
>  #     this state unless colo capability is enabled for migration.
> @@ -181,8 +183,8 @@
>    'data': [ 'none', 'setup', 'cancelling', 'cancelled',
>              'active', 'postcopy-device', 'postcopy-active',
>              'postcopy-paused', 'postcopy-recover-setup',
> -            'postcopy-recover', 'completed', 'failed', 'colo',
> -            'pre-switchover', 'device', 'wait-unplug' ] }
> +            'postcopy-recover', 'completed', 'failing', 'failed',
> +            'colo', 'pre-switchover', 'device', 'wait-unplug' ] }
>  
>  ##
>  # @VfioStats:
> diff --git a/tests/qtest/migration/migration-qmp.c 
> b/tests/qtest/migration/migration-qmp.c
> index c803fcee9d..ceb40efd56 100644
> --- a/tests/qtest/migration/migration-qmp.c
> +++ b/tests/qtest/migration/migration-qmp.c
> @@ -241,7 +241,8 @@ void wait_for_migration_fail(QTestState *from, bool 
> allow_active)
>      do {
>          status = migrate_query_status(from);
>          bool result = !strcmp(status, "setup") || !strcmp(status, "failed") 
> ||
> -            (allow_active && !strcmp(status, "active"));
> +            (allow_active && !strcmp(status, "active")) ||
> +            !strcmp(status, "failing");
>          if (!result) {
>              fprintf(stderr, "%s: unexpected status status=%s 
> allow_active=%d\n",
>                      __func__, status, allow_active);
> diff --git a/tests/qtest/migration/precopy-tests.c 
> b/tests/qtest/migration/precopy-tests.c
> index 57ca623de5..a04442df96 100644
> --- a/tests/qtest/migration/precopy-tests.c
> +++ b/tests/qtest/migration/precopy-tests.c
> @@ -807,7 +807,8 @@ static void test_cancel_src_after_status(void *opaque)
>      } else if (g_str_equal(phase, "completed")) {
>          test_cancel_src_after_complete(from, to, uri, phase);
>  
> -    } else if (g_str_equal(phase, "failed")) {
> +    } else if (g_str_equal(phase, "failing") ||
> +               g_str_equal(phase, "failed")) {
>          test_cancel_src_after_failed(from, to, uri, phase);
>  
>      } else if (g_str_equal(phase, "none")) {
> @@ -1316,7 +1317,7 @@ void migration_test_add_precopy(MigrationTestEnv *env)
>      }
>  
>      /* ensure new status don't go unnoticed */
> -    assert(MIGRATION_STATUS__MAX == 16);
> +    assert(MIGRATION_STATUS__MAX == 17);
>  
>      for (int i = MIGRATION_STATUS_NONE; i < MIGRATION_STATUS__MAX; i++) {
>          switch (i) {

Reply via email to