* Peter Xu (pet...@redhat.com) wrote:
> This patch allows the postcopy preempt channel to be created
> asynchronously.  The benefit is that when the connection is slow, we won't
> take the BQL (and potentially block all things like QMP) for a long time
> without releasing.
> 
> A function postcopy_preempt_wait_channel() is introduced, allowing the
> migration thread to be able to wait on the channel creation.  The channel
> is always created by the main thread, in which we'll kick a new semaphore
> to tell the migration thread that the channel has created.
> 
> We'll need to wait for the new channel in two places: (1) when there's a
> new postcopy migration that is starting, or (2) when there's a postcopy
> migration to resume.
> 
> For the start of migration, we don't need to wait for this channel until
> when we want to start postcopy, aka, postcopy_start().  We'll fail the
> migration if we found that the channel creation failed (which should
> probably not happen at all in 99% of the cases, because the main channel is
> using the same network topology).
> 
> For a postcopy recovery, we'll need to wait in postcopy_pause().  In that
> case if the channel creation failed, we can't fail the migration or we'll
> crash the VM, instead we keep in PAUSED state, waiting for yet another
> recovery.
> 
> Signed-off-by: Peter Xu <pet...@redhat.com>

Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>

> ---
>  migration/migration.c    | 16 ++++++++++++
>  migration/migration.h    |  7 +++++
>  migration/postcopy-ram.c | 56 +++++++++++++++++++++++++++++++---------
>  migration/postcopy-ram.h |  1 +
>  4 files changed, 68 insertions(+), 12 deletions(-)
> 
> diff --git a/migration/migration.c b/migration/migration.c
> index a0db5de685..cce741e20e 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -3020,6 +3020,12 @@ static int postcopy_start(MigrationState *ms)
>      int64_t bandwidth = migrate_max_postcopy_bandwidth();
>      bool restart_block = false;
>      int cur_state = MIGRATION_STATUS_ACTIVE;
> +
> +    if (postcopy_preempt_wait_channel(ms)) {
> +        migrate_set_state(&ms->state, ms->state, MIGRATION_STATUS_FAILED);
> +        return -1;
> +    }
> +
>      if (!migrate_pause_before_switchover()) {
>          migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
>                            MIGRATION_STATUS_POSTCOPY_ACTIVE);
> @@ -3501,6 +3507,14 @@ static MigThrError postcopy_pause(MigrationState *s)
>          if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
>              /* Woken up by a recover procedure. Give it a shot */
>  
> +            if (postcopy_preempt_wait_channel(s)) {
> +                /*
> +                 * Preempt enabled, and new channel create failed; loop
> +                 * back to wait for another recovery.
> +                 */
> +                continue;
> +            }
> +
>              /*
>               * Firstly, let's wake up the return path now, with a new
>               * return path channel.
> @@ -4360,6 +4374,7 @@ static void migration_instance_finalize(Object *obj)
>      qemu_sem_destroy(&ms->postcopy_pause_sem);
>      qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
>      qemu_sem_destroy(&ms->rp_state.rp_sem);
> +    qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
>      error_free(ms->error);
>  }
>  
> @@ -4406,6 +4421,7 @@ static void migration_instance_init(Object *obj)
>      qemu_sem_init(&ms->rp_state.rp_sem, 0);
>      qemu_sem_init(&ms->rate_limit_sem, 0);
>      qemu_sem_init(&ms->wait_unplug_sem, 0);
> +    qemu_sem_init(&ms->postcopy_qemufile_src_sem, 0);
>      qemu_mutex_init(&ms->qemu_file_lock);
>  }
>  
> diff --git a/migration/migration.h b/migration/migration.h
> index 91f845e9e4..f898b8547a 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -219,6 +219,13 @@ struct MigrationState {
>      QEMUFile *to_dst_file;
>      /* Postcopy specific transfer channel */
>      QEMUFile *postcopy_qemufile_src;
> +    /*
> +     * It is posted when the preempt channel is established.  Note: this is
> +     * used for both the start or recover of a postcopy migration.  We'll
> +     * post to this sem every time a new preempt channel is created in the
> +     * main thread, and we keep post() and wait() in pair.
> +     */
> +    QemuSemaphore postcopy_qemufile_src_sem;
>      QIOChannelBuffer *bioc;
>      /*
>       * Protects to_dst_file/from_dst_file pointers.  We need to make sure we
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index b3c81b46f6..1bb603051a 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -1552,10 +1552,50 @@ bool 
> postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
>      return true;
>  }
>  
> -int postcopy_preempt_setup(MigrationState *s, Error **errp)
> +static void
> +postcopy_preempt_send_channel_new(QIOTask *task, gpointer opaque)
>  {
> -    QIOChannel *ioc;
> +    MigrationState *s = opaque;
> +    QIOChannel *ioc = QIO_CHANNEL(qio_task_get_source(task));
> +    Error *local_err = NULL;
> +
> +    if (qio_task_propagate_error(task, &local_err)) {
> +        /* Something wrong happened.. */
> +        migrate_set_error(s, local_err);
> +        error_free(local_err);
> +    } else {
> +        migration_ioc_register_yank(ioc);
> +        s->postcopy_qemufile_src = qemu_fopen_channel_output(ioc);
> +        trace_postcopy_preempt_new_channel();
> +    }
> +
> +    /*
> +     * Kick the waiter in all cases.  The waiter should check upon
> +     * postcopy_qemufile_src to know whether it failed or not.
> +     */
> +    qemu_sem_post(&s->postcopy_qemufile_src_sem);
> +    object_unref(OBJECT(ioc));
> +}
>  
> +/* Returns 0 if channel established, -1 for error. */
> +int postcopy_preempt_wait_channel(MigrationState *s)
> +{
> +    /* If preempt not enabled, no need to wait */
> +    if (!migrate_postcopy_preempt()) {
> +        return 0;
> +    }
> +
> +    /*
> +     * We need the postcopy preempt channel to be established before
> +     * starting doing anything.
> +     */
> +    qemu_sem_wait(&s->postcopy_qemufile_src_sem);
> +
> +    return s->postcopy_qemufile_src ? 0 : -1;
> +}
> +
> +int postcopy_preempt_setup(MigrationState *s, Error **errp)
> +{
>      if (!migrate_postcopy_preempt()) {
>          return 0;
>      }
> @@ -1566,16 +1606,8 @@ int postcopy_preempt_setup(MigrationState *s, Error 
> **errp)
>          return -1;
>      }
>  
> -    ioc = socket_send_channel_create_sync(errp);
> -
> -    if (ioc == NULL) {
> -        return -1;
> -    }
> -
> -    migration_ioc_register_yank(ioc);
> -    s->postcopy_qemufile_src = qemu_fopen_channel_output(ioc);
> -
> -    trace_postcopy_preempt_new_channel();
> +    /* Kick an async task to connect */
> +    socket_send_channel_create(postcopy_preempt_send_channel_new, s);
>  
>      return 0;
>  }
> diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
> index 34b1080cde..6147bf7d1d 100644
> --- a/migration/postcopy-ram.h
> +++ b/migration/postcopy-ram.h
> @@ -192,5 +192,6 @@ enum PostcopyChannels {
>  
>  bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile 
> *file);
>  int postcopy_preempt_setup(MigrationState *s, Error **errp);
> +int postcopy_preempt_wait_channel(MigrationState *s);
>  
>  #endif
> -- 
> 2.32.0
> 
-- 
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK


Reply via email to