On Tue, May 23, 2017 at 02:31:07PM +0300, Alexey Perevalov wrote:
> This patch adds request to kernel space for UFFD_FEATURE_THREAD_ID,
> in case when this feature is provided by kernel.
> 
> PostcopyBlocktimeContext is incapsulated inside postcopy-ram.c,
> due to it's postcopy only feature.
> Also it defines PostcopyBlocktimeContext's instance live time.
> Information from PostcopyBlocktimeContext instance will be provided
> much after postcopy migration end, instance of PostcopyBlocktimeContext
> will live till QEMU exit, but part of it (vcpu_addr,
> page_fault_vcpu_time) used only during calculation, will be released
> when postcopy ended or failed.
> 
> To enable postcopy blocktime calculation on destination, need to request
> proper capabiltiy (Patch for documentation will be at the tail of the patch
> set).
> 
> As an example following command enable that capability, assume QEMU was
> started with
> -chardev socket,id=charmonitor,path=/var/lib/migrate-vm-monitor.sock
> option to control it
> 
> [root@host]#printf "{\"execute\" : \"qmp_capabilities\"}\r\n \
> {\"execute\": \"migrate-set-capabilities\" , \"arguments\":   {
> \"capabilities\": [ { \"capability\": \"postcopy-blocktime\", \"state\":
> true } ] } }" | nc -U /var/lib/migrate-vm-monitor.sock
> 
> Or just with HMP
> (qemu) migrate_set_capability postcopy-blocktime on
> 
> Signed-off-by: Alexey Perevalov <a.pereva...@samsung.com>
> ---
>  include/migration/migration.h |  8 +++++
>  migration/postcopy-ram.c      | 80 
> +++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 88 insertions(+)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index 2951253..449cb07 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -57,6 +57,8 @@ enum mig_rp_message_type {
>  
>  typedef QLIST_HEAD(, LoadStateEntry) LoadStateEntry_Head;
>  
> +struct PostcopyBlocktimeContext;
> +
>  /* State for the incoming migration */
>  struct MigrationIncomingState {
>      QEMUFile *from_src_file;
> @@ -97,6 +99,12 @@ struct MigrationIncomingState {
>  
>      /* See savevm.c */
>      LoadStateEntry_Head loadvm_handlers;
> +
> +    /*
> +     * PostcopyBlocktimeContext to keep information for postcopy
> +     * live migration, to calculate vCPU block time
> +     * */
> +    struct PostcopyBlocktimeContext *blocktime_ctx;
>  };
>  
>  MigrationIncomingState *migration_incoming_get_current(void);
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 4f3f495..5435a40 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -59,6 +59,73 @@ struct PostcopyDiscardState {
>  #include <sys/eventfd.h>
>  #include <linux/userfaultfd.h>
>  
> +typedef struct PostcopyBlocktimeContext {
> +    /* time when page fault initiated per vCPU */
> +    int64_t *page_fault_vcpu_time;
> +    /* page address per vCPU */
> +    uint64_t *vcpu_addr;
> +    int64_t total_blocktime;
> +    /* blocktime per vCPU */
> +    int64_t *vcpu_blocktime;
> +    /* point in time when last page fault was initiated */
> +    int64_t last_begin;
> +    /* number of vCPU are suspended */
> +    int smp_cpus_down;
> +
> +    /*
> +     * Handler for exit event, necessary for
> +     * releasing whole blocktime_ctx
> +     */
> +    Notifier exit_notifier;
> +    /*
> +     * Handler for postcopy event, necessary for
> +     * releasing unnecessary part of blocktime_ctx
> +     */
> +    Notifier postcopy_notifier;
> +} PostcopyBlocktimeContext;
> +
> +static void destroy_blocktime_context(struct PostcopyBlocktimeContext *ctx)
> +{
> +    g_free(ctx->page_fault_vcpu_time);
> +    g_free(ctx->vcpu_addr);
> +    g_free(ctx->vcpu_blocktime);
> +    g_free(ctx);
> +}
> +
> +static void postcopy_migration_cb(Notifier *n, void *data)
> +{
> +    PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext,
> +                                               postcopy_notifier);
> +    MigrationState *s = data;
> +    if (migration_has_finished(s) || migration_has_failed(s)) {
> +        g_free(ctx->page_fault_vcpu_time);
> +        /* g_free is NULL robust */
> +        ctx->page_fault_vcpu_time = NULL;
> +        g_free(ctx->vcpu_addr);
> +        ctx->vcpu_addr = NULL;
> +    }
> +}
> +
> +static void migration_exit_cb(Notifier *n, void *data)
> +{
> +    PostcopyBlocktimeContext *ctx = container_of(n, PostcopyBlocktimeContext,
> +                                               exit_notifier);
> +    destroy_blocktime_context(ctx);
> +}
> +
> +static struct PostcopyBlocktimeContext *blocktime_context_new(void)
> +{
> +    PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
> +    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
> +    ctx->vcpu_addr = g_new0(uint64_t, smp_cpus);
> +    ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
> +
> +    ctx->exit_notifier.notify = migration_exit_cb;
> +    ctx->postcopy_notifier.notify = postcopy_migration_cb;
> +    qemu_add_exit_notifier(&ctx->exit_notifier);
> +    add_migration_state_change_notifier(&ctx->postcopy_notifier);

I think we just need a global MAX_VCPUS macro, then we can make the
whole struct static. But I admit this is off topic for the current
thread. The point is that I don't see much value in such fine-grained
memory management here: even with, say, a maximum of 1K vcpus, the
total is no more than 8B*3*1K=24KB of memory...

IMHO this just makes things complicated.

> +    return ctx;
> +}
>  
>  /**
>   * receive_ufd_features: check userfault fd features, to request only 
> supported
> @@ -151,6 +218,19 @@ static bool ufd_check_and_apply(int ufd, 
> MigrationIncomingState *mis)
>          }
>      }
>  
> +#ifdef UFFD_FEATURE_THREAD_ID
> +    if (migrate_postcopy_blocktime() && mis &&
> +            UFFD_FEATURE_THREAD_ID & supported_features) {

(indent)

> +        /* kernel supports that feature */
> +        /* don't create blocktime_context if it exists */
> +        if (!mis->blocktime_ctx) {

If it does exist, then we may have a problem: the state change
notifier frees ->page_fault_vcpu_time and ->vcpu_addr, but
blocktime_ctx itself stays around. So is it possible that when we
reach here we get blocktime_ctx != NULL but
blocktime_ctx->page_fault_vcpu_time == NULL and
blocktime_ctx->vcpu_addr == NULL?

Maybe we can just drop the state change notifier, and one single exit
notifier would be enough to clean up everything?

> +            mis->blocktime_ctx = blocktime_context_new();
> +        }
> +
> +        asked_features |= UFFD_FEATURE_THREAD_ID;
> +    }
> +#endif
> +
>      /*
>       * request features, even if asked_features is 0, due to
>       * kernel expects UFFD_API before UFFDIO_REGISTER, per
> -- 
> 1.8.3.1
> 

Thanks,

-- 
Peter Xu

Reply via email to