Re: [PATCH RESEND v3 08/10] migration: Implement dirty-limit convergence algo

Markus Armbruster Wed, 11 Jan 2023 06:13:10 -0800

huang...@chinatelecom.cn writes:

> From: Hyman Huang(黄勇) <huang...@chinatelecom.cn>
>
> Implement dirty-limit convergence algo for live migration,
> which is kind of like auto-converge algo but using dirty-limit
> instead of cpu throttle to make migration convergent.
>
> Enable dirty page limit if dirty_rate_high_cnt greater than 2
> when dirty-limit capability enabled, disable dirty-limit if
> migration be cancled.
>
> Note that "set_vcpu_dirty_limit", "cancel_vcpu_dirty_limit"
> commands are not allowed during dirty-limit live migration.


Only during live migration, or also during migration of a stopped guest?
If the latter, the easy fix is to scratch "live".

> Signed-off-by: Hyman Huang(黄勇) <huang...@chinatelecom.cn>
> ---
>  migration/migration.c  |  3 +++
>  migration/ram.c        | 63 
> ++++++++++++++++++++++++++++++++++++++------------
>  migration/trace-events |  1 +
>  softmmu/dirtylimit.c   | 22 ++++++++++++++++++
>  4 files changed, 74 insertions(+), 15 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 702e7f4..127d0fe 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -240,6 +240,9 @@ void migration_cancel(const Error *error)
>      if (error) {
>          migrate_set_error(current_migration, error);
>      }
> +    if (migrate_dirty_limit()) {
> +        qmp_cancel_vcpu_dirty_limit(false, -1, NULL);
> +    }
>      migrate_fd_cancel(current_migration);
>  }
>  
> diff --git a/migration/ram.c b/migration/ram.c
> index 5e66652..78b9167 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -45,6 +45,7 @@
>  #include "qapi/error.h"
>  #include "qapi/qapi-types-migration.h"
>  #include "qapi/qapi-events-migration.h"
> +#include "qapi/qapi-commands-migration.h"
>  #include "qapi/qmp/qerror.h"
>  #include "trace.h"
>  #include "exec/ram_addr.h"
> @@ -57,6 +58,8 @@
>  #include "qemu/iov.h"
>  #include "multifd.h"
>  #include "sysemu/runstate.h"
> +#include "sysemu/dirtylimit.h"
> +#include "sysemu/kvm.h"
>  
>  #include "hw/boards.h" /* for machine_dump_guest_core() */
>  
> @@ -1139,6 +1142,30 @@ static void migration_update_rates(RAMState *rs, 
> int64_t end_time)
>      }
>  }
>  
> +/*
> + * Enable dirty-limit to throttle down the guest
> + */
> +static void migration_dirty_limit_guest(void)
> +{
> +    static int64_t quota_dirtyrate;
> +    MigrationState *s = migrate_get_current();
> +
> +    /*
> +     * If dirty limit already enabled and migration parameter
> +     * vcpu-dirty-limit untouched.
> +     */
> +    if (dirtylimit_in_service() &&
> +        quota_dirtyrate == s->parameters.vcpu_dirty_limit) {
> +        return;
> +    }
> +
> +    quota_dirtyrate = s->parameters.vcpu_dirty_limit;
> +
> +    /* Set or update quota dirty limit */
> +    qmp_set_vcpu_dirty_limit(false, -1, quota_dirtyrate, NULL);
> +    trace_migration_dirty_limit_guest(quota_dirtyrate);
> +}
> +
>  static void migration_trigger_throttle(RAMState *rs)
>  {
>      MigrationState *s = migrate_get_current();
> @@ -1148,26 +1175,32 @@ static void migration_trigger_throttle(RAMState *rs)
>      uint64_t bytes_dirty_period = rs->num_dirty_pages_period * 
> TARGET_PAGE_SIZE;
>      uint64_t bytes_dirty_threshold = bytes_xfer_period * threshold / 100;
>  
> -    /* During block migration the auto-converge logic incorrectly detects
> -     * that ram migration makes no progress. Avoid this by disabling the
> -     * throttling logic during the bulk phase of block migration. */
> -    if (blk_mig_bulk_active()) {
> -        return;
> -    }
> +    /*
> +     * The following detection logic can be refined later. For now:
> +     * Check to see if the ratio between dirtied bytes and the approx.
> +     * amount of bytes that just got transferred since the last time
> +     * we were in this routine reaches the threshold. If that happens
> +     * twice, start or increase throttling.
> +     */
>  
> -    if (migrate_auto_converge()) {
> -        /* The following detection logic can be refined later. For now:
> -           Check to see if the ratio between dirtied bytes and the approx.
> -           amount of bytes that just got transferred since the last time
> -           we were in this routine reaches the threshold. If that happens
> -           twice, start or increase throttling. */
> +    if ((bytes_dirty_period > bytes_dirty_threshold) &&
> +        (++rs->dirty_rate_high_cnt >= 2)) {
> +        rs->dirty_rate_high_cnt = 0;
> +        /*
> +         * During block migration the auto-converge logic incorrectly detects
> +         * that ram migration makes no progress. Avoid this by disabling the
> +         * throttling logic during the bulk phase of block migration
> +         */
> +        if (blk_mig_bulk_active()) {
> +            return;
> +        }
>  
> -        if ((bytes_dirty_period > bytes_dirty_threshold) &&
> -            (++rs->dirty_rate_high_cnt >= 2)) {
> +        if (migrate_auto_converge()) {
>              trace_migration_throttle();
> -            rs->dirty_rate_high_cnt = 0;
>              mig_throttle_guest_down(bytes_dirty_period,
>                                      bytes_dirty_threshold);
> +        } else if (migrate_dirty_limit()) {
> +            migration_dirty_limit_guest();
>          }
>      }
>  }
> diff --git a/migration/trace-events b/migration/trace-events
> index 57003ed..33a2666 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -91,6 +91,7 @@ migration_bitmap_sync_start(void) ""
>  migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64
>  migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, 
> unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx"
>  migration_throttle(void) ""
> +migration_dirty_limit_guest(int64_t dirtyrate) "guest dirty page rate limit 
> %" PRIi64 " MB/s"
>  ram_discard_range(const char *rbname, uint64_t start, size_t len) "%s: 
> start: %" PRIx64 " %zx"
>  ram_load_loop(const char *rbname, uint64_t addr, int flags, void *host) "%s: 
> addr: 0x%" PRIx64 " flags: 0x%x host: %p"
>  ram_load_postcopy_loop(int channel, uint64_t addr, int flags) "chan=%d 
> addr=0x%" PRIx64 " flags=0x%x"
> diff --git a/softmmu/dirtylimit.c b/softmmu/dirtylimit.c
> index 2a07200..b63032c 100644
> --- a/softmmu/dirtylimit.c
> +++ b/softmmu/dirtylimit.c
> @@ -439,6 +439,8 @@ void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
>                                   int64_t cpu_index,
>                                   Error **errp)
>  {
> +    MigrationState *ms = migrate_get_current();
> +
>      if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
>          return;
>      }
> @@ -452,6 +454,15 @@ void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
>          return;
>      }
>  
> +    if (migration_is_running(ms->state) &&
> +        (!qemu_thread_is_self(&ms->thread)) &&
> +        migrate_dirty_limit() &&
> +        dirtylimit_in_service()) {
> +        error_setg(errp, "dirty-limit live migration is running, do"
> +                   " not allow dirty page limit to be canceled manually");

"do not allow" sounds like a request.  What about "can't cancel dirty
page limit while migration is running"?

> +        return;
> +    }
> +
>      dirtylimit_state_lock();
>  
>      if (has_cpu_index) {
> @@ -487,6 +498,8 @@ void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
>                                uint64_t dirty_rate,
>                                Error **errp)
>  {
> +    MigrationState *ms = migrate_get_current();
> +
>      if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
>          error_setg(errp, "dirty page limit feature requires KVM with"
>                     " accelerator property 'dirty-ring-size' set'");
> @@ -503,6 +516,15 @@ void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
>          return;
>      }
>  
> +    if (migration_is_running(ms->state) &&
> +        (!qemu_thread_is_self(&ms->thread)) &&
> +        migrate_dirty_limit() &&
> +        dirtylimit_in_service()) {
> +        error_setg(errp, "dirty-limit live migration is running, do"
> +                   " not allow dirty page limit to be configured manually");

Likewise.

> +        return;
> +    }
> +
>      dirtylimit_state_lock();
>  
>      if (!dirtylimit_in_service()) {

Re: [PATCH RESEND v3 08/10] migration: Implement dirty-limit convergence algo

Reply via email to