On Wed, Jun 16, 2021 at 09:12:32AM +0800, huang...@chinatelecom.cn wrote:
> From: Hyman Huang(黄勇) <huang...@chinatelecom.cn>
> 
> use dirty ring feature to implement dirtyrate calculation.
> 
> introduce mode option in qmp calc_dirty_rate to specify what
> method should be used when calculating dirtyrate, either
> page-sampling or dirty-ring should be passed.
> 
> introduce "dirty_ring:-r" option in hmp calc_dirty_rate to
> indicate dirty ring method should be used for calculation.
> 
> Signed-off-by: Hyman Huang(黄勇) <huang...@chinatelecom.cn>

Mostly looks good to me, thanks; a few more comments below.

> ---
>  hmp-commands.hx        |   7 +-
>  migration/dirtyrate.c  | 183 
> ++++++++++++++++++++++++++++++++++++++++++++++---
>  migration/trace-events |   2 +
>  qapi/migration.json    |  16 ++++-
>  4 files changed, 195 insertions(+), 13 deletions(-)
> 
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index 8e45bce..f7fc9d7 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1738,8 +1738,9 @@ ERST
>  
>      {
>          .name       = "calc_dirty_rate",
> -        .args_type  = "second:l,sample_pages_per_GB:l?",
> -        .params     = "second [sample_pages_per_GB]",
> -        .help       = "start a round of guest dirty rate measurement",
> +        .args_type  = "dirty_ring:-r,second:l,sample_pages_per_GB:l?",
> +        .params     = "[-r] second [sample_pages_per_GB]",
> +        .help       = "start a round of guest dirty rate measurement (using 
> -d to"
> +                      "\n\t\t\t specify dirty ring as the method of 
> calculation)",
>          .cmd        = hmp_calc_dirty_rate,
>      },
> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> index d7b41bd..7c9515b 100644
> --- a/migration/dirtyrate.c
> +++ b/migration/dirtyrate.c
> @@ -16,6 +16,7 @@
>  #include "cpu.h"
>  #include "exec/ramblock.h"
>  #include "qemu/rcu_queue.h"
> +#include "qemu/main-loop.h"
>  #include "qapi/qapi-commands-migration.h"
>  #include "ram.h"
>  #include "trace.h"
> @@ -23,11 +24,20 @@
>  #include "monitor/hmp.h"
>  #include "monitor/monitor.h"
>  #include "qapi/qmp/qdict.h"
> +#include "sysemu/kvm.h"
> +#include "sysemu/runstate.h"
> +#include "exec/memory.h"
> +
> +typedef struct DirtyPageRecord {
> +    uint64_t start_pages;
> +    uint64_t end_pages;
> +} DirtyPageRecord;
>  
>  static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
>  static struct DirtyRateStat DirtyStat;
>  static QemuMutex dirtyrate_lock;
>  static DirtyRateMeasureMode dirtyrate_mode = DIRTY_RATE_MEASURE_MODE_NONE;
> +static DirtyPageRecord *dirty_pages;

I think this can be a local var.  See below.

>  
>  static int64_t set_sample_page_period(int64_t msec, int64_t initial_time)
>  {
> @@ -72,9 +82,11 @@ static int dirtyrate_set_state(int *state, int old_state, 
> int new_state)
>  
>  static struct DirtyRateInfo *query_dirty_rate_info(void)
>  {
> +    int i;
>      qemu_mutex_lock(&dirtyrate_lock);
>      int64_t dirty_rate = DirtyStat.dirty_rate;
>      struct DirtyRateInfo *info = g_malloc0(sizeof(DirtyRateInfo));
> +    DirtyRateVcpuList *head = NULL, **tail = &head;
>  
>      if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
>          info->has_dirty_rate = true;
> @@ -85,9 +97,22 @@ static struct DirtyRateInfo *query_dirty_rate_info(void)
>      info->start_time = DirtyStat.start_time;
>      info->calc_time = DirtyStat.calc_time;
>      info->sample_pages = DirtyStat.sample_pages;
> +    info->mode = dirtyrate_mode;
> +
> +    if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
> +        /* set sample_pages with 0 to indicate page sampling isn't enabled */
> +        info->sample_pages = 0;
> +        info->has_vcpu_dirty_rate = true;
> +        for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
> +            DirtyRateVcpu *rate = g_malloc0(sizeof(DirtyRateVcpu));
> +            rate->id = DirtyStat.dirty_ring.rates[i].id;
> +            rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
> +            QAPI_LIST_APPEND(tail, rate);
> +        }
> +        info->vcpu_dirty_rate = head;
> +    }

I think it's nicer to move this chunk into the previous block:

    if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
        ...
    }

Then, as mentioned previously, I think we can drop the mutex from the
previous patch.

>  
>      qemu_mutex_unlock(&dirtyrate_lock);
> -
>      trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
>  
>      return info;
> @@ -119,7 +144,11 @@ static void init_dirtyrate_stat(int64_t start_time,
>  
>  static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
>  {
> -    /* TODO */
> +    /* last calc-dirty-rate qmp use dirty ring mode */
> +    if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
> +        free(DirtyStat.dirty_ring.rates);
> +        DirtyStat.dirty_ring.rates = NULL;
> +    }
>  }
>  
>  static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
> @@ -356,7 +385,97 @@ static bool compare_page_hash_info(struct 
> RamblockDirtyInfo *info,
>      return true;
>  }
>  
> -static void calculate_dirtyrate(struct DirtyRateConfig config)
> +static void record_dirtypages(CPUState *cpu, bool start)
> +{
> +    if (start) {
> +        dirty_pages[cpu->cpu_index].start_pages = cpu->dirty_pages;
> +    } else {
> +        dirty_pages[cpu->cpu_index].end_pages = cpu->dirty_pages;
> +    }
> +}

I suggest dropping this helper and inlining its assignments.  More below.

> +
> +static void dirtyrate_global_dirty_log_start(void)
> +{
> +    qemu_mutex_lock_iothread();
> +    memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE);
> +    qemu_mutex_unlock_iothread();
> +}
> +
> +static void dirtyrate_global_dirty_log_stop(void)
> +{
> +    qemu_mutex_lock_iothread();
> +    memory_global_dirty_log_stop(GLOBAL_DIRTY_DIRTY_RATE);
> +    qemu_mutex_unlock_iothread();
> +}
> +
> +static int64_t do_calculate_dirtyrate_vcpu(int idx)
> +{
> +    uint64_t memory_size_MB;
> +    int64_t time_s;
> +    uint64_t start_pages = dirty_pages[idx].start_pages;
> +    uint64_t end_pages = dirty_pages[idx].end_pages;
> +    uint64_t dirty_pages = 0;
> +
> +    dirty_pages = end_pages - start_pages;
> +
> +    memory_size_MB = (dirty_pages * TARGET_PAGE_SIZE) >> 20;
> +    time_s = DirtyStat.calc_time;
> +
> +    trace_dirtyrate_do_calculate_vcpu(idx, dirty_pages, time_s);
> +
> +    return memory_size_MB / time_s;
> +}
> +
> +static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
> +{
> +    CPUState *cpu;
> +    int64_t msec = 0;
> +    int64_t start_time;
> +    uint64_t dirtyrate = 0;
> +    uint64_t dirtyrate_sum = 0;
> +    int nvcpu = 0;
> +    int i = 0;
> +
> +    CPU_FOREACH(cpu) {
> +        nvcpu++;
> +    }
> +
> +    dirty_pages = malloc(sizeof(*dirty_pages) * nvcpu);

I think dirty_pages can be a local var in this function; that should be enough.

> +
> +    DirtyStat.dirty_ring.nvcpu = nvcpu;
> +    DirtyStat.dirty_ring.rates = malloc(sizeof(DirtyRateVcpu) * nvcpu);
> +
> +    dirtyrate_global_dirty_log_start();
> +
> +    CPU_FOREACH(cpu) {
> +        record_dirtypages(cpu, true);

Here we can expand the helper inline, so that referencing dirty_pages is no
problem.

> +    }
> +
> +    start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +    DirtyStat.start_time = start_time / 1000;
> +
> +    msec = config.sample_period_seconds * 1000;
> +    msec = set_sample_page_period(msec, start_time);
> +    DirtyStat.calc_time = msec / 1000;
> +
> +    CPU_FOREACH(cpu) {
> +        record_dirtypages(cpu, false);

Same here.

> +    }
> +
> +    dirtyrate_global_dirty_log_stop();
> +
> +    for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
> +        dirtyrate = do_calculate_dirtyrate_vcpu(i);

We may need to pass dirty_pages in here too, but that should be the last thing
needed to make it local.

> +        DirtyStat.dirty_ring.rates[i].id = i;
> +        DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
> +        dirtyrate_sum += dirtyrate;
> +    }
> +
> +    DirtyStat.dirty_rate = dirtyrate_sum;
> +    free(dirty_pages);
> +}
> +
> +static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
>  {
>      struct RamblockDirtyInfo *block_dinfo = NULL;
>      int block_count = 0;
> @@ -387,6 +506,17 @@ out:
>      free_ramblock_dirty_info(block_dinfo, block_count);
>  }
>  
> +static void calculate_dirtyrate(struct DirtyRateConfig config)
> +{
> +    if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
> +        calculate_dirtyrate_dirty_ring(config);
> +    } else {
> +        calculate_dirtyrate_sample_vm(config);
> +    }
> +
> +    trace_dirtyrate_calculate(DirtyStat.dirty_rate);
> +}
> +
>  void *get_dirtyrate_thread(void *arg)
>  {
>      struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
> @@ -412,8 +542,12 @@ void *get_dirtyrate_thread(void *arg)
>      return NULL;
>  }
>  
> -void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
> -                         int64_t sample_pages, Error **errp)
> +void qmp_calc_dirty_rate(int64_t calc_time,
> +                         bool has_sample_pages,
> +                         int64_t sample_pages,
> +                         bool has_mode,
> +                         DirtyRateMeasureMode mode,
> +                         Error **errp)
>  {
>      static struct DirtyRateConfig config;
>      QemuThread thread;
> @@ -435,6 +569,15 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool 
> has_sample_pages,
>          return;
>      }
>  
> +    if (!has_mode) {
> +        mode =  DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
> +    }
> +
> +    if (has_sample_pages && mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
> +        error_setg(errp, "either sample-pages or dirty-ring can be 
> specified.");
> +        return;
> +    }
> +
>      if (has_sample_pages) {
>          if (!is_sample_pages_valid(sample_pages)) {
>              error_setg(errp, "sample-pages is out of range[%d, %d].",
> @@ -447,6 +590,16 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool 
> has_sample_pages,
>      }
>  
>      /*
> +     * dirty ring mode only works when kvm dirty ring is enabled.
> +     */
> +    if ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
> +        !kvm_dirty_ring_enabled()) {
> +        error_setg(errp, "dirty ring is disabled, use sample-pages method "
> +                         "or remeasure later.");
> +        return;
> +    }
> +
> +    /*
>       * Init calculation state as unstarted.
>       */
>      ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
> @@ -458,7 +611,7 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool 
> has_sample_pages,
>  
>      config.sample_period_seconds = calc_time;
>      config.sample_pages_per_gigabytes = sample_pages;
> -    config.mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
> +    config.mode = mode;
>  
>      if (unlikely(dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_NONE)) {
>          /* first time to calculate dirty rate */
> @@ -471,7 +624,7 @@ void qmp_calc_dirty_rate(int64_t calc_time, bool 
> has_sample_pages,
>       * update dirty rate mode so that we can figure out what mode has
>       * been used in last calculation
>       **/
> -    dirtyrate_mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
> +    dirtyrate_mode = mode;
>  
>      start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
>      init_dirtyrate_stat(start_time, config);
> @@ -497,9 +650,18 @@ void hmp_info_dirty_rate(Monitor *mon, const QDict 
> *qdict)
>                     info->sample_pages);
>      monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
>                     info->calc_time);
> +    monitor_printf(mon, "Mode: %s\n",
> +                   DirtyRateMeasureMode_str(info->mode));
>      monitor_printf(mon, "Dirty rate: ");
>      if (info->has_dirty_rate) {
>          monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
> +        if (info->has_vcpu_dirty_rate) {
> +            DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
> +            for (rate = head; rate != NULL; rate = rate->next) {
> +                monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: 
> %"PRIi64"\n",
> +                               rate->value->id, rate->value->dirty_rate);
> +            }
> +        }
>      } else {
>          monitor_printf(mon, "(not ready)\n");
>      }

Please be careful not to leak the list of vcpu results.  I think we need
something like qapi_free_DirtyRateVcpuList().

Thanks,

-- 
Peter Xu


Reply via email to