From: "Dr. David Alan Gilbert" <dgilb...@redhat.com>

Record rolling statistics for COLO checkpoints:
  - Checkpoint lifetime (ms)
  - Pause time due to checkpoint (ms)
  - Checkpoint size (bytes)
Signed-off-by: Dr. David Alan Gilbert <dgilb...@redhat.com> --- hmp.c | 12 ++++++++++++ include/migration/migration.h | 3 +++ migration/colo.c | 15 +++++++++++++++ migration/migration.c | 30 ++++++++++++++++++++++++++++++ qapi-schema.json | 11 ++++++++++- 5 files changed, 70 insertions(+), 1 deletion(-) diff --git a/hmp.c b/hmp.c index c724efa..2b17ed0 100644 --- a/hmp.c +++ b/hmp.c @@ -197,6 +197,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->setup_time); } } + if (info->has_colo_checkpoint_stats) { + monitor_printf_RollingStats(mon, "colo checkpoint (ms)", + info->colo_checkpoint_stats); + } + if (info->has_colo_paused_stats) { + monitor_printf_RollingStats(mon, "colo paused time (ms)", + info->colo_paused_stats); + } + if (info->has_colo_size_stats) { + monitor_printf_RollingStats(mon, "colo checkpoint size", + info->colo_size_stats); + } if (info->has_ram) { monitor_printf(mon, "transferred ram: %" PRIu64 " kbytes\n", diff --git a/include/migration/migration.h b/include/migration/migration.h index 9893467..564edaa 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -64,6 +64,9 @@ struct MigrationState int64_t setup_time; int64_t dirty_sync_count; RStats *expected_downtime_stats; + RStats *colo_checkpoint_stats; + RStats *colo_paused_stats; + RStats *colo_size_stats; }; enum { diff --git a/migration/colo.c b/migration/colo.c index 042dec8..653ef25 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -15,6 +15,7 @@ #include "sysemu/sysemu.h" #include "migration/migration-colo.h" #include "qemu/error-report.h" +#include "qemu/rolling-stats.h" #include "migration/migration-failover.h" #include "net/colo-nic.h" #include "block/block.h" @@ -272,6 +273,7 @@ static int do_colo_transaction(MigrationState *s, QEMUFile *control) int ret; size_t size; QEMUFile *trans = NULL; + int64_t stop_time, start_time; ret = colo_ctl_put(s->file, COLO_CHECKPOINT_NEW); if (ret < 0) { @@ -295,6 +297,7 @@ static int 
do_colo_transaction(MigrationState *s, QEMUFile *control) goto out; } /* suspend and save vm state to colo buffer */ + stop_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); qemu_mutex_lock_iothread(); vm_stop_force_state(RUN_STATE_COLO); qemu_mutex_unlock_iothread(); @@ -343,6 +346,9 @@ static int do_colo_transaction(MigrationState *s, QEMUFile *control) if (ret < 0) { goto out; } + + rstats_add_value(s->colo_size_stats, size, stop_time); + ret = colo_ctl_get(control, COLO_CHECKPOINT_RECEIVED); if (ret < 0) { goto out; @@ -366,6 +372,11 @@ static int do_colo_transaction(MigrationState *s, QEMUFile *control) qemu_mutex_lock_iothread(); vm_start(); qemu_mutex_unlock_iothread(); + start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); + rstats_add_value(s->colo_paused_stats, + start_time - stop_time, + start_time); + DPRINTF("vm resume to run again\n"); out: @@ -450,6 +461,10 @@ static void *colo_thread(void *opaque) DPRINTF("Net packets is not consistent!!!\n"); } + current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); + rstats_add_value(s->colo_checkpoint_stats, + current_time - start_time, + current_time); /* start a colo checkpoint */ if (do_colo_transaction(s, colo_control)) { goto out; diff --git a/migration/migration.c b/migration/migration.c index 794d94a..7c0517a 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -244,6 +244,21 @@ MigrationInfo *qmp_query_migrate(Error **errp) case MIG_STATE_COLO: info->has_status = true; info->status = g_strdup("colo"); + if (s->colo_checkpoint_stats) { + info->colo_checkpoint_stats = + rstats_as_RollingStats(s->colo_checkpoint_stats); + info->has_colo_checkpoint_stats = true; + } + if (s->colo_paused_stats) { + info->colo_paused_stats = + rstats_as_RollingStats(s->colo_paused_stats); + info->has_colo_paused_stats = true; + } + if (s->colo_size_stats) { + info->colo_size_stats = + rstats_as_RollingStats(s->colo_size_stats); + info->has_colo_size_stats = true; + } /* TODO: display COLO specific informations(checkpoint info 
etc.),*/ break; case MIG_STATE_COMPLETED: @@ -433,6 +448,21 @@ static MigrationState *migrate_init(const MigrationParams *params) } else { rstats_reset(s->expected_downtime_stats); } + if (!s->colo_checkpoint_stats) { + s->colo_checkpoint_stats = rstats_init(10, 0.2); + } else { + rstats_reset(s->colo_checkpoint_stats); + } + if (!s->colo_paused_stats) { + s->colo_paused_stats = rstats_init(10, 0.2); + } else { + rstats_reset(s->colo_paused_stats); + } + if (!s->colo_size_stats) { + s->colo_size_stats = rstats_init(10, 0.2); + } else { + rstats_reset(s->colo_size_stats); + } s->bandwidth_limit = bandwidth_limit; s->state = MIG_STATE_SETUP; trace_migrate_set_state(MIG_STATE_SETUP); diff --git a/qapi-schema.json b/qapi-schema.json index 2ec35c7..f2a666c 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -486,6 +486,12 @@ # @expected-downtime-stats: #optional more detailed statistics from the # downtime estimation. # +# @colo-checkpoint-stats: #optional The length of COLO checkpoints (ms) +# +# @colo-paused-stats: #optional The time paused (ms) as COLO took checkpoints +# +# @colo-size-stats: #optional The size of COLO checkpoints (bytes) +# # Since: 0.14.0 ## { 'type': 'MigrationInfo', @@ -496,7 +502,10 @@ '*expected-downtime': 'int', '*downtime': 'int', '*setup-time': 'int', - '*expected-downtime-stats': 'RollingStats' } } + '*expected-downtime-stats': 'RollingStats', + '*colo-checkpoint-stats': 'RollingStats', + '*colo-paused-stats': 'RollingStats', + '*colo-size-stats': 'RollingStats' } } ## # @query-migrate -- 2.1.0