Only do a checkpoint when the PVM's and SVM's output net packets are inconsistent. We also limit the minimum time between two consecutive checkpoint actions, to give the VM a chance to run.
Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com> --- include/net/colo-nic.h | 2 ++ migration/colo.c | 32 ++++++++++++++++++++++++++++++++ net/colo-nic.c | 5 +++++ 3 files changed, 39 insertions(+) diff --git a/include/net/colo-nic.h b/include/net/colo-nic.h index 809726c..57c6719 100644 --- a/include/net/colo-nic.h +++ b/include/net/colo-nic.h @@ -20,4 +20,6 @@ void colo_proxy_destroy(enum colo_mode mode); void colo_add_nic_devices(NetClientState *nc); void colo_remove_nic_devices(NetClientState *nc); +int colo_proxy_compare(void); + #endif diff --git a/migration/colo.c b/migration/colo.c index 22d7df1..a7cead9 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -17,6 +17,13 @@ #include "migration/migration-failover.h" #include "net/colo-nic.h" +/* +* We should not do checkpoint one after another without any time interval, +* Because this will lead continuous 'stop' status for VM. +* CHECKPOINT_MIN_PERIOD is the min time limit between two checkpoint action. 
+*/ +#define CHECKPOINT_MIN_PERIOD 100 /* unit: ms */ + enum { COLO_CHECPOINT_READY = 0x46, @@ -283,6 +290,7 @@ static void *colo_thread(void *opaque) { MigrationState *s = opaque; QEMUFile *colo_control = NULL; + int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); int ret; if (colo_proxy_init(COLO_PRIMARY_MODE) != 0) { @@ -318,15 +326,39 @@ static void *colo_thread(void *opaque) trace_colo_vm_state_change("stop", "run"); while (s->state == MIGRATION_STATUS_COLO) { + int proxy_checkpoint_req; + if (failover_request_is_set()) { error_report("failover request"); goto out; } + /* wait for a colo checkpoint */ + proxy_checkpoint_req = colo_proxy_compare(); + if (proxy_checkpoint_req < 0) { + goto out; + } else if (!proxy_checkpoint_req) { + /* + * No checkpoint is needed, wait for 1ms and then + * check if we need checkpoint again + */ + g_usleep(1000); + continue; + } else { + int64_t interval; + + current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); + interval = current_time - checkpoint_time; + if (interval < CHECKPOINT_MIN_PERIOD) { + /* Limit the min time between two checkpoint */ + g_usleep((1000*(CHECKPOINT_MIN_PERIOD - interval))); + } + } /* start a colo checkpoint */ if (colo_do_checkpoint_transaction(s, colo_control)) { goto out; } + checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST); } out: diff --git a/net/colo-nic.c b/net/colo-nic.c index 9e08fe3..a004e08 100644 --- a/net/colo-nic.c +++ b/net/colo-nic.c @@ -390,3 +390,8 @@ void colo_proxy_destroy(enum colo_mode mode) nfnl_close(nfnlh); teardown_nic(mode, getpid()); } + +int colo_proxy_compare(void) +{ + return atomic_xchg(&packet_compare_different, 0); +} -- 1.7.12.4