On Wed, May 16, 2018 at 7:12 PM, Dr. David Alan Gilbert <dgilb...@redhat.com > wrote:
> * Zhang Chen (zhangc...@gmail.com) wrote: > > For COLO FT, both the PVM and SVM run at the same time, > > only sync the state while it needs. > > > > So here, let SVM runs while not doing checkpoint, change > > DEFAULT_MIGRATE_X_CHECKPOINT_DELAY to 200*100. > > > > Besides, we forgot to release colo_checkpoint_semd and > > colo_delay_timer, fix them here. > > > > Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com> > > Signed-off-by: Zhang Chen <zhangc...@gmail.com> > > Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com> > > --- > > migration/colo.c | 42 ++++++++++++++++++++++++++++++++++++++++-- > > migration/migration.c | 4 ++-- > > 2 files changed, 42 insertions(+), 4 deletions(-) > > > > diff --git a/migration/colo.c b/migration/colo.c > > index 4381067ed4..081df1835f 100644 > > --- a/migration/colo.c > > +++ b/migration/colo.c > > @@ -25,8 +25,11 @@ > > #include "qemu/error-report.h" > > #include "migration/failover.h" > > #include "replication.h" > > +#include "net/colo-compare.h" > > +#include "net/colo.h" > > > > static bool vmstate_loading; > > +static Notifier packets_compare_notifier; > > > > #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) > > > > @@ -343,6 +346,11 @@ static int > > colo_do_checkpoint_transaction(MigrationState > *s, > > goto out; > > } > > > > + colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT, > &local_err); > > + if (local_err) { > > + goto out; > > + } > > + > > /* Disable block migration */ > > migrate_set_block_enabled(false, &local_err); > > qemu_savevm_state_header(fb); > > @@ -400,6 +408,11 @@ out: > > return ret; > > } > > > > +static void colo_compare_notify_checkpoint(Notifier *notifier, void > *data) > > +{ > > + colo_checkpoint_notify(data); > > +} > > + > > static void colo_process_checkpoint(MigrationState *s) > > { > > QIOChannelBuffer *bioc; > > @@ -416,6 +429,9 @@ static void colo_process_checkpoint(MigrationState > *s) > > goto out; > > } > > > > + packets_compare_notifier.notify = colo_compare_notify_checkpoint; > > + colo_compare_register_notifier(&packets_compare_notifier); > > + > > /* > > * Wait for Secondary finish loading VM states and enter COLO > > * restore. > > @@ -461,11 +477,21 @@ out: > > qemu_fclose(fb); > > } > > > > - timer_del(s->colo_delay_timer); > > - > > /* Hope this not to be too long to wait here */ > > qemu_sem_wait(&s->colo_exit_sem); > > qemu_sem_destroy(&s->colo_exit_sem); > > + > > + /* > > + * It is safe to unregister notifier after failover finished. > > + * Besides, colo_delay_timer and colo_checkpoint_sem can't be > > + * released befor unregister notifier, or there will be > use-after-free > > + * error. > > + */ > > + colo_compare_unregister_notifier(&packets_compare_notifier); > > + timer_del(s->colo_delay_timer); > > + timer_free(s->colo_delay_timer); > > + qemu_sem_destroy(&s->colo_checkpoint_sem); > > + > > /* > > * Must be called after failover BH is completed, > > * Or the failover BH may shutdown the wrong fd that > > @@ -558,6 +584,11 @@ void *colo_process_incoming_thread(void *opaque) > > fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc)); > > object_unref(OBJECT(bioc)); > > > > + qemu_mutex_lock_iothread(); > > + vm_start(); > > + trace_colo_vm_state_change("stop", "run"); > > + qemu_mutex_unlock_iothread(); > > + > > colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_READY, > > &local_err); > > if (local_err) { > > @@ -577,6 +608,11 @@ void *colo_process_incoming_thread(void *opaque) > > goto out; > > } > > > > + qemu_mutex_lock_iothread(); > > + vm_stop_force_state(RUN_STATE_COLO); > > + trace_colo_vm_state_change("run", "stop"); > > + qemu_mutex_unlock_iothread(); > > + > > /* FIXME: This is unnecessary for periodic checkpoint mode */ > > colo_send_message(mis->to_src_file, > COLO_MESSAGE_CHECKPOINT_REPLY, > > &local_err); > > @@ -630,6 +666,8 @@ void *colo_process_incoming_thread(void *opaque) > > } > > > > vmstate_loading = false; > > + vm_start(); > > + trace_colo_vm_state_change("stop", "run"); > > qemu_mutex_unlock_iothread(); > > > > if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { > > diff --git a/migration/migration.c b/migration/migration.c > > index 35f2781b03..bca187275a 100644 > > --- a/migration/migration.c > > +++ b/migration/migration.c > > @@ -76,9 +76,9 @@ > > #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) > > > > /* The delay time (in ms) between two COLO checkpoints > > - * Note: Please change this default value to 10000 when we support > hybrid mode. > > + * Note: Please change this default value to 20000 when we support > hybrid mode. > > You can remove that comment now? > Yes, I will remove it in next version. Thanks Zhang Chen > > Dave > > > */ > > -#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200 > > +#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) > > #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 > > #define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16 > > > > -- > > 2.17.0 > > > -- > Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK >