Hi, I applied and ran this exciting patch series, COLO. However, this patch causes an abnormal termination in my environment. Although I think this may be a known issue, the details and the presumed cause are described below:
On 2014/09/23 18:23, Yang Hongyang wrote: > implement colo save > > Signed-off-by: Yang Hongyang <yan...@cn.fujitsu.com> > --- > migration-colo.c | 60 > +++++++++++++++++++++++++++++++++++++++++++++++++------- > 1 file changed, 53 insertions(+), 7 deletions(-) > > diff --git a/migration-colo.c b/migration-colo.c > index 2e478e9..d99342a 100644 > --- a/migration-colo.c > +++ b/migration-colo.c > @@ -13,6 +13,7 @@ > #include "block/coroutine.h" > #include "hw/qdev-core.h" > #include "qemu/timer.h" > +#include "sysemu/sysemu.h" > #include "migration/migration-colo.h" > #include <sys/ioctl.h> > #include "qemu/error-report.h" > @@ -106,12 +107,12 @@ static int colo_compare(void) > return ioctl(comp_fd, COMP_IOCTWAIT, 250); > } > > -static __attribute__((unused)) int colo_compare_flush(void) > +static int colo_compare_flush(void) > { > return ioctl(comp_fd, COMP_IOCTFLUSH, 1); > } > > -static __attribute__((unused)) int colo_compare_resume(void) > +static int colo_compare_resume(void) > { > return ioctl(comp_fd, COMP_IOCTRESUME, 1); > } > @@ -200,6 +201,9 @@ static bool colo_is_master(void) > static int do_colo_transaction(MigrationState *s, QEMUFile *control) > { > int ret; > + uint8_t *buf; > + size_t size; > + QEMUFile *trans = NULL; > > ret = colo_ctl_put(s->file, COLO_CHECKPOINT_NEW); > if (ret) { > @@ -211,30 +215,73 @@ static int do_colo_transaction(MigrationState *s, > QEMUFile *control) > goto out; > } > > - /* TODO: suspend and save vm state to colo buffer */ > + /* open colo buffer for write */ > + trans = qemu_bufopen("w", NULL); > + if (!trans) { > + error_report("Open colo buffer for write failed"); > + goto out; > + } > + > + /* suspend and save vm state to colo buffer */ > + qemu_mutex_lock_iothread(); > + vm_stop_force_state(RUN_STATE_COLO); > + qemu_mutex_unlock_iothread(); > + /* Disable block migration */ > + s->params.blk = 0; > + s->params.shared = 0; > + qemu_savevm_state_begin(trans, &s->params); > + qemu_savevm_state_complete(trans); This line 
causes QEMU to abort immediately after the COLO migration process starts. If I'm not mistaken, the abort happens because the mutex lock is not held when qemu_savevm_state_complete() is called. The abort was resolved by taking the mutex lock when calling qemu_savevm_state_complete(). Thanks, Shunsuke > + > + qemu_fflush(trans); > > ret = colo_ctl_put(s->file, COLO_CHECKPOINT_SEND); > if (ret) { > goto out; > } > > - /* TODO: send vmstate to slave */ > + /* send vmstate to slave */ > + > + /* we send the total size of the vmstate first */ > + size = qsb_get_length(qemu_buf_get(trans)); > + ret = colo_ctl_put(s->file, size); > + if (ret) { > + goto out; > + } > + > + buf = g_malloc(size); > + qsb_get_buffer(qemu_buf_get(trans), 0, size, &buf); > + qemu_put_buffer(s->file, buf, size); > + g_free(buf); > + ret = qemu_file_get_error(s->file); > + if (ret < 0) { > + goto out; > + } > + qemu_fflush(s->file); > > ret = colo_ctl_get(control, COLO_CHECKPOINT_RECEIVED); > if (ret) { > goto out; > } > > - /* TODO: Flush network etc. */ > + /* Flush network etc. */ > + colo_compare_flush(); > > ret = colo_ctl_get(control, COLO_CHECKPOINT_LOADED); > if (ret) { > goto out; > } > > - /* TODO: resume master */ > + colo_compare_resume(); > + ret = 0; > > out: > + if (trans) > + qemu_fclose(trans); > + /* resume master */ > + qemu_mutex_lock_iothread(); > + vm_start(); > + qemu_mutex_unlock_iothread(); > + > return ret; > } > > @@ -289,7 +336,6 @@ static void *colo_thread(void *opaque) > } > > /* start a colo checkpoint */ > - > if (do_colo_transaction(s, colo_control)) { > goto out; > } >