Re: [Qemu-devel] [RFC PATCH v2 13/23] COLO ctl: implement colo save

Shunsuke Kurumatani Wed, 08 Oct 2014 06:24:49 -0700

Hi,

I tried out this exciting patch series, COLO. However, this
patch causes abnormal termination in my environment. Although I
think it's a known issue, the details and the presumed cause are
described below:



On 2014/09/23 18:23, Yang Hongyang wrote:
> implement colo save
> 
> Signed-off-by: Yang Hongyang <yan...@cn.fujitsu.com>
> ---
>   migration-colo.c | 60 
> +++++++++++++++++++++++++++++++++++++++++++++++++-------
>   1 file changed, 53 insertions(+), 7 deletions(-)
> 
> diff --git a/migration-colo.c b/migration-colo.c
> index 2e478e9..d99342a 100644
> --- a/migration-colo.c
> +++ b/migration-colo.c
> @@ -13,6 +13,7 @@
>   #include "block/coroutine.h"
>   #include "hw/qdev-core.h"
>   #include "qemu/timer.h"
> +#include "sysemu/sysemu.h"
>   #include "migration/migration-colo.h"
>   #include <sys/ioctl.h>
>   #include "qemu/error-report.h"
> @@ -106,12 +107,12 @@ static int colo_compare(void)
>       return ioctl(comp_fd, COMP_IOCTWAIT, 250);
>   }
>   
> -static __attribute__((unused)) int colo_compare_flush(void)
> +static int colo_compare_flush(void)
>   {
>       return ioctl(comp_fd, COMP_IOCTFLUSH, 1);
>   }
>   
> -static __attribute__((unused)) int colo_compare_resume(void)
> +static int colo_compare_resume(void)
>   {
>       return ioctl(comp_fd, COMP_IOCTRESUME, 1);
>   }
> @@ -200,6 +201,9 @@ static bool colo_is_master(void)
>   static int do_colo_transaction(MigrationState *s, QEMUFile *control)
>   {
>       int ret;
> +    uint8_t *buf;
> +    size_t size;
> +    QEMUFile *trans = NULL;
>   
>       ret = colo_ctl_put(s->file, COLO_CHECKPOINT_NEW);
>       if (ret) {
> @@ -211,30 +215,73 @@ static int do_colo_transaction(MigrationState *s, 
> QEMUFile *control)
>           goto out;
>       }
>   
> -    /* TODO: suspend and save vm state to colo buffer */
> +    /* open colo buffer for write */
> +    trans = qemu_bufopen("w", NULL);
> +    if (!trans) {
> +        error_report("Open colo buffer for write failed");
> +        goto out;
> +    }
> +
> +    /* suspend and save vm state to colo buffer */
> +    qemu_mutex_lock_iothread();
> +    vm_stop_force_state(RUN_STATE_COLO);
> +    qemu_mutex_unlock_iothread();
> +    /* Disable block migration */
> +    s->params.blk = 0;
> +    s->params.shared = 0;
> +    qemu_savevm_state_begin(trans, &s->params);
> +    qemu_savevm_state_complete(trans);

This line causes QEMU to abort immediately after the COLO migration
process starts. If I'm not mistaken, the cause of the abort is that
the mutex lock is not held when qemu_savevm_state_complete() is
called. The abort was resolved by taking the mutex lock when calling
qemu_savevm_state_complete().

Thanks,
Shunsuke


> +
> +    qemu_fflush(trans);
>   
>       ret = colo_ctl_put(s->file, COLO_CHECKPOINT_SEND);
>       if (ret) {
>           goto out;
>       }
>   
> -    /* TODO: send vmstate to slave */
> +    /* send vmstate to slave */
> +
> +    /* we send the total size of the vmstate first */
> +    size = qsb_get_length(qemu_buf_get(trans));
> +    ret = colo_ctl_put(s->file, size);
> +    if (ret) {
> +        goto out;
> +    }
> +
> +    buf = g_malloc(size);
> +    qsb_get_buffer(qemu_buf_get(trans), 0, size, &buf);
> +    qemu_put_buffer(s->file, buf, size);
> +    g_free(buf);
> +    ret = qemu_file_get_error(s->file);
> +    if (ret < 0) {
> +        goto out;
> +    }
> +    qemu_fflush(s->file);
>   
>       ret = colo_ctl_get(control, COLO_CHECKPOINT_RECEIVED);
>       if (ret) {
>           goto out;
>       }
>   
> -    /* TODO: Flush network etc. */
> +    /* Flush network etc. */
> +    colo_compare_flush();
>   
>       ret = colo_ctl_get(control, COLO_CHECKPOINT_LOADED);
>       if (ret) {
>           goto out;
>       }
>   
> -    /* TODO: resume master */
> +    colo_compare_resume();
> +    ret = 0;
>   
>   out:
> +    if (trans)
> +        qemu_fclose(trans);
> +    /* resume master */
> +    qemu_mutex_lock_iothread();
> +    vm_start();
> +    qemu_mutex_unlock_iothread();
> +
>       return ret;
>   }
>   
> @@ -289,7 +336,6 @@ static void *colo_thread(void *opaque)
>           }
>   
>           /* start a colo checkpoint */
> -
>           if (do_colo_transaction(s, colo_control)) {
>               goto out;
>           }
>

Re: [Qemu-devel] [RFC PATCH v2 13/23] COLO ctl: implement colo save

Reply via email to