Re: [Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream
On 06/15/2015 09:44 PM, Andrew Cooper wrote: when signalled to do so by libxl__remus_domain_checkpoint_callback() Signed-off-by: Andrew Cooper andrew.coop...@citrix.com CC: Ian Campbell ian.campb...@citrix.com CC: Ian Jackson ian.jack...@eu.citrix.com CC: Wei Liu wei.l...@citrix.com --- tools/libxl/libxl_dom.c | 16 +++--- tools/libxl/libxl_internal.h |7 +++ tools/libxl/libxl_stream_write.c | 111 -- 3 files changed, 121 insertions(+), 13 deletions(-) diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index 06bfaab..3597a91 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -1867,8 +1867,8 @@ static void remus_devices_preresume_cb(libxl__egc *egc, /*- remus asynchronous checkpoint callback -*/ -static void remus_checkpoint_dm_saved(libxl__egc *egc, - libxl__domain_suspend_state *dss, int rc); +static void remus_checkpoint_stream_written( +libxl__egc *egc, libxl__domain_suspend_state *dss, int rc); static void remus_devices_commit_cb(libxl__egc *egc, libxl__remus_devices_state *rds, int rc); @@ -1882,16 +1882,11 @@ static void libxl__remus_domain_checkpoint_callback(void *data) libxl__egc *egc = dss-shs.egc; STATE_AO_GC(dss-ao); -/* This would go into tailbuf. */ -if (dss-hvm) { -libxl__domain_save_device_model(egc, dss, remus_checkpoint_dm_saved); -} else { -remus_checkpoint_dm_saved(egc, dss, 0); -} +libxl__stream_write_start_checkpoint(egc, dss-sws); } -static void remus_checkpoint_dm_saved(libxl__egc *egc, - libxl__domain_suspend_state *dss, int rc) +static void remus_checkpoint_stream_written( +libxl__egc *egc, libxl__domain_suspend_state *dss, int rc) { /* Convenience aliases */ libxl__remus_devices_state *const rds = dss-rds; @@ -2036,6 +2031,7 @@ void libxl__domain_suspend(libxl__egc *egc, libxl__domain_suspend_state *dss) callbacks-suspend = libxl__remus_domain_suspend_callback; callbacks-postcopy = libxl__remus_domain_resume_callback; callbacks-checkpoint = libxl__remus_domain_checkpoint_callback; +dss-sws.checkpoint_callback = remus_checkpoint_stream_written; } else callbacks-suspend = libxl__domain_suspend_callback; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 82cd792..bf1c377 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2879,17 +2879,24 @@ struct libxl__stream_write_state { void (*completion_callback)(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc); +void (*checkpoint_callback)(libxl__egc *egc, +libxl__domain_suspend_state *dss, +int rc); /* Private */ int rc; int joined_rc; size_t padding; bool running; +bool in_checkpoint; libxl__datacopier_state dc; }; _hidden void libxl__stream_write_start(libxl__egc *egc, libxl__stream_write_state *stream); +_hidden void libxl__stream_write_start_checkpoint( +libxl__egc *egc, libxl__stream_write_state *stream); + _hidden void libxl__stream_write_abort(libxl__egc *egc, libxl__stream_write_state *stream, int rc); diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c index d28a8a5..40f2cb7 100644 --- a/tools/libxl/libxl_stream_write.c +++ b/tools/libxl/libxl_stream_write.c @@ -23,6 +23,9 @@ * - libxl__stream_write_start() * - Start writing a stream from the start. * + * - libxl__stream_write_start() + * - Write the records which form a checkpoint into a stream. + * * In normal operation, there are two tasks running at once; this stream * processing, and the the libxl-save-helper. check_stream_finished() is used * to join all the tasks in both success and error cases. @@ -39,6 +42,12 @@ * - Toolstack record * - if (hvm), Qemu record * - End record + * + * For checkpointed stream, there is a second loop which is triggered by a + * save-helper checkpoint callback. It writes: + * - Toolstack record + * - if (hvm), Qemu record + * - Checkpoint end record */ static const uint8_t zero_padding[1U REC_ALIGN_ORDER] = { 0 }; @@ -81,6 +90,16 @@ static void end_record_done(libxl__egc *egc, libxl__datacopier_state *dc, int onwrite, int errnoval); +/* Event callbacks unique to checkpointed streams. */ +static void checkpoint_done(libxl__egc *egc, +
Re: [Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream
On Tue, 2015-06-16 at 16:53 +0100, Andrew Cooper wrote: On 16/06/15 16:03, Ian Campbell wrote: On Mon, 2015-06-15 at 14:44 +0100, Andrew Cooper wrote: when signalled to do so by libxl__remus_domain_checkpoint_callback() I think I saw that Remus wasn't currently working, so I'll let you and Hongyang thrash something out before I spend too much effort reviewing these last few RFC bits. Unless you think it is worth my having a look now? Remus was broken by patch 19 in the series, and this patch forms part of fixing it again. I can't find a way of fixing the layering violation in both plain migration and Remus, in a readable, bisectable way. Remus requires identical source and destination toolstacks, and the Remus maintainers are happy enough with the break it and fix it up in the same series approach. Now that the series is comeplete, there is some shuffling room to reduce the window of breakage, but short of folding patches 19, 21, 23-25 together, Remus will break. The report I was referring to thinking I'd seen was that Remus was still broken even after the complete series was applied i.e. there was still more to be done. I'm happy with the transient breakage in this series on this occasion, but I was proposing not to review until Remus was thought to be working OK at the end. Ian. ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream
On Mon, 2015-06-15 at 14:44 +0100, Andrew Cooper wrote: when signalled to do so by libxl__remus_domain_checkpoint_callback() I think I saw that Remus wasn't currently working, so I'll let you and Hongyang thrash something out before I spend too much effort reviewing these last few RFC bits. Unless you think it is worth my having a look now? ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
Re: [Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream
On 16/06/15 16:03, Ian Campbell wrote: On Mon, 2015-06-15 at 14:44 +0100, Andrew Cooper wrote: when signalled to do so by libxl__remus_domain_checkpoint_callback() I think I saw that Remus wasn't currently working, so I'll let you and Hongyang thrash something out before I spend too much effort reviewing these last few RFC bits. Unless you think it is worth my having a look now? Remus was broken by patch 19 in the series, and this patch forms part of fixing it again. I can't find a way of fixing the layering violation in both plain migration and Remus, in a readable, bisectable way. Remus requires identical source and destination toolstacks, and the Remus maintainers are happy enough with the break it and fix it up in the same series approach. Now that the series is comeplete, there is some shuffling room to reduce the window of breakage, but short of folding patches 19, 21, 23-25 together, Remus will break. ~Andrew ___ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel
[Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream
when signalled to do so by libxl__remus_domain_checkpoint_callback() Signed-off-by: Andrew Cooper andrew.coop...@citrix.com CC: Ian Campbell ian.campb...@citrix.com CC: Ian Jackson ian.jack...@eu.citrix.com CC: Wei Liu wei.l...@citrix.com --- tools/libxl/libxl_dom.c | 16 +++--- tools/libxl/libxl_internal.h |7 +++ tools/libxl/libxl_stream_write.c | 111 -- 3 files changed, 121 insertions(+), 13 deletions(-) diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c index 06bfaab..3597a91 100644 --- a/tools/libxl/libxl_dom.c +++ b/tools/libxl/libxl_dom.c @@ -1867,8 +1867,8 @@ static void remus_devices_preresume_cb(libxl__egc *egc, /*- remus asynchronous checkpoint callback -*/ -static void remus_checkpoint_dm_saved(libxl__egc *egc, - libxl__domain_suspend_state *dss, int rc); +static void remus_checkpoint_stream_written( +libxl__egc *egc, libxl__domain_suspend_state *dss, int rc); static void remus_devices_commit_cb(libxl__egc *egc, libxl__remus_devices_state *rds, int rc); @@ -1882,16 +1882,11 @@ static void libxl__remus_domain_checkpoint_callback(void *data) libxl__egc *egc = dss-shs.egc; STATE_AO_GC(dss-ao); -/* This would go into tailbuf. */ -if (dss-hvm) { -libxl__domain_save_device_model(egc, dss, remus_checkpoint_dm_saved); -} else { -remus_checkpoint_dm_saved(egc, dss, 0); -} +libxl__stream_write_start_checkpoint(egc, dss-sws); } -static void remus_checkpoint_dm_saved(libxl__egc *egc, - libxl__domain_suspend_state *dss, int rc) +static void remus_checkpoint_stream_written( +libxl__egc *egc, libxl__domain_suspend_state *dss, int rc) { /* Convenience aliases */ libxl__remus_devices_state *const rds = dss-rds; @@ -2036,6 +2031,7 @@ void libxl__domain_suspend(libxl__egc *egc, libxl__domain_suspend_state *dss) callbacks-suspend = libxl__remus_domain_suspend_callback; callbacks-postcopy = libxl__remus_domain_resume_callback; callbacks-checkpoint = libxl__remus_domain_checkpoint_callback; +dss-sws.checkpoint_callback = remus_checkpoint_stream_written; } else callbacks-suspend = libxl__domain_suspend_callback; diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h index 82cd792..bf1c377 100644 --- a/tools/libxl/libxl_internal.h +++ b/tools/libxl/libxl_internal.h @@ -2879,17 +2879,24 @@ struct libxl__stream_write_state { void (*completion_callback)(libxl__egc *egc, libxl__domain_suspend_state *dss, int rc); +void (*checkpoint_callback)(libxl__egc *egc, +libxl__domain_suspend_state *dss, +int rc); /* Private */ int rc; int joined_rc; size_t padding; bool running; +bool in_checkpoint; libxl__datacopier_state dc; }; _hidden void libxl__stream_write_start(libxl__egc *egc, libxl__stream_write_state *stream); +_hidden void libxl__stream_write_start_checkpoint( +libxl__egc *egc, libxl__stream_write_state *stream); + _hidden void libxl__stream_write_abort(libxl__egc *egc, libxl__stream_write_state *stream, int rc); diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c index d28a8a5..40f2cb7 100644 --- a/tools/libxl/libxl_stream_write.c +++ b/tools/libxl/libxl_stream_write.c @@ -23,6 +23,9 @@ * - libxl__stream_write_start() * - Start writing a stream from the start. * + * - libxl__stream_write_start() + * - Write the records which form a checkpoint into a stream. + * * In normal operation, there are two tasks running at once; this stream * processing, and the the libxl-save-helper. check_stream_finished() is used * to join all the tasks in both success and error cases. @@ -39,6 +42,12 @@ * - Toolstack record * - if (hvm), Qemu record * - End record + * + * For checkpointed stream, there is a second loop which is triggered by a + * save-helper checkpoint callback. It writes: + * - Toolstack record + * - if (hvm), Qemu record + * - Checkpoint end record */ static const uint8_t zero_padding[1U REC_ALIGN_ORDER] = { 0 }; @@ -81,6 +90,16 @@ static void end_record_done(libxl__egc *egc, libxl__datacopier_state *dc, int onwrite, int errnoval); +/* Event callbacks unique to checkpointed streams. */ +static void checkpoint_done(libxl__egc *egc, +libxl__stream_write_state *stream, +int rc); +static void write_checkpoint_end_record(libxl__egc *egc, +