Re: [Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream

2015-06-17 Thread Wen Congyang
On 06/15/2015 09:44 PM, Andrew Cooper wrote:
 when signalled to do so by libxl__remus_domain_checkpoint_callback()
 
 Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
 CC: Ian Campbell ian.campb...@citrix.com
 CC: Ian Jackson ian.jack...@eu.citrix.com
 CC: Wei Liu wei.l...@citrix.com
 ---
  tools/libxl/libxl_dom.c  |   16 +++---
  tools/libxl/libxl_internal.h |7 +++
  tools/libxl/libxl_stream_write.c |  111 
 --
  3 files changed, 121 insertions(+), 13 deletions(-)
 
 diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
 index 06bfaab..3597a91 100644
 --- a/tools/libxl/libxl_dom.c
 +++ b/tools/libxl/libxl_dom.c
 @@ -1867,8 +1867,8 @@ static void remus_devices_preresume_cb(libxl__egc *egc,
  
  /*- remus asynchronous checkpoint callback -*/
  
 -static void remus_checkpoint_dm_saved(libxl__egc *egc,
 -  libxl__domain_suspend_state *dss, int 
 rc);
 +static void remus_checkpoint_stream_written(
 +libxl__egc *egc, libxl__domain_suspend_state *dss, int rc);
  static void remus_devices_commit_cb(libxl__egc *egc,
  libxl__remus_devices_state *rds,
  int rc);
 @@ -1882,16 +1882,11 @@ static void 
 libxl__remus_domain_checkpoint_callback(void *data)
  libxl__egc *egc = dss-shs.egc;
  STATE_AO_GC(dss-ao);
  
 -/* This would go into tailbuf. */
 -if (dss-hvm) {
 -libxl__domain_save_device_model(egc, dss, remus_checkpoint_dm_saved);
 -} else {
 -remus_checkpoint_dm_saved(egc, dss, 0);
 -}
 +libxl__stream_write_start_checkpoint(egc, dss-sws);
  }
  
 -static void remus_checkpoint_dm_saved(libxl__egc *egc,
 -  libxl__domain_suspend_state *dss, int 
 rc)
 +static void remus_checkpoint_stream_written(
 +libxl__egc *egc, libxl__domain_suspend_state *dss, int rc)
  {
  /* Convenience aliases */
  libxl__remus_devices_state *const rds = dss-rds;
 @@ -2036,6 +2031,7 @@ void libxl__domain_suspend(libxl__egc *egc, 
 libxl__domain_suspend_state *dss)
  callbacks-suspend = libxl__remus_domain_suspend_callback;
  callbacks-postcopy = libxl__remus_domain_resume_callback;
  callbacks-checkpoint = libxl__remus_domain_checkpoint_callback;
 +dss-sws.checkpoint_callback = remus_checkpoint_stream_written;
  } else
  callbacks-suspend = libxl__domain_suspend_callback;
  
 diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
 index 82cd792..bf1c377 100644
 --- a/tools/libxl/libxl_internal.h
 +++ b/tools/libxl/libxl_internal.h
 @@ -2879,17 +2879,24 @@ struct libxl__stream_write_state {
  void (*completion_callback)(libxl__egc *egc,
  libxl__domain_suspend_state *dss,
  int rc);
 +void (*checkpoint_callback)(libxl__egc *egc,
 +libxl__domain_suspend_state *dss,
 +int rc);
  /* Private */
  int rc;
  int joined_rc;
  size_t padding;
  bool running;
 +bool in_checkpoint;
  libxl__datacopier_state dc;
  };
  
  _hidden void libxl__stream_write_start(libxl__egc *egc,
 libxl__stream_write_state *stream);
  
 +_hidden void libxl__stream_write_start_checkpoint(
 +libxl__egc *egc, libxl__stream_write_state *stream);
 +
  _hidden void libxl__stream_write_abort(libxl__egc *egc,
 libxl__stream_write_state *stream,
 int rc);
 diff --git a/tools/libxl/libxl_stream_write.c 
 b/tools/libxl/libxl_stream_write.c
 index d28a8a5..40f2cb7 100644
 --- a/tools/libxl/libxl_stream_write.c
 +++ b/tools/libxl/libxl_stream_write.c
 @@ -23,6 +23,9 @@
   *  - libxl__stream_write_start()
   * - Start writing a stream from the start.
   *
 + *  - libxl__stream_write_start()
 + * - Write the records which form a checkpoint into a stream.
 + *
   * In normal operation, there are two tasks running at once; this stream
   * processing, and the the libxl-save-helper.  check_stream_finished() is 
 used
   * to join all the tasks in both success and error cases.
 @@ -39,6 +42,12 @@
   *  - Toolstack record
   *  - if (hvm), Qemu record
   *  - End record
 + *
 + * For checkpointed stream, there is a second loop which is triggered by a
 + * save-helper checkpoint callback.  It writes:
 + *  - Toolstack record
 + *  - if (hvm), Qemu record
 + *  - Checkpoint end record
   */
  
  static const uint8_t zero_padding[1U  REC_ALIGN_ORDER] = { 0 };
 @@ -81,6 +90,16 @@ static void end_record_done(libxl__egc *egc,
  libxl__datacopier_state *dc,
  int onwrite, int errnoval);
  
 +/* Event callbacks unique to checkpointed streams. */
 +static void checkpoint_done(libxl__egc *egc,
 +  

Re: [Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream

2015-06-17 Thread Ian Campbell
On Tue, 2015-06-16 at 16:53 +0100, Andrew Cooper wrote:
 On 16/06/15 16:03, Ian Campbell wrote:
  On Mon, 2015-06-15 at 14:44 +0100, Andrew Cooper wrote:
  when signalled to do so by libxl__remus_domain_checkpoint_callback()
  I think I saw that Remus wasn't currently working, so I'll let you and
  Hongyang thrash something out before I spend too much effort reviewing
  these last few RFC bits. Unless you think it is worth my having a look
  now?
 
 
 
 Remus was broken by patch 19 in the series, and this patch forms part of
 fixing it again.
 
 I can't find a way of fixing the layering violation in both plain
 migration and Remus, in a readable, bisectable way.
 
 Remus requires identical source and destination toolstacks, and the
 Remus maintainers are happy enough with the break it and fix it up in
 the same series approach.
 
 Now that the series is comeplete, there is some shuffling room to reduce
 the window of breakage, but short of folding patches 19, 21, 23-25
 together, Remus will break.

The report I was referring to thinking I'd seen was that Remus was still
broken even after the complete series was applied i.e. there was still
more to be done.

I'm happy with the transient breakage in this series on this occasion,
but I was proposing not to review until Remus was thought to be working
OK at the end.

Ian.


___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream

2015-06-16 Thread Ian Campbell
On Mon, 2015-06-15 at 14:44 +0100, Andrew Cooper wrote:
 when signalled to do so by libxl__remus_domain_checkpoint_callback()

I think I saw that Remus wasn't currently working, so I'll let you and
Hongyang thrash something out before I spend too much effort reviewing
these last few RFC bits. Unless you think it is worth my having a look
now?



___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


Re: [Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream

2015-06-16 Thread Andrew Cooper
On 16/06/15 16:03, Ian Campbell wrote:
 On Mon, 2015-06-15 at 14:44 +0100, Andrew Cooper wrote:
 when signalled to do so by libxl__remus_domain_checkpoint_callback()
 I think I saw that Remus wasn't currently working, so I'll let you and
 Hongyang thrash something out before I spend too much effort reviewing
 these last few RFC bits. Unless you think it is worth my having a look
 now?



Remus was broken by patch 19 in the series, and this patch forms part of
fixing it again.

I can't find a way of fixing the layering violation in both plain
migration and Remus, in a readable, bisectable way.

Remus requires identical source and destination toolstacks, and the
Remus maintainers are happy enough with the break it and fix it up in
the same series approach.

Now that the series is comeplete, there is some shuffling room to reduce
the window of breakage, but short of folding patches 19, 21, 23-25
together, Remus will break.

~Andrew

___
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel


[Xen-devel] [PATCH 23/27] tools/libxl: [RFC] Write checkpoint records into the stream

2015-06-15 Thread Andrew Cooper
when signalled to do so by libxl__remus_domain_checkpoint_callback()

Signed-off-by: Andrew Cooper andrew.coop...@citrix.com
CC: Ian Campbell ian.campb...@citrix.com
CC: Ian Jackson ian.jack...@eu.citrix.com
CC: Wei Liu wei.l...@citrix.com
---
 tools/libxl/libxl_dom.c  |   16 +++---
 tools/libxl/libxl_internal.h |7 +++
 tools/libxl/libxl_stream_write.c |  111 --
 3 files changed, 121 insertions(+), 13 deletions(-)

diff --git a/tools/libxl/libxl_dom.c b/tools/libxl/libxl_dom.c
index 06bfaab..3597a91 100644
--- a/tools/libxl/libxl_dom.c
+++ b/tools/libxl/libxl_dom.c
@@ -1867,8 +1867,8 @@ static void remus_devices_preresume_cb(libxl__egc *egc,
 
 /*- remus asynchronous checkpoint callback -*/
 
-static void remus_checkpoint_dm_saved(libxl__egc *egc,
-  libxl__domain_suspend_state *dss, int 
rc);
+static void remus_checkpoint_stream_written(
+libxl__egc *egc, libxl__domain_suspend_state *dss, int rc);
 static void remus_devices_commit_cb(libxl__egc *egc,
 libxl__remus_devices_state *rds,
 int rc);
@@ -1882,16 +1882,11 @@ static void 
libxl__remus_domain_checkpoint_callback(void *data)
 libxl__egc *egc = dss-shs.egc;
 STATE_AO_GC(dss-ao);
 
-/* This would go into tailbuf. */
-if (dss-hvm) {
-libxl__domain_save_device_model(egc, dss, remus_checkpoint_dm_saved);
-} else {
-remus_checkpoint_dm_saved(egc, dss, 0);
-}
+libxl__stream_write_start_checkpoint(egc, dss-sws);
 }
 
-static void remus_checkpoint_dm_saved(libxl__egc *egc,
-  libxl__domain_suspend_state *dss, int rc)
+static void remus_checkpoint_stream_written(
+libxl__egc *egc, libxl__domain_suspend_state *dss, int rc)
 {
 /* Convenience aliases */
 libxl__remus_devices_state *const rds = dss-rds;
@@ -2036,6 +2031,7 @@ void libxl__domain_suspend(libxl__egc *egc, 
libxl__domain_suspend_state *dss)
 callbacks-suspend = libxl__remus_domain_suspend_callback;
 callbacks-postcopy = libxl__remus_domain_resume_callback;
 callbacks-checkpoint = libxl__remus_domain_checkpoint_callback;
+dss-sws.checkpoint_callback = remus_checkpoint_stream_written;
 } else
 callbacks-suspend = libxl__domain_suspend_callback;
 
diff --git a/tools/libxl/libxl_internal.h b/tools/libxl/libxl_internal.h
index 82cd792..bf1c377 100644
--- a/tools/libxl/libxl_internal.h
+++ b/tools/libxl/libxl_internal.h
@@ -2879,17 +2879,24 @@ struct libxl__stream_write_state {
 void (*completion_callback)(libxl__egc *egc,
 libxl__domain_suspend_state *dss,
 int rc);
+void (*checkpoint_callback)(libxl__egc *egc,
+libxl__domain_suspend_state *dss,
+int rc);
 /* Private */
 int rc;
 int joined_rc;
 size_t padding;
 bool running;
+bool in_checkpoint;
 libxl__datacopier_state dc;
 };
 
 _hidden void libxl__stream_write_start(libxl__egc *egc,
libxl__stream_write_state *stream);
 
+_hidden void libxl__stream_write_start_checkpoint(
+libxl__egc *egc, libxl__stream_write_state *stream);
+
 _hidden void libxl__stream_write_abort(libxl__egc *egc,
libxl__stream_write_state *stream,
int rc);
diff --git a/tools/libxl/libxl_stream_write.c b/tools/libxl/libxl_stream_write.c
index d28a8a5..40f2cb7 100644
--- a/tools/libxl/libxl_stream_write.c
+++ b/tools/libxl/libxl_stream_write.c
@@ -23,6 +23,9 @@
  *  - libxl__stream_write_start()
  * - Start writing a stream from the start.
  *
+ *  - libxl__stream_write_start()
+ * - Write the records which form a checkpoint into a stream.
+ *
  * In normal operation, there are two tasks running at once; this stream
  * processing, and the the libxl-save-helper.  check_stream_finished() is used
  * to join all the tasks in both success and error cases.
@@ -39,6 +42,12 @@
  *  - Toolstack record
  *  - if (hvm), Qemu record
  *  - End record
+ *
+ * For checkpointed stream, there is a second loop which is triggered by a
+ * save-helper checkpoint callback.  It writes:
+ *  - Toolstack record
+ *  - if (hvm), Qemu record
+ *  - Checkpoint end record
  */
 
 static const uint8_t zero_padding[1U  REC_ALIGN_ORDER] = { 0 };
@@ -81,6 +90,16 @@ static void end_record_done(libxl__egc *egc,
 libxl__datacopier_state *dc,
 int onwrite, int errnoval);
 
+/* Event callbacks unique to checkpointed streams. */
+static void checkpoint_done(libxl__egc *egc,
+libxl__stream_write_state *stream,
+int rc);
+static void write_checkpoint_end_record(libxl__egc *egc,
+