Re: [Qemu-devel] [PATCH v3 1/2] qapi: Introduce blockdev-group-snapshot-sync command

2012-02-28 Thread Kevin Wolf
Am 28.02.2012 01:33, schrieb Jeff Cody:
 This is a QAPI/QMP-only command to take a snapshot of a group of
 devices. This is similar to the blockdev-snapshot-sync command, except
 blockdev-group-snapshot-sync accepts a list of devices, filenames, and
 formats.
 
 An attempt is made to keep the snapshot of the group atomic; if the
 creation or open of any of the new snapshots fails, then all of
 the new snapshots are abandoned, and the name of the snapshot image
 that failed is returned.  The failure case should not interrupt
 any operations.
 
 Rather than use bdrv_close() along with a subsequent bdrv_open() to
 perform the pivot, the original image is never closed and the new
 image is placed 'in front' of the original image via manipulation
 of the BlockDriverState fields.  Thus, once the new snapshot image
 has been successfully created, there are no more failure points
 before pivoting to the new snapshot.
 
 This allows the group of disks to remain consistent with each other,
 even across snapshot failures.
 
 Signed-off-by: Jeff Cody jc...@redhat.com
 ---
  block.c  |   80 
  block.h  |1 +
  block_int.h  |6 +++
  blockdev.c   |  133 ++
  qapi-schema.json |   38 +++
  5 files changed, 258 insertions(+), 0 deletions(-)
 
 diff --git a/block.c b/block.c
 index 3621d11..393e8bf 100644
 --- a/block.c
 +++ b/block.c
 @@ -880,6 +880,86 @@ void bdrv_make_anon(BlockDriverState *bs)
  bs->device_name[0] = '\0';
  }
  
 +/*
 + * Add new bs contents at the top of an image chain while the chain is
 + * live, while keeping required fields on the top layer.
 + *
 + * This will modify the BlockDriverState fields, and swap contents
 + * between bs_new and bs_top. Both bs_new and bs_top are modified.
 + *
 + * This function does not create any image files.
 + */
 +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
 +{
 +BlockDriverState tmp;
 +
 +/* the new bs must not be in bdrv_states */
 +bdrv_make_anon(bs_new);
 +
 +tmp = *bs_new;
 +
 +/* there are some fields that need to stay on the top layer: */
 +
 +/* dev info */
 +tmp.dev_ops   = bs_top->dev_ops;
 +tmp.dev_opaque= bs_top->dev_opaque;
 +tmp.dev   = bs_top->dev;
 +tmp.buffer_alignment  = bs_top->buffer_alignment;
 +tmp.copy_on_read  = bs_top->copy_on_read;
 +
 +/* i/o timing parameters */
 +tmp.slice_time= bs_top->slice_time;
 +tmp.slice_start   = bs_top->slice_start;
 +tmp.slice_end = bs_top->slice_end;
 +tmp.io_limits = bs_top->io_limits;
 +tmp.io_base   = bs_top->io_base;
 +tmp.throttled_reqs= bs_top->throttled_reqs;
 +tmp.block_timer   = bs_top->block_timer;
 +tmp.io_limits_enabled = bs_top->io_limits_enabled;
 +
 +/* geometry */
 +tmp.cyls  = bs_top->cyls;
 +tmp.heads = bs_top->heads;
 +tmp.secs  = bs_top->secs;
 +tmp.translation   = bs_top->translation;
 +
 +/* r/w error */
 +tmp.on_read_error = bs_top->on_read_error;
 +tmp.on_write_error= bs_top->on_write_error;
 +
 +/* i/o status */
 +tmp.iostatus_enabled  = bs_top->iostatus_enabled;
 +tmp.iostatus  = bs_top->iostatus;
 +
 +/* keep the same entry in bdrv_states */
 +pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
 +tmp.list = bs_top->list;
 +
 +/* The contents of 'tmp' will become bs_top, as we are
 + * swapping bs_new and bs_top contents. */
 +tmp.backing_hd = bs_new;

tmp.backing_file should be set as well (copied from bs_top->filename?)

 +
 +/* swap contents of the fixed new bs and the current top */
 +*bs_new = *bs_top;
 +*bs_top = tmp;
 +
 +/* clear the copied fields in the new backing file */
 +bdrv_detach_dev(bs_new, bs_new->dev);
 +
 +qemu_co_queue_init(&bs_new->throttled_reqs);
 +memset(&bs_new->io_base,   0, sizeof(bs_new->io_base));
 +memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
 +bdrv_iostatus_disable(bs_new);
 +
 +/* we don't use bdrv_io_limits_disable() for this, because we don't want
 + * to affect or delete the block_timer, as it has been moved to bs_top */
 +bs_new->io_limits_enabled = false;
 +bs_new->block_timer   = NULL;
 +bs_new->slice_time= 0;
 +bs_new->slice_start   = 0;
 +bs_new->slice_end = 0;
 +}
 +
  void bdrv_delete(BlockDriverState *bs)
  {
  assert(!bs->dev);
 diff --git a/block.h b/block.h
 index cae289b..190a780 100644
 --- a/block.h
 +++ b/block.h
 @@ -114,6 +114,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
  int bdrv_create_file(const char* filename, QEMUOptionParameter *options);
  BlockDriverState *bdrv_new(const char *device_name);
  void bdrv_make_anon(BlockDriverState *bs);
 +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);

[Qemu-devel] [PATCH v3 1/2] qapi: Introduce blockdev-group-snapshot-sync command

2012-02-27 Thread Jeff Cody
This is a QAPI/QMP-only command to take a snapshot of a group of
devices. This is similar to the blockdev-snapshot-sync command, except
blockdev-group-snapshot-sync accepts a list of devices, filenames, and
formats.

An attempt is made to keep the snapshot of the group atomic; if the
creation or open of any of the new snapshots fails, then all of
the new snapshots are abandoned, and the name of the snapshot image
that failed is returned.  The failure case should not interrupt
any operations.

Rather than use bdrv_close() along with a subsequent bdrv_open() to
perform the pivot, the original image is never closed and the new
image is placed 'in front' of the original image via manipulation
of the BlockDriverState fields.  Thus, once the new snapshot image
has been successfully created, there are no more failure points
before pivoting to the new snapshot.

This allows the group of disks to remain consistent with each other,
even across snapshot failures.

Signed-off-by: Jeff Cody jc...@redhat.com
---
 block.c  |   80 
 block.h  |1 +
 block_int.h  |6 +++
 blockdev.c   |  133 ++
 qapi-schema.json |   38 +++
 5 files changed, 258 insertions(+), 0 deletions(-)

diff --git a/block.c b/block.c
index 3621d11..393e8bf 100644
--- a/block.c
+++ b/block.c
@@ -880,6 +880,86 @@ void bdrv_make_anon(BlockDriverState *bs)
 bs->device_name[0] = '\0';
 }
 
+/*
+ * Add new bs contents at the top of an image chain while the chain is
+ * live, while keeping required fields on the top layer.
+ *
+ * This will modify the BlockDriverState fields, and swap contents
+ * between bs_new and bs_top. Both bs_new and bs_top are modified.
+ *
+ * This function does not create any image files.
+ */
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+{
+BlockDriverState tmp;
+
+/* the new bs must not be in bdrv_states */
+bdrv_make_anon(bs_new);
+
+tmp = *bs_new;
+
+/* there are some fields that need to stay on the top layer: */
+
+/* dev info */
+tmp.dev_ops   = bs_top->dev_ops;
+tmp.dev_opaque= bs_top->dev_opaque;
+tmp.dev   = bs_top->dev;
+tmp.buffer_alignment  = bs_top->buffer_alignment;
+tmp.copy_on_read  = bs_top->copy_on_read;
+
+/* i/o timing parameters */
+tmp.slice_time= bs_top->slice_time;
+tmp.slice_start   = bs_top->slice_start;
+tmp.slice_end = bs_top->slice_end;
+tmp.io_limits = bs_top->io_limits;
+tmp.io_base   = bs_top->io_base;
+tmp.throttled_reqs= bs_top->throttled_reqs;
+tmp.block_timer   = bs_top->block_timer;
+tmp.io_limits_enabled = bs_top->io_limits_enabled;
+
+/* geometry */
+tmp.cyls  = bs_top->cyls;
+tmp.heads = bs_top->heads;
+tmp.secs  = bs_top->secs;
+tmp.translation   = bs_top->translation;
+
+/* r/w error */
+tmp.on_read_error = bs_top->on_read_error;
+tmp.on_write_error= bs_top->on_write_error;
+
+/* i/o status */
+tmp.iostatus_enabled  = bs_top->iostatus_enabled;
+tmp.iostatus  = bs_top->iostatus;
+
+/* keep the same entry in bdrv_states */
+pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
+tmp.list = bs_top->list;
+
+/* The contents of 'tmp' will become bs_top, as we are
+ * swapping bs_new and bs_top contents. */
+tmp.backing_hd = bs_new;
+
+/* swap contents of the fixed new bs and the current top */
+*bs_new = *bs_top;
+*bs_top = tmp;
+
+/* clear the copied fields in the new backing file */
+bdrv_detach_dev(bs_new, bs_new->dev);
+
+qemu_co_queue_init(&bs_new->throttled_reqs);
+memset(&bs_new->io_base,   0, sizeof(bs_new->io_base));
+memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
+bdrv_iostatus_disable(bs_new);
+
+/* we don't use bdrv_io_limits_disable() for this, because we don't want
+ * to affect or delete the block_timer, as it has been moved to bs_top */
+bs_new->io_limits_enabled = false;
+bs_new->block_timer   = NULL;
+bs_new->slice_time= 0;
+bs_new->slice_start   = 0;
+bs_new->slice_end = 0;
+}
+
 void bdrv_delete(BlockDriverState *bs)
 {
 assert(!bs->dev);
diff --git a/block.h b/block.h
index cae289b..190a780 100644
--- a/block.h
+++ b/block.h
@@ -114,6 +114,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
 int bdrv_create_file(const char* filename, QEMUOptionParameter *options);
 BlockDriverState *bdrv_new(const char *device_name);
 void bdrv_make_anon(BlockDriverState *bs);
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
 void bdrv_delete(BlockDriverState *bs);
 int bdrv_parse_cache_flags(const char *mode, int *flags);
 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
diff --git a/block_int.h b/block_int.h
index 7be2988..5edc8c1