[Qemu-devel] [PATCHv2] block: add the optional file entry to query-block

2013-07-05 Thread Federico Simoncelli
This patch adds the optional file entry to the query-block output.
The value is a json-object representing the information about the
underlying file or device (when present).

Signed-off-by: Federico Simoncelli 
---
 block/qapi.c   |7 +++
 qapi-schema.json   |4 +++-
 qmp-commands.hx|8 
 tests/qemu-iotests/043.out |   15 +++
 4 files changed, 33 insertions(+), 1 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index a4bc411..40db983 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -171,6 +171,13 @@ void bdrv_query_image_info(BlockDriverState *bs,
 return;
 }
 
+if (bs->file) {
+bdrv_query_image_info(bs->file, &info->file, &err);
+if (info->file) {
+info->has_file = true;
+}
+}
+
 *p_info = info;
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index 5c32528..0cb872c 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -238,6 +238,8 @@
 #
 # @backing-image: #optional info of the backing image (since 1.6)
 #
+# @file: #optional info of the underlying file (since 1.6)
+#
 # Since: 1.3
 #
 ##
@@ -248,7 +250,7 @@
'*cluster-size': 'int', '*encrypted': 'bool',
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
-   '*backing-image': 'ImageInfo' } }
+   '*backing-image': 'ImageInfo', '*file': 'ImageInfo' } }
 
 ##
 # @ImageCheck:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 362f0e1..bb8a931 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1791,6 +1791,8 @@ Each json-object contain the following:
  - "backing-image": the detail of the backing image, it is an
 optional json-object only present when a
 backing image present for this image
+ - "file": an optional json-object representing the information
+   about the underlying file or device (when present)
 
 - "io-status": I/O operation status, only present if the device supports it
and the VM is configured to stop on errors. It's always reset
@@ -1842,6 +1844,12 @@ Example:
   "format":"qcow2",
   "virtual-size":2048000
   }
+  "file":{
+  "filename":"disks/test.qcow2",
+  "format": "file",
+  "virtual-size": 262144,
+  "actual-size": 139264
+  }
}
 },
 "type":"unknown"
diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out
index ad23337..6aea37b 100644
--- a/tests/qemu-iotests/043.out
+++ b/tests/qemu-iotests/043.out
@@ -44,6 +44,11 @@ cluster_size: 65536
 "filename": "TEST_DIR/t.IMGFMT", 
 "cluster-size": 65536, 
 "format": "IMGFMT", 
+"file": {
+"virtual-size": 327680, 
+"filename": "TEST_DIR/t.IMGFMT", 
+"format": "file", 
+}, 
 "backing-filename": "TEST_DIR/t.IMGFMT.2.base", 
 "dirty-flag": false
 }, 
@@ -52,6 +57,11 @@ cluster_size: 65536
 "filename": "TEST_DIR/t.IMGFMT.2.base", 
 "cluster-size": 65536, 
 "format": "IMGFMT", 
+"file": {
+"virtual-size": 327680, 
+"filename": "TEST_DIR/t.IMGFMT.2.base", 
+"format": "file", 
+}, 
 "backing-filename": "TEST_DIR/t.IMGFMT.1.base", 
 "dirty-flag": false
 }, 
@@ -60,6 +70,11 @@ cluster_size: 65536
 "filename": "TEST_DIR/t.IMGFMT.1.base", 
 "cluster-size": 65536, 
 "format": "IMGFMT", 
+"file": {
+"virtual-size": 327680, 
+"filename": "TEST_DIR/t.IMGFMT.1.base", 
+"format": "file", 
+}, 
 "dirty-flag": false
 }
 ]
-- 
1.7.1




[Qemu-devel] [PATCH] block: add the optional file entry to query-block

2013-06-28 Thread Federico Simoncelli
This patch adds the optional file entry to the query-block output.
The value is a json-object representing the information about the
underlying file or device (when present).

Signed-off-by: Federico Simoncelli 
---
 block/qapi.c   |9 -
 qapi-schema.json   |4 +++-
 qmp-commands.hx|8 
 tests/qemu-iotests/043.out |   15 +++
 4 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index a4bc411..03cd222 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -119,7 +119,7 @@ void bdrv_query_image_info(BlockDriverState *bs,
 
 info->filename= g_strdup(bs->filename);
 info->format  = g_strdup(bdrv_get_format_name(bs));
-info->virtual_size= total_sectors * 512;
+info->virtual_size= bdrv_getlength(bs);
 info->actual_size = bdrv_get_allocated_file_size(bs);
 info->has_actual_size = info->actual_size >= 0;
 if (bdrv_is_encrypted(bs)) {
@@ -171,6 +171,13 @@ void bdrv_query_image_info(BlockDriverState *bs,
 return;
 }
 
+if (bs->file) {
+bdrv_query_image_info(bs->file, &info->file, &err);
+if (info->file) {
+info->has_file = true;
+}
+}
+
 *p_info = info;
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index a30a728..524a4a0 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -238,6 +238,8 @@
 #
 # @backing-image: #optional info of the backing image (since 1.6)
 #
+# @file: #optional info of the underlying file (since 1.6)
+#
 # Since: 1.3
 #
 ##
@@ -248,7 +250,7 @@
'*cluster-size': 'int', '*encrypted': 'bool',
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
-   '*backing-image': 'ImageInfo' } }
+   '*backing-image': 'ImageInfo', '*file': 'ImageInfo' } }
 
 ##
 # @ImageCheck:
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 8cea5e5..c25ac77 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1745,6 +1745,8 @@ Each json-object contain the following:
  - "backing-image": the detail of the backing image, it is an
 optional json-object only present when a
 backing image present for this image
+ - "file": an optional json-object representing the information
+   about the underlying file or device (when present)
 
 - "io-status": I/O operation status, only present if the device supports it
and the VM is configured to stop on errors. It's always reset
@@ -1796,6 +1798,12 @@ Example:
   "format":"qcow2",
   "virtual-size":2048000
   }
+  "file":{
+  "filename":"disks/test.qcow2",
+  "format": "file",
+  "virtual-size": 262144,
+  "actual-size": 139264
+  }
}
 },
 "type":"unknown"
diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out
index ad23337..6aea37b 100644
--- a/tests/qemu-iotests/043.out
+++ b/tests/qemu-iotests/043.out
@@ -44,6 +44,11 @@ cluster_size: 65536
 "filename": "TEST_DIR/t.IMGFMT", 
 "cluster-size": 65536, 
 "format": "IMGFMT", 
+"file": {
+"virtual-size": 327680, 
+"filename": "TEST_DIR/t.IMGFMT", 
+"format": "file", 
+}, 
 "backing-filename": "TEST_DIR/t.IMGFMT.2.base", 
 "dirty-flag": false
 }, 
@@ -52,6 +57,11 @@ cluster_size: 65536
 "filename": "TEST_DIR/t.IMGFMT.2.base", 
 "cluster-size": 65536, 
 "format": "IMGFMT", 
+"file": {
+"virtual-size": 327680, 
+"filename": "TEST_DIR/t.IMGFMT.2.base", 
+"format": "file", 
+}, 
 "backing-filename": "TEST_DIR/t.IMGFMT.1.base", 
 "dirty-flag": false
 }, 
@@ -60,6 +70,11 @@ cluster_size: 65536
 "filename": "TEST_DIR/t.IMGFMT.1.base", 
 "cluster-size": 65536, 
 "format": "IMGFMT", 
+"file": {
+"virtual-size": 327680, 
+"filename": "TEST_DIR/t.IMGFMT.1.base", 
+"format": "file", 
+}, 
 "dirty-flag": false
 }
 ]
-- 
1.7.1




[Qemu-devel] [PATCH] block: add apparent-size to query-block

2013-06-21 Thread Federico Simoncelli
This patch adds the apparent-size entry to the query-block output.
The value represents the apparent size in bytes of the image, e.g.
file size (including the blocks not yet allocated) or block device
size.

Signed-off-by: Federico Simoncelli 
---
 block/qapi.c |3 ++-
 qapi-schema.json |4 +++-
 qmp-commands.hx  |1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index a4bc411..ccaba67 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -119,8 +119,9 @@ void bdrv_query_image_info(BlockDriverState *bs,
 
 info->filename= g_strdup(bs->filename);
 info->format  = g_strdup(bdrv_get_format_name(bs));
-info->virtual_size= total_sectors * 512;
+info->virtual_size= bdrv_getlength(bs);
 info->actual_size = bdrv_get_allocated_file_size(bs);
+info->apparent_size   = bdrv_getlength(bs->file);
 info->has_actual_size = info->actual_size >= 0;
 if (bdrv_is_encrypted(bs)) {
 info->encrypted = true;
diff --git a/qapi-schema.json b/qapi-schema.json
index a80ee40..c7403ed 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -222,6 +222,8 @@
 #
 # @actual-size: #optional actual size on disk in bytes of the image
 #
+# @apparent-size: apparent size in bytes of the image
+#
 # @dirty-flag: #optional true if image is not cleanly closed
 #
 # @cluster-size: #optional size of a cluster in bytes
@@ -244,7 +246,7 @@
 
 { 'type': 'ImageInfo',
   'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
-   '*actual-size': 'int', 'virtual-size': 'int',
+   '*actual-size': 'int', 'virtual-size': 'int', 'apparent-size': 
'int',
'*cluster-size': 'int', '*encrypted': 'bool',
'*backing-filename': 'str', '*full-backing-filename': 'str',
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'],
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 8cea5e5..3f3aded 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1714,6 +1714,7 @@ Each json-object contain the following:
  - "actual-size": actual size on disk in bytes of the image, not
   present when image does not support thin
   provision (json-int, optional)
+ - "apparent-size": apparent size in bytes of the image
  - "cluster-size": size of a cluster in bytes, not present if image
format does not support it (json-int, optional)
  - "encrypted": true if the image is encrypted, not present means
-- 
1.7.1




[Qemu-devel] [PATCHv4 1/2] qemu-img: find the image end offset during check

2013-01-28 Thread Federico Simoncelli
This patch adds the support for reporting the image end offset (in
bytes). This is particularly useful after a conversion (or a rebase)
where the destination is a block device in order to find the first
unused byte at the end of the image.

Signed-off-by: Federico Simoncelli 
---
 block/qcow2-refcount.c   |   10 --
 include/block/block.h|1 +
 qemu-img.c   |4 
 tests/qemu-iotests/026   |6 +++---
 tests/qemu-iotests/036   |3 ++-
 tests/qemu-iotests/039   |2 +-
 tests/qemu-iotests/044.out   |1 +
 tests/qemu-iotests/common.rc |5 +++--
 8 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 6a95aa6..45ad043 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   BdrvCheckMode fix)
 {
 BDRVQcowState *s = bs->opaque;
-int64_t size, i;
+int64_t size, i, highest_cluster;
 int nb_clusters, refcount1, refcount2;
 QCowSnapshot *sn;
 uint16_t *refcount_table;
@@ -1187,7 +1187,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 /* compare ref counts */
-for(i = 0; i < nb_clusters; i++) {
+for(i = 0, highest_cluster = 0; i < nb_clusters; i++) {
 refcount1 = get_refcount(bs, i);
 if (refcount1 < 0) {
 fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
@@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 refcount2 = refcount_table[i];
+
+if (refcount1 > 0 || refcount2 > 0) {
+highest_cluster = i;
+}
+
 if (refcount1 != refcount2) {
 
 /* Check if we're allowed to fix the mismatch */
@@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 }
 
+res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
 ret = 0;
 
 fail:
diff --git a/include/block/block.h b/include/block/block.h
index ffd1936..85eb3d9 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -213,6 +213,7 @@ typedef struct BdrvCheckResult {
 int check_errors;
 int corruptions_fixed;
 int leaks_fixed;
+int64_t image_end_offset;
 BlockFragInfo bfi;
 } BdrvCheckResult;
 
diff --git a/qemu-img.c b/qemu-img.c
index 85d3740..e80c1c5 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -475,6 +475,10 @@ static int img_check(int argc, char **argv)
 result.bfi.fragmented_clusters * 100.0 / 
result.bfi.allocated_clusters);
 }
 
+if (result.image_end_offset > 0) {
+printf("Image end offset: %" PRId64 "\n", result.image_end_offset);
+}
+
 bdrv_delete(bs);
 
 if (ret < 0 || result.check_errors) {
diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026
index 1602ccd..107a3ff 100755
--- a/tests/qemu-iotests/026
+++ b/tests/qemu-iotests/026
@@ -102,7 +102,7 @@ if [ "$event" == "l2_load" ]; then
 $QEMU_IO -c "read $vmstate 0 128k " $BLKDBG_TEST_IMG | _filter_qemu_io
 fi
 
-$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0"
+_check_test_img 2>&1 | grep -v "refcount=1 reference=0"
 
 done
 done
@@ -147,7 +147,7 @@ echo
 echo "Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate"
 $QEMU_IO -c "write $vmstate 0 64M" $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0"
+_check_test_img 2>&1 | grep -v "refcount=1 reference=0"
 
 done
 done
@@ -186,7 +186,7 @@ echo
 echo "Event: $event; errno: $errno; imm: $imm; once: $once"
 $QEMU_IO -c "write -b 0 64k" $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0"
+_check_test_img 2>&1 | grep -v "refcount=1 reference=0"
 
 done
 done
diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036
index 329533e..4dbfc57 100755
--- a/tests/qemu-iotests/036
+++ b/tests/qemu-iotests/036
@@ -59,7 +59,8 @@ _make_test_img 64M
 echo
 echo === Repair image ===
 echo
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
+
 ./qcow2.py $TEST_IMG dump-header
 
 # success, all done
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index c5ae806..ae35175 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -86,7 +86,7 @@ $QEMU_IO -r -c "read -P 0x5a 0 512" $TEST_IMG | 
_filter_qemu_io
 echo
 echo "== Repairing the image file must succeed =="
 
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
 
 # The dirty bit must not be set
 ./qcow2.py $TEST_IMG dump-header | g

[Qemu-devel] [PATCHv4 2/2] qemu-img: add json output option to the check command

2013-01-28 Thread Federico Simoncelli
This option --output=[human|json] makes qemu-img check output a human
or JSON representation at the choice of the user.

Signed-off-by: Federico Simoncelli 
---
 qapi-schema.json |   46 +++
 qemu-img-cmds.hx |4 +-
 qemu-img.c   |  232 +++---
 qemu-img.texi|5 +-
 4 files changed, 220 insertions(+), 67 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 6d7252b..dc99c28 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -245,6 +245,52 @@
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } 
}
 
 ##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors occurred during check
+#
+# @image-end-offset: #optional offset (in bytes) where the image ends, this
+#field is present if the driver for the image format
+#supports it
+#
+# @corruptions: #optional number of corruptions found during the check if any
+#
+# @leaks: #optional number of leaks found during the check if any
+#
+# @corruptions-fixed: #optional number of corruptions fixed during the check
+# if any
+#
+# @leaks-fixed: #optional number of leaks fixed during the check if any
+#
+# @total-clusters: #optional total number of clusters, this field is present
+#  if the driver for the image format supports it
+#
+# @allocated-clusters: #optional total number of allocated clusters, this
+#  field is present if the driver for the image format
+#  supports it
+#
+# @fragmented-clusters: #optional total number of fragmented clusters, this
+#   field is present if the driver for the image format
+#   supports it
+#
+# Since: 1.4
+#
+##
+
+{ 'type': 'ImageCheck',
+  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+   '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+   '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+   '*total-clusters': 'int', '*allocated-clusters': 'int',
+   '*fragmented-clusters': 'int' } }
+
+##
 # @StatusInfo:
 #
 # Information about VCPU run state
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index a181363..259fc14 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -10,9 +10,9 @@ STEXI
 ETEXI
 
 DEF("check", img_check,
-"check [-f fmt] [-r [leaks | all]] filename")
+"check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename")
 STEXI
-@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename}
+@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] 
@var{filename}
 ETEXI
 
 DEF("create", img_create,
diff --git a/qemu-img.c b/qemu-img.c
index e80c1c5..34249fe 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -42,6 +42,16 @@ typedef struct img_cmd_t {
 int (*handler)(int argc, char **argv);
 } img_cmd_t;
 
+enum {
+OPTION_OUTPUT = 256,
+OPTION_BACKING_CHAIN = 257,
+};
+
+typedef enum OutputFormat {
+OFORMAT_JSON,
+OFORMAT_HUMAN,
+} OutputFormat;
+
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
 #define BDRV_DEFAULT_CACHE "writeback"
@@ -375,6 +385,96 @@ static int img_create(int argc, char **argv)
 return 0;
 }
 
+static void dump_json_image_check(ImageCheck *check)
+{
+Error *errp = NULL;
+QString *str;
+QmpOutputVisitor *ov = qmp_output_visitor_new();
+QObject *obj;
+visit_type_ImageCheck(qmp_output_get_visitor(ov),
+  &check, NULL, &errp);
+obj = qmp_output_get_qobject(ov);
+str = qobject_to_json_pretty(obj);
+assert(str != NULL);
+printf("%s\n", qstring_get_str(str));
+qobject_decref(obj);
+qmp_output_visitor_cleanup(ov);
+QDECREF(str);
+}
+
+static void dump_human_image_check(ImageCheck *check)
+{
+if (!(check->corruptions || check->leaks || check->check_errors)) {
+printf("No errors were found on the image.\n");
+} else {
+if (check->corruptions) {
+printf("\n%" PRId64 " errors were found on the image.\n"
+"Data may be corrupted, or further writes to the image "
+"may corrupt it.\n",
+check->corruptions);
+}
+
+if (check->leaks) {
+printf("\n%" PRId64 " leaked clusters were found on the image.\n&qu

[Qemu-devel] [PATCHv3 2/2] qemu-img: add json output option to the check command

2013-01-21 Thread Federico Simoncelli
This option --output=[human|json] make qemu-img check output an human
or JSON representation at the choice of the user.

Signed-off-by: Federico Simoncelli 
---
 qapi-schema.json |   46 +++
 qemu-img-cmds.hx |4 +-
 qemu-img.c   |  232 +++--
 qemu-img.texi|5 +-
 4 files changed, 221 insertions(+), 66 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 6d7252b..d576728 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -245,6 +245,52 @@
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } 
}
 
 ##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors occurred during check
+#
+# @highest-offset: #optional highest offset (in bytes) in use by the image,
+#  this field is present if the driver for the image format
+#  supports it
+#
+# @corruptions: #optional number of corruptions found during the check if any
+#
+# @leaks: #optional number of leaks found during the check if any
+#
+# @corruptions-fixed: #optional number of corruptions fixed during the check
+# if any
+#
+# @leaks-fixed: #optional number of leaks fixed during the check if any
+#
+# @total-clusters: #optional total number of clusters, this field is present
+#  if the driver for the image format supports it
+#
+# @allocated-clusters: #optional total number of allocated clusters, this
+#  field is present if the driver for the image format
+#  supports it
+#
+# @fragmented-clusters: #optional total number of fragmented clusters, this
+#   field is present if the driver for the image format
+#   supports it
+#
+# Since: 1.4
+#
+##
+
+{ 'type': 'ImageCheck',
+  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+   '*highest-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+   '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+   '*total-clusters': 'int', '*allocated-clusters': 'int',
+   '*fragmented-clusters': 'int' } }
+
+##
 # @StatusInfo:
 #
 # Information about VCPU run state
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index a181363..259fc14 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -10,9 +10,9 @@ STEXI
 ETEXI
 
 DEF("check", img_check,
-"check [-f fmt] [-r [leaks | all]] filename")
+"check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename")
 STEXI
-@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename}
+@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] 
@var{filename}
 ETEXI
 
 DEF("create", img_create,
diff --git a/qemu-img.c b/qemu-img.c
index 7b977cd..bcb21d5 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -42,6 +42,16 @@ typedef struct img_cmd_t {
 int (*handler)(int argc, char **argv);
 } img_cmd_t;
 
+enum {
+OPTION_OUTPUT = 256,
+OPTION_BACKING_CHAIN = 257,
+};
+
+typedef enum OutputFormat {
+OFORMAT_JSON,
+OFORMAT_HUMAN,
+} OutputFormat;
+
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
 #define BDRV_DEFAULT_CACHE "writeback"
@@ -375,6 +385,96 @@ static int img_create(int argc, char **argv)
 return 0;
 }
 
+static void dump_json_image_check(ImageCheck *check)
+{
+Error *errp = NULL;
+QString *str;
+QmpOutputVisitor *ov = qmp_output_visitor_new();
+QObject *obj;
+visit_type_ImageCheck(qmp_output_get_visitor(ov),
+  &check, NULL, &errp);
+obj = qmp_output_get_qobject(ov);
+str = qobject_to_json_pretty(obj);
+assert(str != NULL);
+printf("%s\n", qstring_get_str(str));
+qobject_decref(obj);
+qmp_output_visitor_cleanup(ov);
+QDECREF(str);
+}
+
+static void dump_human_image_check(ImageCheck *check)
+{
+if (!(check->corruptions || check->leaks || check->check_errors)) {
+printf("No errors were found on the image.\n");
+} else {
+if (check->corruptions) {
+printf("\n%" PRId64 " errors were found on the image.\n"
+"Data may be corrupted, or further writes to the image "
+"may corrupt it.\n",
+check->corruptions);
+}
+
+if (check->leaks) {
+printf("\n%" PRId64 " leaked clusters were found on the image.\n&qu

[Qemu-devel] [PATCHv3 1/2] qemu-img: find the highest offset in use during check

2013-01-21 Thread Federico Simoncelli
This patch adds the support for reporting the highest offset in use by
an image. This is particularly useful after a conversion (or a rebase)
where the destination is a block device in order to find the actual
amount of space in use.

Signed-off-by: Federico Simoncelli 
---
 block/qcow2-refcount.c   |   10 --
 include/block/block.h|1 +
 qemu-img.c   |4 
 tests/qemu-iotests/026   |6 +++---
 tests/qemu-iotests/036   |3 ++-
 tests/qemu-iotests/039   |2 +-
 tests/qemu-iotests/044.out   |1 +
 tests/qemu-iotests/common.rc |5 +++--
 8 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 6a95aa6..3444f85 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   BdrvCheckMode fix)
 {
 BDRVQcowState *s = bs->opaque;
-int64_t size, i;
+int64_t size, i, highest_cluster;
 int nb_clusters, refcount1, refcount2;
 QCowSnapshot *sn;
 uint16_t *refcount_table;
@@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 s->refcount_table_offset,
 s->refcount_table_size * sizeof(uint64_t));
 
-for(i = 0; i < s->refcount_table_size; i++) {
+for(i = 0, highest_cluster = 0; i < s->refcount_table_size; i++) {
 uint64_t offset, cluster;
 offset = s->refcount_table[i];
 cluster = offset >> s->cluster_bits;
@@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 refcount2 = refcount_table[i];
+
+if (refcount1 > 0 || refcount2 > 0) {
+highest_cluster = i;
+}
+
 if (refcount1 != refcount2) {
 
 /* Check if we're allowed to fix the mismatch */
@@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 }
 
+res->highest_offset = (highest_cluster + 1) * s->cluster_size;
 ret = 0;
 
 fail:
diff --git a/include/block/block.h b/include/block/block.h
index ffd1936..b5cf6ac 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -213,6 +213,7 @@ typedef struct BdrvCheckResult {
 int check_errors;
 int corruptions_fixed;
 int leaks_fixed;
+int64_t highest_offset;
 BlockFragInfo bfi;
 } BdrvCheckResult;
 
diff --git a/qemu-img.c b/qemu-img.c
index 85d3740..7b977cd 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -475,6 +475,10 @@ static int img_check(int argc, char **argv)
 result.bfi.fragmented_clusters * 100.0 / 
result.bfi.allocated_clusters);
 }
 
+if (result.highest_offset > 0) {
+printf("Highest offset in use: %" PRId64 "\n", result.highest_offset);
+}
+
 bdrv_delete(bs);
 
 if (ret < 0 || result.check_errors) {
diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026
index 1602ccd..107a3ff 100755
--- a/tests/qemu-iotests/026
+++ b/tests/qemu-iotests/026
@@ -102,7 +102,7 @@ if [ "$event" == "l2_load" ]; then
 $QEMU_IO -c "read $vmstate 0 128k " $BLKDBG_TEST_IMG | _filter_qemu_io
 fi
 
-$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0"
+_check_test_img 2>&1 | grep -v "refcount=1 reference=0"
 
 done
 done
@@ -147,7 +147,7 @@ echo
 echo "Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate"
 $QEMU_IO -c "write $vmstate 0 64M" $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0"
+_check_test_img 2>&1 | grep -v "refcount=1 reference=0"
 
 done
 done
@@ -186,7 +186,7 @@ echo
 echo "Event: $event; errno: $errno; imm: $imm; once: $once"
 $QEMU_IO -c "write -b 0 64k" $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0"
+_check_test_img 2>&1 | grep -v "refcount=1 reference=0"
 
 done
 done
diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036
index 329533e..4dbfc57 100755
--- a/tests/qemu-iotests/036
+++ b/tests/qemu-iotests/036
@@ -59,7 +59,8 @@ _make_test_img 64M
 echo
 echo === Repair image ===
 echo
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
+
 ./qcow2.py $TEST_IMG dump-header
 
 # success, all done
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index c5ae806..ae35175 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -86,7 +86,7 @@ $QEMU_IO -r -c "read -P 0x5a 0 512" $TEST_IMG | 
_filter_qemu_io
 echo
 echo "== Repairing the image file must succeed =="
 
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
 
 # The dirty bit must not be set
 ./qcow2.py $TEST_IMG dump-header | grep incom

[Qemu-devel] [PATCH 2/2] qemu-img: add json output option to the check command

2012-12-10 Thread Federico Simoncelli
This option --output=[human|json] make qemu-img check output an human
or JSON representation at the choice of the user.

Signed-off-by: Federico Simoncelli 
---
 qapi-schema.json |   38 +
 qemu-img-cmds.hx |4 +-
 qemu-img.c   |  246 +++---
 qemu-img.texi|5 +-
 4 files changed, 221 insertions(+), 72 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 5dfa052..8877285 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -245,6 +245,44 @@
'*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } 
}
 
 ##
+# @ImageCheck:
+#
+# Information about a QEMU image file check
+#
+# @filename: name of the image file checked
+#
+# @format: format of the image file checked
+#
+# @check-errors: number of unexpected errors occurred during check
+#
+# @highest-offset: #optional highest offset (in bytes) in use by the image
+#
+# @corruptions: #optional number of corruptions found during the check
+#
+# @leaks: #optional number of leaks found during the check
+#
+# @corruptions-fixed: #optional number of corruptions fixed during the check
+#
+# @leaks-fixed: #optional number of leaks fixed during the check
+#
+# @total-clusters: #optional total number of clusters
+#
+# @allocated-clusters: #optional total number of allocated clusters
+#
+# @fragmented-clusters: #optional total number of fragmented clusters
+#
+# Since: 1.4
+#
+##
+
+{ 'type': 'ImageCheck',
+  'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int',
+   '*highest-offset': 'int', '*corruptions': 'int', '*leaks': 'int',
+   '*corruptions-fixed': 'int', '*leaks-fixed': 'int',
+   '*total-clusters': 'int', '*allocated-clusters': 'int',
+   '*fragmented-clusters': 'int' } }
+
+##
 # @StatusInfo:
 #
 # Information about VCPU run state
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index a181363..259fc14 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -10,9 +10,9 @@ STEXI
 ETEXI
 
 DEF("check", img_check,
-"check [-f fmt] [-r [leaks | all]] filename")
+"check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename")
 STEXI
-@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename}
+@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] 
@var{filename}
 ETEXI
 
 DEF("create", img_create,
diff --git a/qemu-img.c b/qemu-img.c
index 45c1ec1..18ba5c2 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -42,6 +42,16 @@ typedef struct img_cmd_t {
 int (*handler)(int argc, char **argv);
 } img_cmd_t;
 
+enum {
+OPTION_OUTPUT = 256,
+OPTION_BACKING_CHAIN = 257,
+};
+
+typedef enum OutputFormat {
+OFORMAT_JSON,
+OFORMAT_HUMAN,
+} OutputFormat;
+
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
 #define BDRV_DEFAULT_CACHE "writeback"
@@ -370,6 +380,122 @@ out:
 return 0;
 }
 
+static void dump_json_image_check(ImageCheck *check)
+{
+Error *errp = NULL;
+QString *str;
+QmpOutputVisitor *ov = qmp_output_visitor_new();
+QObject *obj;
+visit_type_ImageCheck(qmp_output_get_visitor(ov),
+  &check, NULL, &errp);
+obj = qmp_output_get_qobject(ov);
+str = qobject_to_json_pretty(obj);
+assert(str != NULL);
+printf("%s\n", qstring_get_str(str));
+qobject_decref(obj);
+qmp_output_visitor_cleanup(ov);
+QDECREF(str);
+}
+
+static void dump_human_image_check(ImageCheck *check)
+{
+if (check->corruptions_fixed || check->leaks_fixed) {
+ printf("The following inconsistencies were found and repaired:\n\n"
+"%" PRId64 " leaked clusters\n"
+"%" PRId64 " corruptions\n\n"
+"Double checking the fixed image now...\n",
+check->leaks_fixed,
+check->corruptions_fixed);
+}
+
+if (!(check->corruptions || check->leaks || check->check_errors)) {
+printf("No errors were found on the image.\n");
+} else {
+if (check->corruptions) {
+printf("\n%" PRId64 " errors were found on the image.\n"
+"Data may be corrupted, or further writes to the image "
+"may corrupt it.\n",
+check->corruptions);
+}
+
+if (check->leaks) {
+printf("\n%" PRId64 " leaked clusters were found on the image.\n"
+"This means waste of disk space, but no harm t

[Qemu-devel] [PATCHv2 1/2] qemu-img: find the highest offset in use during check

2012-12-10 Thread Federico Simoncelli
This patch adds the support for reporting the highest offset in use by
an image. This is particularly useful after a conversion (or a rebase)
where the destination is a block device in order to find the actual
amount of space in use.

Signed-off-by: Federico Simoncelli 
---
 block.h  |1 +
 block/qcow2-refcount.c   |   10 --
 qemu-img.c   |4 
 tests/qemu-iotests/026   |6 +++---
 tests/qemu-iotests/036   |3 ++-
 tests/qemu-iotests/039   |2 +-
 tests/qemu-iotests/common.rc |5 +++--
 7 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/block.h b/block.h
index 722c620..de42e8c 100644
--- a/block.h
+++ b/block.h
@@ -213,6 +213,7 @@ typedef struct BdrvCheckResult {
 int check_errors;
 int corruptions_fixed;
 int leaks_fixed;
+int64_t highest_offset;
 BlockFragInfo bfi;
 } BdrvCheckResult;
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 96224d1..017439d 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   BdrvCheckMode fix)
 {
 BDRVQcowState *s = bs->opaque;
-int64_t size, i;
+int64_t size, i, highest_cluster;
 int nb_clusters, refcount1, refcount2;
 QCowSnapshot *sn;
 uint16_t *refcount_table;
@@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 s->refcount_table_offset,
 s->refcount_table_size * sizeof(uint64_t));
 
-for(i = 0; i < s->refcount_table_size; i++) {
+for(i = 0, highest_cluster = 0; i < s->refcount_table_size; i++) {
 uint64_t offset, cluster;
 offset = s->refcount_table[i];
 cluster = offset >> s->cluster_bits;
@@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 refcount2 = refcount_table[i];
+
+if (refcount1 > 0 || refcount2 > 0) {
+highest_cluster = i;
+}
+
 if (refcount1 != refcount2) {
 
 /* Check if we're allowed to fix the mismatch */
@@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 }
 
+res->highest_offset = (highest_cluster + 1) * s->cluster_size;
 ret = 0;
 
 fail:
diff --git a/qemu-img.c b/qemu-img.c
index e29e01b..45c1ec1 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -470,6 +470,10 @@ static int img_check(int argc, char **argv)
 result.bfi.fragmented_clusters * 100.0 / 
result.bfi.allocated_clusters);
 }
 
+if (result.highest_offset > 0) {
+printf("Highest offset in use: %" PRId64 "\n", result.highest_offset);
+}
+
 bdrv_delete(bs);
 
 if (ret < 0 || result.check_errors) {
diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026
index 1602ccd..107a3ff 100755
--- a/tests/qemu-iotests/026
+++ b/tests/qemu-iotests/026
@@ -102,7 +102,7 @@ if [ "$event" == "l2_load" ]; then
 $QEMU_IO -c "read $vmstate 0 128k " $BLKDBG_TEST_IMG | _filter_qemu_io
 fi
 
-$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0"
+_check_test_img 2>&1 | grep -v "refcount=1 reference=0"
 
 done
 done
@@ -147,7 +147,7 @@ echo
 echo "Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate"
 $QEMU_IO -c "write $vmstate 0 64M" $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0"
+_check_test_img 2>&1 | grep -v "refcount=1 reference=0"
 
 done
 done
@@ -186,7 +186,7 @@ echo
 echo "Event: $event; errno: $errno; imm: $imm; once: $once"
 $QEMU_IO -c "write -b 0 64k" $BLKDBG_TEST_IMG | _filter_qemu_io
 
-$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0"
+_check_test_img 2>&1 | grep -v "refcount=1 reference=0"
 
 done
 done
diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036
index 329533e..4dbfc57 100755
--- a/tests/qemu-iotests/036
+++ b/tests/qemu-iotests/036
@@ -59,7 +59,8 @@ _make_test_img 64M
 echo
 echo === Repair image ===
 echo
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
+
 ./qcow2.py $TEST_IMG dump-header
 
 # success, all done
diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039
index c5ae806..ae35175 100755
--- a/tests/qemu-iotests/039
+++ b/tests/qemu-iotests/039
@@ -86,7 +86,7 @@ $QEMU_IO -r -c "read -P 0x5a 0 512" $TEST_IMG | 
_filter_qemu_io
 echo
 echo "== Repairing the image file must succeed =="
 
-$QEMU_IMG check -r all $TEST_IMG
+_check_test_img -r all
 
 # The dirty bit must not be set
 ./qcow2.py $TEST_IMG dump-header | grep incompatible_features
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc

[Qemu-devel] [PATCH] qemu-img: find the highest offset in use during check

2012-12-10 Thread Federico Simoncelli
This patch adds the support for reporting the highest offset in use by
an image. This is particularly useful after a conversion (or a rebase)
where the destination is a block device in order to find the actual
amount of space in use.

Signed-off-by: Federico Simoncelli 
---
 block.h|1 +
 block/qcow2-refcount.c |   10 --
 qemu-img.c |4 
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/block.h b/block.h
index 722c620..de42e8c 100644
--- a/block.h
+++ b/block.h
@@ -213,6 +213,7 @@ typedef struct BdrvCheckResult {
 int check_errors;
 int corruptions_fixed;
 int leaks_fixed;
+int64_t highest_offset;
 BlockFragInfo bfi;
 } BdrvCheckResult;
 
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 96224d1..017439d 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
   BdrvCheckMode fix)
 {
 BDRVQcowState *s = bs->opaque;
-int64_t size, i;
+int64_t size, i, highest_cluster;
 int nb_clusters, refcount1, refcount2;
 QCowSnapshot *sn;
 uint16_t *refcount_table;
@@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 s->refcount_table_offset,
 s->refcount_table_size * sizeof(uint64_t));
 
-for(i = 0; i < s->refcount_table_size; i++) {
+for(i = 0, highest_cluster = 0; i < s->refcount_table_size; i++) {
 uint64_t offset, cluster;
 offset = s->refcount_table[i];
 cluster = offset >> s->cluster_bits;
@@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 
 refcount2 = refcount_table[i];
+
+if (refcount1 > 0 || refcount2 > 0) {
+highest_cluster = i;
+}
+
 if (refcount1 != refcount2) {
 
 /* Check if we're allowed to fix the mismatch */
@@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, 
BdrvCheckResult *res,
 }
 }
 
+res->highest_offset = (highest_cluster + 1) * s->cluster_size;
 ret = 0;
 
 fail:
diff --git a/qemu-img.c b/qemu-img.c
index e29e01b..3a8090b 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -470,6 +470,10 @@ static int img_check(int argc, char **argv)
 result.bfi.fragmented_clusters * 100.0 / 
result.bfi.allocated_clusters);
 }
 
+if (result.highest_offset > 0) {
+printf("Highest offset in use: %lu\n", result.highest_offset);
+}
+
 bdrv_delete(bs);
 
 if (ret < 0 || result.check_errors) {
-- 
1.7.1




Re: [Qemu-devel] [PATCH 4/7] block: close unused image files at the end of streaming

2012-04-06 Thread Federico Simoncelli
- Original Message -
> From: "Paolo Bonzini" 
> To: qemu-devel@nongnu.org
> Cc: "Marcelo Tosatti" , "Federico Simoncelli" 
> 
> Sent: Thursday, April 5, 2012 5:42:58 PM
> Subject: [PATCH 4/7] block: close unused image files at the end of streaming
> 
> From: Marcelo Tosatti 
> 
> Close the now unused images that were part of the previous backing
> file
> chain and adjust ->backing_hd properly.
> 
> Note that this only works with relative paths.

s/relative/absolute/

> Given the images:
> 
>  /tmp/a/base.raw
>  /tmp/a/snap1.qcow2
>  /tmp/b/snap2.qcow2
> 
> chained as:
> 
>  base(bak:"") <- snap1(bak:"base.raw") <-
>  snap2(bak:"../a/snap1.qcow2")
> 
> merging snap1 and snap2 we will obtain:
> 
>  base(bak:"") <- snap2(bak:"base.raw")
> 
> However this should be maintained by the user/admin: one can also
> screw up relative paths using qemu-img manually.

The patch is fine but I disagree with this comment. The user/admin didn't make
any mistake and he shouldn't be in charge of additional maintenance (which is
also tricky since the VM is running).

-- 
Federico



[Qemu-devel] [PATCH v3] qapi: add c_fun to escape function names

2012-03-20 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli 
---
 scripts/qapi-commands.py |   14 +++---
 scripts/qapi-types.py|4 ++--
 scripts/qapi-visit.py|4 ++--
 scripts/qapi.py  |5 -
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py
index 3aabf61..30a24d2 100644
--- a/scripts/qapi-commands.py
+++ b/scripts/qapi-commands.py
@@ -42,7 +42,7 @@ def generate_command_decl(name, args, ret_type):
 return mcgen('''
 %(ret_type)s qmp_%(name)s(%(args)sError **errp);
 ''',
- ret_type=c_type(ret_type), name=c_var(name), 
args=arglist).strip()
+ ret_type=c_type(ret_type), name=c_fun(name), 
args=arglist).strip()
 
 def gen_sync_call(name, args, ret_type, indent=0):
 ret = ""
@@ -59,7 +59,7 @@ def gen_sync_call(name, args, ret_type, indent=0):
 %(retval)sqmp_%(name)s(%(args)serrp);
 
 ''',
-name=c_var(name), args=arglist, retval=retval).rstrip()
+name=c_fun(name), args=arglist, retval=retval).rstrip()
 if ret_type:
 ret += "\n" + mcgen(''''
 if (!error_is_set(errp)) {
@@ -74,7 +74,7 @@ if (!error_is_set(errp)) {
 def gen_marshal_output_call(name, ret_type):
 if not ret_type:
 return ""
-return "qmp_marshal_output_%s(retval, ret, errp);" % c_var(name)
+return "qmp_marshal_output_%s(retval, ret, errp);" % c_fun(name)
 
 def gen_visitor_output_containers_decl(ret_type):
 ret = ""
@@ -198,16 +198,16 @@ static void qmp_marshal_output_%(c_name)s(%(c_ret_type)s 
ret_in, QObject **ret_o
 qapi_dealloc_visitor_cleanup(md);
 }
 ''',
-c_ret_type=c_type(ret_type), c_name=c_var(name),
+c_ret_type=c_type(ret_type), c_name=c_fun(name),
 visitor=type_visitor(ret_type))
 
 return ret
 
 def gen_marshal_input_decl(name, args, ret_type, middle_mode):
 if middle_mode:
-return 'int qmp_marshal_input_%s(Monitor *mon, const QDict *qdict, 
QObject **ret)' % c_var(name)
+return 'int qmp_marshal_input_%s(Monitor *mon, const QDict *qdict, 
QObject **ret)' % c_fun(name)
 else:
-return 'static void qmp_marshal_input_%s(QDict *args, QObject **ret, 
Error **errp)' % c_var(name)
+return 'static void qmp_marshal_input_%s(QDict *args, QObject **ret, 
Error **errp)' % c_fun(name)
 
 
 
@@ -298,7 +298,7 @@ def gen_registry(commands):
 registry += mcgen('''
 qmp_register_command("%(name)s", qmp_marshal_input_%(c_name)s);
 ''',
- name=cmd['command'], c_name=c_var(cmd['command']))
+ name=cmd['command'], c_name=c_fun(cmd['command']))
 pop_indent()
 ret = mcgen('''
 static void qmp_init_marshal(void)
diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index 727fb77..4a734f5 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -100,7 +100,7 @@ typedef enum %(name)s
 %(abbrev)s_%(value)s = %(i)d,
 ''',
  abbrev=de_camel_case(name).upper(),
- value=c_var(value).upper(),
+ value=c_fun(value).upper(),
  i=i)
 i += 1
 
@@ -126,7 +126,7 @@ struct %(name)s
 %(c_type)s %(c_name)s;
 ''',
  c_type=c_type(typeinfo[key]),
- c_name=c_var(key))
+ c_name=c_fun(key))
 
 ret += mcgen('''
 };
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index 54117d4..78c947c 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -129,9 +129,9 @@ void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const 
char *name, Error **
 break;
 ''',
 abbrev = de_camel_case(name).upper(),
-enum = de_camel_case(key).upper(),
+enum = c_fun(de_camel_case(key)).upper(),
 c_type=members[key],
-c_name=c_var(key))
+c_name=c_fun(key))
 
 ret += mcgen('''
 default:
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 6e05469..e062336 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -131,7 +131,10 @@ def camel_case(name):
 return new_name
 
 def c_var(name):
-return '_'.join(name.split('-')).lstrip("*")
+return name.replace('-', '_').lstrip("*")
+
+def c_fun(name):
+return c_var(name).replace('.', '_')
 
 def c_list_type(name):
 return '%sList' % name
-- 
1.7.1




[Qemu-devel] [PATCH v2] qapi: escaping dots in c_var and in de_camel_case

2012-03-15 Thread Federico Simoncelli
This allows qapi commands and types with dots (downstream QMP extensions
containing fully qualified domain names).

Signed-off-by: Federico Simoncelli 
---
 scripts/qapi.py |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/qapi.py b/scripts/qapi.py
index 6e05469..b1173bf 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -111,7 +111,7 @@ def de_camel_case(name):
 for ch in name:
 if ch.isupper() and new_name:
 new_name += '_'
-if ch == '-':
+if ch == '-' or ch == '.':
 new_name += '_'
 else:
 new_name += ch.lower()
@@ -131,7 +131,7 @@ def camel_case(name):
 return new_name
 
 def c_var(name):
-return '_'.join(name.split('-')).lstrip("*")
+return name.replace('-', '_').replace('.', '_').lstrip('*')
 
 def c_list_type(name):
 return '%sList' % name
-- 
1.7.1




[Qemu-devel] [PATCH] qapi: escaping the dots in c_var

2012-03-15 Thread Federico Simoncelli
This allows qapi commands and types with dots.

Signed-off-by: Federico Simoncelli 
---
 scripts/qapi.py |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/scripts/qapi.py b/scripts/qapi.py
index 6e05469..4090c55 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -131,7 +131,7 @@ def camel_case(name):
 return new_name
 
 def c_var(name):
-return '_'.join(name.split('-')).lstrip("*")
+return name.replace('-', '_').replace('.', '_').lstrip('*')
 
 def c_list_type(name):
 return '%sList' % name
-- 
1.7.1




Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command

2012-03-14 Thread Federico Simoncelli
- Original Message -
> From: "Kevin Wolf" 
> To: "Federico Simoncelli" 
> Cc: "Eric Blake" , qemu-devel@nongnu.org, 
> stefa...@linux.vnet.ibm.com, lcapitul...@redhat.com,
> "Paolo Bonzini" , "Markus Armbruster" 
> Sent: Wednesday, March 14, 2012 10:34:08 AM
> Subject: Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command
> 
> > * We could leave it as it is, a distinct command that is not part
> > of
> >   the transaction and that it's closing the old image before
> >   opening
> >   the new one.
> 
> Yes, this would be the short-term preliminary solution. I would tend
> to
> leave it to downstreams to implement it as an extension, though.
> 
> > This is not completely correct, the main intent was to not spread
> > one image chain across two storage domains (making it incomplete if one
> > of them was missing). In the next oVirt release a VM can have
> > different disks on different storage domains, so this wouldn't be a special
> > case but just a normal situation.
> 
> The problem with this kind of argument is that we're not developing
> only for oVirt, but need to look for what makes sense for any management
> tool (or even just direct users of qemu).

That is a general rule, and it's perfectly agreeable, but it has nothing to
do with what I was saying. Do you know any management tool or any reason to
forbid to a VM to have different disks on different storages?
In fact qemu-kvm doesn't even know where the images are coming from (what
particular iscsi connection or what particular NFS server).

What I was asking was a tool (mirroring) to avoid the split of an image
chain between multiple storages, and not a tool to avoid the ability to
have full image chains on different storages.

There is nothing oVirt specific here, it came into play only when I said that
for us is not a problem (for once :-).

-- 
Federico



Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command

2012-03-13 Thread Federico Simoncelli
- Original Message -
> From: "Eric Blake" 
> To: "Paolo Bonzini" 
> Cc: kw...@redhat.com, fsimo...@redhat.com, qemu-devel@nongnu.org, 
> stefa...@linux.vnet.ibm.com, lcapitul...@redhat.com
> Sent: Tuesday, March 13, 2012 9:48:10 PM
> Subject: Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command
> 
> On 03/06/2012 10:56 AM, Paolo Bonzini wrote:
> > From: Federico Simoncelli 
> > 
> > Signed-off-by: Federico Simoncelli 
> > Signed-off-by: Paolo Bonzini 
> 
> >  ##
> > +# @drive-reopen
> > +#
> > +# Assigns a new image file to a device.
> > +#
> > +# @device: the name of the device for which we are changing the
> > image file.
> > +#
> > +# @new-image-file: the target of the new image. If the file
> > doesn't exists the
> > +#  command will fail.
> > +#
> > +# @format: #optional the format of the new image, default is
> > 'qcow2'.
> > +#
> > +# Returns: nothing on success
> > +#  If @device is not a valid block device, DeviceNotFound
> > +#  If @new-image-file can't be opened, OpenFileFailed
> > +#  If @format is invalid, InvalidBlockFormat
> > +#
> > +# Since 1.1
> > +##
> > +{ 'command': 'drive-reopen',
> > +  'data': { 'device': 'str', 'new-image-file': 'str', '*format':
> > 'str' } }
> 
> I still think we need a 'drive-reopen' action included in
> 'transaction',
> as an 11/10 on this series.  For disk migration, it is true that you
> can
> migrate one disk at a time, and therefore only need to reopen one
> disk
> at a time, to get the guarantee that for a single disk image, the
> current state of that image will be guaranteed to be consistent using
> only one storage domain.

I'm not sure if this was already addressed on this mailing list but
the main problem is that as general rule a qcow file cannot be opened
in r/w mode twice. I believe there was only one exception to that
with the live migration and it generated several issues.

That said, reopen is really hard to be implemented as a transaction
without breaking that rule. For example in the blkmirror case you'd
need to open the destination image in r/w while the mirroring is in
action (already having the same image in r/w mode).

There are several solutions here but they are either really hard to
implement or non definitive. For example:

* We could try to implement the reopen command for each special case,
  eg: blkmirror, reopening the same image, etc... and in such cases
  reusing the same bs that we already have. The downside is that this
  command will be coupled with all this special cases.

* We could use the transaction APIs without actually making it
  transaction (if we fail in the middle we can't rollback). The only
  advantage of this is that we'd provide a consistent API to libvirt
  and we would postpone the problem to the future. Anyway I strongly
  discourage this as it's completely unsafe and it's going to break
  the transaction semantic. Moreover it's a solution that relies too
  much on the hope of finding something appropriate in the future.

* We could leave it as it is, a distinct command that is not part of
  the transaction and that it's closing the old image before opening
  the new one.

> But since the API allows the creation of two mirrors in one command,
> I'm
> worried that someone will try to start a mirror on two disks at once,
> but then be stuck doing two separate 'drive-reopen' commands.  If the
> first succeeds but the second fails, you have now stranded the qemu
> process across two storage domains, which is exactly what we were
> trying
> to avoid in the first place by inventing transactions.  That is, even
> if
> all disks are individually consistent in a single domain, the act of
> migrating then reopening one disk at a time means you will have a
> window
> where disk 1 and disk 2 are opened on different storage domains.

This is not completely correct, the main intent was to not spread one
image chain across two storage domains (making it incomplete if one of
them was missing). In the next oVirt release a VM can have different
disks on different storage domains, so this wouldn't be a special case
but just a normal situation.

-- 
Federico



[Qemu-devel] [PATCH] Add the drive-reopen command

2012-03-05 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli 
---
 blockdev.c   |   40 +++-
 hmp-commands.hx  |   16 
 hmp.c|   11 +++
 hmp.h|1 +
 qapi-schema.json |   22 ++
 5 files changed, 77 insertions(+), 13 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 058b6d5..56da5c9 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
-void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
-bool has_format, const char *format,
-Error **errp)
+static void change_blockdev_image(const char *device, const char 
*new_image_file,
+  const char *format, bool create, Error 
**errp)
 {
 BlockDriverState *bs;
 BlockDriver *drv, *old_drv, *proto_drv;
@@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 old_drv = bs->drv;
 flags = bs->open_flags;
 
-if (!has_format) {
+if (!format) {
 format = "qcow2";
 }
 
@@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 return;
 }
 
-proto_drv = bdrv_find_protocol(snapshot_file);
+proto_drv = bdrv_find_protocol(new_image_file);
 if (!proto_drv) {
 error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
 return;
 }
 
-ret = bdrv_img_create(snapshot_file, format, bs->filename,
-  bs->drv->format_name, NULL, -1, flags);
-if (ret) {
-error_set(errp, QERR_UNDEFINED_ERROR);
-return;
+if (create) {
+ret = bdrv_img_create(new_image_file, format, bs->filename,
+  bs->drv->format_name, NULL, -1, flags);
+if (ret) {
+error_set(errp, QERR_UNDEFINED_ERROR);
+return;
+}
 }
 
 bdrv_drain_all();
 bdrv_flush(bs);
 
 bdrv_close(bs);
-ret = bdrv_open(bs, snapshot_file, flags, drv);
+ret = bdrv_open(bs, new_image_file, flags, drv);
 /*
  * If reopening the image file we just created fails, fall back
  * and try to re-open the original image. If that fails too, we
@@ -709,11 +710,25 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 if (ret != 0) {
 error_set(errp, QERR_OPEN_FILE_FAILED, old_filename);
 } else {
-error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
+error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
 }
 }
 }
 
+void qmp_drive_reopen(const char *device, const char *new_image_file,
+  bool has_format, const char *format, Error **errp)
+{
+change_blockdev_image(device, new_image_file,
+  has_format ? format : NULL, false, errp);
+}
+
+void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
+bool has_format, const char *format,
+Error **errp)
+{
+change_blockdev_image(device, snapshot_file,
+  has_format ? format : NULL, true, errp);
+}
 
 /* New and old BlockDriverState structs for group snapshots */
 typedef struct BlkTransactionStates {
@@ -877,7 +892,6 @@ exit:
 return;
 }
 
-
 static void eject_device(BlockDriverState *bs, int force, Error **errp)
 {
 if (bdrv_in_use(bs)) {
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 64b3656..9e08123 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -900,6 +900,22 @@ Snapshot device, using snapshot file as target if provided
 ETEXI
 
 {
+.name   = "drive-reopen",
+.args_type  = "device:B,new-image-file:s,format:s?",
+.params = "device new-image-file [format]",
+.help   = "Assigns a new image file to a device.\n\t\t\t"
+  "The image will be opened using the format\n\t\t\t"
+  "specified or 'qcow2' by default.\n\t\t\t",
+.mhandler.cmd = hmp_drive_reopen,
+},
+
+STEXI
+@item drive-reopen
+@findex drive-reopen
+Assigns a new image file to a device.
+ETEXI
+
+{
 .name   = "drive_add",
 .args_type  = "pci_addr:s,opts:s",
 .params = "[[:]:]\n"
diff --git a/hmp.c b/hmp.c
index 3a54455..ab069ad 100644
--- a/hmp.c
+++ b/hmp.c
@@ -706,6 +706,17 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
 hmp_handle_error(mon, &errp);
 }
 
+void hmp_drive_reopen(Monitor *mon, const QDict *qdict)
+{
+const char *device = qdict_get_str(qdict, "device");
+const char *filename = qdict_get_str(qdict, "new-image-file");
+const char *format = qdict_get_try_st

[Qemu-devel] [PATCH] add reopen to blockdev-transaction

2012-03-01 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli 
---
 blockdev.c   |8 
 qapi-schema.json |   12 
 qmp-commands.hx  |6 +-
 3 files changed, 25 insertions(+), 1 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 56da5c9..36fe07c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -798,6 +798,14 @@ void qmp_blockdev_transaction(BlockdevActionList *dev_list,
  dev_info->mirror->target);
 break;
 
+case BLOCKDEV_ACTION_KIND_REOPEN:
+device = dev_info->reopen->device;
+if (dev_info->format->has_format) {
+format = dev_info->reopen->format;
+}
+new_source = g_strdup(dev_info->reopen->target);
+break;
+
 default:
 abort();
 }
diff --git a/qapi-schema.json b/qapi-schema.json
index b33875d..17f7548 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1145,6 +1145,17 @@
 { 'type': 'BlockdevMirror',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
 '*reuse': 'bool' } }
+##
+# @BlockdevReopen
+#
+# @device: the name of the device to reopen.
+#
+# @target: the target of the new image.
+#
+# @format: #optional the format of the new image, default is 'qcow2'.
+##
+{ 'type': 'BlockdevReopen',
+  'data': { 'device': 'str', 'target': 'str', '*format': 'str' } }
 
 ##
 # @BlockdevAction
@@ -1156,6 +1167,7 @@
   'data': {
'snapshot': 'BlockdevSnapshot',
'mirror': 'BlockdevMirror',
+   'reopen': 'BlockdevReopen',
} }
 
 ##
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 50ac5a0..317c448 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -720,7 +720,7 @@ Arguments:
 
 actions array:
 - "type": the operation to perform.  The only supported
-  values are "snapshot" and "mirror". (json-string)
+  values are "snapshot", "mirror" and "reopen". (json-string)
 - "data": a dictionary.  The contents depend on the value
   of "type".  When "type" is "snapshot":
   - "device": device name to snapshot (json-string)
@@ -734,6 +734,10 @@ actions array:
   - "format": format of new image (json-string, optional)
   - "reuse": whether QEMU should look for an existing image file
 (json-bool, optional, default false)
+  When "type" is "reopen":
+  - "device": device name to reopen (json-string)
+  - "target": name of destination image file (json-string)
+  - "format": format of new image (json-string, optional)
 
 Example:
 
-- 
1.7.1




[Qemu-devel] [PATCHv4] Add blkmirror block driver

2012-02-29 Thread Federico Simoncelli
Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti 
Signed-off-by: Federico Simoncelli 
Signed-off-by: Paolo Bonzini 
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  278 
 cutils.c   |   56 +++
 docs/blkmirror.txt |   15 +++
 qemu-common.h  |2 +
 5 files changed, 352 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 808de6a..2302c96 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..d894ca8
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,278 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include "block_int.h"
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:fmt1:path/to/image1:fmt2:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+int ret = 0, i;
+char *tmpbuf, *tok[4], *next;
+BlockDriver *drv1, *drv2;
+BdrvMirrorState *m = bs->opaque;
+BlockDriverState *bk;
+
+m->bs[0] = bdrv_new("");
+if (m->bs[0] == NULL) {
+return -ENOMEM;
+}
+
+m->bs[1] = bdrv_new("");
+if (m->bs[1] == NULL) {
+bdrv_delete(m->bs[0]);
+return -ENOMEM;
+}
+
+tmpbuf = g_malloc(strlen(filename) + 1);
+pstrcpy(tmpbuf, strlen(filename) + 1, filename);
+
+/* Parse the blkmirror: prefix */
+if (!strstart(tmpbuf, "blkmirror:", (const char **) &next)) {
+next = tmpbuf;
+}
+
+for (i = 0; i < 4; i++) {
+if (!next) {
+ret = -EINVAL;
+goto out;
+}
+tok[i] = estrtok_r(NULL, ":", '\\', &next);
+}
+
+drv1 = bdrv_find_whitelisted_format(tok[0]);
+drv2 = bdrv_find_whitelisted_format(tok[2]);
+if (!drv1 || !drv2) {
+ret = -EINVAL;
+goto out;
+}
+
+ret = bdrv_open(m->bs[0], tok[1], flags, drv1);
+if (ret < 0) {
+goto out;
+}
+
+/* If we crash, we cannot assume that the destination is a
+ * valid mirror and we have to start over.  So speed up things
+ * by effectively operating on the destination in cache=unsafe
+ * mode.
+ */
+ret = bdrv_open(m->bs[1], tok[3], flags | BDRV_O_NO_BACKING
+| BDRV_O_NO_FLUSH | BDRV_O_CACHE_WB, drv2);
+if (ret < 0) {
+goto out;
+}
+
+if (m->bs[0]->backing_hd) {
+bk = m->bs[0]->backing_hd;
+
+m->bs[1]->backing_hd = bdrv_new("");
+if (!m->bs[1]->backing_hd) {
+ret = -ENOMEM;
+goto out;
+}
+
+/* opening the same backing file of the source */
+ret = bdrv_open(m->bs[1]->backing_hd,
+bk->filename, bk->open_flags, bk->drv);
+if (ret < 0) {
+goto out;
+}
+}
+
+ out:
+g_free(tmpbuf);
+
+if (ret < 0) {
+for (i = 0; i < 2; i++) {
+bdrv_delete(m->bs[i]);
+m->bs[i] = NULL;
+}
+}
+
+return ret;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int i;
+
+for (i = 0; i < 2; i++) {
+bdrv_delete(m->bs[i]);
+m->bs[i] = NULL;
+}
+}
+
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int ret;
+
+ret = bdrv_co_flush(m->bs[0]);
+if (ret < 0) {
+return ret;
+}
+
+return bdrv_co_flush(m->bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirror

Re: [Qemu-devel] [PATCHv3] Add blkmirror block driver

2012-02-29 Thread Federico Simoncelli
- Original Message -
> From: "Federico Simoncelli" 
> To: qemu-devel@nongnu.org
> Cc: mtosa...@redhat.com, kw...@redhat.com, pbonz...@redhat.com, 
> stefa...@gmail.com, "Federico Simoncelli"
> 
> Sent: Wednesday, February 29, 2012 1:28:21 PM
> Subject: [PATCHv3] Add blkmirror block driver
> 
> Mirrored writes are used by live block copy.
> 
> Signed-off-by: Marcelo Tosatti 
> Signed-off-by: Federico Simoncelli 
> ---
>  Makefile.objs  |2 +-
>  block/blkmirror.c  |  255
>  
>  cutils.c   |   30 ++
>  docs/blkmirror.txt |   15 +++
>  qemu-common.h  |1 +
>  5 files changed, 302 insertions(+), 1 deletions(-)
>  create mode 100644 block/blkmirror.c
>  create mode 100644 docs/blkmirror.txt
> 
> diff --git a/block/blkmirror.c b/block/blkmirror.c
> new file mode 100644
> index 000..c98b162
> --- /dev/null
> +++ b/block/blkmirror.c
[...]
> +if (!drv1 || !drv2) {
> +ret = -EINVAL;
> +goto out;
> +}
> +
> +ret = bdrv_open(m->bs[0], tok[1], flags, drv1);
> +if (ret < 0) {
> +goto out;
> +}
> +
> +ret = bdrv_open(m->bs[0], tok[3], flags, drv2);
> +if (ret < 0) {
> +goto out;
> +}

There's a small mistake here, the second one is m->bs[1].

-- 
Federico



[Qemu-devel] [PATCHv3] Add blkmirror block driver

2012-02-29 Thread Federico Simoncelli
Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti 
Signed-off-by: Federico Simoncelli 
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  255 
 cutils.c   |   30 ++
 docs/blkmirror.txt |   15 +++
 qemu-common.h  |1 +
 5 files changed, 302 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 808de6a..2302c96 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..c98b162
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,255 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include "block_int.h"
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:fmt1:path/to/image1:fmt2:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+int ret = 0, i;
+char *tmpbuf, *tok[4], *next;
+BlockDriver *drv1, *drv2;
+BdrvMirrorState *m = bs->opaque;
+
+m->bs[0] = bdrv_new("");
+if (m->bs[0] == NULL) {
+return -ENOMEM;
+}
+
+m->bs[1] = bdrv_new("");
+if (m->bs[1] == NULL) {
+bdrv_delete(m->bs[0]);
+return -ENOMEM;
+}
+
+tmpbuf = g_malloc(strlen(filename) + 1);
+pstrcpy(tmpbuf, strlen(filename) + 1, filename);
+
+/* Parse the blkmirror: prefix */
+if (!strstart(tmpbuf, "blkmirror:", (const char **) &next)) {
+next = tmpbuf;
+}
+
+for (i = 0; i < 4; i++) {
+if (!next) {
+ret = -EINVAL;
+goto out;
+}
+tok[i] = pstrtok_r(NULL, ":", '\\', &next);
+}
+
+drv1 = bdrv_find_whitelisted_format(tok[0]);
+drv2 = bdrv_find_whitelisted_format(tok[2]);
+
+if (!drv1 || !drv2) {
+ret = -EINVAL;
+goto out;
+}
+
+ret = bdrv_open(m->bs[0], tok[1], flags, drv1);
+if (ret < 0) {
+goto out;
+}
+
+ret = bdrv_open(m->bs[0], tok[3], flags, drv2);
+if (ret < 0) {
+goto out;
+}
+
+ out:
+g_free(tmpbuf);
+
+if (ret < 0) {
+for (i = 0; i < 2; i++) {
+bdrv_delete(m->bs[i]);
+m->bs[i] = NULL;
+}
+}
+
+return ret;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int i;
+
+for (i = 0; i < 2; i++) {
+bdrv_delete(m->bs[i]);
+m->bs[i] = NULL;
+}
+}
+
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int ret;
+
+ret = bdrv_co_flush(m->bs[0]);
+if (ret < 0) {
+return ret;
+}
+
+return bdrv_co_flush(m->bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+
+return bdrv_getlength(m->bs[0]);
+}
+
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+BdrvMirrorState *m = bs->opaque;
+return bdrv_aio_readv(m->bs[0], sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+DupAIOCB *acb = container_of(blockacb, DupAIOCB, common);
+int i;
+
+for (i = 0 ; i < 2; i++) {
+if (!acb->aios[i].finished) {
+bdrv_aio_cancel(acb->

Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-28 Thread Federico Simoncelli
- Original Message -
> From: "Paolo Bonzini" 
> To: qemu-devel@nongnu.org
> Sent: Tuesday, February 28, 2012 7:02:40 PM
> Subject: Re: [Qemu-devel] Live Block Migration using Mirroring
> 
> Il 28/02/2012 18:46, Federico Simoncelli ha scritto:
> > > > Thank you for getting this. Being able to have a bogus backing
> > > > file
> > > > was a bonus but it's not really required for the mirrored live
> > > > block
> > > > migration. We can add the support for switching the backing
> > > > file in the
> > > > drive-reopen part.
> > > 
> > > Wait, it's not really required for oVirt because it creates the
> > > snapshot outside QEMU.  What about everyone else?
> > 
> > They'll have (as oVirt) two mirrored snapshot pointing at the same
> > base. The only difference is that the image is created internally,
> > but
> > that's not hard.
> 
> Can you detail how you are switching the backing file in the
> drive-reopen?  Either the BlockDriverState opened by blkmirror, or
> the
> one opened at the end, will have to use the "wrong" backing_file.
>  How
> do you arrange for that?

Step 1 - Initital Scenario
==
VM1 is running on the src/hd0base.

[src/hd0base] <= VM1(read-write)

Step 3 - Mirrored Live Snapshot
===
A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as image
files (both having src/hd0base as backing file).

[src/hd0base] <- [src/hd0snap1] <= VM1(read-write)
 ^-- [dst/hd0snap1] <= VM1(read-write)

Step 4 - Backing File Copy
==
An external manager copies src/hd0base to the destination (dst/hd0base).

[src/hd0base] <- [src/hd0snap1] <= VM1(read-write)
[dst/hd0base]^-- [dst/hd0snap1] <= VM1(read-write)


Step 5 - Final Switch to Destination

VM1 is now able to switch to the destination for both read and write operations
fixing the backing file path in dst/hd0snap1.

[src/hd0base] <- [src/hd0snap1]
[dst/hd0base] <- [dst/hd0snap1] <= VM1(read-write)

-- 
Federico



Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-28 Thread Federico Simoncelli
- Original Message -
> From: "Paolo Bonzini" 
> To: "Federico Simoncelli" 
> Cc: "Stefan Hajnoczi" , qemu-devel@nongnu.org, 
> kw...@redhat.com, mtosa...@redhat.com
> Sent: Tuesday, February 28, 2012 6:36:57 PM
> Subject: Re: [Qemu-devel] Live Block Migration using Mirroring
> 
> Il 28/02/2012 18:15, Federico Simoncelli ha scritto:
> > Thank you for getting this. Being able to have a bogus backing file
> > was
> > a bonus but it's not really required for the mirrored live block
> > migration.
> > We can add the support for switching the backing file in the
> > drive-reopen
> > part.
> 
> Wait, it's not really required for oVirt because it creates the
> snapshot
> outside QEMU.  What about everyone else?

They'll have (as oVirt) two mirrored snapshot pointing at the same
base. The only difference is that the image is created internally, but
that's not hard.

-- 
Federico



Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-28 Thread Federico Simoncelli
- Original Message -
> From: "Stefan Hajnoczi" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
> Sent: Tuesday, February 28, 2012 4:47:48 PM
> Subject: Re: [Qemu-devel] Live Block Migration using Mirroring
>
> On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli
>  wrote:
> > Step 3 - Mirrored Live Snapshot
> > ===
> > A mirrored live snapshot is issued using src/hd0snap1 and
> > dst/hd0snap1 as
> > image files. (Where "<-" stands for "has backing file")
> >
> > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write)
> >     ...      <- [dst/hd0snap1] <= VM1(write-only)
> >
> > $ qemu-img create -f qcow2 \
> >           -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G
> > Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480
> > backing_file='/tmp/src/hd0base.qcow2' encryption=off
> > cluster_size=65536
> >
> > $ qemu-img create -f qcow2 \
> >           -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G
> > Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480
> > backing_file='/tmp/src/hd0base.qcow2' encryption=off
> > cluster_size=65536
> >
> > (qemu) snapshot_blkdev -n ide0-hd0 \
> >         blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2
> >         blkmirror
> >
> > Step 4 - Backing File Copy
> > ==
> > An external manager copies src/hd0base to the destination
> > dst/hd0base.
> >
> > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write)
> > [dst/hd0base] <- [dst/hd0snap1] <= VM1(write-only)
>
> At this stage we have dst/hd0snap1 opened with BDRV_O_NO_BACKING.  If
> it has no backing file and the guest issues a write request that is
> smaller than a cluster in the image file, the untouched areas of that
> cluster will be populated with zeroes.
>
> Once dst/hd0snap1 is reopened with dst/hd0base in place there will be
> zeros in clusters where the guest wrote only a few sectors.  We will
> not "see" the backing file data in those clusters.
>
> Have you hit this problem or did I miss something?

Thank you for getting this. Being able to have a bogus backing file was
a bonus but it's not really required for the mirrored live block migration.
We can add the support for switching the backing file in the drive-reopen
part.
I'll remove the BDRV_O_NO_BACKING flag from the blkmirror patch.

--
Federico



Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands)

2012-02-27 Thread Federico Simoncelli
- Original Message -
> From: "Anthony Liguori" 
> To: "Federico Simoncelli" 
> Cc: "Paolo Bonzini" , kw...@redhat.com, 
> arm...@redhat.com, "Jeff Cody" ,
> mtosa...@redhat.com, qemu-devel@nongnu.org, "Luiz Capitulino" 
> 
> Sent: Monday, February 27, 2012 5:42:31 PM
> Subject: Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the 
> blockdev-reopen and blockdev-migrate
> commands)
> 
> On 02/27/2012 10:33 AM, Federico Simoncelli wrote:
> > I'm all for the modularity of the commands (I suggested it since
> > the beginning),
> > but all this infrastructure goes way beyond what I'd need for oVirt
> > now.
> >
> > When I submitted my patches we knew that my work wasn't the
> > definitive solution
> > (eg: the future implementation of -blockdev). So I'd suggest to try
> > and settle
> > with something that is good enough and that is not preventing a
> > future improvement.
> >
> > What about having a (temporary) flag in drive-mirror to accept also
> > a new-image-file
> > until we will have the optimal solution?
> 
> Unless there are extenuating circumstances (like the absence of core
> infrastructure in QEMU), then we should not add commands that we know
> are not
> the right command.

So are you in favor or against my suggestion? It looks like this is exactly
the case where the core infrastructure (transactions) is missing.

-- 
Federico



Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands)

2012-02-27 Thread Federico Simoncelli
- Original Message -
> From: "Paolo Bonzini" 
> To: "Luiz Capitulino" 
> Cc: "Federico Simoncelli" , kw...@redhat.com, 
> mtosa...@redhat.com, qemu-devel@nongnu.org,
> arm...@redhat.com, "Jeff Cody" 
> Sent: Monday, February 27, 2012 3:39:33 PM
> Subject: drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen 
> and blockdev-migrate commands)
> 
> > > 4) incremental=true, new_image_file passed:
> > >- no images will be created
> > >- writes will be mirrored to new_image_file and dest
> 
> No need to provide this from within QEMU, because libvirt/oVirt can
> do
> the dance using elementary operations:
> 
> blockdev-begin-transaction
> drive-reopen device new-image-file
> drive-mirror streaming=false device dest
> blockdev-commit-transaction
> 
> No strange optional arguments, no proliferation of commands, etc.
>  The
> only downside is that if someone tries to do (4) without transactions
> (or without stopping the VM) they'll get corruption because atomicity
> is
> required.

I'm all for the modularity of the commands (I suggested it since the beginning),
but all this infrastructure goes way beyond what I'd need for oVirt now.

When I submitted my patches we knew that my work wasn't the definitive solution
(eg: the future implementation of -blockdev). So I'd suggest to try and settle
with something that is good enough and that is not preventing a future 
improvement.

What about having a (temporary) flag in drive-mirror to accept also a 
new-image-file
until we will have the optimal solution?

-- 
Federico



Re: [Qemu-devel] [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands

2012-02-27 Thread Federico Simoncelli
- Original Message -
> From: "Luiz Capitulino" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, mtosa...@redhat.com, pbonz...@redhat.com, 
> kw...@redhat.com, arm...@redhat.com
> Sent: Friday, February 24, 2012 8:01:43 PM
> Subject: Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate 
> commands
> 
> On Fri, 24 Feb 2012 16:49:04 +
> Federico Simoncelli  wrote:
> 
> > Signed-off-by: Federico Simoncelli 
> 
> Btw, would be nice to have a 0/2 intro email describing the feature
> and changelog
> info.

Yes the 0/2 (actually 0/3) was sent at the beginning of the thread so you might
have missed it because you were added later on but I'm sure you can still find
it in the archives.

> 
> > +BlockDriver *drv;
> > +int i, j, escape;
> > +char new_filename[2048], *filename;
> 
> I'd use PATH_MAX for new_filename's size.

Maybe PATH_MAX * 2 + 1? (To handle the case where all the characters should be
escaped).

> > +escape = 0;
> > +for (i = 0, j = 0; j < strlen(new_image_file); j++) {
> > + loop:
> > +if (!(i < sizeof(new_filename) - 2)) {
> > +error_set(errp, QERR_INVALID_PARAMETER_VALUE,
> > +  "new-image-file", "shorter filename");
> > +return;
> > +}
> > +
> > +if (new_image_file[j] == ':' || new_image_file[j] == '\\')
> > {
> > +if (!escape) {
> > +new_filename[i++] = '\\', escape = 1;
> > +goto loop;
> > +} else {
> > +escape = 0;
> > +}
> > +}
> > +
> > +new_filename[i++] = new_image_file[j];
> > +}
> 
> IMO, you could require the filename arguments to be escaped already.

Can you be more explicit, who should escape it?
The only thing that comes to my mind right now is requiring the input of
blockdev-migrate already escaped but that would expose an internal format.
(I'd not recommend it).

> > +void qmp_blockdev_migrate(const char *device, bool incremental,
> > +  const char *destination, bool
> > has_new_image_file,
> > +  const char *new_image_file, Error
> > **errp)
> > +{
> > +BlockDriverState *bs;
> > +
> > +bs = bdrv_find(device);
> > +if (!bs) {
> > +error_set(errp, QERR_DEVICE_NOT_FOUND, device);
> > +return;
> > +}
> > +if (bdrv_in_use(bs)) {
> > +error_set(errp, QERR_DEVICE_IN_USE, device);
> > +return;
> > +}
> > +
> > +if (incremental) {
> > +if (!has_new_image_file) {
> > +error_set(errp, QERR_INVALID_PARAMETER_VALUE,
> > +  "incremental", "a new image file");
> > +} else {
> > +qmp_blockdev_mirror(device, destination,
> > new_image_file, errp);
> > +}
> > +} else {
> > +error_set(errp, QERR_NOT_SUPPORTED);
> > +}
> 
> The command documentation says that 'incremental' and
> 'new_image_file' are
> optionals, but the code makes them required. Why?

If I didn't make any mistake in the code I'm just enforcing that when
you specify "incremental" you also need a new image.
There are still other valid cases where they are optional.

-- 
Federico



Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver

2012-02-27 Thread Federico Simoncelli
- Original Message -
> From: "Luiz Capitulino" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, mtosa...@redhat.com, pbonz...@redhat.com, 
> kw...@redhat.com, arm...@redhat.com
> Sent: Friday, February 24, 2012 7:17:22 PM
> Subject: Re: [PATCH 1/2 v2] Add blkmirror block driver
> 
> On Fri, 24 Feb 2012 16:49:03 +
> Federico Simoncelli  wrote:
> 
> > +/* Parse the raw image filename */
> > +filename2 = g_malloc(strlen(filename)+1);
> > +escape = 0;
> > +for (i = n = 0; i < strlen(filename); i++) {
> > +if (!escape && filename[i] == ':') {
> > +break;
> > +}
> > +if (!escape && filename[i] == '\\') {
> > +escape = 1;
> > +} else {
> > +escape = 0;
> > +}
> > +
> > +if (!escape) {
> > +filename2[n++] = filename[i];
> > +}
> > +}
> > +filename2[n] = '\0';
> 
> You're escaping only the first image name string, is that
> intentional?

Yes, it was also documented here by Marcelo:

> > diff --git a/docs/blkmirror.txt b/docs/blkmirror.txt
> > new file mode 100644
> > index 000..cf73f3f
> > --- /dev/null
> > +++ b/docs/blkmirror.txt
> > @@ -0,0 +1,16 @@
> > +Block mirror driver
> > +---
> > +
> > +This driver will mirror writes to two distinct images.
> > +It's used internally by live block copy.
> > +
> > +Format
> > +--
> > +
> > +blkmirror:/image1.img:/image2.img
> > +
> > +'\' (backslash) can be used to escape colon processing
> > +as a separator, in the first image filename.
> > +Backslashes themselves also can be escaped as '\\'.
> > +
> > +

Anyway this format is encapsulated by blockdev-migrate.

-- 
Federico



Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver

2012-02-24 Thread Federico Simoncelli
- Original Message -
> From: "Eric Blake" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com, 
> arm...@redhat.com, lcapitul...@redhat.com,
> pbonz...@redhat.com
> Sent: Friday, February 24, 2012 6:02:36 PM
> Subject: Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver
> > +++ b/docs/blkmirror.txt
> > @@ -0,0 +1,16 @@
> > +Block mirror driver
> > +---
> > +
> > +This driver will mirror writes to two distinct images.
> > +It's used internally by live block copy.
> > +
> > +Format
> > +--
> > +
> > +blkmirror:/image1.img:/image2.img
> > +
> > +'\' (backslash) can be used to escape colon processing
> > +as a separator, in the first image filename.
> > +Backslashes themselves also can be escaped as '\\'.
> 
> Is the escaping of : and \ only necessary for image1.img, or for both
> image1 and image2?  I need to know if the parser is consistent for
> both
> arguments, but this wording makes it sound like it is only for the
> first
> argument.

Yes it is only for the first argument.

-- 
Federico



[Qemu-devel] [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands

2012-02-24 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli 
---
 blockdev.c   |  107 --
 hmp-commands.hx  |   38 +++
 hmp.c|   24 
 hmp.h|2 +
 qapi-schema.json |   54 +++
 5 files changed, 213 insertions(+), 12 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 2c132a3..f51bd6f 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
-void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
-bool has_format, const char *format,
-Error **errp)
+static void change_blockdev_image(const char *device, const char 
*new_image_file,
+  const char *format, bool create, Error 
**errp)
 {
 BlockDriverState *bs;
 BlockDriver *drv, *old_drv, *proto_drv;
@@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 old_drv = bs->drv;
 flags = bs->open_flags;
 
-if (!has_format) {
+if (!format) {
 format = "qcow2";
 }
 
@@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 return;
 }
 
-proto_drv = bdrv_find_protocol(snapshot_file);
+proto_drv = bdrv_find_protocol(new_image_file);
 if (!proto_drv) {
 error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
 return;
 }
 
-ret = bdrv_img_create(snapshot_file, format, bs->filename,
-  bs->drv->format_name, NULL, -1, flags);
-if (ret) {
-error_set(errp, QERR_UNDEFINED_ERROR);
-return;
+if (create) {
+ret = bdrv_img_create(new_image_file, format, bs->filename,
+  bs->drv->format_name, NULL, -1, flags);
+if (ret) {
+error_set(errp, QERR_UNDEFINED_ERROR);
+return;
+}
 }
 
 bdrv_drain_all();
 bdrv_flush(bs);
 
 bdrv_close(bs);
-ret = bdrv_open(bs, snapshot_file, flags, drv);
+ret = bdrv_open(bs, new_image_file, flags, drv);
 /*
  * If reopening the image file we just created fails, fall back
  * and try to re-open the original image. If that fails too, we
@@ -709,11 +710,93 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 if (ret != 0) {
 error_set(errp, QERR_OPEN_FILE_FAILED, old_filename);
 } else {
-error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
+error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
 }
 }
 }
 
+void qmp_drive_reopen(const char *device, const char *new_image_file,
+ bool has_format, const char *format, Error **errp)
+{
+change_blockdev_image(device, new_image_file,
+  has_format ? format : NULL, false, errp);
+}
+
+void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
+bool has_format, const char *format,
+Error **errp)
+{
+change_blockdev_image(device, snapshot_file,
+  has_format ? format : NULL, true, errp);
+}
+
+static void qmp_blockdev_mirror(const char *device, const char *destination,
+const char *new_image_file, Error **errp)
+{
+BlockDriver *drv;
+int i, j, escape;
+char new_filename[2048], *filename;
+
+drv = bdrv_find_format("blkmirror");
+if (!drv) {
+error_set(errp, QERR_INVALID_BLOCK_FORMAT, "blkmirror");
+return;
+}
+
+escape = 0;
+for (i = 0, j = 0; j < strlen(new_image_file); j++) {
+ loop:
+if (!(i < sizeof(new_filename) - 2)) {
+error_set(errp, QERR_INVALID_PARAMETER_VALUE,
+  "new-image-file", "shorter filename");
+return;
+}
+
+if (new_image_file[j] == ':' || new_image_file[j] == '\\') {
+if (!escape) {
+new_filename[i++] = '\\', escape = 1;
+goto loop;
+} else {
+escape = 0;
+}
+}
+
+new_filename[i++] = new_image_file[j];
+}
+
+filename = g_strdup_printf("blkmirror:%s:%s", new_filename, destination);
+change_blockdev_image(device, filename, "blkmirror", false, errp);
+g_free(filename);
+}
+
+void qmp_blockdev_migrate(const char *device, bool incremental,
+  const char *destination, bool has_new_image_file,
+  const char *new_image_file, Error **errp)
+{
+BlockDriverState *bs;
+
+bs = bdrv_find(device);
+if (!bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+return;
+

[Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver

2012-02-24 Thread Federico Simoncelli
From: Marcelo Tosatti 

Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti 
Signed-off-by: Federico Simoncelli 
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  247 
 docs/blkmirror.txt |   16 
 3 files changed, 264 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 67ee3df..6020308 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..49e3381
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,247 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include "block_int.h"
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:path/to/image1:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+BdrvMirrorState *m = bs->opaque;
+int ret, escape, i, n;
+char *filename2;
+
+/* Parse the blkmirror: prefix */
+if (strncmp(filename, "blkmirror:", strlen("blkmirror:"))) {
+return -EINVAL;
+}
+filename += strlen("blkmirror:");
+
+/* Parse the raw image filename */
+filename2 = g_malloc(strlen(filename)+1);
+escape = 0;
+for (i = n = 0; i < strlen(filename); i++) {
+if (!escape && filename[i] == ':') {
+break;
+}
+if (!escape && filename[i] == '\\') {
+escape = 1;
+} else {
+escape = 0;
+}
+
+if (!escape) {
+filename2[n++] = filename[i];
+}
+}
+filename2[n] = '\0';
+
+m->bs[0] = bdrv_new("");
+if (m->bs[0] == NULL) {
+g_free(filename2);
+return -ENOMEM;
+}
+ret = bdrv_open(m->bs[0], filename2, flags, NULL);
+g_free(filename2);
+if (ret < 0) {
+return ret;
+}
+filename += i + 1;
+
+m->bs[1] = bdrv_new("");
+if (m->bs[1] == NULL) {
+bdrv_delete(m->bs[0]);
+return -ENOMEM;
+}
+ret = bdrv_open(m->bs[1], filename, flags | BDRV_O_NO_BACKING, NULL);
+if (ret < 0) {
+bdrv_delete(m->bs[0]);
+return ret;
+}
+
+return 0;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int i;
+
+for (i = 0; i < 2; i++) {
+bdrv_delete(m->bs[i]);
+m->bs[i] = NULL;
+}
+}
+
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int ret;
+
+ret = bdrv_co_flush(m->bs[0]);
+if (ret < 0) {
+return ret;
+}
+
+return bdrv_co_flush(m->bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+
+return bdrv_getlength(m->bs[0]);
+}
+
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+BdrvMirrorState *m = bs->opaque;
+return bdrv_aio_readv(m->bs[0], sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+DupAIOCB *acb = container_of(blockacb, DupAIOCB, common);
+int i;
+
+for (i = 0 ; i < 2; i++) {
+if (!acb->aios[i].finished) {
+bdrv_aio_cancel(acb->aios[i].aiocb);
+}
+}
+qemu_aio_release(ac

Re: [Qemu-devel] [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands

2012-02-24 Thread Federico Simoncelli
- Original Message -
> From: "Kevin Wolf" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, "Marcelo Tosatti" , 
> lcapitul...@redhat.com, "Paolo Bonzini"
> , "Markus Armbruster" 
> Sent: Friday, February 24, 2012 1:03:08 PM
> Subject: Re: [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands
> 
> Am 24.02.2012 12:37, schrieb Federico Simoncelli:
> > Signed-off-by: Federico Simoncelli 
> > ---
> >  block/blkmirror.c |2 +-
> >  blockdev.c|  109
> >  +++--
> >  hmp-commands.hx   |   36 +
> >  hmp.c |   30 ++
> >  hmp.h |2 +
> >  qapi-schema.json  |   63 ++
> >  6 files changed, 229 insertions(+), 13 deletions(-)
> > 
> > diff --git a/block/blkmirror.c b/block/blkmirror.c
> > index 39927c8..49e3381 100644
> > --- a/block/blkmirror.c
> > +++ b/block/blkmirror.c
> > @@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs,
> > const char *filename, int flags)
> >  bdrv_delete(m->bs[0]);
> >  return -ENOMEM;
> >  }
> > -ret = bdrv_open(m->bs[1], filename, flags, NULL);
> > +ret = bdrv_open(m->bs[1], filename, flags | BDRV_O_NO_BACKING,
> > NULL);
> >  if (ret < 0) {
> >  bdrv_delete(m->bs[0]);
> >  return ret;
> 
> Was this hunk meant to be in patch 1?

Not necessarily, I thought a lot about it. I didn't want to modify Marcelo's
patch too much. This flag is actually mandatory only to make blockdev-migrate
work with a destination that doesn't have a backing file yet, so I thought it
was more appropriate to squash it here.

-- 
Federico



[Qemu-devel] [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands

2012-02-24 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli 
---
 block/blkmirror.c |2 +-
 blockdev.c|  109 +++--
 hmp-commands.hx   |   36 +
 hmp.c |   30 ++
 hmp.h |2 +
 qapi-schema.json  |   63 ++
 6 files changed, 229 insertions(+), 13 deletions(-)

diff --git a/block/blkmirror.c b/block/blkmirror.c
index 39927c8..49e3381 100644
--- a/block/blkmirror.c
+++ b/block/blkmirror.c
@@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, const char 
*filename, int flags)
 bdrv_delete(m->bs[0]);
 return -ENOMEM;
 }
-ret = bdrv_open(m->bs[1], filename, flags, NULL);
+ret = bdrv_open(m->bs[1], filename, flags | BDRV_O_NO_BACKING, NULL);
 if (ret < 0) {
 bdrv_delete(m->bs[0]);
 return ret;
diff --git a/blockdev.c b/blockdev.c
index 2c132a3..1df2542 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
-void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
-bool has_format, const char *format,
-Error **errp)
+static void change_blockdev_image(const char *device, const char 
*new_image_file,
+  const char *format, bool create, Error 
**errp)
 {
 BlockDriverState *bs;
 BlockDriver *drv, *old_drv, *proto_drv;
@@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 old_drv = bs->drv;
 flags = bs->open_flags;
 
-if (!has_format) {
+if (!format) {
 format = "qcow2";
 }
 
@@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 return;
 }
 
-proto_drv = bdrv_find_protocol(snapshot_file);
+proto_drv = bdrv_find_protocol(new_image_file);
 if (!proto_drv) {
 error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
 return;
 }
 
-ret = bdrv_img_create(snapshot_file, format, bs->filename,
-  bs->drv->format_name, NULL, -1, flags);
-if (ret) {
-error_set(errp, QERR_UNDEFINED_ERROR);
-return;
+if (create) {
+ret = bdrv_img_create(new_image_file, format, bs->filename,
+  bs->drv->format_name, NULL, -1, flags);
+if (ret) {
+error_set(errp, QERR_UNDEFINED_ERROR);
+return;
+}
 }
 
 bdrv_drain_all();
 bdrv_flush(bs);
 
 bdrv_close(bs);
-ret = bdrv_open(bs, snapshot_file, flags, drv);
+ret = bdrv_open(bs, new_image_file, flags, drv);
 /*
  * If reopening the image file we just created fails, fall back
  * and try to re-open the original image. If that fails too, we
@@ -709,11 +710,95 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 if (ret != 0) {
 error_set(errp, QERR_OPEN_FILE_FAILED, old_filename);
 } else {
-error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file);
+error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
 }
 }
 }
 
+void qmp_blockdev_reopen(const char *device, const char *new_image_file,
+ bool has_format, const char *format, Error **errp)
+{
+change_blockdev_image(device, new_image_file,
+  has_format ? format : NULL, false, errp);
+}
+
+void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
+bool has_format, const char *format,
+Error **errp)
+{
+change_blockdev_image(device, snapshot_file,
+  has_format ? format : NULL, true, errp);
+}
+
+void qmp_blockdev_migrate(const char *device, BlockMigrateOp mode,
+  const char *destination, bool has_new_image_file,
+  const char *new_image_file, Error **errp)
+{
+BlockDriverState *bs;
+BlockDriver *drv;
+int i, j, escape;
+char filename[2048];
+
+bs = bdrv_find(device);
+if (!bs) {
+error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+return;
+}
+if (bdrv_in_use(bs)) {
+error_set(errp, QERR_DEVICE_IN_USE, device);
+return;
+}
+
+if (mode == BLOCK_MIGRATE_OP_MIRROR) {
+drv = bdrv_find_format("blkmirror");
+if (!drv) {
+error_set(errp, QERR_INVALID_BLOCK_FORMAT, "blkmirror");
+return;
+}
+
+if (!has_new_image_file) {
+new_image_file = bs->filename;
+}
+
+pstrcpy(filename, sizeof(filename), "blkmirror:");
+i = strlen("blkmirror:");
+
+escape = 0;
+for (j = 0; j < strlen(new_image_file); j++) {
+ loop:
+i

[Qemu-devel] [PATCH 1/2] Add blkmirror block driver

2012-02-24 Thread Federico Simoncelli
From: Marcelo Tosatti 

Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti 
Signed-off-by: Federico Simoncelli 
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  247 
 docs/blkmirror.txt |   16 
 3 files changed, 264 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 67ee3df..6020308 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..39927c8
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,247 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include "block_int.h"
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:path/to/image1:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+BdrvMirrorState *m = bs->opaque;
+int ret, escape, i, n;
+char *filename2;
+
+/* Parse the blkmirror: prefix */
+if (strncmp(filename, "blkmirror:", strlen("blkmirror:"))) {
+return -EINVAL;
+}
+filename += strlen("blkmirror:");
+
+/* Parse the raw image filename */
+filename2 = g_malloc(strlen(filename)+1);
+escape = 0;
+for (i = n = 0; i < strlen(filename); i++) {
+if (!escape && filename[i] == ':') {
+break;
+}
+if (!escape && filename[i] == '\\') {
+escape = 1;
+} else {
+escape = 0;
+}
+
+if (!escape) {
+filename2[n++] = filename[i];
+}
+}
+filename2[n] = '\0';
+
+m->bs[0] = bdrv_new("");
+if (m->bs[0] == NULL) {
+g_free(filename2);
+return -ENOMEM;
+}
+ret = bdrv_open(m->bs[0], filename2, flags, NULL);
+g_free(filename2);
+if (ret < 0) {
+return ret;
+}
+filename += i + 1;
+
+m->bs[1] = bdrv_new("");
+if (m->bs[1] == NULL) {
+bdrv_delete(m->bs[0]);
+return -ENOMEM;
+}
+ret = bdrv_open(m->bs[1], filename, flags, NULL);
+if (ret < 0) {
+bdrv_delete(m->bs[0]);
+return ret;
+}
+
+return 0;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int i;
+
+for (i = 0; i < 2; i++) {
+bdrv_delete(m->bs[i]);
+m->bs[i] = NULL;
+}
+}
+
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int ret;
+
+ret = bdrv_co_flush(m->bs[0]);
+if (ret < 0) {
+return ret;
+}
+
+return bdrv_co_flush(m->bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+
+return bdrv_getlength(m->bs[0]);
+}
+
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+BdrvMirrorState *m = bs->opaque;
+return bdrv_aio_readv(m->bs[0], sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+DupAIOCB *acb = container_of(blockacb, DupAIOCB, common);
+int i;
+
+for (i = 0 ; i < 2; i++) {
+if (!acb->aios[i].finished) {
+bdrv_aio_cancel(acb->aios[i].aiocb);
+}
+}
+qemu_aio_release(acb);
+}
+
+static AIOPool d

Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-23 Thread Federico Simoncelli
- Original Message -
> From: "Stefan Hajnoczi" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
> Sent: Thursday, February 23, 2012 5:35:23 PM
> Subject: Re: [Qemu-devel] Live Block Migration using Mirroring
>
> On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli
>  wrote:
> >  recently I've been working on live block migration combining the
> >  live
> > snapshots and the blkmirror patch sent by Marcelo Tosatti few
> > months ago.
> >
> > The design is summarized at this url as "Mirrored-Snapshot":
> >
> > http://www.ovirt.org/wiki/Features/Design/StorageLiveMigration
>
> After mirrored-snapshot completes we're left with the base and the
> snapshot.  Is the idea to implement live snapshot merge next?  Or do
> you have something else planned to avoid growing the backing file
> chain each time mirrored-snapshot is used?

The general idea is that I don't expect the need to migrate to a new
storage to be very frequent. Being able to live merge the new snapshot
would be great.

--
Federico



Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver

2012-02-23 Thread Federico Simoncelli
- Original Message -
> From: "Stefan Hajnoczi" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
> Sent: Thursday, February 23, 2012 5:28:23 PM
> Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
>
> On Thu, Feb 23, 2012 at 4:20 PM, Federico Simoncelli
>  wrote:
> > - Original Message -
> >> From: "Stefan Hajnoczi" 
> >> To: "Federico Simoncelli" 
> >> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
> >> Sent: Thursday, February 23, 2012 5:18:41 PM
> >> Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
> >>
> >> On Thu, Feb 23, 2012 at 4:14 PM, Stefan Hajnoczi
> >> 
> >> wrote:
> >> > By the way, this code does not build against qemu.git/master.
> >> >  It
> >> > uses
> >> > interfaces which have been dropped.
> >>
> >> Ah, I see you changed that in Patch 2/3.  Please don't do that,
> >> it's
> >> hard to review and breaks git-bisect.
> >
> > Squashing is easy (anyone could do it at the commit phase),
> > reviewing
> > is hard (if you hide your changes in someone else's code).
>
> I don't care if you or Marcelo wrote it, I want to see a coherent
> piece of code.
>
> The way to do it is to merge the g_malloc() and BlockDriver interface
> changes into this path.  Then you have not snuck any significant
> changes into Marcelo's code.

Well for example I'd have snuck the qemu_vmalloc/qemu_vfree mistake.

> Keep the BDRV_O_NO_BACKING separate.

Ok, thanks.

--
Federico



Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver

2012-02-23 Thread Federico Simoncelli
- Original Message -
> From: "Stefan Hajnoczi" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
> Sent: Thursday, February 23, 2012 5:18:41 PM
> Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
>
> On Thu, Feb 23, 2012 at 4:14 PM, Stefan Hajnoczi 
> wrote:
> > By the way, this code does not build against qemu.git/master.  It
> > uses
> > interfaces which have been dropped.
>
> Ah, I see you changed that in Patch 2/3.  Please don't do that, it's
> hard to review and breaks git-bisect.

Squashing is easy (anyone could do it at the commit phase), reviewing
is hard (if you hide your changes in someone else's code).

--
Federico



Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver

2012-02-23 Thread Federico Simoncelli
- Original Message -
> From: "Stefan Hajnoczi" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
> Sent: Thursday, February 23, 2012 5:14:09 PM
> Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
> 
> On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli
>  wrote:
> > From: Marcelo Tosatti 
> >
> > Mirrored writes are used by live block copy.
> 
> I think the right approach is to create a single blkmirror driver
> that
> also includes blkverify functionality.  The code is basically the
> same
> except blkverify also compares reads - just use a flag to
> enable/disable that behavior.
> 
> Feel free to rename the blkverify driver to blkmirror if you wish.
> 
> By the way, this code does not build against qemu.git/master.  It
> uses
> interfaces which have been dropped.

Yes you also need:

[PATCH 2/3] Update the blkmirror block driver

Which was sent separately to facilitate the review for people who
already reviewed this.

-- 
Federico



Re: [Qemu-devel] Live Block Migration using Mirroring

2012-02-23 Thread Federico Simoncelli
- Original Message -
> From: "Stefan Hajnoczi" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
> Sent: Thursday, February 23, 2012 4:47:38 PM
> Subject: Re: [Qemu-devel] Live Block Migration using Mirroring
>
> On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli
>  wrote:
> > Step 3 - Mirrored Live Snapshot
> > ===
> > A mirrored live snapshot is issued using src/hd0snap1 and
> > dst/hd0snap1 as
> > image files. (Where "<-" stands for "has backing file")
> >
> > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write)
> >     ...      <- [dst/hd0snap1] <= VM1(write-only)
> >
> > $ qemu-img create -f qcow2 \
> >           -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G
> > Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480
> > backing_file='/tmp/src/hd0base.qcow2' encryption=off
> > cluster_size=65536
> >
> > $ qemu-img create -f qcow2 \
> >           -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G
> > Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480
> > backing_file='/tmp/src/hd0base.qcow2' encryption=off
> > cluster_size=65536
>
> At this stage /tmp/dst/hd0base.qcow2 does not exist yet.  The
> qemu-img
> output you pasted shows /tmp/src/hd0base.qcow2 was actually used.
> Typo?

No that's part of the flag used in [PATCH 2/3] (Update the blkmirror
block driver): BDRV_O_NO_BACKING

It's also documented in the design:

http://www.ovirt.org/wiki/File:StorageLiveMigration2.png

> > (qemu) snapshot_blkdev -n ide0-hd0 \
> >         blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2
> >         blkmirror
> >
> > Step 4 - Backing File Copy
> > ==
> > An external manager copies src/hd0base to the destination
> > dst/hd0base.
> >
> > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write)
> > [dst/hd0base] <- [dst/hd0snap1] <= VM1(write-only)
> >
> > $ cp -a /tmp/src/hd0base.qcow2 /tmp/dst/hd0base.qcow2
>
> Are we missing a fixup step that changes backing_file in
> dst/hd0snap1.qcow2 to point at dst/hd0base.qcow2?

See above.

--
Federico



Re: [Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev

2012-02-23 Thread Federico Simoncelli
- Original Message -
> From: "Paolo Bonzini" 
> To: "Federico Simoncelli" 
> Cc: kw...@redhat.com, mtosa...@redhat.com, qemu-devel@nongnu.org
> Sent: Thursday, February 23, 2012 10:48:59 AM
> Subject: Re: [PATCH 3/3] Add nocreate option to snapshot_blkdev
> 
> On 02/23/2012 10:39 AM, Federico Simoncelli wrote:
> > I agree on the fact that using snapshot_blkdev is a misnomer but at
> > the same time I like very much how blkmirror is modular and it can
> > be used as a regular image filename.
> 
> True, on the other hand if we add this we will have to keep it
> forever.
> 
> I'm worried that blkmirror does not satisfy _all_ mirroring needs,
> for
> example you cannot use block_stream to copy from the source to the
> destination, so perhaps in the future we want to change it to
> something
> else.

Are you talking about a mirroring where you block_stream the missing
clusters in the destination from the source?
I believe that it could be done without losing the blkmirror
modularity probably extending the BlockDriver structure with some
additional concepts.

> So while I appreciate the easier debugging, I'm afraid that we'll
> regret
> adding a command-line-visible feature.
> 
> > I'm worried that making it explicit with a "--mirror" option would
> > take away its flexibility.
> > What about adding block_reopen and extending the blkmirror string:
> > 
> >  (qemu) block_reopen ide-hd0 \
> >  blkmirror:qcow2:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2
> > 
> > so that the format is passed as first argument (and if it's empty
> > we
> > would use the auto-detection).
> 
> No, the format of the source and destination should be independent.
>  It
> would be nice to have something like this:
> 
> blkmirror:src=...,dst=...,srcformat=...,dstformat=...
> 
> but commas are a pain because they need escaping.

I like that, one limitation we need to keep in mind is that it should
fit into the hard-coded filename limit of 1024 characters that (sadly) we
have in multiple places. Another thing is that at this stage the mirroring
is more an original/copy concept rather than source/destination.
What about:

blkmirror:...,copy=...,fmt=... (both images uses the same fmt)
blkmirror:...,copy=...,fmt=...,copyfmt=...

Or, eventually if you feel like that source/destination is more appropriate
for the future, then:

blkmirror:...,dst=...,fmt=...,dstfmt=...

-- 
Federico



Re: [Qemu-devel] [PATCH 2/3] Update the blkmirror block driver

2012-02-23 Thread Federico Simoncelli
- Original Message -
> From: "Paolo Bonzini" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com
> Sent: Thursday, February 23, 2012 8:18:28 AM
> Subject: Re: [PATCH 2/3] Update the blkmirror block driver
> 
> On 02/22/2012 06:13 PM, Federico Simoncelli wrote:
> > @@ -46,7 +46,7 @@ static int blkmirror_open(BlockDriverState *bs,
> > const char *filename, int flags)
> >  filename += strlen("blkmirror:");
> >  
> >  /* Parse the raw image filename */
> > -filename2 = qemu_malloc(strlen(filename)+1);
> > +filename2 = qemu_vmalloc(strlen(filename)+1);
> >  escape = 0;
> >  for (i = n = 0; i < strlen(filename); i++) {
> >  if (!escape && filename[i] == ':') {
> > @@ -66,11 +66,11 @@ static int blkmirror_open(BlockDriverState *bs,
> > const char *filename, int flags)
> >  
> >  m->bs[0] = bdrv_new("");
> >  if (m->bs[0] == NULL) {
> > -free(filename2);
> > +qemu_vfree(filename2);
> >  return -ENOMEM;
> >  }
> >  ret = bdrv_open(m->bs[0], filename2, flags, NULL);
> > -free(filename2);
> > +qemu_vfree(filename2);
> >  if (ret < 0) {
> >  return ret;
> >  }
> 
> These should be g_malloc and g_free.

Thanks!

> Also, please squash this patch in part 1.

Yes, I sent it as a separate patch to make it easier to review my
changes (if someone already reviewed Marcelo's patch).
Any comment on the BDRV_O_NO_BACKING flag I added? That's the real
trick I'm pulling here. It basically allows to open the second image
only for writing and it doesn't complain if the backing file is not
there yet (it will be copied during step 4).

-- 
Federico



Re: [Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev

2012-02-23 Thread Federico Simoncelli
- Original Message -
> From: "Paolo Bonzini" 
> Cc: "Federico Simoncelli" , kw...@redhat.com, 
> mtosa...@redhat.com, qemu-devel@nongnu.org
> Sent: Thursday, February 23, 2012 8:38:55 AM
> Subject: Re: [PATCH 3/3] Add nocreate option to snapshot_blkdev
> 
> On 02/23/2012 08:19 AM, Paolo Bonzini wrote:
> >> > Signed-off-by: Federico Simoncelli 
> > What is the usecase, and how can this be tested?
> 
> Oops, you explained it in part 0.
> 
> > Step 5 - Final Switch to Destination
> > 
> > VM1 is now able to switch to the destination for both read and
> > write
> > operations.
> > 
> > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write)
> > 
> > (qemu) snapshot_blkdev -n ide0-hd0 /tmp/dst/hd0snap1.qcow2
> 
> It seems to me that reusing the snapshot_blkdev command is a bit of a
> misnomer.  It also forces you to use format autodetection for the
> destination in the blkmirror phase.
> 
> I see two possibilities:
> 
> 1) you can make a new command block_reopen; this fixes only the
> naming
> problem.
> 
> 2) you can hide the new format behind a new command to be used like
> this:
> 
>block_migrate --mirror ide-hd0 /tmp/dst/hd0snap1.qcow2
>...
>block_migrate --switch ide-hd0
> 
> This leave the possibility to specify the format in the future:
> 
>block_migrate --mirror ide-hd0 /tmp/dst/hd0snap1.qcow2 qcow2
> 
> And we could have another sub-command
> 
>block_mirror --stream ide-hd0 /tmp/dst/hd0.raw
> 
> to migrate block devices without shared storage and without an
> intermediate snapshot.

I agree on the fact that using snapshot_blkdev is a misnomer but at
the same time I like very much how blkmirror is modular and it can
be used as a regular image filename.
I'm worried that making it explicit with a "--mirror" option would
take away its flexibility.
What about adding block_reopen and extending the blkmirror string:

 (qemu) block_reopen ide-hd0 \
 blkmirror:qcow2:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2

so that the format is passed as first argument (and if it's empty we
would use the auto-detection).

-- 
Federico



[Qemu-devel] [PATCH 1/3] Add blkmirror block driver

2012-02-22 Thread Federico Simoncelli
From: Marcelo Tosatti 

Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti 
---
 Makefile.objs  |2 +-
 block/blkmirror.c  |  282 
 docs/blkmirror.txt |   15 +++
 3 files changed, 298 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 391e524..cd65e1b 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o 
blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 000..1c02710
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,282 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include 
+#include "block_int.h"
+
+typedef struct {
+BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB {
+BlockDriverAIOCB *aiocb;
+int finished;
+DupAIOCB *parent;
+} SingleAIOCB;
+
+struct DupAIOCB {
+BlockDriverAIOCB common;
+int count;
+
+BlockDriverCompletionFunc *cb;
+SingleAIOCB aios[2];
+int ret;
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:path/to/image1:path/to/image2 */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int 
flags)
+{
+BdrvMirrorState *m = bs->opaque;
+int ret, escape, i, n;
+char *filename2;
+
+/* Parse the blkmirror: prefix */
+if (strncmp(filename, "blkmirror:", strlen("blkmirror:"))) {
+return -EINVAL;
+}
+filename += strlen("blkmirror:");
+
+/* Parse the raw image filename */
+filename2 = qemu_malloc(strlen(filename)+1);
+escape = 0;
+for (i = n = 0; i < strlen(filename); i++) {
+if (!escape && filename[i] == ':') {
+break;
+}
+if (!escape && filename[i] == '\\') {
+escape = 1;
+} else {
+escape = 0;
+}
+
+if (!escape) {
+filename2[n++] = filename[i];
+}
+}
+filename2[n] = '\0';
+
+m->bs[0] = bdrv_new("");
+if (m->bs[0] == NULL) {
+free(filename2);
+return -ENOMEM;
+}
+ret = bdrv_open(m->bs[0], filename2, flags, NULL);
+free(filename2);
+if (ret < 0) {
+return ret;
+}
+filename += i + 1;
+
+m->bs[1] = bdrv_new("");
+if (m->bs[1] == NULL) {
+bdrv_delete(m->bs[0]);
+return -ENOMEM;
+}
+ret = bdrv_open(m->bs[1], filename, flags, NULL);
+if (ret < 0) {
+bdrv_delete(m->bs[0]);
+return ret;
+}
+
+return 0;
+}
+
+static void blkmirror_close(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int i;
+
+for (i = 0; i < 2; i++) {
+bdrv_delete(m->bs[i]);
+m->bs[i] = NULL;
+}
+}
+
+static int blkmirror_flush(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+int ret;
+
+ret = bdrv_flush(m->bs[0]);
+if (ret < 0) {
+return ret;
+}
+
+return bdrv_flush(m->bs[1]);
+}
+
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+BdrvMirrorState *m = bs->opaque;
+
+return bdrv_getlength(m->bs[0]);
+}
+
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+ int64_t sector_num,
+ QEMUIOVector *qiov,
+ int nb_sectors,
+ BlockDriverCompletionFunc *cb,
+ void *opaque)
+{
+BdrvMirrorState *m = bs->opaque;
+return bdrv_aio_readv(m->bs[0], sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+DupAIOCB *acb = container_of(blockacb, DupAIOCB, common);
+int i;
+
+for (i = 0 ; i < 2; i++) {
+if (!acb->aios[i].finished) {
+bdrv_aio_cancel(acb->aios[i].aiocb);
+}
+}
+qemu_aio_release(acb);
+}
+
+static AIOPool dup_aio_pool = {
+.aiocb_size = sizeof(DupAIOCB),
+.cancel = dup_aio_cancel,
+};
+
+static void blkmirror_aio_cb(void *opaque, int ret)
+{
+SingleAIOCB *scb = opaque;
+DupAIOCB *dcb = scb->parent;
+
+scb->finishe

[Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev

2012-02-22 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli 
---
 blockdev.c   |   14 --
 hmp-commands.hx  |   16 ++--
 hmp.c|4 +++-
 qapi-schema.json |8 +++-
 qmp-commands.hx  |2 +-
 5 files changed, 29 insertions(+), 15 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 7a6613a..2c3e10a 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -647,7 +647,7 @@ void do_commit(Monitor *mon, const QDict *qdict)
 
 void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file,
 bool has_format, const char *format,
-Error **errp)
+bool has_nocreate, bool nocreate, Error **errp)
 {
 BlockDriverState *bs;
 BlockDriver *drv, *old_drv, *proto_drv;
@@ -686,11 +686,13 @@ void qmp_blockdev_snapshot_sync(const char *device, const 
char *snapshot_file,
 return;
 }
 
-ret = bdrv_img_create(snapshot_file, format, bs->filename,
-  bs->drv->format_name, NULL, -1, flags);
-if (ret) {
-error_set(errp, QERR_UNDEFINED_ERROR);
-return;
+if (!(has_nocreate && nocreate)) {
+ret = bdrv_img_create(snapshot_file, format, bs->filename,
+  bs->drv->format_name, NULL, -1, flags);
+if (ret) {
+error_set(errp, QERR_UNDEFINED_ERROR);
+return;
+}
 }
 
 bdrv_drain_all();
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 573b823..ff49412 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -868,14 +868,18 @@ ETEXI
 
 {
 .name   = "snapshot_blkdev",
-.args_type  = "device:B,snapshot-file:s?,format:s?",
-.params = "device [new-image-file] [format]",
+.args_type  = "nocreate:-n,device:B,snapshot-file:s?,format:s?",
+.params = "[-n] device [new-image-file] [format]",
 .help   = "initiates a live snapshot\n\t\t\t"
   "of device. If a new image file is specified, 
the\n\t\t\t"
-  "new image file will become the new root image.\n\t\t\t"
-  "If format is specified, the snapshot file will\n\t\t\t"
-  "be created in that format. Otherwise the\n\t\t\t"
-  "snapshot will be internal! (currently unsupported)",
+  "new image file will be created (unless -n is\n\t\t\t"
+  "specified) and will become the new root image.\n\t\t\t"
+  "The -n (nocreate) option is potentially 
dangerous\n\t\t\t"
+  "if the image wasn't prepared in the correct way\n\t\t\t"
+  "by an external manager. If format is specified,\n\t\t\t"
+  "the snapshot file will be created in that 
format.\n\t\t\t"
+  "Otherwise the snapshot will be internal!\n\t\t\t"
+  "(currently unsupported)",
 .mhandler.cmd = hmp_snapshot_blkdev,
 },
 
diff --git a/hmp.c b/hmp.c
index 8ff8c94..44868c7 100644
--- a/hmp.c
+++ b/hmp.c
@@ -684,6 +684,7 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict)
 
 void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
 {
+int nocreate = qdict_get_try_bool(qdict, "nocreate", 0);
 const char *device = qdict_get_str(qdict, "device");
 const char *filename = qdict_get_try_str(qdict, "snapshot-file");
 const char *format = qdict_get_try_str(qdict, "format");
@@ -697,7 +698,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
 return;
 }
 
-qmp_blockdev_snapshot_sync(device, filename, !!format, format, &errp);
+qmp_blockdev_snapshot_sync(device, filename,
+   !!format, format, true, nocreate, &errp);
 hmp_handle_error(mon, &errp);
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index d02ee86..ac652c2 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1116,9 +1116,14 @@
 # @snapshot-file: the target of the new image. If the file exists, or if it
 # is a device, the snapshot will be created in the existing
 # file/device. If does not exist, a new file will be created.
+# This behavior can be overridden using the nocreate option.
 #
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 #
+# @nocreate: #optional use the target image avoiding the creation. This is
+#a potentially dangerous option if the image wasn't prepared
+#in the correct way by an external manager.
+#
 # Returns: nothing on success
 #  If @device is not a valid block device, DeviceNotFound

[Qemu-devel] [PATCH 2/3] Update the blkmirror block driver

2012-02-22 Thread Federico Simoncelli
Signed-off-by: Federico Simoncelli 
---
 block/blkmirror.c |   26 +-
 1 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/block/blkmirror.c b/block/blkmirror.c
index 1c02710..1cfd2fb 100644
--- a/block/blkmirror.c
+++ b/block/blkmirror.c
@@ -46,7 +46,7 @@ static int blkmirror_open(BlockDriverState *bs, const char 
*filename, int flags)
 filename += strlen("blkmirror:");
 
 /* Parse the raw image filename */
-filename2 = qemu_malloc(strlen(filename)+1);
+filename2 = qemu_vmalloc(strlen(filename)+1);
 escape = 0;
 for (i = n = 0; i < strlen(filename); i++) {
 if (!escape && filename[i] == ':') {
@@ -66,11 +66,11 @@ static int blkmirror_open(BlockDriverState *bs, const char 
*filename, int flags)
 
 m->bs[0] = bdrv_new("");
 if (m->bs[0] == NULL) {
-free(filename2);
+qemu_vfree(filename2);
 return -ENOMEM;
 }
 ret = bdrv_open(m->bs[0], filename2, flags, NULL);
-free(filename2);
+qemu_vfree(filename2);
 if (ret < 0) {
 return ret;
 }
@@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, const char 
*filename, int flags)
 bdrv_delete(m->bs[0]);
 return -ENOMEM;
 }
-ret = bdrv_open(m->bs[1], filename, flags, NULL);
+ret = bdrv_open(m->bs[1], filename, flags | BDRV_O_NO_BACKING, NULL);
 if (ret < 0) {
 bdrv_delete(m->bs[0]);
 return ret;
@@ -101,17 +101,17 @@ static void blkmirror_close(BlockDriverState *bs)
 }
 }
 
-static int blkmirror_flush(BlockDriverState *bs)
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
 {
 BdrvMirrorState *m = bs->opaque;
 int ret;
 
-ret = bdrv_flush(m->bs[0]);
+ret = bdrv_co_flush(m->bs[0]);
 if (ret < 0) {
 return ret;
 }
 
-return bdrv_flush(m->bs[1]);
+return bdrv_co_flush(m->bs[1]);
 }
 
 static int64_t blkmirror_getlength(BlockDriverState *bs)
@@ -242,18 +242,18 @@ static BlockDriverAIOCB 
*blkmirror_aio_flush(BlockDriverState *bs,
 return &dcb->common;
 }
 
-static int blkmirror_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+static coroutine_fn int blkmirror_co_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
 {
 BdrvMirrorState *m = bs->opaque;
 int ret;
 
-ret = bdrv_discard(m->bs[0], sector_num, nb_sectors);
+ret = bdrv_co_discard(m->bs[0], sector_num, nb_sectors);
 if (ret < 0) {
 return ret;
 }
 
-return bdrv_discard(m->bs[1], sector_num, nb_sectors);
+return bdrv_co_discard(m->bs[1], sector_num, nb_sectors);
 }
 
 
@@ -266,8 +266,8 @@ static BlockDriver bdrv_blkmirror = {
 
 .bdrv_file_open = blkmirror_open,
 .bdrv_close = blkmirror_close,
-.bdrv_flush = blkmirror_flush,
-.bdrv_discard   = blkmirror_discard,
+.bdrv_co_flush_to_disk = blkmirror_co_flush,
+.bdrv_co_discard= blkmirror_co_discard,
 
 .bdrv_aio_readv = blkmirror_aio_readv,
 .bdrv_aio_writev= blkmirror_aio_writev,
-- 
1.7.1




[Qemu-devel] Live Block Migration using Mirroring

2012-02-22 Thread Federico Simoncelli
Hi,
 recently I've been working on live block migration combining the live
snapshots and the blkmirror patch sent by Marcelo Tosatti few months ago.

The design is summarized at this url as "Mirrored-Snapshot": 

http://www.ovirt.org/wiki/Features/Design/StorageLiveMigration

The design assumes that the qemu process can reach both the source and
destination storages and no real VM migration between hosts is involved.
The principal problem that it tries to solve is moving a VM to a new
reachable storage (more space, faster) without temporarily disrupting its
services.

The following set of patches are implementing the required changes in
QEMU.

Here it is a quick example of the use case (for consistency with the
design at the url above I will use the same step numbers):

Preparation
===

$ mkdir /tmp/{src/dst}
$ qemu-img create -f qcow2 /tmp/src/hd0base.qcow2 20G
Formatting '/tmp/src/hd0base.qcow2', fmt=qcow2 size=21474836480
encryption=off cluster_size=65536

Step 1 - Initital Scenario
==
VM1 is running on the src/hd0base. (Where "<=" stands for "uses")

[src/hd0base] <= VM1(read-write)

$ qemu-system-x86_64 -hda /tmp/src/hd0base.qcow2 -monitor stdio
QEMU 1.0.50 monitor - type 'help' for more information
(qemu)

Step 3 - Mirrored Live Snapshot
===
A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as
image files. (Where "<-" stands for "has backing file")

[src/hd0base] <- [src/hd0snap1] <= VM1(read-write)
 ...  <- [dst/hd0snap1] <= VM1(write-only)

$ qemu-img create -f qcow2 \
   -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G
Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480
backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536

$ qemu-img create -f qcow2 \
   -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G
Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480
backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536

(qemu) snapshot_blkdev -n ide0-hd0 \
 blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 blkmirror

Step 4 - Backing File Copy
==
An external manager copies src/hd0base to the destination dst/hd0base.

[src/hd0base] <- [src/hd0snap1] <= VM1(read-write)
[dst/hd0base] <- [dst/hd0snap1] <= VM1(write-only)

$ cp -a /tmp/src/hd0base.qcow2 /tmp/dst/hd0base.qcow2

Step 5 - Final Switch to Destination

VM1 is now able to switch to the destination for both read and write
operations.

[src/hd0base] <- [src/hd0snap1] <= VM1(read-write)

(qemu) snapshot_blkdev -n ide0-hd0 /tmp/dst/hd0snap1.qcow2

-- 
Federico




Re: [Qemu-devel] [vdsm] oVirt Live Snapshots

2012-02-02 Thread Federico Simoncelli
- Original Message -
> From: "Shu Ming" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, libvir-l...@redhat.com, "VDSM Project Development" 
> ,
> "Dave Allan" , "Eric Blake" 
> Sent: Thursday, February 2, 2012 1:59:01 PM
> Subject: Re: [vdsm] oVirt Live Snapshots
>
> Can someone explain what is "DB" in this wiki page?

It is the ovirt-engine database, where the VMs/images information
and status is stored.
That part of the wiki should be improved.

> See,
>
>
> Live snapshots operation extend regular snapshots as follow:
>
> • Create a locked snapshot in DB


--
Federico



[Qemu-devel] oVirt Live Snapshots

2012-01-30 Thread Federico Simoncelli
Hi,
  oVirt, and more specifically VDSM, is currently implementing the live
snapshot feature using the API/commands provided by libvirt and qemu.
It would be great if you could review the design and the current open
issues at:

  http://ovirt.org/wiki/Live_Snapshots

Thank you,
-- 
Federico



Re: [Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to avoid image creation

2011-10-11 Thread Federico Simoncelli
- Original Message -
> From: "Dor Laor" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, aba...@redhat.com, "Kevin Wolf" 
> Sent: Friday, October 7, 2011 12:45:06 AM
> Subject: Re: [Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to 
> avoid image creation
> 
> On 10/03/2011 06:09 PM, Federico Simoncelli wrote:
> > Add the new option [-n] for snapshot_blkdev to avoid the image
> > creation.
> > The file provided as [new-image-file] is considered as already
> > initialized
> > and will be used after passing a check for the backing file.
> 
> Seems ok to me as a way to go around fdget and still have selinux
> gain.
> Worth to get Kevin's view too.

Hi Kevin, any news on this?

> Federico, would you like to ack or extend the design:
> http://wiki.qemu.org/Features/Snapshots

Dor, should I do it now or only after my patch is acked?

-- 
Federico



Re: [Qemu-devel] New option for snapshot_blkdev to avoid image creation

2011-10-04 Thread Federico Simoncelli
- Original Message -
> From: "Stefan Hajnoczi" 
> To: "Federico Simoncelli" 
> Cc: qemu-devel@nongnu.org, aba...@redhat.com, dl...@redhat.com
> Sent: Tuesday, October 4, 2011 9:33:48 AM
> Subject: Re: [Qemu-devel] New option for snapshot_blkdev to avoid image 
> creation
> 
> On Mon, Oct 03, 2011 at 04:09:01PM +, Federico Simoncelli wrote:
> > In some situations might be useful to let qemu use an image that
> > was
> > prepared for a live snapshot.
> > The advantage is that creating the snapshot file outside of the
> > qemu
> > process we can use the whole range of options provided by the
> > format
> > (eg for qcow2: encryption, cluster_size and preallocation).
> > It is also possible to pre-set a relative path to the backing file
> > (now it is created by default as absolute path).
> > In the long run it can also avoid the danger of reimplementing
> > qemu-img
> > inside qemu (if we wanted to expose such options when a snapshot is
> > requested).
> 
> When the image file is created based on the backing file size:
> 
> $ qemu-img create -f qcow2 -o backing_file=master.img vm001.qcow2
> 
> It turns out that bdrv_img_create() opens the backing file with
> read/write permissions.  This is generally a bad idea but especially
> dangerous when the VM currently has the image file open already since
> image formats are not designed for multiple initiators (clustering).

Hi Stefan, are you sure that bdrv_img_create opens the backing file
with read/write permissions?
After a quick check I can't see that happening:

$ ./qemu-img create -f qcow2 /tmp/master.img 1G
Formatting '/tmp/master.img', fmt=qcow2 size=1073741824 encryption=off 
cluster_size=65536

$ strace -e trace=open ./qemu-img create -f qcow2 -o 
backing_file=/tmp/master.img /tmp/vm001.qcow2
[...]
open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or 
directory)
open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or 
directory)
open("/tmp/master.img", O_RDONLY|O_NONBLOCK) = 3
open("/tmp/master.img", O_RDONLY|O_NONBLOCK) = 3
open("/tmp/master.img", O_RDONLY|O_DSYNC|O_CLOEXEC) = 3
open("/tmp/master.img", O_RDONLY|O_NONBLOCK) = 3
open("/tmp/master.img", O_RDONLY|O_NONBLOCK) = 3
open("/tmp/master.img", O_RDONLY|O_CLOEXEC) = 3
Formatting '/tmp/vm001.qcow2', fmt=qcow2 size=1073741824 
backing_file='/tmp/master.img' encryption=off cluster_size=65536 
open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or 
directory)
open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or 
directory)
open("/tmp/vm001.qcow2", O_WRONLY|O_CREAT|O_TRUNC, 0644) = 6
open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = 6
open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = 6
open("/tmp/vm001.qcow2", O_RDWR|O_DSYNC|O_CLOEXEC) = 6
open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = 6
open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = 6
open("/tmp/vm001.qcow2", O_RDWR|O_CLOEXEC) = 6

-- 
Federico



[Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to avoid image creation

2011-10-03 Thread Federico Simoncelli
Add the new option [-n] for snapshot_blkdev to avoid the image creation.
The file provided as [new-image-file] is considered as already initialized
and will be used after passing a check for the backing file.

Signed-off-by: Federico Simoncelli 
---
 blockdev.c  |   54 --
 hmp-commands.hx |7 ---
 qmp-commands.hx |4 ++--
 3 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 0827bf7..bd46808 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -550,8 +550,53 @@ void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
+static int check_snapshot_file(const char *filename, const char *oldfilename,
+   int flags, BlockDriver *drv)
+{
+BlockDriverState *bs;
+char bak_filename[1024], *abs_filename;
+int ret = 0;
+
+bs = bdrv_new("");
+if (!bs) {
+return -1;
+}
+
+ret = bdrv_open(bs, filename, flags, drv);
+if (ret) {
+qerror_report(QERR_OPEN_FILE_FAILED, filename);
+goto err0;
+}
+
+if (bs->backing_file) {
+path_combine(bak_filename, sizeof(bak_filename),
+ filename, bs->backing_file);
+
+abs_filename = realpath(bak_filename, NULL);
+if (!abs_filename) {
+ret = -1;
+goto err1;
+}
+
+if (strcmp(abs_filename, oldfilename)) {
+qerror_report(QERR_OPEN_FILE_FAILED, filename);
+ret = -1;
+}
+
+free(abs_filename);
+}
+
+err1:
+bdrv_close(bs);
+
+err0:
+bdrv_delete(bs);
+return ret;
+}
+
 int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
+const int nocreate = qdict_get_try_bool(qdict, "nocreate", 0);
 const char *device = qdict_get_str(qdict, "device");
 const char *filename = qdict_get_try_str(qdict, "snapshot-file");
 const char *format = qdict_get_try_str(qdict, "format");
@@ -597,8 +642,13 @@ int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, 
QObject **ret_data)
 goto out;
 }
 
-ret = bdrv_img_create(filename, format, bs->filename,
-  bs->drv->format_name, NULL, -1, flags);
+if (nocreate) {
+ret = check_snapshot_file(filename, bs->filename, flags, drv);
+} else {
+ret = bdrv_img_create(filename, format, bs->filename,
+  bs->drv->format_name, NULL, -1, flags);
+}
+
 if (ret) {
 goto out;
 }
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 9e1cca8..eb9fcd4 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -840,11 +840,12 @@ ETEXI
 
 {
 .name   = "snapshot_blkdev",
-.args_type  = "device:B,snapshot-file:s?,format:s?",
-.params = "device [new-image-file] [format]",
+.args_type  = "nocreate:-n,device:B,snapshot-file:s?,format:s?",
+.params = "[-n] device [new-image-file] [format]",
 .help   = "initiates a live snapshot\n\t\t\t"
   "of device. If a new image file is specified, 
the\n\t\t\t"
-  "new image file will become the new root image.\n\t\t\t"
+  "new image file will be created (unless -n is\n\t\t\t"
+  "specified) and will become the new root image.\n\t\t\t"
   "If format is specified, the snapshot file will\n\t\t\t"
   "be created in that format. Otherwise the\n\t\t\t"
   "snapshot will be internal! (currently unsupported)",
diff --git a/qmp-commands.hx b/qmp-commands.hx
index d83bce5..7af36d8 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -695,8 +695,8 @@ EQMP
 
 {
 .name   = "blockdev-snapshot-sync",
-.args_type  = "device:B,snapshot-file:s?,format:s?",
-.params = "device [new-image-file] [format]",
+.args_type  = "nocreate:-n,device:B,snapshot-file:s?,format:s?",
+.params = "[-n] device [new-image-file] [format]",
 .user_print = monitor_user_noop,
 .mhandler.cmd_new = do_snapshot_blkdev,
 },
-- 
1.7.1




[Qemu-devel] New option for snapshot_blkdev to avoid image creation

2011-10-03 Thread Federico Simoncelli
In some situations might be useful to let qemu use an image that was
prepared for a live snapshot.
The advantage is that creating the snapshot file outside of the qemu
process we can use the whole range of options provided by the format
(eg for qcow2: encryption, cluster_size and preallocation).
It is also possible to pre-set a relative path to the backing file
(now it is created by default as absolute path).
In the long run it can also avoid the danger of reimplementing qemu-img
inside qemu (if we wanted to expose such options when a snapshot is
requested).

-- 
Federico.




[Qemu-devel] [PATCHv4] qemu-img: Add cache command line option

2011-06-20 Thread Federico Simoncelli
qemu-img currently writes disk images using writeback and filling
up the cache buffers which are then flushed by the kernel preventing
other processes from accessing the storage.
This is particularly bad in cluster environments where time-based
algorithms might be in place and accessing the storage within
certain timeouts is critical.
This patch adds the option to choose a cache method when writing
disk images.

Signed-off-by: Federico Simoncelli 
---
 qemu-img-cmds.hx |6 ++--
 qemu-img.c   |   80 +-
 2 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 3072d38..2b70618 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -22,13 +22,13 @@ STEXI
 ETEXI
 
 DEF("commit", img_commit,
-"commit [-f fmt] filename")
+"commit [-f fmt] [-t cache] filename")
 STEXI
 @item commit [-f @var{fmt}] @var{filename}
 ETEXI
 
 DEF("convert", img_convert,
-"convert [-c] [-p] [-f fmt] [-O output_fmt] [-o options] [-s 
snapshot_name] filename [filename2 [...]] output_filename")
+"convert [-c] [-p] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s 
snapshot_name] filename [filename2 [...]] output_filename")
 STEXI
 @item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s 
@var{snapshot_name}] @var{filename} [@var{filename2} [...]] 
@var{output_filename}
 ETEXI
@@ -46,7 +46,7 @@ STEXI
 ETEXI
 
 DEF("rebase", img_rebase,
-"rebase [-f fmt] [-p] [-u] -b backing_file [-F backing_fmt] filename")
+"rebase [-f fmt] [-t cache] [-p] [-u] -b backing_file [-F backing_fmt] 
filename")
 STEXI
 @item rebase [-f @var{fmt}] [-u] -b @var{backing_file} [-F @var{backing_fmt}] 
@var{filename}
 ETEXI
diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..f904e32 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -40,6 +40,7 @@ typedef struct img_cmd_t {
 
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_DEFAULT_CACHE "writeback"
 
 static void format_print(void *opaque, const char *name)
 {
@@ -64,6 +65,8 @@ static void help(void)
"Command parameters:\n"
"  'filename' is a disk image filename\n"
"  'fmt' is the disk image format. It is guessed automatically in 
most cases\n"
+   "  'cache' is the cache mode used to write the output disk image, 
the valid\n"
+   "options are: 'none', 'writeback' (default), 'writethrough' and 
'unsafe'\n"
"  'size' is the disk image size in bytes. Optional suffixes\n"
"'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' 
(gigabyte, 1024M)\n"
"and T (terabyte, 1024G) are supported. 'b' is ignored.\n"
@@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size)
 }
 #endif
 
+static int set_cache_flag(const char *mode, int *flags)
+{
+*flags &= ~BDRV_O_CACHE_MASK;
+
+if (!strcmp(mode, "none") || !strcmp(mode, "off")) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NOCACHE;
+} else if (!strcmp(mode, "writeback")) {
+*flags |= BDRV_O_CACHE_WB;
+} else if (!strcmp(mode, "unsafe")) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NO_FLUSH;
+} else if (!strcmp(mode, "writethrough")) {
+/* this is the default */
+} else {
+return -1;
+}
+
+return 0;
+}
+
 static int print_block_option_help(const char *filename, const char *fmt)
 {
 BlockDriver *drv, *proto_drv;
@@ -441,13 +465,14 @@ static int img_check(int argc, char **argv)
 
 static int img_commit(int argc, char **argv)
 {
-int c, ret;
-const char *filename, *fmt;
+int c, ret, flags;
+const char *filename, *fmt, *cache;
 BlockDriverState *bs;
 
 fmt = NULL;
+cache = BDRV_DEFAULT_CACHE;
 for(;;) {
-c = getopt(argc, argv, "f:h");
+c = getopt(argc, argv, "f:ht:");
 if (c == -1) {
 break;
 }
@@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv)
 case 'f':
 fmt = optarg;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 if (optind >= argc) {
@@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv)
 }
 filename = argv[optind++];
 
-bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, &flags);
+if (ret < 0) {
+error_report("Invalid cache option: %s\n"

Re: [Qemu-devel] [PATCH] qemu-img: Add cache command line option

2011-06-16 Thread Federico Simoncelli
- Original Message -
> From: "Christoph Hellwig" 
> To: "Federico Simoncelli" 
> Cc: k...@vger.kernel.org, kw...@redhat.com, qemu-devel@nongnu.org, 
> a...@redhat.com
> Sent: Thursday, June 16, 2011 4:28:09 PM
> Subject: Re: [Qemu-devel] [PATCH] qemu-img: Add cache command line option
> On Wed, Jun 15, 2011 at 09:46:10AM -0400, Federico Simoncelli wrote:
> > qemu-img currently writes disk images using writeback and filling
> > up the cache buffers which are then flushed by the kernel preventing
> > other processes from accessing the storage.
> > This is particularly bad in cluster environments where time-based
> > algorithms might be in place and accessing the storage within
> > certain timeouts is critical.
> > This patch adds the option to choose a cache method when writing
> > disk images.
> 
> Allowing to chose the mode is of course fine, but what about also
> choosing a good default? writethrough doesn't really make any sense
> for qemu-img, given that we can trivially flush the cache at the end
> of the operations. I'd also say that using the buffer cache doesn't
> make sense either, as there is little point in caching these
> operations.

I totally agree with you, I didn't change the default to increase the
chances for my patch to be accepted.
If we want cache=none as default we can easily change:

-#define BDRV_DEFAULT_CACHE "writeback"
+#define BDRV_DEFAULT_CACHE "none"

-- 
Federico



[Qemu-devel] [PATCHv3] qemu-img: Add cache command line option

2011-06-16 Thread Federico Simoncelli
qemu-img currently writes disk images using writeback and filling
up the cache buffers which are then flushed by the kernel preventing
other processes from accessing the storage.
This is particularly bad in cluster environments where time-based
algorithms might be in place and accessing the storage within
certain timeouts is critical.
This patch adds the option to choose a cache method when writing
disk images.

Signed-off-by: Federico Simoncelli 
---
 qemu-img-cmds.hx |6 ++--
 qemu-img.c   |   80 +-
 2 files changed, 70 insertions(+), 16 deletions(-)

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 3072d38..2b70618 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -22,13 +22,13 @@ STEXI
 ETEXI
 
 DEF("commit", img_commit,
-"commit [-f fmt] filename")
+"commit [-f fmt] [-t cache] filename")
 STEXI
 @item commit [-f @var{fmt}] @var{filename}
 ETEXI
 
 DEF("convert", img_convert,
-"convert [-c] [-p] [-f fmt] [-O output_fmt] [-o options] [-s 
snapshot_name] filename [filename2 [...]] output_filename")
+"convert [-c] [-p] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s 
snapshot_name] filename [filename2 [...]] output_filename")
 STEXI
 @item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s 
@var{snapshot_name}] @var{filename} [@var{filename2} [...]] 
@var{output_filename}
 ETEXI
@@ -46,7 +46,7 @@ STEXI
 ETEXI
 
 DEF("rebase", img_rebase,
-"rebase [-f fmt] [-p] [-u] -b backing_file [-F backing_fmt] filename")
+"rebase [-f fmt] [-t cache] [-p] [-u] -b backing_file [-F backing_fmt] 
filename")
 STEXI
 @item rebase [-f @var{fmt}] [-u] -b @var{backing_file} [-F @var{backing_fmt}] 
@var{filename}
 ETEXI
diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..c2a106e 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -40,6 +40,7 @@ typedef struct img_cmd_t {
 
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_DEFAULT_CACHE "writeback"
 
 static void format_print(void *opaque, const char *name)
 {
@@ -64,6 +65,8 @@ static void help(void)
"Command parameters:\n"
"  'filename' is a disk image filename\n"
"  'fmt' is the disk image format. It is guessed automatically in 
most cases\n"
+   "  'cache' is the cache mode used to write the output disk image, 
the valid\n"
+   "options are: 'none', 'writeback' (default), 'writethrough' and 
'unsafe'\n"
"  'size' is the disk image size in bytes. Optional suffixes\n"
"'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' 
(gigabyte, 1024M)\n"
"and T (terabyte, 1024G) are supported. 'b' is ignored.\n"
@@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size)
 }
 #endif
 
+static int set_cache_flag(const char *mode, int *flags)
+{
+*flags &= ~BDRV_O_CACHE_MASK;
+
+if (!strcmp(mode, "none") || !strcmp(mode, "off")) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NOCACHE;
+} else if (!strcmp(mode, "writeback")) {
+*flags |= BDRV_O_CACHE_WB;
+} else if (!strcmp(mode, "unsafe")) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NO_FLUSH;
+} else if (!strcmp(mode, "writethrough")) {
+/* this is the default */
+} else {
+return -1;
+}
+
+return 0;
+}
+
 static int print_block_option_help(const char *filename, const char *fmt)
 {
 BlockDriver *drv, *proto_drv;
@@ -441,13 +465,14 @@ static int img_check(int argc, char **argv)
 
 static int img_commit(int argc, char **argv)
 {
-int c, ret;
-const char *filename, *fmt;
+int c, ret, flags;
+const char *filename, *fmt, *cache;
 BlockDriverState *bs;
 
 fmt = NULL;
+cache = BDRV_DEFAULT_CACHE;
 for(;;) {
-c = getopt(argc, argv, "f:h");
+c = getopt(argc, argv, "f:ht:");
 if (c == -1) {
 break;
 }
@@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv)
 case 'f':
 fmt = optarg;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 if (optind >= argc) {
@@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv)
 }
 filename = argv[optind++];
 
-bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, &flags);
+if (ret < 0) {
+error_report("Invalid cache option: %s\n"

[Qemu-devel] [PATCHv2] qemu-img: Add cache command line option

2011-06-15 Thread Federico Simoncelli
qemu-img currently writes disk images using writeback and filling
up the cache buffers which are then flushed by the kernel preventing
other processes from accessing the storage.
This is particularly bad in cluster environments where time-based
algorithms might be in place and accessing the storage within
certain timeouts is critical.
This patch adds the option to choose a cache method when writing
disk images.

Signed-off-by: Federico Simoncelli 
---
 qemu-img.c |   80 ++-
 1 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..c2a106e 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -40,6 +40,7 @@ typedef struct img_cmd_t {
 
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_DEFAULT_CACHE "writeback"
 
 static void format_print(void *opaque, const char *name)
 {
@@ -64,6 +65,8 @@ static void help(void)
"Command parameters:\n"
"  'filename' is a disk image filename\n"
"  'fmt' is the disk image format. It is guessed automatically in 
most cases\n"
+   "  'cache' is the cache mode used to write the output disk image, 
the valid\n"
+   "options are: 'none', 'writeback' (default), 'writethrough' and 
'unsafe'\n"
"  'size' is the disk image size in bytes. Optional suffixes\n"
"'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' 
(gigabyte, 1024M)\n"
"and T (terabyte, 1024G) are supported. 'b' is ignored.\n"
@@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size)
 }
 #endif
 
+static int set_cache_flag(const char *mode, int *flags)
+{
+*flags &= ~BDRV_O_CACHE_MASK;
+
+if (!strcmp(mode, "none") || !strcmp(mode, "off")) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NOCACHE;
+} else if (!strcmp(mode, "writeback")) {
+*flags |= BDRV_O_CACHE_WB;
+} else if (!strcmp(mode, "unsafe")) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NO_FLUSH;
+} else if (!strcmp(mode, "writethrough")) {
+/* this is the default */
+} else {
+return -1;
+}
+
+return 0;
+}
+
 static int print_block_option_help(const char *filename, const char *fmt)
 {
 BlockDriver *drv, *proto_drv;
@@ -441,13 +465,14 @@ static int img_check(int argc, char **argv)
 
 static int img_commit(int argc, char **argv)
 {
-int c, ret;
-const char *filename, *fmt;
+int c, ret, flags;
+const char *filename, *fmt, *cache;
 BlockDriverState *bs;
 
 fmt = NULL;
+cache = BDRV_DEFAULT_CACHE;
 for(;;) {
-c = getopt(argc, argv, "f:h");
+c = getopt(argc, argv, "f:ht:");
 if (c == -1) {
 break;
 }
@@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv)
 case 'f':
 fmt = optarg;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 if (optind >= argc) {
@@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv)
 }
 filename = argv[optind++];
 
-bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, &flags);
+if (ret < 0) {
+error_report("Invalid cache option: %s\n", cache);
+return -1;
+}
+
+bs = bdrv_new_open(filename, fmt, flags);
 if (!bs) {
 return 1;
 }
@@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const 
uint8_t *buf2, int n,
 static int img_convert(int argc, char **argv)
 {
 int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
-int progress = 0;
-const char *fmt, *out_fmt, *out_baseimg, *out_filename;
+int progress = 0, flags;
+const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
 BlockDriver *drv, *proto_drv;
 BlockDriverState **bs = NULL, *out_bs = NULL;
 int64_t total_sectors, nb_sectors, sector_num, bs_offset;
@@ -608,10 +643,11 @@ static int img_convert(int argc, char **argv)
 
 fmt = NULL;
 out_fmt = "raw";
+cache = BDRV_DEFAULT_CACHE;
 out_baseimg = NULL;
 compress = 0;
 for(;;) {
-c = getopt(argc, argv, "f:O:B:s:hce6o:p");
+c = getopt(argc, argv, "f:O:B:s:hce6o:pt:");
 if (c == -1) {
 break;
 }
@@ -649,6 +685,9 @@ static int img_convert(int argc, char **argv)
 case 'p':
 progress = 1;
 break;
+case 't':
+

[Qemu-devel] [PATCH] qemu-img: Add cache command line option

2011-06-15 Thread Federico Simoncelli
qemu-img currently writes disk images using writeback and filling
up the cache buffers which are then flushed by the kernel preventing
other processes from accessing the storage.
This is particularly bad in cluster environments where time-based
algorithms might be in place and accessing the storage within
certain timeouts is critical.
This patch adds the option to choose a cache method when writing
disk images.

Signed-off-by: Federico Simoncelli 
---
 qemu-img.c |   81 ++-
 1 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index 4f162d1..7609db5 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -40,6 +40,7 @@ typedef struct img_cmd_t {
 
 /* Default to cache=writeback as data integrity is not important for qemu-tcg. 
*/
 #define BDRV_O_FLAGS BDRV_O_CACHE_WB
+#define BDRV_DEFAULT_CACHE "writeback"
 
 static void format_print(void *opaque, const char *name)
 {
@@ -64,6 +65,8 @@ static void help(void)
"Command parameters:\n"
"  'filename' is a disk image filename\n"
"  'fmt' is the disk image format. It is guessed automatically in 
most cases\n"
+   "  'cache' is the cache mode used to write the output disk image, 
the valid\n"
+   "options are: 'none', 'writeback' (default), 'writethrough' and 
'unsafe'\n"
"  'size' is the disk image size in bytes. Optional suffixes\n"
"'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' 
(gigabyte, 1024M)\n"
"and T (terabyte, 1024G) are supported. 'b' is ignored.\n"
@@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size)
 }
 #endif
 
+static int set_cache_flag(const char *mode, int *flags)
+{
+*flags &= ~BDRV_O_CACHE_MASK;
+
+if (!strcmp(mode, "none") || !strcmp(mode, "off")) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NOCACHE;
+} else if (!strcmp(mode, "writeback")) {
+*flags |= BDRV_O_CACHE_WB;
+} else if (!strcmp(mode, "unsafe")) {
+*flags |= BDRV_O_CACHE_WB;
+*flags |= BDRV_O_NO_FLUSH;
+} else if (!strcmp(mode, "writethrough")) {
+/* this is the default */
+} else {
+return -1;
+}
+
+return 0;
+}
+
 static int print_block_option_help(const char *filename, const char *fmt)
 {
 BlockDriver *drv, *proto_drv;
@@ -441,13 +465,14 @@ static int img_check(int argc, char **argv)
 
 static int img_commit(int argc, char **argv)
 {
-int c, ret;
-const char *filename, *fmt;
+int c, ret, flags;
+const char *filename, *fmt, *cache;
 BlockDriverState *bs;
 
 fmt = NULL;
+cache = BDRV_DEFAULT_CACHE;
 for(;;) {
-c = getopt(argc, argv, "f:h");
+c = getopt(argc, argv, "f:ht:");
 if (c == -1) {
 break;
 }
@@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv)
 case 'f':
 fmt = optarg;
 break;
+case 't':
+cache = optarg;
+break;
 }
 }
 if (optind >= argc) {
@@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv)
 }
 filename = argv[optind++];
 
-bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
+flags = BDRV_O_RDWR;
+ret = set_cache_flag(cache, &flags);
+if (ret < 0) {
+error_report("Invalid cache option: %s\n", cache);
+return -1;
+}
+
+bs = bdrv_new_open(filename, fmt, flags);
 if (!bs) {
 return 1;
 }
@@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const 
uint8_t *buf2, int n,
 static int img_convert(int argc, char **argv)
 {
 int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors;
-int progress = 0;
-const char *fmt, *out_fmt, *out_baseimg, *out_filename;
+int progress = 0, flags;
+const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename;
 BlockDriver *drv, *proto_drv;
 BlockDriverState **bs = NULL, *out_bs = NULL;
 int64_t total_sectors, nb_sectors, sector_num, bs_offset;
@@ -608,10 +643,11 @@ static int img_convert(int argc, char **argv)
 
 fmt = NULL;
 out_fmt = "raw";
+cache = BDRV_DEFAULT_CACHE;
 out_baseimg = NULL;
 compress = 0;
 for(;;) {
-c = getopt(argc, argv, "f:O:B:s:hce6o:p");
+c = getopt(argc, argv, "f:O:B:s:hce6o:pt:");
 if (c == -1) {
 break;
 }
@@ -649,6 +685,9 @@ static int img_convert(int argc, char **argv)
 case 'p':
 progress = 1;
 break;
+case 't':
+