[Qemu-devel] [PATCHv2] block: add the optional file entry to query-block
This patch adds the optional file entry to the query-block output. The value is a json-object representing the information about the underlying file or device (when present). Signed-off-by: Federico Simoncelli --- block/qapi.c |7 +++ qapi-schema.json |4 +++- qmp-commands.hx|8 tests/qemu-iotests/043.out | 15 +++ 4 files changed, 33 insertions(+), 1 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index a4bc411..40db983 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -171,6 +171,13 @@ void bdrv_query_image_info(BlockDriverState *bs, return; } +if (bs->file) { +bdrv_query_image_info(bs->file, &info->file, &err); +if (info->file) { +info->has_file = true; +} +} + *p_info = info; } diff --git a/qapi-schema.json b/qapi-schema.json index 5c32528..0cb872c 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -238,6 +238,8 @@ # # @backing-image: #optional info of the backing image (since 1.6) # +# @file: #optional info of the underlying file (since 1.6) +# # Since: 1.3 # ## @@ -248,7 +250,7 @@ '*cluster-size': 'int', '*encrypted': 'bool', '*backing-filename': 'str', '*full-backing-filename': 'str', '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], - '*backing-image': 'ImageInfo' } } + '*backing-image': 'ImageInfo', '*file': 'ImageInfo' } } ## # @ImageCheck: diff --git a/qmp-commands.hx b/qmp-commands.hx index 362f0e1..bb8a931 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1791,6 +1791,8 @@ Each json-object contain the following: - "backing-image": the detail of the backing image, it is an optional json-object only present when a backing image present for this image + - "file": an optional json-object representing the information + about the underlying file or device (when present) - "io-status": I/O operation status, only present if the device supports it and the VM is configured to stop on errors. It's always reset @@ -1842,6 +1844,12 @@ Example: "format":"qcow2", "virtual-size":2048000 } + "file":{ + "filename":"disks/test.qcow2", + "format": "file", + "virtual-size": 262144, + "actual-size": 139264 + } } }, "type":"unknown" diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out index ad23337..6aea37b 100644 --- a/tests/qemu-iotests/043.out +++ b/tests/qemu-iotests/043.out @@ -44,6 +44,11 @@ cluster_size: 65536 "filename": "TEST_DIR/t.IMGFMT", "cluster-size": 65536, "format": "IMGFMT", +"file": { +"virtual-size": 327680, +"filename": "TEST_DIR/t.IMGFMT", +"format": "file", +}, "backing-filename": "TEST_DIR/t.IMGFMT.2.base", "dirty-flag": false }, @@ -52,6 +57,11 @@ cluster_size: 65536 "filename": "TEST_DIR/t.IMGFMT.2.base", "cluster-size": 65536, "format": "IMGFMT", +"file": { +"virtual-size": 327680, +"filename": "TEST_DIR/t.IMGFMT.2.base", +"format": "file", +}, "backing-filename": "TEST_DIR/t.IMGFMT.1.base", "dirty-flag": false }, @@ -60,6 +70,11 @@ cluster_size: 65536 "filename": "TEST_DIR/t.IMGFMT.1.base", "cluster-size": 65536, "format": "IMGFMT", +"file": { +"virtual-size": 327680, +"filename": "TEST_DIR/t.IMGFMT.1.base", +"format": "file", +}, "dirty-flag": false } ] -- 1.7.1
[Qemu-devel] [PATCH] block: add the optional file entry to query-block
This patch adds the optional file entry to the query-block output. The value is a json-object representing the information about the underlying file or device (when present). Signed-off-by: Federico Simoncelli --- block/qapi.c |9 - qapi-schema.json |4 +++- qmp-commands.hx|8 tests/qemu-iotests/043.out | 15 +++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index a4bc411..03cd222 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -119,7 +119,7 @@ void bdrv_query_image_info(BlockDriverState *bs, info->filename= g_strdup(bs->filename); info->format = g_strdup(bdrv_get_format_name(bs)); -info->virtual_size= total_sectors * 512; +info->virtual_size= bdrv_getlength(bs); info->actual_size = bdrv_get_allocated_file_size(bs); info->has_actual_size = info->actual_size >= 0; if (bdrv_is_encrypted(bs)) { @@ -171,6 +171,13 @@ void bdrv_query_image_info(BlockDriverState *bs, return; } +if (bs->file) { +bdrv_query_image_info(bs->file, &info->file, &err); +if (info->file) { +info->has_file = true; +} +} + *p_info = info; } diff --git a/qapi-schema.json b/qapi-schema.json index a30a728..524a4a0 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -238,6 +238,8 @@ # # @backing-image: #optional info of the backing image (since 1.6) # +# @file: #optional info of the underlying file (since 1.6) +# # Since: 1.3 # ## @@ -248,7 +250,7 @@ '*cluster-size': 'int', '*encrypted': 'bool', '*backing-filename': 'str', '*full-backing-filename': 'str', '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], - '*backing-image': 'ImageInfo' } } + '*backing-image': 'ImageInfo', '*file': 'ImageInfo' } } ## # @ImageCheck: diff --git a/qmp-commands.hx b/qmp-commands.hx index 8cea5e5..c25ac77 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1745,6 +1745,8 @@ Each json-object contain the following: - "backing-image": the detail of the backing image, it is an optional json-object only present when a backing image present for this image + - "file": an optional json-object representing the information + about the underlying file or device (when present) - "io-status": I/O operation status, only present if the device supports it and the VM is configured to stop on errors. It's always reset @@ -1796,6 +1798,12 @@ Example: "format":"qcow2", "virtual-size":2048000 } + "file":{ + "filename":"disks/test.qcow2", + "format": "file", + "virtual-size": 262144, + "actual-size": 139264 + } } }, "type":"unknown" diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out index ad23337..6aea37b 100644 --- a/tests/qemu-iotests/043.out +++ b/tests/qemu-iotests/043.out @@ -44,6 +44,11 @@ cluster_size: 65536 "filename": "TEST_DIR/t.IMGFMT", "cluster-size": 65536, "format": "IMGFMT", +"file": { +"virtual-size": 327680, +"filename": "TEST_DIR/t.IMGFMT", +"format": "file", +}, "backing-filename": "TEST_DIR/t.IMGFMT.2.base", "dirty-flag": false }, @@ -52,6 +57,11 @@ cluster_size: 65536 "filename": "TEST_DIR/t.IMGFMT.2.base", "cluster-size": 65536, "format": "IMGFMT", +"file": { +"virtual-size": 327680, +"filename": "TEST_DIR/t.IMGFMT.2.base", +"format": "file", +}, "backing-filename": "TEST_DIR/t.IMGFMT.1.base", "dirty-flag": false }, @@ -60,6 +70,11 @@ cluster_size: 65536 "filename": "TEST_DIR/t.IMGFMT.1.base", "cluster-size": 65536, "format": "IMGFMT", +"file": { +"virtual-size": 327680, +"filename": "TEST_DIR/t.IMGFMT.1.base", +"format": "file", +}, "dirty-flag": false } ] -- 1.7.1
[Qemu-devel] [PATCH] block: add apparent-size to query-block
This patch adds the apparent-size entry to the query-block output. The value represents the apparent size in bytes of the image, e.g. file size (including the blocks not yet allocated) or block device size. Signed-off-by: Federico Simoncelli --- block/qapi.c |3 ++- qapi-schema.json |4 +++- qmp-commands.hx |1 + 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/block/qapi.c b/block/qapi.c index a4bc411..ccaba67 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -119,8 +119,9 @@ void bdrv_query_image_info(BlockDriverState *bs, info->filename= g_strdup(bs->filename); info->format = g_strdup(bdrv_get_format_name(bs)); -info->virtual_size= total_sectors * 512; +info->virtual_size= bdrv_getlength(bs); info->actual_size = bdrv_get_allocated_file_size(bs); +info->apparent_size = bdrv_getlength(bs->file); info->has_actual_size = info->actual_size >= 0; if (bdrv_is_encrypted(bs)) { info->encrypted = true; diff --git a/qapi-schema.json b/qapi-schema.json index a80ee40..c7403ed 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -222,6 +222,8 @@ # # @actual-size: #optional actual size on disk in bytes of the image # +# @apparent-size: apparent size in bytes of the image +# # @dirty-flag: #optional true if image is not cleanly closed # # @cluster-size: #optional size of a cluster in bytes @@ -244,7 +246,7 @@ { 'type': 'ImageInfo', 'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool', - '*actual-size': 'int', 'virtual-size': 'int', + '*actual-size': 'int', 'virtual-size': 'int', 'apparent-size': 'int', '*cluster-size': 'int', '*encrypted': 'bool', '*backing-filename': 'str', '*full-backing-filename': 'str', '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'], diff --git a/qmp-commands.hx b/qmp-commands.hx index 8cea5e5..3f3aded 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1714,6 +1714,7 @@ Each json-object contain the following: - "actual-size": actual size on disk in bytes of the image, not present when image does not support thin provision (json-int, optional) + - "apparent-size": apparent size in bytes of the image - "cluster-size": size of a cluster in bytes, not present if image format does not support it (json-int, optional) - "encrypted": true if the image is encrypted, not present means -- 1.7.1
[Qemu-devel] [PATCHv4 1/2] qemu-img: find the image end offset during check
This patch adds the support for reporting the image end offset (in bytes). This is particularly useful after a conversion (or a rebase) where the destination is a block device in order to find the first unused byte at the end of the image. Signed-off-by: Federico Simoncelli --- block/qcow2-refcount.c | 10 -- include/block/block.h|1 + qemu-img.c |4 tests/qemu-iotests/026 |6 +++--- tests/qemu-iotests/036 |3 ++- tests/qemu-iotests/039 |2 +- tests/qemu-iotests/044.out |1 + tests/qemu-iotests/common.rc |5 +++-- 8 files changed, 23 insertions(+), 9 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 6a95aa6..45ad043 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVQcowState *s = bs->opaque; -int64_t size, i; +int64_t size, i, highest_cluster; int nb_clusters, refcount1, refcount2; QCowSnapshot *sn; uint16_t *refcount_table; @@ -1187,7 +1187,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } /* compare ref counts */ -for(i = 0; i < nb_clusters; i++) { +for(i = 0, highest_cluster = 0; i < nb_clusters; i++) { refcount1 = get_refcount(bs, i); if (refcount1 < 0) { fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n", @@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } refcount2 = refcount_table[i]; + +if (refcount1 > 0 || refcount2 > 0) { +highest_cluster = i; +} + if (refcount1 != refcount2) { /* Check if we're allowed to fix the mismatch */ @@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } } +res->image_end_offset = (highest_cluster + 1) * s->cluster_size; ret = 0; fail: diff --git a/include/block/block.h b/include/block/block.h index ffd1936..85eb3d9 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -213,6 +213,7 @@ typedef struct BdrvCheckResult { int check_errors; int corruptions_fixed; int leaks_fixed; +int64_t image_end_offset; BlockFragInfo bfi; } BdrvCheckResult; diff --git a/qemu-img.c b/qemu-img.c index 85d3740..e80c1c5 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -475,6 +475,10 @@ static int img_check(int argc, char **argv) result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters); } +if (result.image_end_offset > 0) { +printf("Image end offset: %" PRId64 "\n", result.image_end_offset); +} + bdrv_delete(bs); if (ret < 0 || result.check_errors) { diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 index 1602ccd..107a3ff 100755 --- a/tests/qemu-iotests/026 +++ b/tests/qemu-iotests/026 @@ -102,7 +102,7 @@ if [ "$event" == "l2_load" ]; then $QEMU_IO -c "read $vmstate 0 128k " $BLKDBG_TEST_IMG | _filter_qemu_io fi -$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0" +_check_test_img 2>&1 | grep -v "refcount=1 reference=0" done done @@ -147,7 +147,7 @@ echo echo "Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate" $QEMU_IO -c "write $vmstate 0 64M" $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0" +_check_test_img 2>&1 | grep -v "refcount=1 reference=0" done done @@ -186,7 +186,7 @@ echo echo "Event: $event; errno: $errno; imm: $imm; once: $once" $QEMU_IO -c "write -b 0 64k" $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0" +_check_test_img 2>&1 | grep -v "refcount=1 reference=0" done done diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036 index 329533e..4dbfc57 100755 --- a/tests/qemu-iotests/036 +++ b/tests/qemu-iotests/036 @@ -59,7 +59,8 @@ _make_test_img 64M echo echo === Repair image === echo -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all + ./qcow2.py $TEST_IMG dump-header # success, all done diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index c5ae806..ae35175 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -86,7 +86,7 @@ $QEMU_IO -r -c "read -P 0x5a 0 512" $TEST_IMG | _filter_qemu_io echo echo "== Repairing the image file must succeed ==" -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all # The dirty bit must not be set ./qcow2.py $TEST_IMG dump-header | g
[Qemu-devel] [PATCHv4 2/2] qemu-img: add json output option to the check command
This option --output=[human|json] makes qemu-img check output a human or JSON representation at the choice of the user. Signed-off-by: Federico Simoncelli --- qapi-schema.json | 46 +++ qemu-img-cmds.hx |4 +- qemu-img.c | 232 +++--- qemu-img.texi|5 +- 4 files changed, 220 insertions(+), 67 deletions(-) diff --git a/qapi-schema.json b/qapi-schema.json index 6d7252b..dc99c28 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -245,6 +245,52 @@ '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } } ## +# @ImageCheck: +# +# Information about a QEMU image file check +# +# @filename: name of the image file checked +# +# @format: format of the image file checked +# +# @check-errors: number of unexpected errors occurred during check +# +# @image-end-offset: #optional offset (in bytes) where the image ends, this +#field is present if the driver for the image format +#supports it +# +# @corruptions: #optional number of corruptions found during the check if any +# +# @leaks: #optional number of leaks found during the check if any +# +# @corruptions-fixed: #optional number of corruptions fixed during the check +# if any +# +# @leaks-fixed: #optional number of leaks fixed during the check if any +# +# @total-clusters: #optional total number of clusters, this field is present +# if the driver for the image format supports it +# +# @allocated-clusters: #optional total number of allocated clusters, this +# field is present if the driver for the image format +# supports it +# +# @fragmented-clusters: #optional total number of fragmented clusters, this +# field is present if the driver for the image format +# supports it +# +# Since: 1.4 +# +## + +{ 'type': 'ImageCheck', + 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', + '*image-end-offset': 'int', '*corruptions': 'int', '*leaks': 'int', + '*corruptions-fixed': 'int', '*leaks-fixed': 'int', + '*total-clusters': 'int', '*allocated-clusters': 'int', + '*fragmented-clusters': 'int' } } + +## # @StatusInfo: # # Information about VCPU run state diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index a181363..259fc14 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -10,9 +10,9 @@ STEXI ETEXI DEF("check", img_check, -"check [-f fmt] [-r [leaks | all]] filename") +"check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename") STEXI -@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename} +@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] @var{filename} ETEXI DEF("create", img_create, diff --git a/qemu-img.c b/qemu-img.c index e80c1c5..34249fe 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -42,6 +42,16 @@ typedef struct img_cmd_t { int (*handler)(int argc, char **argv); } img_cmd_t; +enum { +OPTION_OUTPUT = 256, +OPTION_BACKING_CHAIN = 257, +}; + +typedef enum OutputFormat { +OFORMAT_JSON, +OFORMAT_HUMAN, +} OutputFormat; + /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB #define BDRV_DEFAULT_CACHE "writeback" @@ -375,6 +385,96 @@ static int img_create(int argc, char **argv) return 0; } +static void dump_json_image_check(ImageCheck *check) +{ +Error *errp = NULL; +QString *str; +QmpOutputVisitor *ov = qmp_output_visitor_new(); +QObject *obj; +visit_type_ImageCheck(qmp_output_get_visitor(ov), + &check, NULL, &errp); +obj = qmp_output_get_qobject(ov); +str = qobject_to_json_pretty(obj); +assert(str != NULL); +printf("%s\n", qstring_get_str(str)); +qobject_decref(obj); +qmp_output_visitor_cleanup(ov); +QDECREF(str); +} + +static void dump_human_image_check(ImageCheck *check) +{ +if (!(check->corruptions || check->leaks || check->check_errors)) { +printf("No errors were found on the image.\n"); +} else { +if (check->corruptions) { +printf("\n%" PRId64 " errors were found on the image.\n" +"Data may be corrupted, or further writes to the image " +"may corrupt it.\n", +check->corruptions); +} + +if (check->leaks) { +printf("\n%" PRId64 " leaked clusters were found on the image.\n&qu
[Qemu-devel] [PATCHv3 2/2] qemu-img: add json output option to the check command
This option --output=[human|json] make qemu-img check output an human or JSON representation at the choice of the user. Signed-off-by: Federico Simoncelli --- qapi-schema.json | 46 +++ qemu-img-cmds.hx |4 +- qemu-img.c | 232 +++-- qemu-img.texi|5 +- 4 files changed, 221 insertions(+), 66 deletions(-) diff --git a/qapi-schema.json b/qapi-schema.json index 6d7252b..d576728 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -245,6 +245,52 @@ '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } } ## +# @ImageCheck: +# +# Information about a QEMU image file check +# +# @filename: name of the image file checked +# +# @format: format of the image file checked +# +# @check-errors: number of unexpected errors occurred during check +# +# @highest-offset: #optional highest offset (in bytes) in use by the image, +# this field is present if the driver for the image format +# supports it +# +# @corruptions: #optional number of corruptions found during the check if any +# +# @leaks: #optional number of leaks found during the check if any +# +# @corruptions-fixed: #optional number of corruptions fixed during the check +# if any +# +# @leaks-fixed: #optional number of leaks fixed during the check if any +# +# @total-clusters: #optional total number of clusters, this field is present +# if the driver for the image format supports it +# +# @allocated-clusters: #optional total number of allocated clusters, this +# field is present if the driver for the image format +# supports it +# +# @fragmented-clusters: #optional total number of fragmented clusters, this +# field is present if the driver for the image format +# supports it +# +# Since: 1.4 +# +## + +{ 'type': 'ImageCheck', + 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', + '*highest-offset': 'int', '*corruptions': 'int', '*leaks': 'int', + '*corruptions-fixed': 'int', '*leaks-fixed': 'int', + '*total-clusters': 'int', '*allocated-clusters': 'int', + '*fragmented-clusters': 'int' } } + +## # @StatusInfo: # # Information about VCPU run state diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index a181363..259fc14 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -10,9 +10,9 @@ STEXI ETEXI DEF("check", img_check, -"check [-f fmt] [-r [leaks | all]] filename") +"check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename") STEXI -@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename} +@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] @var{filename} ETEXI DEF("create", img_create, diff --git a/qemu-img.c b/qemu-img.c index 7b977cd..bcb21d5 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -42,6 +42,16 @@ typedef struct img_cmd_t { int (*handler)(int argc, char **argv); } img_cmd_t; +enum { +OPTION_OUTPUT = 256, +OPTION_BACKING_CHAIN = 257, +}; + +typedef enum OutputFormat { +OFORMAT_JSON, +OFORMAT_HUMAN, +} OutputFormat; + /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB #define BDRV_DEFAULT_CACHE "writeback" @@ -375,6 +385,96 @@ static int img_create(int argc, char **argv) return 0; } +static void dump_json_image_check(ImageCheck *check) +{ +Error *errp = NULL; +QString *str; +QmpOutputVisitor *ov = qmp_output_visitor_new(); +QObject *obj; +visit_type_ImageCheck(qmp_output_get_visitor(ov), + &check, NULL, &errp); +obj = qmp_output_get_qobject(ov); +str = qobject_to_json_pretty(obj); +assert(str != NULL); +printf("%s\n", qstring_get_str(str)); +qobject_decref(obj); +qmp_output_visitor_cleanup(ov); +QDECREF(str); +} + +static void dump_human_image_check(ImageCheck *check) +{ +if (!(check->corruptions || check->leaks || check->check_errors)) { +printf("No errors were found on the image.\n"); +} else { +if (check->corruptions) { +printf("\n%" PRId64 " errors were found on the image.\n" +"Data may be corrupted, or further writes to the image " +"may corrupt it.\n", +check->corruptions); +} + +if (check->leaks) { +printf("\n%" PRId64 " leaked clusters were found on the image.\n&qu
[Qemu-devel] [PATCHv3 1/2] qemu-img: find the highest offset in use during check
This patch adds the support for reporting the highest offset in use by an image. This is particularly useful after a conversion (or a rebase) where the destination is a block device in order to find the actual amount of space in use. Signed-off-by: Federico Simoncelli --- block/qcow2-refcount.c | 10 -- include/block/block.h|1 + qemu-img.c |4 tests/qemu-iotests/026 |6 +++--- tests/qemu-iotests/036 |3 ++- tests/qemu-iotests/039 |2 +- tests/qemu-iotests/044.out |1 + tests/qemu-iotests/common.rc |5 +++-- 8 files changed, 23 insertions(+), 9 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 6a95aa6..3444f85 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVQcowState *s = bs->opaque; -int64_t size, i; +int64_t size, i, highest_cluster; int nb_clusters, refcount1, refcount2; QCowSnapshot *sn; uint16_t *refcount_table; @@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, s->refcount_table_offset, s->refcount_table_size * sizeof(uint64_t)); -for(i = 0; i < s->refcount_table_size; i++) { +for(i = 0, highest_cluster = 0; i < s->refcount_table_size; i++) { uint64_t offset, cluster; offset = s->refcount_table[i]; cluster = offset >> s->cluster_bits; @@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } refcount2 = refcount_table[i]; + +if (refcount1 > 0 || refcount2 > 0) { +highest_cluster = i; +} + if (refcount1 != refcount2) { /* Check if we're allowed to fix the mismatch */ @@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } } +res->highest_offset = (highest_cluster + 1) * s->cluster_size; ret = 0; fail: diff --git a/include/block/block.h b/include/block/block.h index ffd1936..b5cf6ac 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -213,6 +213,7 @@ typedef struct BdrvCheckResult { int check_errors; int corruptions_fixed; int leaks_fixed; +int64_t highest_offset; BlockFragInfo bfi; } BdrvCheckResult; diff --git a/qemu-img.c b/qemu-img.c index 85d3740..7b977cd 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -475,6 +475,10 @@ static int img_check(int argc, char **argv) result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters); } +if (result.highest_offset > 0) { +printf("Highest offset in use: %" PRId64 "\n", result.highest_offset); +} + bdrv_delete(bs); if (ret < 0 || result.check_errors) { diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 index 1602ccd..107a3ff 100755 --- a/tests/qemu-iotests/026 +++ b/tests/qemu-iotests/026 @@ -102,7 +102,7 @@ if [ "$event" == "l2_load" ]; then $QEMU_IO -c "read $vmstate 0 128k " $BLKDBG_TEST_IMG | _filter_qemu_io fi -$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0" +_check_test_img 2>&1 | grep -v "refcount=1 reference=0" done done @@ -147,7 +147,7 @@ echo echo "Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate" $QEMU_IO -c "write $vmstate 0 64M" $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0" +_check_test_img 2>&1 | grep -v "refcount=1 reference=0" done done @@ -186,7 +186,7 @@ echo echo "Event: $event; errno: $errno; imm: $imm; once: $once" $QEMU_IO -c "write -b 0 64k" $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0" +_check_test_img 2>&1 | grep -v "refcount=1 reference=0" done done diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036 index 329533e..4dbfc57 100755 --- a/tests/qemu-iotests/036 +++ b/tests/qemu-iotests/036 @@ -59,7 +59,8 @@ _make_test_img 64M echo echo === Repair image === echo -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all + ./qcow2.py $TEST_IMG dump-header # success, all done diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index c5ae806..ae35175 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -86,7 +86,7 @@ $QEMU_IO -r -c "read -P 0x5a 0 512" $TEST_IMG | _filter_qemu_io echo echo "== Repairing the image file must succeed ==" -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all # The dirty bit must not be set ./qcow2.py $TEST_IMG dump-header | grep incom
[Qemu-devel] [PATCH 2/2] qemu-img: add json output option to the check command
This option --output=[human|json] make qemu-img check output an human or JSON representation at the choice of the user. Signed-off-by: Federico Simoncelli --- qapi-schema.json | 38 + qemu-img-cmds.hx |4 +- qemu-img.c | 246 +++--- qemu-img.texi|5 +- 4 files changed, 221 insertions(+), 72 deletions(-) diff --git a/qapi-schema.json b/qapi-schema.json index 5dfa052..8877285 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -245,6 +245,44 @@ '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } } ## +# @ImageCheck: +# +# Information about a QEMU image file check +# +# @filename: name of the image file checked +# +# @format: format of the image file checked +# +# @check-errors: number of unexpected errors occurred during check +# +# @highest-offset: #optional highest offset (in bytes) in use by the image +# +# @corruptions: #optional number of corruptions found during the check +# +# @leaks: #optional number of leaks found during the check +# +# @corruptions-fixed: #optional number of corruptions fixed during the check +# +# @leaks-fixed: #optional number of leaks fixed during the check +# +# @total-clusters: #optional total number of clusters +# +# @allocated-clusters: #optional total number of allocated clusters +# +# @fragmented-clusters: #optional total number of fragmented clusters +# +# Since: 1.4 +# +## + +{ 'type': 'ImageCheck', + 'data': {'filename': 'str', 'format': 'str', 'check-errors': 'int', + '*highest-offset': 'int', '*corruptions': 'int', '*leaks': 'int', + '*corruptions-fixed': 'int', '*leaks-fixed': 'int', + '*total-clusters': 'int', '*allocated-clusters': 'int', + '*fragmented-clusters': 'int' } } + +## # @StatusInfo: # # Information about VCPU run state diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index a181363..259fc14 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -10,9 +10,9 @@ STEXI ETEXI DEF("check", img_check, -"check [-f fmt] [-r [leaks | all]] filename") +"check [-f fmt] [--output=ofmt] [-r [leaks | all]] filename") STEXI -@item check [-f @var{fmt}] [-r [leaks | all]] @var{filename} +@item check [-f @var{fmt}] [--output=@var{ofmt}] [-r [leaks | all]] @var{filename} ETEXI DEF("create", img_create, diff --git a/qemu-img.c b/qemu-img.c index 45c1ec1..18ba5c2 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -42,6 +42,16 @@ typedef struct img_cmd_t { int (*handler)(int argc, char **argv); } img_cmd_t; +enum { +OPTION_OUTPUT = 256, +OPTION_BACKING_CHAIN = 257, +}; + +typedef enum OutputFormat { +OFORMAT_JSON, +OFORMAT_HUMAN, +} OutputFormat; + /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB #define BDRV_DEFAULT_CACHE "writeback" @@ -370,6 +380,122 @@ out: return 0; } +static void dump_json_image_check(ImageCheck *check) +{ +Error *errp = NULL; +QString *str; +QmpOutputVisitor *ov = qmp_output_visitor_new(); +QObject *obj; +visit_type_ImageCheck(qmp_output_get_visitor(ov), + &check, NULL, &errp); +obj = qmp_output_get_qobject(ov); +str = qobject_to_json_pretty(obj); +assert(str != NULL); +printf("%s\n", qstring_get_str(str)); +qobject_decref(obj); +qmp_output_visitor_cleanup(ov); +QDECREF(str); +} + +static void dump_human_image_check(ImageCheck *check) +{ +if (check->corruptions_fixed || check->leaks_fixed) { + printf("The following inconsistencies were found and repaired:\n\n" +"%" PRId64 " leaked clusters\n" +"%" PRId64 " corruptions\n\n" +"Double checking the fixed image now...\n", +check->leaks_fixed, +check->corruptions_fixed); +} + +if (!(check->corruptions || check->leaks || check->check_errors)) { +printf("No errors were found on the image.\n"); +} else { +if (check->corruptions) { +printf("\n%" PRId64 " errors were found on the image.\n" +"Data may be corrupted, or further writes to the image " +"may corrupt it.\n", +check->corruptions); +} + +if (check->leaks) { +printf("\n%" PRId64 " leaked clusters were found on the image.\n" +"This means waste of disk space, but no harm t
[Qemu-devel] [PATCHv2 1/2] qemu-img: find the highest offset in use during check
This patch adds the support for reporting the highest offset in use by an image. This is particularly useful after a conversion (or a rebase) where the destination is a block device in order to find the actual amount of space in use. Signed-off-by: Federico Simoncelli --- block.h |1 + block/qcow2-refcount.c | 10 -- qemu-img.c |4 tests/qemu-iotests/026 |6 +++--- tests/qemu-iotests/036 |3 ++- tests/qemu-iotests/039 |2 +- tests/qemu-iotests/common.rc |5 +++-- 7 files changed, 22 insertions(+), 9 deletions(-) diff --git a/block.h b/block.h index 722c620..de42e8c 100644 --- a/block.h +++ b/block.h @@ -213,6 +213,7 @@ typedef struct BdrvCheckResult { int check_errors; int corruptions_fixed; int leaks_fixed; +int64_t highest_offset; BlockFragInfo bfi; } BdrvCheckResult; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 96224d1..017439d 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVQcowState *s = bs->opaque; -int64_t size, i; +int64_t size, i, highest_cluster; int nb_clusters, refcount1, refcount2; QCowSnapshot *sn; uint16_t *refcount_table; @@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, s->refcount_table_offset, s->refcount_table_size * sizeof(uint64_t)); -for(i = 0; i < s->refcount_table_size; i++) { +for(i = 0, highest_cluster = 0; i < s->refcount_table_size; i++) { uint64_t offset, cluster; offset = s->refcount_table[i]; cluster = offset >> s->cluster_bits; @@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } refcount2 = refcount_table[i]; + +if (refcount1 > 0 || refcount2 > 0) { +highest_cluster = i; +} + if (refcount1 != refcount2) { /* Check if we're allowed to fix the mismatch */ @@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } } +res->highest_offset = (highest_cluster + 1) * s->cluster_size; ret = 0; fail: diff --git a/qemu-img.c b/qemu-img.c index e29e01b..45c1ec1 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -470,6 +470,10 @@ static int img_check(int argc, char **argv) result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters); } +if (result.highest_offset > 0) { +printf("Highest offset in use: %" PRId64 "\n", result.highest_offset); +} + bdrv_delete(bs); if (ret < 0 || result.check_errors) { diff --git a/tests/qemu-iotests/026 b/tests/qemu-iotests/026 index 1602ccd..107a3ff 100755 --- a/tests/qemu-iotests/026 +++ b/tests/qemu-iotests/026 @@ -102,7 +102,7 @@ if [ "$event" == "l2_load" ]; then $QEMU_IO -c "read $vmstate 0 128k " $BLKDBG_TEST_IMG | _filter_qemu_io fi -$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0" +_check_test_img 2>&1 | grep -v "refcount=1 reference=0" done done @@ -147,7 +147,7 @@ echo echo "Event: $event; errno: $errno; imm: $imm; once: $once; write $vmstate" $QEMU_IO -c "write $vmstate 0 64M" $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0" +_check_test_img 2>&1 | grep -v "refcount=1 reference=0" done done @@ -186,7 +186,7 @@ echo echo "Event: $event; errno: $errno; imm: $imm; once: $once" $QEMU_IO -c "write -b 0 64k" $BLKDBG_TEST_IMG | _filter_qemu_io -$QEMU_IMG check $TEST_IMG 2>&1 | grep -v "refcount=1 reference=0" +_check_test_img 2>&1 | grep -v "refcount=1 reference=0" done done diff --git a/tests/qemu-iotests/036 b/tests/qemu-iotests/036 index 329533e..4dbfc57 100755 --- a/tests/qemu-iotests/036 +++ b/tests/qemu-iotests/036 @@ -59,7 +59,8 @@ _make_test_img 64M echo echo === Repair image === echo -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all + ./qcow2.py $TEST_IMG dump-header # success, all done diff --git a/tests/qemu-iotests/039 b/tests/qemu-iotests/039 index c5ae806..ae35175 100755 --- a/tests/qemu-iotests/039 +++ b/tests/qemu-iotests/039 @@ -86,7 +86,7 @@ $QEMU_IO -r -c "read -P 0x5a 0 512" $TEST_IMG | _filter_qemu_io echo echo "== Repairing the image file must succeed ==" -$QEMU_IMG check -r all $TEST_IMG +_check_test_img -r all # The dirty bit must not be set ./qcow2.py $TEST_IMG dump-header | grep incompatible_features diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
[Qemu-devel] [PATCH] qemu-img: find the highest offset in use during check
This patch adds the support for reporting the highest offset in use by an image. This is particularly useful after a conversion (or a rebase) where the destination is a block device in order to find the actual amount of space in use. Signed-off-by: Federico Simoncelli --- block.h|1 + block/qcow2-refcount.c | 10 -- qemu-img.c |4 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/block.h b/block.h index 722c620..de42e8c 100644 --- a/block.h +++ b/block.h @@ -213,6 +213,7 @@ typedef struct BdrvCheckResult { int check_errors; int corruptions_fixed; int leaks_fixed; +int64_t highest_offset; BlockFragInfo bfi; } BdrvCheckResult; diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 96224d1..017439d 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -1116,7 +1116,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { BDRVQcowState *s = bs->opaque; -int64_t size, i; +int64_t size, i, highest_cluster; int nb_clusters, refcount1, refcount2; QCowSnapshot *sn; uint16_t *refcount_table; @@ -1154,7 +1154,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, s->refcount_table_offset, s->refcount_table_size * sizeof(uint64_t)); -for(i = 0; i < s->refcount_table_size; i++) { +for(i = 0, highest_cluster = 0; i < s->refcount_table_size; i++) { uint64_t offset, cluster; offset = s->refcount_table[i]; cluster = offset >> s->cluster_bits; @@ -1197,6 +1197,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } refcount2 = refcount_table[i]; + +if (refcount1 > 0 || refcount2 > 0) { +highest_cluster = i; +} + if (refcount1 != refcount2) { /* Check if we're allowed to fix the mismatch */ @@ -1231,6 +1236,7 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } } +res->highest_offset = (highest_cluster + 1) * s->cluster_size; ret = 0; fail: diff --git a/qemu-img.c b/qemu-img.c index e29e01b..3a8090b 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -470,6 +470,10 @@ static int img_check(int argc, char **argv) result.bfi.fragmented_clusters * 100.0 / result.bfi.allocated_clusters); } +if (result.highest_offset > 0) { +printf("Highest offset in use: %lu\n", result.highest_offset); +} + bdrv_delete(bs); if (ret < 0 || result.check_errors) { -- 1.7.1
Re: [Qemu-devel] [PATCH 4/7] block: close unused image files at the end of streaming
- Original Message - > From: "Paolo Bonzini" > To: qemu-devel@nongnu.org > Cc: "Marcelo Tosatti" , "Federico Simoncelli" > > Sent: Thursday, April 5, 2012 5:42:58 PM > Subject: [PATCH 4/7] block: close unused image files at the end of streaming > > From: Marcelo Tosatti > > Close the now unused images that were part of the previous backing > file > chain and adjust ->backing_hd properly. > > Note that this only works with relative paths. s/relative/absolute/ > Given the images: > > /tmp/a/base.raw > /tmp/a/snap1.qcow2 > /tmp/b/snap2.qcow2 > > chained as: > > base(bak:"") <- snap1(bak:"base.raw") <- > snap2(bak:"../a/snap1.qcow2") > > merging snap1 and snap2 we will obtain: > > base(bak:"") <- snap2(bak:"base.raw") > > However this should be maintained by the user/admin: one can also > screw up relative paths using qemu-img manually. The patch is fine but I disagree with this comment. The user/admin didn't make any mistake and he shouldn't be in charge of additional maintenance (which is also tricky since the VM is running). -- Federico
[Qemu-devel] [PATCH v3] qapi: add c_fun to escape function names
Signed-off-by: Federico Simoncelli --- scripts/qapi-commands.py | 14 +++--- scripts/qapi-types.py|4 ++-- scripts/qapi-visit.py|4 ++-- scripts/qapi.py |5 - 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/scripts/qapi-commands.py b/scripts/qapi-commands.py index 3aabf61..30a24d2 100644 --- a/scripts/qapi-commands.py +++ b/scripts/qapi-commands.py @@ -42,7 +42,7 @@ def generate_command_decl(name, args, ret_type): return mcgen(''' %(ret_type)s qmp_%(name)s(%(args)sError **errp); ''', - ret_type=c_type(ret_type), name=c_var(name), args=arglist).strip() + ret_type=c_type(ret_type), name=c_fun(name), args=arglist).strip() def gen_sync_call(name, args, ret_type, indent=0): ret = "" @@ -59,7 +59,7 @@ def gen_sync_call(name, args, ret_type, indent=0): %(retval)sqmp_%(name)s(%(args)serrp); ''', -name=c_var(name), args=arglist, retval=retval).rstrip() +name=c_fun(name), args=arglist, retval=retval).rstrip() if ret_type: ret += "\n" + mcgen('''' if (!error_is_set(errp)) { @@ -74,7 +74,7 @@ if (!error_is_set(errp)) { def gen_marshal_output_call(name, ret_type): if not ret_type: return "" -return "qmp_marshal_output_%s(retval, ret, errp);" % c_var(name) +return "qmp_marshal_output_%s(retval, ret, errp);" % c_fun(name) def gen_visitor_output_containers_decl(ret_type): ret = "" @@ -198,16 +198,16 @@ static void qmp_marshal_output_%(c_name)s(%(c_ret_type)s ret_in, QObject **ret_o qapi_dealloc_visitor_cleanup(md); } ''', -c_ret_type=c_type(ret_type), c_name=c_var(name), +c_ret_type=c_type(ret_type), c_name=c_fun(name), visitor=type_visitor(ret_type)) return ret def gen_marshal_input_decl(name, args, ret_type, middle_mode): if middle_mode: -return 'int qmp_marshal_input_%s(Monitor *mon, const QDict *qdict, QObject **ret)' % c_var(name) +return 'int qmp_marshal_input_%s(Monitor *mon, const QDict *qdict, QObject **ret)' % c_fun(name) else: -return 'static void qmp_marshal_input_%s(QDict *args, QObject **ret, Error **errp)' % c_var(name) +return 'static void qmp_marshal_input_%s(QDict *args, QObject **ret, Error **errp)' % c_fun(name) @@ -298,7 +298,7 @@ def gen_registry(commands): registry += mcgen(''' qmp_register_command("%(name)s", qmp_marshal_input_%(c_name)s); ''', - name=cmd['command'], c_name=c_var(cmd['command'])) + name=cmd['command'], c_name=c_fun(cmd['command'])) pop_indent() ret = mcgen(''' static void qmp_init_marshal(void) diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py index 727fb77..4a734f5 100644 --- a/scripts/qapi-types.py +++ b/scripts/qapi-types.py @@ -100,7 +100,7 @@ typedef enum %(name)s %(abbrev)s_%(value)s = %(i)d, ''', abbrev=de_camel_case(name).upper(), - value=c_var(value).upper(), + value=c_fun(value).upper(), i=i) i += 1 @@ -126,7 +126,7 @@ struct %(name)s %(c_type)s %(c_name)s; ''', c_type=c_type(typeinfo[key]), - c_name=c_var(key)) + c_name=c_fun(key)) ret += mcgen(''' }; diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py index 54117d4..78c947c 100644 --- a/scripts/qapi-visit.py +++ b/scripts/qapi-visit.py @@ -129,9 +129,9 @@ void visit_type_%(name)s(Visitor *m, %(name)s ** obj, const char *name, Error ** break; ''', abbrev = de_camel_case(name).upper(), -enum = de_camel_case(key).upper(), +enum = c_fun(de_camel_case(key)).upper(), c_type=members[key], -c_name=c_var(key)) +c_name=c_fun(key)) ret += mcgen(''' default: diff --git a/scripts/qapi.py b/scripts/qapi.py index 6e05469..e062336 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -131,7 +131,10 @@ def camel_case(name): return new_name def c_var(name): -return '_'.join(name.split('-')).lstrip("*") +return name.replace('-', '_').lstrip("*") + +def c_fun(name): +return c_var(name).replace('.', '_') def c_list_type(name): return '%sList' % name -- 1.7.1
[Qemu-devel] [PATCH v2] qapi: escaping dots in c_var and in de_camel_case
This allows qapi commands and types with dots (downstream QMP extensions containing fully qualified domain names). Signed-off-by: Federico Simoncelli --- scripts/qapi.py |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/qapi.py b/scripts/qapi.py index 6e05469..b1173bf 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -111,7 +111,7 @@ def de_camel_case(name): for ch in name: if ch.isupper() and new_name: new_name += '_' -if ch == '-': +if ch == '-' or ch == '.': new_name += '_' else: new_name += ch.lower() @@ -131,7 +131,7 @@ def camel_case(name): return new_name def c_var(name): -return '_'.join(name.split('-')).lstrip("*") +return name.replace('-', '_').replace('.', '_').lstrip('*') def c_list_type(name): return '%sList' % name -- 1.7.1
[Qemu-devel] [PATCH] qapi: escaping the dots in c_var
This allows qapi commands and types with dots. Signed-off-by: Federico Simoncelli --- scripts/qapi.py |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/scripts/qapi.py b/scripts/qapi.py index 6e05469..4090c55 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -131,7 +131,7 @@ def camel_case(name): return new_name def c_var(name): -return '_'.join(name.split('-')).lstrip("*") +return name.replace('-', '_').replace('.', '_').lstrip('*') def c_list_type(name): return '%sList' % name -- 1.7.1
Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command
- Original Message - > From: "Kevin Wolf" > To: "Federico Simoncelli" > Cc: "Eric Blake" , qemu-devel@nongnu.org, > stefa...@linux.vnet.ibm.com, lcapitul...@redhat.com, > "Paolo Bonzini" , "Markus Armbruster" > Sent: Wednesday, March 14, 2012 10:34:08 AM > Subject: Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command > > > * We could leave it as it is, a distinct command that is not part > > of > > the transaction and that it's closing the old image before > > opening > > the new one. > > Yes, this would be the short-term preliminary solution. I would tend > to > leave it to downstreams to implement it as an extension, though. > > > This is not completely correct, the main intent was to not spread > > one image chain across two storage domains (making it incomplete if one > > of them was missing). In the next oVirt release a VM can have > > different disks on different storage domains, so this wouldn't be a special > > case but just a normal situation. > > The problem with this kind of argument is that we're not developing > only for oVirt, but need to look for what makes sense for any management > tool (or even just direct users of qemu). That is a general rule, and it's perfectly agreeable, but it has nothing to do with what I was saying. Do you know any management tool or any reason to forbid to a VM to have different disks on different storages? In fact qemu-kvm doesn't even know where the images are coming from (what particular iscsi connection or what particular NFS server). What I was asking was a tool (mirroring) to avoid the split of an image chain between multiple storages, and not a tool to avoid the ability to have full image chains on different storages. There is nothing oVirt specific here, it came into play only when I said that for us is not a problem (for once :-). -- Federico
Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command
- Original Message - > From: "Eric Blake" > To: "Paolo Bonzini" > Cc: kw...@redhat.com, fsimo...@redhat.com, qemu-devel@nongnu.org, > stefa...@linux.vnet.ibm.com, lcapitul...@redhat.com > Sent: Tuesday, March 13, 2012 9:48:10 PM > Subject: Re: [Qemu-devel] [PATCH v4 10/10] Add the drive-reopen command > > On 03/06/2012 10:56 AM, Paolo Bonzini wrote: > > From: Federico Simoncelli > > > > Signed-off-by: Federico Simoncelli > > Signed-off-by: Paolo Bonzini > > > ## > > +# @drive-reopen > > +# > > +# Assigns a new image file to a device. > > +# > > +# @device: the name of the device for which we are changing the > > image file. > > +# > > +# @new-image-file: the target of the new image. If the file > > doesn't exists the > > +# command will fail. > > +# > > +# @format: #optional the format of the new image, default is > > 'qcow2'. > > +# > > +# Returns: nothing on success > > +# If @device is not a valid block device, DeviceNotFound > > +# If @new-image-file can't be opened, OpenFileFailed > > +# If @format is invalid, InvalidBlockFormat > > +# > > +# Since 1.1 > > +## > > +{ 'command': 'drive-reopen', > > + 'data': { 'device': 'str', 'new-image-file': 'str', '*format': > > 'str' } } > > I still think we need a 'drive-reopen' action included in > 'transaction', > as an 11/10 on this series. For disk migration, it is true that you > can > migrate one disk at a time, and therefore only need to reopen one > disk > at a time, to get the guarantee that for a single disk image, the > current state of that image will be guaranteed to be consistent using > only one storage domain. I'm not sure if this was already addressed on this mailing list but the main problem is that as general rule a qcow file cannot be opened in r/w mode twice. I believe there was only one exception to that with the live migration and it generated several issues. That said, reopen is really hard to be implemented as a transaction without breaking that rule. For example in the blkmirror case you'd need to open the destination image in r/w while the mirroring is in action (already having the same image in r/w mode). There are several solutions here but they are either really hard to implement or non definitive. For example: * We could try to implement the reopen command for each special case, eg: blkmirror, reopening the same image, etc... and in such cases reusing the same bs that we already have. The downside is that this command will be coupled with all this special cases. * We could use the transaction APIs without actually making it transaction (if we fail in the middle we can't rollback). The only advantage of this is that we'd provide a consistent API to libvirt and we would postpone the problem to the future. Anyway I strongly discourage this as it's completely unsafe and it's going to break the transaction semantic. Moreover it's a solution that relies too much on the hope of finding something appropriate in the future. * We could leave it as it is, a distinct command that is not part of the transaction and that it's closing the old image before opening the new one. > But since the API allows the creation of two mirrors in one command, > I'm > worried that someone will try to start a mirror on two disks at once, > but then be stuck doing two separate 'drive-reopen' commands. If the > first succeeds but the second fails, you have now stranded the qemu > process across two storage domains, which is exactly what we were > trying > to avoid in the first place by inventing transactions. That is, even > if > all disks are individually consistent in a single domain, the act of > migrating then reopening one disk at a time means you will have a > window > where disk 1 and disk 2 are opened on different storage domains. This is not completely correct, the main intent was to not spread one image chain across two storage domains (making it incomplete if one of them was missing). In the next oVirt release a VM can have different disks on different storage domains, so this wouldn't be a special case but just a normal situation. -- Federico
[Qemu-devel] [PATCH] Add the drive-reopen command
Signed-off-by: Federico Simoncelli --- blockdev.c | 40 +++- hmp-commands.hx | 16 hmp.c| 11 +++ hmp.h|1 + qapi-schema.json | 22 ++ 5 files changed, 77 insertions(+), 13 deletions(-) diff --git a/blockdev.c b/blockdev.c index 058b6d5..56da5c9 100644 --- a/blockdev.c +++ b/blockdev.c @@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict) } } -void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, -bool has_format, const char *format, -Error **errp) +static void change_blockdev_image(const char *device, const char *new_image_file, + const char *format, bool create, Error **errp) { BlockDriverState *bs; BlockDriver *drv, *old_drv, *proto_drv; @@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, old_drv = bs->drv; flags = bs->open_flags; -if (!has_format) { +if (!format) { format = "qcow2"; } @@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, return; } -proto_drv = bdrv_find_protocol(snapshot_file); +proto_drv = bdrv_find_protocol(new_image_file); if (!proto_drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); return; } -ret = bdrv_img_create(snapshot_file, format, bs->filename, - bs->drv->format_name, NULL, -1, flags); -if (ret) { -error_set(errp, QERR_UNDEFINED_ERROR); -return; +if (create) { +ret = bdrv_img_create(new_image_file, format, bs->filename, + bs->drv->format_name, NULL, -1, flags); +if (ret) { +error_set(errp, QERR_UNDEFINED_ERROR); +return; +} } bdrv_drain_all(); bdrv_flush(bs); bdrv_close(bs); -ret = bdrv_open(bs, snapshot_file, flags, drv); +ret = bdrv_open(bs, new_image_file, flags, drv); /* * If reopening the image file we just created fails, fall back * and try to re-open the original image. If that fails too, we @@ -709,11 +710,25 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, if (ret != 0) { error_set(errp, QERR_OPEN_FILE_FAILED, old_filename); } else { -error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); +error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); } } } +void qmp_drive_reopen(const char *device, const char *new_image_file, + bool has_format, const char *format, Error **errp) +{ +change_blockdev_image(device, new_image_file, + has_format ? format : NULL, false, errp); +} + +void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, +bool has_format, const char *format, +Error **errp) +{ +change_blockdev_image(device, snapshot_file, + has_format ? format : NULL, true, errp); +} /* New and old BlockDriverState structs for group snapshots */ typedef struct BlkTransactionStates { @@ -877,7 +892,6 @@ exit: return; } - static void eject_device(BlockDriverState *bs, int force, Error **errp) { if (bdrv_in_use(bs)) { diff --git a/hmp-commands.hx b/hmp-commands.hx index 64b3656..9e08123 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -900,6 +900,22 @@ Snapshot device, using snapshot file as target if provided ETEXI { +.name = "drive-reopen", +.args_type = "device:B,new-image-file:s,format:s?", +.params = "device new-image-file [format]", +.help = "Assigns a new image file to a device.\n\t\t\t" + "The image will be opened using the format\n\t\t\t" + "specified or 'qcow2' by default.\n\t\t\t", +.mhandler.cmd = hmp_drive_reopen, +}, + +STEXI +@item drive-reopen +@findex drive-reopen +Assigns a new image file to a device. +ETEXI + +{ .name = "drive_add", .args_type = "pci_addr:s,opts:s", .params = "[[:]:]\n" diff --git a/hmp.c b/hmp.c index 3a54455..ab069ad 100644 --- a/hmp.c +++ b/hmp.c @@ -706,6 +706,17 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &errp); } +void hmp_drive_reopen(Monitor *mon, const QDict *qdict) +{ +const char *device = qdict_get_str(qdict, "device"); +const char *filename = qdict_get_str(qdict, "new-image-file"); +const char *format = qdict_get_try_st
[Qemu-devel] [PATCH] add reopen to blockdev-transaction
Signed-off-by: Federico Simoncelli --- blockdev.c |8 qapi-schema.json | 12 qmp-commands.hx |6 +- 3 files changed, 25 insertions(+), 1 deletions(-) diff --git a/blockdev.c b/blockdev.c index 56da5c9..36fe07c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -798,6 +798,14 @@ void qmp_blockdev_transaction(BlockdevActionList *dev_list, dev_info->mirror->target); break; +case BLOCKDEV_ACTION_KIND_REOPEN: +device = dev_info->reopen->device; +if (dev_info->format->has_format) { +format = dev_info->reopen->format; +} +new_source = g_strdup(dev_info->reopen->target); +break; + default: abort(); } diff --git a/qapi-schema.json b/qapi-schema.json index b33875d..17f7548 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1145,6 +1145,17 @@ { 'type': 'BlockdevMirror', 'data': { 'device': 'str', 'target': 'str', '*format': 'str', '*reuse': 'bool' } } +## +# @BlockdevReopen +# +# @device: the name of the device to reopen. +# +# @target: the target of the new image. +# +# @format: #optional the format of the new image, default is 'qcow2'. +## +{ 'type': 'BlockdevReopen', + 'data': { 'device': 'str', 'target': 'str', '*format': 'str' } } ## # @BlockdevAction @@ -1156,6 +1167,7 @@ 'data': { 'snapshot': 'BlockdevSnapshot', 'mirror': 'BlockdevMirror', + 'reopen': 'BlockdevReopen', } } ## diff --git a/qmp-commands.hx b/qmp-commands.hx index 50ac5a0..317c448 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -720,7 +720,7 @@ Arguments: actions array: - "type": the operation to perform. The only supported - values are "snapshot" and "mirror". (json-string) + values are "snapshot", "mirror" and "reopen". (json-string) - "data": a dictionary. The contents depend on the value of "type". When "type" is "snapshot": - "device": device name to snapshot (json-string) @@ -734,6 +734,10 @@ actions array: - "format": format of new image (json-string, optional) - "reuse": whether QEMU should look for an existing image file (json-bool, optional, default false) + When "type" is "reopen": + - "device": device name to reopen (json-string) + - "target": name of destination image file (json-string) + - "format": format of new image (json-string, optional) Example: -- 1.7.1
[Qemu-devel] [PATCHv4] Add blkmirror block driver
Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti Signed-off-by: Federico Simoncelli Signed-off-by: Paolo Bonzini --- Makefile.objs |2 +- block/blkmirror.c | 278 cutils.c | 56 +++ docs/blkmirror.txt | 15 +++ qemu-common.h |2 + 5 files changed, 352 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 808de6a..2302c96 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..d894ca8 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,278 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011-2012 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include "block_int.h" + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:fmt1:path/to/image1:fmt2:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +int ret = 0, i; +char *tmpbuf, *tok[4], *next; +BlockDriver *drv1, *drv2; +BdrvMirrorState *m = bs->opaque; +BlockDriverState *bk; + +m->bs[0] = bdrv_new(""); +if (m->bs[0] == NULL) { +return -ENOMEM; +} + +m->bs[1] = bdrv_new(""); +if (m->bs[1] == NULL) { +bdrv_delete(m->bs[0]); +return -ENOMEM; +} + +tmpbuf = g_malloc(strlen(filename) + 1); +pstrcpy(tmpbuf, strlen(filename) + 1, filename); + +/* Parse the blkmirror: prefix */ +if (!strstart(tmpbuf, "blkmirror:", (const char **) &next)) { +next = tmpbuf; +} + +for (i = 0; i < 4; i++) { +if (!next) { +ret = -EINVAL; +goto out; +} +tok[i] = estrtok_r(NULL, ":", '\\', &next); +} + +drv1 = bdrv_find_whitelisted_format(tok[0]); +drv2 = bdrv_find_whitelisted_format(tok[2]); +if (!drv1 || !drv2) { +ret = -EINVAL; +goto out; +} + +ret = bdrv_open(m->bs[0], tok[1], flags, drv1); +if (ret < 0) { +goto out; +} + +/* If we crash, we cannot assume that the destination is a + * valid mirror and we have to start over. So speed up things + * by effectively operating on the destination in cache=unsafe + * mode. + */ +ret = bdrv_open(m->bs[1], tok[3], flags | BDRV_O_NO_BACKING +| BDRV_O_NO_FLUSH | BDRV_O_CACHE_WB, drv2); +if (ret < 0) { +goto out; +} + +if (m->bs[0]->backing_hd) { +bk = m->bs[0]->backing_hd; + +m->bs[1]->backing_hd = bdrv_new(""); +if (!m->bs[1]->backing_hd) { +ret = -ENOMEM; +goto out; +} + +/* opening the same backing file of the source */ +ret = bdrv_open(m->bs[1]->backing_hd, +bk->filename, bk->open_flags, bk->drv); +if (ret < 0) { +goto out; +} +} + + out: +g_free(tmpbuf); + +if (ret < 0) { +for (i = 0; i < 2; i++) { +bdrv_delete(m->bs[i]); +m->bs[i] = NULL; +} +} + +return ret; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int i; + +for (i = 0; i < 2; i++) { +bdrv_delete(m->bs[i]); +m->bs[i] = NULL; +} +} + +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int ret; + +ret = bdrv_co_flush(m->bs[0]); +if (ret < 0) { +return ret; +} + +return bdrv_co_flush(m->bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirror
Re: [Qemu-devel] [PATCHv3] Add blkmirror block driver
- Original Message - > From: "Federico Simoncelli" > To: qemu-devel@nongnu.org > Cc: mtosa...@redhat.com, kw...@redhat.com, pbonz...@redhat.com, > stefa...@gmail.com, "Federico Simoncelli" > > Sent: Wednesday, February 29, 2012 1:28:21 PM > Subject: [PATCHv3] Add blkmirror block driver > > Mirrored writes are used by live block copy. > > Signed-off-by: Marcelo Tosatti > Signed-off-by: Federico Simoncelli > --- > Makefile.objs |2 +- > block/blkmirror.c | 255 > > cutils.c | 30 ++ > docs/blkmirror.txt | 15 +++ > qemu-common.h |1 + > 5 files changed, 302 insertions(+), 1 deletions(-) > create mode 100644 block/blkmirror.c > create mode 100644 docs/blkmirror.txt > > diff --git a/block/blkmirror.c b/block/blkmirror.c > new file mode 100644 > index 000..c98b162 > --- /dev/null > +++ b/block/blkmirror.c [...] > +if (!drv1 || !drv2) { > +ret = -EINVAL; > +goto out; > +} > + > +ret = bdrv_open(m->bs[0], tok[1], flags, drv1); > +if (ret < 0) { > +goto out; > +} > + > +ret = bdrv_open(m->bs[0], tok[3], flags, drv2); > +if (ret < 0) { > +goto out; > +} There's a small mistake here, the second one is m->bs[1]. -- Federico
[Qemu-devel] [PATCHv3] Add blkmirror block driver
Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti Signed-off-by: Federico Simoncelli --- Makefile.objs |2 +- block/blkmirror.c | 255 cutils.c | 30 ++ docs/blkmirror.txt | 15 +++ qemu-common.h |1 + 5 files changed, 302 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 808de6a..2302c96 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..c98b162 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,255 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011-2012 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include "block_int.h" + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:fmt1:path/to/image1:fmt2:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +int ret = 0, i; +char *tmpbuf, *tok[4], *next; +BlockDriver *drv1, *drv2; +BdrvMirrorState *m = bs->opaque; + +m->bs[0] = bdrv_new(""); +if (m->bs[0] == NULL) { +return -ENOMEM; +} + +m->bs[1] = bdrv_new(""); +if (m->bs[1] == NULL) { +bdrv_delete(m->bs[0]); +return -ENOMEM; +} + +tmpbuf = g_malloc(strlen(filename) + 1); +pstrcpy(tmpbuf, strlen(filename) + 1, filename); + +/* Parse the blkmirror: prefix */ +if (!strstart(tmpbuf, "blkmirror:", (const char **) &next)) { +next = tmpbuf; +} + +for (i = 0; i < 4; i++) { +if (!next) { +ret = -EINVAL; +goto out; +} +tok[i] = pstrtok_r(NULL, ":", '\\', &next); +} + +drv1 = bdrv_find_whitelisted_format(tok[0]); +drv2 = bdrv_find_whitelisted_format(tok[2]); + +if (!drv1 || !drv2) { +ret = -EINVAL; +goto out; +} + +ret = bdrv_open(m->bs[0], tok[1], flags, drv1); +if (ret < 0) { +goto out; +} + +ret = bdrv_open(m->bs[0], tok[3], flags, drv2); +if (ret < 0) { +goto out; +} + + out: +g_free(tmpbuf); + +if (ret < 0) { +for (i = 0; i < 2; i++) { +bdrv_delete(m->bs[i]); +m->bs[i] = NULL; +} +} + +return ret; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int i; + +for (i = 0; i < 2; i++) { +bdrv_delete(m->bs[i]); +m->bs[i] = NULL; +} +} + +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int ret; + +ret = bdrv_co_flush(m->bs[0]); +if (ret < 0) { +return ret; +} + +return bdrv_co_flush(m->bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; + +return bdrv_getlength(m->bs[0]); +} + +static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ +BdrvMirrorState *m = bs->opaque; +return bdrv_aio_readv(m->bs[0], sector_num, qiov, nb_sectors, cb, opaque); +} + +static void dup_aio_cancel(BlockDriverAIOCB *blockacb) +{ +DupAIOCB *acb = container_of(blockacb, DupAIOCB, common); +int i; + +for (i = 0 ; i < 2; i++) { +if (!acb->aios[i].finished) { +bdrv_aio_cancel(acb->
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - > From: "Paolo Bonzini" > To: qemu-devel@nongnu.org > Sent: Tuesday, February 28, 2012 7:02:40 PM > Subject: Re: [Qemu-devel] Live Block Migration using Mirroring > > Il 28/02/2012 18:46, Federico Simoncelli ha scritto: > > > > Thank you for getting this. Being able to have a bogus backing > > > > file > > > > was a bonus but it's not really required for the mirrored live > > > > block > > > > migration. We can add the support for switching the backing > > > > file in the > > > > drive-reopen part. > > > > > > Wait, it's not really required for oVirt because it creates the > > > snapshot outside QEMU. What about everyone else? > > > > They'll have (as oVirt) two mirrored snapshot pointing at the same > > base. The only difference is that the image is created internally, > > but > > that's not hard. > > Can you detail how you are switching the backing file in the > drive-reopen? Either the BlockDriverState opened by blkmirror, or > the > one opened at the end, will have to use the "wrong" backing_file. > How > do you arrange for that? Step 1 - Initital Scenario == VM1 is running on the src/hd0base. [src/hd0base] <= VM1(read-write) Step 3 - Mirrored Live Snapshot === A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as image files (both having src/hd0base as backing file). [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) ^-- [dst/hd0snap1] <= VM1(read-write) Step 4 - Backing File Copy == An external manager copies src/hd0base to the destination (dst/hd0base). [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) [dst/hd0base]^-- [dst/hd0snap1] <= VM1(read-write) Step 5 - Final Switch to Destination VM1 is now able to switch to the destination for both read and write operations fixing the backing file path in dst/hd0snap1. [src/hd0base] <- [src/hd0snap1] [dst/hd0base] <- [dst/hd0snap1] <= VM1(read-write) -- Federico
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - > From: "Paolo Bonzini" > To: "Federico Simoncelli" > Cc: "Stefan Hajnoczi" , qemu-devel@nongnu.org, > kw...@redhat.com, mtosa...@redhat.com > Sent: Tuesday, February 28, 2012 6:36:57 PM > Subject: Re: [Qemu-devel] Live Block Migration using Mirroring > > Il 28/02/2012 18:15, Federico Simoncelli ha scritto: > > Thank you for getting this. Being able to have a bogus backing file > > was > > a bonus but it's not really required for the mirrored live block > > migration. > > We can add the support for switching the backing file in the > > drive-reopen > > part. > > Wait, it's not really required for oVirt because it creates the > snapshot > outside QEMU. What about everyone else? They'll have (as oVirt) two mirrored snapshot pointing at the same base. The only difference is that the image is created internally, but that's not hard. -- Federico
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - > From: "Stefan Hajnoczi" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com > Sent: Tuesday, February 28, 2012 4:47:48 PM > Subject: Re: [Qemu-devel] Live Block Migration using Mirroring > > On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli > wrote: > > Step 3 - Mirrored Live Snapshot > > === > > A mirrored live snapshot is issued using src/hd0snap1 and > > dst/hd0snap1 as > > image files. (Where "<-" stands for "has backing file") > > > > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) > > ... <- [dst/hd0snap1] <= VM1(write-only) > > > > $ qemu-img create -f qcow2 \ > > -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G > > Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480 > > backing_file='/tmp/src/hd0base.qcow2' encryption=off > > cluster_size=65536 > > > > $ qemu-img create -f qcow2 \ > > -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G > > Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480 > > backing_file='/tmp/src/hd0base.qcow2' encryption=off > > cluster_size=65536 > > > > (qemu) snapshot_blkdev -n ide0-hd0 \ > > blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 > > blkmirror > > > > Step 4 - Backing File Copy > > == > > An external manager copies src/hd0base to the destination > > dst/hd0base. > > > > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) > > [dst/hd0base] <- [dst/hd0snap1] <= VM1(write-only) > > At this stage we have dst/hd0snap1 opened with BDRV_O_NO_BACKING. If > it has no backing file and the guest issues a write request that is > smaller than a cluster in the image file, the untouched areas of that > cluster will be populated with zeroes. > > Once dst/hd0snap1 is reopened with dst/hd0base in place there will be > zeros in clusters where the guest wrote only a few sectors. We will > not "see" the backing file data in those clusters. > > Have you hit this problem or did I miss something? Thank you for getting this. Being able to have a bogus backing file was a bonus but it's not really required for the mirrored live block migration. We can add the support for switching the backing file in the drive-reopen part. I'll remove the BDRV_O_NO_BACKING flag from the blkmirror patch. -- Federico
Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands)
- Original Message - > From: "Anthony Liguori" > To: "Federico Simoncelli" > Cc: "Paolo Bonzini" , kw...@redhat.com, > arm...@redhat.com, "Jeff Cody" , > mtosa...@redhat.com, qemu-devel@nongnu.org, "Luiz Capitulino" > > Sent: Monday, February 27, 2012 5:42:31 PM > Subject: Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the > blockdev-reopen and blockdev-migrate > commands) > > On 02/27/2012 10:33 AM, Federico Simoncelli wrote: > > I'm all for the modularity of the commands (I suggested it since > > the beginning), > > but all this infrastructure goes way beyond what I'd need for oVirt > > now. > > > > When I submitted my patches we knew that my work wasn't the > > definitive solution > > (eg: the future implementation of -blockdev). So I'd suggest to try > > and settle > > with something that is good enough and that is not preventing a > > future improvement. > > > > What about having a (temporary) flag in drive-mirror to accept also > > a new-image-file > > until we will have the optimal solution? > > Unless there are extenuating circumstances (like the absence of core > infrastructure in QEMU), then we should not add commands that we know > are not > the right command. So are you in favor or against my suggestion? It looks like this is exactly the case where the core infrastructure (transactions) is missing. -- Federico
Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands)
- Original Message - > From: "Paolo Bonzini" > To: "Luiz Capitulino" > Cc: "Federico Simoncelli" , kw...@redhat.com, > mtosa...@redhat.com, qemu-devel@nongnu.org, > arm...@redhat.com, "Jeff Cody" > Sent: Monday, February 27, 2012 3:39:33 PM > Subject: drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen > and blockdev-migrate commands) > > > > 4) incremental=true, new_image_file passed: > > >- no images will be created > > >- writes will be mirrored to new_image_file and dest > > No need to provide this from within QEMU, because libvirt/oVirt can > do > the dance using elementary operations: > > blockdev-begin-transaction > drive-reopen device new-image-file > drive-mirror streaming=false device dest > blockdev-commit-transaction > > No strange optional arguments, no proliferation of commands, etc. > The > only downside is that if someone tries to do (4) without transactions > (or without stopping the VM) they'll get corruption because atomicity > is > required. I'm all for the modularity of the commands (I suggested it since the beginning), but all this infrastructure goes way beyond what I'd need for oVirt now. When I submitted my patches we knew that my work wasn't the definitive solution (eg: the future implementation of -blockdev). So I'd suggest to try and settle with something that is good enough and that is not preventing a future improvement. What about having a (temporary) flag in drive-mirror to accept also a new-image-file until we will have the optimal solution? -- Federico
Re: [Qemu-devel] [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands
- Original Message - > From: "Luiz Capitulino" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, mtosa...@redhat.com, pbonz...@redhat.com, > kw...@redhat.com, arm...@redhat.com > Sent: Friday, February 24, 2012 8:01:43 PM > Subject: Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate > commands > > On Fri, 24 Feb 2012 16:49:04 + > Federico Simoncelli wrote: > > > Signed-off-by: Federico Simoncelli > > Btw, would be nice to have a 0/2 intro email describing the feature > and changelog > info. Yes the 0/2 (actually 0/3) was sent at the beginning of the thread so you might have missed it because you were added later on but I'm sure you can still find it in the archives. > > > +BlockDriver *drv; > > +int i, j, escape; > > +char new_filename[2048], *filename; > > I'd use PATH_MAX for new_filename's size. Maybe PATH_MAX * 2 + 1? (To handle the case where all the characters should be escaped). > > +escape = 0; > > +for (i = 0, j = 0; j < strlen(new_image_file); j++) { > > + loop: > > +if (!(i < sizeof(new_filename) - 2)) { > > +error_set(errp, QERR_INVALID_PARAMETER_VALUE, > > + "new-image-file", "shorter filename"); > > +return; > > +} > > + > > +if (new_image_file[j] == ':' || new_image_file[j] == '\\') > > { > > +if (!escape) { > > +new_filename[i++] = '\\', escape = 1; > > +goto loop; > > +} else { > > +escape = 0; > > +} > > +} > > + > > +new_filename[i++] = new_image_file[j]; > > +} > > IMO, you could require the filename arguments to be escaped already. Can you be more explicit, who should escape it? The only thing that comes to my mind right now is requiring the input of blockdev-migrate already escaped but that would expose an internal format. (I'd not recommend it). > > +void qmp_blockdev_migrate(const char *device, bool incremental, > > + const char *destination, bool > > has_new_image_file, > > + const char *new_image_file, Error > > **errp) > > +{ > > +BlockDriverState *bs; > > + > > +bs = bdrv_find(device); > > +if (!bs) { > > +error_set(errp, QERR_DEVICE_NOT_FOUND, device); > > +return; > > +} > > +if (bdrv_in_use(bs)) { > > +error_set(errp, QERR_DEVICE_IN_USE, device); > > +return; > > +} > > + > > +if (incremental) { > > +if (!has_new_image_file) { > > +error_set(errp, QERR_INVALID_PARAMETER_VALUE, > > + "incremental", "a new image file"); > > +} else { > > +qmp_blockdev_mirror(device, destination, > > new_image_file, errp); > > +} > > +} else { > > +error_set(errp, QERR_NOT_SUPPORTED); > > +} > > The command documentation says that 'incremental' and > 'new_image_file' are > optionals, but the code makes them required. Why? If I didn't make any mistake in the code I'm just enforcing that when you specify "incremental" you also need a new image. There are still other valid cases where they are optional. -- Federico
Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver
- Original Message - > From: "Luiz Capitulino" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, mtosa...@redhat.com, pbonz...@redhat.com, > kw...@redhat.com, arm...@redhat.com > Sent: Friday, February 24, 2012 7:17:22 PM > Subject: Re: [PATCH 1/2 v2] Add blkmirror block driver > > On Fri, 24 Feb 2012 16:49:03 + > Federico Simoncelli wrote: > > > +/* Parse the raw image filename */ > > +filename2 = g_malloc(strlen(filename)+1); > > +escape = 0; > > +for (i = n = 0; i < strlen(filename); i++) { > > +if (!escape && filename[i] == ':') { > > +break; > > +} > > +if (!escape && filename[i] == '\\') { > > +escape = 1; > > +} else { > > +escape = 0; > > +} > > + > > +if (!escape) { > > +filename2[n++] = filename[i]; > > +} > > +} > > +filename2[n] = '\0'; > > You're escaping only the first image name string, is that > intentional? Yes, it was also documented here by Marcelo: > > diff --git a/docs/blkmirror.txt b/docs/blkmirror.txt > > new file mode 100644 > > index 000..cf73f3f > > --- /dev/null > > +++ b/docs/blkmirror.txt > > @@ -0,0 +1,16 @@ > > +Block mirror driver > > +--- > > + > > +This driver will mirror writes to two distinct images. > > +It's used internally by live block copy. > > + > > +Format > > +-- > > + > > +blkmirror:/image1.img:/image2.img > > + > > +'\' (backslash) can be used to escape colon processing > > +as a separator, in the first image filename. > > +Backslashes themselves also can be escaped as '\\'. > > + > > + Anyway this format is encapsulated by blockdev-migrate. -- Federico
Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver
- Original Message - > From: "Eric Blake" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com, > arm...@redhat.com, lcapitul...@redhat.com, > pbonz...@redhat.com > Sent: Friday, February 24, 2012 6:02:36 PM > Subject: Re: [Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver > > +++ b/docs/blkmirror.txt > > @@ -0,0 +1,16 @@ > > +Block mirror driver > > +--- > > + > > +This driver will mirror writes to two distinct images. > > +It's used internally by live block copy. > > + > > +Format > > +-- > > + > > +blkmirror:/image1.img:/image2.img > > + > > +'\' (backslash) can be used to escape colon processing > > +as a separator, in the first image filename. > > +Backslashes themselves also can be escaped as '\\'. > > Is the escaping of : and \ only necessary for image1.img, or for both > image1 and image2? I need to know if the parser is consistent for > both > arguments, but this wording makes it sound like it is only for the > first > argument. Yes it is only for the first argument. -- Federico
[Qemu-devel] [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands
Signed-off-by: Federico Simoncelli --- blockdev.c | 107 -- hmp-commands.hx | 38 +++ hmp.c| 24 hmp.h|2 + qapi-schema.json | 54 +++ 5 files changed, 213 insertions(+), 12 deletions(-) diff --git a/blockdev.c b/blockdev.c index 2c132a3..f51bd6f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict) } } -void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, -bool has_format, const char *format, -Error **errp) +static void change_blockdev_image(const char *device, const char *new_image_file, + const char *format, bool create, Error **errp) { BlockDriverState *bs; BlockDriver *drv, *old_drv, *proto_drv; @@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, old_drv = bs->drv; flags = bs->open_flags; -if (!has_format) { +if (!format) { format = "qcow2"; } @@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, return; } -proto_drv = bdrv_find_protocol(snapshot_file); +proto_drv = bdrv_find_protocol(new_image_file); if (!proto_drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); return; } -ret = bdrv_img_create(snapshot_file, format, bs->filename, - bs->drv->format_name, NULL, -1, flags); -if (ret) { -error_set(errp, QERR_UNDEFINED_ERROR); -return; +if (create) { +ret = bdrv_img_create(new_image_file, format, bs->filename, + bs->drv->format_name, NULL, -1, flags); +if (ret) { +error_set(errp, QERR_UNDEFINED_ERROR); +return; +} } bdrv_drain_all(); bdrv_flush(bs); bdrv_close(bs); -ret = bdrv_open(bs, snapshot_file, flags, drv); +ret = bdrv_open(bs, new_image_file, flags, drv); /* * If reopening the image file we just created fails, fall back * and try to re-open the original image. If that fails too, we @@ -709,11 +710,93 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, if (ret != 0) { error_set(errp, QERR_OPEN_FILE_FAILED, old_filename); } else { -error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); +error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); } } } +void qmp_drive_reopen(const char *device, const char *new_image_file, + bool has_format, const char *format, Error **errp) +{ +change_blockdev_image(device, new_image_file, + has_format ? format : NULL, false, errp); +} + +void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, +bool has_format, const char *format, +Error **errp) +{ +change_blockdev_image(device, snapshot_file, + has_format ? format : NULL, true, errp); +} + +static void qmp_blockdev_mirror(const char *device, const char *destination, +const char *new_image_file, Error **errp) +{ +BlockDriver *drv; +int i, j, escape; +char new_filename[2048], *filename; + +drv = bdrv_find_format("blkmirror"); +if (!drv) { +error_set(errp, QERR_INVALID_BLOCK_FORMAT, "blkmirror"); +return; +} + +escape = 0; +for (i = 0, j = 0; j < strlen(new_image_file); j++) { + loop: +if (!(i < sizeof(new_filename) - 2)) { +error_set(errp, QERR_INVALID_PARAMETER_VALUE, + "new-image-file", "shorter filename"); +return; +} + +if (new_image_file[j] == ':' || new_image_file[j] == '\\') { +if (!escape) { +new_filename[i++] = '\\', escape = 1; +goto loop; +} else { +escape = 0; +} +} + +new_filename[i++] = new_image_file[j]; +} + +filename = g_strdup_printf("blkmirror:%s:%s", new_filename, destination); +change_blockdev_image(device, filename, "blkmirror", false, errp); +g_free(filename); +} + +void qmp_blockdev_migrate(const char *device, bool incremental, + const char *destination, bool has_new_image_file, + const char *new_image_file, Error **errp) +{ +BlockDriverState *bs; + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +
[Qemu-devel] [PATCH 1/2 v2] Add blkmirror block driver
From: Marcelo Tosatti Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti Signed-off-by: Federico Simoncelli --- Makefile.objs |2 +- block/blkmirror.c | 247 docs/blkmirror.txt | 16 3 files changed, 264 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 67ee3df..6020308 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..49e3381 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,247 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include "block_int.h" + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:path/to/image1:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +BdrvMirrorState *m = bs->opaque; +int ret, escape, i, n; +char *filename2; + +/* Parse the blkmirror: prefix */ +if (strncmp(filename, "blkmirror:", strlen("blkmirror:"))) { +return -EINVAL; +} +filename += strlen("blkmirror:"); + +/* Parse the raw image filename */ +filename2 = g_malloc(strlen(filename)+1); +escape = 0; +for (i = n = 0; i < strlen(filename); i++) { +if (!escape && filename[i] == ':') { +break; +} +if (!escape && filename[i] == '\\') { +escape = 1; +} else { +escape = 0; +} + +if (!escape) { +filename2[n++] = filename[i]; +} +} +filename2[n] = '\0'; + +m->bs[0] = bdrv_new(""); +if (m->bs[0] == NULL) { +g_free(filename2); +return -ENOMEM; +} +ret = bdrv_open(m->bs[0], filename2, flags, NULL); +g_free(filename2); +if (ret < 0) { +return ret; +} +filename += i + 1; + +m->bs[1] = bdrv_new(""); +if (m->bs[1] == NULL) { +bdrv_delete(m->bs[0]); +return -ENOMEM; +} +ret = bdrv_open(m->bs[1], filename, flags | BDRV_O_NO_BACKING, NULL); +if (ret < 0) { +bdrv_delete(m->bs[0]); +return ret; +} + +return 0; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int i; + +for (i = 0; i < 2; i++) { +bdrv_delete(m->bs[i]); +m->bs[i] = NULL; +} +} + +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int ret; + +ret = bdrv_co_flush(m->bs[0]); +if (ret < 0) { +return ret; +} + +return bdrv_co_flush(m->bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; + +return bdrv_getlength(m->bs[0]); +} + +static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ +BdrvMirrorState *m = bs->opaque; +return bdrv_aio_readv(m->bs[0], sector_num, qiov, nb_sectors, cb, opaque); +} + +static void dup_aio_cancel(BlockDriverAIOCB *blockacb) +{ +DupAIOCB *acb = container_of(blockacb, DupAIOCB, common); +int i; + +for (i = 0 ; i < 2; i++) { +if (!acb->aios[i].finished) { +bdrv_aio_cancel(acb->aios[i].aiocb); +} +} +qemu_aio_release(ac
Re: [Qemu-devel] [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands
- Original Message - > From: "Kevin Wolf" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, "Marcelo Tosatti" , > lcapitul...@redhat.com, "Paolo Bonzini" > , "Markus Armbruster" > Sent: Friday, February 24, 2012 1:03:08 PM > Subject: Re: [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands > > Am 24.02.2012 12:37, schrieb Federico Simoncelli: > > Signed-off-by: Federico Simoncelli > > --- > > block/blkmirror.c |2 +- > > blockdev.c| 109 > > +++-- > > hmp-commands.hx | 36 + > > hmp.c | 30 ++ > > hmp.h |2 + > > qapi-schema.json | 63 ++ > > 6 files changed, 229 insertions(+), 13 deletions(-) > > > > diff --git a/block/blkmirror.c b/block/blkmirror.c > > index 39927c8..49e3381 100644 > > --- a/block/blkmirror.c > > +++ b/block/blkmirror.c > > @@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, > > const char *filename, int flags) > > bdrv_delete(m->bs[0]); > > return -ENOMEM; > > } > > -ret = bdrv_open(m->bs[1], filename, flags, NULL); > > +ret = bdrv_open(m->bs[1], filename, flags | BDRV_O_NO_BACKING, > > NULL); > > if (ret < 0) { > > bdrv_delete(m->bs[0]); > > return ret; > > Was this hunk meant to be in patch 1? Not necessarily, I thought a lot about it. I didn't want to modify Marcelo's patch too much. This flag is actually mandatory only to make blockdev-migrate work with a destination that doesn't have a backing file yet, so I thought it was more appropriate to squash it here. -- Federico
[Qemu-devel] [PATCH 2/2] Add the blockdev-reopen and blockdev-migrate commands
Signed-off-by: Federico Simoncelli --- block/blkmirror.c |2 +- blockdev.c| 109 +++-- hmp-commands.hx | 36 + hmp.c | 30 ++ hmp.h |2 + qapi-schema.json | 63 ++ 6 files changed, 229 insertions(+), 13 deletions(-) diff --git a/block/blkmirror.c b/block/blkmirror.c index 39927c8..49e3381 100644 --- a/block/blkmirror.c +++ b/block/blkmirror.c @@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) bdrv_delete(m->bs[0]); return -ENOMEM; } -ret = bdrv_open(m->bs[1], filename, flags, NULL); +ret = bdrv_open(m->bs[1], filename, flags | BDRV_O_NO_BACKING, NULL); if (ret < 0) { bdrv_delete(m->bs[0]); return ret; diff --git a/blockdev.c b/blockdev.c index 2c132a3..1df2542 100644 --- a/blockdev.c +++ b/blockdev.c @@ -646,9 +646,8 @@ void do_commit(Monitor *mon, const QDict *qdict) } } -void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, -bool has_format, const char *format, -Error **errp) +static void change_blockdev_image(const char *device, const char *new_image_file, + const char *format, bool create, Error **errp) { BlockDriverState *bs; BlockDriver *drv, *old_drv, *proto_drv; @@ -671,7 +670,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, old_drv = bs->drv; flags = bs->open_flags; -if (!has_format) { +if (!format) { format = "qcow2"; } @@ -681,24 +680,26 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, return; } -proto_drv = bdrv_find_protocol(snapshot_file); +proto_drv = bdrv_find_protocol(new_image_file); if (!proto_drv) { error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); return; } -ret = bdrv_img_create(snapshot_file, format, bs->filename, - bs->drv->format_name, NULL, -1, flags); -if (ret) { -error_set(errp, QERR_UNDEFINED_ERROR); -return; +if (create) { +ret = bdrv_img_create(new_image_file, format, bs->filename, + bs->drv->format_name, NULL, -1, flags); +if (ret) { +error_set(errp, QERR_UNDEFINED_ERROR); +return; +} } bdrv_drain_all(); bdrv_flush(bs); bdrv_close(bs); -ret = bdrv_open(bs, snapshot_file, flags, drv); +ret = bdrv_open(bs, new_image_file, flags, drv); /* * If reopening the image file we just created fails, fall back * and try to re-open the original image. If that fails too, we @@ -709,11 +710,95 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, if (ret != 0) { error_set(errp, QERR_OPEN_FILE_FAILED, old_filename); } else { -error_set(errp, QERR_OPEN_FILE_FAILED, snapshot_file); +error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file); } } } +void qmp_blockdev_reopen(const char *device, const char *new_image_file, + bool has_format, const char *format, Error **errp) +{ +change_blockdev_image(device, new_image_file, + has_format ? format : NULL, false, errp); +} + +void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, +bool has_format, const char *format, +Error **errp) +{ +change_blockdev_image(device, snapshot_file, + has_format ? format : NULL, true, errp); +} + +void qmp_blockdev_migrate(const char *device, BlockMigrateOp mode, + const char *destination, bool has_new_image_file, + const char *new_image_file, Error **errp) +{ +BlockDriverState *bs; +BlockDriver *drv; +int i, j, escape; +char filename[2048]; + +bs = bdrv_find(device); +if (!bs) { +error_set(errp, QERR_DEVICE_NOT_FOUND, device); +return; +} +if (bdrv_in_use(bs)) { +error_set(errp, QERR_DEVICE_IN_USE, device); +return; +} + +if (mode == BLOCK_MIGRATE_OP_MIRROR) { +drv = bdrv_find_format("blkmirror"); +if (!drv) { +error_set(errp, QERR_INVALID_BLOCK_FORMAT, "blkmirror"); +return; +} + +if (!has_new_image_file) { +new_image_file = bs->filename; +} + +pstrcpy(filename, sizeof(filename), "blkmirror:"); +i = strlen("blkmirror:"); + +escape = 0; +for (j = 0; j < strlen(new_image_file); j++) { + loop: +i
[Qemu-devel] [PATCH 1/2] Add blkmirror block driver
From: Marcelo Tosatti Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti Signed-off-by: Federico Simoncelli --- Makefile.objs |2 +- block/blkmirror.c | 247 docs/blkmirror.txt | 16 3 files changed, 264 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 67ee3df..6020308 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..39927c8 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,247 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include "block_int.h" + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:path/to/image1:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +BdrvMirrorState *m = bs->opaque; +int ret, escape, i, n; +char *filename2; + +/* Parse the blkmirror: prefix */ +if (strncmp(filename, "blkmirror:", strlen("blkmirror:"))) { +return -EINVAL; +} +filename += strlen("blkmirror:"); + +/* Parse the raw image filename */ +filename2 = g_malloc(strlen(filename)+1); +escape = 0; +for (i = n = 0; i < strlen(filename); i++) { +if (!escape && filename[i] == ':') { +break; +} +if (!escape && filename[i] == '\\') { +escape = 1; +} else { +escape = 0; +} + +if (!escape) { +filename2[n++] = filename[i]; +} +} +filename2[n] = '\0'; + +m->bs[0] = bdrv_new(""); +if (m->bs[0] == NULL) { +g_free(filename2); +return -ENOMEM; +} +ret = bdrv_open(m->bs[0], filename2, flags, NULL); +g_free(filename2); +if (ret < 0) { +return ret; +} +filename += i + 1; + +m->bs[1] = bdrv_new(""); +if (m->bs[1] == NULL) { +bdrv_delete(m->bs[0]); +return -ENOMEM; +} +ret = bdrv_open(m->bs[1], filename, flags, NULL); +if (ret < 0) { +bdrv_delete(m->bs[0]); +return ret; +} + +return 0; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int i; + +for (i = 0; i < 2; i++) { +bdrv_delete(m->bs[i]); +m->bs[i] = NULL; +} +} + +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int ret; + +ret = bdrv_co_flush(m->bs[0]); +if (ret < 0) { +return ret; +} + +return bdrv_co_flush(m->bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; + +return bdrv_getlength(m->bs[0]); +} + +static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ +BdrvMirrorState *m = bs->opaque; +return bdrv_aio_readv(m->bs[0], sector_num, qiov, nb_sectors, cb, opaque); +} + +static void dup_aio_cancel(BlockDriverAIOCB *blockacb) +{ +DupAIOCB *acb = container_of(blockacb, DupAIOCB, common); +int i; + +for (i = 0 ; i < 2; i++) { +if (!acb->aios[i].finished) { +bdrv_aio_cancel(acb->aios[i].aiocb); +} +} +qemu_aio_release(acb); +} + +static AIOPool d
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - > From: "Stefan Hajnoczi" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com > Sent: Thursday, February 23, 2012 5:35:23 PM > Subject: Re: [Qemu-devel] Live Block Migration using Mirroring > > On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli > wrote: > > recently I've been working on live block migration combining the > > live > > snapshots and the blkmirror patch sent by Marcelo Tosatti few > > months ago. > > > > The design is summarized at this url as "Mirrored-Snapshot": > > > > http://www.ovirt.org/wiki/Features/Design/StorageLiveMigration > > After mirrored-snapshot completes we're left with the base and the > snapshot. Is the idea to implement live snapshot merge next? Or do > you have something else planned to avoid growing the backing file > chain each time mirrored-snapshot is used? The general idea is that I don't expect the need to migrate to a new storage to be very frequent. Being able to live merge the new snapshot would be great. -- Federico
Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
- Original Message - > From: "Stefan Hajnoczi" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com > Sent: Thursday, February 23, 2012 5:28:23 PM > Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver > > On Thu, Feb 23, 2012 at 4:20 PM, Federico Simoncelli > wrote: > > - Original Message - > >> From: "Stefan Hajnoczi" > >> To: "Federico Simoncelli" > >> Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com > >> Sent: Thursday, February 23, 2012 5:18:41 PM > >> Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver > >> > >> On Thu, Feb 23, 2012 at 4:14 PM, Stefan Hajnoczi > >> > >> wrote: > >> > By the way, this code does not build against qemu.git/master. > >> > It > >> > uses > >> > interfaces which have been dropped. > >> > >> Ah, I see you changed that in Patch 2/3. Please don't do that, > >> it's > >> hard to review and breaks git-bisect. > > > > Squashing is easy (anyone could do it at the commit phase), > > reviewing > > is hard (if you hide your changes in someone else's code). > > I don't care if you or Marcelo wrote it, I want to see a coherent > piece of code. > > The way to do it is to merge the g_malloc() and BlockDriver interface > changes into this path. Then you have not snuck any significant > changes into Marcelo's code. Well for example I'd have snuck the qemu_vmalloc/qemu_vfree mistake. > Keep the BDRV_O_NO_BACKING separate. Ok, thanks. -- Federico
Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
- Original Message - > From: "Stefan Hajnoczi" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com > Sent: Thursday, February 23, 2012 5:18:41 PM > Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver > > On Thu, Feb 23, 2012 at 4:14 PM, Stefan Hajnoczi > wrote: > > By the way, this code does not build against qemu.git/master. It > > uses > > interfaces which have been dropped. > > Ah, I see you changed that in Patch 2/3. Please don't do that, it's > hard to review and breaks git-bisect. Squashing is easy (anyone could do it at the commit phase), reviewing is hard (if you hide your changes in someone else's code). -- Federico
Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver
- Original Message - > From: "Stefan Hajnoczi" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com > Sent: Thursday, February 23, 2012 5:14:09 PM > Subject: Re: [Qemu-devel] [PATCH 1/3] Add blkmirror block driver > > On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli > wrote: > > From: Marcelo Tosatti > > > > Mirrored writes are used by live block copy. > > I think the right approach is to create a single blkmirror driver > that > also includes blkverify functionality. The code is basically the > same > except blkverify also compares reads - just use a flag to > enable/disable that behavior. > > Feel free to rename the blkverify driver to blkmirror if you wish. > > By the way, this code does not build against qemu.git/master. It > uses > interfaces which have been dropped. Yes you also need: [PATCH 2/3] Update the blkmirror block driver Which was sent separately to facilitate the review for people who already reviewed this. -- Federico
Re: [Qemu-devel] Live Block Migration using Mirroring
- Original Message - > From: "Stefan Hajnoczi" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com > Sent: Thursday, February 23, 2012 4:47:38 PM > Subject: Re: [Qemu-devel] Live Block Migration using Mirroring > > On Wed, Feb 22, 2012 at 5:13 PM, Federico Simoncelli > wrote: > > Step 3 - Mirrored Live Snapshot > > === > > A mirrored live snapshot is issued using src/hd0snap1 and > > dst/hd0snap1 as > > image files. (Where "<-" stands for "has backing file") > > > > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) > > ... <- [dst/hd0snap1] <= VM1(write-only) > > > > $ qemu-img create -f qcow2 \ > > -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G > > Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480 > > backing_file='/tmp/src/hd0base.qcow2' encryption=off > > cluster_size=65536 > > > > $ qemu-img create -f qcow2 \ > > -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G > > Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480 > > backing_file='/tmp/src/hd0base.qcow2' encryption=off > > cluster_size=65536 > > At this stage /tmp/dst/hd0base.qcow2 does not exist yet. The > qemu-img > output you pasted shows /tmp/src/hd0base.qcow2 was actually used. > Typo? No that's part of the flag used in [PATCH 2/3] (Update the blkmirror block driver): BDRV_O_NO_BACKING It's also documented in the design: http://www.ovirt.org/wiki/File:StorageLiveMigration2.png > > (qemu) snapshot_blkdev -n ide0-hd0 \ > > blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 > > blkmirror > > > > Step 4 - Backing File Copy > > == > > An external manager copies src/hd0base to the destination > > dst/hd0base. > > > > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) > > [dst/hd0base] <- [dst/hd0snap1] <= VM1(write-only) > > > > $ cp -a /tmp/src/hd0base.qcow2 /tmp/dst/hd0base.qcow2 > > Are we missing a fixup step that changes backing_file in > dst/hd0snap1.qcow2 to point at dst/hd0base.qcow2? See above. -- Federico
Re: [Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev
- Original Message - > From: "Paolo Bonzini" > To: "Federico Simoncelli" > Cc: kw...@redhat.com, mtosa...@redhat.com, qemu-devel@nongnu.org > Sent: Thursday, February 23, 2012 10:48:59 AM > Subject: Re: [PATCH 3/3] Add nocreate option to snapshot_blkdev > > On 02/23/2012 10:39 AM, Federico Simoncelli wrote: > > I agree on the fact that using snapshot_blkdev is a misnomer but at > > the same time I like very much how blkmirror is modular and it can > > be used as a regular image filename. > > True, on the other hand if we add this we will have to keep it > forever. > > I'm worried that blkmirror does not satisfy _all_ mirroring needs, > for > example you cannot use block_stream to copy from the source to the > destination, so perhaps in the future we want to change it to > something > else. Are you talking about a mirroring where you block_stream the missing clusters in the destination from the source? I believe that it could be done without losing the blkmirror modularity probably extending the BlockDriver structure with some additional concepts. > So while I appreciate the easier debugging, I'm afraid that we'll > regret > adding a command-line-visible feature. > > > I'm worried that making it explicit with a "--mirror" option would > > take away its flexibility. > > What about adding block_reopen and extending the blkmirror string: > > > > (qemu) block_reopen ide-hd0 \ > > blkmirror:qcow2:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 > > > > so that the format is passed as first argument (and if it's empty > > we > > would use the auto-detection). > > No, the format of the source and destination should be independent. > It > would be nice to have something like this: > > blkmirror:src=...,dst=...,srcformat=...,dstformat=... > > but commas are a pain because they need escaping. I like that, one limitation we need to keep in mind is that it should fit into the hard-coded filename limit of 1024 characters that (sadly) we have in multiple places. Another thing is that at this stage the mirroring is more an original/copy concept rather than source/destination. What about: blkmirror:...,copy=...,fmt=... (both images uses the same fmt) blkmirror:...,copy=...,fmt=...,copyfmt=... Or, eventually if you feel like that source/destination is more appropriate for the future, then: blkmirror:...,dst=...,fmt=...,dstfmt=... -- Federico
Re: [Qemu-devel] [PATCH 2/3] Update the blkmirror block driver
- Original Message - > From: "Paolo Bonzini" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, kw...@redhat.com, mtosa...@redhat.com > Sent: Thursday, February 23, 2012 8:18:28 AM > Subject: Re: [PATCH 2/3] Update the blkmirror block driver > > On 02/22/2012 06:13 PM, Federico Simoncelli wrote: > > @@ -46,7 +46,7 @@ static int blkmirror_open(BlockDriverState *bs, > > const char *filename, int flags) > > filename += strlen("blkmirror:"); > > > > /* Parse the raw image filename */ > > -filename2 = qemu_malloc(strlen(filename)+1); > > +filename2 = qemu_vmalloc(strlen(filename)+1); > > escape = 0; > > for (i = n = 0; i < strlen(filename); i++) { > > if (!escape && filename[i] == ':') { > > @@ -66,11 +66,11 @@ static int blkmirror_open(BlockDriverState *bs, > > const char *filename, int flags) > > > > m->bs[0] = bdrv_new(""); > > if (m->bs[0] == NULL) { > > -free(filename2); > > +qemu_vfree(filename2); > > return -ENOMEM; > > } > > ret = bdrv_open(m->bs[0], filename2, flags, NULL); > > -free(filename2); > > +qemu_vfree(filename2); > > if (ret < 0) { > > return ret; > > } > > These should be g_malloc and g_free. Thanks! > Also, please squash this patch in part 1. Yes, I sent it as a separate patch to make it easier to review my changes (if someone already reviewed Marcelo's patch). Any comment on the BDRV_O_NO_BACKING flag I added? That's the real trick I'm pulling here. It basically allows to open the second image only for writing and it doesn't complain if the backing file is not there yet (it will be copied during step 4). -- Federico
Re: [Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev
- Original Message - > From: "Paolo Bonzini" > Cc: "Federico Simoncelli" , kw...@redhat.com, > mtosa...@redhat.com, qemu-devel@nongnu.org > Sent: Thursday, February 23, 2012 8:38:55 AM > Subject: Re: [PATCH 3/3] Add nocreate option to snapshot_blkdev > > On 02/23/2012 08:19 AM, Paolo Bonzini wrote: > >> > Signed-off-by: Federico Simoncelli > > What is the usecase, and how can this be tested? > > Oops, you explained it in part 0. > > > Step 5 - Final Switch to Destination > > > > VM1 is now able to switch to the destination for both read and > > write > > operations. > > > > [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) > > > > (qemu) snapshot_blkdev -n ide0-hd0 /tmp/dst/hd0snap1.qcow2 > > It seems to me that reusing the snapshot_blkdev command is a bit of a > misnomer. It also forces you to use format autodetection for the > destination in the blkmirror phase. > > I see two possibilities: > > 1) you can make a new command block_reopen; this fixes only the > naming > problem. > > 2) you can hide the new format behind a new command to be used like > this: > >block_migrate --mirror ide-hd0 /tmp/dst/hd0snap1.qcow2 >... >block_migrate --switch ide-hd0 > > This leave the possibility to specify the format in the future: > >block_migrate --mirror ide-hd0 /tmp/dst/hd0snap1.qcow2 qcow2 > > And we could have another sub-command > >block_mirror --stream ide-hd0 /tmp/dst/hd0.raw > > to migrate block devices without shared storage and without an > intermediate snapshot. I agree on the fact that using snapshot_blkdev is a misnomer but at the same time I like very much how blkmirror is modular and it can be used as a regular image filename. I'm worried that making it explicit with a "--mirror" option would take away its flexibility. What about adding block_reopen and extending the blkmirror string: (qemu) block_reopen ide-hd0 \ blkmirror:qcow2:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 so that the format is passed as first argument (and if it's empty we would use the auto-detection). -- Federico
[Qemu-devel] [PATCH 1/3] Add blkmirror block driver
From: Marcelo Tosatti Mirrored writes are used by live block copy. Signed-off-by: Marcelo Tosatti --- Makefile.objs |2 +- block/blkmirror.c | 282 docs/blkmirror.txt | 15 +++ 3 files changed, 298 insertions(+), 1 deletions(-) create mode 100644 block/blkmirror.c create mode 100644 docs/blkmirror.txt diff --git a/Makefile.objs b/Makefile.objs index 391e524..cd65e1b 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-nested-y += qed-check.o -block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o +block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o block-nested-y += stream.o block-nested-$(CONFIG_WIN32) += raw-win32.o block-nested-$(CONFIG_POSIX) += raw-posix.o diff --git a/block/blkmirror.c b/block/blkmirror.c new file mode 100644 index 000..1c02710 --- /dev/null +++ b/block/blkmirror.c @@ -0,0 +1,282 @@ +/* + * Block driver for mirrored writes. + * + * Copyright (C) 2011 Red Hat, Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include "block_int.h" + +typedef struct { +BlockDriverState *bs[2]; +} BdrvMirrorState; + +typedef struct DupAIOCB DupAIOCB; + +typedef struct SingleAIOCB { +BlockDriverAIOCB *aiocb; +int finished; +DupAIOCB *parent; +} SingleAIOCB; + +struct DupAIOCB { +BlockDriverAIOCB common; +int count; + +BlockDriverCompletionFunc *cb; +SingleAIOCB aios[2]; +int ret; +}; + +/* Valid blkmirror filenames look like + * blkmirror:path/to/image1:path/to/image2 */ +static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) +{ +BdrvMirrorState *m = bs->opaque; +int ret, escape, i, n; +char *filename2; + +/* Parse the blkmirror: prefix */ +if (strncmp(filename, "blkmirror:", strlen("blkmirror:"))) { +return -EINVAL; +} +filename += strlen("blkmirror:"); + +/* Parse the raw image filename */ +filename2 = qemu_malloc(strlen(filename)+1); +escape = 0; +for (i = n = 0; i < strlen(filename); i++) { +if (!escape && filename[i] == ':') { +break; +} +if (!escape && filename[i] == '\\') { +escape = 1; +} else { +escape = 0; +} + +if (!escape) { +filename2[n++] = filename[i]; +} +} +filename2[n] = '\0'; + +m->bs[0] = bdrv_new(""); +if (m->bs[0] == NULL) { +free(filename2); +return -ENOMEM; +} +ret = bdrv_open(m->bs[0], filename2, flags, NULL); +free(filename2); +if (ret < 0) { +return ret; +} +filename += i + 1; + +m->bs[1] = bdrv_new(""); +if (m->bs[1] == NULL) { +bdrv_delete(m->bs[0]); +return -ENOMEM; +} +ret = bdrv_open(m->bs[1], filename, flags, NULL); +if (ret < 0) { +bdrv_delete(m->bs[0]); +return ret; +} + +return 0; +} + +static void blkmirror_close(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int i; + +for (i = 0; i < 2; i++) { +bdrv_delete(m->bs[i]); +m->bs[i] = NULL; +} +} + +static int blkmirror_flush(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; +int ret; + +ret = bdrv_flush(m->bs[0]); +if (ret < 0) { +return ret; +} + +return bdrv_flush(m->bs[1]); +} + +static int64_t blkmirror_getlength(BlockDriverState *bs) +{ +BdrvMirrorState *m = bs->opaque; + +return bdrv_getlength(m->bs[0]); +} + +static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs, + int64_t sector_num, + QEMUIOVector *qiov, + int nb_sectors, + BlockDriverCompletionFunc *cb, + void *opaque) +{ +BdrvMirrorState *m = bs->opaque; +return bdrv_aio_readv(m->bs[0], sector_num, qiov, nb_sectors, cb, opaque); +} + +static void dup_aio_cancel(BlockDriverAIOCB *blockacb) +{ +DupAIOCB *acb = container_of(blockacb, DupAIOCB, common); +int i; + +for (i = 0 ; i < 2; i++) { +if (!acb->aios[i].finished) { +bdrv_aio_cancel(acb->aios[i].aiocb); +} +} +qemu_aio_release(acb); +} + +static AIOPool dup_aio_pool = { +.aiocb_size = sizeof(DupAIOCB), +.cancel = dup_aio_cancel, +}; + +static void blkmirror_aio_cb(void *opaque, int ret) +{ +SingleAIOCB *scb = opaque; +DupAIOCB *dcb = scb->parent; + +scb->finishe
[Qemu-devel] [PATCH 3/3] Add nocreate option to snapshot_blkdev
Signed-off-by: Federico Simoncelli --- blockdev.c | 14 -- hmp-commands.hx | 16 ++-- hmp.c|4 +++- qapi-schema.json |8 +++- qmp-commands.hx |2 +- 5 files changed, 29 insertions(+), 15 deletions(-) diff --git a/blockdev.c b/blockdev.c index 7a6613a..2c3e10a 100644 --- a/blockdev.c +++ b/blockdev.c @@ -647,7 +647,7 @@ void do_commit(Monitor *mon, const QDict *qdict) void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, bool has_format, const char *format, -Error **errp) +bool has_nocreate, bool nocreate, Error **errp) { BlockDriverState *bs; BlockDriver *drv, *old_drv, *proto_drv; @@ -686,11 +686,13 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, return; } -ret = bdrv_img_create(snapshot_file, format, bs->filename, - bs->drv->format_name, NULL, -1, flags); -if (ret) { -error_set(errp, QERR_UNDEFINED_ERROR); -return; +if (!(has_nocreate && nocreate)) { +ret = bdrv_img_create(snapshot_file, format, bs->filename, + bs->drv->format_name, NULL, -1, flags); +if (ret) { +error_set(errp, QERR_UNDEFINED_ERROR); +return; +} } bdrv_drain_all(); diff --git a/hmp-commands.hx b/hmp-commands.hx index 573b823..ff49412 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -868,14 +868,18 @@ ETEXI { .name = "snapshot_blkdev", -.args_type = "device:B,snapshot-file:s?,format:s?", -.params = "device [new-image-file] [format]", +.args_type = "nocreate:-n,device:B,snapshot-file:s?,format:s?", +.params = "[-n] device [new-image-file] [format]", .help = "initiates a live snapshot\n\t\t\t" "of device. If a new image file is specified, the\n\t\t\t" - "new image file will become the new root image.\n\t\t\t" - "If format is specified, the snapshot file will\n\t\t\t" - "be created in that format. Otherwise the\n\t\t\t" - "snapshot will be internal! (currently unsupported)", + "new image file will be created (unless -n is\n\t\t\t" + "specified) and will become the new root image.\n\t\t\t" + "The -n (nocreate) option is potentially dangerous\n\t\t\t" + "if the image wasn't prepared in the correct way\n\t\t\t" + "by an external manager. If format is specified,\n\t\t\t" + "the snapshot file will be created in that format.\n\t\t\t" + "Otherwise the snapshot will be internal!\n\t\t\t" + "(currently unsupported)", .mhandler.cmd = hmp_snapshot_blkdev, }, diff --git a/hmp.c b/hmp.c index 8ff8c94..44868c7 100644 --- a/hmp.c +++ b/hmp.c @@ -684,6 +684,7 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict) void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) { +int nocreate = qdict_get_try_bool(qdict, "nocreate", 0); const char *device = qdict_get_str(qdict, "device"); const char *filename = qdict_get_try_str(qdict, "snapshot-file"); const char *format = qdict_get_try_str(qdict, "format"); @@ -697,7 +698,8 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) return; } -qmp_blockdev_snapshot_sync(device, filename, !!format, format, &errp); +qmp_blockdev_snapshot_sync(device, filename, + !!format, format, true, nocreate, &errp); hmp_handle_error(mon, &errp); } diff --git a/qapi-schema.json b/qapi-schema.json index d02ee86..ac652c2 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1116,9 +1116,14 @@ # @snapshot-file: the target of the new image. If the file exists, or if it # is a device, the snapshot will be created in the existing # file/device. If does not exist, a new file will be created. +# This behavior can be overridden using the nocreate option. # # @format: #optional the format of the snapshot image, default is 'qcow2'. # +# @nocreate: #optional use the target image avoiding the creation. This is +#a potentially dangerous option if the image wasn't prepared +#in the correct way by an external manager. +# # Returns: nothing on success # If @device is not a valid block device, DeviceNotFound
[Qemu-devel] [PATCH 2/3] Update the blkmirror block driver
Signed-off-by: Federico Simoncelli --- block/blkmirror.c | 26 +- 1 files changed, 13 insertions(+), 13 deletions(-) diff --git a/block/blkmirror.c b/block/blkmirror.c index 1c02710..1cfd2fb 100644 --- a/block/blkmirror.c +++ b/block/blkmirror.c @@ -46,7 +46,7 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) filename += strlen("blkmirror:"); /* Parse the raw image filename */ -filename2 = qemu_malloc(strlen(filename)+1); +filename2 = qemu_vmalloc(strlen(filename)+1); escape = 0; for (i = n = 0; i < strlen(filename); i++) { if (!escape && filename[i] == ':') { @@ -66,11 +66,11 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) m->bs[0] = bdrv_new(""); if (m->bs[0] == NULL) { -free(filename2); +qemu_vfree(filename2); return -ENOMEM; } ret = bdrv_open(m->bs[0], filename2, flags, NULL); -free(filename2); +qemu_vfree(filename2); if (ret < 0) { return ret; } @@ -81,7 +81,7 @@ static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags) bdrv_delete(m->bs[0]); return -ENOMEM; } -ret = bdrv_open(m->bs[1], filename, flags, NULL); +ret = bdrv_open(m->bs[1], filename, flags | BDRV_O_NO_BACKING, NULL); if (ret < 0) { bdrv_delete(m->bs[0]); return ret; @@ -101,17 +101,17 @@ static void blkmirror_close(BlockDriverState *bs) } } -static int blkmirror_flush(BlockDriverState *bs) +static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs) { BdrvMirrorState *m = bs->opaque; int ret; -ret = bdrv_flush(m->bs[0]); +ret = bdrv_co_flush(m->bs[0]); if (ret < 0) { return ret; } -return bdrv_flush(m->bs[1]); +return bdrv_co_flush(m->bs[1]); } static int64_t blkmirror_getlength(BlockDriverState *bs) @@ -242,18 +242,18 @@ static BlockDriverAIOCB *blkmirror_aio_flush(BlockDriverState *bs, return &dcb->common; } -static int blkmirror_discard(BlockDriverState *bs, int64_t sector_num, - int nb_sectors) +static coroutine_fn int blkmirror_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { BdrvMirrorState *m = bs->opaque; int ret; -ret = bdrv_discard(m->bs[0], sector_num, nb_sectors); +ret = bdrv_co_discard(m->bs[0], sector_num, nb_sectors); if (ret < 0) { return ret; } -return bdrv_discard(m->bs[1], sector_num, nb_sectors); +return bdrv_co_discard(m->bs[1], sector_num, nb_sectors); } @@ -266,8 +266,8 @@ static BlockDriver bdrv_blkmirror = { .bdrv_file_open = blkmirror_open, .bdrv_close = blkmirror_close, -.bdrv_flush = blkmirror_flush, -.bdrv_discard = blkmirror_discard, +.bdrv_co_flush_to_disk = blkmirror_co_flush, +.bdrv_co_discard= blkmirror_co_discard, .bdrv_aio_readv = blkmirror_aio_readv, .bdrv_aio_writev= blkmirror_aio_writev, -- 1.7.1
[Qemu-devel] Live Block Migration using Mirroring
Hi, recently I've been working on live block migration combining the live snapshots and the blkmirror patch sent by Marcelo Tosatti few months ago. The design is summarized at this url as "Mirrored-Snapshot": http://www.ovirt.org/wiki/Features/Design/StorageLiveMigration The design assumes that the qemu process can reach both the source and destination storages and no real VM migration between hosts is involved. The principal problem that it tries to solve is moving a VM to a new reachable storage (more space, faster) without temporarily disrupting its services. The following set of patches are implementing the required changes in QEMU. Here it is a quick example of the use case (for consistency with the design at the url above I will use the same step numbers): Preparation === $ mkdir /tmp/{src/dst} $ qemu-img create -f qcow2 /tmp/src/hd0base.qcow2 20G Formatting '/tmp/src/hd0base.qcow2', fmt=qcow2 size=21474836480 encryption=off cluster_size=65536 Step 1 - Initital Scenario == VM1 is running on the src/hd0base. (Where "<=" stands for "uses") [src/hd0base] <= VM1(read-write) $ qemu-system-x86_64 -hda /tmp/src/hd0base.qcow2 -monitor stdio QEMU 1.0.50 monitor - type 'help' for more information (qemu) Step 3 - Mirrored Live Snapshot === A mirrored live snapshot is issued using src/hd0snap1 and dst/hd0snap1 as image files. (Where "<-" stands for "has backing file") [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) ... <- [dst/hd0snap1] <= VM1(write-only) $ qemu-img create -f qcow2 \ -b /tmp/src/hd0base.qcow2 /tmp/src/hd0snap1.qcow2 20G Formatting '/tmp/src/hd0snap1.qcow2', fmt=qcow2 size=21474836480 backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536 $ qemu-img create -f qcow2 \ -b /tmp/dst/hd0base.qcow2 /tmp/dst/hd0snap1.qcow2 20G Formatting '/tmp/dst/hd0snap1.qcow2', fmt=qcow2 size=21474836480 backing_file='/tmp/src/hd0base.qcow2' encryption=off cluster_size=65536 (qemu) snapshot_blkdev -n ide0-hd0 \ blkmirror:/tmp/src/hd0snap1.qcow2:/tmp/dst/hd0snap1.qcow2 blkmirror Step 4 - Backing File Copy == An external manager copies src/hd0base to the destination dst/hd0base. [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) [dst/hd0base] <- [dst/hd0snap1] <= VM1(write-only) $ cp -a /tmp/src/hd0base.qcow2 /tmp/dst/hd0base.qcow2 Step 5 - Final Switch to Destination VM1 is now able to switch to the destination for both read and write operations. [src/hd0base] <- [src/hd0snap1] <= VM1(read-write) (qemu) snapshot_blkdev -n ide0-hd0 /tmp/dst/hd0snap1.qcow2 -- Federico
Re: [Qemu-devel] [vdsm] oVirt Live Snapshots
- Original Message - > From: "Shu Ming" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, libvir-l...@redhat.com, "VDSM Project Development" > , > "Dave Allan" , "Eric Blake" > Sent: Thursday, February 2, 2012 1:59:01 PM > Subject: Re: [vdsm] oVirt Live Snapshots > > Can someone explain what is "DB" in this wiki page? It is the ovirt-engine database, where the VMs/images information and status is stored. That part of the wiki should be improved. > See, > > > Live snapshots operation extend regular snapshots as follow: > > • Create a locked snapshot in DB -- Federico
[Qemu-devel] oVirt Live Snapshots
Hi, oVirt, and more specifically VDSM, is currently implementing the live snapshot feature using the API/commands provided by libvirt and qemu. It would be great if you could review the design and the current open issues at: http://ovirt.org/wiki/Live_Snapshots Thank you, -- Federico
Re: [Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to avoid image creation
- Original Message - > From: "Dor Laor" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, aba...@redhat.com, "Kevin Wolf" > Sent: Friday, October 7, 2011 12:45:06 AM > Subject: Re: [Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to > avoid image creation > > On 10/03/2011 06:09 PM, Federico Simoncelli wrote: > > Add the new option [-n] for snapshot_blkdev to avoid the image > > creation. > > The file provided as [new-image-file] is considered as already > > initialized > > and will be used after passing a check for the backing file. > > Seems ok to me as a way to go around fdget and still have selinux > gain. > Worth to get Kevin's view too. Hi Kevin, any news on this? > Federico, would you like to ack or extend the design: > http://wiki.qemu.org/Features/Snapshots Dor, should I do it now or only after my patch is acked? -- Federico
Re: [Qemu-devel] New option for snapshot_blkdev to avoid image creation
- Original Message - > From: "Stefan Hajnoczi" > To: "Federico Simoncelli" > Cc: qemu-devel@nongnu.org, aba...@redhat.com, dl...@redhat.com > Sent: Tuesday, October 4, 2011 9:33:48 AM > Subject: Re: [Qemu-devel] New option for snapshot_blkdev to avoid image > creation > > On Mon, Oct 03, 2011 at 04:09:01PM +, Federico Simoncelli wrote: > > In some situations might be useful to let qemu use an image that > > was > > prepared for a live snapshot. > > The advantage is that creating the snapshot file outside of the > > qemu > > process we can use the whole range of options provided by the > > format > > (eg for qcow2: encryption, cluster_size and preallocation). > > It is also possible to pre-set a relative path to the backing file > > (now it is created by default as absolute path). > > In the long run it can also avoid the danger of reimplementing > > qemu-img > > inside qemu (if we wanted to expose such options when a snapshot is > > requested). > > When the image file is created based on the backing file size: > > $ qemu-img create -f qcow2 -o backing_file=master.img vm001.qcow2 > > It turns out that bdrv_img_create() opens the backing file with > read/write permissions. This is generally a bad idea but especially > dangerous when the VM currently has the image file open already since > image formats are not designed for multiple initiators (clustering). Hi Stefan, are you sure that bdrv_img_create opens the backing file with read/write permissions? After a quick check I can't see that happening: $ ./qemu-img create -f qcow2 /tmp/master.img 1G Formatting '/tmp/master.img', fmt=qcow2 size=1073741824 encryption=off cluster_size=65536 $ strace -e trace=open ./qemu-img create -f qcow2 -o backing_file=/tmp/master.img /tmp/vm001.qcow2 [...] open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or directory) open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or directory) open("/tmp/master.img", O_RDONLY|O_NONBLOCK) = 3 open("/tmp/master.img", O_RDONLY|O_NONBLOCK) = 3 open("/tmp/master.img", O_RDONLY|O_DSYNC|O_CLOEXEC) = 3 open("/tmp/master.img", O_RDONLY|O_NONBLOCK) = 3 open("/tmp/master.img", O_RDONLY|O_NONBLOCK) = 3 open("/tmp/master.img", O_RDONLY|O_CLOEXEC) = 3 Formatting '/tmp/vm001.qcow2', fmt=qcow2 size=1073741824 backing_file='/tmp/master.img' encryption=off cluster_size=65536 open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or directory) open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = -1 ENOENT (No such file or directory) open("/tmp/vm001.qcow2", O_WRONLY|O_CREAT|O_TRUNC, 0644) = 6 open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = 6 open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = 6 open("/tmp/vm001.qcow2", O_RDWR|O_DSYNC|O_CLOEXEC) = 6 open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = 6 open("/tmp/vm001.qcow2", O_RDONLY|O_NONBLOCK) = 6 open("/tmp/vm001.qcow2", O_RDWR|O_CLOEXEC) = 6 -- Federico
[Qemu-devel] [PATCH] qemu: new option for snapshot_blkdev to avoid image creation
Add the new option [-n] for snapshot_blkdev to avoid the image creation. The file provided as [new-image-file] is considered as already initialized and will be used after passing a check for the backing file. Signed-off-by: Federico Simoncelli --- blockdev.c | 54 -- hmp-commands.hx |7 --- qmp-commands.hx |4 ++-- 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/blockdev.c b/blockdev.c index 0827bf7..bd46808 100644 --- a/blockdev.c +++ b/blockdev.c @@ -550,8 +550,53 @@ void do_commit(Monitor *mon, const QDict *qdict) } } +static int check_snapshot_file(const char *filename, const char *oldfilename, + int flags, BlockDriver *drv) +{ +BlockDriverState *bs; +char bak_filename[1024], *abs_filename; +int ret = 0; + +bs = bdrv_new(""); +if (!bs) { +return -1; +} + +ret = bdrv_open(bs, filename, flags, drv); +if (ret) { +qerror_report(QERR_OPEN_FILE_FAILED, filename); +goto err0; +} + +if (bs->backing_file) { +path_combine(bak_filename, sizeof(bak_filename), + filename, bs->backing_file); + +abs_filename = realpath(bak_filename, NULL); +if (!abs_filename) { +ret = -1; +goto err1; +} + +if (strcmp(abs_filename, oldfilename)) { +qerror_report(QERR_OPEN_FILE_FAILED, filename); +ret = -1; +} + +free(abs_filename); +} + +err1: +bdrv_close(bs); + +err0: +bdrv_delete(bs); +return ret; +} + int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, QObject **ret_data) { +const int nocreate = qdict_get_try_bool(qdict, "nocreate", 0); const char *device = qdict_get_str(qdict, "device"); const char *filename = qdict_get_try_str(qdict, "snapshot-file"); const char *format = qdict_get_try_str(qdict, "format"); @@ -597,8 +642,13 @@ int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, QObject **ret_data) goto out; } -ret = bdrv_img_create(filename, format, bs->filename, - bs->drv->format_name, NULL, -1, flags); +if (nocreate) { +ret = check_snapshot_file(filename, bs->filename, flags, drv); +} else { +ret = bdrv_img_create(filename, format, bs->filename, + bs->drv->format_name, NULL, -1, flags); +} + if (ret) { goto out; } diff --git a/hmp-commands.hx b/hmp-commands.hx index 9e1cca8..eb9fcd4 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -840,11 +840,12 @@ ETEXI { .name = "snapshot_blkdev", -.args_type = "device:B,snapshot-file:s?,format:s?", -.params = "device [new-image-file] [format]", +.args_type = "nocreate:-n,device:B,snapshot-file:s?,format:s?", +.params = "[-n] device [new-image-file] [format]", .help = "initiates a live snapshot\n\t\t\t" "of device. If a new image file is specified, the\n\t\t\t" - "new image file will become the new root image.\n\t\t\t" + "new image file will be created (unless -n is\n\t\t\t" + "specified) and will become the new root image.\n\t\t\t" "If format is specified, the snapshot file will\n\t\t\t" "be created in that format. Otherwise the\n\t\t\t" "snapshot will be internal! (currently unsupported)", diff --git a/qmp-commands.hx b/qmp-commands.hx index d83bce5..7af36d8 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -695,8 +695,8 @@ EQMP { .name = "blockdev-snapshot-sync", -.args_type = "device:B,snapshot-file:s?,format:s?", -.params = "device [new-image-file] [format]", +.args_type = "nocreate:-n,device:B,snapshot-file:s?,format:s?", +.params = "[-n] device [new-image-file] [format]", .user_print = monitor_user_noop, .mhandler.cmd_new = do_snapshot_blkdev, }, -- 1.7.1
[Qemu-devel] New option for snapshot_blkdev to avoid image creation
In some situations might be useful to let qemu use an image that was prepared for a live snapshot. The advantage is that creating the snapshot file outside of the qemu process we can use the whole range of options provided by the format (eg for qcow2: encryption, cluster_size and preallocation). It is also possible to pre-set a relative path to the backing file (now it is created by default as absolute path). In the long run it can also avoid the danger of reimplementing qemu-img inside qemu (if we wanted to expose such options when a snapshot is requested). -- Federico.
[Qemu-devel] [PATCHv4] qemu-img: Add cache command line option
qemu-img currently writes disk images using writeback and filling up the cache buffers which are then flushed by the kernel preventing other processes from accessing the storage. This is particularly bad in cluster environments where time-based algorithms might be in place and accessing the storage within certain timeouts is critical. This patch adds the option to choose a cache method when writing disk images. Signed-off-by: Federico Simoncelli --- qemu-img-cmds.hx |6 ++-- qemu-img.c | 80 +- 2 files changed, 70 insertions(+), 16 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 3072d38..2b70618 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -22,13 +22,13 @@ STEXI ETEXI DEF("commit", img_commit, -"commit [-f fmt] filename") +"commit [-f fmt] [-t cache] filename") STEXI @item commit [-f @var{fmt}] @var{filename} ETEXI DEF("convert", img_convert, -"convert [-c] [-p] [-f fmt] [-O output_fmt] [-o options] [-s snapshot_name] filename [filename2 [...]] output_filename") +"convert [-c] [-p] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] filename [filename2 [...]] output_filename") STEXI @item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI @@ -46,7 +46,7 @@ STEXI ETEXI DEF("rebase", img_rebase, -"rebase [-f fmt] [-p] [-u] -b backing_file [-F backing_fmt] filename") +"rebase [-f fmt] [-t cache] [-p] [-u] -b backing_file [-F backing_fmt] filename") STEXI @item rebase [-f @var{fmt}] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} ETEXI diff --git a/qemu-img.c b/qemu-img.c index 4f162d1..f904e32 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -40,6 +40,7 @@ typedef struct img_cmd_t { /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB +#define BDRV_DEFAULT_CACHE "writeback" static void format_print(void *opaque, const char *name) { @@ -64,6 +65,8 @@ static void help(void) "Command parameters:\n" " 'filename' is a disk image filename\n" " 'fmt' is the disk image format. It is guessed automatically in most cases\n" + " 'cache' is the cache mode used to write the output disk image, the valid\n" + "options are: 'none', 'writeback' (default), 'writethrough' and 'unsafe'\n" " 'size' is the disk image size in bytes. Optional suffixes\n" "'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n" "and T (terabyte, 1024G) are supported. 'b' is ignored.\n" @@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size) } #endif +static int set_cache_flag(const char *mode, int *flags) +{ +*flags &= ~BDRV_O_CACHE_MASK; + +if (!strcmp(mode, "none") || !strcmp(mode, "off")) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NOCACHE; +} else if (!strcmp(mode, "writeback")) { +*flags |= BDRV_O_CACHE_WB; +} else if (!strcmp(mode, "unsafe")) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NO_FLUSH; +} else if (!strcmp(mode, "writethrough")) { +/* this is the default */ +} else { +return -1; +} + +return 0; +} + static int print_block_option_help(const char *filename, const char *fmt) { BlockDriver *drv, *proto_drv; @@ -441,13 +465,14 @@ static int img_check(int argc, char **argv) static int img_commit(int argc, char **argv) { -int c, ret; -const char *filename, *fmt; +int c, ret, flags; +const char *filename, *fmt, *cache; BlockDriverState *bs; fmt = NULL; +cache = BDRV_DEFAULT_CACHE; for(;;) { -c = getopt(argc, argv, "f:h"); +c = getopt(argc, argv, "f:ht:"); if (c == -1) { break; } @@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv) case 'f': fmt = optarg; break; +case 't': +cache = optarg; +break; } } if (optind >= argc) { @@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv) } filename = argv[optind++]; -bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, &flags); +if (ret < 0) { +error_report("Invalid cache option: %s\n"
Re: [Qemu-devel] [PATCH] qemu-img: Add cache command line option
- Original Message - > From: "Christoph Hellwig" > To: "Federico Simoncelli" > Cc: k...@vger.kernel.org, kw...@redhat.com, qemu-devel@nongnu.org, > a...@redhat.com > Sent: Thursday, June 16, 2011 4:28:09 PM > Subject: Re: [Qemu-devel] [PATCH] qemu-img: Add cache command line option > On Wed, Jun 15, 2011 at 09:46:10AM -0400, Federico Simoncelli wrote: > > qemu-img currently writes disk images using writeback and filling > > up the cache buffers which are then flushed by the kernel preventing > > other processes from accessing the storage. > > This is particularly bad in cluster environments where time-based > > algorithms might be in place and accessing the storage within > > certain timeouts is critical. > > This patch adds the option to choose a cache method when writing > > disk images. > > Allowing to chose the mode is of course fine, but what about also > choosing a good default? writethrough doesn't really make any sense > for qemu-img, given that we can trivially flush the cache at the end > of the operations. I'd also say that using the buffer cache doesn't > make sense either, as there is little point in caching these > operations. I totally agree with you, I didn't change the default to increase the chances for my patch to be accepted. If we want cache=none as default we can easily change: -#define BDRV_DEFAULT_CACHE "writeback" +#define BDRV_DEFAULT_CACHE "none" -- Federico
[Qemu-devel] [PATCHv3] qemu-img: Add cache command line option
qemu-img currently writes disk images using writeback and filling up the cache buffers which are then flushed by the kernel preventing other processes from accessing the storage. This is particularly bad in cluster environments where time-based algorithms might be in place and accessing the storage within certain timeouts is critical. This patch adds the option to choose a cache method when writing disk images. Signed-off-by: Federico Simoncelli --- qemu-img-cmds.hx |6 ++-- qemu-img.c | 80 +- 2 files changed, 70 insertions(+), 16 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 3072d38..2b70618 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -22,13 +22,13 @@ STEXI ETEXI DEF("commit", img_commit, -"commit [-f fmt] filename") +"commit [-f fmt] [-t cache] filename") STEXI @item commit [-f @var{fmt}] @var{filename} ETEXI DEF("convert", img_convert, -"convert [-c] [-p] [-f fmt] [-O output_fmt] [-o options] [-s snapshot_name] filename [filename2 [...]] output_filename") +"convert [-c] [-p] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] filename [filename2 [...]] output_filename") STEXI @item convert [-c] [-f @var{fmt}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI @@ -46,7 +46,7 @@ STEXI ETEXI DEF("rebase", img_rebase, -"rebase [-f fmt] [-p] [-u] -b backing_file [-F backing_fmt] filename") +"rebase [-f fmt] [-t cache] [-p] [-u] -b backing_file [-F backing_fmt] filename") STEXI @item rebase [-f @var{fmt}] [-u] -b @var{backing_file} [-F @var{backing_fmt}] @var{filename} ETEXI diff --git a/qemu-img.c b/qemu-img.c index 4f162d1..c2a106e 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -40,6 +40,7 @@ typedef struct img_cmd_t { /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB +#define BDRV_DEFAULT_CACHE "writeback" static void format_print(void *opaque, const char *name) { @@ -64,6 +65,8 @@ static void help(void) "Command parameters:\n" " 'filename' is a disk image filename\n" " 'fmt' is the disk image format. It is guessed automatically in most cases\n" + " 'cache' is the cache mode used to write the output disk image, the valid\n" + "options are: 'none', 'writeback' (default), 'writethrough' and 'unsafe'\n" " 'size' is the disk image size in bytes. Optional suffixes\n" "'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n" "and T (terabyte, 1024G) are supported. 'b' is ignored.\n" @@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size) } #endif +static int set_cache_flag(const char *mode, int *flags) +{ +*flags &= ~BDRV_O_CACHE_MASK; + +if (!strcmp(mode, "none") || !strcmp(mode, "off")) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NOCACHE; +} else if (!strcmp(mode, "writeback")) { +*flags |= BDRV_O_CACHE_WB; +} else if (!strcmp(mode, "unsafe")) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NO_FLUSH; +} else if (!strcmp(mode, "writethrough")) { +/* this is the default */ +} else { +return -1; +} + +return 0; +} + static int print_block_option_help(const char *filename, const char *fmt) { BlockDriver *drv, *proto_drv; @@ -441,13 +465,14 @@ static int img_check(int argc, char **argv) static int img_commit(int argc, char **argv) { -int c, ret; -const char *filename, *fmt; +int c, ret, flags; +const char *filename, *fmt, *cache; BlockDriverState *bs; fmt = NULL; +cache = BDRV_DEFAULT_CACHE; for(;;) { -c = getopt(argc, argv, "f:h"); +c = getopt(argc, argv, "f:ht:"); if (c == -1) { break; } @@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv) case 'f': fmt = optarg; break; +case 't': +cache = optarg; +break; } } if (optind >= argc) { @@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv) } filename = argv[optind++]; -bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, &flags); +if (ret < 0) { +error_report("Invalid cache option: %s\n"
[Qemu-devel] [PATCHv2] qemu-img: Add cache command line option
qemu-img currently writes disk images using writeback and filling up the cache buffers which are then flushed by the kernel preventing other processes from accessing the storage. This is particularly bad in cluster environments where time-based algorithms might be in place and accessing the storage within certain timeouts is critical. This patch adds the option to choose a cache method when writing disk images. Signed-off-by: Federico Simoncelli --- qemu-img.c | 80 ++- 1 files changed, 67 insertions(+), 13 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 4f162d1..c2a106e 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -40,6 +40,7 @@ typedef struct img_cmd_t { /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB +#define BDRV_DEFAULT_CACHE "writeback" static void format_print(void *opaque, const char *name) { @@ -64,6 +65,8 @@ static void help(void) "Command parameters:\n" " 'filename' is a disk image filename\n" " 'fmt' is the disk image format. It is guessed automatically in most cases\n" + " 'cache' is the cache mode used to write the output disk image, the valid\n" + "options are: 'none', 'writeback' (default), 'writethrough' and 'unsafe'\n" " 'size' is the disk image size in bytes. Optional suffixes\n" "'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n" "and T (terabyte, 1024G) are supported. 'b' is ignored.\n" @@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size) } #endif +static int set_cache_flag(const char *mode, int *flags) +{ +*flags &= ~BDRV_O_CACHE_MASK; + +if (!strcmp(mode, "none") || !strcmp(mode, "off")) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NOCACHE; +} else if (!strcmp(mode, "writeback")) { +*flags |= BDRV_O_CACHE_WB; +} else if (!strcmp(mode, "unsafe")) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NO_FLUSH; +} else if (!strcmp(mode, "writethrough")) { +/* this is the default */ +} else { +return -1; +} + +return 0; +} + static int print_block_option_help(const char *filename, const char *fmt) { BlockDriver *drv, *proto_drv; @@ -441,13 +465,14 @@ static int img_check(int argc, char **argv) static int img_commit(int argc, char **argv) { -int c, ret; -const char *filename, *fmt; +int c, ret, flags; +const char *filename, *fmt, *cache; BlockDriverState *bs; fmt = NULL; +cache = BDRV_DEFAULT_CACHE; for(;;) { -c = getopt(argc, argv, "f:h"); +c = getopt(argc, argv, "f:ht:"); if (c == -1) { break; } @@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv) case 'f': fmt = optarg; break; +case 't': +cache = optarg; +break; } } if (optind >= argc) { @@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv) } filename = argv[optind++]; -bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, &flags); +if (ret < 0) { +error_report("Invalid cache option: %s\n", cache); +return -1; +} + +bs = bdrv_new_open(filename, fmt, flags); if (!bs) { return 1; } @@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n, static int img_convert(int argc, char **argv) { int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors; -int progress = 0; -const char *fmt, *out_fmt, *out_baseimg, *out_filename; +int progress = 0, flags; +const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename; BlockDriver *drv, *proto_drv; BlockDriverState **bs = NULL, *out_bs = NULL; int64_t total_sectors, nb_sectors, sector_num, bs_offset; @@ -608,10 +643,11 @@ static int img_convert(int argc, char **argv) fmt = NULL; out_fmt = "raw"; +cache = BDRV_DEFAULT_CACHE; out_baseimg = NULL; compress = 0; for(;;) { -c = getopt(argc, argv, "f:O:B:s:hce6o:p"); +c = getopt(argc, argv, "f:O:B:s:hce6o:pt:"); if (c == -1) { break; } @@ -649,6 +685,9 @@ static int img_convert(int argc, char **argv) case 'p': progress = 1; break; +case 't': +
[Qemu-devel] [PATCH] qemu-img: Add cache command line option
qemu-img currently writes disk images using writeback and filling up the cache buffers which are then flushed by the kernel preventing other processes from accessing the storage. This is particularly bad in cluster environments where time-based algorithms might be in place and accessing the storage within certain timeouts is critical. This patch adds the option to choose a cache method when writing disk images. Signed-off-by: Federico Simoncelli --- qemu-img.c | 81 ++- 1 files changed, 68 insertions(+), 13 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index 4f162d1..7609db5 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -40,6 +40,7 @@ typedef struct img_cmd_t { /* Default to cache=writeback as data integrity is not important for qemu-tcg. */ #define BDRV_O_FLAGS BDRV_O_CACHE_WB +#define BDRV_DEFAULT_CACHE "writeback" static void format_print(void *opaque, const char *name) { @@ -64,6 +65,8 @@ static void help(void) "Command parameters:\n" " 'filename' is a disk image filename\n" " 'fmt' is the disk image format. It is guessed automatically in most cases\n" + " 'cache' is the cache mode used to write the output disk image, the valid\n" + "options are: 'none', 'writeback' (default), 'writethrough' and 'unsafe'\n" " 'size' is the disk image size in bytes. Optional suffixes\n" "'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M)\n" "and T (terabyte, 1024G) are supported. 'b' is ignored.\n" @@ -180,6 +183,27 @@ static int read_password(char *buf, int buf_size) } #endif +static int set_cache_flag(const char *mode, int *flags) +{ +*flags &= ~BDRV_O_CACHE_MASK; + +if (!strcmp(mode, "none") || !strcmp(mode, "off")) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NOCACHE; +} else if (!strcmp(mode, "writeback")) { +*flags |= BDRV_O_CACHE_WB; +} else if (!strcmp(mode, "unsafe")) { +*flags |= BDRV_O_CACHE_WB; +*flags |= BDRV_O_NO_FLUSH; +} else if (!strcmp(mode, "writethrough")) { +/* this is the default */ +} else { +return -1; +} + +return 0; +} + static int print_block_option_help(const char *filename, const char *fmt) { BlockDriver *drv, *proto_drv; @@ -441,13 +465,14 @@ static int img_check(int argc, char **argv) static int img_commit(int argc, char **argv) { -int c, ret; -const char *filename, *fmt; +int c, ret, flags; +const char *filename, *fmt, *cache; BlockDriverState *bs; fmt = NULL; +cache = BDRV_DEFAULT_CACHE; for(;;) { -c = getopt(argc, argv, "f:h"); +c = getopt(argc, argv, "f:ht:"); if (c == -1) { break; } @@ -459,6 +484,9 @@ static int img_commit(int argc, char **argv) case 'f': fmt = optarg; break; +case 't': +cache = optarg; +break; } } if (optind >= argc) { @@ -466,7 +494,14 @@ static int img_commit(int argc, char **argv) } filename = argv[optind++]; -bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR); +flags = BDRV_O_RDWR; +ret = set_cache_flag(cache, &flags); +if (ret < 0) { +error_report("Invalid cache option: %s\n", cache); +return -1; +} + +bs = bdrv_new_open(filename, fmt, flags); if (!bs) { return 1; } @@ -591,8 +626,8 @@ static int compare_sectors(const uint8_t *buf1, const uint8_t *buf2, int n, static int img_convert(int argc, char **argv) { int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors; -int progress = 0; -const char *fmt, *out_fmt, *out_baseimg, *out_filename; +int progress = 0, flags; +const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename; BlockDriver *drv, *proto_drv; BlockDriverState **bs = NULL, *out_bs = NULL; int64_t total_sectors, nb_sectors, sector_num, bs_offset; @@ -608,10 +643,11 @@ static int img_convert(int argc, char **argv) fmt = NULL; out_fmt = "raw"; +cache = BDRV_DEFAULT_CACHE; out_baseimg = NULL; compress = 0; for(;;) { -c = getopt(argc, argv, "f:O:B:s:hce6o:p"); +c = getopt(argc, argv, "f:O:B:s:hce6o:pt:"); if (c == -1) { break; } @@ -649,6 +685,9 @@ static int img_convert(int argc, char **argv) case 'p': progress = 1; break; +case 't': +