[Qemu-block] [PULL for-2.9 2/3] qapi/curl: Extend and fix blockdev-add schema
From: Max ReitzThe curl block driver accepts more options than just "filename"; also, the URL is actually expected to be passed through the "url" option instead of "filename". Signed-off-by: Max Reitz Reviewed-by: Jeff Cody Reviewed-by: Eric Blake Message-id: 20170331120431.1767-2-mre...@redhat.com Signed-off-by: Jeff Cody --- qapi/block-core.json | 103 ++- 1 file changed, 94 insertions(+), 9 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index 4e8e4e3..8de39d1 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -2737,16 +2737,101 @@ '*debug': 'int' } } ## -# @BlockdevOptionsCurl: +# @BlockdevOptionsCurlBase: # -# Driver specific block device options for the curl backend. +# Driver specific block device options shared by all protocols supported by the +# curl backend. # -# @filename:path to the image file +# @url: URL of the image file +# +# @readahead: Size of the read-ahead cache; must be a multiple of +# 512 (defaults to 256 kB) +# +# @timeout: Timeout for connections, in seconds (defaults to 5) +# +# @username:Username for authentication (defaults to none) +# +# @password-secret: ID of a QCryptoSecret object providing a password +# for authentication (defaults to no password) +# +# @proxy-username: Username for proxy authentication (defaults to none) +# +# @proxy-password-secret: ID of a QCryptoSecret object providing a password +# for proxy authentication (defaults to no password) +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsCurlBase', + 'data': { 'url': 'str', +'*readahead': 'int', +'*timeout': 'int', +'*username': 'str', +'*password-secret': 'str', +'*proxy-username': 'str', +'*proxy-password-secret': 'str' } } + +## +# @BlockdevOptionsCurlHttp: +# +# Driver specific block device options for HTTP connections over the curl +# backend. URLs must start with "http://;. +# +# @cookie: List of cookies to set; format is +# "name1=content1; name2=content2;" as explained by +# CURLOPT_COOKIE(3). Defaults to no cookies. +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsCurlHttp', + 'base': 'BlockdevOptionsCurlBase', + 'data': { '*cookie': 'str' } } + +## +# @BlockdevOptionsCurlHttps: +# +# Driver specific block device options for HTTPS connections over the curl +# backend. URLs must start with "https://;. +# +# @cookie: List of cookies to set; format is +# "name1=content1; name2=content2;" as explained by +# CURLOPT_COOKIE(3). Defaults to no cookies. +# +# @sslverify: Whether to verify the SSL certificate's validity (defaults to +# true) +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsCurlHttps', + 'base': 'BlockdevOptionsCurlBase', + 'data': { '*cookie': 'str', +'*sslverify': 'bool' } } + +## +# @BlockdevOptionsCurlFtp: +# +# Driver specific block device options for FTP connections over the curl +# backend. URLs must start with "ftp://;. +# +# Since: 2.9 +## +{ 'struct': 'BlockdevOptionsCurlFtp', + 'base': 'BlockdevOptionsCurlBase', + 'data': { } } + +## +# @BlockdevOptionsCurlFtps: +# +# Driver specific block device options for FTPS connections over the curl +# backend. URLs must start with "ftps://". +# +# @sslverify: Whether to verify the SSL certificate's validity (defaults to +# true) # # Since: 2.9 ## -{ 'struct': 'BlockdevOptionsCurl', - 'data': { 'filename': 'str' } } +{ 'struct': 'BlockdevOptionsCurlFtps', + 'base': 'BlockdevOptionsCurlBase', + 'data': { '*sslverify': 'bool' } } ## # @BlockdevOptionsNbd: @@ -2815,13 +2900,13 @@ 'cloop': 'BlockdevOptionsGenericFormat', 'dmg':'BlockdevOptionsGenericFormat', 'file': 'BlockdevOptionsFile', - 'ftp':'BlockdevOptionsCurl', - 'ftps': 'BlockdevOptionsCurl', + 'ftp':'BlockdevOptionsCurlFtp', + 'ftps': 'BlockdevOptionsCurlFtps', 'gluster':'BlockdevOptionsGluster', 'host_cdrom': 'BlockdevOptionsFile', 'host_device':'BlockdevOptionsFile', - 'http': 'BlockdevOptionsCurl', - 'https': 'BlockdevOptionsCurl', + 'http': 'BlockdevOptionsCurlHttp', + 'https': 'BlockdevOptionsCurlHttps', 'iscsi': 'BlockdevOptionsIscsi', 'luks': 'BlockdevOptionsLUKS', 'nbd':'BlockdevOptionsNbd', -- 2.9.3
[Qemu-block] [PULL for-2.9 0/3] Block patches
The following changes since commit 95b31d709ba343ad237c3630047ee7438bac4065: Merge remote-tracking branch 'remotes/awilliam/tags/vfio-updates-20170331.0' into staging (2017-03-31 18:06:13 +0100) are available in the git repository at: git://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request for you to fetch changes up to 34634ca28688652198f77d7001c0f1e204434663: block/curl: Check protocol prefix (2017-03-31 15:53:22 -0400) Block patches for -rc3 Eric Blake (1): rbd: Fix regression in legacy key/values containing escaped : Max Reitz (2): qapi/curl: Extend and fix blockdev-add schema block/curl: Check protocol prefix block/curl.c | 10 + block/rbd.c | 83 + qapi/block-core.json | 103 ++- 3 files changed, 146 insertions(+), 50 deletions(-) -- 2.9.3
[Qemu-block] [PULL for-2.9 3/3] block/curl: Check protocol prefix
From: Max ReitzIf the user has explicitly specified a block driver and thus a protocol, we have to make sure the URL's protocol prefix matches. Otherwise the latter will silently override the former which might catch some users by surprise. Signed-off-by: Max Reitz Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Jeff Cody Reviewed-by: Eric Blake Message-id: 20170331120431.1767-3-mre...@redhat.com Signed-off-by: Jeff Cody --- block/curl.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/block/curl.c b/block/curl.c index 34dbd33..2708d57 100644 --- a/block/curl.c +++ b/block/curl.c @@ -659,6 +659,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, const char *cookie; double d; const char *secretid; +const char *protocol_delimiter; static int inited = 0; @@ -700,6 +701,15 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, goto out_noclean; } +if (!strstart(file, bs->drv->protocol_name, _delimiter) || +!strstart(protocol_delimiter, "://", NULL)) +{ +error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not " + "start with '%s://')", bs->drv->protocol_name, file, + bs->drv->protocol_name); +goto out_noclean; +} + s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME)); secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET); -- 2.9.3
[Qemu-block] [PULL for-2.9 1/3] rbd: Fix regression in legacy key/values containing escaped :
From: Eric BlakeCommit c7cacb3 accidentally broke legacy key-value parsing through pseudo-filename parsing of -drive file=rbd://..., for any key that contains an escaped ':'. Such a key is surprisingly common, thanks to mon_host specifying a 'host:port' string. The break happens because passing things from QDict through QemuOpts back to another QDict requires that we pack our parsed key/value pairs into a string, and then reparse that string, but the intermediate string that we created ("key1=value1:key2=value2") lost the \: escaping that was present in the original, so that we could no longer see which : were used as separators vs. those used as part of the original input. Fix it by collecting the key/value pairs through a QList, and sending that list on a round trip through a JSON QString (as in '["key1","value1","key2","value2"]') on its way through QemuOpts, rather than hand-rolling our own string. Since the string is only handled internally, this was faster than creating a full-blown struct of '[{"key1":"value1"},{"key2":"value2"}]', and safer at guaranteeing order compared to '{"key1":"value1","key2":"value2"}'. It would be nicer if we didn't have to round-trip through QemuOpts in the first place, but that's a much bigger task for later. Reproducer: ./x86_64-softmmu/qemu-system-x86_64 -nodefaults -nographic -qmp stdio \ -drive 'file=rbd:volumes/volume-ea141b5c-cdb3-4765-910d-e7008b209a70'\ ':id=compute:key=AQAVkvxXABAA9ZxWFYdRmV+DSwKr7BKKXg=='\ ':auth_supported=cephx\;none:mon_host=192.168.1.2\:6789'\ ',format=raw,if=none,id=drive-virtio-disk0,'\ 'serial=ea141b5c-cdb3-4765-910d-e7008b209a70,cache=writeback' Even without an RBD setup, this serves a test of whether we get the incorrect parser error of: qemu-system-x86_64: -drive file=rbd:...cache=writeback: conf option 6789 has no value or the correct behavior of hanging while trying to connect to the requested mon_host of 192.168.1.2:6789. Reported-by: Alexandru Avadanii Signed-off-by: Eric Blake Reviewed-by: Jeff Cody Reviewed-by: Max Reitz Message-id: 20170331152730.12514-1-ebl...@redhat.com Signed-off-by: Jeff Cody --- block/rbd.c | 83 +++-- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index 498322b..fbdb131 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -20,6 +20,7 @@ #include "crypto/secret.h" #include "qemu/cutils.h" #include "qapi/qmp/qstring.h" +#include "qapi/qmp/qjson.h" /* * When specifying the image filename use: @@ -135,18 +136,16 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, Error **errp) { const char *start; -char *p, *buf, *keypairs; +char *p, *buf; +QList *keypairs = NULL; char *found_str; -size_t max_keypair_size; if (!strstart(filename, "rbd:", )) { error_setg(errp, "File name must start with 'rbd:'"); return; } -max_keypair_size = strlen(start) + 1; buf = g_strdup(start); -keypairs = g_malloc0(max_keypair_size); p = buf; found_str = qemu_rbd_next_tok(p, '/', ); @@ -194,33 +193,30 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, } else if (!strcmp(name, "id")) { qdict_put(options, "user" , qstring_from_str(value)); } else { -/* FIXME: This is pretty ugly, and not the right way to do this. - *These should be contained in a structure, and then - *passed explicitly as individual key/value pairs to - *rados. Consider this legacy code that needs to be - *updated. */ -char *tmp = g_malloc0(max_keypair_size); -/* only use a delimiter if it is not the first keypair found */ -/* These are sets of unknown key/value pairs we'll pass along - * to ceph */ -if (keypairs[0]) { -snprintf(tmp, max_keypair_size, ":%s=%s", name, value); -pstrcat(keypairs, max_keypair_size, tmp); -} else { -snprintf(keypairs, max_keypair_size, "%s=%s", name, value); +/* + * We pass these internally to qemu_rbd_set_keypairs(), so + * we can get away with the simpler list of [ "key1", + * "value1", "key2", "value2" ] rather than a raw dict + * { "key1": "value1", "key2": "value2" } where we can't + * guarantee order, or even a more correct but complex + * [ { "key1": "value1" }, { "key2": "value2" } ] + */ +if (!keypairs) { +keypairs = qlist_new(); } -g_free(tmp); +qlist_append(keypairs, qstring_from_str(name)); +
[Qemu-block] [RFC 10/19] sysbus-ahci: Remove user_creatable flag
The sysbus-ahci devices are supposed to be created and wired by code from other devices, like calxeda_init() and xlnx_zynqmp_realize(), and won't work with -device. Remove the user_creatable flag from the device class. Cc: John SnowCc: qemu-block@nongnu.org Cc: Rob Herring Cc: Peter Maydell Cc: Alistair Francis Cc: "Edgar E. Iglesias" Signed-off-by: Eduardo Habkost --- hw/ide/ahci.c | 5 - 1 file changed, 5 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index b1b7780d56..68f2ce09ee 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1721,11 +1721,6 @@ static void sysbus_ahci_class_init(ObjectClass *klass, void *data) dc->props = sysbus_ahci_properties; dc->reset = sysbus_ahci_reset; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -/* - * FIXME: Set only for compatibility on q35 machine-type. - * Probably never meant to be user-creatable - */ -dc->user_creatable = true; } static const TypeInfo sysbus_ahci_info = { -- 2.11.0.259.g40922b1
[Qemu-block] [RFC 11/19] allwinner-ahci: Remove user_creatable flag
allwinner-ahci needs to be created and wired by the alwinner-a10 device, and won't work with -device. Remove the user_creatable flag from the device class. Cc: John SnowCc: qemu-block@nongnu.org Cc: Beniamino Galvani Cc: Peter Maydell Cc: qemu-...@nongnu.org Signed-off-by: Eduardo Habkost --- hw/ide/ahci.c | 5 - 1 file changed, 5 deletions(-) diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c index 68f2ce09ee..f60826d6e0 100644 --- a/hw/ide/ahci.c +++ b/hw/ide/ahci.c @@ -1815,11 +1815,6 @@ static void allwinner_ahci_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); dc->vmsd = _allwinner_ahci; -/* - * FIXME: Set only for compatibility on q35 machine-type. - * Probably never meant to be user-creatable - */ -dc->user_creatable = true; } static const TypeInfo allwinner_ahci_info = { -- 2.11.0.259.g40922b1
[Qemu-block] [RFC 03/19] sysbus: Set user_creatable=false by default on TYPE_SYS_BUS_DEVICE
commit 33cd52b5d7b9adfd009e95f07e6c64dd88ae2a31 unset cannot_instantiate_with_device_add_yet in TYPE_SYSBUS, making all kinds of untested devices available to -device and device_add. The problem with that is: setting has_dynamic_sysbus on a machine-type lets it accept all the 288 sysbus device types we have in QEMU, and most of them were never meant to be used with -device. That's a lot of untested code. Fortunately today we have just a few has_dynamic_sysbus=1 machines: virt, pc-q35-*, ppce500, and spapr. virt, ppce500, and spapr have extra checks to ensure just a few device types can be instantiated: * virt supports only TYPE_VFIO_CALXEDA_XGMAC, TYPE_VFIO_AMD_XGBE. * ppce500 supports only TYPE_ETSEC_COMMON. * spapr supports only TYPE_SPAPR_PCI_HOST_BRIDGE. q35 has no code to block unsupported sysbus devices, however, and accepts all device types. Fortunately, only the following 20 device types are compiled into the qemu-system-x86_64 and qemu-system-i386 binaries: * allwinner-ahci * amd-iommu * cfi.pflash01 * esp * fw_cfg_io * fw_cfg_mem * generic-sdhci * hpet * intel-iommu * ioapic * isabus-bridge * kvmclock * kvm-ioapic * kvmvapic * SUNW,fdtwo * sysbus-ahci * sysbus-fdc * sysbus-ohci * unimplemented-device * virtio-mmio Instead of requiring each machine-type with has_dynamic_sysbus=1 to implement its own mechanism to block unsupported devices, we can use the user_creatable flag to ensure we won't let the user plug anything that will never work. This patch adds user_creatable=true explicitly to the 24 device types mentioned above, to keep compatibility, and set user_creatable=false on TYPE_SYS_BUS_DEVICE. Subsequent patches will remove user_creatable=true from the devices that were not meant to be creatable using -device despite being currently accepted by q35. Cc: Peter MaydellCc: Alexander Graf Cc: John Snow Cc: Kevin Wolf Cc: Max Reitz Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Cc: "Michael S. Tsirkin" Cc: Scott Wood Cc: Jason Wang Cc: David Gibson Cc: Gerd Hoffmann Cc: Alex Williamson Cc: qemu-block@nongnu.org Cc: qemu-...@nongnu.org Cc: Alistair Francis Cc: Beniamino Galvani Cc: "Edgar E. Iglesias" Cc: Gabriel L. Somlo Cc: Igor Mammedov Cc: Laszlo Ersek Cc: Marcel Apfelbaum Cc: Prasad J Pandit Cc: qemu-...@nongnu.org Cc: Shannon Zhao Cc: Thomas Huth Signed-off-by: Eduardo Habkost --- hw/block/fdc.c | 10 ++ hw/block/pflash_cfi01.c | 5 + hw/core/sysbus.c | 11 +++ hw/i386/amd_iommu.c | 5 + hw/i386/intel_iommu.c| 5 + hw/i386/kvm/clock.c | 5 + hw/i386/kvm/ioapic.c | 5 + hw/i386/kvmvapic.c | 5 + hw/ide/ahci.c| 10 ++ hw/intc/ioapic.c | 5 + hw/isa/isa-bus.c | 5 + hw/misc/unimp.c | 5 + hw/net/fsl_etsec/etsec.c | 1 + hw/nvram/fw_cfg.c| 10 ++ hw/ppc/spapr_pci.c | 1 + hw/scsi/esp.c| 5 + hw/sd/sdhci.c| 5 + hw/timer/hpet.c | 5 + hw/usb/hcd-ohci.c| 5 + hw/vfio/amd-xgbe.c | 1 + hw/vfio/calxeda-xgmac.c | 1 + hw/virtio/virtio-mmio.c | 5 + 22 files changed, 115 insertions(+) diff --git a/hw/block/fdc.c b/hw/block/fdc.c index a328693d15..a06c8e358c 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -2880,6 +2880,11 @@ static void sysbus_fdc_class_init(ObjectClass *klass, void *data) dc->props = sysbus_fdc_properties; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +/* + * FIXME: Set only for compatibility on q35 machine-type. + * Probably never meant to be user-creatable + */ +dc->user_creatable = true; } static const TypeInfo sysbus_fdc_info = { @@ -2906,6 +2911,11 @@ static void sun4m_fdc_class_init(ObjectClass *klass, void *data) dc->props = sun4m_fdc_properties; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +/* + * FIXME: Set only for compatibility on q35 machine-type. + * Probably never meant to be user-creatable + */ +dc->user_creatable = true; } static const TypeInfo sun4m_fdc_info = { diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index 594d4cf6fe..f48dc20035 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -927,6 +927,11 @@ static void pflash_cfi01_class_init(ObjectClass *klass, void *data) dc->props = pflash_cfi01_properties; dc->vmsd =
[Qemu-block] [RFC 05/19] pflash_cfi01: Remove user_creatable flag
TYPE_CFI_PFLASH01 devices need to be mapped by pflash_cfi01_register() and can't be used with -device. Remove user_creatable from the device class. Cc: Kevin WolfCc: Max Reitz Cc: qemu-block@nongnu.org Cc: Laszlo Ersek Signed-off-by: Eduardo Habkost --- hw/block/pflash_cfi01.c | 5 - 1 file changed, 5 deletions(-) diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index f48dc20035..594d4cf6fe 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -927,11 +927,6 @@ static void pflash_cfi01_class_init(ObjectClass *klass, void *data) dc->props = pflash_cfi01_properties; dc->vmsd = _pflash; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -/* - * FIXME: Set only for compatibility on q35 machine-type. - * Probably never meant to be user-creatable - */ -dc->user_creatable = true; } -- 2.11.0.259.g40922b1
[Qemu-block] [RFC 04/19] fdc: Remove user_creatable flag from sysbus-fdc & SUNW, fdtwo
sysbus-fdc and SUNW,fdtwo devices need to be wired by the fdc initialization code, and can't be used with -device. Unset user_creatable on their device classes. Cc: John SnowCc: Kevin Wolf Cc: Max Reitz Cc: qemu-block@nongnu.org Cc: Thomas Huth Signed-off-by: Eduardo Habkost --- hw/block/fdc.c | 10 -- 1 file changed, 10 deletions(-) diff --git a/hw/block/fdc.c b/hw/block/fdc.c index a06c8e358c..a328693d15 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c @@ -2880,11 +2880,6 @@ static void sysbus_fdc_class_init(ObjectClass *klass, void *data) dc->props = sysbus_fdc_properties; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -/* - * FIXME: Set only for compatibility on q35 machine-type. - * Probably never meant to be user-creatable - */ -dc->user_creatable = true; } static const TypeInfo sysbus_fdc_info = { @@ -2911,11 +2906,6 @@ static void sun4m_fdc_class_init(ObjectClass *klass, void *data) dc->props = sun4m_fdc_properties; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); -/* - * FIXME: Set only for compatibility on q35 machine-type. - * Probably never meant to be user-creatable - */ -dc->user_creatable = true; } static const TypeInfo sun4m_fdc_info = { -- 2.11.0.259.g40922b1
Re: [Qemu-block] [PATCH for-2.9] rbd: Fix regression in legacy key/values containing escaped :
Tested-by: Alexandru AvadaniiThank you very much for the quick fix! > -Original Message- > From: Eric Blake [mailto:ebl...@redhat.com] > Sent: Friday, March 31, 2017 6:28 PM > To: qemu-de...@nongnu.org > Cc: qemu-block@nongnu.org; jc...@redhat.com; arm...@redhat.com; > Alexandru Avadanii; Josh Durgin; Kevin Wolf; Max Reitz > Subject: [PATCH for-2.9] rbd: Fix regression in legacy key/values containing > escaped : > > Commit c7cacb3 accidentally broke legacy key-value parsing through pseudo- > filename parsing of -drive file=rbd://..., for any key that contains an > escaped > ':'. Such a key is surprisingly common, thanks to mon_host specifying a > 'host:port' string. The break happens because passing things from QDict > through QemuOpts back to another QDict requires that we pack our parsed > key/value pairs into a string, and then reparse that string, but the > intermediate string that we created ("key1=value1:key2=value2") lost the \: > escaping that was present in the original, so that we could no longer see > which : were used as separators vs. those used as part of the original input. > > Fix it by collecting the key/value pairs through a QList, and sending that > list on > a round trip through a JSON QString (as in > '["key1","value1","key2","value2"]') on its way through QemuOpts, rather > than hand-rolling our own string. Since the string is only handled > internally, > this was faster than creating a full-blown struct of > '[{"key1":"value1"},{"key2":"value2"}]', and safer at guaranteeing order > compared to '{"key1":"value1","key2":"value2"}'. > > It would be nicer if we didn't have to round-trip through QemuOpts in the > first place, but that's a much bigger task for later. > > Reproducer: > ./x86_64-softmmu/qemu-system-x86_64 -nodefaults -nographic -qmp stdio > \ -drive 'file=rbd:volumes/volume-ea141b5c-cdb3-4765-910d-e7008b209a70'\ > ':id=compute:key=AQAVkvxXABAA9ZxWFYdRmV+DSwKr7BKKXg=='\ > ':auth_supported=cephx\;none:mon_host=192.168.1.2\:6789'\ > ',format=raw,if=none,id=drive-virtio-disk0,'\ > 'serial=ea141b5c-cdb3-4765-910d-e7008b209a70,cache=writeback' > > Even without an RBD setup, this serves a test of whether we get the > incorrect parser error of: > qemu-system-x86_64: -drive file=rbd:...cache=writeback: conf option 6789 > has no value or the correct behavior of hanging while trying to connect to the > requested mon_host of 192.168.1.2:6789. > > Reported-by: Alexandru Avadanii > Signed-off-by: Eric Blake > --- > block/rbd.c | 83 +++ > -- > 1 file changed, 42 insertions(+), 41 deletions(-) > > diff --git a/block/rbd.c b/block/rbd.c > index 498322b..fbdb131 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -20,6 +20,7 @@ > #include "crypto/secret.h" > #include "qemu/cutils.h" > #include "qapi/qmp/qstring.h" > +#include "qapi/qmp/qjson.h" > > /* > * When specifying the image filename use: > @@ -135,18 +136,16 @@ static void qemu_rbd_parse_filename(const char > *filename, QDict *options, > Error **errp) { > const char *start; > -char *p, *buf, *keypairs; > +char *p, *buf; > +QList *keypairs = NULL; > char *found_str; > -size_t max_keypair_size; > > if (!strstart(filename, "rbd:", )) { > error_setg(errp, "File name must start with 'rbd:'"); > return; > } > > -max_keypair_size = strlen(start) + 1; > buf = g_strdup(start); > -keypairs = g_malloc0(max_keypair_size); > p = buf; > > found_str = qemu_rbd_next_tok(p, '/', ); @@ -194,33 +193,30 @@ > static void qemu_rbd_parse_filename(const char *filename, QDict *options, > } else if (!strcmp(name, "id")) { > qdict_put(options, "user" , qstring_from_str(value)); > } else { > -/* FIXME: This is pretty ugly, and not the right way to do this. > - *These should be contained in a structure, and then > - *passed explicitly as individual key/value pairs to > - *rados. Consider this legacy code that needs to be > - *updated. */ > -char *tmp = g_malloc0(max_keypair_size); > -/* only use a delimiter if it is not the first keypair found */ > -/* These are sets of unknown key/value pairs we'll pass along > - * to ceph */ > -if (keypairs[0]) { > -snprintf(tmp, max_keypair_size, ":%s=%s", name, value); > -pstrcat(keypairs, max_keypair_size, tmp); > -} else { > -snprintf(keypairs, max_keypair_size, "%s=%s", name, value); > +/* > + * We pass these internally to qemu_rbd_set_keypairs(), so > + * we can get away with the simpler list of [ "key1", > + * "value1", "key2", "value2" ] rather
Re: [Qemu-block] [PATCH for-2.9 2/2] block/curl: Check protocol prefix
On 31.03.2017 21:30, Eric Blake wrote: > On 03/31/2017 07:04 AM, Max Reitz wrote: >> If the user has explicitly specified a block driver and thus a protocol, >> we have to make sure the URL's protocol prefix matches. Otherwise the >> latter will silently override the former which might catch some users by >> surprise. >> >> Signed-off-by: Max Reitz>> --- >> block/curl.c | 10 ++ >> 1 file changed, 10 insertions(+) >> > > It feels a little bit dirty that we are parsing the URL rather than > having a distinct discriminator, but I'm okay with the approach (with a > distinct discriminator, we would have to reconstruct a URL from the > discriminator + the rest of the server/path information, and that's more > work to libvirt to split a URL into the distinct JSON fields just to > have qemu rebuild a URL). Yes, you're right, there are a couple of points where the interface could be nicer. Another thing is the cookie string which could be a list of dicts or something similar. But in the end all of this block driver really is just an interface for libcurl, so I thought it best to just behave as such (i.e. take "blob" strings that are then just piped into libcurl). (The distinct discriminator would probably be just the block driver name. But then every user would have to strip the protocol part from the URL...) >> +if (!strstart(file, bs->drv->protocol_name, _delimiter) || >> +!strstart(protocol_delimiter, "://", NULL)) Regarding case sensitivity: I have to say that I actually can't remember when I saw a not full lower-cased protocol specified in a URL, and I'm glad about that. :-) I also think we can wait until someone files a bug (at which point we can always ask them how hard it would be to just write the protocol in lower case...). >> +{ >> +error_setg(errp, "%s curl driver cannot handle the URL '%s' (does >> not " >> + "start with '%s://')", bs->drv->protocol_name, file, >> + bs->drv->protocol_name); > > Perhaps splitting the message with an error_append_hint() for the > parenthetical half would also be appropriate, but the line is not too > terribly long so I won't insist on a respin. Good idea. But I don't think it's quite worth a respin either at this point. (Also, it can be argued that the part in parentheses is in fact the actual error source, so it kind of makes sense to keep it in the main error message.) > Reviewed-by: Eric Blake Thanks! Max signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH for-2.9 v8 0/3] Fix wipe of unaligned qcow2 image [was add blkdebug tests]
On 31.03.2017 20:53, Eric Blake wrote: > Available as a tag at: > git fetch git://repo.or.cz/qemu/ericb.git nbd-blkdebug-v8 > > (which is somewhat of a misnomer for this current version of the > series, but historically correct) > > v6 was: > https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg01562.html > v7 was: > https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg06175.html > > Since then: reorder patches 2 and 3, and update commit messages to match > > 001/3:[] [--] 'iotests: fix 097 when run with qcow' > 002/3:[0002] [FC] 'qcow2: Discard unaligned tail when wiping image' > 003/3:[0002] [FC] 'iotests: Improve image-clear tests on non-aligned image' > > Daniel P. Berrange (1): > iotests: fix 097 when run with qcow > > Eric Blake (2): > qcow2: Discard unaligned tail when wiping image > iotests: Improve image-clear tests on non-aligned image > > block/qcow2-cluster.c | 10 +-- > tests/qemu-iotests/097 | 27 +++ > tests/qemu-iotests/097.out | 180 > - > tests/qemu-iotests/176 | 131 + > tests/qemu-iotests/176.out | 150 + > tests/qemu-iotests/group | 1 + > 6 files changed, 345 insertions(+), 154 deletions(-) > create mode 100755 tests/qemu-iotests/176 > create mode 100644 tests/qemu-iotests/176.out Reviewed-by: Max ReitzThanks a lot, applied to my block branch: https://github.com/XanClic/qemu/commits/block Max signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [Qemu-devel] [PATCH for-2.9] block/parallels: Avoid overflows
On 03/31/2017 02:05 PM, Max Reitz wrote: Change the types of variables in allocate_clusters() to int64_t so we do not have to worry about potential overflows. Add an assertion that our accesses to s->bat[] do not result in a buffer overflow and that the implicit conversion performed when invoking bat_entry_off() does not result in an integer overflow. Coverity-id: 1307776 Signed-off-by: Max ReitzReviewed-by: Philippe Mathieu-Daudé --- This supercedes Peter's patch "block/parallels.c: avoid integer overflow in allocate_clusters()". --- block/parallels.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 4173b3fb9d..90acf79687 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -192,8 +192,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { BDRVParallelsState *s = bs->opaque; -uint32_t idx, to_allocate, i; -int64_t pos, space; +int64_t pos, space, idx, to_allocate, i; pos = block_status(s, sector_num, nb_sectors, pnum); if (pos > 0) { @@ -201,11 +200,19 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } idx = sector_num / s->tracks; -if (idx >= s->bat_size) { -return -EINVAL; -} - to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; + +/* This function is called only by parallels_co_writev(), which will never + * pass a sector_num at or beyond the end of the image (because the block + * layer never passes such a sector_num to that function). Therefore, idx + * is always below s->bat_size. + * block_status() will limit *pnum so that sector_num + *pnum will not + * exceed the image end. Therefore, idx + to_allocate cannot exceed + * s->bat_size. + * Note that s->bat_size is an unsigned int, therefore idx + to_allocate + * will always fit into a uint32_t. */ +assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); + space = to_allocate * s->tracks; if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) { int ret;
Re: [Qemu-block] [PATCH for-2.9 1/2] qapi/curl: Extend and fix blockdev-add schema
On 03/31/2017 07:04 AM, Max Reitz wrote: > The curl block driver accepts more options than just "filename"; also, > the URL is actually expected to be passed through the "url" option > instead of "filename". > > Signed-off-by: Max Reitz> --- > qapi/block-core.json | 103 > ++- > 1 file changed, 94 insertions(+), 9 deletions(-) > > diff --git a/qapi/block-core.json b/qapi/block-core.json > index b5f0e9958c..033457ce86 100644 > --- a/qapi/block-core.json > +++ b/qapi/block-core.json > @@ -2737,16 +2737,101 @@ > '*debug': 'int' } } > > ## > -# @BlockdevOptionsCurl: > +# @BlockdevOptionsCurlBase: > # > -# Driver specific block device options for the curl backend. > +# Driver specific block device options shared by all protocols supported by > the > +# curl backend. > # > -# @filename:path to the image file > +# @url: URL of the image file > +# > +# @readahead: Size of the read-ahead cache; must be a multiple > of > +# 512 (defaults to 256 kB) > +# > +# @timeout: Timeout for connections, in seconds (defaults to > 5) > +# > +# @username:Username for authentication (defaults to none) > +# > +# @password-secret: ID of a QCryptoSecret object providing a password > +# for authentication (defaults to no password) > +# > +# @proxy-username: Username for proxy authentication (defaults to > none) > +# > +# @proxy-password-secret: ID of a QCryptoSecret object providing a password > +# for proxy authentication (defaults to no > password) > +# Matches runtime_opts of block/curl.c, modulo the fields that you restricted to specific protocols by creating subtypes. Looks like you covered everything correctly. Reviewed-by: Eric Blake -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH for-2.9 2/2] block/curl: Check protocol prefix
On 03/31/2017 07:04 AM, Max Reitz wrote: > If the user has explicitly specified a block driver and thus a protocol, > we have to make sure the URL's protocol prefix matches. Otherwise the > latter will silently override the former which might catch some users by > surprise. > > Signed-off-by: Max Reitz> --- > block/curl.c | 10 ++ > 1 file changed, 10 insertions(+) > It feels a little bit dirty that we are parsing the URL rather than having a distinct discriminator, but I'm okay with the approach (with a distinct discriminator, we would have to reconstruct a URL from the discriminator + the rest of the server/path information, and that's more work to libvirt to split a URL into the distinct JSON fields just to have qemu rebuild a URL). > +if (!strstart(file, bs->drv->protocol_name, _delimiter) || > +!strstart(protocol_delimiter, "://", NULL)) > +{ > +error_setg(errp, "%s curl driver cannot handle the URL '%s' (does > not " > + "start with '%s://')", bs->drv->protocol_name, file, > + bs->drv->protocol_name); Perhaps splitting the message with an error_append_hint() for the parenthetical half would also be appropriate, but the line is not too terribly long so I won't insist on a respin. Reviewed-by: Eric Blake -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH for-2.9 1/2] qapi/curl: Extend and fix blockdev-add schema
On Fri, Mar 31, 2017 at 02:04:30PM +0200, Max Reitz wrote: > The curl block driver accepts more options than just "filename"; also, > the URL is actually expected to be passed through the "url" option > instead of "filename". > > Signed-off-by: Max Reitz> --- > qapi/block-core.json | 103 > ++- > 1 file changed, 94 insertions(+), 9 deletions(-) > > diff --git a/qapi/block-core.json b/qapi/block-core.json > index b5f0e9958c..033457ce86 100644 > --- a/qapi/block-core.json > +++ b/qapi/block-core.json > @@ -2737,16 +2737,101 @@ > '*debug': 'int' } } > > ## > -# @BlockdevOptionsCurl: > +# @BlockdevOptionsCurlBase: > # > -# Driver specific block device options for the curl backend. > +# Driver specific block device options shared by all protocols supported by > the > +# curl backend. > # > -# @filename:path to the image file > +# @url: URL of the image file > +# > +# @readahead: Size of the read-ahead cache; must be a multiple > of > +# 512 (defaults to 256 kB) > +# > +# @timeout: Timeout for connections, in seconds (defaults to > 5) > +# > +# @username:Username for authentication (defaults to none) > +# > +# @password-secret: ID of a QCryptoSecret object providing a password > +# for authentication (defaults to no password) > +# > +# @proxy-username: Username for proxy authentication (defaults to > none) > +# > +# @proxy-password-secret: ID of a QCryptoSecret object providing a password > +# for proxy authentication (defaults to no > password) > +# > +# Since: 2.9 > +## > +{ 'struct': 'BlockdevOptionsCurlBase', > + 'data': { 'url': 'str', > +'*readahead': 'int', > +'*timeout': 'int', > +'*username': 'str', > +'*password-secret': 'str', > +'*proxy-username': 'str', > +'*proxy-password-secret': 'str' } } > + > +## > +# @BlockdevOptionsCurlHttp: > +# > +# Driver specific block device options for HTTP connections over the curl > +# backend. URLs must start with "http://;. > +# > +# @cookie: List of cookies to set; format is > +# "name1=content1; name2=content2;" as explained by > +# CURLOPT_COOKIE(3). Defaults to no cookies. > +# > +# Since: 2.9 > +## > +{ 'struct': 'BlockdevOptionsCurlHttp', > + 'base': 'BlockdevOptionsCurlBase', > + 'data': { '*cookie': 'str' } } > + > +## > +# @BlockdevOptionsCurlHttps: > +# > +# Driver specific block device options for HTTPS connections over the curl > +# backend. URLs must start with "https://;. > +# > +# @cookie: List of cookies to set; format is > +# "name1=content1; name2=content2;" as explained by > +# CURLOPT_COOKIE(3). Defaults to no cookies. > +# > +# @sslverify: Whether to verify the SSL certificate's validity (defaults to > +# true) > +# > +# Since: 2.9 > +## > +{ 'struct': 'BlockdevOptionsCurlHttps', > + 'base': 'BlockdevOptionsCurlBase', > + 'data': { '*cookie': 'str', > +'*sslverify': 'bool' } } > + > +## > +# @BlockdevOptionsCurlFtp: > +# > +# Driver specific block device options for FTP connections over the curl > +# backend. URLs must start with "ftp://;. > +# > +# Since: 2.9 > +## > +{ 'struct': 'BlockdevOptionsCurlFtp', > + 'base': 'BlockdevOptionsCurlBase', > + 'data': { } } > + > +## > +# @BlockdevOptionsCurlFtps: > +# > +# Driver specific block device options for FTPS connections over the curl > +# backend. URLs must start with "ftps://". > +# > +# @sslverify: Whether to verify the SSL certificate's validity (defaults to > +# true) > # > # Since: 2.9 > ## > -{ 'struct': 'BlockdevOptionsCurl', > - 'data': { 'filename': 'str' } } > +{ 'struct': 'BlockdevOptionsCurlFtps', > + 'base': 'BlockdevOptionsCurlBase', > + 'data': { '*sslverify': 'bool' } } > > ## > # @BlockdevOptionsNbd: > @@ -2815,13 +2900,13 @@ >'cloop': 'BlockdevOptionsGenericFormat', >'dmg':'BlockdevOptionsGenericFormat', >'file': 'BlockdevOptionsFile', > - 'ftp':'BlockdevOptionsCurl', > - 'ftps': 'BlockdevOptionsCurl', > + 'ftp':'BlockdevOptionsCurlFtp', > + 'ftps': 'BlockdevOptionsCurlFtps', >'gluster':'BlockdevOptionsGluster', >'host_cdrom': 'BlockdevOptionsFile', >'host_device':'BlockdevOptionsFile', > - 'http': 'BlockdevOptionsCurl', > - 'https': 'BlockdevOptionsCurl', > + 'http': 'BlockdevOptionsCurlHttp', > + 'https': 'BlockdevOptionsCurlHttps', >'iscsi': 'BlockdevOptionsIscsi', >'luks': 'BlockdevOptionsLUKS', >'nbd':'BlockdevOptionsNbd', > -- > 2.12.1 > > I already applied this to my branch, but it occured to me that it would
Re: [Qemu-block] [PATCH for-2.9 2/2] block/curl: Check protocol prefix
On 03/31/2017 07:04 AM, Max Reitz wrote: > If the user has explicitly specified a block driver and thus a protocol, > we have to make sure the URL's protocol prefix matches. Otherwise the > latter will silently override the former which might catch some users by > surprise. > > Signed-off-by: Max Reitz> --- > block/curl.c | 10 ++ > 1 file changed, 10 insertions(+) > > diff --git a/block/curl.c b/block/curl.c > index 34dbd335f4..2708d57c2f 100644 > --- a/block/curl.c > +++ b/block/curl.c > @@ -659,6 +659,7 @@ static int curl_open(BlockDriverState *bs, QDict > *options, int flags, > const char *cookie; > double d; > const char *secretid; > +const char *protocol_delimiter; > > static int inited = 0; > > @@ -700,6 +701,15 @@ static int curl_open(BlockDriverState *bs, QDict > *options, int flags, > goto out_noclean; > } > > +if (!strstart(file, bs->drv->protocol_name, _delimiter) || > +!strstart(protocol_delimiter, "://", NULL)) Do we care about case-insensitive comparisons here? But I'm fine with case-sensitive for now, until we have an actual report of someone that it breaks. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH for-2.9 0/2] curl: Extend and fix blockdev-add schema
On Fri, Mar 31, 2017 at 02:04:29PM +0200, Max Reitz wrote: > Yes, it's yet another episode in our popular > get-blockdev-add-ready-for-2.9 drama! > > Right now, the schema for the curl block driver is seriously lacking. > This series improves things at least a bit. > > To improve things seriously, we might want to structure the URL instead > of it being just a plain string, and we might want to split the cookie > string into a list of dicts or something similar. However, strictly > speaking our curl block driver is *not* an (ht|f)tps? block driver but > just a curl driver. All it does is pass some options to libcurl and then > send and receive data from it. (We really should have just named it > "curl" from the start.) > > Therefore, it probably is for the best to leave these options rather > opaque and let libcurl do the interpretation. > > > Max Reitz (2): > qapi/curl: Extend and fix blockdev-add schema > block/curl: Check protocol prefix > > qapi/block-core.json | 103 > ++- > block/curl.c | 10 + > 2 files changed, 104 insertions(+), 9 deletions(-) > > -- > 2.12.1 > > Thanks, Applied to my block branch: git://github.com/codyprime/qemu-kvm-jtc.git block -Jeff
Re: [Qemu-block] [PATCH for-2.9 2/2] block/curl: Check protocol prefix
On Fri, Mar 31, 2017 at 02:04:31PM +0200, Max Reitz wrote: > If the user has explicitly specified a block driver and thus a protocol, > we have to make sure the URL's protocol prefix matches. Otherwise the > latter will silently override the former which might catch some users by > surprise. > > Signed-off-by: Max ReitzReviewed-by: Jeff Cody > --- > block/curl.c | 10 ++ > 1 file changed, 10 insertions(+) > > diff --git a/block/curl.c b/block/curl.c > index 34dbd335f4..2708d57c2f 100644 > --- a/block/curl.c > +++ b/block/curl.c > @@ -659,6 +659,7 @@ static int curl_open(BlockDriverState *bs, QDict > *options, int flags, > const char *cookie; > double d; > const char *secretid; > +const char *protocol_delimiter; > > static int inited = 0; > > @@ -700,6 +701,15 @@ static int curl_open(BlockDriverState *bs, QDict > *options, int flags, > goto out_noclean; > } > > +if (!strstart(file, bs->drv->protocol_name, _delimiter) || > +!strstart(protocol_delimiter, "://", NULL)) > +{ > +error_setg(errp, "%s curl driver cannot handle the URL '%s' (does > not " > + "start with '%s://')", bs->drv->protocol_name, file, > + bs->drv->protocol_name); > +goto out_noclean; > +} > + > s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME)); > secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET); > > -- > 2.12.1 > >
Re: [Qemu-block] [PATCH for-2.9 1/2] qapi/curl: Extend and fix blockdev-add schema
On Fri, Mar 31, 2017 at 02:04:30PM +0200, Max Reitz wrote: > The curl block driver accepts more options than just "filename"; also, > the URL is actually expected to be passed through the "url" option > instead of "filename". > > Signed-off-by: Max ReitzReviewed-by: Jeff Cody > --- > qapi/block-core.json | 103 > ++- > 1 file changed, 94 insertions(+), 9 deletions(-) > > diff --git a/qapi/block-core.json b/qapi/block-core.json > index b5f0e9958c..033457ce86 100644 > --- a/qapi/block-core.json > +++ b/qapi/block-core.json > @@ -2737,16 +2737,101 @@ > '*debug': 'int' } } > > ## > -# @BlockdevOptionsCurl: > +# @BlockdevOptionsCurlBase: > # > -# Driver specific block device options for the curl backend. > +# Driver specific block device options shared by all protocols supported by > the > +# curl backend. > # > -# @filename:path to the image file > +# @url: URL of the image file > +# > +# @readahead: Size of the read-ahead cache; must be a multiple > of > +# 512 (defaults to 256 kB) > +# > +# @timeout: Timeout for connections, in seconds (defaults to > 5) > +# > +# @username:Username for authentication (defaults to none) > +# > +# @password-secret: ID of a QCryptoSecret object providing a password > +# for authentication (defaults to no password) > +# > +# @proxy-username: Username for proxy authentication (defaults to > none) > +# > +# @proxy-password-secret: ID of a QCryptoSecret object providing a password > +# for proxy authentication (defaults to no > password) > +# > +# Since: 2.9 > +## > +{ 'struct': 'BlockdevOptionsCurlBase', > + 'data': { 'url': 'str', > +'*readahead': 'int', > +'*timeout': 'int', > +'*username': 'str', > +'*password-secret': 'str', > +'*proxy-username': 'str', > +'*proxy-password-secret': 'str' } } > + > +## > +# @BlockdevOptionsCurlHttp: > +# > +# Driver specific block device options for HTTP connections over the curl > +# backend. URLs must start with "http://;. > +# > +# @cookie: List of cookies to set; format is > +# "name1=content1; name2=content2;" as explained by > +# CURLOPT_COOKIE(3). Defaults to no cookies. > +# > +# Since: 2.9 > +## > +{ 'struct': 'BlockdevOptionsCurlHttp', > + 'base': 'BlockdevOptionsCurlBase', > + 'data': { '*cookie': 'str' } } > + > +## > +# @BlockdevOptionsCurlHttps: > +# > +# Driver specific block device options for HTTPS connections over the curl > +# backend. URLs must start with "https://;. > +# > +# @cookie: List of cookies to set; format is > +# "name1=content1; name2=content2;" as explained by > +# CURLOPT_COOKIE(3). Defaults to no cookies. > +# > +# @sslverify: Whether to verify the SSL certificate's validity (defaults to > +# true) > +# > +# Since: 2.9 > +## > +{ 'struct': 'BlockdevOptionsCurlHttps', > + 'base': 'BlockdevOptionsCurlBase', > + 'data': { '*cookie': 'str', > +'*sslverify': 'bool' } } > + > +## > +# @BlockdevOptionsCurlFtp: > +# > +# Driver specific block device options for FTP connections over the curl > +# backend. URLs must start with "ftp://;. > +# > +# Since: 2.9 > +## > +{ 'struct': 'BlockdevOptionsCurlFtp', > + 'base': 'BlockdevOptionsCurlBase', > + 'data': { } } > + > +## > +# @BlockdevOptionsCurlFtps: > +# > +# Driver specific block device options for FTPS connections over the curl > +# backend. URLs must start with "ftps://". > +# > +# @sslverify: Whether to verify the SSL certificate's validity (defaults to > +# true) > # > # Since: 2.9 > ## > -{ 'struct': 'BlockdevOptionsCurl', > - 'data': { 'filename': 'str' } } > +{ 'struct': 'BlockdevOptionsCurlFtps', > + 'base': 'BlockdevOptionsCurlBase', > + 'data': { '*sslverify': 'bool' } } > > ## > # @BlockdevOptionsNbd: > @@ -2815,13 +2900,13 @@ >'cloop': 'BlockdevOptionsGenericFormat', >'dmg':'BlockdevOptionsGenericFormat', >'file': 'BlockdevOptionsFile', > - 'ftp':'BlockdevOptionsCurl', > - 'ftps': 'BlockdevOptionsCurl', > + 'ftp':'BlockdevOptionsCurlFtp', > + 'ftps': 'BlockdevOptionsCurlFtps', >'gluster':'BlockdevOptionsGluster', >'host_cdrom': 'BlockdevOptionsFile', >'host_device':'BlockdevOptionsFile', > - 'http': 'BlockdevOptionsCurl', > - 'https': 'BlockdevOptionsCurl', > + 'http': 'BlockdevOptionsCurlHttp', > + 'https': 'BlockdevOptionsCurlHttps', >'iscsi': 'BlockdevOptionsIscsi', >'luks': 'BlockdevOptionsLUKS', >'nbd':'BlockdevOptionsNbd', > -- > 2.12.1 > >
[Qemu-block] [PATCH for-2.9 v8 0/3] Fix wipe of unaligned qcow2 image [was add blkdebug tests]
Available as a tag at: git fetch git://repo.or.cz/qemu/ericb.git nbd-blkdebug-v8 (which is somewhat of a misnomer for this current version of the series, but historically correct) v6 was: https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg01562.html v7 was: https://lists.gnu.org/archive/html/qemu-devel/2017-03/msg06175.html Since then: reorder patches 2 and 3, and update commit messages to match 001/3:[] [--] 'iotests: fix 097 when run with qcow' 002/3:[0002] [FC] 'qcow2: Discard unaligned tail when wiping image' 003/3:[0002] [FC] 'iotests: Improve image-clear tests on non-aligned image' Daniel P. Berrange (1): iotests: fix 097 when run with qcow Eric Blake (2): qcow2: Discard unaligned tail when wiping image iotests: Improve image-clear tests on non-aligned image block/qcow2-cluster.c | 10 +-- tests/qemu-iotests/097 | 27 +++ tests/qemu-iotests/097.out | 180 - tests/qemu-iotests/176 | 131 + tests/qemu-iotests/176.out | 150 + tests/qemu-iotests/group | 1 + 6 files changed, 345 insertions(+), 154 deletions(-) create mode 100755 tests/qemu-iotests/176 create mode 100644 tests/qemu-iotests/176.out -- 2.9.3
[Qemu-block] [PATCH v8 2/3] qcow2: Discard unaligned tail when wiping image
There is a subtle difference between the fast (qcow2v3 with no extra data) and slow path (qcow2v2 format [aka 0.10], or when a snapshot is present) of qcow2_make_empty(). The slow path fails to discard the final (partial) cluster of an unaligned image. The problem stems from the fact that qcow2_discard_clusters() was silently ignoring sub-cluster head and tail on unaligned requests. A quick audit of all callers shows that qcow2_snapshot_create() has always passed a cluster-aligned request since the call was added in commit 1ebf561; qcow2_co_pdiscard() has passed a cluster-aligned request since commit ecdbead taught the block layer about preferred discard alignment; and qcow2_make_empty() was fixed to pass an aligned start (but not necessarily end) in commit a3e1505. Asserting that the start is always aligned also points out that we now have a dead check: rounding the end offset down can never result in a value less than the aligned start offset (the check was rendered dead with commit ecdbead). Meanwhile, we do not want to round the end cluster down in the one case of the end offset matching the (unaligned) file size - that final partial cluster should still be discarded. With those fixes in place, the fast and slow paths are back in sync at discarding an entire image; the next patch will update qemu-iotests to ensure we don't regress. Note that bdrv_co_pdiscard ignores ALL partial cluster requests, including the partial cluster at the end of an image; it can be argued that the partial cluster at the end should be special-cased so that a guest issuing discard requests at proper alignments everywhere else can likewise empty the entire image. But that optimization is left for another day. Signed-off-by: Eric Blake--- v8: swap patch order v7: new patch --- block/qcow2-cluster.c | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 78c11d4..100398c 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -1519,12 +1519,10 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); -/* Round start up and end down */ -offset = align_offset(offset, s->cluster_size); -end_offset = start_of_cluster(s, end_offset); - -if (offset > end_offset) { -return 0; +/* The caller must cluster-align start; round end down except at EOF */ +assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); +if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) { +end_offset = start_of_cluster(s, end_offset); } nb_clusters = size_to_clusters(s, end_offset - offset); -- 2.9.3
[Qemu-block] [PATCH v8 3/3] iotests: Improve image-clear tests on non-aligned image
Tweak 097 and 176 to operate on an image that is not cluster-aligned, to give further coverage of clearing out an entire image, including the recent fix to eliminate the difference between fast path (97) and slow (176) for qcow2. Also tested on qcow (97 only, since qcow lacks snapshots). Signed-off-by: Eric Blake--- v8: reorder patches v7: also write data in the unaligned cluster, and document bug it exposes v6: new patch --- tests/qemu-iotests/097 | 17 +- tests/qemu-iotests/097.out | 55 -- tests/qemu-iotests/176 | 17 +- tests/qemu-iotests/176.out | 55 -- 4 files changed, 108 insertions(+), 36 deletions(-) diff --git a/tests/qemu-iotests/097 b/tests/qemu-iotests/097 index 1d28aff..e22670c 100755 --- a/tests/qemu-iotests/097 +++ b/tests/qemu-iotests/097 @@ -66,13 +66,15 @@ echo echo "=== Test pass $i ===" echo -TEST_IMG="$TEST_IMG.base" _make_test_img 2100M -TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" 2100M -_make_test_img -b "$TEST_IMG.itmd" 2100M +len=$((2100 * 1024 * 1024 + 512)) # larger than 2G, and not cluster aligned +TEST_IMG="$TEST_IMG.base" _make_test_img $len +TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" $len +_make_test_img -b "$TEST_IMG.itmd" $len -$QEMU_IO -c 'write -P 1 0x7ffd 192k' "$TEST_IMG.base" | _filter_qemu_io -$QEMU_IO -c 'write -P 2 0x7ffe 128k' "$TEST_IMG.itmd" | _filter_qemu_io -$QEMU_IO -c 'write -P 3 0x7fff 64k' "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -P 1 0x7ffd 192k" "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "write -P 2 0x7ffe 128k" "$TEST_IMG.itmd" | _filter_qemu_io +$QEMU_IO -c "write -P 3 0x7fff 64k" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "write -P 4 $(($len - 512)) 512" "$TEST_IMG" | _filter_qemu_io if [ $i -lt 3 ]; then if [ $i == 0 ]; then @@ -90,11 +92,13 @@ if [ $i -lt 3 ]; then # Bottom should be unchanged $QEMU_IO -c 'read -P 1 0x7ffd 192k' "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "read -P 0 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io # Intermediate should contain changes from top $QEMU_IO -c 'read -P 1 0x7ffd 64k' "$TEST_IMG.itmd" | _filter_qemu_io $QEMU_IO -c 'read -P 2 0x7ffe 64k' "$TEST_IMG.itmd" | _filter_qemu_io $QEMU_IO -c 'read -P 3 0x7fff 64k' "$TEST_IMG.itmd" | _filter_qemu_io +$QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.itmd" | _filter_qemu_io # And in pass 0, the top image should be empty, whereas in both other passes # it should be unchanged (which is both checked by qemu-img map) @@ -105,6 +109,7 @@ else $QEMU_IO -c 'read -P 1 0x7ffd 64k' "$TEST_IMG.base" | _filter_qemu_io $QEMU_IO -c 'read -P 2 0x7ffe 64k' "$TEST_IMG.base" | _filter_qemu_io $QEMU_IO -c 'read -P 3 0x7fff 64k' "$TEST_IMG.base" | _filter_qemu_io +$QEMU_IO -c "read -P 4 $((len - 512)) 512" "$TEST_IMG.base" | _filter_qemu_io # Both top and intermediate should be unchanged fi diff --git a/tests/qemu-iotests/097.out b/tests/qemu-iotests/097.out index 81fc225..f6705a1 100644 --- a/tests/qemu-iotests/097.out +++ b/tests/qemu-iotests/097.out @@ -2,104 +2,130 @@ QA output created by 097 === Test pass 0 === -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202010112 +Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.base +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202010112 backing_file=TEST_DIR/t.IMGFMT.itmd wrote 196608/196608 bytes at offset 2147287040 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 131072/131072 bytes at offset 2147352576 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) wrote 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Image committed. read 196608/196608 bytes at offset 2147287040 192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147287040 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147352576 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 65536/65536 bytes at offset 2147418112 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 2202009600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Offset Length File 0x7ffd 0x3
[Qemu-block] [PATCH v8 1/3] iotests: fix 097 when run with qcow
From: "Daniel P. Berrange"The previous commit: commit a3e1505daec31ef56f0489f8c8fff1b8e4ca92bd Author: Eric Blake Date: Mon Dec 5 09:49:34 2016 -0600 qcow2: Don't strand clusters near 2G intervals during commit extended the 097 test case so that it did two passes, once with an internal snapshot, once without. qcow (v1) does not support internal snapshots, so this change broke test 097 when run against qcow. This splits 097 in two, creating a new 176 that tests the internal snapshot codepath, effectively putting 097 back to its content before the above commit. Reviewed-by: Max Reitz Signed-off-by: Daniel P. Berrange Message-Id: <20170221115512.21918-8-berra...@redhat.com> [eblake: test collisions: s/173/176/g] Signed-off-by: Eric Blake --- tests/qemu-iotests/097 | 10 +--- tests/qemu-iotests/097.out | 125 ++-- tests/qemu-iotests/176 | 126 + tests/qemu-iotests/176.out | 119 ++ tests/qemu-iotests/group | 1 + 5 files changed, 251 insertions(+), 130 deletions(-) create mode 100755 tests/qemu-iotests/176 create mode 100644 tests/qemu-iotests/176.out diff --git a/tests/qemu-iotests/097 b/tests/qemu-iotests/097 index 4c33e80..1d28aff 100755 --- a/tests/qemu-iotests/097 +++ b/tests/qemu-iotests/097 @@ -56,26 +56,19 @@ _supported_os Linux # 3: Two-layer backing chain, commit to lower backing file # (in this case, the top image will implicitly stay unchanged) # -# Each pass is run twice, since qcow2 has different code paths for cleaning -# an image depending on whether it has a snapshot. -# # 020 already tests committing, so this only tests whether image chains are # working properly and that all images above the base are emptied; therefore, # no complicated patterns are necessary. Check near the 2G mark, as qcow2 # has been buggy at that boundary in the past. for i in 0 1 2 3; do -for j in 0 1; do echo -echo "=== Test pass $i.$j ===" +echo "=== Test pass $i ===" echo TEST_IMG="$TEST_IMG.base" _make_test_img 2100M TEST_IMG="$TEST_IMG.itmd" _make_test_img -b "$TEST_IMG.base" 2100M _make_test_img -b "$TEST_IMG.itmd" 2100M -if [ $j -eq 0 ]; then -$QEMU_IMG snapshot -c snap "$TEST_IMG" -fi $QEMU_IO -c 'write -P 1 0x7ffd 192k' "$TEST_IMG.base" | _filter_qemu_io $QEMU_IO -c 'write -P 2 0x7ffe 128k' "$TEST_IMG.itmd" | _filter_qemu_io @@ -121,7 +114,6 @@ $QEMU_IMG map "$TEST_IMG.itmd" | _filter_qemu_img_map $QEMU_IMG map "$TEST_IMG" | _filter_qemu_img_map done -done # success, all done diff --git a/tests/qemu-iotests/097.out b/tests/qemu-iotests/097.out index 8106cc9..81fc225 100644 --- a/tests/qemu-iotests/097.out +++ b/tests/qemu-iotests/097.out @@ -1,6 +1,6 @@ QA output created by 097 -=== Test pass 0.0 === +=== Test pass 0 === Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base @@ -29,66 +29,7 @@ Offset Length File 0x7ffd 0x1 TEST_DIR/t.IMGFMT.base 0x7ffe 0x2 TEST_DIR/t.IMGFMT.itmd -=== Test pass 0.1 === - -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.itmd -wrote 196608/196608 bytes at offset 2147287040 -192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 131072/131072 bytes at offset 2147352576 -128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -wrote 65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Image committed. -read 196608/196608 bytes at offset 2147287040 -192 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147287040 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147352576 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -read 65536/65536 bytes at offset 2147418112 -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -Offset Length File -0x7ffd 0x3 TEST_DIR/t.IMGFMT.base -Offset Length File -0x7ffd 0x1 TEST_DIR/t.IMGFMT.base -0x7ffe 0x2 TEST_DIR/t.IMGFMT.itmd -Offset Length File -0x7ffd 0x1 TEST_DIR/t.IMGFMT.base -0x7ffe 0x2 TEST_DIR/t.IMGFMT.itmd - -=== Test pass 1.0 === - -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=2202009600 -Formatting 'TEST_DIR/t.IMGFMT.itmd', fmt=IMGFMT size=2202009600 backing_file=TEST_DIR/t.IMGFMT.base -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2202009600
Re: [Qemu-block] [Qemu-devel] [PATCH for-2.9] block/parallels: Avoid overflows
On 03/31/2017 12:05 PM, Max Reitz wrote: > Change the types of variables in allocate_clusters() to int64_t so we do > not have to worry about potential overflows. > > Add an assertion that our accesses to s->bat[] do not result in a buffer > overflow and that the implicit conversion performed when invoking > bat_entry_off() does not result in an integer overflow. > > Coverity-id: 1307776 > Signed-off-by: Max Reitz> --- > This supercedes Peter's patch "block/parallels.c: avoid integer overflow > in allocate_clusters()". > --- > block/parallels.c | 19 +-- > 1 file changed, 13 insertions(+), 6 deletions(-) > Reviewed-by: Eric Blake -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
On 03/31/2017 04:47 PM, Max Reitz wrote: > On 31.03.2017 15:13, Peter Maydell wrote: >> Coverity (CID 1307776) points out that in the multiply: >> space = to_allocate * s->tracks; >> we are trying to calculate a 64 bit result but the types >> of to_allocate and s->tracks mean that we actually calculate >> a 32 bit result. Add an explicit cast to force a 64 bit >> multiply. >> >> Signed-off-by: Peter Maydell>> --- >> NB: compile-and-make-check tested only... >> --- >> block/parallels.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/block/parallels.c b/block/parallels.c >> index 4173b3f..3886c30 100644 >> --- a/block/parallels.c >> +++ b/block/parallels.c >> @@ -206,7 +206,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, >> int64_t sector_num, >> } >> >> to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; >> -space = to_allocate * s->tracks; >> +space = (int64_t)to_allocate * s->tracks; >> if (s->data_end + space > bdrv_getlength(bs->file->bs) >> >> BDRV_SECTOR_BITS) { >> int ret; >> space += s->prealloc_size; > I think the division is technically fine because to_allocate will > roughly be *pnum / s->tracks (and since *pnum is an int, the > multiplication cannot overflow). > > However, it's still good to fix this, but I would do it differently: > Make idx, to_allocate, and i all uint64_t or int64_t instead of > uint32_t. This would also prevent accidental overflow when storing the > result of the division in: > > idx = sector_num / s->tracks; > if (idx >= s->bat_size) { > [...] > > The much greater problem to me appears to be that we don't check that > idx + to_allocate <= s->bat_size. I'm not sure whether there can be a > buffer overflow in the for loop below, but I'm not sure I really want to > know either... I think the block_status() call limits *pnum so that > there will not be an overflow, but then we should at least assert this. > > Max > technically we are protected by the check in static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, int64_t align, QEMUIOVector *qiov, int flags) ... /* Forward the request to the BlockDriver, possibly fragmenting it */ total_bytes = bdrv_getlength(bs); if (total_bytes < 0) { ret = total_bytes; goto out; } max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); if (bytes <= max_bytes && bytes <= max_transfer) { ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); goto out; } which guarantees that the request is always inside the length of the device. Thus we should be on the safe side with the mentioned access as bat_size is calculated from the size of the entire virtual disk. Den
Re: [Qemu-block] [PATCH for-2.9] rbd: Fix regression in legacy key/values containing escaped :
On Fri, Mar 31, 2017 at 10:27:30AM -0500, Eric Blake wrote: > Commit c7cacb3 accidentally broke legacy key-value parsing through > pseudo-filename parsing of -drive file=rbd://..., for any key that > contains an escaped ':'. Such a key is surprisingly common, thanks > to mon_host specifying a 'host:port' string. The break happens > because passing things from QDict through QemuOpts back to another > QDict requires that we pack our parsed key/value pairs into a string, > and then reparse that string, but the intermediate string that we > created ("key1=value1:key2=value2") lost the \: escaping that was > present in the original, so that we could no longer see which : were > used as separators vs. those used as part of the original input. > > Fix it by collecting the key/value pairs through a QList, and > sending that list on a round trip through a JSON QString (as in > '["key1","value1","key2","value2"]') on its way through QemuOpts, > rather than hand-rolling our own string. Since the string is only > handled internally, this was faster than creating a full-blown > struct of '[{"key1":"value1"},{"key2":"value2"}]', and safer at > guaranteeing order compared to '{"key1":"value1","key2":"value2"}'. > > It would be nicer if we didn't have to round-trip through QemuOpts > in the first place, but that's a much bigger task for later. > > Reproducer: > ./x86_64-softmmu/qemu-system-x86_64 -nodefaults -nographic -qmp stdio \ > -drive 'file=rbd:volumes/volume-ea141b5c-cdb3-4765-910d-e7008b209a70'\ > ':id=compute:key=AQAVkvxXABAA9ZxWFYdRmV+DSwKr7BKKXg=='\ > ':auth_supported=cephx\;none:mon_host=192.168.1.2\:6789'\ > ',format=raw,if=none,id=drive-virtio-disk0,'\ > 'serial=ea141b5c-cdb3-4765-910d-e7008b209a70,cache=writeback' > > Even without an RBD setup, this serves a test of whether we get > the incorrect parser error of: > qemu-system-x86_64: -drive file=rbd:...cache=writeback: conf option 6789 has > no value > or the correct behavior of hanging while trying to connect to > the requested mon_host of 192.168.1.2:6789. > > Reported-by: Alexandru Avadanii> Signed-off-by: Eric Blake > --- > block/rbd.c | 83 > +++-- > 1 file changed, 42 insertions(+), 41 deletions(-) > > diff --git a/block/rbd.c b/block/rbd.c > index 498322b..fbdb131 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -20,6 +20,7 @@ > #include "crypto/secret.h" > #include "qemu/cutils.h" > #include "qapi/qmp/qstring.h" > +#include "qapi/qmp/qjson.h" > > /* > * When specifying the image filename use: > @@ -135,18 +136,16 @@ static void qemu_rbd_parse_filename(const char > *filename, QDict *options, > Error **errp) > { > const char *start; > -char *p, *buf, *keypairs; > +char *p, *buf; > +QList *keypairs = NULL; > char *found_str; > -size_t max_keypair_size; > > if (!strstart(filename, "rbd:", )) { > error_setg(errp, "File name must start with 'rbd:'"); > return; > } > > -max_keypair_size = strlen(start) + 1; > buf = g_strdup(start); > -keypairs = g_malloc0(max_keypair_size); > p = buf; > > found_str = qemu_rbd_next_tok(p, '/', ); > @@ -194,33 +193,30 @@ static void qemu_rbd_parse_filename(const char > *filename, QDict *options, > } else if (!strcmp(name, "id")) { > qdict_put(options, "user" , qstring_from_str(value)); > } else { > -/* FIXME: This is pretty ugly, and not the right way to do this. > - *These should be contained in a structure, and then > - *passed explicitly as individual key/value pairs to > - *rados. Consider this legacy code that needs to be > - *updated. */ > -char *tmp = g_malloc0(max_keypair_size); > -/* only use a delimiter if it is not the first keypair found */ > -/* These are sets of unknown key/value pairs we'll pass along > - * to ceph */ > -if (keypairs[0]) { > -snprintf(tmp, max_keypair_size, ":%s=%s", name, value); > -pstrcat(keypairs, max_keypair_size, tmp); > -} else { > -snprintf(keypairs, max_keypair_size, "%s=%s", name, value); > +/* > + * We pass these internally to qemu_rbd_set_keypairs(), so > + * we can get away with the simpler list of [ "key1", > + * "value1", "key2", "value2" ] rather than a raw dict > + * { "key1": "value1", "key2": "value2" } where we can't > + * guarantee order, or even a more correct but complex > + * [ { "key1": "value1" }, { "key2": "value2" } ] > + */ > +if (!keypairs) { > +keypairs = qlist_new(); > } > -g_free(tmp); > +qlist_append(keypairs,
Re: [Qemu-block] [PATCH for-2.9] rbd: Fix regression in legacy key/values containing escaped :
On 31.03.2017 17:27, Eric Blake wrote: > Commit c7cacb3 accidentally broke legacy key-value parsing through > pseudo-filename parsing of -drive file=rbd://..., for any key that > contains an escaped ':'. Such a key is surprisingly common, thanks > to mon_host specifying a 'host:port' string. The break happens > because passing things from QDict through QemuOpts back to another > QDict requires that we pack our parsed key/value pairs into a string, > and then reparse that string, but the intermediate string that we > created ("key1=value1:key2=value2") lost the \: escaping that was > present in the original, so that we could no longer see which : were > used as separators vs. those used as part of the original input. > > Fix it by collecting the key/value pairs through a QList, and > sending that list on a round trip through a JSON QString (as in > '["key1","value1","key2","value2"]') on its way through QemuOpts, > rather than hand-rolling our own string. Since the string is only > handled internally, this was faster than creating a full-blown > struct of '[{"key1":"value1"},{"key2":"value2"}]', and safer at > guaranteeing order compared to '{"key1":"value1","key2":"value2"}'. > > It would be nicer if we didn't have to round-trip through QemuOpts > in the first place, but that's a much bigger task for later. > > Reproducer: > ./x86_64-softmmu/qemu-system-x86_64 -nodefaults -nographic -qmp stdio \ > -drive 'file=rbd:volumes/volume-ea141b5c-cdb3-4765-910d-e7008b209a70'\ > ':id=compute:key=AQAVkvxXABAA9ZxWFYdRmV+DSwKr7BKKXg=='\ > ':auth_supported=cephx\;none:mon_host=192.168.1.2\:6789'\ > ',format=raw,if=none,id=drive-virtio-disk0,'\ > 'serial=ea141b5c-cdb3-4765-910d-e7008b209a70,cache=writeback' > > Even without an RBD setup, this serves a test of whether we get > the incorrect parser error of: > qemu-system-x86_64: -drive file=rbd:...cache=writeback: conf option 6789 has > no value > or the correct behavior of hanging while trying to connect to > the requested mon_host of 192.168.1.2:6789. > > Reported-by: Alexandru Avadanii> Signed-off-by: Eric Blake > --- > block/rbd.c | 83 > +++-- > 1 file changed, 42 insertions(+), 41 deletions(-) Reviewed-by: Max Reitz signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [Qemu-devel] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
On Fri, Mar 31, 2017 at 05:18:39PM +0100, Stefan Hajnoczi wrote: > On Fri, Mar 31, 2017 at 10:40:33AM -0300, Eduardo Habkost wrote: > > On Fri, Mar 31, 2017 at 10:27:44AM -0300, Philippe Mathieu-Daudé wrote: > > > Hi, > > > > > > Eduardo you seem skilled regarding Coccinelle scripts, is it possible to > > > write one to find those overflows? > > > > Probably not. AFAIK, Coccinelle rules are based on local code > > syntax only. This means it doesn't know the data type of > > expressions like (s->tracks). > > I'm surprised by that statement. Coccinelle isn't a text matcher, it's > a proper C compiler frontend that parses the all code in the compilation > unit. Therefore it must have the type information even for s->tracks. You are probably not wrong about it not being just a text matcher. But I'm not sure about it being able to have type information for s->tracks. The documentation isn't clear about that. The 'idexpression' declarations seems to accept some kind of C type annotations (I didn't know that!), but the documentation also says: "A more complex description of a location, such as a->b is considered to be an expression, not an idexpression". And 'expression' metavariables don't seem to support type annotations. My impression is that Coccinelle has limited support to understand simple variable declarations, but not the full set of C type declarations and type system rules that would allow it to figure out the type of an expression like s->tracks. But I really hope to be wrong, because that would be very useful. :) > Disclaimer: This should in no way be considered a volunteer offer to > write cocci scripts now or at any time in the future :). I'm not fluent > in the semantic patch syntax. I don't believe there's anybody in the world fluent in the SmPL syntax. Maybe its authors are, but I wouldn't be so sure. :) -- Eduardo
[Qemu-block] [PATCH for-2.9] block/parallels: Avoid overflows
Change the types of variables in allocate_clusters() to int64_t so we do not have to worry about potential overflows. Add an assertion that our accesses to s->bat[] do not result in a buffer overflow and that the implicit conversion performed when invoking bat_entry_off() does not result in an integer overflow. Coverity-id: 1307776 Signed-off-by: Max Reitz--- This supercedes Peter's patch "block/parallels.c: avoid integer overflow in allocate_clusters()". --- block/parallels.c | 19 +-- 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 4173b3fb9d..90acf79687 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -192,8 +192,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { BDRVParallelsState *s = bs->opaque; -uint32_t idx, to_allocate, i; -int64_t pos, space; +int64_t pos, space, idx, to_allocate, i; pos = block_status(s, sector_num, nb_sectors, pnum); if (pos > 0) { @@ -201,11 +200,19 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } idx = sector_num / s->tracks; -if (idx >= s->bat_size) { -return -EINVAL; -} - to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; + +/* This function is called only by parallels_co_writev(), which will never + * pass a sector_num at or beyond the end of the image (because the block + * layer never passes such a sector_num to that function). Therefore, idx + * is always below s->bat_size. + * block_status() will limit *pnum so that sector_num + *pnum will not + * exceed the image end. Therefore, idx + to_allocate cannot exceed + * s->bat_size. + * Note that s->bat_size is an unsigned int, therefore idx + to_allocate + * will always fit into a uint32_t. */ +assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); + space = to_allocate * s->tracks; if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) { int ret; -- 2.12.1
Re: [Qemu-block] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
On 31.03.2017 18:20, Stefan Hajnoczi wrote: > On Fri, Mar 31, 2017 at 03:47:39PM +0200, Max Reitz wrote: >> On 31.03.2017 15:13, Peter Maydell wrote: >>> Coverity (CID 1307776) points out that in the multiply: >>> space = to_allocate * s->tracks; >>> we are trying to calculate a 64 bit result but the types >>> of to_allocate and s->tracks mean that we actually calculate >>> a 32 bit result. Add an explicit cast to force a 64 bit >>> multiply. >>> >>> Signed-off-by: Peter Maydell>>> --- >>> NB: compile-and-make-check tested only... >>> --- >>> block/parallels.c | 2 +- >>> 1 file changed, 1 insertion(+), 1 deletion(-) >>> >>> diff --git a/block/parallels.c b/block/parallels.c >>> index 4173b3f..3886c30 100644 >>> --- a/block/parallels.c >>> +++ b/block/parallels.c >>> @@ -206,7 +206,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, >>> int64_t sector_num, >>> } >>> >>> to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; >>> -space = to_allocate * s->tracks; >>> +space = (int64_t)to_allocate * s->tracks; >>> if (s->data_end + space > bdrv_getlength(bs->file->bs) >> >>> BDRV_SECTOR_BITS) { >>> int ret; >>> space += s->prealloc_size; >> >> I think the division is technically fine because to_allocate will >> roughly be *pnum / s->tracks (and since *pnum is an int, the >> multiplication cannot overflow). >> >> However, it's still good to fix this, but I would do it differently: >> Make idx, to_allocate, and i all uint64_t or int64_t instead of >> uint32_t. This would also prevent accidental overflow when storing the >> result of the division in: >> >> idx = sector_num / s->tracks; >> if (idx >= s->bat_size) { >> [...] >> >> The much greater problem to me appears to be that we don't check that >> idx + to_allocate <= s->bat_size. I'm not sure whether there can be a >> buffer overflow in the for loop below, but I'm not sure I really want to >> know either... I think the block_status() call limits *pnum so that >> there will not be an overflow, but then we should at least assert this. > > Will you send a new patch that supercedes this one? Well, since you're asking so nicely... Max signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH for-2.9] rbd: Fix regression in legacy key/values containing escaped :
On Fri, Mar 31, 2017 at 10:27:30AM -0500, Eric Blake wrote: > Commit c7cacb3 accidentally broke legacy key-value parsing through > pseudo-filename parsing of -drive file=rbd://..., for any key that > contains an escaped ':'. Such a key is surprisingly common, thanks > to mon_host specifying a 'host:port' string. The break happens > because passing things from QDict through QemuOpts back to another > QDict requires that we pack our parsed key/value pairs into a string, > and then reparse that string, but the intermediate string that we > created ("key1=value1:key2=value2") lost the \: escaping that was > present in the original, so that we could no longer see which : were > used as separators vs. those used as part of the original input. > > Fix it by collecting the key/value pairs through a QList, and > sending that list on a round trip through a JSON QString (as in > '["key1","value1","key2","value2"]') on its way through QemuOpts, > rather than hand-rolling our own string. Since the string is only > handled internally, this was faster than creating a full-blown > struct of '[{"key1":"value1"},{"key2":"value2"}]', and safer at > guaranteeing order compared to '{"key1":"value1","key2":"value2"}'. > > It would be nicer if we didn't have to round-trip through QemuOpts > in the first place, but that's a much bigger task for later. > > Reproducer: > ./x86_64-softmmu/qemu-system-x86_64 -nodefaults -nographic -qmp stdio \ > -drive 'file=rbd:volumes/volume-ea141b5c-cdb3-4765-910d-e7008b209a70'\ > ':id=compute:key=AQAVkvxXABAA9ZxWFYdRmV+DSwKr7BKKXg=='\ > ':auth_supported=cephx\;none:mon_host=192.168.1.2\:6789'\ > ',format=raw,if=none,id=drive-virtio-disk0,'\ > 'serial=ea141b5c-cdb3-4765-910d-e7008b209a70,cache=writeback' > > Even without an RBD setup, this serves a test of whether we get > the incorrect parser error of: > qemu-system-x86_64: -drive file=rbd:...cache=writeback: conf option 6789 has > no value > or the correct behavior of hanging while trying to connect to > the requested mon_host of 192.168.1.2:6789. > > Reported-by: Alexandru Avadanii> Signed-off-by: Eric Blake > --- > block/rbd.c | 83 > +++-- > 1 file changed, 42 insertions(+), 41 deletions(-) > > diff --git a/block/rbd.c b/block/rbd.c > index 498322b..fbdb131 100644 > --- a/block/rbd.c > +++ b/block/rbd.c > @@ -20,6 +20,7 @@ > #include "crypto/secret.h" > #include "qemu/cutils.h" > #include "qapi/qmp/qstring.h" > +#include "qapi/qmp/qjson.h" > > /* > * When specifying the image filename use: > @@ -135,18 +136,16 @@ static void qemu_rbd_parse_filename(const char > *filename, QDict *options, > Error **errp) > { > const char *start; > -char *p, *buf, *keypairs; > +char *p, *buf; > +QList *keypairs = NULL; > char *found_str; > -size_t max_keypair_size; > > if (!strstart(filename, "rbd:", )) { > error_setg(errp, "File name must start with 'rbd:'"); > return; > } > > -max_keypair_size = strlen(start) + 1; > buf = g_strdup(start); > -keypairs = g_malloc0(max_keypair_size); > p = buf; > > found_str = qemu_rbd_next_tok(p, '/', ); > @@ -194,33 +193,30 @@ static void qemu_rbd_parse_filename(const char > *filename, QDict *options, > } else if (!strcmp(name, "id")) { > qdict_put(options, "user" , qstring_from_str(value)); > } else { > -/* FIXME: This is pretty ugly, and not the right way to do this. > - *These should be contained in a structure, and then > - *passed explicitly as individual key/value pairs to > - *rados. Consider this legacy code that needs to be > - *updated. */ > -char *tmp = g_malloc0(max_keypair_size); > -/* only use a delimiter if it is not the first keypair found */ > -/* These are sets of unknown key/value pairs we'll pass along > - * to ceph */ > -if (keypairs[0]) { > -snprintf(tmp, max_keypair_size, ":%s=%s", name, value); > -pstrcat(keypairs, max_keypair_size, tmp); > -} else { > -snprintf(keypairs, max_keypair_size, "%s=%s", name, value); > +/* > + * We pass these internally to qemu_rbd_set_keypairs(), so > + * we can get away with the simpler list of [ "key1", > + * "value1", "key2", "value2" ] rather than a raw dict > + * { "key1": "value1", "key2": "value2" } where we can't > + * guarantee order, or even a more correct but complex > + * [ { "key1": "value1" }, { "key2": "value2" } ] > + */ > +if (!keypairs) { > +keypairs = qlist_new(); > } > -g_free(tmp); > +qlist_append(keypairs,
Re: [Qemu-block] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
On Fri, Mar 31, 2017 at 03:47:39PM +0200, Max Reitz wrote: > On 31.03.2017 15:13, Peter Maydell wrote: > > Coverity (CID 1307776) points out that in the multiply: > > space = to_allocate * s->tracks; > > we are trying to calculate a 64 bit result but the types > > of to_allocate and s->tracks mean that we actually calculate > > a 32 bit result. Add an explicit cast to force a 64 bit > > multiply. > > > > Signed-off-by: Peter Maydell> > --- > > NB: compile-and-make-check tested only... > > --- > > block/parallels.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/block/parallels.c b/block/parallels.c > > index 4173b3f..3886c30 100644 > > --- a/block/parallels.c > > +++ b/block/parallels.c > > @@ -206,7 +206,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, > > int64_t sector_num, > > } > > > > to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; > > -space = to_allocate * s->tracks; > > +space = (int64_t)to_allocate * s->tracks; > > if (s->data_end + space > bdrv_getlength(bs->file->bs) >> > > BDRV_SECTOR_BITS) { > > int ret; > > space += s->prealloc_size; > > I think the division is technically fine because to_allocate will > roughly be *pnum / s->tracks (and since *pnum is an int, the > multiplication cannot overflow). > > However, it's still good to fix this, but I would do it differently: > Make idx, to_allocate, and i all uint64_t or int64_t instead of > uint32_t. This would also prevent accidental overflow when storing the > result of the division in: > > idx = sector_num / s->tracks; > if (idx >= s->bat_size) { > [...] > > The much greater problem to me appears to be that we don't check that > idx + to_allocate <= s->bat_size. I'm not sure whether there can be a > buffer overflow in the for loop below, but I'm not sure I really want to > know either... I think the block_status() call limits *pnum so that > there will not be an overflow, but then we should at least assert this. Will you send a new patch that supercedes this one? Stefan signature.asc Description: PGP signature
Re: [Qemu-block] [Qemu-devel] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
On Fri, Mar 31, 2017 at 10:40:33AM -0300, Eduardo Habkost wrote: > On Fri, Mar 31, 2017 at 10:27:44AM -0300, Philippe Mathieu-Daudé wrote: > > Hi, > > > > Eduardo you seem skilled regarding Coccinelle scripts, is it possible to > > write one to find those overflows? > > Probably not. AFAIK, Coccinelle rules are based on local code > syntax only. This means it doesn't know the data type of > expressions like (s->tracks). I'm surprised by that statement. Coccinelle isn't a text matcher, it's a proper C compiler frontend that parses the all code in the compilation unit. Therefore it must have the type information even for s->tracks. Disclaimer: This should in no way be considered a volunteer offer to write cocci scripts now or at any time in the future :). I'm not fluent in the semantic patch syntax. Stefan signature.asc Description: PGP signature
[Qemu-block] [PATCH for-2.9] rbd: Fix regression in legacy key/values containing escaped :
Commit c7cacb3 accidentally broke legacy key-value parsing through pseudo-filename parsing of -drive file=rbd://..., for any key that contains an escaped ':'. Such a key is surprisingly common, thanks to mon_host specifying a 'host:port' string. The break happens because passing things from QDict through QemuOpts back to another QDict requires that we pack our parsed key/value pairs into a string, and then reparse that string, but the intermediate string that we created ("key1=value1:key2=value2") lost the \: escaping that was present in the original, so that we could no longer see which : were used as separators vs. those used as part of the original input. Fix it by collecting the key/value pairs through a QList, and sending that list on a round trip through a JSON QString (as in '["key1","value1","key2","value2"]') on its way through QemuOpts, rather than hand-rolling our own string. Since the string is only handled internally, this was faster than creating a full-blown struct of '[{"key1":"value1"},{"key2":"value2"}]', and safer at guaranteeing order compared to '{"key1":"value1","key2":"value2"}'. It would be nicer if we didn't have to round-trip through QemuOpts in the first place, but that's a much bigger task for later. Reproducer: ./x86_64-softmmu/qemu-system-x86_64 -nodefaults -nographic -qmp stdio \ -drive 'file=rbd:volumes/volume-ea141b5c-cdb3-4765-910d-e7008b209a70'\ ':id=compute:key=AQAVkvxXABAA9ZxWFYdRmV+DSwKr7BKKXg=='\ ':auth_supported=cephx\;none:mon_host=192.168.1.2\:6789'\ ',format=raw,if=none,id=drive-virtio-disk0,'\ 'serial=ea141b5c-cdb3-4765-910d-e7008b209a70,cache=writeback' Even without an RBD setup, this serves a test of whether we get the incorrect parser error of: qemu-system-x86_64: -drive file=rbd:...cache=writeback: conf option 6789 has no value or the correct behavior of hanging while trying to connect to the requested mon_host of 192.168.1.2:6789. Reported-by: Alexandru AvadaniiSigned-off-by: Eric Blake --- block/rbd.c | 83 +++-- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/block/rbd.c b/block/rbd.c index 498322b..fbdb131 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -20,6 +20,7 @@ #include "crypto/secret.h" #include "qemu/cutils.h" #include "qapi/qmp/qstring.h" +#include "qapi/qmp/qjson.h" /* * When specifying the image filename use: @@ -135,18 +136,16 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, Error **errp) { const char *start; -char *p, *buf, *keypairs; +char *p, *buf; +QList *keypairs = NULL; char *found_str; -size_t max_keypair_size; if (!strstart(filename, "rbd:", )) { error_setg(errp, "File name must start with 'rbd:'"); return; } -max_keypair_size = strlen(start) + 1; buf = g_strdup(start); -keypairs = g_malloc0(max_keypair_size); p = buf; found_str = qemu_rbd_next_tok(p, '/', ); @@ -194,33 +193,30 @@ static void qemu_rbd_parse_filename(const char *filename, QDict *options, } else if (!strcmp(name, "id")) { qdict_put(options, "user" , qstring_from_str(value)); } else { -/* FIXME: This is pretty ugly, and not the right way to do this. - *These should be contained in a structure, and then - *passed explicitly as individual key/value pairs to - *rados. Consider this legacy code that needs to be - *updated. */ -char *tmp = g_malloc0(max_keypair_size); -/* only use a delimiter if it is not the first keypair found */ -/* These are sets of unknown key/value pairs we'll pass along - * to ceph */ -if (keypairs[0]) { -snprintf(tmp, max_keypair_size, ":%s=%s", name, value); -pstrcat(keypairs, max_keypair_size, tmp); -} else { -snprintf(keypairs, max_keypair_size, "%s=%s", name, value); +/* + * We pass these internally to qemu_rbd_set_keypairs(), so + * we can get away with the simpler list of [ "key1", + * "value1", "key2", "value2" ] rather than a raw dict + * { "key1": "value1", "key2": "value2" } where we can't + * guarantee order, or even a more correct but complex + * [ { "key1": "value1" }, { "key2": "value2" } ] + */ +if (!keypairs) { +keypairs = qlist_new(); } -g_free(tmp); +qlist_append(keypairs, qstring_from_str(name)); +qlist_append(keypairs, qstring_from_str(value)); } } -if (keypairs[0]) { -qdict_put(options, "=keyvalue-pairs", qstring_from_str(keypairs)); +if (keypairs) { +qdict_put(options, "=keyvalue-pairs", +
Re: [Qemu-block] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
On 31.03.2017 16:54, Denis V. Lunev wrote: > On 03/31/2017 04:47 PM, Max Reitz wrote: >> On 31.03.2017 15:13, Peter Maydell wrote: >>> Coverity (CID 1307776) points out that in the multiply: >>> space = to_allocate * s->tracks; >>> we are trying to calculate a 64 bit result but the types >>> of to_allocate and s->tracks mean that we actually calculate >>> a 32 bit result. Add an explicit cast to force a 64 bit >>> multiply. >>> >>> Signed-off-by: Peter Maydell>>> --- >>> NB: compile-and-make-check tested only... >>> --- >>> block/parallels.c | 2 +- >>> 1 file changed, 1 insertion(+), 1 deletion(-) >>> >>> diff --git a/block/parallels.c b/block/parallels.c >>> index 4173b3f..3886c30 100644 >>> --- a/block/parallels.c >>> +++ b/block/parallels.c >>> @@ -206,7 +206,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, >>> int64_t sector_num, >>> } >>> >>> to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; >>> -space = to_allocate * s->tracks; >>> +space = (int64_t)to_allocate * s->tracks; >>> if (s->data_end + space > bdrv_getlength(bs->file->bs) >> >>> BDRV_SECTOR_BITS) { >>> int ret; >>> space += s->prealloc_size; >> I think the division is technically fine because to_allocate will >> roughly be *pnum / s->tracks (and since *pnum is an int, the >> multiplication cannot overflow). >> >> However, it's still good to fix this, but I would do it differently: >> Make idx, to_allocate, and i all uint64_t or int64_t instead of >> uint32_t. This would also prevent accidental overflow when storing the >> result of the division in: >> >> idx = sector_num / s->tracks; >> if (idx >= s->bat_size) { >> [...] >> >> The much greater problem to me appears to be that we don't check that >> idx + to_allocate <= s->bat_size. I'm not sure whether there can be a >> buffer overflow in the for loop below, but I'm not sure I really want to >> know either... I think the block_status() call limits *pnum so that >> there will not be an overflow, but then we should at least assert this. >> >> Max >> > technically we are protected by the check in > > static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, > BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, > int64_t align, QEMUIOVector *qiov, int flags) > ... > /* Forward the request to the BlockDriver, possibly fragmenting it */ > total_bytes = bdrv_getlength(bs); > if (total_bytes < 0) { > ret = total_bytes; > goto out; > } > > max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); > if (bytes <= max_bytes && bytes <= max_transfer) { > ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); > goto out; > } > > which guarantees that the request is always inside the length of the > device. Thus we should be on the safe side with the mentioned > access as bat_size is calculated from the size of the entire virtual > disk. Right, but then we wouldn't need the check on idx. With the way things are, it looks a bit confusing. Maybe we should just make it an assertion? assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); Max signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH v7 3/3] qcow2: Discard unaligned tail when wiping image
On 03/31/2017 09:01 AM, Max Reitz wrote: >>Do I need to >> respin for that, or is it something a maintainer could tweak, or is the >> series fine as-is? > > Of course I can fix the code, but changing the commit messages is a bit > more involved... > >> For what it's worth, the policy of single test after >> the patch is somewhat opposite of Markus' approach of test first showing >> the buggy behavior, then patch that includes the testsuite fix to match >> the patch. But I can live with either order, so I won't respin without >> an explicit request to do so. > > Well, then consider this an explicit request. :-) Yes sir! v8 coming right up. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH v7 3/3] qcow2: Discard unaligned tail when wiping image
On 31.03.2017 15:56, Eric Blake wrote: > On 03/31/2017 07:51 AM, Max Reitz wrote: >> On 31.03.2017 00:36, Eric Blake wrote: >>> The previous commit pointed out a subtle difference between the >>> fast and slow path of qcow2_make_empty(), where we failed to discard >>> the final (partial) cluster of an unaligned image. >>> > >>> +/* The caller must cluster-align start; round end down except at EOF */ >>> +assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); >>> +if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) { >>> +end_offset = start_of_cluster(s, end_offset); >>> } >> >> This change looks good and works for qcow2_make_empty(), but >> qcow2_co_pdiscard() will still drop these requests: > > Yes, but qcow2_co_pdiscard() is advisory, and leaving the last partial > cluster undiscarded (consistent for what we do for all other partial > cluster requests) is different than our documented notion that > qcow2_make_empty() gets rid of all clusters no matter what. > >> We don't necessarily have to fix it for 2.9, so: > > Agreed that enhancing pdiscard to special-case a partial cluster at EOF > is not a bug fix, and therefore 2.10 material if we even want it. Why would we not want it? :-) > >> >> Reviewed-by: Max Reitz>> >>> >>> nb_clusters = size_to_clusters(s, end_offset - offset); >>> diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out >>> index 990a41c..6271fa7 100644 >>> --- a/tests/qemu-iotests/176.out >>> +++ b/tests/qemu-iotests/176.out >>> @@ -35,7 +35,7 @@ Offset Length File >>> Offset Length File >>> 0x7ffd 0x1 TEST_DIR/t.IMGFMT.base >>> 0x7ffe 0x2 TEST_DIR/t.IMGFMT.itmd >>> -0x8340 0x200 TEST_DIR/t.IMGFMT >>> +0x8340 0x200 TEST_DIR/t.IMGFMT.itmd > > As to your comment about swapping patches 2 and 3, if I did that, then I > would not have this change to 176.out during the bug fix, and would > instead squash this line into the single patch touching the testsuite to > add the enhancement. Right. > How important is the swapped order? As you can probably guess, technically not important. But I having reference outputs that are not actually a reference kind of defeats the purpose in my opinion. >Do I need to > respin for that, or is it something a maintainer could tweak, or is the > series fine as-is? Of course I can fix the code, but changing the commit messages is a bit more involved... > For what it's worth, the policy of single test after > the patch is somewhat opposite of Markus' approach of test first showing > the buggy behavior, then patch that includes the testsuite fix to match > the patch. But I can live with either order, so I won't respin without > an explicit request to do so. Well, then consider this an explicit request. :-) Max signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH] qemu-io-cmds: Assert that global and nofile commands don't use ct->perms
On 31.03.2017 15:38, Peter Maydell wrote: > It would be a bug for a command with the CMD_NOFILE_OK or > CMD_FLAG_GLOBAL flags set to also set the ct->perms field, > because the former says "OK for a file not to be open" > but the latter is a check on a file. > > Add an assertion in qemuio_add_command() so we can catch that > sort of buggy command definition immediately rather than it > being a bug that only manifests when a particular set of > command line options is used. > > (Coverity gets confused about this (CID 1371723) and reports > that we might dereference a NULL blk pointer in this case, > because it can't tell that that code path never happens with > the cmdinfo_t that we have. This commit won't help unconfuse > it, but it does fix the underlying issue.) > > Signed-off-by: Peter Maydell> --- > We could also try to add some kind of assert in command() to > persuade coverity that it can't get there with a NULL block > and ct->perms non-zero, but I think I'd rather just mark the > issue as a false-positive and move on. I think having the assertion in qemuio_add_command() is nicer because if we break it we will definitely notice just by launching qemu-io. > qemu-io-cmds.c | 7 +++ > 1 file changed, 7 insertions(+) Thanks, applied to my block branch: https://github.com/XanClic/qemu/commits/block Max signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH v7 3/3] qcow2: Discard unaligned tail when wiping image
On 03/31/2017 07:51 AM, Max Reitz wrote: > On 31.03.2017 00:36, Eric Blake wrote: >> The previous commit pointed out a subtle difference between the >> fast and slow path of qcow2_make_empty(), where we failed to discard >> the final (partial) cluster of an unaligned image. >> >> +/* The caller must cluster-align start; round end down except at EOF */ >> +assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); >> +if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) { >> +end_offset = start_of_cluster(s, end_offset); >> } > > This change looks good and works for qcow2_make_empty(), but > qcow2_co_pdiscard() will still drop these requests: Yes, but qcow2_co_pdiscard() is advisory, and leaving the last partial cluster undiscarded (consistent for what we do for all other partial cluster requests) is different than our documented notion that qcow2_make_empty() gets rid of all clusters no matter what. > We don't necessarily have to fix it for 2.9, so: Agreed that enhancing pdiscard to special-case a partial cluster at EOF is not a bug fix, and therefore 2.10 material if we even want it. > > Reviewed-by: Max Reitz> >> >> nb_clusters = size_to_clusters(s, end_offset - offset); >> diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out >> index 990a41c..6271fa7 100644 >> --- a/tests/qemu-iotests/176.out >> +++ b/tests/qemu-iotests/176.out >> @@ -35,7 +35,7 @@ Offset Length File >> Offset Length File >> 0x7ffd 0x1 TEST_DIR/t.IMGFMT.base >> 0x7ffe 0x2 TEST_DIR/t.IMGFMT.itmd >> -0x8340 0x200 TEST_DIR/t.IMGFMT >> +0x8340 0x200 TEST_DIR/t.IMGFMT.itmd As to your comment about swapping patches 2 and 3, if I did that, then I would not have this change to 176.out during the bug fix, and would instead squash this line into the single patch touching the testsuite to add the enhancement. How important is the swapped order? Do I need to respin for that, or is it something a maintainer could tweak, or is the series fine as-is? For what it's worth, the policy of single test after the patch is somewhat opposite of Markus' approach of test first showing the buggy behavior, then patch that includes the testsuite fix to match the patch. But I can live with either order, so I won't respin without an explicit request to do so. -- Eric Blake eblake redhat com+1-919-301-3266 Libvirt virtualization library http://libvirt.org signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [Qemu-devel] [PATCH for-2.9 2/2] block/curl: Check protocol prefix
On 03/31/2017 09:04 AM, Max Reitz wrote: If the user has explicitly specified a block driver and thus a protocol, we have to make sure the URL's protocol prefix matches. Otherwise the latter will silently override the former which might catch some users by surprise. Signed-off-by: Max ReitzReviewed-by: Philippe Mathieu-Daudé --- block/curl.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/block/curl.c b/block/curl.c index 34dbd335f4..2708d57c2f 100644 --- a/block/curl.c +++ b/block/curl.c @@ -659,6 +659,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, const char *cookie; double d; const char *secretid; +const char *protocol_delimiter; static int inited = 0; @@ -700,6 +701,15 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, goto out_noclean; } +if (!strstart(file, bs->drv->protocol_name, _delimiter) || +!strstart(protocol_delimiter, "://", NULL)) +{ +error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not " + "start with '%s://')", bs->drv->protocol_name, file, + bs->drv->protocol_name); +goto out_noclean; +} + s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME)); secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
Re: [Qemu-block] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
On 31.03.2017 15:13, Peter Maydell wrote: > Coverity (CID 1307776) points out that in the multiply: > space = to_allocate * s->tracks; > we are trying to calculate a 64 bit result but the types > of to_allocate and s->tracks mean that we actually calculate > a 32 bit result. Add an explicit cast to force a 64 bit > multiply. > > Signed-off-by: Peter Maydell> --- > NB: compile-and-make-check tested only... > --- > block/parallels.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/block/parallels.c b/block/parallels.c > index 4173b3f..3886c30 100644 > --- a/block/parallels.c > +++ b/block/parallels.c > @@ -206,7 +206,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, > int64_t sector_num, > } > > to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; > -space = to_allocate * s->tracks; > +space = (int64_t)to_allocate * s->tracks; > if (s->data_end + space > bdrv_getlength(bs->file->bs) >> > BDRV_SECTOR_BITS) { > int ret; > space += s->prealloc_size; I think the division is technically fine because to_allocate will roughly be *pnum / s->tracks (and since *pnum is an int, the multiplication cannot overflow). However, it's still good to fix this, but I would do it differently: Make idx, to_allocate, and i all uint64_t or int64_t instead of uint32_t. This would also prevent accidental overflow when storing the result of the division in: idx = sector_num / s->tracks; if (idx >= s->bat_size) { [...] The much greater problem to me appears to be that we don't check that idx + to_allocate <= s->bat_size. I'm not sure whether there can be a buffer overflow in the for loop below, but I'm not sure I really want to know either... I think the block_status() call limits *pnum so that there will not be an overflow, but then we should at least assert this. Max signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [Qemu-devel] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
On Fri, Mar 31, 2017 at 10:27:44AM -0300, Philippe Mathieu-Daudé wrote: > Hi, > > Eduardo you seem skilled regarding Coccinelle scripts, is it possible to > write one to find those overflows? Probably not. AFAIK, Coccinelle rules are based on local code syntax only. This means it doesn't know the data type of expressions like (s->tracks). > > Peter having one more macro might help or confuses more? > > #define MULTIPLY64(a32, b32) ((int64_t)a32 * b32) > > On 03/31/2017 10:13 AM, Peter Maydell wrote: > > Coverity (CID 1307776) points out that in the multiply: > > space = to_allocate * s->tracks; > > we are trying to calculate a 64 bit result but the types > > of to_allocate and s->tracks mean that we actually calculate > > a 32 bit result. Add an explicit cast to force a 64 bit > > multiply. > > > > Signed-off-by: Peter Maydell> > Reviewed-by: Philippe Mathieu-Daudé > > > --- > > NB: compile-and-make-check tested only... > > --- > > block/parallels.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/block/parallels.c b/block/parallels.c > > index 4173b3f..3886c30 100644 > > --- a/block/parallels.c > > +++ b/block/parallels.c > > @@ -206,7 +206,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, > > int64_t sector_num, > > } > > > > to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; > > -space = to_allocate * s->tracks; > > +space = (int64_t)to_allocate * s->tracks; > > if (s->data_end + space > bdrv_getlength(bs->file->bs) >> > > BDRV_SECTOR_BITS) { > > int ret; > > space += s->prealloc_size; > > -- Eduardo
Re: [Qemu-block] [PATCH v3 for-2.10 0/4] block: Add errp to b{lk, drv}_truncate()
On 28.03.2017 22:51, Max Reitz wrote: > Having an Error parameter for these functions makes sense because we > sometimes want a bit more information than just "Something failed". Some > drivers already use error_report() and the like to emit this additional > information, so it's rather obvious that we do want a real error object > here. > > > v3: > - Patch 2: Keep "Could not resize image" message in qcow2_create2() by >using error_prepend() [Kevin] > - Patch 3: Dropped archipelago > - Patch 4: > - Keep errno information where available [Kevin] > - Make all drivers generate error messages [Stefan/Eric] > - Drop generic error message from bdrv_truncate() [Stefan/Eric] > > > Max Reitz (4): > block/vhdx: Make vhdx_create() always set errp > block: Add errp to b{lk,drv}_truncate() > block: Add errp to BD.bdrv_truncate() > block: Add .bdrv_truncate() error messages > > include/block/block.h | 2 +- > include/block/block_int.h | 2 +- > include/sysemu/block-backend.h | 2 +- > block.c| 16 +++- > block/blkdebug.c | 4 ++-- > block/block-backend.c | 5 +++-- > block/commit.c | 5 +++-- > block/crypto.c | 5 +++-- > block/file-posix.c | 19 +-- > block/file-win32.c | 6 +++--- > block/gluster.c| 7 +-- > block/iscsi.c | 6 -- > block/mirror.c | 2 +- > block/nfs.c| 12 ++-- > block/parallels.c | 13 - > block/qcow.c | 6 +++--- > block/qcow2-refcount.c | 5 - > block/qcow2.c | 24 +++- > block/qed.c| 8 +--- > block/raw-format.c | 6 -- > block/rbd.c| 3 ++- > block/sheepdog.c | 14 ++ > block/vdi.c| 4 ++-- > block/vhdx-log.c | 2 +- > block/vhdx.c | 25 ++--- > block/vmdk.c | 13 +++-- > block/vpc.c| 13 +++-- > blockdev.c | 21 + > qemu-img.c | 17 - > qemu-io-cmds.c | 5 +++-- > 30 files changed, 147 insertions(+), 125 deletions(-) Applied to my block-next branch: https://github.com/XanClic/qemu/commits/block-next Max signature.asc Description: OpenPGP digital signature
[Qemu-block] [PATCH] qemu-io-cmds: Assert that global and nofile commands don't use ct->perms
It would be a bug for a command with the CMD_NOFILE_OK or CMD_FLAG_GLOBAL flags set to also set the ct->perms field, because the former says "OK for a file not to be open" but the latter is a check on a file. Add an assertion in qemuio_add_command() so we can catch that sort of buggy command definition immediately rather than it being a bug that only manifests when a particular set of command line options is used. (Coverity gets confused about this (CID 1371723) and reports that we might dereference a NULL blk pointer in this case, because it can't tell that that code path never happens with the cmdinfo_t that we have. This commit won't help unconfuse it, but it does fix the underlying issue.) Signed-off-by: Peter Maydell--- We could also try to add some kind of assert in command() to persuade coverity that it can't get there with a NULL block and ct->perms non-zero, but I think I'd rather just mark the issue as a false-positive and move on. qemu-io-cmds.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 2c48f9c..883f53b 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -35,6 +35,13 @@ static int compare_cmdname(const void *a, const void *b) void qemuio_add_command(const cmdinfo_t *ci) { +/* ci->perm assumes a file is open, but the GLOBAL and NOFILE_OK + * flags allow it not to be, so that combination is invalid. + * Catch it now rather than letting it manifest as a crash if a + * particular set of command line options are used. + */ +assert(ci->perm == 0 || + (ci->flags & (CMD_FLAG_GLOBAL | CMD_NOFILE_OK)) == 0); cmdtab = g_renew(cmdinfo_t, cmdtab, ++ncmds); cmdtab[ncmds - 1] = *ci; qsort(cmdtab, ncmds, sizeof(*cmdtab), compare_cmdname); -- 2.7.4
Re: [Qemu-block] [Qemu-devel] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
Hi, Eduardo you seem skilled regarding Coccinelle scripts, is it possible to write one to find those overflows? Peter having one more macro might help or confuses more? #define MULTIPLY64(a32, b32) ((int64_t)a32 * b32) On 03/31/2017 10:13 AM, Peter Maydell wrote: Coverity (CID 1307776) points out that in the multiply: space = to_allocate * s->tracks; we are trying to calculate a 64 bit result but the types of to_allocate and s->tracks mean that we actually calculate a 32 bit result. Add an explicit cast to force a 64 bit multiply. Signed-off-by: Peter MaydellReviewed-by: Philippe Mathieu-Daudé --- NB: compile-and-make-check tested only... --- block/parallels.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/parallels.c b/block/parallels.c index 4173b3f..3886c30 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -206,7 +206,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; -space = to_allocate * s->tracks; +space = (int64_t)to_allocate * s->tracks; if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) { int ret; space += s->prealloc_size;
[Qemu-block] [PATCH] block/parallels.c: avoid integer overflow in allocate_clusters()
Coverity (CID 1307776) points out that in the multiply: space = to_allocate * s->tracks; we are trying to calculate a 64 bit result but the types of to_allocate and s->tracks mean that we actually calculate a 32 bit result. Add an explicit cast to force a 64 bit multiply. Signed-off-by: Peter Maydell--- NB: compile-and-make-check tested only... --- block/parallels.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/parallels.c b/block/parallels.c index 4173b3f..3886c30 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -206,7 +206,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, } to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx; -space = to_allocate * s->tracks; +space = (int64_t)to_allocate * s->tracks; if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) { int ret; space += s->prealloc_size; -- 2.7.4
Re: [Qemu-block] [PATCH for-2.9] iotests: Fix test 147
On 31.03.2017 00:12, Max Reitz wrote: > This test has been broken by changing NBD's blockdev-add interface (from > taking a SocketAddress to taking a SocketAddressFlat). This patch makes > it work again. > > Unfortunately, we cannot just flatten all of the addresses because > nbd-server-start still takes a plain SocketAddress. Therefore, we need > both and this is most easily achieved by writing the SocketAddress into > the code and flattening it where necessary. > > Signed-off-by: Max Reitz> --- > I can be convinced to squash this into Markus' patch "nbd: Tidy up > blockdev-add interface" which is the reason this test broke. I > personally don't have a strong opinion on the matter other than "We > won't get Markus to send a v4". > --- > tests/qemu-iotests/147 | 25 + > 1 file changed, 17 insertions(+), 8 deletions(-) Squashed into said patch: https://github.com/XanClic/qemu/commit/block^ (on my block branch: https://github.com/XanClic/qemu/commits/block) Max signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH v7 3/3] qcow2: Discard unaligned tail when wiping image
On 31.03.2017 00:36, Eric Blake wrote: > The previous commit pointed out a subtle difference between the > fast and slow path of qcow2_make_empty(), where we failed to discard > the final (partial) cluster of an unaligned image. > > The problem stems from the fact that qcow2_discard_clusters() was > silently ignoring sub-cluster head and tail on unaligned requests. > A quick audit of all callers shows that qcow2_snapshot_create() has > always passed a cluster-aligned request since the call was added > in commit 1ebf561; qcow2_co_pdiscard() has passed a cluster-aligned > request since commit ecdbead taught the block layer about preferred > discard alignment; and qcow2_make_empty() was fixed to pass an > aligned start (but not necessarily end) in commit a3e1505. > > Asserting that the start is always aligned also points out that we > now have a dead check: rounding the end offset down can never result > in a value less than the aligned start offset (the check was rendered > dead with commit ecdbead). Meanwhile, we do not want to round the > end cluster down in the one case of the end offset matching the > (unaligned) file size - that final partial cluster should still be > discarded. > > With those fixes in place, we can adjust the testsuite so that > qemu-iotests 97 and 176 are once again identical for qcow2, showing > that fast and slow paths are back in sync. > > Signed-off-by: Eric Blake> > --- > v7: new patch > --- > block/qcow2-cluster.c | 10 -- > tests/qemu-iotests/176.out | 2 +- > 2 files changed, 5 insertions(+), 7 deletions(-) > > diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c > index 78c11d4..100398c 100644 > --- a/block/qcow2-cluster.c > +++ b/block/qcow2-cluster.c > @@ -1519,12 +1519,10 @@ int qcow2_discard_clusters(BlockDriverState *bs, > uint64_t offset, > > end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS); > > -/* Round start up and end down */ > -offset = align_offset(offset, s->cluster_size); > -end_offset = start_of_cluster(s, end_offset); > - > -if (offset > end_offset) { > -return 0; > +/* The caller must cluster-align start; round end down except at EOF */ > +assert(QEMU_IS_ALIGNED(offset, s->cluster_size)); > +if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) { > +end_offset = start_of_cluster(s, end_offset); > } This change looks good and works for qcow2_make_empty(), but qcow2_co_pdiscard() will still drop these requests: $ ./qemu-img create -f qcow2 foo.qcow2 512 Formatting 'foo.qcow2', fmt=qcow2 size=512 encryption=off cluster_size=65536 lazy_refcounts=off refcount_bits=16 $ ./qemu-io -c 'write 0 512' foo.qcow2 wrote 512/512 bytes at offset 0 512 bytes, 1 ops; 0.0062 sec (80.167 KiB/sec and 160.3335 ops/sec) $ ./qemu-img map foo.qcow2 Offset Length Mapped to File 0 0x200 0x5 foo.qcow2 $ ./qemu-io -c 'discard 0 512' foo.qcow2 discard 512/512 bytes at offset 0 512 bytes, 1 ops; 0. sec (34.877 MiB/sec and 71428.5714 ops/sec) $ ./qemu-img map foo.qcow2 Offset Length Mapped to File 0 0x200 0x5 foo.qcow2 We don't necessarily have to fix it for 2.9, so: Reviewed-by: Max Reitz > > nb_clusters = size_to_clusters(s, end_offset - offset); > diff --git a/tests/qemu-iotests/176.out b/tests/qemu-iotests/176.out > index 990a41c..6271fa7 100644 > --- a/tests/qemu-iotests/176.out > +++ b/tests/qemu-iotests/176.out > @@ -35,7 +35,7 @@ Offset Length File > Offset Length File > 0x7ffd 0x1 TEST_DIR/t.IMGFMT.base > 0x7ffe 0x2 TEST_DIR/t.IMGFMT.itmd > -0x8340 0x200 TEST_DIR/t.IMGFMT > +0x8340 0x200 TEST_DIR/t.IMGFMT.itmd > > === Test pass 1 === > signature.asc Description: OpenPGP digital signature
Re: [Qemu-block] [PATCH v7 2/3] iotests: Improve image-clear tests on non-aligned image
On 31.03.2017 00:36, Eric Blake wrote: > Tweak 097 and 176 to operate on an image that is not cluster-aligned, > to give further coverage of clearing out an entire image. > > Tested for qcow (97) and qcow2 (97 and 176). > > The fact that there is a subtle difference between the expected > outputs of 97 and 176 on pass 0 is evidence that we still don't > always clear out the full image in qcow2. Commit a3e1505 fixed > the case of clusters stranded at the 2G mark, but did not fix > stranding an unaligned tail. That will be the next patch, kept > separate to make it easier to see the impact. I'm not sure I like this very much. The reference output should always be what we actually expect it to be. Therefore, usually the issue is fixed first and the test is added only afterwards. If I want to see the impact I can just do what I always do in those cases: Go to the base commit, compile qemu, step forward to commits to the test, run the test, see it fail, compile qemu again, run the test, see it pass. So would it be possible to switch this and patch 3? (Apart from that, this patch looks good.) Max > Signed-off-by: Eric Blake> > --- > v7: also write data in the unaligned cluster, and document bug it exposes > v6: new patch > --- > tests/qemu-iotests/097 | 17 +- > tests/qemu-iotests/097.out | 55 > -- > tests/qemu-iotests/176 | 17 +- > tests/qemu-iotests/176.out | 55 > -- > 4 files changed, 108 insertions(+), 36 deletions(-) signature.asc Description: OpenPGP digital signature
[Qemu-block] [PATCH for-2.9 2/2] block/curl: Check protocol prefix
If the user has explicitly specified a block driver and thus a protocol, we have to make sure the URL's protocol prefix matches. Otherwise the latter will silently override the former which might catch some users by surprise. Signed-off-by: Max Reitz--- block/curl.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/block/curl.c b/block/curl.c index 34dbd335f4..2708d57c2f 100644 --- a/block/curl.c +++ b/block/curl.c @@ -659,6 +659,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, const char *cookie; double d; const char *secretid; +const char *protocol_delimiter; static int inited = 0; @@ -700,6 +701,15 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, goto out_noclean; } +if (!strstart(file, bs->drv->protocol_name, _delimiter) || +!strstart(protocol_delimiter, "://", NULL)) +{ +error_setg(errp, "%s curl driver cannot handle the URL '%s' (does not " + "start with '%s://')", bs->drv->protocol_name, file, + bs->drv->protocol_name); +goto out_noclean; +} + s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME)); secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET); -- 2.12.1
[Qemu-block] [PATCH for-2.9 0/2] curl: Extend and fix blockdev-add schema
Yes, it's yet another episode in our popular get-blockdev-add-ready-for-2.9 drama! Right now, the schema for the curl block driver is seriously lacking. This series improves things at least a bit. To improve things seriously, we might want to structure the URL instead of it being just a plain string, and we might want to split the cookie string into a list of dicts or something similar. However, strictly speaking our curl block driver is *not* an (ht|f)tps? block driver but just a curl driver. All it does is pass some options to libcurl and then send and receive data from it. (We really should have just named it "curl" from the start.) Therefore, it probably is for the best to leave these options rather opaque and let libcurl do the interpretation. Max Reitz (2): qapi/curl: Extend and fix blockdev-add schema block/curl: Check protocol prefix qapi/block-core.json | 103 ++- block/curl.c | 10 + 2 files changed, 104 insertions(+), 9 deletions(-) -- 2.12.1
Re: [Qemu-block] [Qemu-devel] callout to *file in bdrv_co_get_block_status
On 31/03/2017 09:55, Peter Lieven wrote: >>> Would it be an idea to introduce an inverse flag live BDRV_BLOCK_NOT_ZERO >>> for cases where we know that there is really DATA and thus can avoid the >>> second callout? >> How would you know that a block is nonzero? > I would trust the metadata. At least for VMDK and QCOW2v3. > Bad idea? The metadata only tells you that a block is zero, not that it's nonzero. What you are suggesting is really the same as removing the recursion. However, I still haven't understood clearly if it's a QEMU or tmpfs bug; if it's a tmpfs bug your suggestion would not fix tmpfs slowness on raw images. Paolo
Re: [Qemu-block] [Qemu-devel] [PATCH v2 7/7] vmdk: Update metadata for multiple clusters
On Fri, Mar 31, 2017 at 2:38 PM, Fam Zhengwrote: > On Fri, 03/31 14:17, Ashijeet Acharya wrote: >> On Fri, Mar 31, 2017 at 12:56 PM, Fam Zheng wrote: >> > On Sat, 03/25 16:48, Ashijeet Acharya wrote: >> >> Include a next pointer in VmdkMetaData struct to point to the previous >> >> allocated L2 table. Modify vmdk_L2update to start updating metadata for >> >> allocation of multiple clusters at once. >> >> >> >> Signed-off-by: Ashijeet Acharya >> >> --- >> >> block/vmdk.c | 131 >> >> ++- >> >> 1 file changed, 102 insertions(+), 29 deletions(-) >> >> >> >> diff --git a/block/vmdk.c b/block/vmdk.c >> >> index 3de8b8f..4517409 100644 >> >> --- a/block/vmdk.c >> >> +++ b/block/vmdk.c >> >> @@ -137,6 +137,8 @@ typedef struct VmdkMetaData { >> >> int valid; >> >> uint32_t *l2_cache_entry; >> >> uint32_t nb_clusters; >> >> +uint32_t offset; >> >> +struct VmdkMetaData *next; >> >> } VmdkMetaData; >> >> >> >> typedef struct VmdkGrainMarker { >> >> @@ -1037,29 +1039,81 @@ static void vmdk_refresh_limits(BlockDriverState >> >> *bs, Error **errp) >> >> } >> >> } >> >> >> >> -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, >> >> - uint32_t offset) >> >> +static int vmdk_alloc_cluster_link_l2(VmdkExtent *extent, >> >> + VmdkMetaData *m_data, bool zeroed) >> >> { >> >> -offset = cpu_to_le32(offset); >> >> +int i; >> >> +uint32_t offset, temp_offset; >> >> + >> >> +if (zeroed) { >> >> +temp_offset = VMDK_GTE_ZEROED; >> >> +} else { >> >> +temp_offset = m_data->offset; >> >> +} >> >> + >> >> +temp_offset = cpu_to_le32(temp_offset); >> >> + >> >> /* update L2 table */ >> >> -if (bdrv_pwrite_sync(extent->file, >> >> +offset = temp_offset; >> >> +for (i = 0; i < m_data->nb_clusters; i++) { >> >> +if (bdrv_pwrite_sync(extent->file, >> >> ((int64_t)m_data->l2_offset * 512) >> >> -+ (m_data->l2_index * sizeof(offset)), >> >> -, sizeof(offset)) < 0) { >> >> -return VMDK_ERROR; >> >> ++ ((m_data->l2_index + i) * sizeof(offset)), >> >> +&(offset), sizeof(offset)) < 0) { >> >> +return VMDK_ERROR; >> >> +} >> >> +if (!zeroed) { >> >> +offset += 128; >> >> +} >> >> } >> >> + >> >> /* update backup L2 table */ >> >> +offset = temp_offset; >> >> if (extent->l1_backup_table_offset != 0) { >> >> m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; >> >> -if (bdrv_pwrite_sync(extent->file, >> >> -((int64_t)m_data->l2_offset * 512) >> >> -+ (m_data->l2_index * sizeof(offset)), >> >> -, sizeof(offset)) < 0) { >> >> -return VMDK_ERROR; >> >> +for (i = 0; i < m_data->nb_clusters; i++) { >> >> +if (bdrv_pwrite_sync(extent->file, >> >> +((int64_t)m_data->l2_offset * 512) >> >> ++ ((m_data->l2_index + i) * sizeof(offset)), >> >> +&(offset), sizeof(offset)) < 0) { >> >> +return VMDK_ERROR; >> >> +} >> >> +if (!zeroed) { >> >> +offset += 128; >> >> +} >> >> } >> >> } >> >> + >> >> +offset = temp_offset; >> >> if (m_data->l2_cache_entry) { >> >> -*m_data->l2_cache_entry = offset; >> >> +for (i = 0; i < m_data->nb_clusters; i++) { >> >> +*m_data->l2_cache_entry = offset; >> >> +m_data->l2_cache_entry++; >> >> + >> >> +if (!zeroed) { >> >> +offset += 128; >> >> +} >> >> +} >> >> +} >> >> + >> >> +return VMDK_OK; >> >> +} >> >> + >> >> +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, >> >> + bool zeroed) >> >> +{ >> >> +int ret; >> >> + >> >> +while (m_data->next != NULL) { >> >> +VmdkMetaData *next; >> >> + >> >> +ret = vmdk_alloc_cluster_link_l2(extent, m_data, zeroed); >> >> +if (ret < 0) { >> >> +return ret; >> >> +} >> >> + >> >> +next = m_data->next; >> >> +m_data = next; >> > >> > I don't see why the next pointer is necessary. Also the tail is always >> > unused, >> > why do you need to allocate it? >> >> If I don't allocate the tail, I was getting a segfault in vmdk_pwritev(). > > That may be because the way you interate the linked list in vmdk_pwritev is: > >>while (m_data->next != NULL) { >>VmdkMetaData *next; >>next = m_data->next; >>g_free(m_data); >>m_data = next; >>} >> > > which does require a dummy tail. No, I remember it
Re: [Qemu-block] [Qemu-devel] [PATCH v2 7/7] vmdk: Update metadata for multiple clusters
On Fri, 03/31 14:17, Ashijeet Acharya wrote: > On Fri, Mar 31, 2017 at 12:56 PM, Fam Zhengwrote: > > On Sat, 03/25 16:48, Ashijeet Acharya wrote: > >> Include a next pointer in VmdkMetaData struct to point to the previous > >> allocated L2 table. Modify vmdk_L2update to start updating metadata for > >> allocation of multiple clusters at once. > >> > >> Signed-off-by: Ashijeet Acharya > >> --- > >> block/vmdk.c | 131 > >> ++- > >> 1 file changed, 102 insertions(+), 29 deletions(-) > >> > >> diff --git a/block/vmdk.c b/block/vmdk.c > >> index 3de8b8f..4517409 100644 > >> --- a/block/vmdk.c > >> +++ b/block/vmdk.c > >> @@ -137,6 +137,8 @@ typedef struct VmdkMetaData { > >> int valid; > >> uint32_t *l2_cache_entry; > >> uint32_t nb_clusters; > >> +uint32_t offset; > >> +struct VmdkMetaData *next; > >> } VmdkMetaData; > >> > >> typedef struct VmdkGrainMarker { > >> @@ -1037,29 +1039,81 @@ static void vmdk_refresh_limits(BlockDriverState > >> *bs, Error **errp) > >> } > >> } > >> > >> -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, > >> - uint32_t offset) > >> +static int vmdk_alloc_cluster_link_l2(VmdkExtent *extent, > >> + VmdkMetaData *m_data, bool zeroed) > >> { > >> -offset = cpu_to_le32(offset); > >> +int i; > >> +uint32_t offset, temp_offset; > >> + > >> +if (zeroed) { > >> +temp_offset = VMDK_GTE_ZEROED; > >> +} else { > >> +temp_offset = m_data->offset; > >> +} > >> + > >> +temp_offset = cpu_to_le32(temp_offset); > >> + > >> /* update L2 table */ > >> -if (bdrv_pwrite_sync(extent->file, > >> +offset = temp_offset; > >> +for (i = 0; i < m_data->nb_clusters; i++) { > >> +if (bdrv_pwrite_sync(extent->file, > >> ((int64_t)m_data->l2_offset * 512) > >> -+ (m_data->l2_index * sizeof(offset)), > >> -, sizeof(offset)) < 0) { > >> -return VMDK_ERROR; > >> ++ ((m_data->l2_index + i) * sizeof(offset)), > >> +&(offset), sizeof(offset)) < 0) { > >> +return VMDK_ERROR; > >> +} > >> +if (!zeroed) { > >> +offset += 128; > >> +} > >> } > >> + > >> /* update backup L2 table */ > >> +offset = temp_offset; > >> if (extent->l1_backup_table_offset != 0) { > >> m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; > >> -if (bdrv_pwrite_sync(extent->file, > >> -((int64_t)m_data->l2_offset * 512) > >> -+ (m_data->l2_index * sizeof(offset)), > >> -, sizeof(offset)) < 0) { > >> -return VMDK_ERROR; > >> +for (i = 0; i < m_data->nb_clusters; i++) { > >> +if (bdrv_pwrite_sync(extent->file, > >> +((int64_t)m_data->l2_offset * 512) > >> ++ ((m_data->l2_index + i) * sizeof(offset)), > >> +&(offset), sizeof(offset)) < 0) { > >> +return VMDK_ERROR; > >> +} > >> +if (!zeroed) { > >> +offset += 128; > >> +} > >> } > >> } > >> + > >> +offset = temp_offset; > >> if (m_data->l2_cache_entry) { > >> -*m_data->l2_cache_entry = offset; > >> +for (i = 0; i < m_data->nb_clusters; i++) { > >> +*m_data->l2_cache_entry = offset; > >> +m_data->l2_cache_entry++; > >> + > >> +if (!zeroed) { > >> +offset += 128; > >> +} > >> +} > >> +} > >> + > >> +return VMDK_OK; > >> +} > >> + > >> +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, > >> + bool zeroed) > >> +{ > >> +int ret; > >> + > >> +while (m_data->next != NULL) { > >> +VmdkMetaData *next; > >> + > >> +ret = vmdk_alloc_cluster_link_l2(extent, m_data, zeroed); > >> +if (ret < 0) { > >> +return ret; > >> +} > >> + > >> +next = m_data->next; > >> +m_data = next; > > > > I don't see why the next pointer is necessary. Also the tail is always > > unused, > > why do you need to allocate it? > > If I don't allocate the tail, I was getting a segfault in vmdk_pwritev(). That may be because the way you interate the linked list in vmdk_pwritev is: >while (m_data->next != NULL) { >VmdkMetaData *next; >next = m_data->next; >g_free(m_data); >m_data = next; >} > which does require a dummy tail. But after all it's still not clear to me why you cannot keep m_data on stack, and why you need the next pointer at all. Fam
Re: [Qemu-block] [PATCH v3 for-2.9 2/9] char: Fix socket with "type": "vsock" address
On Thu, Mar 30, 2017 at 07:43:10PM +0200, Markus Armbruster wrote: > Watch this: > > $ qemu-system-x86_64 -nodefaults -S -display none -qmp stdio > {"QMP": {"version": {"qemu": {"micro": 91, "minor": 8, "major": 2}, > "package": " (v2.8.0-1195-gf84141e-dirty)"}, "capabilities": []}} > { "execute": "qmp_capabilities" } > {"return": {}} > { "execute": "chardev-add", "arguments": { "id": "chr0", "backend": { > "type": "socket", "data": { "addr": { "type": "vsock", "data": { "cid": > "CID", "port": "P" }} > Aborted (core dumped) > > Crashes because SocketAddress_to_str() is blissfully unaware of > SOCKET_ADDRESS_KIND_VSOCK. Fix that. Pick the output format to match > socket_parse(), just like the existing formats. > > Cc: Stefan Hajnoczi> Cc: Paolo Bonzini > Cc: Marc-André Lureau > Signed-off-by: Markus Armbruster > Reviewed-by: Marc-André Lureau > Reviewed-by: Max Reitz > --- > chardev/char-socket.c | 4 > 1 file changed, 4 insertions(+) Reviewed-by: Stefan Hajnoczi signature.asc Description: PGP signature
Re: [Qemu-block] [Qemu-devel] [PATCH v2 7/7] vmdk: Update metadata for multiple clusters
On Fri, Mar 31, 2017 at 12:56 PM, Fam Zhengwrote: > On Sat, 03/25 16:48, Ashijeet Acharya wrote: >> Include a next pointer in VmdkMetaData struct to point to the previous >> allocated L2 table. Modify vmdk_L2update to start updating metadata for >> allocation of multiple clusters at once. >> >> Signed-off-by: Ashijeet Acharya >> --- >> block/vmdk.c | 131 >> ++- >> 1 file changed, 102 insertions(+), 29 deletions(-) >> >> diff --git a/block/vmdk.c b/block/vmdk.c >> index 3de8b8f..4517409 100644 >> --- a/block/vmdk.c >> +++ b/block/vmdk.c >> @@ -137,6 +137,8 @@ typedef struct VmdkMetaData { >> int valid; >> uint32_t *l2_cache_entry; >> uint32_t nb_clusters; >> +uint32_t offset; >> +struct VmdkMetaData *next; >> } VmdkMetaData; >> >> typedef struct VmdkGrainMarker { >> @@ -1037,29 +1039,81 @@ static void vmdk_refresh_limits(BlockDriverState >> *bs, Error **errp) >> } >> } >> >> -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, >> - uint32_t offset) >> +static int vmdk_alloc_cluster_link_l2(VmdkExtent *extent, >> + VmdkMetaData *m_data, bool zeroed) >> { >> -offset = cpu_to_le32(offset); >> +int i; >> +uint32_t offset, temp_offset; >> + >> +if (zeroed) { >> +temp_offset = VMDK_GTE_ZEROED; >> +} else { >> +temp_offset = m_data->offset; >> +} >> + >> +temp_offset = cpu_to_le32(temp_offset); >> + >> /* update L2 table */ >> -if (bdrv_pwrite_sync(extent->file, >> +offset = temp_offset; >> +for (i = 0; i < m_data->nb_clusters; i++) { >> +if (bdrv_pwrite_sync(extent->file, >> ((int64_t)m_data->l2_offset * 512) >> -+ (m_data->l2_index * sizeof(offset)), >> -, sizeof(offset)) < 0) { >> -return VMDK_ERROR; >> ++ ((m_data->l2_index + i) * sizeof(offset)), >> +&(offset), sizeof(offset)) < 0) { >> +return VMDK_ERROR; >> +} >> +if (!zeroed) { >> +offset += 128; >> +} >> } >> + >> /* update backup L2 table */ >> +offset = temp_offset; >> if (extent->l1_backup_table_offset != 0) { >> m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; >> -if (bdrv_pwrite_sync(extent->file, >> -((int64_t)m_data->l2_offset * 512) >> -+ (m_data->l2_index * sizeof(offset)), >> -, sizeof(offset)) < 0) { >> -return VMDK_ERROR; >> +for (i = 0; i < m_data->nb_clusters; i++) { >> +if (bdrv_pwrite_sync(extent->file, >> +((int64_t)m_data->l2_offset * 512) >> ++ ((m_data->l2_index + i) * sizeof(offset)), >> +&(offset), sizeof(offset)) < 0) { >> +return VMDK_ERROR; >> +} >> +if (!zeroed) { >> +offset += 128; >> +} >> } >> } >> + >> +offset = temp_offset; >> if (m_data->l2_cache_entry) { >> -*m_data->l2_cache_entry = offset; >> +for (i = 0; i < m_data->nb_clusters; i++) { >> +*m_data->l2_cache_entry = offset; >> +m_data->l2_cache_entry++; >> + >> +if (!zeroed) { >> +offset += 128; >> +} >> +} >> +} >> + >> +return VMDK_OK; >> +} >> + >> +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, >> + bool zeroed) >> +{ >> +int ret; >> + >> +while (m_data->next != NULL) { >> +VmdkMetaData *next; >> + >> +ret = vmdk_alloc_cluster_link_l2(extent, m_data, zeroed); >> +if (ret < 0) { >> +return ret; >> +} >> + >> +next = m_data->next; >> +m_data = next; > > I don't see why the next pointer is necessary. Also the tail is always > unused, > why do you need to allocate it? If I don't allocate the tail, I was getting a segfault in vmdk_pwritev(). > But more importantly, I think you could further batch multiple updates in the > same L2 table and only do one bdrv_pwrite_sync. Wouldn't the l2_offset need to change change for every subsequent L2 table i.e. after ever 512 cluster boundary? Ashijeet > > Fam > >> } >> >> return VMDK_OK; >> @@ -1271,7 +1325,7 @@ exit: >> */ >> static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, >> uint64_t offset, uint64_t *cluster_offset, >> -int64_t *bytes, VmdkMetaData *m_data, >> +int64_t *bytes, VmdkMetaData **m_data, >> bool allocate, uint32_t *total_alloc_clusters) >> { >> int l1_index, l2_offset, l2_index; >> @@ -1280,6 +1334,7 @@ static int handle_alloc(BlockDriverState *bs,
Re: [Qemu-block] [Qemu-devel] callout to *file in bdrv_co_get_block_status
Am 27.03.2017 um 17:06 schrieb Paolo Bonzini: > > On 27/03/2017 15:21, Peter Lieven wrote: I stumbled across the issue with lseek on a tmpfs because in the build process for our templates I temporarily have vmdks on a tmpfs and it takes ages before qemu-img convert starts to run (it iterates over every 64kb cluster with that callout to find_allocation and for some reason lseek is very slow on tmpfs). >>> Ok, thanks. Perhaps it's worth benchmarking tmpfs specifically. Apart >>> from the system call overhead (which does not really matter if you're >>> going to do a read), lseek on other filesystems should not be any slower >>> than read. >> Okay, but the even the read is not really necessary if the metadata is >> correct? > Yeah, what I mean is: > > - if you're going to do a read of non-zero blocks, the lseek you do > before reading those blocks should not matter. > > - if you're going to skip the read of non-zero blocks, the lseek you do > is always going to be faster than reading them and then checking with > buffer_is_nonzero. > >> Would it be an idea to introduce an inverse flag live BDRV_BLOCK_NOT_ZERO >> for cases where we know that there is really DATA and thus can avoid the >> second callout? > How would you know that a block is nonzero? I would trust the metadata. At least for VMDK and QCOW2v3. Bad idea? Peter
Re: [Qemu-block] [Qemu-devel] [PATCH v2 7/7] vmdk: Update metadata for multiple clusters
On Sat, 03/25 16:48, Ashijeet Acharya wrote: > Include a next pointer in VmdkMetaData struct to point to the previous > allocated L2 table. Modify vmdk_L2update to start updating metadata for > allocation of multiple clusters at once. > > Signed-off-by: Ashijeet Acharya> --- > block/vmdk.c | 131 > ++- > 1 file changed, 102 insertions(+), 29 deletions(-) > > diff --git a/block/vmdk.c b/block/vmdk.c > index 3de8b8f..4517409 100644 > --- a/block/vmdk.c > +++ b/block/vmdk.c > @@ -137,6 +137,8 @@ typedef struct VmdkMetaData { > int valid; > uint32_t *l2_cache_entry; > uint32_t nb_clusters; > +uint32_t offset; > +struct VmdkMetaData *next; > } VmdkMetaData; > > typedef struct VmdkGrainMarker { > @@ -1037,29 +1039,81 @@ static void vmdk_refresh_limits(BlockDriverState *bs, > Error **errp) > } > } > > -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, > - uint32_t offset) > +static int vmdk_alloc_cluster_link_l2(VmdkExtent *extent, > + VmdkMetaData *m_data, bool zeroed) > { > -offset = cpu_to_le32(offset); > +int i; > +uint32_t offset, temp_offset; > + > +if (zeroed) { > +temp_offset = VMDK_GTE_ZEROED; > +} else { > +temp_offset = m_data->offset; > +} > + > +temp_offset = cpu_to_le32(temp_offset); > + > /* update L2 table */ > -if (bdrv_pwrite_sync(extent->file, > +offset = temp_offset; > +for (i = 0; i < m_data->nb_clusters; i++) { > +if (bdrv_pwrite_sync(extent->file, > ((int64_t)m_data->l2_offset * 512) > -+ (m_data->l2_index * sizeof(offset)), > -, sizeof(offset)) < 0) { > -return VMDK_ERROR; > ++ ((m_data->l2_index + i) * sizeof(offset)), > +&(offset), sizeof(offset)) < 0) { > +return VMDK_ERROR; > +} > +if (!zeroed) { > +offset += 128; > +} > } > + > /* update backup L2 table */ > +offset = temp_offset; > if (extent->l1_backup_table_offset != 0) { > m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; > -if (bdrv_pwrite_sync(extent->file, > -((int64_t)m_data->l2_offset * 512) > -+ (m_data->l2_index * sizeof(offset)), > -, sizeof(offset)) < 0) { > -return VMDK_ERROR; > +for (i = 0; i < m_data->nb_clusters; i++) { > +if (bdrv_pwrite_sync(extent->file, > +((int64_t)m_data->l2_offset * 512) > ++ ((m_data->l2_index + i) * sizeof(offset)), > +&(offset), sizeof(offset)) < 0) { > +return VMDK_ERROR; > +} > +if (!zeroed) { > +offset += 128; > +} > } > } > + > +offset = temp_offset; > if (m_data->l2_cache_entry) { > -*m_data->l2_cache_entry = offset; > +for (i = 0; i < m_data->nb_clusters; i++) { > +*m_data->l2_cache_entry = offset; > +m_data->l2_cache_entry++; > + > +if (!zeroed) { > +offset += 128; > +} > +} > +} > + > +return VMDK_OK; > +} > + > +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, > + bool zeroed) > +{ > +int ret; > + > +while (m_data->next != NULL) { > +VmdkMetaData *next; > + > +ret = vmdk_alloc_cluster_link_l2(extent, m_data, zeroed); > +if (ret < 0) { > +return ret; > +} > + > +next = m_data->next; > +m_data = next; I don't see why the next pointer is necessary. Also the tail is always unused, why do you need to allocate it? But more importantly, I think you could further batch multiple updates in the same L2 table and only do one bdrv_pwrite_sync. Fam > } > > return VMDK_OK; > @@ -1271,7 +1325,7 @@ exit: > */ > static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, > uint64_t offset, uint64_t *cluster_offset, > -int64_t *bytes, VmdkMetaData *m_data, > +int64_t *bytes, VmdkMetaData **m_data, > bool allocate, uint32_t *total_alloc_clusters) > { > int l1_index, l2_offset, l2_index; > @@ -1280,6 +1334,7 @@ static int handle_alloc(BlockDriverState *bs, > VmdkExtent *extent, > uint32_t nb_clusters; > bool zeroed = false; > uint64_t skip_start_bytes, skip_end_bytes; > +VmdkMetaData *old_m_data; > int ret; > > ret = get_cluster_table(extent, offset, _index, _offset, > @@ -1330,13 +1385,21 @@ static int handle_alloc(BlockDriverState *bs, > VmdkExtent *extent, > if (ret < 0) { > return ret; >
Re: [Qemu-block] [Qemu-devel] [PATCH v2 3/7] vmdk: Factor out metadata loading code out of get_cluster_offset()
On Sat, 03/25 16:48, Ashijeet Acharya wrote: > Move the cluster tables loading code out of the existing > get_cluster_offset() function and implement it in separate Now it's renamed to vmdk_perform_cow() in previous patch, the commit message following it should be updated. > get_cluster_table() and vmdk_L2load() functions. This patch will help > us avoid code duplication in future patches of this series. > > Signed-off-by: Ashijeet Acharya> --- > block/vmdk.c | 99 > > 1 file changed, 99 insertions(+) > > diff --git a/block/vmdk.c b/block/vmdk.c > index f5fda2c..a42322e 100644 > --- a/block/vmdk.c > +++ b/block/vmdk.c > @@ -1037,6 +1037,105 @@ static void vmdk_refresh_limits(BlockDriverState *bs, > Error **errp) > } > > /* > + * vmdk_L2load Personally, I think vmdk_l2load is good enough, the upper case doesn't add readability but makes it slightly harder to type. > + * > + * Loads a new L2 table into memory. If the table is in the cache, the cache > + * is used; otherwise the L2 table is loaded from the image file. > + * > + * Returns: > + * VMDK_OK: on success > + * VMDK_ERROR:in error cases > + */ > +static int vmdk_L2load(VmdkExtent *extent, uint64_t offset, int l2_offset, > + uint32_t **new_l2_table, int *new_l2_index) > +{ > +int min_index, i, j; > +uint32_t *l2_table; > +uint32_t min_count; > + > +for (i = 0; i < L2_CACHE_SIZE; i++) { > +if (l2_offset == extent->l2_cache_offsets[i]) { > +/* increment the hit count */ > +if (++extent->l2_cache_counts[i] == 0x) { Please use UINT32_MAX. > +for (j = 0; j < L2_CACHE_SIZE; j++) { > +extent->l2_cache_counts[j] >>= 1; > +} > +} > +l2_table = extent->l2_cache + (i * extent->l2_size); > +goto found; > +} > +} > +/* not found: load a new entry in the least used one */ > +min_index = 0; > +min_count = 0x; Please use UINT32_MAX. > +for (i = 0; i < L2_CACHE_SIZE; i++) { > +if (extent->l2_cache_counts[i] < min_count) { > +min_count = extent->l2_cache_counts[i]; > +min_index = i; > +} > +} > +l2_table = extent->l2_cache + (min_index * extent->l2_size); > +if (bdrv_pread(extent->file, > +(int64_t)l2_offset * 512, > +l2_table, > +extent->l2_size * sizeof(uint32_t) > +) != extent->l2_size * sizeof(uint32_t)) { > +return VMDK_ERROR; > +} > + > +extent->l2_cache_offsets[min_index] = l2_offset; > +extent->l2_cache_counts[min_index] = 1; > +found: > +*new_l2_index = ((offset >> 9) / extent->cluster_sectors) % > extent->l2_size; > +*new_l2_table = l2_table; > + > +return VMDK_OK; > +} > + > +/* > + * get_cluster_table > + * > + * for a given offset, load (and allocate if needed) the l2 table. > + * > + * Returns: > + * VMDK_OK:on success > + * > + * VMDK_UNALLOC: if cluster is not mapped > + * > + * VMDK_ERROR: in error cases > + */ > +static int get_cluster_table(VmdkExtent *extent, uint64_t offset, > + int *new_l1_index, int *new_l2_offset, > + int *new_l2_index, uint32_t **new_l2_table) Again, a static function must be introduced with the code change where it is used, at least for once. It keeps the compiler happy (-Wunused-function) and makes reviewing easy. > +{ > +int l1_index, l2_offset, l2_index; > +uint32_t *l2_table; > +int ret; > + > +offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE; > +l1_index = (offset >> 9) / extent->l1_entry_sectors; > +if (l1_index >= extent->l1_size) { > +return VMDK_ERROR; > +} > +l2_offset = extent->l1_table[l1_index]; > +if (!l2_offset) { > +return VMDK_UNALLOC; > +} > + > +ret = vmdk_L2load(extent, offset, l2_offset, _table, _index); > +if (ret < 0) { > +return ret; > +} > + > +*new_l1_index = l1_index; > +*new_l2_offset = l2_offset; > +*new_l2_index = l2_index; > +*new_l2_table = l2_table; > + > +return VMDK_OK; > +} > + > +/* > * vmdk_perform_cow > * > * Copy backing file's cluster that covers @sector_num, otherwise write zero, > -- > 2.6.2 > > Fam