[PATCH v7 2/3] vhost-user-blk: split vhost_user_blk_sync_config()
Split vhost_user_blk_sync_config() out from vhost_user_blk_handle_config_change(), to be reused in the following commit. Signed-off-by: Vladimir Sementsov-Ogievskiy Acked-by: Raphael Norwitz Reviewed-by: Stefano Garzarella --- hw/block/vhost-user-blk.c | 26 +++--- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 5b7f46bbb0..48b3dabb8d 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -90,27 +90,39 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config) s->blkcfg.wce = blkcfg->wce; } +static int vhost_user_blk_sync_config(DeviceState *dev, Error **errp) +{ +int ret; +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VHostUserBlk *s = VHOST_USER_BLK(vdev); + +ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, + vdev->config_len, errp); +if (ret < 0) { +return ret; +} + +memcpy(vdev->config, &s->blkcfg, vdev->config_len); +virtio_notify_config(vdev); + +return 0; +} + static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) { int ret; -VirtIODevice *vdev = dev->vdev; -VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); Error *local_err = NULL; if (!dev->started) { return 0; } -ret = vhost_dev_get_config(dev, (uint8_t *)&s->blkcfg, - vdev->config_len, &local_err); +ret = vhost_user_blk_sync_config(DEVICE(dev->vdev), &local_err); if (ret < 0) { error_report_err(local_err); return ret; } -memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len); -virtio_notify_config(dev->vdev); - return 0; } -- 2.34.1
[PATCH v7 3/3] qapi: introduce device-sync-config
Add command to sync config from vhost-user backend to the device. It may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not triggered interrupt to the guest or just not available (not supported by vhost-user server). Command result is racy if allow it during migration. Let's not allow that. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Markus Armbruster Acked-by: Raphael Norwitz --- hw/block/vhost-user-blk.c | 1 + hw/virtio/virtio-pci.c| 9 + include/hw/qdev-core.h| 6 ++ qapi/qdev.json| 24 system/qdev-monitor.c | 38 ++ 5 files changed, 78 insertions(+) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 48b3dabb8d..7996e49821 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -591,6 +591,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, vhost_user_blk_properties); dc->vmsd = &vmstate_vhost_user_blk; +dc->sync_config = vhost_user_blk_sync_config; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); vdc->realize = vhost_user_blk_device_realize; vdc->unrealize = vhost_user_blk_device_unrealize; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 4d832fe845..c5a809b956 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2385,6 +2385,14 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) vpciklass->parent_dc_realize(qdev, errp); } +static int virtio_pci_sync_config(DeviceState *dev, Error **errp) +{ +VirtIOPCIProxy *proxy = VIRTIO_PCI(dev); +VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + +return qdev_sync_config(DEVICE(vdev), errp); +} + static void virtio_pci_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -2401,6 +2409,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data) device_class_set_parent_realize(dc, virtio_pci_dc_realize, &vpciklass->parent_dc_realize); rc->phases.hold = virtio_pci_bus_reset_hold; +dc->sync_config = 
virtio_pci_sync_config; } static const TypeInfo virtio_pci_info = { diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index aa97c34a4b..94914858d8 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -95,6 +95,7 @@ typedef void (*DeviceUnrealize)(DeviceState *dev); typedef void (*DeviceReset)(DeviceState *dev); typedef void (*BusRealize)(BusState *bus, Error **errp); typedef void (*BusUnrealize)(BusState *bus); +typedef int (*DeviceSyncConfig)(DeviceState *dev, Error **errp); /** * struct DeviceClass - The base class for all devices. @@ -103,6 +104,9 @@ typedef void (*BusUnrealize)(BusState *bus); * property is changed to %true. * @unrealize: Callback function invoked when the #DeviceState:realized * property is changed to %false. + * @sync_config: Callback function invoked when QMP command device-sync-config + * is called. Should synchronize device configuration from host to guest part + * and notify the guest about the change. * @hotpluggable: indicates if #DeviceClass is hotpluggable, available * as readonly "hotpluggable" property of #DeviceState instance * @@ -162,6 +166,7 @@ struct DeviceClass { DeviceReset legacy_reset; DeviceRealize realize; DeviceUnrealize unrealize; +DeviceSyncConfig sync_config; /** * @vmsd: device state serialisation description for @@ -547,6 +552,7 @@ bool qdev_hotplug_allowed(DeviceState *dev, Error **errp); */ HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev); void qdev_unplug(DeviceState *dev, Error **errp); +int qdev_sync_config(DeviceState *dev, Error **errp); void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); void qdev_machine_creation_done(void); diff --git a/qapi/qdev.json b/qapi/qdev.json index 53d147c7b4..25cbcf977b 100644 --- a/qapi/qdev.json +++ b/qapi/qdev.json @@ -163,3 +163,27 @@ ## { 'event': 'DEVICE_UNPLUG_GUEST_ERROR', 'data': { '*device': 'str', 'path': 'str' } } + +## +# @device-sync-config: +# +# Synchronize device configuration from 
host to guest part. First, +# copy the configuration from the host part (backend) to the guest +# part (frontend). Then notify guest software that device +# configuration changed. +# +# The command may be used to notify the guest about block device +# capacity change. Currently only vhost-user-blk device supports +# this. +# +# @id: the device's ID or QOM path +# +# Features: +# +# @unstable: The command is experimental. +# +# Since: 9.2 +## +{ 'command': 'device-sync-config', + 'features': [ 'unstable'
[PATCH v7 1/3] qdev-monitor: add option to report GenericError from find_device_state
Here we just prepare for the following patch, making possible to report GenericError as recommended. This patch doesn't aim to prevent further use of DeviceNotFound by future interfaces: - find_device_state() is used in blk_by_qdev_id() and qmp_get_blk() functions, which may lead to spread of DeviceNotFound anyway - also, nothing prevent simply copy-pasting find_device_state() calls with false argument Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Markus Armbruster Acked-by: Raphael Norwitz --- system/qdev-monitor.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c index 320c47b72d..2c76cef4d8 100644 --- a/system/qdev-monitor.c +++ b/system/qdev-monitor.c @@ -885,13 +885,20 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp) object_unref(OBJECT(dev)); } -static DeviceState *find_device_state(const char *id, Error **errp) +/* + * Note that creating new APIs using error classes other than GenericError is + * not recommended. Set use_generic_error=true for new interfaces. + */ +static DeviceState *find_device_state(const char *id, bool use_generic_error, + Error **errp) { Object *obj = object_resolve_path_at(qdev_get_peripheral(), id); DeviceState *dev; if (!obj) { -error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, +error_set(errp, + (use_generic_error ? 
+ ERROR_CLASS_GENERIC_ERROR : ERROR_CLASS_DEVICE_NOT_FOUND), "Device '%s' not found", id); return NULL; } @@ -956,7 +963,7 @@ void qdev_unplug(DeviceState *dev, Error **errp) void qmp_device_del(const char *id, Error **errp) { -DeviceState *dev = find_device_state(id, errp); +DeviceState *dev = find_device_state(id, false, errp); if (dev != NULL) { if (dev->pending_deleted_event && (dev->pending_deleted_expires_ms == 0 || @@ -1076,7 +1083,7 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp) GLOBAL_STATE_CODE(); -dev = find_device_state(id, errp); +dev = find_device_state(id, false, errp); if (dev == NULL) { return NULL; } -- 2.34.1
[PATCH v7 0/3] vhost-user-blk: live resize additional APIs
v7: update QAPI version 9.1 -> 9.2 Vladimir Sementsov-Ogievskiy (3): qdev-monitor: add option to report GenericError from find_device_state vhost-user-blk: split vhost_user_blk_sync_config() qapi: introduce device-sync-config hw/block/vhost-user-blk.c | 27 ++-- hw/virtio/virtio-pci.c| 9 +++ include/hw/qdev-core.h| 6 + qapi/qdev.json| 24 ++ system/qdev-monitor.c | 53 --- 5 files changed, 108 insertions(+), 11 deletions(-) -- 2.34.1
Re: [PATCH v3 3/7] qapi: block-job-change: make copy-mode parameter optional
On 22.10.24 15:35, Kevin Wolf wrote: Am 02.10.2024 um 16:06 hat Vladimir Sementsov-Ogievskiy geschrieben: We are going to add more parameters to change. We want to make possible to change only one or any subset of available options. So all the options should be optional. Signed-off-by: Vladimir Sementsov-Ogievskiy Acked-by: Markus Armbruster This is different from blockdev-reopen, which requires repeating all options (and can therefore reuse the type from blockdev-add). One of the reasons why we made it like that is that we had some options for which "option absent" was a meaningful value in itself, so it would have become ambiguous if an absent option in blockdev-reopen should mean "leave the existing value unchanged" or "unset the option". Right.. Thanks for noting this. I'll try to apply blockdev-reopen approach here. Are we confident that this will never happen with job options? In case of doubt, I would go for consistency with blockdev-reopen. Either way, it would be good to document the reasoning for whatever option we choose in the commit message. Kevin -- Best regards, Vladimir
Re: [PATCH v6 0/3] vhost-user-blk: live resize additional APIs
ping) On 20.09.24 12:49, Vladimir Sementsov-Ogievskiy wrote: v6: tiny fix: add document comment for sync_config field to fix qdoc generation. Also, add r-b and a-b marks. Vladimir Sementsov-Ogievskiy (3): qdev-monitor: add option to report GenericError from find_device_state vhost-user-blk: split vhost_user_blk_sync_config() qapi: introduce device-sync-config hw/block/vhost-user-blk.c | 27 ++-- hw/virtio/virtio-pci.c| 9 +++ include/hw/qdev-core.h| 6 + qapi/qdev.json| 24 ++ system/qdev-monitor.c | 53 --- 5 files changed, 108 insertions(+), 11 deletions(-) -- Best regards, Vladimir
Re: [Bug Report][RFC PATCH 1/1] block: fix failing assert on paused VM migration
On 10.10.24 17:30, Fabiano Rosas wrote: Vladimir Sementsov-Ogievskiy writes: On 09.10.24 23:53, Fabiano Rosas wrote: Vladimir Sementsov-Ogievskiy writes: On 30.09.24 17:07, Andrey Drobyshev wrote: On 9/30/24 12:25 PM, Vladimir Sementsov-Ogievskiy wrote: [add migration maintainers] On 24.09.24 15:56, Andrey Drobyshev wrote: [...] I doubt that this a correct way to go. As far as I understand, "inactive" actually means that "storage is not belong to qemu, but to someone else (another qemu process for example), and may be changed transparently". In turn this means that Qemu should do nothing with inactive disks. So the problem is that nobody called bdrv_activate_all on target, and we shouldn't ignore that. Hmm, I see in process_incoming_migration_bh() we do call bdrv_activate_all(), but only in some scenarios. May be, the condition should be less strict here. Why we need any condition here at all? Don't we want to activate block-layer on target after migration anyway? Hmm I'm not sure about the unconditional activation, since we at least have to honor LATE_BLOCK_ACTIVATE cap if it's set (and probably delay it in such a case). In current libvirt upstream I see such code: /* Migration capabilities which should always be enabled as long as they * are supported by QEMU. If the capability is supposed to be enabled on both * sides of migration, it won't be enabled unless both sides support it. */ static const qemuMigrationParamsAlwaysOnItem qemuMigrationParamsAlwaysOn[] = { {QEMU_MIGRATION_CAP_PAUSE_BEFORE_SWITCHOVER, QEMU_MIGRATION_SOURCE}, {QEMU_MIGRATION_CAP_LATE_BLOCK_ACTIVATE, QEMU_MIGRATION_DESTINATION}, }; which means that libvirt always wants LATE_BLOCK_ACTIVATE to be set. The code from process_incoming_migration_bh() you're referring to: /* If capability late_block_activate is set: * Only fire up the block code now if we're going to restart the * VM, else 'cont' will do it. 
* This causes file locking to happen; so we don't want it to happen * unless we really are starting the VM. */ if (!migrate_late_block_activate() || (autostart && (!global_state_received() || runstate_is_live(global_state_get_runstate() { /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); local_err = NULL; autostart = false; } } It states explicitly that we're either going to start VM right at this point if (autostart == true), or we wait till "cont" command happens. None of this is going to happen if we start another migration while still being in PAUSED state. So I think it seems reasonable to take such case into account. For instance, this patch does prevent the crash: diff --git a/migration/migration.c b/migration/migration.c index ae2be31557..3222f6745b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -733,7 +733,8 @@ static void process_incoming_migration_bh(void *opaque) */ if (!migrate_late_block_activate() || (autostart && (!global_state_received() || -runstate_is_live(global_state_get_runstate() { +runstate_is_live(global_state_get_runstate( || + (!autostart && global_state_get_runstate() == RUN_STATE_PAUSED)) { /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ bdrv_activate_all(&local_err); What are your thoughts on it? This bug is the same as https://gitlab.com/qemu-project/qemu/-/issues/2395 Hmmm... Don't we violate "late-block-activate" contract by this? Me go and check: # @late-block-activate: If enabled, the destination will not activate # block devices (and thus take locks) immediately at the end of # migration. (since 3.0) Yes, we'll violate it by this patch. So, for now the only exception is when autostart is enabled, but libvirt correctly use late-block-activate + !autostart. 
Interesting, when block layer is assumed to be activated.. Aha, only in qmp_cont(). So, what to do with this all: Either libvirt should not use late-block-activate for migration of stopped vm. This way target would be automatically activated Or if libvirt still need postponed activation (I assume, for correctly switching shared disks, etc), Libvirt should do some additional QMP call. It can't be "cont", if we don't want to run the VM. So, probably, we need add
Re: [Bug Report][RFC PATCH 1/1] block: fix failing assert on paused VM migration
On 09.10.24 23:53, Fabiano Rosas wrote: Vladimir Sementsov-Ogievskiy writes: On 30.09.24 17:07, Andrey Drobyshev wrote: On 9/30/24 12:25 PM, Vladimir Sementsov-Ogievskiy wrote: [add migration maintainers] On 24.09.24 15:56, Andrey Drobyshev wrote: [...] I doubt that this a correct way to go. As far as I understand, "inactive" actually means that "storage is not belong to qemu, but to someone else (another qemu process for example), and may be changed transparently". In turn this means that Qemu should do nothing with inactive disks. So the problem is that nobody called bdrv_activate_all on target, and we shouldn't ignore that. Hmm, I see in process_incoming_migration_bh() we do call bdrv_activate_all(), but only in some scenarios. May be, the condition should be less strict here. Why we need any condition here at all? Don't we want to activate block-layer on target after migration anyway? Hmm I'm not sure about the unconditional activation, since we at least have to honor LATE_BLOCK_ACTIVATE cap if it's set (and probably delay it in such a case). In current libvirt upstream I see such code: /* Migration capabilities which should always be enabled as long as they * are supported by QEMU. If the capability is supposed to be enabled on both * sides of migration, it won't be enabled unless both sides support it. */ static const qemuMigrationParamsAlwaysOnItem qemuMigrationParamsAlwaysOn[] = { {QEMU_MIGRATION_CAP_PAUSE_BEFORE_SWITCHOVER, QEMU_MIGRATION_SOURCE}, {QEMU_MIGRATION_CAP_LATE_BLOCK_ACTIVATE, QEMU_MIGRATION_DESTINATION}, }; which means that libvirt always wants LATE_BLOCK_ACTIVATE to be set. The code from process_incoming_migration_bh() you're referring to: /* If capability late_block_activate is set: * Only fire up the block code now if we're going to restart the * VM, else 'cont' will do it. * This causes file locking to happen; so we don't want it to happen * unless we really are starting the VM. 
*/ if (!migrate_late_block_activate() || (autostart && (!global_state_received() || runstate_is_live(global_state_get_runstate() { /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); local_err = NULL; autostart = false; } } It states explicitly that we're either going to start VM right at this point if (autostart == true), or we wait till "cont" command happens. None of this is going to happen if we start another migration while still being in PAUSED state. So I think it seems reasonable to take such case into account. For instance, this patch does prevent the crash: diff --git a/migration/migration.c b/migration/migration.c index ae2be31557..3222f6745b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -733,7 +733,8 @@ static void process_incoming_migration_bh(void *opaque) */ if (!migrate_late_block_activate() || (autostart && (!global_state_received() || -runstate_is_live(global_state_get_runstate() { +runstate_is_live(global_state_get_runstate( || + (!autostart && global_state_get_runstate() == RUN_STATE_PAUSED)) { /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ bdrv_activate_all(&local_err); What are your thoughts on it? This bug is the same as https://gitlab.com/qemu-project/qemu/-/issues/2395 Hmmm... Don't we violate "late-block-activate" contract by this? Me go and check: # @late-block-activate: If enabled, the destination will not activate # block devices (and thus take locks) immediately at the end of # migration. (since 3.0) Yes, we'll violate it by this patch. So, for now the only exception is when autostart is enabled, but libvirt correctly use late-block-activate + !autostart. Interesting, when block layer is assumed to be activated.. Aha, only in qmp_cont(). 
So, what to do with this all: Either libvirt should not use late-block-activate for migration of stopped vm. This way target would be automatically activated Or if libvirt still need postponed activation (I assume, for correctly switching shared disks, etc), Libvirt should do some additional QMP call. It can't be "cont", if we don't want to run the VM. So, probably, we need additional "block-activate" QMP command for this. A third option might be to unconditionally activat
Re: [PATCH v9 4/7] qapi: add blockdev-replace command
On 02.10.24 17:41, Vladimir Sementsov-Ogievskiy wrote: On 26.06.24 14:53, Vladimir Sementsov-Ogievskiy wrote: diff --git a/qapi/block-core.json b/qapi/block-core.json index df5e07debd..0a6f08a6e0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -6148,3 +6148,91 @@ ## { 'struct': 'DummyBlockCoreForceArrays', 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } } + +## +# @BlockParentType: +# +# @qdev: block device, such as created by device_add, and denoted by +# qdev-id +# +# @driver: block driver node, such as created by blockdev-add, and +# denoted by node-name +# +# @export: block export, such created by block-export-add, and +# denoted by export-id +# +# Since 9.1 +## +{ 'enum': 'BlockParentType', + 'data': ['qdev', 'driver', 'export'] } + +## +# @BdrvChildRefQdev: +# +# @qdev-id: the device's ID or QOM path +# +# Since 9.1 +## +{ 'struct': 'BdrvChildRefQdev', + 'data': { 'qdev-id': 'str' } } + +## +# @BdrvChildRefExport: +# +# @export-id: block export identifier +# +# Since 9.1 +## +{ 'struct': 'BdrvChildRefExport', + 'data': { 'export-id': 'str' } } + +## +# @BdrvChildRefDriver: +# +# @node-name: the node name of the parent block node +# +# @child: name of the child to be replaced, like "file" or "backing" +# +# Since 9.1 +## +{ 'struct': 'BdrvChildRefDriver', + 'data': { 'node-name': 'str', 'child': 'str' } } + +## +# @BlockdevReplace: +# +# @parent-type: type of the parent, which child is to be replaced +# +# @new-child: new child for replacement +# +# Since 9.1 +## +{ 'union': 'BlockdevReplace', + 'base': { + 'parent-type': 'BlockParentType', + 'new-child': 'str' + }, + 'discriminator': 'parent-type', + 'data': { + 'qdev': 'BdrvChildRefQdev', + 'export': 'BdrvChildRefExport', + 'driver': 'BdrvChildRefDriver' + } } + +## +# @blockdev-replace: +# +# Replace a block-node associated with device (selected by +# @qdev-id) or with block-export (selected by @export-id) or +# any child of block-node (selected by @node-name and @child) +# with 
@new-child block-node. +# +# Features: +# +# @unstable: This command is experimental. +# +# Since 9.1 +## +{ 'command': 'blockdev-replace', 'boxed': true, + 'features': [ 'unstable' ], + 'data': 'BlockdevReplace' } Looking back at this all, I now have another idea: instead of trying to unite different types of parents, maybe just publish concept of BdrvChild to QAPI? So that it will have unique id. Like for block-nodes, it could be auto-generated or specified by user. Then we'll add parameters for commands: device-add root-child-slot-id: ID block-export-add block-child-slot-id: ID and for block-drivers we already have BlockdevRef structure, it only lacks an id. { 'alternate': 'BlockdevRef', 'data': { 'definition': 'BlockdevOptions', 'reference': 'str' } } hmm.. Could it be as simple as { 'alternate': 'BlockdevRef', 'base': { '*child-slot-id': 'str' }, 'data': { 'definition': 'BlockdevOptions', 'reference': 'str' } } ? Oops that was obviously impossible idea :) Then, I think the only way is to introduce virtual "reference" BlockdevDriver, with only one parameter { 'reference': 'str' }, this way user will be able to specify file: {driver: reference, reference: NODE_NAME, child-slot-id: NEW_ID} Unfortunately, no: "../qapi/block-core.json:4781: alternate has unknown key 'base'" Anyway, I believe, some solution should exist, probably by improving QAPI generator. Or, add "child-slot-id" to base of BlockdevOptions, and add virtual "reference" BlockdevDriver, to handle case with reference. And finally, the new command becomes as simple as: { 'command': 'blockdev-replace', 'data': {'child-slot': 'str', 'new-child': 'str' } } -- Best regards, Vladimir
Re: [PATCH v6 0/3] vhost-user-blk: live resize additional APIs
Ping On 20.09.24 12:49, Vladimir Sementsov-Ogievskiy wrote: v6: tiny fix: add document comment for sync_config field to fix qdoc generation. Also, add r-b and a-b marks. Vladimir Sementsov-Ogievskiy (3): qdev-monitor: add option to report GenericError from find_device_state vhost-user-blk: split vhost_user_blk_sync_config() qapi: introduce device-sync-config hw/block/vhost-user-blk.c | 27 ++-- hw/virtio/virtio-pci.c| 9 +++ include/hw/qdev-core.h| 6 + qapi/qdev.json| 24 ++ system/qdev-monitor.c | 53 --- 5 files changed, 108 insertions(+), 11 deletions(-) -- Best regards, Vladimir
Re: [PATCH 0/2] fix backup-discard-source test for XFS
Hi Kevin! Now I revisit my old series, and looking here I see that I forget add you into CC. Does it still make sense? On 20.06.24 17:44, Vladimir Sementsov-Ogievskiy wrote: Hi all! As Kevin reported, the test doesn't work on XFS, as it rely on disk usage. Fix it, switching to dirty bitmap for guest write tracking. Vladimir Sementsov-Ogievskiy (2): iotests/backup-discard-source: convert size variable to be int iotests/backup-discard-source: don't use actual-size .../qemu-iotests/tests/backup-discard-source | 39 --- 1 file changed, 25 insertions(+), 14 deletions(-) -- Best regards, Vladimir
[PATCH v3 1/2] qapi: add qom-path to BLOCK_IO_ERROR event
We need something more reliable than "device" (which absent in modern interfaces) and "node-name" (which may absent, and actually don't specify the device, which is a source of error) to make a per-device throttling for the event in the following commit. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/block-backend.c | 21 + qapi/block-core.json | 7 +-- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index db6f9b92a3..9b70f44aec 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1028,22 +1028,34 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk) return blk->dev; } -/* Return the qdev ID, or if no ID is assigned the QOM path, of the block - * device attached to the BlockBackend. */ -char *blk_get_attached_dev_id(BlockBackend *blk) +static char *blk_get_attached_dev_id_or_path(BlockBackend *blk, bool want_id) { DeviceState *dev = blk->dev; IO_CODE(); if (!dev) { return g_strdup(""); -} else if (dev->id) { +} else if (want_id && dev->id) { return g_strdup(dev->id); } return object_get_canonical_path(OBJECT(dev)) ?: g_strdup(""); } +/* + * Return the qdev ID, or if no ID is assigned the QOM path, of the block + * device attached to the BlockBackend. + */ +char *blk_get_attached_dev_id(BlockBackend *blk) +{ +return blk_get_attached_dev_id_or_path(blk, true); +} + +static char *blk_get_attached_dev_path(BlockBackend *blk) +{ +return blk_get_attached_dev_id_or_path(blk, false); +} + /* * Return the BlockBackend which has the device model @dev attached if it * exists, else null. @@ -2140,6 +2152,7 @@ static void send_qmp_error_event(BlockBackend *blk, optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; qapi_event_send_block_io_error(blk_name(blk), + blk_get_attached_dev_path(blk), bs ? 
bdrv_get_node_name(bs) : NULL, optype, action, blk_iostatus_is_enabled(blk), error == ENOSPC, strerror(error)); diff --git a/qapi/block-core.json b/qapi/block-core.json index c3b0a2376b..b3743022be 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -5580,6 +5580,8 @@ # # Emitted when a disk I/O error occurs # +# @qom-path: path to the device object in the QOM tree (since 9.2) +# # @device: device name. This is always present for compatibility # reasons, but it can be empty ("") if the image does not have a # device name associated. @@ -5610,7 +5612,8 @@ # .. qmp-example:: # # <- { "event": "BLOCK_IO_ERROR", -# "data": { "device": "ide0-hd1", +# "data": { "qom-path": "/machine/unattached/device[0]", +#"device": "ide0-hd1", #"node-name": "#block212", #"operation": "write", #"action": "stop", @@ -5618,7 +5621,7 @@ # "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } ## { 'event': 'BLOCK_IO_ERROR', - 'data': { 'device': 'str', '*node-name': 'str', + 'data': { 'qom-path': 'str', 'device': 'str', '*node-name': 'str', 'operation': 'IoOperationType', 'action': 'BlockErrorAction', '*nospace': 'bool', 'reason': 'str' } } -- 2.34.1
[PATCH v3 0/2] throttling for BLOCK_IO_ERROR
v2: switch to qom-path as discriminator, for this, add patch 01. Leonid Kaplan (1): block-backend: per-device throttling of BLOCK_IO_ERROR reports Vladimir Sementsov-Ogievskiy (1): qapi: add qom-path to BLOCK_IO_ERROR event block/block-backend.c | 21 + monitor/monitor.c | 7 +-- qapi/block-core.json | 9 +++-- 3 files changed, 29 insertions(+), 8 deletions(-) -- 2.34.1
[PATCH v3 2/2] block-backend: per-device throttling of BLOCK_IO_ERROR reports
From: Leonid Kaplan BLOCK_IO_ERROR events come from the guest, so we must throttle them. We still want per-device throttling, so let's use the device's QOM path as a key. Signed-off-by: Leonid Kaplan Signed-off-by: Vladimir Sementsov-Ogievskiy --- monitor/monitor.c| 7 +-- qapi/block-core.json | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/monitor/monitor.c b/monitor/monitor.c index db52a9c7ef..56786c0ccc 100644 --- a/monitor/monitor.c +++ b/monitor/monitor.c @@ -308,6 +308,7 @@ int error_printf_unless_qmp(const char *fmt, ...) static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = { /* Limit guest-triggerable events to 1 per second */ [QAPI_EVENT_RTC_CHANGE]= { 1000 * SCALE_MS }, +[QAPI_EVENT_BLOCK_IO_ERROR]= { 1000 * SCALE_MS }, [QAPI_EVENT_WATCHDOG] = { 1000 * SCALE_MS }, [QAPI_EVENT_BALLOON_CHANGE]= { 1000 * SCALE_MS }, [QAPI_EVENT_QUORUM_REPORT_BAD] = { 1000 * SCALE_MS }, @@ -493,7 +494,8 @@ static unsigned int qapi_event_throttle_hash(const void *key) hash += g_str_hash(qdict_get_str(evstate->data, "node-name")); } -if (evstate->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE) { +if (evstate->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE || +evstate->event == QAPI_EVENT_BLOCK_IO_ERROR) { hash += g_str_hash(qdict_get_str(evstate->data, "qom-path")); } @@ -519,7 +521,8 @@ static gboolean qapi_event_throttle_equal(const void *a, const void *b) qdict_get_str(evb->data, "node-name")); } -if (eva->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE) { +if (eva->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE || +eva->event == QAPI_EVENT_BLOCK_IO_ERROR) { return !strcmp(qdict_get_str(eva->data, "qom-path"), qdict_get_str(evb->data, "qom-path")); } diff --git a/qapi/block-core.json b/qapi/block-core.json index b3743022be..835f90b118 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -5607,6 +5607,8 @@ # .. note:: If action is "stop", a STOP event will eventually follow #the BLOCK_IO_ERROR event. # +# .. note:: This event is rate-limited. 
+# # Since: 0.13 # # .. qmp-example:: -- 2.34.1
Re: [PATCH v9 4/7] qapi: add blockdev-replace command
On 26.06.24 14:53, Vladimir Sementsov-Ogievskiy wrote: diff --git a/qapi/block-core.json b/qapi/block-core.json index df5e07debd..0a6f08a6e0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -6148,3 +6148,91 @@ ## { 'struct': 'DummyBlockCoreForceArrays', 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } } + +## +# @BlockParentType: +# +# @qdev: block device, such as created by device_add, and denoted by +# qdev-id +# +# @driver: block driver node, such as created by blockdev-add, and +# denoted by node-name +# +# @export: block export, such created by block-export-add, and +# denoted by export-id +# +# Since 9.1 +## +{ 'enum': 'BlockParentType', + 'data': ['qdev', 'driver', 'export'] } + +## +# @BdrvChildRefQdev: +# +# @qdev-id: the device's ID or QOM path +# +# Since 9.1 +## +{ 'struct': 'BdrvChildRefQdev', + 'data': { 'qdev-id': 'str' } } + +## +# @BdrvChildRefExport: +# +# @export-id: block export identifier +# +# Since 9.1 +## +{ 'struct': 'BdrvChildRefExport', + 'data': { 'export-id': 'str' } } + +## +# @BdrvChildRefDriver: +# +# @node-name: the node name of the parent block node +# +# @child: name of the child to be replaced, like "file" or "backing" +# +# Since 9.1 +## +{ 'struct': 'BdrvChildRefDriver', + 'data': { 'node-name': 'str', 'child': 'str' } } + +## +# @BlockdevReplace: +# +# @parent-type: type of the parent, which child is to be replaced +# +# @new-child: new child for replacement +# +# Since 9.1 +## +{ 'union': 'BlockdevReplace', + 'base': { + 'parent-type': 'BlockParentType', + 'new-child': 'str' + }, + 'discriminator': 'parent-type', + 'data': { + 'qdev': 'BdrvChildRefQdev', + 'export': 'BdrvChildRefExport', + 'driver': 'BdrvChildRefDriver' + } } + +## +# @blockdev-replace: +# +# Replace a block-node associated with device (selected by +# @qdev-id) or with block-export (selected by @export-id) or +# any child of block-node (selected by @node-name and @child) +# with @new-child block-node. 
+# +# Features: +# +# @unstable: This command is experimental. +# +# Since 9.1 +## +{ 'command': 'blockdev-replace', 'boxed': true, + 'features': [ 'unstable' ], + 'data': 'BlockdevReplace' } Looking back at this all, I now have another idea: instead of trying to unite different types of parents, maybe just publish concept of BdrvChild to QAPI? So that it will have unique id. Like for block-nodes, it could be auto-generated or specified by user. Then we'll add parameters for commands: device-add root-child-slot-id: ID block-export-add block-child-slot-id: ID and for block-drivers we already have BlockdevRef structure, it only lacks an id. { 'alternate': 'BlockdevRef', 'data': { 'definition': 'BlockdevOptions', 'reference': 'str' } } hmm.. Could it be as simple as { 'alternate': 'BlockdevRef', 'base': { '*child-slot-id': 'str' }, 'data': { 'definition': 'BlockdevOptions', 'reference': 'str' } } ? Unfortunately, no: "../qapi/block-core.json:4781: alternate has unknown key 'base'" Anyway, I believe, some solution should exist, probably by improving QAPI generator. Or, add "child-slot-id" to base of BlockdevOptions, and add virtual "reference" BlockdevDriver, to handle case with reference. And finally, the new command becomes as simple as: { 'command': 'blockdev-replace', 'data': {'child-slot': 'str', 'new-child': 'str' } } -- Best regards, Vladimir
[PATCH v3 6/7] qapi/block-core: deprecate block-job-change
That's a first step to move to newer job-* APIs. The difference between block-job-change and job-change is in find_block_job_locked() vs find_job_locked() functions. What's different? 1. find_block_job_locked() finds only block jobs, whereas find_job_locked() finds any kind of job. job-change is a compatible extension of block-job-change. 2. find_block_job_locked() reports DeviceNotActive on failure, whereas find_job_locked() reports GenericError. Since the kind of error reported isn't documented for either command, and clients shouldn't rely on undocumented error details, job-change is a compatible replacement for block-job-change. Signed-off-by: Vladimir Sementsov-Ogievskiy --- docs/about/deprecated.rst | 5 + qapi/block-core.json | 6 ++ 2 files changed, 11 insertions(+) diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 1e21fbbf77..d2461924ff 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -147,6 +147,11 @@ options are removed in favor of using explicit ``blockdev-create`` and ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for details. +``block-job-change`` (since 9.2) +'''''''''''''''''''''''''''''''' + +Use ``job-change`` instead. + Incorrectly typed ``device_add`` arguments (since 6.2) '''''''''''''''''''''''''''''''''''''''''''''''''''''' diff --git a/qapi/block-core.json b/qapi/block-core.json index e314734b53..ed87a9dc1e 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3100,9 +3100,15 @@ # # Change the block job's options. # +# Features: +# +# @deprecated: This command is deprecated. Use @job-change +# instead. +# # Since: 8.2 ## { 'command': 'block-job-change', + 'features': ['deprecated'], 'data': 'JobChangeOptions', 'boxed': true } ## -- 2.34.1
[PATCH v3 7/7] iotests/mirror-change-copy-mode: switch to job-change command
block-job-change is deprecated, let's move test to job-change. Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/qemu-iotests/tests/mirror-change-copy-mode | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/tests/mirror-change-copy-mode b/tests/qemu-iotests/tests/mirror-change-copy-mode index 51788b85c7..e972604ebf 100755 --- a/tests/qemu-iotests/tests/mirror-change-copy-mode +++ b/tests/qemu-iotests/tests/mirror-change-copy-mode @@ -150,7 +150,7 @@ class TestMirrorChangeCopyMode(iotests.QMPTestCase): len_before_change = result[0]['len'] # Change the copy mode while requests are happening. -self.vm.cmd('block-job-change', +self.vm.cmd('job-change', id='mirror', type='mirror', copy_mode='write-blocking') -- 2.34.1
[PATCH v3 2/7] blockjob: block_job_change_locked(): check job type
User may specify wrong type for the job id. Let's check it. Signed-off-by: Vladimir Sementsov-Ogievskiy --- blockjob.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/blockjob.c b/blockjob.c index 8cfbb15543..788cb1e07d 100644 --- a/blockjob.c +++ b/blockjob.c @@ -319,6 +319,12 @@ void block_job_change_locked(BlockJob *job, JobChangeOptions *opts, GLOBAL_STATE_CODE(); +if (job_type(&job->job) != opts->type) { +error_setg(errp, "Job '%s' is '%s' job, not '%s'", job->job.id, + job_type_str(&job->job), JobType_str(opts->type)); +return; +} + if (job_apply_verb_locked(&job->job, JOB_VERB_CHANGE, errp)) { return; } -- 2.34.1
[PATCH v3 4/7] blockjob: move change action implementation to job from block-job
Like for other block-job-* APIs we want have the actual functionality in job layer and make block-job-change to be a deprecated duplication of job-change in the following commit. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/mirror.c | 7 +++ blockdev.c | 2 +- blockjob.c | 26 -- include/block/blockjob.h | 11 --- include/block/blockjob_int.h | 7 --- include/qemu/job.h | 12 job-qmp.c| 1 + job.c| 23 +++ 8 files changed, 40 insertions(+), 49 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 60e8d83e4f..63e35114f3 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1258,10 +1258,9 @@ static bool commit_active_cancel(Job *job, bool force) return force || !job_is_ready(job); } -static void mirror_change(BlockJob *job, JobChangeOptions *opts, - Error **errp) +static void mirror_change(Job *job, JobChangeOptions *opts, Error **errp) { -MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); +MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job); JobChangeOptionsMirror *change_opts = &opts->u.mirror; MirrorCopyMode current; @@ -1316,9 +1315,9 @@ static const BlockJobDriver mirror_job_driver = { .pause = mirror_pause, .complete = mirror_complete, .cancel = mirror_cancel, +.change = mirror_change, }, .drained_poll = mirror_drained_poll, -.change = mirror_change, .query = mirror_query, }; diff --git a/blockdev.c b/blockdev.c index 626f53102d..b1c3de3862 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3262,7 +3262,7 @@ void qmp_block_job_change(JobChangeOptions *opts, Error **errp) return; } -block_job_change_locked(job, opts, errp); +job_change_locked(&job->job, opts, errp); } void qmp_change_backing_file(const char *device, diff --git a/blockjob.c b/blockjob.c index 788cb1e07d..2769722b37 100644 --- a/blockjob.c +++ b/blockjob.c @@ -312,32 +312,6 @@ static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) return block_job_set_speed_locked(job, speed, errp); } -void block_job_change_locked(BlockJob *job, JobChangeOptions 
*opts, - Error **errp) -{ -const BlockJobDriver *drv = block_job_driver(job); - -GLOBAL_STATE_CODE(); - -if (job_type(&job->job) != opts->type) { -error_setg(errp, "Job '%s' is '%s' job, not '%s'", job->job.id, - job_type_str(&job->job), JobType_str(opts->type)); -return; -} - -if (job_apply_verb_locked(&job->job, JOB_VERB_CHANGE, errp)) { -return; -} - -if (drv->change) { -job_unlock(); -drv->change(job, opts, errp); -job_lock(); -} else { -error_setg(errp, "Job type does not support change"); -} -} - void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n) { IO_CODE(); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 5dd1b08909..72e849a140 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -173,17 +173,6 @@ bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs); */ bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp); -/** - * block_job_change_locked: - * @job: The job to change. - * @opts: The new options. - * @errp: Error object. - * - * Change the job according to opts. - */ -void block_job_change_locked(BlockJob *job, JobChangeOptions *opts, - Error **errp); - /** * block_job_query_locked: * @job: The job to get information about. diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index d9c3b911d0..58bc7a5cea 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -68,13 +68,6 @@ struct BlockJobDriver { void (*set_speed)(BlockJob *job, int64_t speed); -/* - * Change the @job's options according to @opts. - * - * Note that this can already be called before the job coroutine is running. - */ -void (*change)(BlockJob *job, JobChangeOptions *opts, Error **errp); - /* * Query information specific to this kind of block job. 
*/ diff --git a/include/qemu/job.h b/include/qemu/job.h index 2b873f2576..6fa525dac3 100644 --- a/include/qemu/job.h +++ b/include/qemu/job.h @@ -27,6 +27,7 @@ #define JOB_H #include "qapi/qapi-types-job.h" +#include "qapi/qapi-types-block-core.h" #include "qemu/queue.h" #include "qemu/progress_meter.h" #include "qemu/coroutine.h" @@ -307,6 +308,12 @@ struct JobDrive
[PATCH v3 5/7] qapi: add job-change
Add a new-style command job-change, doing same thing as block-job-change. The aim is finally deprecate block-job-* APIs and move to job-* APIs. We add a new command to qapi/block-core.json, not to qapi/job.json to avoid resolving json file including loops for now. This all would be a lot simple to refactor when we finally drop deprecated block-job-* APIs. Signed-off-by: Vladimir Sementsov-Ogievskiy --- job-qmp.c| 14 ++ qapi/block-core.json | 10 ++ 2 files changed, 24 insertions(+) diff --git a/job-qmp.c b/job-qmp.c index c764bd3801..248e68f554 100644 --- a/job-qmp.c +++ b/job-qmp.c @@ -139,6 +139,20 @@ void qmp_job_dismiss(const char *id, Error **errp) job_dismiss_locked(&job, errp); } +void qmp_job_change(JobChangeOptions *opts, Error **errp) +{ +Job *job; + +JOB_LOCK_GUARD(); +job = find_job_locked(opts->id, errp); + +if (!job) { +return; +} + +job_change_locked(job, opts, errp); +} + /* Called with job_mutex held. */ static JobInfo *job_query_single_locked(Job *job, Error **errp) { diff --git a/qapi/block-core.json b/qapi/block-core.json index f370593037..e314734b53 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3105,6 +3105,16 @@ { 'command': 'block-job-change', 'data': 'JobChangeOptions', 'boxed': true } +## +# @job-change: +# +# Change the block job's options. +# +# Since: 9.2 +## +{ 'command': 'job-change', + 'data': 'JobChangeOptions', 'boxed': true } + ## # @BlockdevDiscardOptions: # -- 2.34.1
[PATCH v3 3/7] qapi: block-job-change: make copy-mode parameter optional
We are going to add more parameters to change. We want to make possible to change only one or any subset of available options. So all the options should be optional. Signed-off-by: Vladimir Sementsov-Ogievskiy Acked-by: Markus Armbruster --- block/mirror.c | 4 qapi/block-core.json | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/block/mirror.c b/block/mirror.c index 2816bb1042..60e8d83e4f 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1272,6 +1272,10 @@ static void mirror_change(BlockJob *job, JobChangeOptions *opts, GLOBAL_STATE_CODE(); +if (!change_opts->has_copy_mode) { +return; +} + if (qatomic_read(&s->copy_mode) == change_opts->copy_mode) { return; } diff --git a/qapi/block-core.json b/qapi/block-core.json index 0156762024..f370593037 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3072,11 +3072,12 @@ # # @copy-mode: Switch to this copy mode. Currently, only the switch # from 'background' to 'write-blocking' is implemented. +# If absent, copy mode remains the same. (optional since 9.2) # # Since: 8.2 ## { 'struct': 'JobChangeOptionsMirror', - 'data': { 'copy-mode' : 'MirrorCopyMode' } } + 'data': { '*copy-mode' : 'MirrorCopyMode' } } ## # @JobChangeOptions: -- 2.34.1
[PATCH v3 1/7] qapi: rename BlockJobChangeOptions to JobChangeOptions
We are going to move change action from block-job to job implementation, and then move to job-* external APIs, deprecating block-job-* APIs. This commit simplifies further transition. The commit is made by command git grep -l BlockJobChangeOptions | \ xargs sed -i 's/BlockJobChangeOptions/JobChangeOptions/g' Signed-off-by: Vladimir Sementsov-Ogievskiy Acked-by: Markus Armbruster --- block/mirror.c | 4 ++-- blockdev.c | 2 +- blockjob.c | 2 +- include/block/blockjob.h | 2 +- include/block/blockjob_int.h | 2 +- qapi/block-core.json | 12 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/block/mirror.c b/block/mirror.c index 61f0a717b7..2816bb1042 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1258,11 +1258,11 @@ static bool commit_active_cancel(Job *job, bool force) return force || !job_is_ready(job); } -static void mirror_change(BlockJob *job, BlockJobChangeOptions *opts, +static void mirror_change(BlockJob *job, JobChangeOptions *opts, Error **errp) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); -BlockJobChangeOptionsMirror *change_opts = &opts->u.mirror; +JobChangeOptionsMirror *change_opts = &opts->u.mirror; MirrorCopyMode current; /* diff --git a/blockdev.c b/blockdev.c index 6740663fda..626f53102d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3251,7 +3251,7 @@ void qmp_block_job_dismiss(const char *id, Error **errp) job_dismiss_locked(&job, errp); } -void qmp_block_job_change(BlockJobChangeOptions *opts, Error **errp) +void qmp_block_job_change(JobChangeOptions *opts, Error **errp) { BlockJob *job; diff --git a/blockjob.c b/blockjob.c index d5f29e14af..8cfbb15543 100644 --- a/blockjob.c +++ b/blockjob.c @@ -312,7 +312,7 @@ static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) return block_job_set_speed_locked(job, speed, errp); } -void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts, +void block_job_change_locked(BlockJob *job, JobChangeOptions *opts, Error **errp) { const
BlockJobDriver *drv = block_job_driver(job); diff --git a/include/block/blockjob.h b/include/block/blockjob.h index 7061ab7201..5dd1b08909 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -181,7 +181,7 @@ bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp); * * Change the job according to opts. */ -void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts, +void block_job_change_locked(BlockJob *job, JobChangeOptions *opts, Error **errp); /** diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h index 4c3d2e25a2..d9c3b911d0 100644 --- a/include/block/blockjob_int.h +++ b/include/block/blockjob_int.h @@ -73,7 +73,7 @@ struct BlockJobDriver { * * Note that this can already be called before the job coroutine is running. */ -void (*change)(BlockJob *job, BlockJobChangeOptions *opts, Error **errp); +void (*change)(BlockJob *job, JobChangeOptions *opts, Error **errp); /* * Query information specific to this kind of block job. diff --git a/qapi/block-core.json b/qapi/block-core.json index c3b0a2376b..0156762024 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3068,18 +3068,18 @@ 'allow-preconfig': true } ## -# @BlockJobChangeOptionsMirror: +# @JobChangeOptionsMirror: # # @copy-mode: Switch to this copy mode. Currently, only the switch # from 'background' to 'write-blocking' is implemented. # # Since: 8.2 ## -{ 'struct': 'BlockJobChangeOptionsMirror', +{ 'struct': 'JobChangeOptionsMirror', 'data': { 'copy-mode' : 'MirrorCopyMode' } } ## -# @BlockJobChangeOptions: +# @JobChangeOptions: # # Block job options that can be changed after job creation. 
# @@ -3089,10 +3089,10 @@ # # Since: 8.2 ## -{ 'union': 'BlockJobChangeOptions', +{ 'union': 'JobChangeOptions', 'base': { 'id': 'str', 'type': 'JobType' }, 'discriminator': 'type', - 'data': { 'mirror': 'BlockJobChangeOptionsMirror' } } + 'data': { 'mirror': 'JobChangeOptionsMirror' } } ## # @block-job-change: @@ -3102,7 +3102,7 @@ # Since: 8.2 ## { 'command': 'block-job-change', - 'data': 'BlockJobChangeOptions', 'boxed': true } + 'data': 'JobChangeOptions', 'boxed': true } ## # @BlockdevDiscardOptions: -- 2.34.1
[PATCH v3 0/7] introduce job-change qmp command
Hi all! Here is new job-change command - a replacement for (becoming deprecated) block-job-change, as a first step of my "[RFC 00/15] block job API" v3: 01: add a-b by Markus 03: add a-b by Markus, s/9.1/9.2/ in QAPI 05: update commit message, s/9.1/9.2/ in QAPI 06: update commit message (and subject!), s/9.1/9.2/ in deprecated.rst Vladimir Sementsov-Ogievskiy (7): qapi: rename BlockJobChangeOptions to JobChangeOptions blockjob: block_job_change_locked(): check job type qapi: block-job-change: make copy-mode parameter optional blockjob: move change action implementation to job from block-job qapi: add job-change qapi/block-core: deprecate block-job-change iotests/mirror-change-copy-mode: switch to job-change command block/mirror.c| 13 +--- blockdev.c| 4 +-- blockjob.c| 20 docs/about/deprecated.rst | 5 +++ include/block/blockjob.h | 11 --- include/block/blockjob_int.h | 7 - include/qemu/job.h| 12 +++ job-qmp.c | 15 + job.c | 23 ++ qapi/block-core.json | 31 ++- .../tests/mirror-change-copy-mode | 2 +- 11 files changed, 90 insertions(+), 53 deletions(-) -- 2.34.1
Re: [PATCH 2/2] nbd/server: Allow users to adjust handshake limit in QMP
On 02.10.24 16:49, Markus Armbruster wrote: Eric Blake writes: Although defaulting the handshake limit to 10 seconds was a nice QoI change to weed out intentionally slow clients, it can interfere with integration testing done with manual NBD_OPT commands over 'nbdsh --opt-mode'. Expose a QMP knob 'handshake-max-secs' to allow the user to alter the timeout away from the default. The parameter name here intentionally matches the spelling of the constant added in commit fb1c2aaa98, and not the command-line spelling added in the previous patch for qemu-nbd; that's because in QMP, longer names serve as good self-documentation, and unlike the command line, machines don't have problems generating longer spellings. Signed-off-by: Eric Blake --- qapi/block-export.json | 10 ++ include/block/nbd.h| 6 +++--- block/monitor/block-hmp-cmds.c | 4 ++-- blockdev-nbd.c | 26 ++ 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/qapi/block-export.json b/qapi/block-export.json index ce33fe378df..c110dd375ad 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -17,6 +17,10 @@ # # @addr: Address on which to listen. # +# @handshake-max-secs: Time limit, in seconds, at which a client that +# has not completed the negotiation handshake will be disconnected, +# or 0 for no limit (since 9.2; default: 10). +# # @tls-creds: ID of the TLS credentials object (since 2.6). # # @tls-authz: ID of the QAuthZ authorization object used to validate @@ -34,6 +38,7 @@ ## { 'struct': 'NbdServerOptions', 'data': { 'addr': 'SocketAddress', +'*handshake-max-secs': 'uint32', '*tls-creds': 'str', '*tls-authz': 'str', '*max-connections': 'uint32' } } @@ -52,6 +57,10 @@ # # @addr: Address on which to listen. # +# @handshake-max-secs: Time limit, in seconds, at which a client that +# has not completed the negotiation handshake will be disconnected, +# or 0 for no limit (since 9.2; default: 10). +# # @tls-creds: ID of the TLS credentials object (since 2.6). 
# # @tls-authz: ID of the QAuthZ authorization object used to validate @@ -72,6 +81,7 @@ ## { 'command': 'nbd-server-start', 'data': { 'addr': 'SocketAddressLegacy', +'*handshake-max-secs': 'uint32', '*tls-creds': 'str', '*tls-authz': 'str', '*max-connections': 'uint32' }, Are we confident we'll never need less than a full second? Hmm, recent "[PATCH v2] chardev: introduce 'reconnect-ms' and deprecate 'reconnect'" shows that at least sometimes second is not enough precision. Maybe, using milliseconds consistently for all relatively short time intervals in QAPI would be a good rule? -- Best regards, Vladimir
Re: [PATCH 2/2] nbd/server: Allow users to adjust handshake limit in QMP
On 09.08.24 19:14, Eric Blake wrote: Although defaulting the handshake limit to 10 seconds was a nice QoI change to weed out intentionally slow clients, it can interfere with integration testing done with manual NBD_OPT commands over 'nbdsh --opt-mode'. Expose a QMP knob 'handshake-max-secs' to allow the user to alter the timeout away from the default. The parameter name here intentionally matches the spelling of the constant added in commit fb1c2aaa98, and not the command-line spelling added in the previous patch for qemu-nbd; that's because in QMP, longer names serve as good self-documentation, and unlike the command line, machines don't have problems generating longer spellings. Signed-off-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy --- qapi/block-export.json | 10 ++ include/block/nbd.h| 6 +++--- block/monitor/block-hmp-cmds.c | 4 ++-- blockdev-nbd.c | 26 ++ 4 files changed, 33 insertions(+), 13 deletions(-) diff --git a/qapi/block-export.json b/qapi/block-export.json index ce33fe378df..c110dd375ad 100644 --- a/qapi/block-export.json +++ b/qapi/block-export.json @@ -17,6 +17,10 @@ # # @addr: Address on which to listen. # +# @handshake-max-secs: Time limit, in seconds, at which a client that +# has not completed the negotiation handshake will be disconnected, +# or 0 for no limit (since 9.2; default: 10). +# # @tls-creds: ID of the TLS credentials object (since 2.6). # # @tls-authz: ID of the QAuthZ authorization object used to validate @@ -34,6 +38,7 @@ ## { 'struct': 'NbdServerOptions', 'data': { 'addr': 'SocketAddress', +'*handshake-max-secs': 'uint32', '*tls-creds': 'str', '*tls-authz': 'str', '*max-connections': 'uint32' } } @@ -52,6 +57,10 @@ # # @addr: Address on which to listen. # +# @handshake-max-secs: Time limit, in seconds, at which a client that +# has not completed the negotiation handshake will be disconnected, +# or 0 for no limit (since 9.2; default: 10). +# # @tls-creds: ID of the TLS credentials object (since 2.6). 
# # @tls-authz: ID of the QAuthZ authorization object used to validate @@ -72,6 +81,7 @@ ## { 'command': 'nbd-server-start', 'data': { 'addr': 'SocketAddressLegacy', +'*handshake-max-secs': 'uint32', '*tls-creds': 'str', '*tls-authz': 'str', '*max-connections': 'uint32' }, Hmm, can we make a common base for these two structures, to avoid adding things always in two places? At some point it would be good to somehow deprecate old syntax and finally remove it. SocketAddressLegacy is marked as deprecated in a comment in qapi/sockets.json, but no word in deprecated.rst, and I'm unsure how it is possible to deprecate this.. Maybe, the only way is introducing a new command, and deprecate nbd-server-start altogether. Aha, that should happen when we add a possibility to start multiple nbd servers. -- Best regards, Vladimir
Re: [PATCH 1/2] qemu-nbd: Allow users to adjust handshake limit
On 09.08.24 19:14, Eric Blake wrote: Although defaulting the handshake limit to 10 seconds was a nice QoI change to weed out intentionally slow clients, it can interfere with integration testing done with manual NBD_OPT commands over 'nbdsh --opt-mode'. Expose a command line option to allow the user to alter the timeout away from the default. This option is unlikely to be used in enough scenarios to warrant a short option letter. The option --handshake-limit intentionally differs from the name of the constant added in commit fb1c2aaa98 (limit instead of max_secs) and the QMP name to be added in the next commit; this is because typing a longer command-line name is undesirable and there is sufficient --help text to document the units. Signed-off-by: Eric Blake Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [PATCH v3 5/5] block: add test non-active commit with zeroed data
On 01.09.24 17:24, Vincent Vanlaer wrote: Signed-off-by: Vincent Vanlaer --- tests/qemu-iotests/315 | 95 ++ tests/qemu-iotests/315.out | 54 ++ Please place new tests in tests/qemu-iotests/tests, with human readable name, something like commit-zeroes or what you want. 2 files changed, 149 insertions(+) create mode 100755 tests/qemu-iotests/315 create mode 100644 tests/qemu-iotests/315.out diff --git a/tests/qemu-iotests/315 b/tests/qemu-iotests/315 new file mode 100755 index 00..84865f8001 --- /dev/null +++ b/tests/qemu-iotests/315 @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# group: rw quick +# +# Test for commit of discarded blocks +# +# This tests committing a live snapshot where some of the blocks that +# are present in the base image are discarded in the intermediate image. +# This intends to check that these blocks are also discarded in the base +# image after the commit. +# +# Copyright (C) 2024 Vincent Vanlaer. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# creator +owner=libvirt-e6954...@volkihar.be + +seq=`basename $0` +echo "QA output created by $seq" + +status=1 # failure is the default! 
+ +_cleanup() +{ +_cleanup_qemu +_rm_test_img "${TEST_IMG}.base" +_rm_test_img "${TEST_IMG}.mid" +_cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks Example of bash test in tests is tests/qemu-iotests/tests/qemu-img-bitmaps, so you'll need "cd .." here, before ". ./common.rc" I know, this all looks not optimal, but still, human-readable names are much better than numbers. With that: Tested-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Vladimir Sementsov-Ogievskiy +. ./common.rc +. ./common.filter +. ./common.qemu + -- Best regards, Vladimir
Re: [PATCH v3 4/5] block: allow commit to unmap zero blocks
On 01.09.24 17:24, Vincent Vanlaer wrote: Non-active block commits do not discard blocks only containing zeros, causing images to lose sparseness after the commit. This commit fixes that by writing zero blocks using blk_co_pwrite_zeroes rather than writing them out as any other arbitrary data. Signed-off-by: Vincent Vanlaer Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [PULL 3/5] block/reqlist: allow adding overlapping requests
On 01.10.24 19:28, Michael Tokarev wrote: 30.09.2024 11:43, Vladimir Sementsov-Ogievskiy wrote: From: Fiona Ebner Allow overlapping request by removing the assert that made it impossible. There are only two callers: 1. block_copy_task_create() It already asserts the very same condition before calling reqlist_init_req(). 2. cbw_snapshot_read_lock() There is no need to have read requests be non-overlapping in copy-before-write when used for snapshot-access. In fact, there was no protection against two callers of cbw_snapshot_read_lock() calling reqlist_init_req() with overlapping ranges and this could lead to an assertion failure [1]. In particular, with the reproducer script below [0], two cbw_co_snapshot_block_status() callers could race, with the second calling reqlist_init_req() before the first one finishes and removes its conflicting request. Hm. This one applies to 7.2 too (current oldest stable series), with the description above matching what the code is doing. I picked it up for up to 7.2. Please let me know if this shouldn't be done :) I don't see any problems) Still, that's not a guarantee that we don't have them. At least, we definitely lack a test for this case. -- Best regards, Vladimir
Re: [PATCH v3 21/22] qom/object: fix -Werror=maybe-uninitialized
On 01.10.24 18:22, Marc-André Lureau wrote: Hi Vladimir On Tue, Oct 1, 2024 at 6:06 PM Vladimir Sementsov-Ogievskiy mailto:vsement...@yandex-team.ru>> wrote: On 30.09.24 11:14, marcandre.lur...@redhat.com <mailto:marcandre.lur...@redhat.com> wrote: > From: Marc-André Lureau mailto:marcandre.lur...@redhat.com>> > > object_resolve_path_type() didn't always set *ambiguousp. > > Signed-off-by: Marc-André Lureau mailto:marcandre.lur...@redhat.com>> > --- > qom/object.c | 5 - > 1 file changed, 4 insertions(+), 1 deletion(-) > > diff --git a/qom/object.c b/qom/object.c > index 28c5b66eab..bdc8a2c666 100644 > --- a/qom/object.c > +++ b/qom/object.c > @@ -2201,6 +2201,9 @@ Object *object_resolve_path_type(const char *path, const char *typename, > } > } else { > obj = object_resolve_abs_path(object_get_root(), parts + 1, typename); > + if (ambiguousp) { > + *ambiguousp = false; > + } Doesn't this hunk in isolation fix the issue? With this object_resolve_path_type() should set the pointer on all paths if it is non-null.. Hmm, called object_resolve_partial_path() also doesn't set ambiguous on every path, so this hunk is at lease incomplete. yeah, but object_resolve_path_type() initializes it. I'm unsure about what semantics expected around ambigous pointers, but it seems to me that it is set only on failure paths, as a reason, why we failed. If this is true, I think, we need only the second hunk, which initializes local "ambig". right, and that seems good enough. Do you ack/rb this change then? qom/object: fix -Werror=maybe-uninitialized object_resolve_path_type() didn't always set *ambiguousp. 
Signed-off-by: Marc-André Lureau mailto:marcandre.lur...@redhat.com>> diff --git a/qom/object.c b/qom/object.c index 28c5b66eab..d3d3003541 100644 --- a/qom/object.c +++ b/qom/object.c @@ -2226,7 +2226,7 @@ Object *object_resolve_path_at(Object *parent, const char *path) Object *object_resolve_type_unambiguous(const char *typename, Error **errp) { - bool ambig; + bool ambig = false; Object *o = object_resolve_path_type("", typename, &ambig); if (ambig) { Yes, I think this one in isolation is OK: Reviewed-by: Vladimir Sementsov-Ogievskiy thanks! > } > > g_strfreev(parts); > @@ -2226,7 +2229,7 @@ Object *object_resolve_path_at(Object *parent, const char *path) > > Object *object_resolve_type_unambiguous(const char *typename, Error **errp) > { > - bool ambig; > + bool ambig = false; > Object *o = object_resolve_path_type("", typename, &ambig); > > if (ambig) { -- Best regards, Vladimir -- Marc-André Lureau -- Best regards, Vladimir
Re: [PATCH v3 21/22] qom/object: fix -Werror=maybe-uninitialized
On 30.09.24 11:14, marcandre.lur...@redhat.com wrote: From: Marc-André Lureau object_resolve_path_type() didn't always set *ambiguousp. Signed-off-by: Marc-André Lureau --- qom/object.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/qom/object.c b/qom/object.c index 28c5b66eab..bdc8a2c666 100644 --- a/qom/object.c +++ b/qom/object.c @@ -2201,6 +2201,9 @@ Object *object_resolve_path_type(const char *path, const char *typename, } } else { obj = object_resolve_abs_path(object_get_root(), parts + 1, typename); +if (ambiguousp) { +*ambiguousp = false; +} Doesn't this hunk in isolation fix the issue? With this object_resolve_path_type() should set the pointer on all paths if it is non-null.. Hmm, called object_resolve_partial_path() also doesn't set ambiguous on every path, so this hunk is at least incomplete. I'm unsure about what semantics are expected around ambiguous pointers, but it seems to me that it is set only on failure paths, as the reason why we failed. If this is true, I think, we need only the second hunk, which initializes local "ambig". } g_strfreev(parts); @@ -2226,7 +2229,7 @@ Object *object_resolve_path_at(Object *parent, const char *path) Object *object_resolve_type_unambiguous(const char *typename, Error **errp) { -bool ambig; +bool ambig = false; Object *o = object_resolve_path_type("", typename, &ambig); if (ambig) { -- Best regards, Vladimir
Re: [PATCH v3 20/22] block: fix -Werror=maybe-uninitialized false-positive
On 30.09.24 11:14, marcandre.lur...@redhat.com wrote: From: Marc-André Lureau ../block/file-posix.c:1405:17: error: ‘zoned’ may be used uninitialized [-Werror=maybe-uninitialized] 1405 | if (ret < 0 || zoned == BLK_Z_NONE) { Signed-off-by: Marc-André Lureau Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [Bug Report][RFC PATCH 1/1] block: fix failing assert on paused VM migration
On 30.09.24 17:07, Andrey Drobyshev wrote: On 9/30/24 12:25 PM, Vladimir Sementsov-Ogievskiy wrote: [add migration maintainers] On 24.09.24 15:56, Andrey Drobyshev wrote: [...] I doubt that this a correct way to go. As far as I understand, "inactive" actually means that "storage is not belong to qemu, but to someone else (another qemu process for example), and may be changed transparently". In turn this means that Qemu should do nothing with inactive disks. So the problem is that nobody called bdrv_activate_all on target, and we shouldn't ignore that. Hmm, I see in process_incoming_migration_bh() we do call bdrv_activate_all(), but only in some scenarios. May be, the condition should be less strict here. Why we need any condition here at all? Don't we want to activate block-layer on target after migration anyway? Hmm I'm not sure about the unconditional activation, since we at least have to honor LATE_BLOCK_ACTIVATE cap if it's set (and probably delay it in such a case). In current libvirt upstream I see such code: /* Migration capabilities which should always be enabled as long as they * are supported by QEMU. If the capability is supposed to be enabled on both * sides of migration, it won't be enabled unless both sides support it. */ static const qemuMigrationParamsAlwaysOnItem qemuMigrationParamsAlwaysOn[] = { {QEMU_MIGRATION_CAP_PAUSE_BEFORE_SWITCHOVER, QEMU_MIGRATION_SOURCE}, {QEMU_MIGRATION_CAP_LATE_BLOCK_ACTIVATE, QEMU_MIGRATION_DESTINATION}, }; which means that libvirt always wants LATE_BLOCK_ACTIVATE to be set. The code from process_incoming_migration_bh() you're referring to: /* If capability late_block_activate is set: * Only fire up the block code now if we're going to restart the * VM, else 'cont' will do it. * This causes file locking to happen; so we don't want it to happen * unless we really are starting the VM. 
*/ if (!migrate_late_block_activate() || (autostart && (!global_state_received() || runstate_is_live(global_state_get_runstate() { /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ bdrv_activate_all(&local_err); if (local_err) { error_report_err(local_err); local_err = NULL; autostart = false; } } It states explicitly that we're either going to start VM right at this point if (autostart == true), or we wait till "cont" command happens. None of this is going to happen if we start another migration while still being in PAUSED state. So I think it seems reasonable to take such case into account. For instance, this patch does prevent the crash: diff --git a/migration/migration.c b/migration/migration.c index ae2be31557..3222f6745b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -733,7 +733,8 @@ static void process_incoming_migration_bh(void *opaque) */ if (!migrate_late_block_activate() || (autostart && (!global_state_received() || -runstate_is_live(global_state_get_runstate() { +runstate_is_live(global_state_get_runstate( || + (!autostart && global_state_get_runstate() == RUN_STATE_PAUSED)) { /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ bdrv_activate_all(&local_err); What are your thoughts on it? Hmmm... Don't we violate "late-block-activate" contract by this? Me go and check: # @late-block-activate: If enabled, the destination will not activate # block devices (and thus take locks) immediately at the end of # migration. (since 3.0) Yes, we'll violate it by this patch. So, for now the only exception is when autostart is enabled, but libvirt correctly use late-block-activate + !autostart. Interesting, when block layer is assumed to be activated.. Aha, only in qmp_cont(). So, what to do with this all: Either libvirt should not use late-block-activate for migration of stopped vm. 
This way target would be automatically activated Or if libvirt still need postponed activation (I assume, for correctly switching shared disks, etc), Libvirt should do some additional QMP call. It can't be "cont", if we don't want to run the VM. So, probably, we need additional "block-activate" QMP command for this. And, anyway, trying to migrate inactive block layer should fail with some good error message rather than crash. -- Best regards, Vladimir
Re: [PATCH v3 16/22] target/loongarch: fix -Werror=maybe-uninitialized false-positive
On 30.09.24 11:14, marcandre.lur...@redhat.com wrote: From: Marc-André Lureau ../target/loongarch/gdbstub.c:55:20: error: ‘val’ may be used uninitialized [-Werror=maybe-uninitialized] 55 | return gdb_get_reg32(mem_buf, val); |^~~ ../target/loongarch/gdbstub.c:39:18: note: ‘val’ was declared here 39 | uint64_t val; Signed-off-by: Marc-André Lureau Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [PATCH v3 3/5] block: refactor error handling of commit_iteration
On 01.09.24 17:24, Vincent Vanlaer wrote: Signed-off-by: Vincent Vanlaer --- block/commit.c | 37 ++--- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/block/commit.c b/block/commit.c index 9eedd1fa47..288e413be3 100644 --- a/block/commit.c +++ b/block/commit.c @@ -130,7 +130,6 @@ static void commit_clean(Job *job) static int commit_iteration(CommitBlockJob *s, int64_t offset, int64_t *n, void *buf) { int ret = 0; -bool copy; bool error_in_source = true; /* Copy if allocated above the base */ @@ -140,19 +139,34 @@ static int commit_iteration(CommitBlockJob *s, int64_t offset, int64_t *n, void n, NULL, NULL, NULL); } -copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED); +if (ret < 0) { +goto handle_error; +} + trace_commit_one_iteration(s, offset, *n, ret); this way we loose trace point for error case. Let's move trace_... above "if (ret.." -if (copy) { + +if (ret & BDRV_BLOCK_ALLOCATED) { assert(*n < SIZE_MAX); ret = blk_co_pread(s->top, offset, *n, buf, 0); -if (ret >= 0) { -ret = blk_co_pwrite(s->base, offset, *n, buf, 0); -if (ret < 0) { -error_in_source = false; -} +if (ret < 0) { +goto handle_error; } + +ret = blk_co_pwrite(s->base, offset, *n, buf, 0); +if (ret < 0) { +error_in_source = false; +goto handle_error; +} + +block_job_ratelimit_processed_bytes(&s->common, *n); } + +/* Publish progress */ + +job_progress_update(&s->common.job, *n); + it looks a bit strange to fallthrough to "handle_error" from success path I suggest: @@ -166,17 +166,17 @@ static int commit_iteration(CommitBlockJob *s, int64_t offset, int64_t *n, void job_progress_update(&s->common.job, *n); -handle_error: -if (ret < 0) { -BlockErrorAction action = block_job_error_action(&s->common, s->on_error, - error_in_source, -ret); -if (action == BLOCK_ERROR_ACTION_REPORT) { -return ret; -} else { -*n = 0; -} +return 0; + +fail: +BlockErrorAction action = block_job_error_action(&s->common, s->on_error, + error_in_source, -ret); +if (action == BLOCK_ERROR_ACTION_REPORT) { +return ret; 
} +/* Retry iteration */ +*n = 0; return 0; } (also, "fail" name is more popular for such labels: $ git grep 'goto fail' | wc -l 1334 ) +handle_error: if (ret < 0) { BlockErrorAction action = block_job_error_action(&s->common, s->on_error, error_in_source, -ret); @@ -160,15 +174,8 @@ static int commit_iteration(CommitBlockJob *s, int64_t offset, int64_t *n, void return ret; } else { *n = 0; -return 0; } } -/* Publish progress */ -job_progress_update(&s->common.job, *n); - -if (copy) { -block_job_ratelimit_processed_bytes(&s->common, *n); -} return 0; } -- Best regards, Vladimir
Re: [PATCH v3 2/5] block: move commit_run loop to separate function
On 01.09.24 17:24, Vincent Vanlaer wrote: +static int commit_iteration(CommitBlockJob *s, int64_t offset, int64_t *n, void *buf) { Hmm over-80 line. Didn't you forget to run ./checkpatch.pl ? -- Best regards, Vladimir
Re: [PATCH v3 2/5] block: move commit_run loop to separate function
On 01.09.24 17:24, Vincent Vanlaer wrote: Signed-off-by: Vincent Vanlaer --- block/commit.c | 85 -- 1 file changed, 48 insertions(+), 37 deletions(-) diff --git a/block/commit.c b/block/commit.c index 8dee25b313..9eedd1fa47 100644 --- a/block/commit.c +++ b/block/commit.c @@ -128,6 +128,51 @@ static void commit_clean(Job *job) blk_unref(s->top); } +static int commit_iteration(CommitBlockJob *s, int64_t offset, int64_t *n, void *buf) { +int ret = 0; +bool copy; +bool error_in_source = true; + +/* Copy if allocated above the base */ +WITH_GRAPH_RDLOCK_GUARD() { +ret = bdrv_co_common_block_status_above(blk_bs(s->top), +s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE, +n, NULL, NULL, NULL); +} + +copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED); +trace_commit_one_iteration(s, offset, *n, ret); +if (copy) { +assert(*n < SIZE_MAX); Probably a matter of taste, but I'd prefer working with local variable instead of dereferencing the pointer on every line. Like this: commit_iteration(..., int64_t bytes, int64_t *done, ...) { ... work with bytes variable ... 
out: *done = bytes; return 0; } anyway: Reviewed-by: Vladimir Sementsov-Ogievskiy + +ret = blk_co_pread(s->top, offset, *n, buf, 0); +if (ret >= 0) { +ret = blk_co_pwrite(s->base, offset, *n, buf, 0); +if (ret < 0) { +error_in_source = false; +} +} +} +if (ret < 0) { +BlockErrorAction action = block_job_error_action(&s->common, s->on_error, + error_in_source, -ret); +if (action == BLOCK_ERROR_ACTION_REPORT) { +return ret; +} else { +*n = 0; +return 0; +} +} +/* Publish progress */ +job_progress_update(&s->common.job, *n); + +if (copy) { +block_job_ratelimit_processed_bytes(&s->common, *n); +} + +return 0; +} + static int coroutine_fn commit_run(Job *job, Error **errp) { CommitBlockJob *s = container_of(job, CommitBlockJob, common.job); @@ -158,9 +203,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp) buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); for (offset = 0; offset < len; offset += n) { -bool copy; -bool error_in_source = true; - /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. 
*/ @@ -168,42 +210,11 @@ static int coroutine_fn commit_run(Job *job, Error **errp) if (job_is_cancelled(&s->common.job)) { break; } -/* Copy if allocated above the base */ -WITH_GRAPH_RDLOCK_GUARD() { -ret = bdrv_co_common_block_status_above(blk_bs(s->top), -s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE, -&n, NULL, NULL, NULL); -} -copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED); -trace_commit_one_iteration(s, offset, n, ret); -if (copy) { -assert(n < SIZE_MAX); - -ret = blk_co_pread(s->top, offset, n, buf, 0); -if (ret >= 0) { -ret = blk_co_pwrite(s->base, offset, n, buf, 0); -if (ret < 0) { -error_in_source = false; -} -} -} -if (ret < 0) { -BlockErrorAction action = -block_job_error_action(&s->common, s->on_error, - error_in_source, -ret); -if (action == BLOCK_ERROR_ACTION_REPORT) { -return ret; -} else { -n = 0; -continue; -} -} -/* Publish progress */ -job_progress_update(&s->common.job, n); +ret = commit_iteration(s, offset, &n, buf); -if (copy) { -block_job_ratelimit_processed_bytes(&s->common, n); +if (ret < 0) { +return ret; } } -- Best regards, Vladimir
Re: [PATCH v2 03/19] qapi/block-core: Drop temporary 'prefix'
On 30.09.24 16:23, Vladimir Sementsov-Ogievskiy wrote: On 04.09.24 14:18, Markus Armbruster wrote: Recent commit "qapi: Smarter camel_to_upper() to reduce need for 'prefix'" added a temporary 'prefix' to delay changing the generated code. Revert it. This improves XDbgBlockGraphNodeType's generated enumeration constant prefix from X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND to XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND. Signed-off-by: Markus Armbruster Reviewed-by: Vladimir Sementsov-Ogievskiy Oops sorry, I see now that was merged already. -- Best regards, Vladimir
Re: [PATCH v2 03/19] qapi/block-core: Drop temporary 'prefix'
On 04.09.24 14:18, Markus Armbruster wrote: Recent commit "qapi: Smarter camel_to_upper() to reduce need for 'prefix'" added a temporary 'prefix' to delay changing the generated code. Revert it. This improves XDbgBlockGraphNodeType's generated enumeration constant prefix from X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND to XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND. Signed-off-by: Markus Armbruster Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [Bug Report][RFC PATCH 1/1] block: fix failing assert on paused VM migration
[add migration maintainers] On 24.09.24 15:56, Andrey Drobyshev wrote: Instead of throwing an assert let's just ignore that flag is already set and return. We assume that it's going to be safe to ignore. Otherwise this assert fails when migrating a paused VM back and forth. Ideally we'd like to have a more sophisticated solution, e.g. not even scan the nodes which should be inactive at this point. Signed-off-by: Andrey Drobyshev --- block.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 7d90007cae..c1dcf906d1 100644 --- a/block.c +++ b/block.c @@ -6973,7 +6973,15 @@ static int GRAPH_RDLOCK bdrv_inactivate_recurse(BlockDriverState *bs) return 0; } -assert(!(bs->open_flags & BDRV_O_INACTIVE)); +if (bs->open_flags & BDRV_O_INACTIVE) { +/* + * Return here instead of throwing assert as a workaround to + * prevent failure on migrating paused VM. + * Here we assume that if we're trying to inactivate BDS that's + * already inactive, it's safe to just ignore it. + */ +return 0; +} /* Inactivate this node */ if (bs->drv->bdrv_inactivate) { I doubt that this a correct way to go. As far as I understand, "inactive" actually means that "storage is not belong to qemu, but to someone else (another qemu process for example), and may be changed transparently". In turn this means that Qemu should do nothing with inactive disks. So the problem is that nobody called bdrv_activate_all on target, and we shouldn't ignore that. Hmm, I see in process_incoming_migration_bh() we do call bdrv_activate_all(), but only in some scenarios. May be, the condition should be less strict here. Why we need any condition here at all? Don't we want to activate block-layer on target after migration anyway? -- Best regards, Vladimir
Re: [PATCH v3 17/22] tests: fix -Werror=maybe-uninitialized false-positive
On 30.09.24 11:14, marcandre.lur...@redhat.com wrote: From: Marc-André Lureau ../tests/unit/test-block-iothread.c:773:17: error: ‘job’ may be used uninitialized [-Werror=maybe-uninitialized] /usr/include/glib-2.0/glib/gtestutils.h:73:53: error: ‘ret’ may be used uninitialized [-Werror=maybe-uninitialized] Signed-off-by: Marc-André Lureau Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [PATCH v3 11/22] block/block-copy: fix -Werror=maybe-uninitialized false-positive
On 30.09.24 11:14, marcandre.lur...@redhat.com wrote: From: Marc-André Lureau ../block/block-copy.c:591:12: error: ‘ret’ may be used uninitialized [-Werror=maybe-uninitialized] Signed-off-by: Marc-André Lureau Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [PATCH v3 06/22] block/mirror: fix -Werror=maybe-uninitialized false-positive
On 30.09.24 11:14, marcandre.lur...@redhat.com wrote: From: Marc-André Lureau ../block/mirror.c:404:5: error: ‘ret’ may be used uninitialized [-Werror=maybe-uninitialized] ../block/mirror.c:895:12: error: ‘ret’ may be used uninitialized [-Werror=maybe-uninitialized] ../block/mirror.c:578:12: error: ‘ret’ may be used uninitialized [-Werror=maybe-uninitialized] Change a variable to int, as suggested by Manos: "bdrv_co_preadv() which is int and is passed as an int argument to mirror_read_complete()" Signed-off-by: Marc-André Lureau Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
[PULL 1/5] copy-before-write: allow specifying minimum cluster size
From: Fiona Ebner In the context of backup fleecing, discarding the source will not work when the fleecing image has a larger granularity than the one used for block-copy operations (can happen if the backup target has smaller cluster size), because cbw_co_pdiscard_snapshot() will align down the discard requests and thus effectively ignore then. To make @discard-source work in such a scenario, allow specifying the minimum cluster size used for block-copy operations and thus in particular also the granularity for discard requests to the source. The type 'size' (corresponding to uint64_t in C) is used in QAPI to rule out negative inputs and for consistency with already existing @cluster-size parameters. Since block_copy_calculate_cluster_size() uses int64_t for its result, a check that the input is not too large is added in block_copy_state_new() before calling it. The calculation in block_copy_calculate_cluster_size() is done in the target int64_t type. Suggested-by: Vladimir Sementsov-Ogievskiy Acked-by: Markus Armbruster (QAPI schema) Signed-off-by: Fiona Ebner Message-Id: <20240711120915.310243-2-f.eb...@proxmox.com> [vsementsov: switch version to 9.2 in QAPI doc] Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/block-copy.c | 36 ++-- block/copy-before-write.c | 5 - include/block/block-copy.h | 1 + qapi/block-core.json | 8 +++- 4 files changed, 38 insertions(+), 12 deletions(-) diff --git a/block/block-copy.c b/block/block-copy.c index cc618e4561..93eb1b2664 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range, } static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, + int64_t min_cluster_size, Error **errp) { int ret; @@ -319,6 +320,9 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, GLOBAL_STATE_CODE(); GRAPH_RDLOCK_GUARD_MAINLOOP(); +min_cluster_size = MAX(min_cluster_size, + 
(int64_t)BLOCK_COPY_CLUSTER_SIZE_DEFAULT); + target_does_cow = bdrv_backing_chain_next(target); /* @@ -329,13 +333,13 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, ret = bdrv_get_info(target, &bdi); if (ret == -ENOTSUP && !target_does_cow) { /* Cluster size is not defined */ -warn_report("The target block device doesn't provide " -"information about the block size and it doesn't have a " -"backing file. The default block size of %u bytes is " -"used. If the actual block size of the target exceeds " -"this default, the backup may be unusable", -BLOCK_COPY_CLUSTER_SIZE_DEFAULT); -return BLOCK_COPY_CLUSTER_SIZE_DEFAULT; +warn_report("The target block device doesn't provide information about " +"the block size and it doesn't have a backing file. The " +"(default) block size of %" PRIi64 " bytes is used. If the " +"actual block size of the target exceeds this value, the " +"backup may be unusable", +min_cluster_size); +return min_cluster_size; } else if (ret < 0 && !target_does_cow) { error_setg_errno(errp, -ret, "Couldn't determine the cluster size of the target image, " @@ -345,16 +349,17 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target, return ret; } else if (ret < 0 && target_does_cow) { /* Not fatal; just trudge on ahead. */ -return BLOCK_COPY_CLUSTER_SIZE_DEFAULT; +return min_cluster_size; } -return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); +return MAX(min_cluster_size, bdi.cluster_size); } BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, BlockDriverState *copy_bitmap_bs, const BdrvDirtyBitmap *bitmap, bool discard_source, + uint64_t min_cluster_size, Error **errp) { ERRP_GUARD(); @@ -365,7 +370,18 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target, GLOBAL_STATE_CODE(); -cluster_size = block_copy_calculate_cluster_size(target->bs, errp); +if (min_cluster_size > INT64_MAX) { +error_setg(errp, "min-cluster-size too large: %" PRIu64
[PULL 5/5] util/co-shared-resource: Remove unused co_try_get_from_shres
From: "Dr. David Alan Gilbert" co_try_get_from_shres hasn't been used since it was added in 55fa54a789 ("co-shared-resource: protect with a mutex") (Everyone uses the _locked version) Remove it. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20240918124220.27871-1-d...@treblig.org> Signed-off-by: Vladimir Sementsov-Ogievskiy --- include/qemu/co-shared-resource.h | 7 --- util/qemu-co-shared-resource.c| 6 -- 2 files changed, 13 deletions(-) diff --git a/include/qemu/co-shared-resource.h b/include/qemu/co-shared-resource.h index 78ca5850f8..41be1a8131 100644 --- a/include/qemu/co-shared-resource.h +++ b/include/qemu/co-shared-resource.h @@ -44,13 +44,6 @@ SharedResource *shres_create(uint64_t total); */ void shres_destroy(SharedResource *s); -/* - * Try to allocate an amount of @n. Return true on success, and false - * if there is too little left of the collective resource to fulfill - * the request. - */ -bool co_try_get_from_shres(SharedResource *s, uint64_t n); - /* * Allocate an amount of @n, and, if necessary, yield until * that becomes possible. diff --git a/util/qemu-co-shared-resource.c b/util/qemu-co-shared-resource.c index a66cc07e75..752eb5a1c5 100644 --- a/util/qemu-co-shared-resource.c +++ b/util/qemu-co-shared-resource.c @@ -66,12 +66,6 @@ static bool co_try_get_from_shres_locked(SharedResource *s, uint64_t n) return false; } -bool co_try_get_from_shres(SharedResource *s, uint64_t n) -{ -QEMU_LOCK_GUARD(&s->lock); -return co_try_get_from_shres_locked(s, n); -} - void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n) { assert(n <= s->total); -- 2.34.1
[PULL 2/5] backup: add minimum cluster size to performance options
From: Fiona Ebner In the context of backup fleecing, discarding the source will not work when the fleecing image has a larger granularity than the one used for block-copy operations (can happen if the backup target has smaller cluster size), because cbw_co_pdiscard_snapshot() will align down the discard requests and thus effectively ignore then. To make @discard-source work in such a scenario, allow specifying the minimum cluster size used for block-copy operations and thus in particular also the granularity for discard requests to the source. Suggested-by: Vladimir Sementsov-Ogievskiy Acked-by: Markus Armbruster (QAPI schema) Signed-off-by: Fiona Ebner Message-Id: <20240711120915.310243-3-f.eb...@proxmox.com> [vsementsov: switch version to 9.2 in QAPI doc] Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/backup.c| 2 +- block/copy-before-write.c | 9 + block/copy-before-write.h | 1 + blockdev.c| 3 +++ qapi/block-core.json | 9 +++-- 5 files changed, 21 insertions(+), 3 deletions(-) diff --git a/block/backup.c b/block/backup.c index 3dd2e229d2..a1292c01ec 100644 --- a/block/backup.c +++ b/block/backup.c @@ -458,7 +458,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, } cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source, - &bcs, errp); + perf->min_cluster_size, &bcs, errp); if (!cbw) { goto error; } diff --git a/block/copy-before-write.c b/block/copy-before-write.c index a919b1f41b..e835987e52 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -548,6 +548,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, BlockDriverState *target, const char *filter_node_name, bool discard_source, + uint64_t min_cluster_size, BlockCopyState **bcs, Error **errp) { @@ -567,6 +568,14 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, qdict_put_str(opts, "file", bdrv_get_node_name(source)); qdict_put_str(opts, "target", bdrv_get_node_name(target)); +if 
(min_cluster_size > INT64_MAX) { +error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64, + min_cluster_size, INT64_MAX); +qobject_unref(opts); +return NULL; +} +qdict_put_int(opts, "min-cluster-size", (int64_t)min_cluster_size); + top = bdrv_insert_node(source, opts, flags, errp); if (!top) { return NULL; diff --git a/block/copy-before-write.h b/block/copy-before-write.h index 01af0cd3c4..2a5d4ba693 100644 --- a/block/copy-before-write.h +++ b/block/copy-before-write.h @@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source, BlockDriverState *target, const char *filter_node_name, bool discard_source, + uint64_t min_cluster_size, BlockCopyState **bcs, Error **errp); void bdrv_cbw_drop(BlockDriverState *bs); diff --git a/blockdev.c b/blockdev.c index 835064ed03..6740663fda 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2655,6 +2655,9 @@ static BlockJob *do_backup_common(BackupCommon *backup, if (backup->x_perf->has_max_chunk) { perf.max_chunk = backup->x_perf->max_chunk; } +if (backup->x_perf->has_min_cluster_size) { +perf.min_cluster_size = backup->x_perf->min_cluster_size; +} } if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) || diff --git a/qapi/block-core.json b/qapi/block-core.json index 6751022428..c3b0a2376b 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1551,11 +1551,16 @@ # it should not be less than job cluster size which is calculated # as maximum of target image cluster size and 64k. Default 0. # +# @min-cluster-size: Minimum size of blocks used by copy-before-write +# and background copy operations. Has to be a power of 2. No +# effect if smaller than the maximum of the target's cluster size +# and 64 KiB. Default 0. (Since 9.2) +# # Since: 6.0 ## { 'struct': 'BackupPerf', - 'data': { '*use-copy-range': 'bool', -'*max-workers': 'int', '*max-chunk': 'int64' } } + 'data': { '*use-copy-range': 'bool', '*max-workers': 'int', +'*max-chunk': 'int64', '*min-cluster-size': 'size' } } ## # @BackupCommon: -- 2.34.1
[PULL 4/5] block: Remove unused aio_task_pool_empty
From: "Dr. David Alan Gilbert" aio_task_pool_empty has been unused since it was added in 6e9b225f73 ("block: introduce aio task pool") Remove it. Signed-off-by: Dr. David Alan Gilbert Message-Id: <20240917002007.330689-1-d...@treblig.org> Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/aio_task.c | 5 - include/block/aio_task.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/block/aio_task.c b/block/aio_task.c index 9bd17ea2c1..bb5c05f455 100644 --- a/block/aio_task.c +++ b/block/aio_task.c @@ -119,8 +119,3 @@ int aio_task_pool_status(AioTaskPool *pool) return pool->status; } - -bool aio_task_pool_empty(AioTaskPool *pool) -{ -return pool->busy_tasks == 0; -} diff --git a/include/block/aio_task.h b/include/block/aio_task.h index 18a9c41f4e..c81d637617 100644 --- a/include/block/aio_task.h +++ b/include/block/aio_task.h @@ -40,8 +40,6 @@ void aio_task_pool_free(AioTaskPool *); /* error code of failed task or 0 if all is OK */ int aio_task_pool_status(AioTaskPool *pool); -bool aio_task_pool_empty(AioTaskPool *pool); - /* User provides filled @task, however task->pool will be set automatically */ void coroutine_fn aio_task_pool_start_task(AioTaskPool *pool, AioTask *task); -- 2.34.1
[PULL 0/5] Block-jobs 2024-09-30
The following changes since commit 3b14a767eaca3df5534a162851f04787b363670e: Merge tag 'qemu-openbios-20240924' of https://github.com/mcayland/qemu into staging (2024-09-28 12:34:44 +0100) are available in the Git repository at: https://gitlab.com/vsementsov/qemu.git tags/pull-block-jobs-2024-09-30 for you to fetch changes up to b74987cd3bf9bd4bf452ed371d864cbd1dccb08e: util/co-shared-resource: Remove unused co_try_get_from_shres (2024-09-30 10:53:18 +0300) Block-jobs: improve backup fleecing Dr. David Alan Gilbert (2): block: Remove unused aio_task_pool_empty util/co-shared-resource: Remove unused co_try_get_from_shres Fiona Ebner (3): copy-before-write: allow specifying minimum cluster size backup: add minimum cluster size to performance options block/reqlist: allow adding overlapping requests block/aio_task.c | 5 - block/backup.c| 2 +- block/block-copy.c| 36 ++- block/copy-before-write.c | 17 +-- block/copy-before-write.h | 1 + block/reqlist.c | 2 -- blockdev.c| 3 +++ include/block/aio_task.h | 2 -- include/block/block-copy.h| 1 + include/qemu/co-shared-resource.h | 7 -- qapi/block-core.json | 17 --- util/qemu-co-shared-resource.c| 6 -- 12 files changed, 61 insertions(+), 38 deletions(-) -- 2.34.1
[PULL 3/5] block/reqlist: allow adding overlapping requests
From: Fiona Ebner Allow overlapping request by removing the assert that made it impossible. There are only two callers: 1. block_copy_task_create() It already asserts the very same condition before calling reqlist_init_req(). 2. cbw_snapshot_read_lock() There is no need to have read requests be non-overlapping in copy-before-write when used for snapshot-access. In fact, there was no protection against two callers of cbw_snapshot_read_lock() calling reqlist_init_req() with overlapping ranges and this could lead to an assertion failure [1]. In particular, with the reproducer script below [0], two cbw_co_snapshot_block_status() callers could race, with the second calling reqlist_init_req() before the first one finishes and removes its conflicting request. [0]: > #!/bin/bash -e > dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024 > ./qemu-img create /tmp/fleecing.raw -f raw 1G > ( > ./qemu-system-x86_64 --qmp stdio \ > --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \ > --blockdev > raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \ > < {"execute": "qmp_capabilities"} > {"execute": "blockdev-add", "arguments": { "driver": "copy-before-write", > "file": "node0", "target": "node1", "node-name": "node3" } } > {"execute": "blockdev-add", "arguments": { "driver": "snapshot-access", > "file": "node3", "node-name": "snap0" } } > {"execute": "nbd-server-start", "arguments": {"addr": { "type": "unix", > "data": { "path": "/tmp/nbd.socket" } } } } > {"execute": "block-export-add", "arguments": {"id": "exp0", "node-name": > "snap0", "type": "nbd", "name": "exp0"}} > EOF > ) & > sleep 5 > while true; do > ./qemu-nbd -d /dev/nbd0 > ./qemu-nbd -c /dev/nbd0 nbd:unix:/tmp/nbd.socket:exportname=exp0 -f raw -r > nbdinfo --map 'nbd+unix:///exp0?socket=/tmp/nbd.socket' > done [1]: > #5 0x71e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101 > #6 0x615285438017 in reqlist_init_req (...) 
at ../block/reqlist.c:23 > #7 0x6152853e2d98 in cbw_snapshot_read_lock (...) at > ../block/copy-before-write.c:237 > #8 0x6152853e3068 in cbw_co_snapshot_block_status (...) at > ../block/copy-before-write.c:304 > #9 0x6152853f4d22 in bdrv_co_snapshot_block_status (...) at > ../block/io.c:3726 > #10 0x61528543a63e in snapshot_access_co_block_status (...) at > ../block/snapshot-access.c:48 > #11 0x6152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474 > #12 0x6152853f2016 in bdrv_co_common_block_status_above (...) at > ../block/io.c:2652 > #13 0x6152853f22cf in bdrv_co_block_status_above (...) at > ../block/io.c:2732 > #14 0x00006152853d9a86 in blk_co_block_status_above (...) at > ../block/block-backend.c:1473 > #15 0x000061528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374 > #16 0x61528538deb1 in nbd_co_send_block_status (...) at > ../nbd/server.c:2481 > #17 0x61528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978 > #18 0x61528538f906 in nbd_trip (...) at ../nbd/server.c:3121 > #19 0x6152855a7caf in coroutine_trampoline (...) at > ../util/coroutine-ucontext.c:175 Cc: qemu-sta...@nongnu.org Suggested-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Fiona Ebner Message-Id: <20240712140716.517911-1-f.eb...@proxmox.com> Reviewed-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/copy-before-write.c | 3 ++- block/reqlist.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/block/copy-before-write.c b/block/copy-before-write.c index e835987e52..81afeff1c7 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -66,7 +66,8 @@ typedef struct BDRVCopyBeforeWriteState { /* * @frozen_read_reqs: current read requests for fleecing user in bs->file - * node. These areas must not be rewritten by guest. + * node. These areas must not be rewritten by guest. There can be multiple + * overlapping read requests. 
*/ BlockReqList frozen_read_reqs; diff --git a/block/reqlist.c b/block/reqlist.c index 08cb57cfa4..098e807378 100644 --- a/block/reqlist.c +++ b/block/reqlist.c @@ -20,8 +20,6 @@ void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset, int64_t bytes) { -assert(!reqlist_find_conflict(reqs, offset, bytes)); - *req = (BlockReq) { .offset = offset, .bytes = bytes, -- 2.34.1
Re: [PATCH] block: Remove unused aio_task_pool_empty
On 17.09.24 03:20, d...@treblig.org wrote: From: "Dr. David Alan Gilbert" aio_task_pool_empty has been unused since it was added in 6e9b225f73 ("block: introduce aio task pool") Remove it. Signed-off-by: Dr. David Alan Gilbert Thanks, applied to my block branch. -- Best regards, Vladimir
Re: [PATCH] util/co-shared-resource: Remove unused co_try_get_from_shres
On 18.09.24 15:42, d...@treblig.org wrote: From: "Dr. David Alan Gilbert" co_try_get_from_shres hasn't been used since it was added in 55fa54a789 ("co-shared-resource: protect with a mutex") (Everyone uses the _locked version) Remove it. Signed-off-by: Dr. David Alan Gilbert Thanks, applied to my block branch. -- Best regards, Vladimir
[PATCH v6 1/3] qdev-monitor: add option to report GenericError from find_device_state
Here we just prepare for the following patch, making possible to report GenericError as recommended. This patch doesn't aim to prevent further use of DeviceNotFound by future interfaces: - find_device_state() is used in blk_by_qdev_id() and qmp_get_blk() functions, which may lead to spread of DeviceNotFound anyway - also, nothing prevent simply copy-pasting find_device_state() calls with false argument Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Markus Armbruster Acked-by: Raphael Norwitz --- system/qdev-monitor.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c index 44994ea0e1..6671137a91 100644 --- a/system/qdev-monitor.c +++ b/system/qdev-monitor.c @@ -885,13 +885,20 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, Error **errp) object_unref(OBJECT(dev)); } -static DeviceState *find_device_state(const char *id, Error **errp) +/* + * Note that creating new APIs using error classes other than GenericError is + * not recommended. Set use_generic_error=true for new interfaces. + */ +static DeviceState *find_device_state(const char *id, bool use_generic_error, + Error **errp) { Object *obj = object_resolve_path_at(qdev_get_peripheral(), id); DeviceState *dev; if (!obj) { -error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND, +error_set(errp, + (use_generic_error ? 
+ ERROR_CLASS_GENERIC_ERROR : ERROR_CLASS_DEVICE_NOT_FOUND), "Device '%s' not found", id); return NULL; } @@ -956,7 +963,7 @@ void qdev_unplug(DeviceState *dev, Error **errp) void qmp_device_del(const char *id, Error **errp) { -DeviceState *dev = find_device_state(id, errp); +DeviceState *dev = find_device_state(id, false, errp); if (dev != NULL) { if (dev->pending_deleted_event && (dev->pending_deleted_expires_ms == 0 || @@ -1076,7 +1083,7 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp) GLOBAL_STATE_CODE(); -dev = find_device_state(id, errp); +dev = find_device_state(id, false, errp); if (dev == NULL) { return NULL; } -- 2.34.1
[PATCH v6 3/3] qapi: introduce device-sync-config
Add command to sync config from vhost-user backend to the device. It may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not triggered interrupt to the guest or just not available (not supported by vhost-user server). Command result is racy if allow it during migration. Let's not allow that. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: Markus Armbruster Acked-by: Raphael Norwitz --- hw/block/vhost-user-blk.c | 1 + hw/virtio/virtio-pci.c| 9 + include/hw/qdev-core.h| 6 ++ qapi/qdev.json| 24 system/qdev-monitor.c | 38 ++ 5 files changed, 78 insertions(+) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 48b3dabb8d..7996e49821 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -591,6 +591,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, void *data) device_class_set_props(dc, vhost_user_blk_properties); dc->vmsd = &vmstate_vhost_user_blk; +dc->sync_config = vhost_user_blk_sync_config; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); vdc->realize = vhost_user_blk_device_realize; vdc->unrealize = vhost_user_blk_device_unrealize; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 4d832fe845..c5a809b956 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -2385,6 +2385,14 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) vpciklass->parent_dc_realize(qdev, errp); } +static int virtio_pci_sync_config(DeviceState *dev, Error **errp) +{ +VirtIOPCIProxy *proxy = VIRTIO_PCI(dev); +VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + +return qdev_sync_config(DEVICE(vdev), errp); +} + static void virtio_pci_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -2401,6 +2409,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data) device_class_set_parent_realize(dc, virtio_pci_dc_realize, &vpciklass->parent_dc_realize); rc->phases.hold = virtio_pci_bus_reset_hold; +dc->sync_config = 
virtio_pci_sync_config; } static const TypeInfo virtio_pci_info = { diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index aa97c34a4b..94914858d8 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -95,6 +95,7 @@ typedef void (*DeviceUnrealize)(DeviceState *dev); typedef void (*DeviceReset)(DeviceState *dev); typedef void (*BusRealize)(BusState *bus, Error **errp); typedef void (*BusUnrealize)(BusState *bus); +typedef int (*DeviceSyncConfig)(DeviceState *dev, Error **errp); /** * struct DeviceClass - The base class for all devices. @@ -103,6 +104,9 @@ typedef void (*BusUnrealize)(BusState *bus); * property is changed to %true. * @unrealize: Callback function invoked when the #DeviceState:realized * property is changed to %false. + * @sync_config: Callback function invoked when QMP command device-sync-config + * is called. Should synchronize device configuration from host to guest part + * and notify the guest about the change. * @hotpluggable: indicates if #DeviceClass is hotpluggable, available * as readonly "hotpluggable" property of #DeviceState instance * @@ -162,6 +166,7 @@ struct DeviceClass { DeviceReset legacy_reset; DeviceRealize realize; DeviceUnrealize unrealize; +DeviceSyncConfig sync_config; /** * @vmsd: device state serialisation description for @@ -547,6 +552,7 @@ bool qdev_hotplug_allowed(DeviceState *dev, Error **errp); */ HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev); void qdev_unplug(DeviceState *dev, Error **errp); +int qdev_sync_config(DeviceState *dev, Error **errp); void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev, DeviceState *dev, Error **errp); void qdev_machine_creation_done(void); diff --git a/qapi/qdev.json b/qapi/qdev.json index 53d147c7b4..2a581129c9 100644 --- a/qapi/qdev.json +++ b/qapi/qdev.json @@ -163,3 +163,27 @@ ## { 'event': 'DEVICE_UNPLUG_GUEST_ERROR', 'data': { '*device': 'str', 'path': 'str' } } + +## +# @device-sync-config: +# +# Synchronize device configuration from 
host to guest part. First, # copy the configuration from the host part (backend) to the guest # part (frontend). Then notify guest software that device # configuration changed. # # The command may be used to notify the guest about block device # capacity change. Currently only vhost-user-blk device supports # this. # # @id: the device's ID or QOM path # # Features: # # @unstable: The command is experimental. # # Since: 9.1 ## { 'command': 'device-sync-config', 'features': [ 'unstable'
[PATCH v6 0/3] vhost-user-blk: live resize additional APIs
v6: tiny fix: add document comment for sync_config field to fix qdoc generation. Also, add r-b and a-b marks. Vladimir Sementsov-Ogievskiy (3): qdev-monitor: add option to report GenericError from find_device_state vhost-user-blk: split vhost_user_blk_sync_config() qapi: introduce device-sync-config hw/block/vhost-user-blk.c | 27 ++-- hw/virtio/virtio-pci.c| 9 +++ include/hw/qdev-core.h| 6 + qapi/qdev.json| 24 ++ system/qdev-monitor.c | 53 --- 5 files changed, 108 insertions(+), 11 deletions(-) -- 2.34.1
[PATCH v6 2/3] vhost-user-blk: split vhost_user_blk_sync_config()
Split vhost_user_blk_sync_config() out from vhost_user_blk_handle_config_change(), to be reused in the following commit. Signed-off-by: Vladimir Sementsov-Ogievskiy Acked-by: Raphael Norwitz --- hw/block/vhost-user-blk.c | 26 +++--- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c index 5b7f46bbb0..48b3dabb8d 100644 --- a/hw/block/vhost-user-blk.c +++ b/hw/block/vhost-user-blk.c @@ -90,27 +90,39 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config) s->blkcfg.wce = blkcfg->wce; } +static int vhost_user_blk_sync_config(DeviceState *dev, Error **errp) +{ +int ret; +VirtIODevice *vdev = VIRTIO_DEVICE(dev); +VHostUserBlk *s = VHOST_USER_BLK(vdev); + +ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, + vdev->config_len, errp); +if (ret < 0) { +return ret; +} + +memcpy(vdev->config, &s->blkcfg, vdev->config_len); +virtio_notify_config(vdev); + +return 0; +} + static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) { int ret; -VirtIODevice *vdev = dev->vdev; -VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); Error *local_err = NULL; if (!dev->started) { return 0; } -ret = vhost_dev_get_config(dev, (uint8_t *)&s->blkcfg, - vdev->config_len, &local_err); +ret = vhost_user_blk_sync_config(DEVICE(dev->vdev), &local_err); if (ret < 0) { error_report_err(local_err); return ret; } -memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len); -virtio_notify_config(dev->vdev); - return 0; } -- 2.34.1
Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs
On 11.09.24 12:51, Michael S. Tsirkin wrote: On Tue, Jun 25, 2024 at 03:18:40PM +0300, Vladimir Sementsov-Ogievskiy wrote: v5: 03: drop extra check on is is runstate running Causes build failures when generating qdoc. https://gitlab.com/mstredhat/qemu/-/jobs/7792086965 Sorry for a delay, I'll send a v6 soon with fix for that. -- Best regards, Vladimir
Re: [PATCH v2] block/reqlist: allow adding overlapping requests
On 11.08.24 20:55, Michael Tokarev wrote: 12.07.2024 17:07, Fiona Ebner wrote: Allow overlapping request by removing the assert that made it impossible. There are only two callers: 1. block_copy_task_create() It already asserts the very same condition before calling reqlist_init_req(). 2. cbw_snapshot_read_lock() There is no need to have read requests be non-overlapping in copy-before-write when used for snapshot-access. In fact, there was no protection against two callers of cbw_snapshot_read_lock() calling reqlist_init_req() with overlapping ranges and this could lead to an assertion failure [1]. In particular, with the reproducer script below [0], two cbw_co_snapshot_block_status() callers could race, with the second calling reqlist_init_req() before the first one finishes and removes its conflicting request. [0]: #!/bin/bash -e dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024 ./qemu-img create /tmp/fleecing.raw -f raw 1G ( ./qemu-system-x86_64 --qmp stdio \ --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \ --blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \ < [1]: #5 0x71e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101 #6 0x615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23 #7 0x6152853e2d98 in cbw_snapshot_read_lock (...) at ../block/copy-before-write.c:237 #8 0x6152853e3068 in cbw_co_snapshot_block_status (...) at ../block/copy-before-write.c:304 #9 0x6152853f4d22 in bdrv_co_snapshot_block_status (...) at ../block/io.c:3726 #10 0x61528543a63e in snapshot_access_co_block_status (...) at ../block/snapshot-access.c:48 #11 0x6152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474 #12 0x6152853f2016 in bdrv_co_common_block_status_above (...) at ../block/io.c:2652 #13 0x6152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732 #14 0x6152853d9a86 in blk_co_block_status_above (...) 
at ../block/block-backend.c:1473 #15 0x61528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374 #16 0x61528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481 #17 0x61528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978 #18 0x61528538f906 in nbd_trip (...) at ../nbd/server.c:3121 #19 0x6152855a7caf in coroutine_trampoline (...) at ../util/coroutine-ucontext.c:175 Cc: qemu-sta...@nongnu.org Suggested-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Fiona Ebner Hi! Has this been forgotten or is it not needed for 9.1? My apologies, this is forgotten. I think rc4 is too late, I'll send Pull request as soon as 9.2 window open. -- Best regards, Vladimir
Re: [PATCH v2 1/2] block: zero data data corruption using prealloc-filter
On 16.07.24 16:32, Denis V. Lunev wrote: On 7/12/24 13:55, Vladimir Sementsov-Ogievskiy wrote: On 12.07.24 12:46, Andrey Drobyshev wrote: From: "Denis V. Lunev" We have observed that some clusters in the QCOW2 files are zeroed while preallocation filter is used. We are able to trace down the following sequence when prealloc-filter is used: co=0x55e7cbed7680 qcow2_co_pwritev_task() co=0x55e7cbed7680 preallocate_co_pwritev_part() co=0x55e7cbed7680 handle_write() co=0x55e7cbed7680 bdrv_co_do_pwrite_zeroes() co=0x55e7cbed7680 raw_do_pwrite_zeroes() co=0x7f9edb7fe500 do_fallocate() Here coroutine 0x55e7cbed7680 is being blocked waiting while coroutine 0x7f9edb7fe500 will finish with fallocate of the file area. OK. It is time to handle next coroutine, which co=0x55e7cbee91b0 qcow2_co_pwritev_task() co=0x55e7cbee91b0 preallocate_co_pwritev_part() co=0x55e7cbee91b0 handle_write() co=0x55e7cbee91b0 bdrv_co_do_pwrite_zeroes() co=0x55e7cbee91b0 raw_do_pwrite_zeroes() co=0x7f9edb7deb00 do_fallocate() The trouble comes here. Coroutine 0x55e7cbed7680 has not advanced file_end yet and coroutine 0x55e7cbee91b0 will start fallocate() for the same area. This means that if (once fallocate is started inside 0x7f9edb7deb00) original fallocate could end and the real write will be executed. In that case write() request is handled at the same time as fallocate(). The patch moves s->file_lock assignment before fallocate and that is text need to be updated crucial. The idea is that all subsequent requests into the area being preallocation will be issued as just writes without fallocate to this area and they will not proceed thanks to overlapping requests mechanics. If preallocation will fail, we will just switch to the normal expand-by-write behavior and that is not a problem except performance. Signed-off-by: Denis V. 
Lunev Tested-by: Andrey Drobyshev --- block/preallocate.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/preallocate.c b/block/preallocate.c index d215bc5d6d..ecf0aa4baa 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -383,6 +383,13 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes, want_merge_zero = want_merge_zero && (prealloc_start <= offset); + /* + * Assign file_end before making actual preallocation. This will ensure + * that next request performed while preallocation is in progress will + * be passed without preallocation. + */ + s->file_end = prealloc_end; + ret = bdrv_co_pwrite_zeroes( bs->file, prealloc_start, prealloc_end - prealloc_start, BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT); @@ -391,7 +398,6 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes, return false; } - s->file_end = prealloc_end; return want_merge_zero; } Hmm. But this way we set both s->file_end and s->zero_start prior to actual write_zero operation. This means that next write-zero operation may go fast-path (see preallocate_co_pwrite_zeroes()) and return success, even before actual finish of preallocation write_zeroes operation (which may also fail). Seems we need to update logic around s->zero_start too. Yes. This is not a problem at all. We go fast path and this new fast-pathed write request will stuck on overlapped request check. This if fine on success path. Sorry for long delay. By fast-path I meant when we return true from /* Now s->data_end, s->zero_start and s->file_end are valid. */ if (end <= s->file_end) { /* No preallocation needed. */ return want_merge_zero && offset >= s->zero_start; } and then, preallocate_co_pwrite_zeroes() just reports success immediately. So, we don't stuck in any overlap-check. Probably, that's not too bad: we just skip the write-zero after EOF. If user read from this area, zeroes should be returned anyway. 
Still, if we do so we mix our preallocation (which we truncate at the end) with user written zeroes. So we may truncate user-written zeroes in the end. How much is it bad it's a question, but it was not planned in original code. But error path is a trickier question. iris ~/src/qemu $ cat 1.c #include #include #include #include int main() { int fd = open("file", O_RDWR | O_CREAT); char buf[4096]; memset(buf, 'a', sizeof(buf)); pwrite(fd, buf, sizeof(buf), 4096); return 0; } iris ~/src/qemu $ This works just fine, thus error path would be also fine. Den -- Best regards, Vladimir
Re: [PATCH v2 1/3] nbd: CVE-XXX: Use cookie to track generation of nbd-server
On 02.08.24 04:32, Eric Blake wrote: As part of the QMP command nbd-server-start, the blockdev code was creating a single global nbd_server object, and telling the qio code to accept one or more client connections to the exposed listener socket. But even though we tear down the listener socket during a subsequent QMP nbd-server-stop, the qio code has handed off to a coroutine that may be executing asynchronously with the server shutdown, such that a client that connects to the socket before nbd-server-stop but delays disconnect or completion of the NBD handshake until after the followup QMP command can result in the nbd_blockdev_client_closed() callback running after nbd_server has already been torn down, causing a SEGV. Worse, if a second nbd_server object is created (possibly on a different socket address), a late client resolution from the first server can subtly interfere with the second server. This can be abused by a malicious client that does not know TLS secrets to cause qemu to SEGV after a legitimate client has concluded storage migration to a destination qemu, which can be turned into a denial of service attack even when qemu is set up to require only TLS clients. For environments without this patch, the CVE can be mitigated by ensuring (such as via a firewall) that only trusted clients can connect to an NBD server; using frameworks like libvirt that ensure that nbd-server-stop is not executed while any trusted clients are still connected will only help if there is also no possibility for an untrusted client to open a connection but then stall on the NBD handshake. Fix this by passing a cookie through to each client connection (whether or not that client actually knows the TLS details to successfully negotiate); then increment the cookie every time a server is torn down so that we can recognize any late client actions lingering from an old server. 
This patch does not address the fact that client sockets can be left open indefinitely after the server is torn down (possibly creating a resource leak, if a malicious client intentionally leaves lots of open sockets paused partway through NBD negotiation); the next patch will add some code to forcefully close any lingering clients as soon as possible when the server is torn down. Reported-by: Alexander Ivanov Signed-off-by: Eric Blake --- [..] -static void nbd_blockdev_client_closed(NBDClient *client, bool ignored) +static void nbd_blockdev_client_closed(NBDClient *client, uint32_t cookie, + bool ignored) { nbd_client_put(client); -assert(nbd_server->connections > 0); -nbd_server->connections--; -nbd_update_server_watch(nbd_server); +/* Ignore any (late) connection made under a previous server */ +if (cookie == nbd_cookie) { creating a getter nbd_client_get_cookie(client), and use it instead of passing together with client, will simplify the patch a lot. [*] Hmm.. don't we need some atomic accessors for nbd_cookie? and for nbs_server->connections.. The function is called from client, which live in coroutine and maybe in another thread? At least client code do atomic accesses of client->refcount.. +assert(nbd_server->connections > 0); +nbd_server->connections--; +nbd_update_server_watch(nbd_server); +} } [..] @@ -1621,7 +1622,7 @@ static void client_close(NBDClient *client, bool negotiated) /* Also tell the client, so that they release their reference. */ if (client->close_fn) { -client->close_fn(client, negotiated); +client->close_fn(client, client->close_cookie, negotiated); [*] passing client->close_cokkie together with client itself looks like we lack a getter for .close_cookie } } [..] Hmm, instead of cookies and additional NBDConn objects in the next patch, could we simply have a list of connected NBDClient objects in NBDServer and link to NBDServer in NBDClient? 
(Ok we actually don't have NBDServer, but NBDServerData in qemu, and several global variables in qemu-nbd, so some refactoring is needed, to put common state to NBDServer, and add clients list to it) This way, in nbd_server_free we'll just call client_close() in a loop. And in client_close we'll have nbd_server_client_detach(client->server, client), instead of client->close_fn(...). And server is freed only after all clients are closed. And client never try to detach from another server. This way, we also may implement several NBD servers working simultaneously if we want. -- Best regards, Vladimir
Re: [PATCH v2 1/3] block/commit: implement final flush
On 29.07.24 15:25, Kevin Wolf wrote: Am 19.07.2024 um 12:35 hat Vladimir Sementsov-Ogievskiy geschrieben: On 18.07.24 22:22, Kevin Wolf wrote: Am 26.06.2024 um 16:50 hat Vladimir Sementsov-Ogievskiy geschrieben: Actually block job is not completed without the final flush. It's rather unexpected to have broken target when job was successfully completed long ago and now we fail to flush or process just crashed/killed. Mirror job already has mirror_flush() for this. So, it's OK. Do this for commit job too. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/commit.c | 41 +++-- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/block/commit.c b/block/commit.c index 7c3fdcb0ca..81971692a2 100644 --- a/block/commit.c +++ b/block/commit.c @@ -134,6 +134,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) int64_t n = 0; /* bytes */ QEMU_AUTO_VFREE void *buf = NULL; int64_t len, base_len; +bool need_final_flush = true; len = blk_co_getlength(s->top); if (len < 0) { @@ -155,8 +156,8 @@ static int coroutine_fn commit_run(Job *job, Error **errp) buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); -for (offset = 0; offset < len; offset += n) { -bool copy; +for (offset = 0; offset < len || need_final_flush; offset += n) { In general, the control flow would be nicer to read if the final flush weren't integrated into the loop, but just added after it. But I assume this is pretty much required for pausing the job during error handling in the final flush if you don't want to duplicate a lot of the logic into a second loop? Right, that's the reason. This would probably be the right solution if it affected only commit. 
But I've thought a bit more about this and given that the same thing happens in all of the block jobs, I'm really wondering if introducing a block job helper function wouldn't be better, so that each block job could just add something like this after its main loop: do { ret = blk_co_flush(); } while (block_job_handle_error(job, ret)); And the helper would call block_job_error_action(), stop the job if necessary, check if it's cancelled, include a pause point etc. Agree. Thanks, I'll try. -- Best regards, Vladimir
Re: [PATCH v2 5/7] qapi: add job-change
On 18.07.24 13:59, Markus Armbruster wrote: Vladimir Sementsov-Ogievskiy writes: Add a new-style command job-change, doing same thing as block-job-change. The aim is finally deprecate block-job-* APIs and move to job-* APIs. We add a new command to qapi/block-core.json, not to qapi/job.json to avoid resolving json file including loops for now. This all would be a lot simple to refactor when we finally drop deprecated block-job-* APIs. @type argument of the new command immediately becomes deprecated. Where? Oops, that type-based union was dropped, so this sentence should be dropped as well. Signed-off-by: Vladimir Sementsov-Ogievskiy --- job-qmp.c| 14 ++ qapi/block-core.json | 10 ++ 2 files changed, 24 insertions(+) diff --git a/job-qmp.c b/job-qmp.c index c764bd3801..248e68f554 100644 --- a/job-qmp.c +++ b/job-qmp.c @@ -139,6 +139,20 @@ void qmp_job_dismiss(const char *id, Error **errp) job_dismiss_locked(&job, errp); } +void qmp_job_change(JobChangeOptions *opts, Error **errp) +{ +Job *job; + +JOB_LOCK_GUARD(); +job = find_job_locked(opts->id, errp); + +if (!job) { +return; +} + +job_change_locked(job, opts, errp); +} + /* Called with job_mutex held. */ static JobInfo *job_query_single_locked(Job *job, Error **errp) { diff --git a/qapi/block-core.json b/qapi/block-core.json index 660c7f4a48..9087ce300c 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -3104,6 +3104,16 @@ { 'command': 'block-job-change', 'data': 'JobChangeOptions', 'boxed': true } +## +# @job-change: +# +# Change the block job's options. +# +# Since: 9.1 +## +{ 'command': 'job-change', + 'data': 'JobChangeOptions', 'boxed': true } + ## # @BlockdevDiscardOptions: # -- Best regards, Vladimir
Re: [PATCH v2 6/7] qapi/block-core: deprecate block-job-change
On 18.07.24 14:01, Markus Armbruster wrote: Vladimir Sementsov-Ogievskiy writes: That's a first step to move on newer job-* APIs. The difference between block-job-change and job-change is in find_block_job_locked() vs find_job_locked() functions. What's different? 1. find_block_job_locked() do check, is found job a block-job. This OK Do you mean something like find_block_job_locked() finds only block jobs, whereas find_job_locked() finds any kind of job? Yes when moving to more generic API, no needs to document this change. 2. find_block_job_locked() reports DeviceNotActive on failure, when find_job_locked() reports GenericError. Still, for block-job-change errors are not documented at all, so be silent in deprecated.txt as well. Signed-off-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [PATCH v2 3/4] block: allow commit to unmap zero blocks
On 14.07.24 00:56, Vincent Vanlaer wrote: Non-active block commits do not discard blocks only containing zeros, causing images to lose sparseness after the commit. This commit fixes that by writing zero blocks using blk_co_pwrite_zeroes rather than writing them out as any other arbitrary data. Signed-off-by: Vincent Vanlaer --- block/commit.c | 19 +++ 1 file changed, 19 insertions(+) diff --git a/block/commit.c b/block/commit.c index fb54fc9560..6ce30927ac 100644 --- a/block/commit.c +++ b/block/commit.c @@ -130,6 +130,7 @@ static void commit_clean(Job *job) typedef enum CommitMethod { COMMIT_METHOD_COPY, +COMMIT_METHOD_ZERO, COMMIT_METHOD_IGNORE, } CommitMethod; @@ -185,6 +186,18 @@ static int coroutine_fn commit_run(Job *job, Error **errp) if (ret >= 0) { if (!(ret & BDRV_BLOCK_ALLOCATED)) { commit_method = COMMIT_METHOD_IGNORE; +} else if (ret & BDRV_BLOCK_ZERO) { +int64_t target_offset; +int64_t target_bytes; +WITH_GRAPH_RDLOCK_GUARD() { +bdrv_round_to_subclusters(s->base_bs, offset, n, + &target_offset, &target_bytes); indentation broken +} + +if (target_offset == offset && +target_bytes == n) { +commit_method = COMMIT_METHOD_ZERO; Why this is needed? Could we blindly do write-zeroes at original (offset, n)? Underlying logic would use any possiblity to write zeroes effectively, and unaligned tails (if any) would be written as data. +} } switch (commit_method) { @@ -198,6 +211,11 @@ static int coroutine_fn commit_run(Job *job, Error **errp) } } break; +case COMMIT_METHOD_ZERO: +ret = blk_co_pwrite_zeroes(s->base, offset, n, +BDRV_REQ_MAY_UNMAP); +error_in_source = false; +break; case COMMIT_METHOD_IGNORE: break; default: @@ -216,6 +234,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) continue; } } + extra unrelated hunk for style, I'd drop it /* Publish progress */ job_progress_update(&s->common.job, n); -- Best regards, Vladimir
Re: [PATCH v2 2/4] block: refactor commit_run for multiple write types
On 14.07.24 00:56, Vincent Vanlaer wrote: Signed-off-by: Vincent Vanlaer Reviewed-by: Vladimir Sementsov-Ogievskiy Honestly, I don't like this (mostly preexisting, but your patch make the problem more obvious) code for its "nested success path" if (ret >= 0) { ret = ... if (ret >= 0) { ... That's because we have the same complex error handling for all these errors. If refactor the commit_run(), I'd move get-block-status together with "if (copy)" block to separate function commmit_iteration(), which would have more readable style of error reporting, like: ret = ... if (ret < 0) { return ret; } ret = ... if (ret < 0) { return ret; } -- Best regards, Vladimir
Re: [PATCH v2 1/4] block: get type of block allocation in commit_run
On 14.07.24 00:56, Vincent Vanlaer wrote: bdrv_co_common_block_status_above not only returns whether the block is allocated, but also if it contains zeroes. Signed-off-by: Vincent Vanlaer Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs
On 01.08.24 11:37, Michael S. Tsirkin wrote: On Thu, Aug 01, 2024 at 11:35:19AM +0300, Vladimir Sementsov-Ogievskiy wrote: On 01.07.24 23:55, Michael S. Tsirkin wrote: On Mon, Jul 01, 2024 at 08:42:39AM -0400, Raphael Norwitz wrote: I have no issues with these APIs, but I'm not a QMP expert so others should review those bits. For the vhost-user-blk code: Acked-by: Raphael Norwitz Could the relevant bits get ack from qapi maintainers please? We go them. Could you queue the patches please? Tagged for after the freeze. Thanks! Oh right, I missed the freeze. OK, thanks! On Tue, Jun 25, 2024 at 8:19 AM Vladimir Sementsov-Ogievskiy wrote: v5: 03: drop extra check on is is runstate running Vladimir Sementsov-Ogievskiy (3): qdev-monitor: add option to report GenericError from find_device_state vhost-user-blk: split vhost_user_blk_sync_config() qapi: introduce device-sync-config hw/block/vhost-user-blk.c | 27 ++-- hw/virtio/virtio-pci.c| 9 +++ include/hw/qdev-core.h| 3 +++ qapi/qdev.json| 24 ++ system/qdev-monitor.c | 53 --- 5 files changed, 105 insertions(+), 11 deletions(-) -- 2.34.1 -- Best regards, Vladimir -- Best regards, Vladimir
Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs
On 01.07.24 23:55, Michael S. Tsirkin wrote: On Mon, Jul 01, 2024 at 08:42:39AM -0400, Raphael Norwitz wrote: I have no issues with these APIs, but I'm not a QMP expert so others should review those bits. For the vhost-user-blk code: Acked-by: Raphael Norwitz Could the relevant bits get ack from qapi maintainers please? We got them. Could you queue the patches please? On Tue, Jun 25, 2024 at 8:19 AM Vladimir Sementsov-Ogievskiy wrote: v5: 03: drop extra check on is is runstate running Vladimir Sementsov-Ogievskiy (3): qdev-monitor: add option to report GenericError from find_device_state vhost-user-blk: split vhost_user_blk_sync_config() qapi: introduce device-sync-config hw/block/vhost-user-blk.c | 27 ++-- hw/virtio/virtio-pci.c| 9 +++ include/hw/qdev-core.h| 3 +++ qapi/qdev.json| 24 ++ system/qdev-monitor.c | 53 --- 5 files changed, 105 insertions(+), 11 deletions(-) -- 2.34.1 -- Best regards, Vladimir
Re: [PATCH v9 4/7] qapi: add blockdev-replace command
On 18.07.24 15:00, Markus Armbruster wrote: Vladimir Sementsov-Ogievskiy writes: Add a command that can replace bs in following BdrvChild structures: - qdev blk root child - block-export blk root child - any child of BlockDriverState selected by child-name Signed-off-by: Vladimir Sementsov-Ogievskiy --- blockdev.c | 56 +++ qapi/block-core.json | 88 ++ stubs/blk-by-qdev-id.c | 13 +++ stubs/meson.build | 1 + 4 files changed, 158 insertions(+) create mode 100644 stubs/blk-by-qdev-id.c diff --git a/blockdev.c b/blockdev.c index ba7e90b06e..2190467022 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3559,6 +3559,62 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, bdrv_try_change_aio_context(bs, new_context, NULL, errp); } +void qmp_blockdev_replace(BlockdevReplace *repl, Error **errp) +{ +BdrvChild *child = NULL; +BlockDriverState *new_child_bs; + +if (repl->parent_type == BLOCK_PARENT_TYPE_DRIVER) { +BlockDriverState *parent_bs; + +parent_bs = bdrv_find_node(repl->u.driver.node_name); +if (!parent_bs) { +error_setg(errp, "Block driver node with node-name '%s' not " + "found", repl->u.driver.node_name); +return; +} + +child = bdrv_find_child(parent_bs, repl->u.driver.child); +if (!child) { +error_setg(errp, "Block driver node '%s' doesn't have child " + "named '%s'", repl->u.driver.node_name, + repl->u.driver.child); +return; +} +} else { +/* Other types are similar, they work through blk */ +BlockBackend *blk; +bool is_qdev = repl->parent_type == BLOCK_PARENT_TYPE_QDEV; +const char *id = +is_qdev ? repl->u.qdev.qdev_id : repl->u.export.export_id; + +assert(is_qdev || repl->parent_type == BLOCK_PARENT_TYPE_EXPORT); + +blk = is_qdev ? blk_by_qdev_id(id, errp) : blk_by_export_id(id, errp); blk_by_export_id() finds export @exp, and returns the associated block backend exp->blk. Fine. blk_by_qdev_id() finds the device, and then searches @block_backends for a blk with blk->dev == blk. 
If a device has more than one block backend, you get the one first in @block_backends. I figure that's the one created first. Interface issue: when a device has multiple block backends, only one of them can be replaced, and which one is kind of random. Do such devices exist? Hmm.. I expect, they don't. If there is a problem, it's pre-existing, all callers of qmp_get_blk are affected. But honestly, I don't know. If no, could they exist? If yes, what should we do about it now? Maybe, continue the loop in blk_by_dev() up the the end, and if two blk found for the device, return an error? Or even abort? And add check to blk_attach_dev(), that we don't attach same device to several blks. +if (!blk) { +return; +} + +child = blk_root(blk); +if (!child) { +error_setg(errp, "%s '%s' is empty, nothing to replace", + is_qdev ? "Device" : "Export", id); +return; +} +} + +assert(child); +assert(child->bs); + +new_child_bs = bdrv_find_node(repl->new_child); +if (!new_child_bs) { +error_setg(errp, "Node '%s' not found", repl->new_child); +return; +} + +bdrv_replace_child_bs(child, new_child_bs, errp); +} + QemuOptsList qemu_common_drive_opts = { .name = "drive", .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head), diff --git a/qapi/block-core.json b/qapi/block-core.json index df5e07debd..0a6f08a6e0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -6148,3 +6148,91 @@ ## { 'struct': 'DummyBlockCoreForceArrays', 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } } + +## +# @BlockParentType: +# +# @qdev: block device, such as created by device_add, and denoted by +# qdev-id +# +# @driver: block driver node, such as created by blockdev-add, and +# denoted by node-name node-name and child? Hmm. I'd say that parent is denoted only by node-name, as parent is node itself (the fact that we have separate BdrvChild structure as a parent I'd keep as internal realization). But parent may have several children, and concrete child is denoted by @child. 
+# +# @export: block export, such created by block-export-add, and +# denoted by export-id +# +# Since 9.1 ## I'm kind of unhappy with this doc comment. I feel it makes sense only in the context of its use.
Re: [PATCH v9 2/7] block/export: add blk_by_export_id()
On 18.07.24 14:48, Markus Armbruster wrote: Vladimir Sementsov-Ogievskiy writes: We need it for further blockdev-replace functionality. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/export/export.c | 18 ++ include/sysemu/block-backend-global-state.h | 1 + 2 files changed, 19 insertions(+) diff --git a/block/export/export.c b/block/export/export.c index 6d51ae8ed7..57beae7982 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -355,3 +355,21 @@ BlockExportInfoList *qmp_query_block_exports(Error **errp) return head; } + +BlockBackend *blk_by_export_id(const char *id, Error **errp) +{ +BlockExport *exp; + +exp = blk_exp_find(id); +if (exp == NULL) { +error_setg(errp, "Export '%s' not found", id); +return NULL; +} + +if (!exp->blk) { +error_setg(errp, "Export '%s' is empty", id); Can this happen? Hmm, looking at the code in blk_exp_add: assert(exp->blk != NULL); QLIST_INSERT_HEAD(&block_exports, exp, next); return exp; seems not. And I can't find anything changing exp->blk except for blk_exp_add(). Will switch to assertion. +return NULL; +} + +return exp->blk; +} diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h index ccb35546a1..410d0cc5c7 100644 --- a/include/sysemu/block-backend-global-state.h +++ b/include/sysemu/block-backend-global-state.h @@ -74,6 +74,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev); DeviceState *blk_get_attached_dev(BlockBackend *blk); BlockBackend *blk_by_dev(void *dev); BlockBackend *blk_by_qdev_id(const char *id, Error **errp); +BlockBackend *blk_by_export_id(const char *id, Error **errp); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); void blk_activate(BlockBackend *blk, Error **errp); -- Best regards, Vladimir
Re: [PATCH v2 1/3] block/commit: implement final flush
On 18.07.24 22:22, Kevin Wolf wrote: Am 26.06.2024 um 16:50 hat Vladimir Sementsov-Ogievskiy geschrieben: Actually block job is not completed without the final flush. It's rather unexpected to have broken target when job was successfully completed long ago and now we fail to flush or process just crashed/killed. Mirror job already has mirror_flush() for this. So, it's OK. Do this for commit job too. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/commit.c | 41 +++-- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/block/commit.c b/block/commit.c index 7c3fdcb0ca..81971692a2 100644 --- a/block/commit.c +++ b/block/commit.c @@ -134,6 +134,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) int64_t n = 0; /* bytes */ QEMU_AUTO_VFREE void *buf = NULL; int64_t len, base_len; +bool need_final_flush = true; len = blk_co_getlength(s->top); if (len < 0) { @@ -155,8 +156,8 @@ static int coroutine_fn commit_run(Job *job, Error **errp) buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); -for (offset = 0; offset < len; offset += n) { -bool copy; +for (offset = 0; offset < len || need_final_flush; offset += n) { In general, the control flow would be nicer to read if the final flush weren't integrated into the loop, but just added after it. But I assume this is pretty much required for pausing the job during error handling in the final flush if you don't want to duplicate a lot of the logic into a second loop? Right, that's the reason. 
+bool copy = false; bool error_in_source = true; /* Note that even when no rate limit is applied we need to yield @@ -166,22 +167,34 @@ static int coroutine_fn commit_run(Job *job, Error **errp) if (job_is_cancelled(&s->common.job)) { break; } -/* Copy if allocated above the base */ -ret = blk_co_is_allocated_above(s->top, s->base_overlay, true, -offset, COMMIT_BUFFER_SIZE, &n); -copy = (ret > 0); -trace_commit_one_iteration(s, offset, n, ret); -if (copy) { -assert(n < SIZE_MAX); -ret = blk_co_pread(s->top, offset, n, buf, 0); -if (ret >= 0) { -ret = blk_co_pwrite(s->base, offset, n, buf, 0); -if (ret < 0) { -error_in_source = false; +if (offset < len) { +/* Copy if allocated above the base */ +ret = blk_co_is_allocated_above(s->top, s->base_overlay, true, +offset, COMMIT_BUFFER_SIZE, &n); +copy = (ret > 0); +trace_commit_one_iteration(s, offset, n, ret); +if (copy) { +assert(n < SIZE_MAX); + +ret = blk_co_pread(s->top, offset, n, buf, 0); +if (ret >= 0) { +ret = blk_co_pwrite(s->base, offset, n, buf, 0); +if (ret < 0) { +error_in_source = false; +} } } +} else { +assert(need_final_flush); +ret = blk_co_flush(s->base); +if (ret < 0) { +error_in_source = false; +} else { +need_final_flush = false; +} Should we set n = 0 in this block to avoid counting the last chunk twice for the progress? Oops, right. Will fix, thanks } + if (ret < 0) { BlockErrorAction action = block_job_error_action(&s->common, s->on_error, Kevin -- Best regards, Vladimir
Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs
On 18.07.24 11:31, Markus Armbruster wrote: Vladimir Sementsov-Ogievskiy writes: ping. Markus, Eric, could someone give an ACK for QAPI part? I apologize for the delay. It was pretty bad. No problem, I myself make worse delays now when busy with other work, thanks for reviewing! -- Best regards, Vladimir
Re: [PATCH v5 3/3] qapi: introduce device-sync-config
On 18.07.24 11:27, Markus Armbruster wrote: Vladimir Sementsov-Ogievskiy writes: Add command to sync config from vhost-user backend to the device. It may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not triggered interrupt to the guest or just not available (not supported by vhost-user server). Command result is racy if allow it during migration. Let's allow the sync only in RUNNING state. Is this still accurate? The runstate_is_running() check is gone in v4, the migration_is_running() check remains. Right, better to fix commit message like: Command result is racy if allow it during migration. Let's not allow it. Signed-off-by: Vladimir Sementsov-Ogievskiy QAPI schema and QMP part: Signed-off-by: Markus Armbruster -- Best regards, Vladimir
Re: [PATCH v2] block/reqlist: allow adding overlapping requests
On 12.07.24 17:07, Fiona Ebner wrote: Allow overlapping request by removing the assert that made it impossible. There are only two callers: 1. block_copy_task_create() It already asserts the very same condition before calling reqlist_init_req(). 2. cbw_snapshot_read_lock() There is no need to have read requests be non-overlapping in copy-before-write when used for snapshot-access. In fact, there was no protection against two callers of cbw_snapshot_read_lock() calling reqlist_init_req() with overlapping ranges and this could lead to an assertion failure [1]. In particular, with the reproducer script below [0], two cbw_co_snapshot_block_status() callers could race, with the second calling reqlist_init_req() before the first one finishes and removes its conflicting request. [0]: #!/bin/bash -e dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024 ./qemu-img create /tmp/fleecing.raw -f raw 1G ( ./qemu-system-x86_64 --qmp stdio \ --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \ --blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \ < [1]: #5 0x71e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101 #6 0x615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23 #7 0x6152853e2d98 in cbw_snapshot_read_lock (...) at ../block/copy-before-write.c:237 #8 0x6152853e3068 in cbw_co_snapshot_block_status (...) at ../block/copy-before-write.c:304 #9 0x6152853f4d22 in bdrv_co_snapshot_block_status (...) at ../block/io.c:3726 #10 0x61528543a63e in snapshot_access_co_block_status (...) at ../block/snapshot-access.c:48 #11 0x6152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474 #12 0x6152853f2016 in bdrv_co_common_block_status_above (...) at ../block/io.c:2652 #13 0x6152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732 #14 0x6152853d9a86 in blk_co_block_status_above (...) at ../block/block-backend.c:1473 #15 0x61528538da6c in blockstatus_to_extents (...) 
at ../nbd/server.c:2374 #16 0x61528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481 #17 0x61528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978 #18 0x61528538f906 in nbd_trip (...) at ../nbd/server.c:3121 #19 0x6152855a7caf in coroutine_trampoline (...) at ../util/coroutine-ucontext.c:175 Cc: qemu-sta...@nongnu.org Suggested-by: Vladimir Sementsov-Ogievskiy Signed-off-by: Fiona Ebner --- Changes in v2: * different approach, allowing overlapping requests for copy-before-write rather than waiting for them. block-copy already asserts there are no conflicts before adding a request. block/copy-before-write.c | 3 ++- block/reqlist.c | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/block/copy-before-write.c b/block/copy-before-write.c index 853e01a1eb..28f6a096cd 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -66,7 +66,8 @@ typedef struct BDRVCopyBeforeWriteState { /* * @frozen_read_reqs: current read requests for fleecing user in bs->file - * node. These areas must not be rewritten by guest. + * node. These areas must not be rewritten by guest. There can be multiple + * overlapping read requests. */ BlockReqList frozen_read_reqs; diff --git a/block/reqlist.c b/block/reqlist.c index 08cb57cfa4..098e807378 100644 --- a/block/reqlist.c +++ b/block/reqlist.c @@ -20,8 +20,6 @@ void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset, int64_t bytes) { -assert(!reqlist_find_conflict(reqs, offset, bytes)); - *req = (BlockReq) { .offset = offset, .bytes = bytes, Reviewed-by: Vladimir Sementsov-Ogievskiy Thanks, applied to my block branch -- Best regards, Vladimir
Re: [PATCH v3 0/2] backup: allow specifying minimum cluster size
On 11.07.24 15:09, Fiona Ebner wrote: Discussion for v2: https://lore.kernel.org/qemu-devel/20240528120114.344416-1-f.eb...@proxmox.com/ Changes in v3: * Pass min_cluster_size option directly without checking has_min_cluster_size, because the default is 0 anyways. * Calculate maximum of passed-in argument and default once at the beginning of block_copy_calculate_cluster_size() * Update warning message to reflect actual value used * Do not leak qdict in error case * Use PRI{i,u}64 macros Discussion for v1: https://lore.kernel.org/qemu-devel/20240308155158.830258-1-f.eb...@proxmox.com/ - Changes in v2: * Use 'size' type in QAPI. * Remove option in cbw_parse_options(), i.e. before parsing generic blockdev options. * Reword commit messages hoping to describe the issue in a more straight-forward way. In the context of backup fleecing, discarding the source will not work when the fleecing image has a larger granularity than the one used for block-copy operations (can happen if the backup target has smaller cluster size), because cbw_co_pdiscard_snapshot() will align down the discard requests and thus effectively ignore then. To make @discard-source work in such a scenario, allow specifying the minimum cluster size used for block-copy operations and thus in particular also the granularity for discard requests to the source. Fiona Ebner (2): copy-before-write: allow specifying minimum cluster size backup: add minimum cluster size to performance options block/backup.c | 2 +- block/block-copy.c | 36 ++-- block/copy-before-write.c | 14 +- block/copy-before-write.h | 1 + blockdev.c | 3 +++ include/block/block-copy.h | 1 + qapi/block-core.json | 17 ++--- 7 files changed, 59 insertions(+), 15 deletions(-) Thanks, applied to my block branch. -- Best regards, Vladimir
Re: [PATCH v3 2/2] backup: add minimum cluster size to performance options
On 11.07.24 15:09, Fiona Ebner wrote: In the context of backup fleecing, discarding the source will not work when the fleecing image has a larger granularity than the one used for block-copy operations (can happen if the backup target has smaller cluster size), because cbw_co_pdiscard_snapshot() will align down the discard requests and thus effectively ignore then. To make @discard-source work in such a scenario, allow specifying the minimum cluster size used for block-copy operations and thus in particular also the granularity for discard requests to the source. Suggested-by: Vladimir Sementsov-Ogievskiy Acked-by: Markus Armbruster (QAPI schema) Signed-off-by: Fiona Ebner Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [PATCH v3 1/2] copy-before-write: allow specifying minimum cluster size
On 11.07.24 15:09, Fiona Ebner wrote: In the context of backup fleecing, discarding the source will not work when the fleecing image has a larger granularity than the one used for block-copy operations (can happen if the backup target has smaller cluster size), because cbw_co_pdiscard_snapshot() will align down the discard requests and thus effectively ignore then. To make @discard-source work in such a scenario, allow specifying the minimum cluster size used for block-copy operations and thus in particular also the granularity for discard requests to the source. The type 'size' (corresponding to uint64_t in C) is used in QAPI to rule out negative inputs and for consistency with already existing @cluster-size parameters. Since block_copy_calculate_cluster_size() uses int64_t for its result, a check that the input is not too large is added in block_copy_state_new() before calling it. The calculation in block_copy_calculate_cluster_size() is done in the target int64_t type. Suggested-by: Vladimir Sementsov-Ogievskiy Acked-by: Markus Armbruster (QAPI schema) Signed-off-by: Fiona Ebner Reviewed-by: Vladimir Sementsov-Ogievskiy -- Best regards, Vladimir
Re: [PATCH] block/copy-before-write: wait for conflicts when read locking to avoid assertion failure
On 11.07.24 16:36, Fiona Ebner wrote: There is no protection against two callers of cbw_snapshot_read_lock() calling reqlist_init_req() with overlapping ranges, and reqlist_init_req() asserts that there are no conflicting requests. In particular, two cbw_co_snapshot_block_status() callers can race, with the second calling reqlist_init_req() before the first one finishes and removes its conflicting request, leading to an assertion failure. Reproducer script [0] and backtrace [1] are attached below. Understand. But seems in case of CBW read-lock, nothing bad in intersecting read requests? reqlist is shared with backup, where we care to avoid intersecting requests in the list. What about just move the assertion to block_copy_task_create() ? And add comment somewhere that we support intersecting reads in frozen_read_reqs. [0]: #!/bin/bash -e dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024 ./qemu-img create /tmp/fleecing.raw -f raw 1G ( ./qemu-system-x86_64 --qmp stdio \ --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \ --blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \ < [1]: #5 0x71e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101 #6 0x615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23 #7 0x6152853e2d98 in cbw_snapshot_read_lock (...) at ../block/copy-before-write.c:237 #8 0x6152853e3068 in cbw_co_snapshot_block_status (...) at ../block/copy-before-write.c:304 #9 0x6152853f4d22 in bdrv_co_snapshot_block_status (...) at ../block/io.c:3726 #10 0x61528543a63e in snapshot_access_co_block_status (...) at ../block/snapshot-access.c:48 #11 0x6152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474 #12 0x6152853f2016 in bdrv_co_common_block_status_above (...) at ../block/io.c:2652 #13 0x6152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732 #14 0x6152853d9a86 in blk_co_block_status_above (...) 
at ../block/block-backend.c:1473 #15 0x61528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374 #16 0x61528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481 #17 0x61528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978 #18 0x61528538f906 in nbd_trip (...) at ../nbd/server.c:3121 #19 0x6152855a7caf in coroutine_trampoline (...) at ../util/coroutine-ucontext.c:175 Signed-off-by: Fiona Ebner --- block/copy-before-write.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/copy-before-write.c b/block/copy-before-write.c index 853e01a1eb..376ff3f3e1 100644 --- a/block/copy-before-write.c +++ b/block/copy-before-write.c @@ -234,6 +234,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes, *req = (BlockReq) {.offset = -1, .bytes = -1}; *file = s->target; } else { +reqlist_wait_all(&s->frozen_read_reqs, offset, bytes, &s->lock); reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes); *file = bs->file; } -- Best regards, Vladimir
Re: [PATCH 1/2] block: zero data data corruption using prealloc-filter
On 11.07.24 16:32, Andrey Drobyshev wrote: From: "Denis V. Lunev" We have observed that some clusters in the QCOW2 files are zeroed while preallocation filter is used. We are able to trace down the following sequence when prealloc-filter is used: co=0x55e7cbed7680 qcow2_co_pwritev_task() co=0x55e7cbed7680 preallocate_co_pwritev_part() co=0x55e7cbed7680 handle_write() co=0x55e7cbed7680 bdrv_co_do_pwrite_zeroes() co=0x55e7cbed7680 raw_do_pwrite_zeroes() co=0x7f9edb7fe500 do_fallocate() Here coroutine 0x55e7cbed7680 is being blocked waiting while coroutine 0x7f9edb7fe500 will finish with fallocate of the file area. OK. It is time to handle next coroutine, which co=0x55e7cbee91b0 qcow2_co_pwritev_task() co=0x55e7cbee91b0 preallocate_co_pwritev_part() co=0x55e7cbee91b0 handle_write() co=0x55e7cbee91b0 bdrv_co_do_pwrite_zeroes() co=0x55e7cbee91b0 raw_do_pwrite_zeroes() co=0x7f9edb7deb00 do_fallocate() The trouble comes here. Coroutine 0x55e7cbed7680 has not advanced file_end yet and coroutine 0x55e7cbee91b0 will start fallocate() for the same area. This means that if (once fallocate is started inside 0x7f9edb7deb00) original fallocate could end and the real write will be executed. In that case write() request is handled at the same time as fallocate(). Normally we should protect s->file_end while it is detected that preallocation is need. The patch introduces file_end_lock for it to be protected when run in the coroutine context. Note: the lock is taken only once it is detected that the preallocation is really required. This is not a frequent case due to the preallocation nature thus the patch should not have performance impact. Originally-by: Denis V. Lunev Co-authored-by: Andrey Drobyshev Signed-off-by: Denis V. 
Lunev Signed-off-by: Andrey Drobyshev --- block/preallocate.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/block/preallocate.c b/block/preallocate.c index d215bc5d6d..9cb2c97635 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -78,6 +78,8 @@ typedef struct BDRVPreallocateState { /* Gives up the resize permission on children when parents don't need it */ QEMUBH *drop_resize_bh; + +CoMutex file_end_lock; } BDRVPreallocateState; static int preallocate_drop_resize(BlockDriverState *bs, Error **errp); @@ -170,6 +172,8 @@ static int preallocate_open(BlockDriverState *bs, QDict *options, int flags, ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & bs->file->bs->supported_zero_flags); +qemu_co_mutex_init(&s->file_end_lock); + return 0; } @@ -342,6 +346,7 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes, return false; } +QEMU_LOCK_GUARD(&s->file_end_lock); if (s->file_end < 0) { s->file_end = s->data_end; } @@ -353,6 +358,8 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes, /* We have valid s->data_end, and request writes beyond it. */ +QEMU_LOCK_GUARD(&s->file_end_lock); + s->data_end = end; if (s->zero_start < 0 || !want_merge_zero) { s->zero_start = end; @@ -428,6 +435,8 @@ preallocate_co_truncate(BlockDriverState *bs, int64_t offset, BDRVPreallocateState *s = bs->opaque; int ret; +QEMU_LOCK_GUARD(&s->file_end_lock); + if (s->data_end >= 0 && offset > s->data_end) { if (s->file_end < 0) { s->file_end = bdrv_co_getlength(bs->file->bs); @@ -501,6 +510,8 @@ preallocate_co_getlength(BlockDriverState *bs) return s->data_end; } +QEMU_LOCK_GUARD(&s->file_end_lock); + ret = bdrv_co_getlength(bs->file->bs); if (has_prealloc_perms(bs)) { Hmm, seems preallocate driver not thread-safe neither coroutine-safe. I think co-mutex is good idea. 
Still, protecting only s->file_end may not be enough - we do want to keep data_end / zero_start / file_end variables correct - probably, just make the whole preallocation code a critical section? Maybe, atomic read of variables for fast-path (for writes which don't need preallocation) -- Best regards, Vladimir
Re: [PATCH v2 1/2] block: zero data data corruption using prealloc-filter
On 12.07.24 12:46, Andrey Drobyshev wrote: From: "Denis V. Lunev" We have observed that some clusters in the QCOW2 files are zeroed while preallocation filter is used. We are able to trace down the following sequence when prealloc-filter is used: co=0x55e7cbed7680 qcow2_co_pwritev_task() co=0x55e7cbed7680 preallocate_co_pwritev_part() co=0x55e7cbed7680 handle_write() co=0x55e7cbed7680 bdrv_co_do_pwrite_zeroes() co=0x55e7cbed7680 raw_do_pwrite_zeroes() co=0x7f9edb7fe500 do_fallocate() Here coroutine 0x55e7cbed7680 is being blocked waiting while coroutine 0x7f9edb7fe500 will finish with fallocate of the file area. OK. It is time to handle next coroutine, which co=0x55e7cbee91b0 qcow2_co_pwritev_task() co=0x55e7cbee91b0 preallocate_co_pwritev_part() co=0x55e7cbee91b0 handle_write() co=0x55e7cbee91b0 bdrv_co_do_pwrite_zeroes() co=0x55e7cbee91b0 raw_do_pwrite_zeroes() co=0x7f9edb7deb00 do_fallocate() The trouble comes here. Coroutine 0x55e7cbed7680 has not advanced file_end yet and coroutine 0x55e7cbee91b0 will start fallocate() for the same area. This means that if (once fallocate is started inside 0x7f9edb7deb00) original fallocate could end and the real write will be executed. In that case write() request is handled at the same time as fallocate(). The patch moves s->file_lock assignment before fallocate and that is text need to be updated crucial. The idea is that all subsequent requests into the area being preallocation will be issued as just writes without fallocate to this area and they will not proceed thanks to overlapping requests mechanics. If preallocation will fail, we will just switch to the normal expand-by-write behavior and that is not a problem except performance. Signed-off-by: Denis V. 
Lunev Tested-by: Andrey Drobyshev --- block/preallocate.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/block/preallocate.c b/block/preallocate.c index d215bc5d6d..ecf0aa4baa 100644 --- a/block/preallocate.c +++ b/block/preallocate.c @@ -383,6 +383,13 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes, want_merge_zero = want_merge_zero && (prealloc_start <= offset); +/* + * Assign file_end before making actual preallocation. This will ensure + * that next request performed while preallocation is in progress will + * be passed without preallocation. + */ +s->file_end = prealloc_end; + ret = bdrv_co_pwrite_zeroes( bs->file, prealloc_start, prealloc_end - prealloc_start, BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT); @@ -391,7 +398,6 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes, return false; } -s->file_end = prealloc_end; return want_merge_zero; } Hmm. But this way we set both s->file_end and s->zero_start prior to actual write_zero operation. This means that next write-zero operation may go fast-path (see preallocate_co_pwrite_zeroes()) and return success, even before actual finish of preallocation write_zeroes operation (which may also fail). Seems we need to update logic around s->zero_start too. -- Best regards, Vladimir
Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs
ping. Markus, Eric, could someone give an ACK for QAPI part? On 25.06.24 15:18, Vladimir Sementsov-Ogievskiy wrote: v5: 03: drop extra check on is is runstate running Vladimir Sementsov-Ogievskiy (3): qdev-monitor: add option to report GenericError from find_device_state vhost-user-blk: split vhost_user_blk_sync_config() qapi: introduce device-sync-config hw/block/vhost-user-blk.c | 27 ++-- hw/virtio/virtio-pci.c| 9 +++ include/hw/qdev-core.h| 3 +++ qapi/qdev.json| 24 ++ system/qdev-monitor.c | 53 --- 5 files changed, 105 insertions(+), 11 deletions(-) -- Best regards, Vladimir
Re: [PATCH] block/curl: rewrite http header parsing function
On 01.07.24 09:55, Michael Tokarev wrote: 01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote: + const char *t = "accept-ranges : bytes "; /* A lowercase template */ Note: you make parser less strict: you allow "bytes" to be uppercase (was allowed only for "accept-ranges"), and you allow whitespaces before colon. Yes, exactly. I should add this to the description (wanted to do that but forgot). I'll update the patch (without re-sending) - hopefully it's okay to keep your S-o-b :) Of course! -- Best regards, Vladimir
Re: [PATCH] block/curl: rewrite http header parsing function
On 29.06.24 17:25, Michael Tokarev wrote: Existing code was long, unclear and twisty. Signed-off-by: Michael Tokarev Reviewed-by: Vladimir Sementsov-Ogievskiy --- block/curl.c | 44 ++-- 1 file changed, 18 insertions(+), 26 deletions(-) diff --git a/block/curl.c b/block/curl.c index 419f7c89ef..9802d0319d 100644 --- a/block/curl.c +++ b/block/curl.c @@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) { BDRVCURLState *s = opaque; size_t realsize = size * nmemb; -const char *header = (char *)ptr; -const char *end = header + realsize; -const char *accept_ranges = "accept-ranges:"; -const char *bytes = "bytes"; +const char *p = ptr; +const char *end = p + realsize; +const char *t = "accept-ranges : bytes "; /* A lowercase template */ Note: you make parser less strict: you allow "bytes" to be uppercase (was allowed only for accept-ranges", and you allow whitespaces before colon. -if (realsize >= strlen(accept_ranges) -&& g_ascii_strncasecmp(header, accept_ranges, - strlen(accept_ranges)) == 0) { - -char *p = strchr(header, ':') + 1; - -/* Skip whitespace between the header name and value. */ -while (p < end && *p && g_ascii_isspace(*p)) { -p++; -} - -if (end - p >= strlen(bytes) -&& strncmp(p, bytes, strlen(bytes)) == 0) { - -/* Check that there is nothing but whitespace after the value. */ -p += strlen(bytes); -while (p < end && *p && g_ascii_isspace(*p)) { -p++; -} - -if (p == end || !*p) { -s->accept_range = true; +/* check if header matches the "t" template */ +for (;;) { +if (*t == ' ') { /* space in t matches any amount of isspace in p */ +if (p < end && g_ascii_isspace(*p)) { +++p; +} else { +++t; } +} else if (*t && p < end && *t == g_ascii_tolower(*p)) { +++p, ++t; +} else { +break; } } +if (!*t && p == end) { /* if we managed to reach ends of both strings */ +s->accept_range = true; +} + return realsize; } -- Best regards, Vladimir
Re: [PATCH] block/curl: use strlen instead of strchr
On 01.07.24 09:34, Vladimir Sementsov-Ogievskiy wrote: On 29.06.24 09:20, Michael Tokarev wrote: On 6/28/24 08:49, Vladimir Sementsov-Ogievskiy wrote: We already know where colon is, so no reason to search for it. Also, avoid a code, which looks like we forget to check return value of strchr() to NULL. Suggested-by: Kevin Wolf Signed-off-by: Vladimir Sementsov-Ogievskiy --- This replaces my patch [PATCH] block/curl: explicitly assert that strchr returns non-NULL value Supersedes: <20240627153059.589070-1-vsement...@yandex-team.ru> block/curl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/curl.c b/block/curl.c index 419f7c89ef..d03bfe4817 100644 --- a/block/curl.c +++ b/block/curl.c @@ -219,7 +219,7 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) && g_ascii_strncasecmp(header, accept_ranges, strlen(accept_ranges)) == 0) { - char *p = strchr(header, ':') + 1; + char *p = header + strlen(accept_ranges); /* Skip whitespace between the header name and value. */ while (p < end && *p && g_ascii_isspace(*p)) { Heck. All these strlen()s look ugly, especially in the loop iterations.. I expect that strlen of string constant is calculated in compilation time. My aim was to fix Coverity complain (I don't see this complain in public qemu coverity project, that's why I don't specify CID in commit message), not to rewrite the whole function. So I'd prefer Kevein's suggesting which is both minimal and makes the code obviously correct.. The only simpler thing is to mark the problem false-positive in Coverity. Upd: I missed that you sent a patch, this changes things. Of course, you code looks nicer than old one. -- Best regards, Vladimir
Re: [PATCH] block/curl: use strlen instead of strchr
On 29.06.24 09:20, Michael Tokarev wrote: On 6/28/24 08:49, Vladimir Sementsov-Ogievskiy wrote: We already know where colon is, so no reason to search for it. Also, avoid a code, which looks like we forget to check return value of strchr() to NULL. Suggested-by: Kevin Wolf Signed-off-by: Vladimir Sementsov-Ogievskiy --- This replaces my patch [PATCH] block/curl: explicitly assert that strchr returns non-NULL value Supersedes: <20240627153059.589070-1-vsement...@yandex-team.ru> block/curl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/curl.c b/block/curl.c index 419f7c89ef..d03bfe4817 100644 --- a/block/curl.c +++ b/block/curl.c @@ -219,7 +219,7 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) && g_ascii_strncasecmp(header, accept_ranges, strlen(accept_ranges)) == 0) { - char *p = strchr(header, ':') + 1; + char *p = header + strlen(accept_ranges); /* Skip whitespace between the header name and value. */ while (p < end && *p && g_ascii_isspace(*p)) { Heck. All these strlen()s look ugly, especially in the loop iterations.. I expect that strlen of string constant is calculated in compilation time. My aim was to fix Coverity complain (I don't see this complain in public qemu coverity project, that's why I don't specify CID in commit message), not to rewrite the whole function. So I'd prefer Kevein's suggesting which is both minimal and makes the code obviously correct.. The only simpler thing is to mark the problem false-positive in Coverity. -- Best regards, Vladimir
[PATCH] block/curl: use strlen instead of strchr
We already know where colon is, so no reason to search for it. Also, avoid a code, which looks like we forget to check return value of strchr() to NULL. Suggested-by: Kevin Wolf Signed-off-by: Vladimir Sementsov-Ogievskiy --- This replaces my patch [PATCH] block/curl: explicitly assert that strchr returns non-NULL value Supersedes: <20240627153059.589070-1-vsement...@yandex-team.ru> block/curl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/curl.c b/block/curl.c index 419f7c89ef..d03bfe4817 100644 --- a/block/curl.c +++ b/block/curl.c @@ -219,7 +219,7 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) && g_ascii_strncasecmp(header, accept_ranges, strlen(accept_ranges)) == 0) { -char *p = strchr(header, ':') + 1; +char *p = header + strlen(accept_ranges); /* Skip whitespace between the header name and value. */ while (p < end && *p && g_ascii_isspace(*p)) { -- 2.34.1
Re: [PATCH] block/curl: explicitly assert that strchr returns non-NULL value
On 27.06.24 21:05, Kevin Wolf wrote: Am 27.06.2024 um 17:30 hat Vladimir Sementsov-Ogievskiy geschrieben: strchr may return NULL if colon is not found. It seems clearer to assert explicitly that we don't expect it here, than dereference 1 in the next line. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/curl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/curl.c b/block/curl.c index 419f7c89ef..ccfffd6c12 100644 --- a/block/curl.c +++ b/block/curl.c @@ -219,7 +219,9 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) && g_ascii_strncasecmp(header, accept_ranges, strlen(accept_ranges)) == 0) { -char *p = strchr(header, ':') + 1; +char *p = strchr(header, ':'); +assert(p != NULL); +p += 1; I'm not sure if this is actually much clearer because it doesn't say why we don't expect NULL here. If you don't look at the context, it almost looks like an assert() where proper error handling is needed. If you do, then the original line is clear enough. My first thought was that maybe what we want is a comment, but we actually already know where the colon is. So how about this instead: char *p = header + strlen(accept_ranges); Oh, right. That's better. /* Skip whitespace between the header name and value. */ while (p < end && *p && g_ascii_isspace(*p)) { -- 2.34.1 -- Best regards, Vladimir
[PATCH] block/curl: explicitly assert that strchr returns non-NULL value
strchr may return NULL if colon is not found. It seems clearer to assert explicitly that we don't expect it here, than dereference 1 in the next line. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/curl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/curl.c b/block/curl.c index 419f7c89ef..ccfffd6c12 100644 --- a/block/curl.c +++ b/block/curl.c @@ -219,7 +219,9 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque) && g_ascii_strncasecmp(header, accept_ranges, strlen(accept_ranges)) == 0) { -char *p = strchr(header, ':') + 1; +char *p = strchr(header, ':'); +assert(p != NULL); +p += 1; /* Skip whitespace between the header name and value. */ while (p < end && *p && g_ascii_isspace(*p)) { -- 2.34.1
[PATCH v2 0/3] block-jobs: add final flush
Actually block job is not completed without the final flush. It's rather unexpected to have broken target when job was successfully completed long ago and now we fail to flush or process just crashed/killed. Mirror job already has mirror_flush() for this. So, it's OK. Add similar things for other jobs: backup, stream, commit. v2: rework stream and commit, also split into 3 commits. Vladimir Sementsov-Ogievskiy (3): block/commit: implement final flush block/stream: implement final flush block/backup: implement final flush block/backup.c | 2 +- block/block-copy.c | 7 block/commit.c | 41 +++ block/stream.c | 67 +++--- include/block/block-copy.h | 1 + 5 files changed, 77 insertions(+), 41 deletions(-) -- 2.34.1
[PATCH v2 3/3] block/backup: implement final flush
Actually block job is not completed without the final flush. It's rather unexpected to have broken target when job was successfully completed long ago and now we fail to flush or process just crashed/killed. Mirror job already has mirror_flush() for this. So, it's OK. Do this for backup job too. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/backup.c | 2 +- block/block-copy.c | 7 +++ include/block/block-copy.h | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/block/backup.c b/block/backup.c index 3dd2e229d2..fee78ba5ad 100644 --- a/block/backup.c +++ b/block/backup.c @@ -156,7 +156,7 @@ static int coroutine_fn backup_loop(BackupBlockJob *job) job->bg_bcs_call = s = block_copy_async(job->bcs, 0, QEMU_ALIGN_UP(job->len, job->cluster_size), job->perf.max_workers, job->perf.max_chunk, -backup_block_copy_callback, job); +true, backup_block_copy_callback, job); while (!block_copy_call_finished(s) && !job_is_cancelled(&job->common.job)) diff --git a/block/block-copy.c b/block/block-copy.c index 7e3b378528..842b0383db 100644 --- a/block/block-copy.c +++ b/block/block-copy.c @@ -54,6 +54,7 @@ typedef struct BlockCopyCallState { int max_workers; int64_t max_chunk; bool ignore_ratelimit; +bool need_final_flush; BlockCopyAsyncCallbackFunc cb; void *cb_opaque; /* Coroutine where async block-copy is running */ @@ -899,6 +900,10 @@ block_copy_common(BlockCopyCallState *call_state) */ } while (ret > 0 && !qatomic_read(&call_state->cancelled)); +if (ret == 0 && call_state->need_final_flush) { +ret = bdrv_co_flush(s->target->bs); +} + qatomic_store_release(&call_state->finished, true); if (call_state->cb) { @@ -954,6 +959,7 @@ int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes, BlockCopyCallState *block_copy_async(BlockCopyState *s, int64_t offset, int64_t bytes, int max_workers, int64_t max_chunk, + bool need_final_flush, BlockCopyAsyncCallbackFunc cb, void *cb_opaque) { @@ -965,6 +971,7 @@ BlockCopyCallState 
*block_copy_async(BlockCopyState *s, .bytes = bytes, .max_workers = max_workers, .max_chunk = max_chunk, +.need_final_flush = need_final_flush, .cb = cb, .cb_opaque = cb_opaque, diff --git a/include/block/block-copy.h b/include/block/block-copy.h index bdc703bacd..6588ebaf77 100644 --- a/include/block/block-copy.h +++ b/include/block/block-copy.h @@ -62,6 +62,7 @@ int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes, BlockCopyCallState *block_copy_async(BlockCopyState *s, int64_t offset, int64_t bytes, int max_workers, int64_t max_chunk, + bool need_final_flush, BlockCopyAsyncCallbackFunc cb, void *cb_opaque); -- 2.34.1
[PATCH v2 2/3] block/stream: implement final flush
Actually block job is not completed without the final flush. It's rather unexpected to have broken target when job was successfully completed long ago and now we fail to flush or process just crashed/killed. Mirror job already has mirror_flush() for this. So, it's OK. Do this for stream job too. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/stream.c | 67 ++ 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/block/stream.c b/block/stream.c index 7031eef12b..893db258d4 100644 --- a/block/stream.c +++ b/block/stream.c @@ -160,6 +160,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp) int64_t offset = 0; int error = 0; int64_t n = 0; /* bytes */ +bool need_final_flush = true; WITH_GRAPH_RDLOCK_GUARD() { unfiltered_bs = bdrv_skip_filters(s->target_bs); @@ -175,10 +176,13 @@ static int coroutine_fn stream_run(Job *job, Error **errp) } job_progress_set_remaining(&s->common.job, len); -for ( ; offset < len; offset += n) { -bool copy; +for ( ; offset < len || need_final_flush; offset += n) { +bool copy = false; +bool error_is_read = true; int ret; +n = 0; + /* Note that even when no rate limit is applied we need to yield * with no pending I/O here so that bdrv_drain_all() returns. */ @@ -187,35 +191,46 @@ static int coroutine_fn stream_run(Job *job, Error **errp) break; } -copy = false; - -WITH_GRAPH_RDLOCK_GUARD() { -ret = bdrv_co_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n); -if (ret == 1) { -/* Allocated in the top, no need to copy. */ -} else if (ret >= 0) { -/* - * Copy if allocated in the intermediate images. Limit to the - * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). 
- */ -ret = bdrv_co_is_allocated_above(bdrv_cow_bs(unfiltered_bs), - s->base_overlay, true, - offset, n, &n); -/* Finish early if end of backing file has been reached */ -if (ret == 0 && n == 0) { -n = len - offset; +if (offset < len) { +WITH_GRAPH_RDLOCK_GUARD() { +ret = bdrv_co_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, + &n); +if (ret == 1) { +/* Allocated in the top, no need to copy. */ +} else if (ret >= 0) { +/* + * Copy if allocated in the intermediate images. Limit to + * the known-unallocated area + * [offset, offset+n*BDRV_SECTOR_SIZE). + */ +ret = bdrv_co_is_allocated_above(bdrv_cow_bs(unfiltered_bs), + s->base_overlay, true, + offset, n, &n); +/* Finish early if end of backing file has been reached */ +if (ret == 0 && n == 0) { +n = len - offset; +} + +copy = (ret > 0); } - -copy = (ret > 0); } -} -trace_stream_one_iteration(s, offset, n, ret); -if (copy) { -ret = stream_populate(s->blk, offset, n); +trace_stream_one_iteration(s, offset, n, ret); +if (copy) { +ret = stream_populate(s->blk, offset, n); +} +} else { +assert(need_final_flush); +ret = blk_co_flush(s->blk); +if (ret < 0) { +error_is_read = false; +} else { +need_final_flush = false; +} } if (ret < 0) { BlockErrorAction action = -block_job_error_action(&s->common, s->on_error, true, -ret); +block_job_error_action(&s->common, s->on_error, + error_is_read, -ret); if (action == BLOCK_ERROR_ACTION_STOP) { n = 0; continue; -- 2.34.1
[PATCH v2 1/3] block/commit: implement final flush
Actually block job is not completed without the final flush. It's rather unexpected to have broken target when job was successfully completed long ago and now we fail to flush or process just crashed/killed. Mirror job already has mirror_flush() for this. So, it's OK. Do this for commit job too. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/commit.c | 41 +++-- 1 file changed, 27 insertions(+), 14 deletions(-) diff --git a/block/commit.c b/block/commit.c index 7c3fdcb0ca..81971692a2 100644 --- a/block/commit.c +++ b/block/commit.c @@ -134,6 +134,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp) int64_t n = 0; /* bytes */ QEMU_AUTO_VFREE void *buf = NULL; int64_t len, base_len; +bool need_final_flush = true; len = blk_co_getlength(s->top); if (len < 0) { @@ -155,8 +156,8 @@ static int coroutine_fn commit_run(Job *job, Error **errp) buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE); -for (offset = 0; offset < len; offset += n) { -bool copy; +for (offset = 0; offset < len || need_final_flush; offset += n) { +bool copy = false; bool error_in_source = true; /* Note that even when no rate limit is applied we need to yield @@ -166,22 +167,34 @@ static int coroutine_fn commit_run(Job *job, Error **errp) if (job_is_cancelled(&s->common.job)) { break; } -/* Copy if allocated above the base */ -ret = blk_co_is_allocated_above(s->top, s->base_overlay, true, -offset, COMMIT_BUFFER_SIZE, &n); -copy = (ret > 0); -trace_commit_one_iteration(s, offset, n, ret); -if (copy) { -assert(n < SIZE_MAX); -ret = blk_co_pread(s->top, offset, n, buf, 0); -if (ret >= 0) { -ret = blk_co_pwrite(s->base, offset, n, buf, 0); -if (ret < 0) { -error_in_source = false; +if (offset < len) { +/* Copy if allocated above the base */ +ret = blk_co_is_allocated_above(s->top, s->base_overlay, true, +offset, COMMIT_BUFFER_SIZE, &n); +copy = (ret > 0); +trace_commit_one_iteration(s, offset, n, ret); +if (copy) { +assert(n < SIZE_MAX); + +ret = blk_co_pread(s->top, offset, n, buf, 0); +if 
(ret >= 0) { +ret = blk_co_pwrite(s->base, offset, n, buf, 0); +if (ret < 0) { +error_in_source = false; +} } } +} else { +assert(need_final_flush); +ret = blk_co_flush(s->base); +if (ret < 0) { +error_in_source = false; +} else { +need_final_flush = false; +} } + if (ret < 0) { BlockErrorAction action = block_job_error_action(&s->common, s->on_error, -- 2.34.1
[PATCH v9 3/7] block: make bdrv_find_child() function public
To be reused soon for blockdev-replace functionality. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block.c | 13 + blockdev.c | 14 -- include/block/block_int-io.h | 2 ++ 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/block.c b/block.c index 468cf5e67d..f6292f459a 100644 --- a/block.c +++ b/block.c @@ -8174,6 +8174,19 @@ int bdrv_make_empty(BdrvChild *c, Error **errp) return 0; } +BdrvChild *bdrv_find_child(BlockDriverState *parent_bs, const char *child_name) +{ +BdrvChild *child; + +QLIST_FOREACH(child, &parent_bs->children, next) { +if (strcmp(child->name, child_name) == 0) { +return child; +} +} + +return NULL; +} + /* * Return the child that @bs acts as an overlay for, and from which data may be * copied in COW or COR operations. Usually this is the backing file. diff --git a/blockdev.c b/blockdev.c index 835064ed03..ba7e90b06e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3452,20 +3452,6 @@ void qmp_blockdev_del(const char *node_name, Error **errp) bdrv_unref(bs); } -static BdrvChild * GRAPH_RDLOCK -bdrv_find_child(BlockDriverState *parent_bs, const char *child_name) -{ -BdrvChild *child; - -QLIST_FOREACH(child, &parent_bs->children, next) { -if (strcmp(child->name, child_name) == 0) { -return child; -} -} - -return NULL; -} - void qmp_x_blockdev_change(const char *parent, const char *child, const char *node, Error **errp) { diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h index 4a7cf2b4fd..11ed4aa927 100644 --- a/include/block/block_int-io.h +++ b/include/block/block_int-io.h @@ -130,6 +130,8 @@ bdrv_co_refresh_total_sectors(BlockDriverState *bs, int64_t hint); int co_wrapper_mixed_bdrv_rdlock bdrv_refresh_total_sectors(BlockDriverState *bs, int64_t hint); +BdrvChild * GRAPH_RDLOCK +bdrv_find_child(BlockDriverState *parent_bs, const char *child_name); BdrvChild * GRAPH_RDLOCK bdrv_cow_child(BlockDriverState *bs); BdrvChild * GRAPH_RDLOCK bdrv_filter_child(BlockDriverState *bs); BdrvChild * GRAPH_RDLOCK 
bdrv_filter_or_cow_child(BlockDriverState *bs); -- 2.34.1
[PATCH v9 6/7] iotests.py: introduce VM.assert_edges_list() method
Add an alternative method to check block graph, to be used in further commit. Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/qemu-iotests/iotests.py | 17 + 1 file changed, 17 insertions(+) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index ea48af4a7b..8a5fd01eac 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -1108,6 +1108,23 @@ def check_bitmap_status(self, node_name, bitmap_name, fields): return fields.items() <= ret.items() +def get_block_graph(self): +""" +Returns block graph in form of edges list, where each edge is a tuple: + (parent_node_name, child_name, child_node_name) +""" +graph = self.qmp('x-debug-query-block-graph')['return'] + +nodes = {n['id']: n['name'] for n in graph['nodes']} +# Check that all names are unique: +assert len(set(nodes.values())) == len(nodes) + +return [(nodes[e['parent']], e['name'], nodes[e['child']]) +for e in graph['edges']] + +def assert_edges_list(self, edges): +assert sorted(edges) == sorted(self.get_block_graph()) + def assert_block_path(self, root, path, expected_node, graph=None): """ Check whether the node under the given path in the block graph -- 2.34.1
[PATCH v9 4/7] qapi: add blockdev-replace command
Add a command that can replace bs in following BdrvChild structures: - qdev blk root child - block-export blk root child - any child of BlockDriverState selected by child-name Signed-off-by: Vladimir Sementsov-Ogievskiy --- blockdev.c | 56 +++ qapi/block-core.json | 88 ++ stubs/blk-by-qdev-id.c | 13 +++ stubs/meson.build | 1 + 4 files changed, 158 insertions(+) create mode 100644 stubs/blk-by-qdev-id.c diff --git a/blockdev.c b/blockdev.c index ba7e90b06e..2190467022 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3559,6 +3559,62 @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread, bdrv_try_change_aio_context(bs, new_context, NULL, errp); } +void qmp_blockdev_replace(BlockdevReplace *repl, Error **errp) +{ +BdrvChild *child = NULL; +BlockDriverState *new_child_bs; + +if (repl->parent_type == BLOCK_PARENT_TYPE_DRIVER) { +BlockDriverState *parent_bs; + +parent_bs = bdrv_find_node(repl->u.driver.node_name); +if (!parent_bs) { +error_setg(errp, "Block driver node with node-name '%s' not " + "found", repl->u.driver.node_name); +return; +} + +child = bdrv_find_child(parent_bs, repl->u.driver.child); +if (!child) { +error_setg(errp, "Block driver node '%s' doesn't have child " + "named '%s'", repl->u.driver.node_name, + repl->u.driver.child); +return; +} +} else { +/* Other types are similar, they work through blk */ +BlockBackend *blk; +bool is_qdev = repl->parent_type == BLOCK_PARENT_TYPE_QDEV; +const char *id = +is_qdev ? repl->u.qdev.qdev_id : repl->u.export.export_id; + +assert(is_qdev || repl->parent_type == BLOCK_PARENT_TYPE_EXPORT); + +blk = is_qdev ? blk_by_qdev_id(id, errp) : blk_by_export_id(id, errp); +if (!blk) { +return; +} + +child = blk_root(blk); +if (!child) { +error_setg(errp, "%s '%s' is empty, nothing to replace", + is_qdev ? 
"Device" : "Export", id); +return; +} +} + +assert(child); +assert(child->bs); + +new_child_bs = bdrv_find_node(repl->new_child); +if (!new_child_bs) { +error_setg(errp, "Node '%s' not found", repl->new_child); +return; +} + +bdrv_replace_child_bs(child, new_child_bs, errp); +} + QemuOptsList qemu_common_drive_opts = { .name = "drive", .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head), diff --git a/qapi/block-core.json b/qapi/block-core.json index df5e07debd..0a6f08a6e0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -6148,3 +6148,91 @@ ## { 'struct': 'DummyBlockCoreForceArrays', 'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } } + +## +# @BlockParentType: +# +# @qdev: block device, such as created by device_add, and denoted by +# qdev-id +# +# @driver: block driver node, such as created by blockdev-add, and +# denoted by node-name +# +# @export: block export, such created by block-export-add, and +# denoted by export-id +# +# Since 9.1 +## +{ 'enum': 'BlockParentType', + 'data': ['qdev', 'driver', 'export'] } + +## +# @BdrvChildRefQdev: +# +# @qdev-id: the device's ID or QOM path +# +# Since 9.1 +## +{ 'struct': 'BdrvChildRefQdev', + 'data': { 'qdev-id': 'str' } } + +## +# @BdrvChildRefExport: +# +# @export-id: block export identifier +# +# Since 9.1 +## +{ 'struct': 'BdrvChildRefExport', + 'data': { 'export-id': 'str' } } + +## +# @BdrvChildRefDriver: +# +# @node-name: the node name of the parent block node +# +# @child: name of the child to be replaced, like "file" or "backing" +# +# Since 9.1 +## +{ 'struct': 'BdrvChildRefDriver', + 'data': { 'node-name': 'str', 'child': 'str' } } + +## +# @BlockdevReplace: +# +# @parent-type: type of the parent, which child is to be replaced +# +# @new-child: new child for replacement +# +# Since 9.1 +## +{ 'union': 'BlockdevReplace', + 'base': { + 'parent-type': 'BlockParentType', + 'new-child': 'str' + }, + 'discriminator': 'parent-type', + 'data': { + 'qdev': 'BdrvChildRefQdev', + 
'export': 'BdrvChildRefExport'
[PATCH v9 1/7] block-backend: blk_root(): drop const specifier on return type
We'll need get non-const child pointer for graph modifications in further commits. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/block-backend.c | 2 +- include/sysemu/block-backend-global-state.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index db6f9b92a3..71d5002198 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -2879,7 +2879,7 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in, bytes, read_flags, write_flags); } -const BdrvChild *blk_root(BlockBackend *blk) +BdrvChild *blk_root(BlockBackend *blk) { GLOBAL_STATE_CODE(); return blk->root; diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h index 49c12b0fa9..ccb35546a1 100644 --- a/include/sysemu/block-backend-global-state.h +++ b/include/sysemu/block-backend-global-state.h @@ -126,7 +126,7 @@ void blk_set_force_allow_inactivate(BlockBackend *blk); bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp); void blk_unregister_buf(BlockBackend *blk, void *host, size_t size); -const BdrvChild *blk_root(BlockBackend *blk); +BdrvChild *blk_root(BlockBackend *blk); int blk_make_empty(BlockBackend *blk, Error **errp); -- 2.34.1
[PATCH v9 2/7] block/export: add blk_by_export_id()
We need it for further blockdev-replace functionality. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block/export/export.c | 18 ++ include/sysemu/block-backend-global-state.h | 1 + 2 files changed, 19 insertions(+) diff --git a/block/export/export.c b/block/export/export.c index 6d51ae8ed7..57beae7982 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -355,3 +355,21 @@ BlockExportInfoList *qmp_query_block_exports(Error **errp) return head; } + +BlockBackend *blk_by_export_id(const char *id, Error **errp) +{ +BlockExport *exp; + +exp = blk_exp_find(id); +if (exp == NULL) { +error_setg(errp, "Export '%s' not found", id); +return NULL; +} + +if (!exp->blk) { +error_setg(errp, "Export '%s' is empty", id); +return NULL; +} + +return exp->blk; +} diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h index ccb35546a1..410d0cc5c7 100644 --- a/include/sysemu/block-backend-global-state.h +++ b/include/sysemu/block-backend-global-state.h @@ -74,6 +74,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev); DeviceState *blk_get_attached_dev(BlockBackend *blk); BlockBackend *blk_by_dev(void *dev); BlockBackend *blk_by_qdev_id(const char *id, Error **errp); +BlockBackend *blk_by_export_id(const char *id, Error **errp); void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque); void blk_activate(BlockBackend *blk, Error **errp); -- 2.34.1
[PATCH v9 7/7] iotests: add filter-insertion
Demonstrate new blockdev-replace API for filter insertion and removal. Signed-off-by: Vladimir Sementsov-Ogievskiy --- tests/qemu-iotests/tests/filter-insertion | 236 ++ tests/qemu-iotests/tests/filter-insertion.out | 5 + 2 files changed, 241 insertions(+) create mode 100755 tests/qemu-iotests/tests/filter-insertion create mode 100644 tests/qemu-iotests/tests/filter-insertion.out diff --git a/tests/qemu-iotests/tests/filter-insertion b/tests/qemu-iotests/tests/filter-insertion new file mode 100755 index 00..02a0978f96 --- /dev/null +++ b/tests/qemu-iotests/tests/filter-insertion @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +# +# Tests for inserting and removing filters in a block graph. +# +# Copyright (c) 2022 Virtuozzo International GmbH. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+# + +import os + +import iotests +from iotests import qemu_img_create, try_remove + + +disk = os.path.join(iotests.test_dir, 'disk') +sock = os.path.join(iotests.sock_dir, 'sock') +size = 1024 * 1024 + + +class TestFilterInsertion(iotests.QMPTestCase): +def setUp(self): +qemu_img_create('-f', iotests.imgfmt, disk, str(size)) + +self.vm = iotests.VM() +self.vm.launch() + +self.vm.cmd('blockdev-add', { +'node-name': 'disk0', +'driver': 'qcow2', +'file': { +'node-name': 'file0', +'driver': 'file', +'filename': disk +} +}) + +def tearDown(self): +self.vm.shutdown() +os.remove(disk) +try_remove(sock) + +def test_simple_insertion(self): +vm = self.vm + +vm.cmd('blockdev-add', { +'node-name': 'filter', +'driver': 'blkdebug', +'image': 'file0' +}) + +vm.cmd('blockdev-replace', { +'parent-type': 'driver', +'node-name': 'disk0', +'child': 'file', +'new-child': 'filter' +}) + +# Filter inserted: +# disk0 -file-> filter -file-> file0 +vm.assert_edges_list([ +('disk0', 'file', 'filter'), +('filter', 'image', 'file0') +]) + +vm.cmd('blockdev-replace', { +'parent-type': 'driver', +'node-name': 'disk0', +'child': 'file', +'new-child': 'file0' +}) + +# Filter replaced, but still exists: +# dik0 -file-> file0 <-file- filter +vm.assert_edges_list([ +('disk0', 'file', 'file0'), +('filter', 'image', 'file0') +]) + +vm.cmd('blockdev-del', node_name='filter') + +# Filter removed +# dik0 -file-> file0 +vm.assert_edges_list([ +('disk0', 'file', 'file0') +]) + +def test_insert_under_qdev(self): +vm = self.vm + +vm.cmd('device_add', driver='virtio-scsi') +vm.cmd('device_add', id='sda', driver='scsi-hd', + drive='disk0') + +vm.cmd('blockdev-add', { +'node-name': 'filter', +'driver': 'compress', +'file': 'disk0' +}) + +vm.cmd('blockdev-replace', { +'parent-type': 'qdev', +'qdev-id': 'sda', +'new-child': 'filter' +}) + +# Filter inserted: +# sda -root-> filter -file-> disk0 -file-> file0 +vm.assert_edges_list([ +# parent_node_name, child_name, child_node_name +('sda', 'root', 'filter'), +('fil
[PATCH v9 5/7] block: bdrv_get_xdbg_block_graph(): report export ids
Currently for block exports we report empty blk names. That's not good. Let's try to find corresponding block export and report its id. Signed-off-by: Vladimir Sementsov-Ogievskiy --- block.c | 4 block/export/export.c | 13 + include/block/export.h | 1 + stubs/blk-exp-find-by-blk.c | 9 + stubs/meson.build | 1 + 5 files changed, 28 insertions(+) create mode 100644 stubs/blk-exp-find-by-blk.c diff --git a/block.c b/block.c index f6292f459a..601475e835 100644 --- a/block.c +++ b/block.c @@ -6326,7 +6326,11 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp) for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) { char *allocated_name = NULL; const char *name = blk_name(blk); +BlockExport *exp = blk_exp_find_by_blk(blk); +if (!*name && exp) { +name = exp->id; +} if (!*name) { name = allocated_name = blk_get_attached_dev_id(blk); } diff --git a/block/export/export.c b/block/export/export.c index 57beae7982..8744a1171e 100644 --- a/block/export/export.c +++ b/block/export/export.c @@ -60,6 +60,19 @@ BlockExport *blk_exp_find(const char *id) return NULL; } +BlockExport *blk_exp_find_by_blk(BlockBackend *blk) +{ +BlockExport *exp; + +QLIST_FOREACH(exp, &block_exports, next) { +if (exp->blk == blk) { +return exp; +} +} + +return NULL; +} + static const BlockExportDriver *blk_exp_find_driver(BlockExportType type) { int i; diff --git a/include/block/export.h b/include/block/export.h index f2fe0f8078..16863d37cf 100644 --- a/include/block/export.h +++ b/include/block/export.h @@ -82,6 +82,7 @@ struct BlockExport { BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp); BlockExport *blk_exp_find(const char *id); +BlockExport *blk_exp_find_by_blk(BlockBackend *blk); void blk_exp_ref(BlockExport *exp); void blk_exp_unref(BlockExport *exp); void blk_exp_request_shutdown(BlockExport *exp); diff --git a/stubs/blk-exp-find-by-blk.c b/stubs/blk-exp-find-by-blk.c new file mode 100644 index 00..2fc1da953b --- /dev/null +++ b/stubs/blk-exp-find-by-blk.c @@ -0,0 
+1,9 @@ +#include "qemu/osdep.h" +#include "sysemu/block-backend.h" +#include "block/export.h" + +BlockExport *blk_exp_find_by_blk(BlockBackend *blk) +{ +return NULL; +} + diff --git a/stubs/meson.build b/stubs/meson.build index 068998c1a5..cbe30f94e8 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -16,6 +16,7 @@ if have_block stub_ss.add(files('blk-commit-all.c')) stub_ss.add(files('blk-exp-close-all.c')) stub_ss.add(files('blk-by-qdev-id.c')) + stub_ss.add(files('blk-exp-find-by-blk.c')) stub_ss.add(files('blockdev-close-all-bdrv-states.c')) stub_ss.add(files('change-state-handler.c')) stub_ss.add(files('get-vm-name.c')) -- 2.34.1
[PATCH v9 0/7] blockdev-replace
Hi all! This series presents a new command blockdev-replace, which helps to insert/remove filters anywhere in the block graph. It can: - replace qdev block-node by qdev-id - replace export block-node by export-id - replace any child of parent block-node by node-name and child name So insertions is done in two steps: 1. blockdev_add (create filter node, unparented) [some parent] [new filter] | | V V [some child ] 2. blockdev-replace (replace child by the filter) [some parent] | V [new filter] | V [some child] And removal is done in reverse order: 1. blockdev-replace (go back to picture 1) 2. blockdev_del (remove filter node) Ideally, we to do both operations (add + replace or replace + del) in a transaction, but that would be another series. v9: rebase drop x- prefix and use unstable feature bump version to 9.1 in qapi spec update error message in blk_by_qdev_id stub v8: rebase. Also don't use "preallocate" filter in a test, as we don't support removal of this filter for now. Preallocate filter is really unusual, see discussion here: https://www.mail-archive.com/qemu-devel@nongnu.org/msg994945.html Vladimir Sementsov-Ogievskiy (7): block-backend: blk_root(): drop const specifier on return type block/export: add blk_by_export_id() block: make bdrv_find_child() function public qapi: add blockdev-replace command block: bdrv_get_xdbg_block_graph(): report export ids iotests.py: introduce VM.assert_edges_list() method iotests: add filter-insertion block.c | 17 ++ block/block-backend.c | 2 +- block/export/export.c | 31 +++ blockdev.c| 70 -- include/block/block_int-io.h | 2 + include/block/export.h| 1 + include/sysemu/block-backend-global-state.h | 3 +- qapi/block-core.json | 88 +++ stubs/blk-by-qdev-id.c| 13 + stubs/blk-exp-find-by-blk.c | 9 + stubs/meson.build | 2 + tests/qemu-iotests/iotests.py | 17 ++ tests/qemu-iotests/tests/filter-insertion | 236 ++ tests/qemu-iotests/tests/filter-insertion.out | 5 + 14 files changed, 480 insertions(+), 16 deletions(-) create mode 
100644 stubs/blk-by-qdev-id.c create mode 100644 stubs/blk-exp-find-by-blk.c create mode 100755 tests/qemu-iotests/tests/filter-insertion create mode 100644 tests/qemu-iotests/tests/filter-insertion.out -- 2.34.1
Re: [PATCH v2] block-backend: per-device throttling of BLOCK_IO_ERROR reports
ping2 On 09.01.24 16:13, Vladimir Sementsov-Ogievskiy wrote: From: Leonid Kaplan BLOCK_IO_ERROR events comes from guest, so we must throttle them. We still want per-device throttling, so let's use device id as a key. Signed-off-by: Leonid Kaplan Signed-off-by: Vladimir Sementsov-Ogievskiy --- v2: add Note: to QAPI doc monitor/monitor.c| 10 ++ qapi/block-core.json | 2 ++ 2 files changed, 12 insertions(+) diff --git a/monitor/monitor.c b/monitor/monitor.c index 01ede1babd..ad0243e9d7 100644 --- a/monitor/monitor.c +++ b/monitor/monitor.c @@ -309,6 +309,7 @@ int error_printf_unless_qmp(const char *fmt, ...) static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = { /* Limit guest-triggerable events to 1 per second */ [QAPI_EVENT_RTC_CHANGE]= { 1000 * SCALE_MS }, +[QAPI_EVENT_BLOCK_IO_ERROR]= { 1000 * SCALE_MS }, [QAPI_EVENT_WATCHDOG] = { 1000 * SCALE_MS }, [QAPI_EVENT_BALLOON_CHANGE]= { 1000 * SCALE_MS }, [QAPI_EVENT_QUORUM_REPORT_BAD] = { 1000 * SCALE_MS }, @@ -498,6 +499,10 @@ static unsigned int qapi_event_throttle_hash(const void *key) hash += g_str_hash(qdict_get_str(evstate->data, "qom-path")); } +if (evstate->event == QAPI_EVENT_BLOCK_IO_ERROR) { +hash += g_str_hash(qdict_get_str(evstate->data, "device")); +} + return hash; } @@ -525,6 +530,11 @@ static gboolean qapi_event_throttle_equal(const void *a, const void *b) qdict_get_str(evb->data, "qom-path")); } +if (eva->event == QAPI_EVENT_BLOCK_IO_ERROR) { +return !strcmp(qdict_get_str(eva->data, "device"), + qdict_get_str(evb->data, "device")); +} + return TRUE; } diff --git a/qapi/block-core.json b/qapi/block-core.json index ca390c5700..32c2c2f030 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -5559,6 +5559,8 @@ # Note: If action is "stop", a STOP event will eventually follow the # BLOCK_IO_ERROR event # +# Note: This event is rate-limited. +# # Since: 0.13 # # Example: -- Best regards, Vladimir