[PATCH v7 2/3] vhost-user-blk: split vhost_user_blk_sync_config()

2024-11-06 Thread Vladimir Sementsov-Ogievskiy
Split vhost_user_blk_sync_config() out from
vhost_user_blk_handle_config_change(), to be reused in the following
commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Acked-by: Raphael Norwitz 
Reviewed-by: Stefano Garzarella 
---
 hw/block/vhost-user-blk.c | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 5b7f46bbb0..48b3dabb8d 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -90,27 +90,39 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, 
const uint8_t *config)
 s->blkcfg.wce = blkcfg->wce;
 }
 
+static int vhost_user_blk_sync_config(DeviceState *dev, Error **errp)
+{
+int ret;
+VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+VHostUserBlk *s = VHOST_USER_BLK(vdev);
+
+ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
+   vdev->config_len, errp);
+if (ret < 0) {
+return ret;
+}
+
+memcpy(vdev->config, &s->blkcfg, vdev->config_len);
+virtio_notify_config(vdev);
+
+return 0;
+}
+
 static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
 {
 int ret;
-VirtIODevice *vdev = dev->vdev;
-VHostUserBlk *s = VHOST_USER_BLK(dev->vdev);
 Error *local_err = NULL;
 
 if (!dev->started) {
 return 0;
 }
 
-ret = vhost_dev_get_config(dev, (uint8_t *)&s->blkcfg,
-   vdev->config_len, &local_err);
+ret = vhost_user_blk_sync_config(DEVICE(dev->vdev), &local_err);
 if (ret < 0) {
 error_report_err(local_err);
 return ret;
 }
 
-memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len);
-virtio_notify_config(dev->vdev);
-
 return 0;
 }
 
-- 
2.34.1




[PATCH v7 3/3] qapi: introduce device-sync-config

2024-11-06 Thread Vladimir Sementsov-Ogievskiy
Add a command to sync the config from the vhost-user backend to the
device. It may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG fails,
does not trigger an interrupt to the guest, or is simply not available
(not supported by the vhost-user server).

The command result is racy if we allow it during migration, so let's not
allow that.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Markus Armbruster 
Acked-by: Raphael Norwitz 
---
 hw/block/vhost-user-blk.c |  1 +
 hw/virtio/virtio-pci.c|  9 +
 include/hw/qdev-core.h|  6 ++
 qapi/qdev.json| 24 
 system/qdev-monitor.c | 38 ++
 5 files changed, 78 insertions(+)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 48b3dabb8d..7996e49821 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -591,6 +591,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, 
void *data)
 
 device_class_set_props(dc, vhost_user_blk_properties);
 dc->vmsd = &vmstate_vhost_user_blk;
+dc->sync_config = vhost_user_blk_sync_config;
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 vdc->realize = vhost_user_blk_device_realize;
 vdc->unrealize = vhost_user_blk_device_unrealize;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 4d832fe845..c5a809b956 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2385,6 +2385,14 @@ static void virtio_pci_dc_realize(DeviceState *qdev, 
Error **errp)
 vpciklass->parent_dc_realize(qdev, errp);
 }
 
+static int virtio_pci_sync_config(DeviceState *dev, Error **errp)
+{
+VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);
+VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+return qdev_sync_config(DEVICE(vdev), errp);
+}
+
 static void virtio_pci_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
@@ -2401,6 +2409,7 @@ static void virtio_pci_class_init(ObjectClass *klass, 
void *data)
 device_class_set_parent_realize(dc, virtio_pci_dc_realize,
 &vpciklass->parent_dc_realize);
 rc->phases.hold = virtio_pci_bus_reset_hold;
+dc->sync_config = virtio_pci_sync_config;
 }
 
 static const TypeInfo virtio_pci_info = {
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index aa97c34a4b..94914858d8 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -95,6 +95,7 @@ typedef void (*DeviceUnrealize)(DeviceState *dev);
 typedef void (*DeviceReset)(DeviceState *dev);
 typedef void (*BusRealize)(BusState *bus, Error **errp);
 typedef void (*BusUnrealize)(BusState *bus);
+typedef int (*DeviceSyncConfig)(DeviceState *dev, Error **errp);
 
 /**
  * struct DeviceClass - The base class for all devices.
@@ -103,6 +104,9 @@ typedef void (*BusUnrealize)(BusState *bus);
  * property is changed to %true.
  * @unrealize: Callback function invoked when the #DeviceState:realized
  * property is changed to %false.
+ * @sync_config: Callback function invoked when QMP command device-sync-config
+ * is called. Should synchronize device configuration from host to guest part
+ * and notify the guest about the change.
  * @hotpluggable: indicates if #DeviceClass is hotpluggable, available
  * as readonly "hotpluggable" property of #DeviceState instance
  *
@@ -162,6 +166,7 @@ struct DeviceClass {
 DeviceReset legacy_reset;
 DeviceRealize realize;
 DeviceUnrealize unrealize;
+DeviceSyncConfig sync_config;
 
 /**
  * @vmsd: device state serialisation description for
@@ -547,6 +552,7 @@ bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
  */
 HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev);
 void qdev_unplug(DeviceState *dev, Error **errp);
+int qdev_sync_config(DeviceState *dev, Error **errp);
 void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev,
   DeviceState *dev, Error **errp);
 void qdev_machine_creation_done(void);
diff --git a/qapi/qdev.json b/qapi/qdev.json
index 53d147c7b4..25cbcf977b 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -163,3 +163,27 @@
 ##
 { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
   'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @device-sync-config:
+#
+# Synchronize device configuration from host to guest part.  First,
+# copy the configuration from the host part (backend) to the guest
+# part (frontend).  Then notify guest software that device
+# configuration changed.
+#
+# The command may be used to notify the guest about block device
+# capacity change.  Currently only vhost-user-blk device supports
+# this.
+#
+# @id: the device's ID or QOM path
+#
+# Features:
+#
+# @unstable: The command is experimental.
+#
+# Since: 9.2
+##
+{ 'command': 'device-sync-config',
+  'features': [ 'unstable' ],
+  'data': {'id': 'str'} }
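
For illustration, a minimal usage sketch (not part of the patch), assuming the
qemu.qmp Python package and a vhost-user-blk device with the hypothetical id
'disk0'; a management application would call the new command after the backend
has resized the image:

import asyncio
from qemu.qmp import QMPClient

async def sync_disk_config():
    qmp = QMPClient('sync-config-example')
    await qmp.connect('/tmp/qmp.sock')      # hypothetical QMP socket path
    # The vhost-user backend has already grown the image; ask QEMU to
    # re-read the config space and notify the guest about the new capacity.
    await qmp.execute('device-sync-config', {'id': 'disk0'})
    await qmp.disconnect()

asyncio.run(sync_disk_config())
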

[PATCH v7 1/3] qdev-monitor: add option to report GenericError from find_device_state

2024-11-06 Thread Vladimir Sementsov-Ogievskiy
Here we just prepare for the following patch, making it possible to report
GenericError as recommended.

This patch doesn't aim to prevent further use of DeviceNotFound by
future interfaces:

 - find_device_state() is used in the blk_by_qdev_id() and qmp_get_blk()
   functions, which may lead to the spread of DeviceNotFound anyway
 - also, nothing prevents simply copy-pasting find_device_state() calls
   with a false argument
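
For illustration, a sketch of the two error classes on the wire, using a bogus
id 'nosuchdev'; the descriptions follow the "Device '%s' not found" message
used by find_device_state():

# Callers that pass use_generic_error=false (e.g. device_del below) keep
# reporting the legacy error class:
legacy_error = {"error": {"class": "DeviceNotFound",
                          "desc": "Device 'nosuchdev' not found"}}
# New interfaces that pass use_generic_error=true report the recommended class:
generic_error = {"error": {"class": "GenericError",
                           "desc": "Device 'nosuchdev' not found"}}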

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Markus Armbruster 
Acked-by: Raphael Norwitz 
---
 system/qdev-monitor.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
index 320c47b72d..2c76cef4d8 100644
--- a/system/qdev-monitor.c
+++ b/system/qdev-monitor.c
@@ -885,13 +885,20 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, 
Error **errp)
 object_unref(OBJECT(dev));
 }
 
-static DeviceState *find_device_state(const char *id, Error **errp)
+/*
+ * Note that creating new APIs using error classes other than GenericError is
+ * not recommended. Set use_generic_error=true for new interfaces.
+ */
+static DeviceState *find_device_state(const char *id, bool use_generic_error,
+  Error **errp)
 {
 Object *obj = object_resolve_path_at(qdev_get_peripheral(), id);
 DeviceState *dev;
 
 if (!obj) {
-error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+error_set(errp,
+  (use_generic_error ?
+   ERROR_CLASS_GENERIC_ERROR : ERROR_CLASS_DEVICE_NOT_FOUND),
   "Device '%s' not found", id);
 return NULL;
 }
@@ -956,7 +963,7 @@ void qdev_unplug(DeviceState *dev, Error **errp)
 
 void qmp_device_del(const char *id, Error **errp)
 {
-DeviceState *dev = find_device_state(id, errp);
+DeviceState *dev = find_device_state(id, false, errp);
 if (dev != NULL) {
 if (dev->pending_deleted_event &&
 (dev->pending_deleted_expires_ms == 0 ||
@@ -1076,7 +1083,7 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp)
 
 GLOBAL_STATE_CODE();
 
-dev = find_device_state(id, errp);
+dev = find_device_state(id, false, errp);
 if (dev == NULL) {
 return NULL;
 }
-- 
2.34.1




[PATCH v7 0/3] vhost-user-blk: live resize additional APIs

2024-11-06 Thread Vladimir Sementsov-Ogievskiy
v7: update QAPI version 9.1 -> 9.2

Vladimir Sementsov-Ogievskiy (3):
  qdev-monitor: add option to report GenericError from find_device_state
  vhost-user-blk: split vhost_user_blk_sync_config()
  qapi: introduce device-sync-config

 hw/block/vhost-user-blk.c | 27 ++--
 hw/virtio/virtio-pci.c|  9 +++
 include/hw/qdev-core.h|  6 +
 qapi/qdev.json| 24 ++
 system/qdev-monitor.c | 53 ---
 5 files changed, 108 insertions(+), 11 deletions(-)

-- 
2.34.1




Re: [PATCH v3 3/7] qapi: block-job-change: make copy-mode parameter optional

2024-10-29 Thread Vladimir Sementsov-Ogievskiy

On 22.10.24 15:35, Kevin Wolf wrote:

On 02.10.2024 at 16:06, Vladimir Sementsov-Ogievskiy wrote:

We are going to add more parameters to change. We want to make it possible
to change only one or any subset of the available options. So all the
options should be optional.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Acked-by: Markus Armbruster 


This is different from blockdev-reopen, which requires repeating all
options (and can therefore reuse the type from blockdev-add). One of the
reasons why we made it like that is that we had some options for which
"option absent" was a meaningful value in itself, so it would have
become ambiguous if an absent option in blockdev-reopen should mean
"leave the existing value unchanged" or "unset the option".



Right... Thanks for noting this. I'll try to apply the blockdev-reopen approach here.


Are we confident that this will never happen with job options? In case
of doubt, I would go for consistency with blockdev-reopen.

Either way, it would be good to document the reasoning for whatever
option we choose in the commit message.

Kevin



--
Best regards,
Vladimir




Re: [PATCH v6 0/3] vhost-user-blk: live resize additional APIs

2024-10-22 Thread Vladimir Sementsov-Ogievskiy

ping)

On 20.09.24 12:49, Vladimir Sementsov-Ogievskiy wrote:

v6: tiny fix: add document comment for sync_config field to fix qdoc
generation. Also, add r-b and a-b marks.

Vladimir Sementsov-Ogievskiy (3):
   qdev-monitor: add option to report GenericError from find_device_state
   vhost-user-blk: split vhost_user_blk_sync_config()
   qapi: introduce device-sync-config

  hw/block/vhost-user-blk.c | 27 ++--
  hw/virtio/virtio-pci.c|  9 +++
  include/hw/qdev-core.h|  6 +
  qapi/qdev.json| 24 ++
  system/qdev-monitor.c | 53 ---
  5 files changed, 108 insertions(+), 11 deletions(-)



--
Best regards,
Vladimir




Re: [Bug Report][RFC PATCH 1/1] block: fix failing assert on paused VM migration

2024-10-10 Thread Vladimir Sementsov-Ogievskiy

On 10.10.24 17:30, Fabiano Rosas wrote:

Vladimir Sementsov-Ogievskiy  writes:


On 09.10.24 23:53, Fabiano Rosas wrote:

Vladimir Sementsov-Ogievskiy  writes:


On 30.09.24 17:07, Andrey Drobyshev wrote:

On 9/30/24 12:25 PM, Vladimir Sementsov-Ogievskiy wrote:

[add migration maintainers]

On 24.09.24 15:56, Andrey Drobyshev wrote:

[...]


I doubt that this is a correct way to go.

As far as I understand, "inactive" actually means that "storage does not
belong to QEMU, but to someone else (another QEMU process, for example),
and may be changed transparently". In turn this means that QEMU should
do nothing with inactive disks. So the problem is that nobody called
bdrv_activate_all on the target, and we shouldn't ignore that.

Hmm, I see in process_incoming_migration_bh() we do call
bdrv_activate_all(), but only in some scenarios. Maybe the condition
should be less strict here.

Why do we need any condition here at all? Don't we want to activate
the block layer on the target after migration anyway?



Hmm I'm not sure about the unconditional activation, since we at least
have to honor LATE_BLOCK_ACTIVATE cap if it's set (and probably delay it
in such a case).  In current libvirt upstream I see such code:


/* Migration capabilities which should always be enabled as long as they
* are supported by QEMU. If the capability is supposed to be enabled on both
* sides of migration, it won't be enabled unless both sides support it.
*/
static const qemuMigrationParamsAlwaysOnItem qemuMigrationParamsAlwaysOn[] = {
   {QEMU_MIGRATION_CAP_PAUSE_BEFORE_SWITCHOVER,
QEMU_MIGRATION_SOURCE},
   
   {QEMU_MIGRATION_CAP_LATE_BLOCK_ACTIVATE,

QEMU_MIGRATION_DESTINATION},
};


which means that libvirt always wants LATE_BLOCK_ACTIVATE to be set.

The code from process_incoming_migration_bh() you're referring to:


   /* If capability late_block_activate is set:
* Only fire up the block code now if we're going to restart the
* VM, else 'cont' will do it.
* This causes file locking to happen; so we don't want it to happen
* unless we really are starting the VM.
*/
   if (!migrate_late_block_activate() ||
(autostart && (!global_state_received() ||
   runstate_is_live(global_state_get_runstate())))) {
   /* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
   bdrv_activate_all(&local_err);
   if (local_err) {
   error_report_err(local_err);
   local_err = NULL;
   autostart = false;
   }
   }


It states explicitly that we're either going to start the VM right at this
point if (autostart == true), or we wait till the "cont" command happens.
None of this is going to happen if we start another migration while
still being in the PAUSED state.  So I think it seems reasonable to take
such a case into account.  For instance, this patch does prevent the crash:


diff --git a/migration/migration.c b/migration/migration.c
index ae2be31557..3222f6745b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -733,7 +733,8 @@ static void process_incoming_migration_bh(void *opaque)
 */
if (!migrate_late_block_activate() ||
 (autostart && (!global_state_received() ||
-runstate_is_live(global_state_get_runstate())))) {
+runstate_is_live(global_state_get_runstate()))) ||
+ (!autostart && global_state_get_runstate() == RUN_STATE_PAUSED)) {
/* Make sure all file formats throw away their mutable metadata.
 * If we get an error here, just don't restart the VM yet. */
bdrv_activate_all(&local_err);


What are your thoughts on it?



This bug is the same as https://gitlab.com/qemu-project/qemu/-/issues/2395



Hmmm... Don't we violate the "late-block-activate" contract by this?

Let me go and check:

# @late-block-activate: If enabled, the destination will not activate
# block devices (and thus take locks) immediately at the end of
# migration.  (since 3.0)

Yes, we'll violate it by this patch. So, for now the only exception is
when autostart is enabled, but libvirt correctly uses
late-block-activate + !autostart.

Interesting, when is the block layer assumed to be activated... Aha, only in
qmp_cont().


So, what to do with this all:

Either libvirt should not use late-block-activate for migration of a
stopped VM. This way the target would be automatically activated.

Or, if libvirt still needs postponed activation (I assume, for correctly
switching shared disks, etc.), libvirt should do some additional QMP
call. It can't be "cont", if we don't want to run the VM. So,
probably, we need an additional "block-activate" QMP command for this.

Re: [Bug Report][RFC PATCH 1/1] block: fix failing assert on paused VM migration

2024-10-09 Thread Vladimir Sementsov-Ogievskiy

On 09.10.24 23:53, Fabiano Rosas wrote:

Vladimir Sementsov-Ogievskiy  writes:


On 30.09.24 17:07, Andrey Drobyshev wrote:

On 9/30/24 12:25 PM, Vladimir Sementsov-Ogievskiy wrote:

[add migration maintainers]

On 24.09.24 15:56, Andrey Drobyshev wrote:

[...]


I doubt that this is a correct way to go.

As far as I understand, "inactive" actually means that "storage does not
belong to QEMU, but to someone else (another QEMU process, for example),
and may be changed transparently". In turn this means that QEMU should
do nothing with inactive disks. So the problem is that nobody called
bdrv_activate_all on the target, and we shouldn't ignore that.

Hmm, I see in process_incoming_migration_bh() we do call
bdrv_activate_all(), but only in some scenarios. Maybe the condition
should be less strict here.

Why do we need any condition here at all? Don't we want to activate
the block layer on the target after migration anyway?



Hmm I'm not sure about the unconditional activation, since we at least
have to honor LATE_BLOCK_ACTIVATE cap if it's set (and probably delay it
in such a case).  In current libvirt upstream I see such code:


/* Migration capabilities which should always be enabled as long as they
   * are supported by QEMU. If the capability is supposed to be enabled on both
   * sides of migration, it won't be enabled unless both sides support it.
   */
static const qemuMigrationParamsAlwaysOnItem qemuMigrationParamsAlwaysOn[] = {
  {QEMU_MIGRATION_CAP_PAUSE_BEFORE_SWITCHOVER,
   QEMU_MIGRATION_SOURCE},
  
  {QEMU_MIGRATION_CAP_LATE_BLOCK_ACTIVATE,

   QEMU_MIGRATION_DESTINATION},
};


which means that libvirt always wants LATE_BLOCK_ACTIVATE to be set.

The code from process_incoming_migration_bh() you're referring to:


  /* If capability late_block_activate is set:
   * Only fire up the block code now if we're going to restart the
   * VM, else 'cont' will do it.
   * This causes file locking to happen; so we don't want it to happen
   * unless we really are starting the VM.
   */
  if (!migrate_late_block_activate() ||
   (autostart && (!global_state_received() ||
   runstate_is_live(global_state_get_runstate())))) {
  /* Make sure all file formats throw away their mutable metadata.
   * If we get an error here, just don't restart the VM yet. */
  bdrv_activate_all(&local_err);
  if (local_err) {
  error_report_err(local_err);
  local_err = NULL;
  autostart = false;
  }
  }


It states explicitly that we're either going to start the VM right at this
point if (autostart == true), or we wait till the "cont" command happens.
None of this is going to happen if we start another migration while
still being in the PAUSED state.  So I think it seems reasonable to take
such a case into account.  For instance, this patch does prevent the crash:


diff --git a/migration/migration.c b/migration/migration.c
index ae2be31557..3222f6745b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -733,7 +733,8 @@ static void process_incoming_migration_bh(void *opaque)
*/
   if (!migrate_late_block_activate() ||
(autostart && (!global_state_received() ||
-runstate_is_live(global_state_get_runstate())))) {
+runstate_is_live(global_state_get_runstate()))) ||
+ (!autostart && global_state_get_runstate() == RUN_STATE_PAUSED)) {
   /* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
   bdrv_activate_all(&local_err);


What are your thoughts on it?



This bug is the same as https://gitlab.com/qemu-project/qemu/-/issues/2395



Hmmm... Don't we violate the "late-block-activate" contract by this?

Let me go and check:

# @late-block-activate: If enabled, the destination will not activate
# block devices (and thus take locks) immediately at the end of
# migration.  (since 3.0)

Yes, we'll violate it by this patch. So, for now the only exception is
when autostart is enabled, but libvirt correctly uses
late-block-activate + !autostart.

Interesting, when is the block layer assumed to be activated... Aha, only in
qmp_cont().


So, what to do with this all:

Either libvirt should not use late-block-activate for migration of a
stopped VM. This way the target would be automatically activated.

Or, if libvirt still needs postponed activation (I assume, for correctly
switching shared disks, etc.), libvirt should do some additional QMP
call. It can't be "cont", if we don't want to run the VM. So,
probably, we need an additional "block-activate" QMP command for this.


A third option might be to unconditionally activat

Re: [PATCH v9 4/7] qapi: add blockdev-replace command

2024-10-04 Thread Vladimir Sementsov-Ogievskiy

On 02.10.24 17:41, Vladimir Sementsov-Ogievskiy wrote:

On 26.06.24 14:53, Vladimir Sementsov-Ogievskiy wrote:

diff --git a/qapi/block-core.json b/qapi/block-core.json
index df5e07debd..0a6f08a6e0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -6148,3 +6148,91 @@
  ##
  { 'struct': 'DummyBlockCoreForceArrays',
    'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } }
+
+##
+# @BlockParentType:
+#
+# @qdev: block device, such as created by device_add, and denoted by
+# qdev-id
+#
+# @driver: block driver node, such as created by blockdev-add, and
+# denoted by node-name
+#
+# @export: block export, such created by block-export-add, and
+# denoted by export-id
+#
+# Since 9.1
+##
+{ 'enum': 'BlockParentType',
+  'data': ['qdev', 'driver', 'export'] }
+
+##
+# @BdrvChildRefQdev:
+#
+# @qdev-id: the device's ID or QOM path
+#
+# Since 9.1
+##
+{ 'struct': 'BdrvChildRefQdev',
+  'data': { 'qdev-id': 'str' } }
+
+##
+# @BdrvChildRefExport:
+#
+# @export-id: block export identifier
+#
+# Since 9.1
+##
+{ 'struct': 'BdrvChildRefExport',
+  'data': { 'export-id': 'str' } }
+
+##
+# @BdrvChildRefDriver:
+#
+# @node-name: the node name of the parent block node
+#
+# @child: name of the child to be replaced, like "file" or "backing"
+#
+# Since 9.1
+##
+{ 'struct': 'BdrvChildRefDriver',
+  'data': { 'node-name': 'str', 'child': 'str' } }
+
+##
+# @BlockdevReplace:
+#
+# @parent-type: type of the parent, which child is to be replaced
+#
+# @new-child: new child for replacement
+#
+# Since 9.1
+##
+{ 'union': 'BlockdevReplace',
+  'base': {
+  'parent-type': 'BlockParentType',
+  'new-child': 'str'
+  },
+  'discriminator': 'parent-type',
+  'data': {
+  'qdev': 'BdrvChildRefQdev',
+  'export': 'BdrvChildRefExport',
+  'driver': 'BdrvChildRefDriver'
+  } }
+
+##
+# @blockdev-replace:
+#
+# Replace a block-node associated with device (selected by
+# @qdev-id) or with block-export (selected by @export-id) or
+# any child of block-node (selected by @node-name and @child)
+# with @new-child block-node.
+#
+# Features:
+#
+# @unstable: This command is experimental.
+#
+# Since 9.1
+##
+{ 'command': 'blockdev-replace', 'boxed': true,
+  'features': [ 'unstable' ],
+  'data': 'BlockdevReplace' }



Looking back at this all, I now have another idea: instead of trying to unite
different types of parents, maybe just publish the concept of BdrvChild to QAPI? So
that it will have a unique id. Like for block-nodes, it could be auto-generated
or specified by the user.

Then we'll add parameters for commands:

device-add
    root-child-slot-id: ID

block-export-add
    block-child-slot-id: ID

and for block-drivers we already have BlockdevRef structure, it only lacks an 
id.

{ 'alternate': 'BlockdevRef',
   'data': { 'definition': 'BlockdevOptions',
     'reference': 'str' } }

hmm.. Could it be as simple as


{ 'alternate': 'BlockdevRef',
   'base': { '*child-slot-id': 'str' },
   'data': { 'definition': 'BlockdevOptions',
     'reference': 'str' } }

?


Oops, that was obviously an impossible idea :) Then, I think the only way is to introduce
a virtual "reference" BlockdevDriver, with only one parameter { 'reference':
'str' }; this way the user will be able to specify

file: {driver: reference, reference: NODE_NAME, child-slot-id: NEW_ID}



Unfortunately, no: "../qapi/block-core.json:4781: alternate has unknown key 
'base'"

Anyway, I believe some solution should exist, probably by improving the QAPI generator. Or, add
"child-slot-id" to the base of BlockdevOptions, and add a virtual "reference"
BlockdevDriver, to handle the case with a reference.




And finally, the new command becomes as simple as:

{ 'command': 'blockdev-replace',
   'data': {'child-slot': 'str', 'new-child': 'str' } }
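
If that pans out, usage could look like the following sketch (entirely
hypothetical, in iotests style, since neither the child-slot-id plumbing nor
this form of the command exists yet):

# Hypothetical flow on top of the idea above ('vm' is an iotests VM).
# 1. Name the root child slot explicitly when creating the device:
vm.cmd('device_add', driver='virtio-blk-pci', id='disk0',
       drive='node0', root_child_slot_id='disk0-root')
# 2. Later, replace whatever node is plugged into that slot:
vm.cmd('blockdev-replace', child_slot='disk0-root', new_child='node1')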



--
Best regards,
Vladimir




Re: [PATCH v6 0/3] vhost-user-blk: live resize additional APIs

2024-10-03 Thread Vladimir Sementsov-Ogievskiy

Ping

On 20.09.24 12:49, Vladimir Sementsov-Ogievskiy wrote:

v6: tiny fix: add document comment for sync_config field to fix qdoc
generation. Also, add r-b and a-b marks.

Vladimir Sementsov-Ogievskiy (3):
   qdev-monitor: add option to report GenericError from find_device_state
   vhost-user-blk: split vhost_user_blk_sync_config()
   qapi: introduce device-sync-config

  hw/block/vhost-user-blk.c | 27 ++--
  hw/virtio/virtio-pci.c|  9 +++
  include/hw/qdev-core.h|  6 +
  qapi/qdev.json| 24 ++
  system/qdev-monitor.c | 53 ---
  5 files changed, 108 insertions(+), 11 deletions(-)



--
Best regards,
Vladimir




Re: [PATCH 0/2] fix backup-discard-source test for XFS

2024-10-02 Thread Vladimir Sementsov-Ogievskiy

Hi Kevin!

Now I'm revisiting my old series, and looking here I see that I forgot to add
you to CC.

Does it still make sense?

On 20.06.24 17:44, Vladimir Sementsov-Ogievskiy wrote:

Hi all!

As Kevin reported, the test doesn't work on XFS, as it relies on disk
usage.

Fix it by switching to a dirty bitmap for guest write tracking.

Vladimir Sementsov-Ogievskiy (2):
   iotests/backup-discard-source: convert size variable to be int
   iotests/backup-discard-source: don't use actual-size

  .../qemu-iotests/tests/backup-discard-source  | 39 ---
  1 file changed, 25 insertions(+), 14 deletions(-)



--
Best regards,
Vladimir




[PATCH v3 1/2] qapi: add qom-path to BLOCK_IO_ERROR event

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
We need something more reliable than "device" (which is absent in modern
interfaces) and "node-name" (which may be absent, and actually doesn't
specify the device, which is a source of error) to implement per-device
throttling for the event in the following commit.
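
For illustration, a sketch of how a client could key on the new field
(assuming a Python QMP event handler; the qom-path value is the one from the
qmp-example below):

# Group incoming BLOCK_IO_ERROR events by the new qom-path field, which,
# unlike 'device' or 'node-name', always identifies the emitting device.
errors_per_device = {}

def on_block_io_error(event):
    path = event['data']['qom-path']    # e.g. "/machine/unattached/device[0]"
    errors_per_device.setdefault(path, []).append(event['data']['reason'])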

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/block-backend.c | 21 +
 qapi/block-core.json  |  7 +--
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index db6f9b92a3..9b70f44aec 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1028,22 +1028,34 @@ DeviceState *blk_get_attached_dev(BlockBackend *blk)
 return blk->dev;
 }
 
-/* Return the qdev ID, or if no ID is assigned the QOM path, of the block
- * device attached to the BlockBackend. */
-char *blk_get_attached_dev_id(BlockBackend *blk)
+static char *blk_get_attached_dev_id_or_path(BlockBackend *blk, bool want_id)
 {
 DeviceState *dev = blk->dev;
 IO_CODE();
 
 if (!dev) {
 return g_strdup("");
-} else if (dev->id) {
+} else if (want_id && dev->id) {
 return g_strdup(dev->id);
 }
 
 return object_get_canonical_path(OBJECT(dev)) ?: g_strdup("");
 }
 
+/*
+ * Return the qdev ID, or if no ID is assigned the QOM path, of the block
+ * device attached to the BlockBackend.
+ */
+char *blk_get_attached_dev_id(BlockBackend *blk)
+{
+return blk_get_attached_dev_id_or_path(blk, true);
+}
+
+static char *blk_get_attached_dev_path(BlockBackend *blk)
+{
+return blk_get_attached_dev_id_or_path(blk, false);
+}
+
 /*
  * Return the BlockBackend which has the device model @dev attached if it
  * exists, else null.
@@ -2140,6 +2152,7 @@ static void send_qmp_error_event(BlockBackend *blk,
 
 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
 qapi_event_send_block_io_error(blk_name(blk),
+   blk_get_attached_dev_path(blk),
bs ? bdrv_get_node_name(bs) : NULL, optype,
action, blk_iostatus_is_enabled(blk),
error == ENOSPC, strerror(error));
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c3b0a2376b..b3743022be 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5580,6 +5580,8 @@
 #
 # Emitted when a disk I/O error occurs
 #
+# @qom-path: path to the device object in the QOM tree (since 9.2)
+#
 # @device: device name.  This is always present for compatibility
 # reasons, but it can be empty ("") if the image does not have a
 # device name associated.
@@ -5610,7 +5612,8 @@
 # .. qmp-example::
 #
 # <- { "event": "BLOCK_IO_ERROR",
-#  "data": { "device": "ide0-hd1",
+#  "data": { "qom-path": "/machine/unattached/device[0]",
+#"device": "ide0-hd1",
 #"node-name": "#block212",
 #"operation": "write",
 #"action": "stop",
@@ -5618,7 +5621,7 @@
 #  "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
 ##
 { 'event': 'BLOCK_IO_ERROR',
-  'data': { 'device': 'str', '*node-name': 'str',
+  'data': { 'qom-path': 'str', 'device': 'str', '*node-name': 'str',
 'operation': 'IoOperationType',
 'action': 'BlockErrorAction', '*nospace': 'bool',
 'reason': 'str' } }
-- 
2.34.1




[PATCH v3 0/2] throttling for BLOCK_IO_ERROR

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
v2: switch to qom-path as discriminator, for this, add patch 01.

Leonid Kaplan (1):
  block-backend: per-device throttling of BLOCK_IO_ERROR reports

Vladimir Sementsov-Ogievskiy (1):
  qapi: add qom-path to BLOCK_IO_ERROR event

 block/block-backend.c | 21 +
 monitor/monitor.c |  7 +--
 qapi/block-core.json  |  9 +++--
 3 files changed, 29 insertions(+), 8 deletions(-)

-- 
2.34.1




[PATCH v3 2/2] block-backend: per-device throttling of BLOCK_IO_ERROR reports

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
From: Leonid Kaplan 

BLOCK_IO_ERROR events come from the guest, so we must throttle them.
We still want per-device throttling, so let's use the device id as a key.
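
Conceptually the rate limiting added below behaves like this simplified
sketch (the real monitor code coalesces a pending event per key rather than
dropping it):

import time

INTERVAL = 1.0          # seconds, mirroring 1000 * SCALE_MS in monitor.c
last_report = {}        # qom-path -> time of the last emitted event

def should_emit(event):
    key = event['data']['qom-path']
    now = time.monotonic()
    if now - last_report.get(key, -INTERVAL) >= INTERVAL:
        last_report[key] = now
        return True
    return False        # throttled for this device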

Signed-off-by: Leonid Kaplan 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 monitor/monitor.c| 7 +--
 qapi/block-core.json | 2 ++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/monitor/monitor.c b/monitor/monitor.c
index db52a9c7ef..56786c0ccc 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -308,6 +308,7 @@ int error_printf_unless_qmp(const char *fmt, ...)
 static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
 /* Limit guest-triggerable events to 1 per second */
 [QAPI_EVENT_RTC_CHANGE]= { 1000 * SCALE_MS },
+[QAPI_EVENT_BLOCK_IO_ERROR]= { 1000 * SCALE_MS },
 [QAPI_EVENT_WATCHDOG]  = { 1000 * SCALE_MS },
 [QAPI_EVENT_BALLOON_CHANGE]= { 1000 * SCALE_MS },
 [QAPI_EVENT_QUORUM_REPORT_BAD] = { 1000 * SCALE_MS },
@@ -493,7 +494,8 @@ static unsigned int qapi_event_throttle_hash(const void 
*key)
 hash += g_str_hash(qdict_get_str(evstate->data, "node-name"));
 }
 
-if (evstate->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE) {
+if (evstate->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE ||
+evstate->event == QAPI_EVENT_BLOCK_IO_ERROR) {
 hash += g_str_hash(qdict_get_str(evstate->data, "qom-path"));
 }
 
@@ -519,7 +521,8 @@ static gboolean qapi_event_throttle_equal(const void *a, 
const void *b)
qdict_get_str(evb->data, "node-name"));
 }
 
-if (eva->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE) {
+if (eva->event == QAPI_EVENT_MEMORY_DEVICE_SIZE_CHANGE ||
+eva->event == QAPI_EVENT_BLOCK_IO_ERROR) {
 return !strcmp(qdict_get_str(eva->data, "qom-path"),
qdict_get_str(evb->data, "qom-path"));
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index b3743022be..835f90b118 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5607,6 +5607,8 @@
 # .. note:: If action is "stop", a STOP event will eventually follow
 #the BLOCK_IO_ERROR event.
 #
+# .. note:: This event is rate-limited.
+#
 # Since: 0.13
 #
 # .. qmp-example::
-- 
2.34.1




Re: [PATCH v9 4/7] qapi: add blockdev-replace command

2024-10-02 Thread Vladimir Sementsov-Ogievskiy

On 26.06.24 14:53, Vladimir Sementsov-Ogievskiy wrote:

diff --git a/qapi/block-core.json b/qapi/block-core.json
index df5e07debd..0a6f08a6e0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -6148,3 +6148,91 @@
  ##
  { 'struct': 'DummyBlockCoreForceArrays',
'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } }
+
+##
+# @BlockParentType:
+#
+# @qdev: block device, such as created by device_add, and denoted by
+# qdev-id
+#
+# @driver: block driver node, such as created by blockdev-add, and
+# denoted by node-name
+#
+# @export: block export, such created by block-export-add, and
+# denoted by export-id
+#
+# Since 9.1
+##
+{ 'enum': 'BlockParentType',
+  'data': ['qdev', 'driver', 'export'] }
+
+##
+# @BdrvChildRefQdev:
+#
+# @qdev-id: the device's ID or QOM path
+#
+# Since 9.1
+##
+{ 'struct': 'BdrvChildRefQdev',
+  'data': { 'qdev-id': 'str' } }
+
+##
+# @BdrvChildRefExport:
+#
+# @export-id: block export identifier
+#
+# Since 9.1
+##
+{ 'struct': 'BdrvChildRefExport',
+  'data': { 'export-id': 'str' } }
+
+##
+# @BdrvChildRefDriver:
+#
+# @node-name: the node name of the parent block node
+#
+# @child: name of the child to be replaced, like "file" or "backing"
+#
+# Since 9.1
+##
+{ 'struct': 'BdrvChildRefDriver',
+  'data': { 'node-name': 'str', 'child': 'str' } }
+
+##
+# @BlockdevReplace:
+#
+# @parent-type: type of the parent, which child is to be replaced
+#
+# @new-child: new child for replacement
+#
+# Since 9.1
+##
+{ 'union': 'BlockdevReplace',
+  'base': {
+  'parent-type': 'BlockParentType',
+  'new-child': 'str'
+  },
+  'discriminator': 'parent-type',
+  'data': {
+  'qdev': 'BdrvChildRefQdev',
+  'export': 'BdrvChildRefExport',
+  'driver': 'BdrvChildRefDriver'
+  } }
+
+##
+# @blockdev-replace:
+#
+# Replace a block-node associated with device (selected by
+# @qdev-id) or with block-export (selected by @export-id) or
+# any child of block-node (selected by @node-name and @child)
+# with @new-child block-node.
+#
+# Features:
+#
+# @unstable: This command is experimental.
+#
+# Since 9.1
+##
+{ 'command': 'blockdev-replace', 'boxed': true,
+  'features': [ 'unstable' ],
+  'data': 'BlockdevReplace' }



Looking back at this all, I now have another idea: instead of trying to unite
different types of parents, maybe just publish the concept of BdrvChild to QAPI? So
that it will have a unique id. Like for block-nodes, it could be auto-generated
or specified by the user.

Then we'll add parameters for commands:

device-add
   root-child-slot-id: ID

block-export-add
   block-child-slot-id: ID

and for block-drivers we already have BlockdevRef structure, it only lacks an 
id.

{ 'alternate': 'BlockdevRef',
  'data': { 'definition': 'BlockdevOptions',
'reference': 'str' } }

hmm.. Could it be as simple as


{ 'alternate': 'BlockdevRef',
  'base': { '*child-slot-id': 'str' },
  'data': { 'definition': 'BlockdevOptions',
'reference': 'str' } }

?

Unfortunately, no: "../qapi/block-core.json:4781: alternate has unknown key 
'base'"

Anyway, I believe some solution should exist, probably by improving the QAPI generator. Or, add
"child-slot-id" to the base of BlockdevOptions, and add a virtual "reference"
BlockdevDriver, to handle the case with a reference.




And finally, the new command becomes as simple as:

{ 'command': 'blockdev-replace',
  'data': {'child-slot': 'str', 'new-child': 'str' } }

--
Best regards,
Vladimir




[PATCH v3 6/7] qapi/block-core: deprecate block-job-change

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
That's a first step towards moving to the newer job-* APIs.

The difference between block-job-change and job-change is in
find_block_job_locked() vs find_job_locked() functions. What's
different?

1. find_block_job_locked() finds only block jobs, whereas
   find_job_locked() finds any kind of job.  job-change is a
   compatible extension of block-job-change.

2. find_block_job_locked() reports DeviceNotActive on failure, when
   find_job_locked() reports GenericError.  Since the kind of error
   reported isn't documented for either command, and clients
   shouldn't rely on undocumented error details, job-change is a
   compatible replacement for block-job-change.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 docs/about/deprecated.rst | 5 +
 qapi/block-core.json  | 6 ++
 2 files changed, 11 insertions(+)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 1e21fbbf77..d2461924ff 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -147,6 +147,11 @@ options are removed in favor of using explicit 
``blockdev-create`` and
 ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for
 details.
 
+``block-job-change`` (since 9.2)
+''''''''''''''''''''''''''''''''
+
+Use ``job-change`` instead.
+
 Incorrectly typed ``device_add`` arguments (since 6.2)
 ''''''''''''''''''''''''''''''''''''''''''''''''''''''
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index e314734b53..ed87a9dc1e 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3100,9 +3100,15 @@
 #
 # Change the block job's options.
 #
+# Features:
+#
+# @deprecated: This command is deprecated.  Use @job-change
+# instead.
+#
 # Since: 8.2
 ##
 { 'command': 'block-job-change',
+  'features': ['deprecated'],
   'data': 'JobChangeOptions', 'boxed': true }
 
 ##
-- 
2.34.1




[PATCH v3 7/7] iotests/mirror-change-copy-mode: switch to job-change command

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
block-job-change is deprecated; let's move the test to job-change.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/tests/mirror-change-copy-mode | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/tests/mirror-change-copy-mode 
b/tests/qemu-iotests/tests/mirror-change-copy-mode
index 51788b85c7..e972604ebf 100755
--- a/tests/qemu-iotests/tests/mirror-change-copy-mode
+++ b/tests/qemu-iotests/tests/mirror-change-copy-mode
@@ -150,7 +150,7 @@ class TestMirrorChangeCopyMode(iotests.QMPTestCase):
 len_before_change = result[0]['len']
 
 # Change the copy mode while requests are happening.
-self.vm.cmd('block-job-change',
+self.vm.cmd('job-change',
 id='mirror',
 type='mirror',
 copy_mode='write-blocking')
-- 
2.34.1




[PATCH v3 2/7] blockjob: block_job_change_locked(): check job type

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
The user may specify a wrong type for the job id. Let's check it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 blockjob.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/blockjob.c b/blockjob.c
index 8cfbb15543..788cb1e07d 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -319,6 +319,12 @@ void block_job_change_locked(BlockJob *job, 
JobChangeOptions *opts,
 
 GLOBAL_STATE_CODE();
 
+if (job_type(&job->job) != opts->type) {
+error_setg(errp, "Job '%s' is '%s' job, not '%s'", job->job.id,
+   job_type_str(&job->job), JobType_str(opts->type));
+return;
+}
+
 if (job_apply_verb_locked(&job->job, JOB_VERB_CHANGE, errp)) {
 return;
 }
-- 
2.34.1




[PATCH v3 4/7] blockjob: move change action implementation to job from block-job

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
Like for other block-job-* APIs, we want to have the actual functionality
in the job layer and make block-job-change a deprecated duplicate
of job-change in the following commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/mirror.c   |  7 +++
 blockdev.c   |  2 +-
 blockjob.c   | 26 --
 include/block/blockjob.h | 11 ---
 include/block/blockjob_int.h |  7 ---
 include/qemu/job.h   | 12 
 job-qmp.c|  1 +
 job.c| 23 +++
 8 files changed, 40 insertions(+), 49 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 60e8d83e4f..63e35114f3 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1258,10 +1258,9 @@ static bool commit_active_cancel(Job *job, bool force)
 return force || !job_is_ready(job);
 }
 
-static void mirror_change(BlockJob *job, JobChangeOptions *opts,
-  Error **errp)
+static void mirror_change(Job *job, JobChangeOptions *opts, Error **errp)
 {
-MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
 JobChangeOptionsMirror *change_opts = &opts->u.mirror;
 MirrorCopyMode current;
 
@@ -1316,9 +1315,9 @@ static const BlockJobDriver mirror_job_driver = {
 .pause  = mirror_pause,
 .complete   = mirror_complete,
 .cancel = mirror_cancel,
+.change = mirror_change,
 },
 .drained_poll   = mirror_drained_poll,
-.change = mirror_change,
 .query  = mirror_query,
 };
 
diff --git a/blockdev.c b/blockdev.c
index 626f53102d..b1c3de3862 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3262,7 +3262,7 @@ void qmp_block_job_change(JobChangeOptions *opts, Error 
**errp)
 return;
 }
 
-block_job_change_locked(job, opts, errp);
+job_change_locked(&job->job, opts, errp);
 }
 
 void qmp_change_backing_file(const char *device,
diff --git a/blockjob.c b/blockjob.c
index 788cb1e07d..2769722b37 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -312,32 +312,6 @@ static bool block_job_set_speed(BlockJob *job, int64_t 
speed, Error **errp)
 return block_job_set_speed_locked(job, speed, errp);
 }
 
-void block_job_change_locked(BlockJob *job, JobChangeOptions *opts,
- Error **errp)
-{
-const BlockJobDriver *drv = block_job_driver(job);
-
-GLOBAL_STATE_CODE();
-
-if (job_type(&job->job) != opts->type) {
-error_setg(errp, "Job '%s' is '%s' job, not '%s'", job->job.id,
-   job_type_str(&job->job), JobType_str(opts->type));
-return;
-}
-
-if (job_apply_verb_locked(&job->job, JOB_VERB_CHANGE, errp)) {
-return;
-}
-
-if (drv->change) {
-job_unlock();
-drv->change(job, opts, errp);
-job_lock();
-} else {
-error_setg(errp, "Job type does not support change");
-}
-}
-
 void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n)
 {
 IO_CODE();
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 5dd1b08909..72e849a140 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -173,17 +173,6 @@ bool block_job_has_bdrv(BlockJob *job, BlockDriverState 
*bs);
  */
 bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp);
 
-/**
- * block_job_change_locked:
- * @job: The job to change.
- * @opts: The new options.
- * @errp: Error object.
- *
- * Change the job according to opts.
- */
-void block_job_change_locked(BlockJob *job, JobChangeOptions *opts,
- Error **errp);
-
 /**
  * block_job_query_locked:
  * @job: The job to get information about.
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index d9c3b911d0..58bc7a5cea 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -68,13 +68,6 @@ struct BlockJobDriver {
 
 void (*set_speed)(BlockJob *job, int64_t speed);
 
-/*
- * Change the @job's options according to @opts.
- *
- * Note that this can already be called before the job coroutine is 
running.
- */
-void (*change)(BlockJob *job, JobChangeOptions *opts, Error **errp);
-
 /*
  * Query information specific to this kind of block job.
  */
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 2b873f2576..6fa525dac3 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -27,6 +27,7 @@
 #define JOB_H
 
 #include "qapi/qapi-types-job.h"
+#include "qapi/qapi-types-block-core.h"
 #include "qemu/queue.h"
 #include "qemu/progress_meter.h"
 #include "qemu/coroutine.h"
@@ -307,6 +308,12 @@ struct JobDrive

[PATCH v3 5/7] qapi: add job-change

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
Add a new-style command job-change, doing the same thing as
block-job-change. The aim is to finally deprecate the block-job-* APIs and
move to the job-* APIs.

We add the new command to qapi/block-core.json, not to
qapi/job.json, to avoid resolving JSON file inclusion loops for now.
This will all be a lot simpler to refactor when we finally drop the
deprecated block-job-* APIs.
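
On the wire the arguments are the same as for block-job-change; e.g. in an
iotests-style script (a sketch matching the test update later in this series):

# Switch a running mirror job to write-blocking copy mode via the new command.
vm.cmd('job-change', id='mirror', type='mirror',
       copy_mode='write-blocking')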

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 job-qmp.c| 14 ++
 qapi/block-core.json | 10 ++
 2 files changed, 24 insertions(+)

diff --git a/job-qmp.c b/job-qmp.c
index c764bd3801..248e68f554 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -139,6 +139,20 @@ void qmp_job_dismiss(const char *id, Error **errp)
 job_dismiss_locked(&job, errp);
 }
 
+void qmp_job_change(JobChangeOptions *opts, Error **errp)
+{
+Job *job;
+
+JOB_LOCK_GUARD();
+job = find_job_locked(opts->id, errp);
+
+if (!job) {
+return;
+}
+
+job_change_locked(job, opts, errp);
+}
+
 /* Called with job_mutex held. */
 static JobInfo *job_query_single_locked(Job *job, Error **errp)
 {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f370593037..e314734b53 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3105,6 +3105,16 @@
 { 'command': 'block-job-change',
   'data': 'JobChangeOptions', 'boxed': true }
 
+##
+# @job-change:
+#
+# Change the block job's options.
+#
+# Since: 9.2
+##
+{ 'command': 'job-change',
+  'data': 'JobChangeOptions', 'boxed': true }
+
 ##
 # @BlockdevDiscardOptions:
 #
-- 
2.34.1




[PATCH v3 3/7] qapi: block-job-change: make copy-mode parameter optional

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
We are going to add more parameters to change. We want to make it possible
to change only one or any subset of the available options. So all the
options should be optional.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Acked-by: Markus Armbruster 
---
 block/mirror.c   | 4 
 qapi/block-core.json | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/mirror.c b/block/mirror.c
index 2816bb1042..60e8d83e4f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1272,6 +1272,10 @@ static void mirror_change(BlockJob *job, 
JobChangeOptions *opts,
 
 GLOBAL_STATE_CODE();
 
+if (!change_opts->has_copy_mode) {
+return;
+}
+
 if (qatomic_read(&s->copy_mode) == change_opts->copy_mode) {
 return;
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0156762024..f370593037 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3072,11 +3072,12 @@
 #
 # @copy-mode: Switch to this copy mode.  Currently, only the switch
 # from 'background' to 'write-blocking' is implemented.
+# If absent, copy mode remains the same.  (optional since 9.2)
 #
 # Since: 8.2
 ##
 { 'struct': 'JobChangeOptionsMirror',
-  'data': { 'copy-mode' : 'MirrorCopyMode' } }
+  'data': { '*copy-mode' : 'MirrorCopyMode' } }
 
 ##
 # @JobChangeOptions:
-- 
2.34.1




[PATCH v3 1/7] qapi: rename BlockJobChangeOptions to JobChangeOptions

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
We are going to move the change action from block-job to the job
implementation, and then move to the external job-* APIs, deprecating
the block-job-* APIs. This commit simplifies the further transition.

The commit is made by command

git grep -l BlockJobChangeOptions | \
xargs sed -i 's/BlockJobChangeOptions/JobChangeOptions/g'

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Acked-by: Markus Armbruster 
---
 block/mirror.c   |  4 ++--
 blockdev.c   |  2 +-
 blockjob.c   |  2 +-
 include/block/blockjob.h |  2 +-
 include/block/blockjob_int.h |  2 +-
 qapi/block-core.json | 12 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 61f0a717b7..2816bb1042 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1258,11 +1258,11 @@ static bool commit_active_cancel(Job *job, bool force)
 return force || !job_is_ready(job);
 }
 
-static void mirror_change(BlockJob *job, BlockJobChangeOptions *opts,
+static void mirror_change(BlockJob *job, JobChangeOptions *opts,
   Error **errp)
 {
 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-BlockJobChangeOptionsMirror *change_opts = &opts->u.mirror;
+JobChangeOptionsMirror *change_opts = &opts->u.mirror;
 MirrorCopyMode current;
 
 /*
diff --git a/blockdev.c b/blockdev.c
index 6740663fda..626f53102d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3251,7 +3251,7 @@ void qmp_block_job_dismiss(const char *id, Error **errp)
 job_dismiss_locked(&job, errp);
 }
 
-void qmp_block_job_change(BlockJobChangeOptions *opts, Error **errp)
+void qmp_block_job_change(JobChangeOptions *opts, Error **errp)
 {
 BlockJob *job;
 
diff --git a/blockjob.c b/blockjob.c
index d5f29e14af..8cfbb15543 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -312,7 +312,7 @@ static bool block_job_set_speed(BlockJob *job, int64_t 
speed, Error **errp)
 return block_job_set_speed_locked(job, speed, errp);
 }
 
-void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts,
+void block_job_change_locked(BlockJob *job, JobChangeOptions *opts,
  Error **errp)
 {
 const BlockJobDriver *drv = block_job_driver(job);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 7061ab7201..5dd1b08909 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -181,7 +181,7 @@ bool block_job_set_speed_locked(BlockJob *job, int64_t 
speed, Error **errp);
  *
  * Change the job according to opts.
  */
-void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts,
+void block_job_change_locked(BlockJob *job, JobChangeOptions *opts,
  Error **errp);
 
 /**
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 4c3d2e25a2..d9c3b911d0 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -73,7 +73,7 @@ struct BlockJobDriver {
  *
  * Note that this can already be called before the job coroutine is 
running.
  */
-void (*change)(BlockJob *job, BlockJobChangeOptions *opts, Error **errp);
+void (*change)(BlockJob *job, JobChangeOptions *opts, Error **errp);
 
 /*
  * Query information specific to this kind of block job.
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c3b0a2376b..0156762024 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3068,18 +3068,18 @@
   'allow-preconfig': true }
 
 ##
-# @BlockJobChangeOptionsMirror:
+# @JobChangeOptionsMirror:
 #
 # @copy-mode: Switch to this copy mode.  Currently, only the switch
 # from 'background' to 'write-blocking' is implemented.
 #
 # Since: 8.2
 ##
-{ 'struct': 'BlockJobChangeOptionsMirror',
+{ 'struct': 'JobChangeOptionsMirror',
   'data': { 'copy-mode' : 'MirrorCopyMode' } }
 
 ##
-# @BlockJobChangeOptions:
+# @JobChangeOptions:
 #
 # Block job options that can be changed after job creation.
 #
@@ -3089,10 +3089,10 @@
 #
 # Since: 8.2
 ##
-{ 'union': 'BlockJobChangeOptions',
+{ 'union': 'JobChangeOptions',
   'base': { 'id': 'str', 'type': 'JobType' },
   'discriminator': 'type',
-  'data': { 'mirror': 'BlockJobChangeOptionsMirror' } }
+  'data': { 'mirror': 'JobChangeOptionsMirror' } }
 
 ##
 # @block-job-change:
@@ -3102,7 +3102,7 @@
 # Since: 8.2
 ##
 { 'command': 'block-job-change',
-  'data': 'BlockJobChangeOptions', 'boxed': true }
+  'data': 'JobChangeOptions', 'boxed': true }
 
 ##
 # @BlockdevDiscardOptions:
-- 
2.34.1




[PATCH v3 0/7] introduce job-change qmp command

2024-10-02 Thread Vladimir Sementsov-Ogievskiy
Hi all!

Here is new job-change command - a replacement for (becoming deprecated)
block-job-change, as a first step of my "[RFC 00/15] block job API"

v3:
01: add a-b by Markus
03: add a-b by Markus, s/9.1/9.2/ in QAPI
05: update commit message, s/9.1/9.2/ in QAPI
06: update commit message (and subject!), s/9.1/9.2/ in deprecated.rst

Vladimir Sementsov-Ogievskiy (7):
  qapi: rename BlockJobChangeOptions to JobChangeOptions
  blockjob: block_job_change_locked(): check job type
  qapi: block-job-change: make copy-mode parameter optional
  blockjob: move change action implementation to job from block-job
  qapi: add job-change
  qapi/block-core: deprecate block-job-change
  iotests/mirror-change-copy-mode: switch to job-change command

 block/mirror.c| 13 +---
 blockdev.c|  4 +--
 blockjob.c| 20 
 docs/about/deprecated.rst |  5 +++
 include/block/blockjob.h  | 11 ---
 include/block/blockjob_int.h  |  7 -
 include/qemu/job.h| 12 +++
 job-qmp.c | 15 +
 job.c | 23 ++
 qapi/block-core.json  | 31 ++-
 .../tests/mirror-change-copy-mode |  2 +-
 11 files changed, 90 insertions(+), 53 deletions(-)

-- 
2.34.1




Re: [PATCH 2/2] nbd/server: Allow users to adjust handshake limit in QMP

2024-10-02 Thread Vladimir Sementsov-Ogievskiy

On 02.10.24 16:49, Markus Armbruster wrote:

Eric Blake  writes:


Although defaulting the handshake limit to 10 seconds was a nice QoI
change to weed out intentionally slow clients, it can interfere with
integration testing done with manual NBD_OPT commands over 'nbdsh
--opt-mode'.  Expose a QMP knob 'handshake-max-secs' to allow the user
to alter the timeout away from the default.

The parameter name here intentionally matches the spelling of the
constant added in commit fb1c2aaa98, and not the command-line spelling
added in the previous patch for qemu-nbd; that's because in QMP,
longer names serve as good self-documentation, and unlike the command
line, machines don't have problems generating longer spellings.

Signed-off-by: Eric Blake 
---
  qapi/block-export.json | 10 ++
  include/block/nbd.h|  6 +++---
  block/monitor/block-hmp-cmds.c |  4 ++--
  blockdev-nbd.c | 26 ++
  4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/qapi/block-export.json b/qapi/block-export.json
index ce33fe378df..c110dd375ad 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -17,6 +17,10 @@
  #
  # @addr: Address on which to listen.
  #
+# @handshake-max-secs: Time limit, in seconds, at which a client that
+# has not completed the negotiation handshake will be disconnected,
+# or 0 for no limit (since 9.2; default: 10).
+#
  # @tls-creds: ID of the TLS credentials object (since 2.6).
  #
  # @tls-authz: ID of the QAuthZ authorization object used to validate
@@ -34,6 +38,7 @@
  ##
  { 'struct': 'NbdServerOptions',
'data': { 'addr': 'SocketAddress',
+'*handshake-max-secs': 'uint32',
  '*tls-creds': 'str',
  '*tls-authz': 'str',
  '*max-connections': 'uint32' } }
@@ -52,6 +57,10 @@
  #
  # @addr: Address on which to listen.
  #
+# @handshake-max-secs: Time limit, in seconds, at which a client that
+# has not completed the negotiation handshake will be disconnected,
+# or 0 for no limit (since 9.2; default: 10).
+#
  # @tls-creds: ID of the TLS credentials object (since 2.6).
  #
  # @tls-authz: ID of the QAuthZ authorization object used to validate
@@ -72,6 +81,7 @@
  ##
  { 'command': 'nbd-server-start',
'data': { 'addr': 'SocketAddressLegacy',
+'*handshake-max-secs': 'uint32',
  '*tls-creds': 'str',
  '*tls-authz': 'str',
  '*max-connections': 'uint32' },


Are we confident we'll never need less than a full second?



Hmm, recent "[PATCH v2] chardev: introduce 'reconnect-ms' and deprecate 
'reconnect'" shows that at least sometimes second is not enough precision.

Maybe, using milliseconds consistently for all relatively short time intervals 
in QAPI would be a good rule?

--
Best regards,
Vladimir




Re: [PATCH 2/2] nbd/server: Allow users to adjust handshake limit in QMP

2024-10-02 Thread Vladimir Sementsov-Ogievskiy

On 09.08.24 19:14, Eric Blake wrote:

Although defaulting the handshake limit to 10 seconds was a nice QoI
change to weed out intentionally slow clients, it can interfere with
integration testing done with manual NBD_OPT commands over 'nbdsh
--opt-mode'.  Expose a QMP knob 'handshake-max-secs' to allow the user
to alter the timeout away from the default.

The parameter name here intentionally matches the spelling of the
constant added in commit fb1c2aaa98, and not the command-line spelling
added in the previous patch for qemu-nbd; that's because in QMP,
longer names serve as good self-documentation, and unlike the command
line, machines don't have problems generating longer spellings.

Signed-off-by: Eric Blake 



Reviewed-by: Vladimir Sementsov-Ogievskiy 


---
  qapi/block-export.json | 10 ++
  include/block/nbd.h|  6 +++---
  block/monitor/block-hmp-cmds.c |  4 ++--
  blockdev-nbd.c | 26 ++
  4 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/qapi/block-export.json b/qapi/block-export.json
index ce33fe378df..c110dd375ad 100644
--- a/qapi/block-export.json
+++ b/qapi/block-export.json
@@ -17,6 +17,10 @@
  #
  # @addr: Address on which to listen.
  #
+# @handshake-max-secs: Time limit, in seconds, at which a client that
+# has not completed the negotiation handshake will be disconnected,
+# or 0 for no limit (since 9.2; default: 10).
+#
  # @tls-creds: ID of the TLS credentials object (since 2.6).
  #
  # @tls-authz: ID of the QAuthZ authorization object used to validate
@@ -34,6 +38,7 @@
  ##
  { 'struct': 'NbdServerOptions',
'data': { 'addr': 'SocketAddress',
+'*handshake-max-secs': 'uint32',
  '*tls-creds': 'str',
  '*tls-authz': 'str',
  '*max-connections': 'uint32' } }
@@ -52,6 +57,10 @@
  #
  # @addr: Address on which to listen.
  #
+# @handshake-max-secs: Time limit, in seconds, at which a client that
+# has not completed the negotiation handshake will be disconnected,
+# or 0 for no limit (since 9.2; default: 10).
+#
  # @tls-creds: ID of the TLS credentials object (since 2.6).
  #
  # @tls-authz: ID of the QAuthZ authorization object used to validate
@@ -72,6 +81,7 @@
  ##
  { 'command': 'nbd-server-start',
'data': { 'addr': 'SocketAddressLegacy',
+'*handshake-max-secs': 'uint32',
  '*tls-creds': 'str',
  '*tls-authz': 'str',
  '*max-connections': 'uint32' },


Hmm, can we make a common base for these two structures, to avoid adding things
always in two places? At some point it would be good to somehow deprecate the old
syntax and finally remove it. SocketAddressLegacy is marked as deprecated in a
comment in qapi/sockets.json, but there is no word in deprecated.rst, and I'm unsure
how it is possible to deprecate this... Maybe the only way is introducing a new
command and deprecating nbd-server-start altogether. Aha, that should happen
when we add the possibility to start multiple NBD servers.

--
Best regards,
Vladimir




Re: [PATCH 1/2] qemu-nbd: Allow users to adjust handshake limit

2024-10-02 Thread Vladimir Sementsov-Ogievskiy

On 09.08.24 19:14, Eric Blake wrote:

Although defaulting the handshake limit to 10 seconds was a nice QoI
change to weed out intentionally slow clients, it can interfere with
integration testing done with manual NBD_OPT commands over 'nbdsh
--opt-mode'.  Expose a command line option to allow the user to alter
the timeout away from the default.  This option is unlikely to be used
in enough scenarios to warrant a short option letter.

The option --handshake-limit intentionally differs from the name of
the constant added in commit fb1c2aaa98 (limit instead of max_secs)
and the QMP name to be added in the next commit; this is because
typing a longer command-line name is undesirable and there is
sufficient --help text to document the units.

Signed-off-by: Eric Blake




Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH v3 5/5] block: add test non-active commit with zeroed data

2024-10-02 Thread Vladimir Sementsov-Ogievskiy

On 01.09.24 17:24, Vincent Vanlaer wrote:

Signed-off-by: Vincent Vanlaer 
---
  tests/qemu-iotests/315 | 95 ++
  tests/qemu-iotests/315.out | 54 ++


Please place new tests in tests/qemu-iotests/tests, with a human-readable name,
something like commit-zeroes or whatever you prefer.


  2 files changed, 149 insertions(+)
  create mode 100755 tests/qemu-iotests/315
  create mode 100644 tests/qemu-iotests/315.out

diff --git a/tests/qemu-iotests/315 b/tests/qemu-iotests/315
new file mode 100755
index 00..84865f8001
--- /dev/null
+++ b/tests/qemu-iotests/315
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+# group: rw quick
+#
+# Test for commit of discarded blocks
+#
+# This tests committing a live snapshot where some of the blocks that
+# are present in the base image are discarded in the intermediate image.
+# This intends to check that these blocks are also discarded in the base
+# image after the commit.
+#
+# Copyright (C) 2024 Vincent Vanlaer.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# creator
+owner=libvirt-e6954...@volkihar.be
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1   # failure is the default!
+
+_cleanup()
+{
+_cleanup_qemu
+_rm_test_img "${TEST_IMG}.base"
+_rm_test_img "${TEST_IMG}.mid"
+_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks


An example of a bash test under tests/ is tests/qemu-iotests/tests/qemu-img-bitmaps,
so you'll need "cd .." here, before ". ./common.rc".

I know, this all looks suboptimal, but still, human-readable names are much
better than numbers.

With that:

Tested-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Vladimir Sementsov-Ogievskiy 


+. ./common.rc
+. ./common.filter
+. ./common.qemu
+



--
Best regards,
Vladimir




Re: [PATCH v3 4/5] block: allow commit to unmap zero blocks

2024-10-02 Thread Vladimir Sementsov-Ogievskiy

On 01.09.24 17:24, Vincent Vanlaer wrote:

Non-active block commits do not discard blocks only containing zeros,
causing images to lose sparseness after the commit. This commit fixes
that by writing zero blocks using blk_co_pwrite_zeroes rather than
writing them out as any other arbitrary data.

Signed-off-by: Vincent Vanlaer


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PULL 3/5] block/reqlist: allow adding overlapping requests

2024-10-02 Thread Vladimir Sementsov-Ogievskiy

On 01.10.24 19:28, Michael Tokarev wrote:

30.09.2024 11:43, Vladimir Sementsov-Ogievskiy wrote:

From: Fiona Ebner 

Allow overlapping request by removing the assert that made it
impossible. There are only two callers:

1. block_copy_task_create()

It already asserts the very same condition before calling
reqlist_init_req().

2. cbw_snapshot_read_lock()

There is no need to have read requests be non-overlapping in
copy-before-write when used for snapshot-access. In fact, there was no
protection against two callers of cbw_snapshot_read_lock() calling
reqlist_init_req() with overlapping ranges and this could lead to an
assertion failure [1].

In particular, with the reproducer script below [0], two
cbw_co_snapshot_block_status() callers could race, with the second
calling reqlist_init_req() before the first one finishes and removes
its conflicting request.


Hm.  This one applies to 7.2 too (current oldest stable series), with
the description above matching what the code is doing.

I picked it up for up to 7.2.  Please let me know if this shouldn't be
done :)



I don't see any problems) Still, that's not a guarantee that we don't have 
them. At least, we definitely lack a test for this case.


--
Best regards,
Vladimir




Re: [PATCH v3 21/22] qom/object: fix -Werror=maybe-uninitialized

2024-10-02 Thread Vladimir Sementsov-Ogievskiy

On 01.10.24 18:22, Marc-André Lureau wrote:

Hi Vladimir

On Tue, Oct 1, 2024 at 6:06 PM Vladimir Sementsov-Ogievskiy <vsement...@yandex-team.ru> wrote:

On 30.09.24 11:14, marcandre.lur...@redhat.com wrote:
 > From: Marc-André Lureau <marcandre.lur...@redhat.com>
 >
 > object_resolve_path_type() didn't always set *ambiguousp.
 >
 > Signed-off-by: Marc-André Lureau <marcandre.lur...@redhat.com>
 > ---
 >   qom/object.c | 5 -
 >   1 file changed, 4 insertions(+), 1 deletion(-)
 >
 > diff --git a/qom/object.c b/qom/object.c
 > index 28c5b66eab..bdc8a2c666 100644
 > --- a/qom/object.c
 > +++ b/qom/object.c
 > @@ -2201,6 +2201,9 @@ Object *object_resolve_path_type(const char *path, 
const char *typename,
 >           }
 >       } else {
 >           obj = object_resolve_abs_path(object_get_root(), parts + 1, 
typename);
 > +        if (ambiguousp) {
 > +            *ambiguousp = false;
 > +        }

Doesn't this hunk in isolation fix the issue? With this,
object_resolve_path_type() should set the pointer on all paths if it is
non-null.



Hmm, the called object_resolve_partial_path() also doesn't set ambiguous on
every path, so this hunk is at least incomplete.


yeah, but object_resolve_path_type() initializes it.

I'm unsure what semantics are expected around the ambiguous pointer, but it seems to me
that it is set only on failure paths, as the reason why we failed. If this is true, I
think we only need the second hunk, which initializes the local "ambig".


right, and that seems good enough.

Do you ack/rb this change then?


     qom/object: fix -Werror=maybe-uninitialized

     object_resolve_path_type() didn't always set *ambiguousp.

     Signed-off-by: Marc-André Lureau <marcandre.lur...@redhat.com>

diff --git a/qom/object.c b/qom/object.c
index 28c5b66eab..d3d3003541 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -2226,7 +2226,7 @@ Object *object_resolve_path_at(Object *parent, const char 
*path)

  Object *object_resolve_type_unambiguous(const char *typename, Error **errp)
  {
-    bool ambig;
+    bool ambig = false;
      Object *o = object_resolve_path_type("", typename, &ambig);

      if (ambig) {




Yes, I think this one in isolation is OK:
Reviewed-by: Vladimir Sementsov-Ogievskiy 



thanks!

 >       }
 >
 >       g_strfreev(parts);
 > @@ -2226,7 +2229,7 @@ Object *object_resolve_path_at(Object *parent, 
const char *path)
 >
 >   Object *object_resolve_type_unambiguous(const char *typename, Error 
**errp)
 >   {
 > -    bool ambig;
 > +    bool ambig = false;
 >       Object *o = object_resolve_path_type("", typename, &ambig);
 >
 >       if (ambig) {

-- 
Best regards,

Vladimir




--
Marc-André Lureau


--
Best regards,
Vladimir




Re: [PATCH v3 21/22] qom/object: fix -Werror=maybe-uninitialized

2024-10-01 Thread Vladimir Sementsov-Ogievskiy

On 30.09.24 11:14, marcandre.lur...@redhat.com wrote:

From: Marc-André Lureau 

object_resolve_path_type() didn't always set *ambiguousp.

Signed-off-by: Marc-André Lureau 
---
  qom/object.c | 5 -
  1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/qom/object.c b/qom/object.c
index 28c5b66eab..bdc8a2c666 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -2201,6 +2201,9 @@ Object *object_resolve_path_type(const char *path, const 
char *typename,
  }
  } else {
  obj = object_resolve_abs_path(object_get_root(), parts + 1, typename);
+if (ambiguousp) {
+*ambiguousp = false;
+}


Doesn't this hunk in isolation fix the issue? With this,
object_resolve_path_type() should set the pointer on all paths if it is
non-null.

Hmm, the called object_resolve_partial_path() also doesn't set ambiguous on every
path, so this hunk is at least incomplete.

I'm unsure what semantics are expected around the ambiguous pointer, but it seems to me
that it is set only on failure paths, as the reason why we failed. If this is true, I
think we only need the second hunk, which initializes the local "ambig".


  }
  
  g_strfreev(parts);

@@ -2226,7 +2229,7 @@ Object *object_resolve_path_at(Object *parent, const char 
*path)
  
  Object *object_resolve_type_unambiguous(const char *typename, Error **errp)

  {
-bool ambig;
+bool ambig = false;
  Object *o = object_resolve_path_type("", typename, &ambig);
  
  if (ambig) {


--
Best regards,
Vladimir




Re: [PATCH v3 20/22] block: fix -Werror=maybe-uninitialized false-positive

2024-10-01 Thread Vladimir Sementsov-Ogievskiy

On 30.09.24 11:14, marcandre.lur...@redhat.com wrote:

From: Marc-André Lureau

../block/file-posix.c:1405:17: error: ‘zoned’ may be used uninitialized 
[-Werror=maybe-uninitialized]
  1405 | if (ret < 0 || zoned == BLK_Z_NONE) {

Signed-off-by: Marc-André Lureau


Reviewed-by: Vladimir Sementsov-Ogievskiy 


--
Best regards,
Vladimir




Re: [Bug Report][RFC PATCH 1/1] block: fix failing assert on paused VM migration

2024-10-01 Thread Vladimir Sementsov-Ogievskiy

On 30.09.24 17:07, Andrey Drobyshev wrote:

On 9/30/24 12:25 PM, Vladimir Sementsov-Ogievskiy wrote:

[add migration maintainers]

On 24.09.24 15:56, Andrey Drobyshev wrote:

[...]


I doubt that this is a correct way to go.

As far as I understand, "inactive" actually means that "the storage does not
belong to QEMU, but to someone else (another QEMU process, for example),
and may be changed transparently". In turn this means that QEMU should
do nothing with inactive disks. So the problem is that nobody called
bdrv_activate_all on the target, and we shouldn't ignore that.

Hmm, I see in process_incoming_migration_bh() we do call
bdrv_activate_all(), but only in some scenarios. Maybe the condition
should be less strict here.

Why do we need any condition here at all? Don't we want to activate the
block layer on the target after migration anyway?



Hmm I'm not sure about the unconditional activation, since we at least
have to honor LATE_BLOCK_ACTIVATE cap if it's set (and probably delay it
in such a case).  In current libvirt upstream I see such code:


/* Migration capabilities which should always be enabled as long as they
  * are supported by QEMU. If the capability is supposed to be enabled on both
  * sides of migration, it won't be enabled unless both sides support it.
  */
static const qemuMigrationParamsAlwaysOnItem qemuMigrationParamsAlwaysOn[] = {
 {QEMU_MIGRATION_CAP_PAUSE_BEFORE_SWITCHOVER,
  QEMU_MIGRATION_SOURCE},
 
 {QEMU_MIGRATION_CAP_LATE_BLOCK_ACTIVATE,

  QEMU_MIGRATION_DESTINATION},
};


which means that libvirt always wants LATE_BLOCK_ACTIVATE to be set.

The code from process_incoming_migration_bh() you're referring to:


 /* If capability late_block_activate is set:
  * Only fire up the block code now if we're going to restart the
  * VM, else 'cont' will do it.
  * This causes file locking to happen; so we don't want it to happen
  * unless we really are starting the VM.
  */
 if (!migrate_late_block_activate() ||
  (autostart && (!global_state_received() ||
 runstate_is_live(global_state_get_runstate())))) {
 /* Make sure all file formats throw away their mutable metadata.
  * If we get an error here, just don't restart the VM yet. */
 bdrv_activate_all(&local_err);
 if (local_err) {
 error_report_err(local_err);
 local_err = NULL;
 autostart = false;
 }
 }


It states explicitly that we're either going to start the VM right at this
point if (autostart == true), or we wait until the "cont" command happens.
None of this is going to happen if we start another migration while
still being in the PAUSED state.  So I think it seems reasonable to take
such a case into account.  For instance, this patch does prevent the crash:


diff --git a/migration/migration.c b/migration/migration.c
index ae2be31557..3222f6745b 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -733,7 +733,8 @@ static void process_incoming_migration_bh(void *opaque)
   */
  if (!migrate_late_block_activate() ||
   (autostart && (!global_state_received() ||
-runstate_is_live(global_state_get_runstate())))) {
+runstate_is_live(global_state_get_runstate()))) ||
+ (!autostart && global_state_get_runstate() == RUN_STATE_PAUSED)) {
  /* Make sure all file formats throw away their mutable metadata.
   * If we get an error here, just don't restart the VM yet. */
  bdrv_activate_all(&local_err);


What are your thoughts on it?



Hmmm... Don't we violate the "late-block-activate" contract with this?

Let me go and check:

# @late-block-activate: If enabled, the destination will not activate
# block devices (and thus take locks) immediately at the end of
# migration.  (since 3.0)

Yes, we'll violate it with this patch. So, for now the only exception is when
autostart is enabled, but libvirt correctly uses late-block-activate +
!autostart.

Interesting, when is the block layer assumed to be activated? Aha, only in
qmp_cont().


So, what to do with this all:

Either libvirt should not use late-block-activate for migration of a stopped VM;
this way the target would be activated automatically.

Or, if libvirt still needs postponed activation (I assume, for correctly switching
shared disks, etc.), libvirt should do some additional QMP call. It can't be "cont"
if we don't want to run the VM. So, probably, we need an additional "block-activate"
QMP command for this.
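
To make that concrete, a minimal sketch of what such a command could look like
(purely hypothetical: a "block-activate" command does not exist today, and the
handler would simply reuse the existing bdrv_activate_all()):

#include "qemu/osdep.h"
#include "block/block.h"
#include "qapi/error.h"

/*
 * Hypothetical QMP handler sketch: let management activate the block
 * layer (taking file locks, etc.) on the destination after migrating a
 * paused VM, without having to issue "cont".
 */
void qmp_block_activate(Error **errp)
{
    bdrv_activate_all(errp);
}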

And, anyway, trying to migrate an inactive block layer should fail with a good
error message rather than crash.

--
Best regards,
Vladimir




Re: [PATCH v3 16/22] target/loongarch: fix -Werror=maybe-uninitialized false-positive

2024-10-01 Thread Vladimir Sementsov-Ogievskiy

On 30.09.24 11:14, marcandre.lur...@redhat.com wrote:

From: Marc-André Lureau

../target/loongarch/gdbstub.c:55:20: error: ‘val’ may be used uninitialized 
[-Werror=maybe-uninitialized]
55 | return gdb_get_reg32(mem_buf, val);
   |^~~
../target/loongarch/gdbstub.c:39:18: note: ‘val’ was declared here
39 | uint64_t val;

Signed-off-by: Marc-André Lureau



Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH v3 3/5] block: refactor error handling of commit_iteration

2024-09-30 Thread Vladimir Sementsov-Ogievskiy

On 01.09.24 17:24, Vincent Vanlaer wrote:

Signed-off-by: Vincent Vanlaer 
---
  block/commit.c | 37 ++---
  1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index 9eedd1fa47..288e413be3 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -130,7 +130,6 @@ static void commit_clean(Job *job)
  
  static int commit_iteration(CommitBlockJob *s, int64_t offset, int64_t *n, void *buf) {

  int ret = 0;
-bool copy;
  bool error_in_source = true;
  
  /* Copy if allocated above the base */

@@ -140,19 +139,34 @@ static int commit_iteration(CommitBlockJob *s, int64_t 
offset, int64_t *n, void
  n, NULL, NULL, NULL);
  }
  
-copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED);

+if (ret < 0) {
+goto handle_error;
+}
+
  trace_commit_one_iteration(s, offset, *n, ret);


this way we lose the trace point for the error case. Let's move trace_... above
the "if (ret < 0)" check.


-if (copy) {
+
+if (ret & BDRV_BLOCK_ALLOCATED) {
  assert(*n < SIZE_MAX);
  
  ret = blk_co_pread(s->top, offset, *n, buf, 0);

-if (ret >= 0) {
-ret = blk_co_pwrite(s->base, offset, *n, buf, 0);
-if (ret < 0) {
-error_in_source = false;
-}
+if (ret < 0) {
+goto handle_error;
  }
+
+ret = blk_co_pwrite(s->base, offset, *n, buf, 0);
+if (ret < 0) {
+error_in_source = false;
+goto handle_error;
+}
+
+block_job_ratelimit_processed_bytes(&s->common, *n);
  }
+
+/* Publish progress */
+
+job_progress_update(&s->common.job, *n);
+


it looks a bit strange to fall through to "handle_error" from the success path


I suggest:

@@ -166,17 +166,17 @@ static int commit_iteration(CommitBlockJob *s, int64_t 
offset, int64_t *n, void
 
 job_progress_update(&s->common.job, *n);
 
-handle_error:

-if (ret < 0) {
-BlockErrorAction action = block_job_error_action(&s->common, 
s->on_error,
- error_in_source, 
-ret);
-if (action == BLOCK_ERROR_ACTION_REPORT) {
-return ret;
-} else {
-*n = 0;
-}
+return 0;
+
+fail:
+BlockErrorAction action = block_job_error_action(&s->common, s->on_error,
+ error_in_source, -ret);
+if (action == BLOCK_ERROR_ACTION_REPORT) {
+return ret;
 }
 
+/* Retry iteration */

+*n = 0;
 return 0;
 }



(also, "fail" name is more popular for such labels:
$ git grep 'goto fail' | wc -l
1334
)




+handle_error:
  if (ret < 0) {
  BlockErrorAction action = block_job_error_action(&s->common, 
s->on_error,
   error_in_source, 
-ret);
@@ -160,15 +174,8 @@ static int commit_iteration(CommitBlockJob *s, int64_t 
offset, int64_t *n, void
  return ret;
  } else {
  *n = 0;
-return 0;
  }
  }
-/* Publish progress */
-job_progress_update(&s->common.job, *n);
-
-if (copy) {
-block_job_ratelimit_processed_bytes(&s->common, *n);
-}
  
  return 0;

  }


--
Best regards,
Vladimir




Re: [PATCH v3 2/5] block: move commit_run loop to separate function

2024-09-30 Thread Vladimir Sementsov-Ogievskiy

On 01.09.24 17:24, Vincent Vanlaer wrote:

+static int commit_iteration(CommitBlockJob *s, int64_t offset, int64_t *n, 
void *buf) {


Hmm, an over-80-character line. Didn't you forget to run ./checkpatch.pl?

--
Best regards,
Vladimir




Re: [PATCH v3 2/5] block: move commit_run loop to separate function

2024-09-30 Thread Vladimir Sementsov-Ogievskiy

On 01.09.24 17:24, Vincent Vanlaer wrote:

Signed-off-by: Vincent Vanlaer 
---
  block/commit.c | 85 --
  1 file changed, 48 insertions(+), 37 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index 8dee25b313..9eedd1fa47 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -128,6 +128,51 @@ static void commit_clean(Job *job)
  blk_unref(s->top);
  }
  
+static int commit_iteration(CommitBlockJob *s, int64_t offset, int64_t *n, void *buf) {

+int ret = 0;
+bool copy;
+bool error_in_source = true;
+
+/* Copy if allocated above the base */
+WITH_GRAPH_RDLOCK_GUARD() {
+ret = bdrv_co_common_block_status_above(blk_bs(s->top),
+s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE,
+n, NULL, NULL, NULL);
+}
+
+copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED);
+trace_commit_one_iteration(s, offset, *n, ret);
+if (copy) {
+assert(*n < SIZE_MAX);


Probably a matter of taste, but I'd prefer working with a local variable instead
of dereferencing the pointer on every line.
Like this:

commit_iteration(..., int64_t bytes, int64_t *done, ...) {

... work with bytes variable ...

out:
  *done = bytes;
  return 0;
}


anyway:
Reviewed-by: Vladimir Sementsov-Ogievskiy 


+
+ret = blk_co_pread(s->top, offset, *n, buf, 0);
+if (ret >= 0) {
+ret = blk_co_pwrite(s->base, offset, *n, buf, 0);
+if (ret < 0) {
+error_in_source = false;
+}
+}
+}
+if (ret < 0) {
+BlockErrorAction action = block_job_error_action(&s->common, 
s->on_error,
+ error_in_source, 
-ret);
+if (action == BLOCK_ERROR_ACTION_REPORT) {
+return ret;
+} else {
+*n = 0;
+return 0;
+}
+}
+/* Publish progress */
+job_progress_update(&s->common.job, *n);
+
+if (copy) {
+block_job_ratelimit_processed_bytes(&s->common, *n);
+}
+
+return 0;
+}
+
  static int coroutine_fn commit_run(Job *job, Error **errp)
  {
  CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
@@ -158,9 +203,6 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
  buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
  
  for (offset = 0; offset < len; offset += n) {

-bool copy;
-bool error_in_source = true;
-
  /* Note that even when no rate limit is applied we need to yield
   * with no pending I/O here so that bdrv_drain_all() returns.
   */
@@ -168,42 +210,11 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
  if (job_is_cancelled(&s->common.job)) {
  break;
  }
-/* Copy if allocated above the base */
-WITH_GRAPH_RDLOCK_GUARD() {
-ret = bdrv_co_common_block_status_above(blk_bs(s->top),
-s->base_overlay, true, true, offset, COMMIT_BUFFER_SIZE,
-&n, NULL, NULL, NULL);
-}
  
-copy = (ret >= 0 && ret & BDRV_BLOCK_ALLOCATED);

-trace_commit_one_iteration(s, offset, n, ret);
-if (copy) {
-assert(n < SIZE_MAX);
-
-ret = blk_co_pread(s->top, offset, n, buf, 0);
-if (ret >= 0) {
-ret = blk_co_pwrite(s->base, offset, n, buf, 0);
-if (ret < 0) {
-error_in_source = false;
-}
-}
-}
-if (ret < 0) {
-BlockErrorAction action =
-block_job_error_action(&s->common, s->on_error,
-   error_in_source, -ret);
-if (action == BLOCK_ERROR_ACTION_REPORT) {
-return ret;
-} else {
-n = 0;
-continue;
-}
-}
-/* Publish progress */
-job_progress_update(&s->common.job, n);
+ret = commit_iteration(s, offset, &n, buf);
  
-if (copy) {

-block_job_ratelimit_processed_bytes(&s->common, n);
+if (ret < 0) {
+return ret;
  }
  }
  


--
Best regards,
Vladimir




Re: [PATCH v2 03/19] qapi/block-core: Drop temporary 'prefix'

2024-09-30 Thread Vladimir Sementsov-Ogievskiy

On 30.09.24 16:23, Vladimir Sementsov-Ogievskiy wrote:

On 04.09.24 14:18, Markus Armbruster wrote:

Recent commit "qapi: Smarter camel_to_upper() to reduce need for
'prefix'" added a temporary 'prefix' to delay changing the generated
code.

Revert it.  This improves XDbgBlockGraphNodeType's generated
enumeration constant prefix from
X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND to
XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND.

Signed-off-by: Markus Armbruster


Reviewed-by: Vladimir Sementsov-Ogievskiy 



Oops sorry, I see now that was merged already.

--
Best regards,
Vladimir




Re: [PATCH v2 03/19] qapi/block-core: Drop temporary 'prefix'

2024-09-30 Thread Vladimir Sementsov-Ogievskiy

On 04.09.24 14:18, Markus Armbruster wrote:

Recent commit "qapi: Smarter camel_to_upper() to reduce need for
'prefix'" added a temporary 'prefix' to delay changing the generated
code.

Revert it.  This improves XDbgBlockGraphNodeType's generated
enumeration constant prefix from
X_DBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND to
XDBG_BLOCK_GRAPH_NODE_TYPE_BLOCK_BACKEND.

Signed-off-by: Markus Armbruster


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [Bug Report][RFC PATCH 1/1] block: fix failing assert on paused VM migration

2024-09-30 Thread Vladimir Sementsov-Ogievskiy

[add migration maintainers]

On 24.09.24 15:56, Andrey Drobyshev wrote:

Instead of throwing an assert, let's just ignore that the flag is already set
and return.  We assume that it's going to be safe to ignore.  Otherwise
this assert fails when migrating a paused VM back and forth.

Ideally we'd like to have a more sophisticated solution, e.g. not even
scan the nodes which should be inactive at this point.

Signed-off-by: Andrey Drobyshev 
---
  block.c | 10 +-
  1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 7d90007cae..c1dcf906d1 100644
--- a/block.c
+++ b/block.c
@@ -6973,7 +6973,15 @@ static int GRAPH_RDLOCK 
bdrv_inactivate_recurse(BlockDriverState *bs)
  return 0;
  }
  
-assert(!(bs->open_flags & BDRV_O_INACTIVE));

+if (bs->open_flags & BDRV_O_INACTIVE) {
+/*
+ * Return here instead of throwing assert as a workaround to
+ * prevent failure on migrating paused VM.
+ * Here we assume that if we're trying to inactivate BDS that's
+ * already inactive, it's safe to just ignore it.
+ */
+return 0;
+}
  
  /* Inactivate this node */

  if (bs->drv->bdrv_inactivate) {


I doubt that this is a correct way to go.

As far as I understand, "inactive" actually means that "the storage does not belong to
QEMU, but to someone else (another QEMU process, for example), and may be changed
transparently". In turn this means that QEMU should do nothing with inactive disks. So the
problem is that nobody called bdrv_activate_all on the target, and we shouldn't ignore that.

Hmm, I see in process_incoming_migration_bh() we do call bdrv_activate_all(),
but only in some scenarios. Maybe the condition should be less strict here.

Why do we need any condition here at all? Don't we want to activate the block layer on
the target after migration anyway?

--
Best regards,
Vladimir




Re: [PATCH v3 17/22] tests: fix -Werror=maybe-uninitialized false-positive

2024-09-30 Thread Vladimir Sementsov-Ogievskiy

On 30.09.24 11:14, marcandre.lur...@redhat.com wrote:

From: Marc-André Lureau

../tests/unit/test-block-iothread.c:773:17: error: ‘job’ may be used 
uninitialized [-Werror=maybe-uninitialized]
/usr/include/glib-2.0/glib/gtestutils.h:73:53: error: ‘ret’ may be used 
uninitialized [-Werror=maybe-uninitialized]

Signed-off-by: Marc-André Lureau


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH v3 11/22] block/block-copy: fix -Werror=maybe-uninitialized false-positive

2024-09-30 Thread Vladimir Sementsov-Ogievskiy

On 30.09.24 11:14, marcandre.lur...@redhat.com wrote:

From: Marc-André Lureau

../block/block-copy.c:591:12: error: ‘ret’ may be used uninitialized 
[-Werror=maybe-uninitialized]

Signed-off-by: Marc-André Lureau


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH v3 06/22] block/mirror: fix -Werror=maybe-uninitialized false-positive

2024-09-30 Thread Vladimir Sementsov-Ogievskiy

On 30.09.24 11:14, marcandre.lur...@redhat.com wrote:

From: Marc-André Lureau

../block/mirror.c:404:5: error: ‘ret’ may be used uninitialized 
[-Werror=maybe-uninitialized]
../block/mirror.c:895:12: error: ‘ret’ may be used uninitialized 
[-Werror=maybe-uninitialized]
../block/mirror.c:578:12: error: ‘ret’ may be used uninitialized 
[-Werror=maybe-uninitialized]

Change a variable to int, as suggested by Manos: "bdrv_co_preadv()
which is int and is passed as an int argument to mirror_read_complete()"

Signed-off-by: Marc-André Lureau


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




[PULL 1/5] copy-before-write: allow specifying minimum cluster size

2024-09-30 Thread Vladimir Sementsov-Ogievskiy
From: Fiona Ebner 

In the context of backup fleecing, discarding the source will not work
when the fleecing image has a larger granularity than the one used for
block-copy operations (can happen if the backup target has smaller
cluster size), because cbw_co_pdiscard_snapshot() will align down the
discard requests and thus effectively ignore them.

To make @discard-source work in such a scenario, allow specifying the
minimum cluster size used for block-copy operations and thus in
particular also the granularity for discard requests to the source.

The type 'size' (corresponding to uint64_t in C) is used in QAPI to
rule out negative inputs and for consistency with already existing
@cluster-size parameters. Since block_copy_calculate_cluster_size()
uses int64_t for its result, a check that the input is not too large
is added in block_copy_state_new() before calling it. The calculation
in block_copy_calculate_cluster_size() is done in the target int64_t
type.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Acked-by: Markus Armbruster  (QAPI schema)
Signed-off-by: Fiona Ebner 
Message-Id: <20240711120915.310243-2-f.eb...@proxmox.com>
[vsementsov: switch version to 9.2 in QAPI doc]
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/block-copy.c | 36 ++--
 block/copy-before-write.c  |  5 -
 include/block/block-copy.h |  1 +
 qapi/block-core.json   |  8 +++-
 4 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index cc618e4561..93eb1b2664 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool 
use_copy_range,
 }
 
 static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ int64_t min_cluster_size,
  Error **errp)
 {
 int ret;
@@ -319,6 +320,9 @@ static int64_t 
block_copy_calculate_cluster_size(BlockDriverState *target,
 GLOBAL_STATE_CODE();
 GRAPH_RDLOCK_GUARD_MAINLOOP();
 
+min_cluster_size = MAX(min_cluster_size,
+   (int64_t)BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+
 target_does_cow = bdrv_backing_chain_next(target);
 
 /*
@@ -329,13 +333,13 @@ static int64_t 
block_copy_calculate_cluster_size(BlockDriverState *target,
 ret = bdrv_get_info(target, &bdi);
 if (ret == -ENOTSUP && !target_does_cow) {
 /* Cluster size is not defined */
-warn_report("The target block device doesn't provide "
-"information about the block size and it doesn't have a "
-"backing file. The default block size of %u bytes is "
-"used. If the actual block size of the target exceeds "
-"this default, the backup may be unusable",
-BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
-return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+warn_report("The target block device doesn't provide information about 
"
+"the block size and it doesn't have a backing file. The "
+"(default) block size of %" PRIi64 " bytes is used. If the 
"
+"actual block size of the target exceeds this value, the "
+"backup may be unusable",
+min_cluster_size);
+return min_cluster_size;
 } else if (ret < 0 && !target_does_cow) {
 error_setg_errno(errp, -ret,
 "Couldn't determine the cluster size of the target image, "
@@ -345,16 +349,17 @@ static int64_t 
block_copy_calculate_cluster_size(BlockDriverState *target,
 return ret;
 } else if (ret < 0 && target_does_cow) {
 /* Not fatal; just trudge on ahead. */
-return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+return min_cluster_size;
 }
 
-return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+return MAX(min_cluster_size, bdi.cluster_size);
 }
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
  BlockDriverState *copy_bitmap_bs,
  const BdrvDirtyBitmap *bitmap,
  bool discard_source,
+ uint64_t min_cluster_size,
  Error **errp)
 {
 ERRP_GUARD();
@@ -365,7 +370,18 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 
 GLOBAL_STATE_CODE();
 
-cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
+if (min_cluster_size > INT64_MAX) {
+error_setg(errp, "min-cluster-size too large: %" PRIu64 

[PULL 5/5] util/co-shared-resource: Remove unused co_try_get_from_shres

2024-09-30 Thread Vladimir Sementsov-Ogievskiy
From: "Dr. David Alan Gilbert" 

co_try_get_from_shres hasn't been used since it was added in
  55fa54a789 ("co-shared-resource: protect with a mutex")

(Everyone uses the _locked version)
Remove it.

Signed-off-by: Dr. David Alan Gilbert 
Message-Id: <20240918124220.27871-1-d...@treblig.org>
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/qemu/co-shared-resource.h | 7 ---
 util/qemu-co-shared-resource.c| 6 --
 2 files changed, 13 deletions(-)

diff --git a/include/qemu/co-shared-resource.h 
b/include/qemu/co-shared-resource.h
index 78ca5850f8..41be1a8131 100644
--- a/include/qemu/co-shared-resource.h
+++ b/include/qemu/co-shared-resource.h
@@ -44,13 +44,6 @@ SharedResource *shres_create(uint64_t total);
  */
 void shres_destroy(SharedResource *s);
 
-/*
- * Try to allocate an amount of @n.  Return true on success, and false
- * if there is too little left of the collective resource to fulfill
- * the request.
- */
-bool co_try_get_from_shres(SharedResource *s, uint64_t n);
-
 /*
  * Allocate an amount of @n, and, if necessary, yield until
  * that becomes possible.
diff --git a/util/qemu-co-shared-resource.c b/util/qemu-co-shared-resource.c
index a66cc07e75..752eb5a1c5 100644
--- a/util/qemu-co-shared-resource.c
+++ b/util/qemu-co-shared-resource.c
@@ -66,12 +66,6 @@ static bool co_try_get_from_shres_locked(SharedResource *s, 
uint64_t n)
 return false;
 }
 
-bool co_try_get_from_shres(SharedResource *s, uint64_t n)
-{
-QEMU_LOCK_GUARD(&s->lock);
-return co_try_get_from_shres_locked(s, n);
-}
-
 void coroutine_fn co_get_from_shres(SharedResource *s, uint64_t n)
 {
 assert(n <= s->total);
-- 
2.34.1




[PULL 2/5] backup: add minimum cluster size to performance options

2024-09-30 Thread Vladimir Sementsov-Ogievskiy
From: Fiona Ebner 

In the context of backup fleecing, discarding the source will not work
when the fleecing image has a larger granularity than the one used for
block-copy operations (can happen if the backup target has smaller
cluster size), because cbw_co_pdiscard_snapshot() will align down the
discard requests and thus effectively ignore them.

To make @discard-source work in such a scenario, allow specifying the
minimum cluster size used for block-copy operations and thus in
particular also the granularity for discard requests to the source.

Suggested-by: Vladimir Sementsov-Ogievskiy 
Acked-by: Markus Armbruster  (QAPI schema)
Signed-off-by: Fiona Ebner 
Message-Id: <20240711120915.310243-3-f.eb...@proxmox.com>
[vsementsov: switch version to 9.2 in QAPI doc]
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/backup.c| 2 +-
 block/copy-before-write.c | 9 +
 block/copy-before-write.h | 1 +
 blockdev.c| 3 +++
 qapi/block-core.json  | 9 +++--
 5 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index 3dd2e229d2..a1292c01ec 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -458,7 +458,7 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 }
 
 cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
-  &bcs, errp);
+  perf->min_cluster_size, &bcs, errp);
 if (!cbw) {
 goto error;
 }
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index a919b1f41b..e835987e52 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -548,6 +548,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   BlockDriverState *target,
   const char *filter_node_name,
   bool discard_source,
+  uint64_t min_cluster_size,
   BlockCopyState **bcs,
   Error **errp)
 {
@@ -567,6 +568,14 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
 qdict_put_str(opts, "file", bdrv_get_node_name(source));
 qdict_put_str(opts, "target", bdrv_get_node_name(target));
 
+if (min_cluster_size > INT64_MAX) {
+error_setg(errp, "min-cluster-size too large: %" PRIu64 " > %" PRIi64,
+   min_cluster_size, INT64_MAX);
+qobject_unref(opts);
+return NULL;
+}
+qdict_put_int(opts, "min-cluster-size", (int64_t)min_cluster_size);
+
 top = bdrv_insert_node(source, opts, flags, errp);
 if (!top) {
 return NULL;
diff --git a/block/copy-before-write.h b/block/copy-before-write.h
index 01af0cd3c4..2a5d4ba693 100644
--- a/block/copy-before-write.h
+++ b/block/copy-before-write.h
@@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
   BlockDriverState *target,
   const char *filter_node_name,
   bool discard_source,
+  uint64_t min_cluster_size,
   BlockCopyState **bcs,
   Error **errp);
 void bdrv_cbw_drop(BlockDriverState *bs);
diff --git a/blockdev.c b/blockdev.c
index 835064ed03..6740663fda 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2655,6 +2655,9 @@ static BlockJob *do_backup_common(BackupCommon *backup,
 if (backup->x_perf->has_max_chunk) {
 perf.max_chunk = backup->x_perf->max_chunk;
 }
+if (backup->x_perf->has_min_cluster_size) {
+perf.min_cluster_size = backup->x_perf->min_cluster_size;
+}
 }
 
 if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6751022428..c3b0a2376b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1551,11 +1551,16 @@
 # it should not be less than job cluster size which is calculated
 # as maximum of target image cluster size and 64k.  Default 0.
 #
+# @min-cluster-size: Minimum size of blocks used by copy-before-write
+# and background copy operations.  Has to be a power of 2.  No
+# effect if smaller than the maximum of the target's cluster size
+# and 64 KiB.  Default 0.  (Since 9.2)
+#
 # Since: 6.0
 ##
 { 'struct': 'BackupPerf',
-  'data': { '*use-copy-range': 'bool',
-'*max-workers': 'int', '*max-chunk': 'int64' } }
+  'data': { '*use-copy-range': 'bool', '*max-workers': 'int',
+'*max-chunk': 'int64', '*min-cluster-size': 'size' } }
 
 ##
 # @BackupCommon:
-- 
2.34.1




[PULL 4/5] block: Remove unused aio_task_pool_empty

2024-09-30 Thread Vladimir Sementsov-Ogievskiy
From: "Dr. David Alan Gilbert" 

aio_task_pool_empty has been unused since it was added in
  6e9b225f73 ("block: introduce aio task pool")

Remove it.

Signed-off-by: Dr. David Alan Gilbert 
Message-Id: <20240917002007.330689-1-d...@treblig.org>
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/aio_task.c | 5 -
 include/block/aio_task.h | 2 --
 2 files changed, 7 deletions(-)

diff --git a/block/aio_task.c b/block/aio_task.c
index 9bd17ea2c1..bb5c05f455 100644
--- a/block/aio_task.c
+++ b/block/aio_task.c
@@ -119,8 +119,3 @@ int aio_task_pool_status(AioTaskPool *pool)
 
 return pool->status;
 }
-
-bool aio_task_pool_empty(AioTaskPool *pool)
-{
-return pool->busy_tasks == 0;
-}
diff --git a/include/block/aio_task.h b/include/block/aio_task.h
index 18a9c41f4e..c81d637617 100644
--- a/include/block/aio_task.h
+++ b/include/block/aio_task.h
@@ -40,8 +40,6 @@ void aio_task_pool_free(AioTaskPool *);
 /* error code of failed task or 0 if all is OK */
 int aio_task_pool_status(AioTaskPool *pool);
 
-bool aio_task_pool_empty(AioTaskPool *pool);
-
 /* User provides filled @task, however task->pool will be set automatically */
 void coroutine_fn aio_task_pool_start_task(AioTaskPool *pool, AioTask *task);
 
-- 
2.34.1




[PULL 0/5] Block-jobs 2024-09-30

2024-09-30 Thread Vladimir Sementsov-Ogievskiy
The following changes since commit 3b14a767eaca3df5534a162851f04787b363670e:

  Merge tag 'qemu-openbios-20240924' of https://github.com/mcayland/qemu into 
staging (2024-09-28 12:34:44 +0100)

are available in the Git repository at:

  https://gitlab.com/vsementsov/qemu.git tags/pull-block-jobs-2024-09-30

for you to fetch changes up to b74987cd3bf9bd4bf452ed371d864cbd1dccb08e:

  util/co-shared-resource: Remove unused co_try_get_from_shres (2024-09-30 
10:53:18 +0300)


Block-jobs: improve backup fleecing



Dr. David Alan Gilbert (2):
  block: Remove unused aio_task_pool_empty
  util/co-shared-resource: Remove unused co_try_get_from_shres

Fiona Ebner (3):
  copy-before-write: allow specifying minimum cluster size
  backup: add minimum cluster size to performance options
  block/reqlist: allow adding overlapping requests

 block/aio_task.c  |  5 -
 block/backup.c|  2 +-
 block/block-copy.c| 36 ++-
 block/copy-before-write.c | 17 +--
 block/copy-before-write.h |  1 +
 block/reqlist.c   |  2 --
 blockdev.c|  3 +++
 include/block/aio_task.h  |  2 --
 include/block/block-copy.h|  1 +
 include/qemu/co-shared-resource.h |  7 --
 qapi/block-core.json  | 17 ---
 util/qemu-co-shared-resource.c|  6 --
 12 files changed, 61 insertions(+), 38 deletions(-)

-- 
2.34.1




[PULL 3/5] block/reqlist: allow adding overlapping requests

2024-09-30 Thread Vladimir Sementsov-Ogievskiy
From: Fiona Ebner 

Allow overlapping request by removing the assert that made it
impossible. There are only two callers:

1. block_copy_task_create()

It already asserts the very same condition before calling
reqlist_init_req().

2. cbw_snapshot_read_lock()

There is no need to have read requests be non-overlapping in
copy-before-write when used for snapshot-access. In fact, there was no
protection against two callers of cbw_snapshot_read_lock() calling
reqlist_init_req() with overlapping ranges and this could lead to an
assertion failure [1].

In particular, with the reproducer script below [0], two
cbw_co_snapshot_block_status() callers could race, with the second
calling reqlist_init_req() before the first one finishes and removes
its conflicting request.

[0]:

> #!/bin/bash -e
> dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024
> ./qemu-img create /tmp/fleecing.raw -f raw 1G
> (
> ./qemu-system-x86_64 --qmp stdio \
> --blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \
> --blockdev 
> raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw \
> < {"execute": "qmp_capabilities"}
> {"execute": "blockdev-add", "arguments": { "driver": "copy-before-write", 
> "file": "node0", "target": "node1", "node-name": "node3" } }
> {"execute": "blockdev-add", "arguments": { "driver": "snapshot-access", 
> "file": "node3", "node-name": "snap0" } }
> {"execute": "nbd-server-start", "arguments": {"addr": { "type": "unix", 
> "data": { "path": "/tmp/nbd.socket" } } } }
> {"execute": "block-export-add", "arguments": {"id": "exp0", "node-name": 
> "snap0", "type": "nbd", "name": "exp0"}}
> EOF
> ) &
> sleep 5
> while true; do
> ./qemu-nbd -d /dev/nbd0
> ./qemu-nbd -c /dev/nbd0 nbd:unix:/tmp/nbd.socket:exportname=exp0 -f raw -r
> nbdinfo --map 'nbd+unix:///exp0?socket=/tmp/nbd.socket'
> done

[1]:

> #5  0x71e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101
> #6  0x615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23
> #7  0x6152853e2d98 in cbw_snapshot_read_lock (...) at 
> ../block/copy-before-write.c:237
> #8  0x6152853e3068 in cbw_co_snapshot_block_status (...) at 
> ../block/copy-before-write.c:304
> #9  0x6152853f4d22 in bdrv_co_snapshot_block_status (...) at 
> ../block/io.c:3726
> #10 0x61528543a63e in snapshot_access_co_block_status (...) at 
> ../block/snapshot-access.c:48
> #11 0x6152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474
> #12 0x6152853f2016 in bdrv_co_common_block_status_above (...) at 
> ../block/io.c:2652
> #13 0x6152853f22cf in bdrv_co_block_status_above (...) at 
> ../block/io.c:2732
> #14 0x00006152853d9a86 in blk_co_block_status_above (...) at 
> ../block/block-backend.c:1473
> #15 0x000061528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374
> #16 0x61528538deb1 in nbd_co_send_block_status (...) at 
> ../nbd/server.c:2481
> #17 0x61528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978
> #18 0x61528538f906 in nbd_trip (...) at ../nbd/server.c:3121
> #19 0x6152855a7caf in coroutine_trampoline (...) at 
> ../util/coroutine-ucontext.c:175

Cc: qemu-sta...@nongnu.org
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Fiona Ebner 
Message-Id: <20240712140716.517911-1-f.eb...@proxmox.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/copy-before-write.c | 3 ++-
 block/reqlist.c   | 2 --
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index e835987e52..81afeff1c7 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -66,7 +66,8 @@ typedef struct BDRVCopyBeforeWriteState {
 
 /*
  * @frozen_read_reqs: current read requests for fleecing user in bs->file
- * node. These areas must not be rewritten by guest.
+ * node. These areas must not be rewritten by guest. There can be multiple
+ * overlapping read requests.
  */
 BlockReqList frozen_read_reqs;
 
diff --git a/block/reqlist.c b/block/reqlist.c
index 08cb57cfa4..098e807378 100644
--- a/block/reqlist.c
+++ b/block/reqlist.c
@@ -20,8 +20,6 @@
 void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
   int64_t bytes)
 {
-assert(!reqlist_find_conflict(reqs, offset, bytes));
-
 *req = (BlockReq) {
 .offset = offset,
 .bytes = bytes,
-- 
2.34.1




Re: [PATCH] block: Remove unused aio_task_pool_empty

2024-09-27 Thread Vladimir Sementsov-Ogievskiy

On 17.09.24 03:20, d...@treblig.org wrote:

From: "Dr. David Alan Gilbert"

aio_task_pool_empty has been unused since it was added in
   6e9b225f73 ("block: introduce aio task pool")

Remove it.

Signed-off-by: Dr. David Alan Gilbert


Thanks, applied to my block branch.

--
Best regards,
Vladimir




Re: [PATCH] util/co-shared-resource: Remove unused co_try_get_from_shres

2024-09-27 Thread Vladimir Sementsov-Ogievskiy

On 18.09.24 15:42, d...@treblig.org wrote:

From: "Dr. David Alan Gilbert"

co_try_get_from_shres hasn't been used since it was added in
   55fa54a789 ("co-shared-resource: protect with a mutex")

(Everyone uses the _locked version)
Remove it.

Signed-off-by: Dr. David Alan Gilbert


Thanks, applied to my block branch.

--
Best regards,
Vladimir




[PATCH v6 1/3] qdev-monitor: add option to report GenericError from find_device_state

2024-09-20 Thread Vladimir Sementsov-Ogievskiy
Here we just prepare for the following patch, making it possible to report
GenericError as recommended.

This patch doesn't aim to prevent further use of DeviceNotFound by
future interfaces:

 - find_device_state() is used in blk_by_qdev_id() and qmp_get_blk()
   functions, which may lead to spread of DeviceNotFound anyway
 - also, nothing prevents simply copy-pasting find_device_state() calls
   with a false argument

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Markus Armbruster 
Acked-by: Raphael Norwitz 
---
 system/qdev-monitor.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/system/qdev-monitor.c b/system/qdev-monitor.c
index 44994ea0e1..6671137a91 100644
--- a/system/qdev-monitor.c
+++ b/system/qdev-monitor.c
@@ -885,13 +885,20 @@ void qmp_device_add(QDict *qdict, QObject **ret_data, 
Error **errp)
 object_unref(OBJECT(dev));
 }
 
-static DeviceState *find_device_state(const char *id, Error **errp)
+/*
+ * Note that creating new APIs using error classes other than GenericError is
+ * not recommended. Set use_generic_error=true for new interfaces.
+ */
+static DeviceState *find_device_state(const char *id, bool use_generic_error,
+  Error **errp)
 {
 Object *obj = object_resolve_path_at(qdev_get_peripheral(), id);
 DeviceState *dev;
 
 if (!obj) {
-error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+error_set(errp,
+  (use_generic_error ?
+   ERROR_CLASS_GENERIC_ERROR : ERROR_CLASS_DEVICE_NOT_FOUND),
   "Device '%s' not found", id);
 return NULL;
 }
@@ -956,7 +963,7 @@ void qdev_unplug(DeviceState *dev, Error **errp)
 
 void qmp_device_del(const char *id, Error **errp)
 {
-DeviceState *dev = find_device_state(id, errp);
+DeviceState *dev = find_device_state(id, false, errp);
 if (dev != NULL) {
 if (dev->pending_deleted_event &&
 (dev->pending_deleted_expires_ms == 0 ||
@@ -1076,7 +1083,7 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp)
 
 GLOBAL_STATE_CODE();
 
-dev = find_device_state(id, errp);
+dev = find_device_state(id, false, errp);
 if (dev == NULL) {
 return NULL;
 }
-- 
2.34.1




[PATCH v6 3/3] qapi: introduce device-sync-config

2024-09-20 Thread Vladimir Sementsov-Ogievskiy
Add command to sync config from vhost-user backend to the device. It
may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not
triggered interrupt to the guest or just not available (not supported
by vhost-user server).

Command result is racy if allow it during migration. Let's not allow
that.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Markus Armbruster 
Acked-by: Raphael Norwitz 
---
 hw/block/vhost-user-blk.c |  1 +
 hw/virtio/virtio-pci.c|  9 +
 include/hw/qdev-core.h|  6 ++
 qapi/qdev.json| 24 
 system/qdev-monitor.c | 38 ++
 5 files changed, 78 insertions(+)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 48b3dabb8d..7996e49821 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -591,6 +591,7 @@ static void vhost_user_blk_class_init(ObjectClass *klass, 
void *data)
 
 device_class_set_props(dc, vhost_user_blk_properties);
 dc->vmsd = &vmstate_vhost_user_blk;
+dc->sync_config = vhost_user_blk_sync_config;
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
 vdc->realize = vhost_user_blk_device_realize;
 vdc->unrealize = vhost_user_blk_device_unrealize;
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 4d832fe845..c5a809b956 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2385,6 +2385,14 @@ static void virtio_pci_dc_realize(DeviceState *qdev, 
Error **errp)
 vpciklass->parent_dc_realize(qdev, errp);
 }
 
+static int virtio_pci_sync_config(DeviceState *dev, Error **errp)
+{
+VirtIOPCIProxy *proxy = VIRTIO_PCI(dev);
+VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+return qdev_sync_config(DEVICE(vdev), errp);
+}
+
 static void virtio_pci_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
@@ -2401,6 +2409,7 @@ static void virtio_pci_class_init(ObjectClass *klass, 
void *data)
 device_class_set_parent_realize(dc, virtio_pci_dc_realize,
 &vpciklass->parent_dc_realize);
 rc->phases.hold = virtio_pci_bus_reset_hold;
+dc->sync_config = virtio_pci_sync_config;
 }
 
 static const TypeInfo virtio_pci_info = {
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index aa97c34a4b..94914858d8 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -95,6 +95,7 @@ typedef void (*DeviceUnrealize)(DeviceState *dev);
 typedef void (*DeviceReset)(DeviceState *dev);
 typedef void (*BusRealize)(BusState *bus, Error **errp);
 typedef void (*BusUnrealize)(BusState *bus);
+typedef int (*DeviceSyncConfig)(DeviceState *dev, Error **errp);
 
 /**
  * struct DeviceClass - The base class for all devices.
@@ -103,6 +104,9 @@ typedef void (*BusUnrealize)(BusState *bus);
  * property is changed to %true.
  * @unrealize: Callback function invoked when the #DeviceState:realized
  * property is changed to %false.
+ * @sync_config: Callback function invoked when QMP command device-sync-config
+ * is called. Should synchronize device configuration from host to guest part
+ * and notify the guest about the change.
  * @hotpluggable: indicates if #DeviceClass is hotpluggable, available
  * as readonly "hotpluggable" property of #DeviceState instance
  *
@@ -162,6 +166,7 @@ struct DeviceClass {
 DeviceReset legacy_reset;
 DeviceRealize realize;
 DeviceUnrealize unrealize;
+DeviceSyncConfig sync_config;
 
 /**
  * @vmsd: device state serialisation description for
@@ -547,6 +552,7 @@ bool qdev_hotplug_allowed(DeviceState *dev, Error **errp);
  */
 HotplugHandler *qdev_get_hotplug_handler(DeviceState *dev);
 void qdev_unplug(DeviceState *dev, Error **errp);
+int qdev_sync_config(DeviceState *dev, Error **errp);
 void qdev_simple_device_unplug_cb(HotplugHandler *hotplug_dev,
   DeviceState *dev, Error **errp);
 void qdev_machine_creation_done(void);
diff --git a/qapi/qdev.json b/qapi/qdev.json
index 53d147c7b4..2a581129c9 100644
--- a/qapi/qdev.json
+++ b/qapi/qdev.json
@@ -163,3 +163,27 @@
 ##
 { 'event': 'DEVICE_UNPLUG_GUEST_ERROR',
   'data': { '*device': 'str', 'path': 'str' } }
+
+##
+# @device-sync-config:
+#
+# Synchronize device configuration from host to guest part.  First,
+# copy the configuration from the host part (backend) to the guest
+# part (frontend).  Then notify guest software that device
+# configuration changed.
+#
+# The command may be used to notify the guest about block device
+# capacity change.  Currently only vhost-user-blk device supports
+# this.
+#
+# @id: the device's ID or QOM path
+#
+# Features:
+#
+# @unstable: The command is experimental.
+#
+# Since: 9.1
+##
+{ 'command': 'device-sync-config',
+  'features': [ 'unstable'

[PATCH v6 0/3] vhost-user-blk: live resize additional APIs

2024-09-20 Thread Vladimir Sementsov-Ogievskiy
v6: tiny fix: add document comment for sync_config field to fix qdoc
generation. Also, add r-b and a-b marks.

Vladimir Sementsov-Ogievskiy (3):
  qdev-monitor: add option to report GenericError from find_device_state
  vhost-user-blk: split vhost_user_blk_sync_config()
  qapi: introduce device-sync-config

 hw/block/vhost-user-blk.c | 27 ++--
 hw/virtio/virtio-pci.c|  9 +++
 include/hw/qdev-core.h|  6 +
 qapi/qdev.json| 24 ++
 system/qdev-monitor.c | 53 ---
 5 files changed, 108 insertions(+), 11 deletions(-)

-- 
2.34.1




[PATCH v6 2/3] vhost-user-blk: split vhost_user_blk_sync_config()

2024-09-20 Thread Vladimir Sementsov-Ogievskiy
Split vhost_user_blk_sync_config() out from
vhost_user_blk_handle_config_change(), to be reused in the following
commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Acked-by: Raphael Norwitz 
---
 hw/block/vhost-user-blk.c | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 5b7f46bbb0..48b3dabb8d 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -90,27 +90,39 @@ static void vhost_user_blk_set_config(VirtIODevice *vdev, 
const uint8_t *config)
 s->blkcfg.wce = blkcfg->wce;
 }
 
+static int vhost_user_blk_sync_config(DeviceState *dev, Error **errp)
+{
+int ret;
+VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+VHostUserBlk *s = VHOST_USER_BLK(vdev);
+
+ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg,
+   vdev->config_len, errp);
+if (ret < 0) {
+return ret;
+}
+
+memcpy(vdev->config, &s->blkcfg, vdev->config_len);
+virtio_notify_config(vdev);
+
+return 0;
+}
+
 static int vhost_user_blk_handle_config_change(struct vhost_dev *dev)
 {
 int ret;
-VirtIODevice *vdev = dev->vdev;
-VHostUserBlk *s = VHOST_USER_BLK(dev->vdev);
 Error *local_err = NULL;
 
 if (!dev->started) {
 return 0;
 }
 
-ret = vhost_dev_get_config(dev, (uint8_t *)&s->blkcfg,
-   vdev->config_len, &local_err);
+ret = vhost_user_blk_sync_config(DEVICE(dev->vdev), &local_err);
 if (ret < 0) {
 error_report_err(local_err);
 return ret;
 }
 
-memcpy(dev->vdev->config, &s->blkcfg, vdev->config_len);
-virtio_notify_config(dev->vdev);
-
 return 0;
 }
 
-- 
2.34.1




Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs

2024-09-20 Thread Vladimir Sementsov-Ogievskiy

On 11.09.24 12:51, Michael S. Tsirkin wrote:

On Tue, Jun 25, 2024 at 03:18:40PM +0300, Vladimir Sementsov-Ogievskiy wrote:

v5:
03: drop extra check on whether runstate is running


Causes build failures when generating qdoc.

https://gitlab.com/mstredhat/qemu/-/jobs/7792086965




Sorry for a delay, I'll send a v6 soon with fix for that.

--
Best regards,
Vladimir




Re: [PATCH v2] block/reqlist: allow adding overlapping requests

2024-08-28 Thread Vladimir Sementsov-Ogievskiy

On 11.08.24 20:55, Michael Tokarev wrote:

12.07.2024 17:07, Fiona Ebner wrote:

Allow overlapping request by removing the assert that made it
impossible. There are only two callers:

1. block_copy_task_create()

It already asserts the very same condition before calling
reqlist_init_req().

2. cbw_snapshot_read_lock()

There is no need to have read requests be non-overlapping in
copy-before-write when used for snapshot-access. In fact, there was no
protection against two callers of cbw_snapshot_read_lock() calling
reqlist_init_req() with overlapping ranges and this could lead to an
assertion failure [1].

In particular, with the reproducer script below [0], two
cbw_co_snapshot_block_status() callers could race, with the second
calling reqlist_init_req() before the first one finishes and removes
its conflicting request.

[0]:


#!/bin/bash -e
dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024
./qemu-img create /tmp/fleecing.raw -f raw 1G
(
./qemu-system-x86_64 --qmp stdio \
--blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \
--blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw 
\
<

[1]:


#5  0x71e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101
#6  0x615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23
#7  0x6152853e2d98 in cbw_snapshot_read_lock (...) at 
../block/copy-before-write.c:237
#8  0x6152853e3068 in cbw_co_snapshot_block_status (...) at 
../block/copy-before-write.c:304
#9  0x6152853f4d22 in bdrv_co_snapshot_block_status (...) at 
../block/io.c:3726
#10 0x61528543a63e in snapshot_access_co_block_status (...) at 
../block/snapshot-access.c:48
#11 0x6152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474
#12 0x6152853f2016 in bdrv_co_common_block_status_above (...) at 
../block/io.c:2652
#13 0x6152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732
#14 0x6152853d9a86 in blk_co_block_status_above (...) at 
../block/block-backend.c:1473
#15 0x61528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374
#16 0x61528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481
#17 0x61528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978
#18 0x61528538f906 in nbd_trip (...) at ../nbd/server.c:3121
#19 0x6152855a7caf in coroutine_trampoline (...) at 
../util/coroutine-ucontext.c:175


Cc: qemu-sta...@nongnu.org
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Fiona Ebner 


Hi!

Has this been forgotten or is it not needed for 9.1?



My apologies, this was forgotten. I think rc4 is too late; I'll send a pull
request as soon as the 9.2 window opens.

--
Best regards,
Vladimir




Re: [PATCH v2 1/2] block: zero data data corruption using prealloc-filter

2024-08-02 Thread Vladimir Sementsov-Ogievskiy

On 16.07.24 16:32, Denis V. Lunev wrote:

On 7/12/24 13:55, Vladimir Sementsov-Ogievskiy wrote:

On 12.07.24 12:46, Andrey Drobyshev wrote:

From: "Denis V. Lunev" 

We have observed that some clusters in the QCOW2 files are zeroed
while preallocation filter is used.

We are able to trace down the following sequence when prealloc-filter
is used:
 co=0x55e7cbed7680 qcow2_co_pwritev_task()
 co=0x55e7cbed7680 preallocate_co_pwritev_part()
 co=0x55e7cbed7680 handle_write()
 co=0x55e7cbed7680 bdrv_co_do_pwrite_zeroes()
 co=0x55e7cbed7680 raw_do_pwrite_zeroes()
 co=0x7f9edb7fe500 do_fallocate()

Here coroutine 0x55e7cbed7680 is being blocked waiting while coroutine
0x7f9edb7fe500 will finish with fallocate of the file area. OK. It is
time to handle next coroutine, which
 co=0x55e7cbee91b0 qcow2_co_pwritev_task()
 co=0x55e7cbee91b0 preallocate_co_pwritev_part()
 co=0x55e7cbee91b0 handle_write()
 co=0x55e7cbee91b0 bdrv_co_do_pwrite_zeroes()
 co=0x55e7cbee91b0 raw_do_pwrite_zeroes()
 co=0x7f9edb7deb00 do_fallocate()

The trouble comes here. Coroutine 0x55e7cbed7680 has not advanced
file_end yet and coroutine 0x55e7cbee91b0 will start fallocate() for
the same area. This means that if (once fallocate is started inside
0x7f9edb7deb00) original fallocate could end and the real write will
be executed. In that case write() request is handled at the same time
as fallocate().

The patch moves s->file_lock assignment before fallocate and that is


text needs to be updated


crucial. The idea is that all subsequent requests into the area
being preallocated will be issued as just writes without fallocate
to this area and they will not proceed thanks to overlapping
requests mechanics. If preallocation fails, we will just switch
to the normal expand-by-write behavior and that is not a problem
except for performance.

Signed-off-by: Denis V. Lunev 
Tested-by: Andrey Drobyshev 
---
  block/preallocate.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/preallocate.c b/block/preallocate.c
index d215bc5d6d..ecf0aa4baa 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -383,6 +383,13 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t 
bytes,
    want_merge_zero = want_merge_zero && (prealloc_start <= offset);
  +    /*
+ * Assign file_end before making actual preallocation. This will ensure
+ * that next request performed while preallocation is in progress will
+ * be passed without preallocation.
+ */
+    s->file_end = prealloc_end;
+
  ret = bdrv_co_pwrite_zeroes(
  bs->file, prealloc_start, prealloc_end - prealloc_start,
  BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT);
@@ -391,7 +398,6 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t 
bytes,
  return false;
  }
  -    s->file_end = prealloc_end;
  return want_merge_zero;
  }



Hmm. But this way we set both s->file_end and s->zero_start prior to the actual
write-zero operation. This means that the next write-zero operation may take the fast path (see
preallocate_co_pwrite_zeroes()) and return success even before the actual
preallocation write_zeroes operation finishes (which may also fail). It seems we need to update
the logic around s->zero_start too.


Yes. This is not a problem at all. We take the fast path and this new
fast-pathed write request will get stuck on the overlapped-request check.
This is fine on the success path.


Sorry for the long delay. By fast path I meant when we return true from

    /* Now s->data_end, s->zero_start and s->file_end are valid. */

    if (end <= s->file_end) {
        /* No preallocation needed. */
        return want_merge_zero && offset >= s->zero_start;
    }

and then preallocate_co_pwrite_zeroes() just reports success immediately. So
we don't get stuck in any overlap check.

Probably, that's not too bad: we just skip the write-zero after EOF. If the user
reads from this area, zeroes should be returned anyway.

Still, if we do so we mix our preallocation (which we truncate at the end) with
user-written zeroes. So we may truncate user-written zeroes in the end. How bad
that is is a question, but it was not planned in the original code.



But the error path is a trickier question.

iris ~/src/qemu $ cat 1.c
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main()
{
    int fd = open("file", O_RDWR | O_CREAT);
    char buf[4096];

    memset(buf, 'a', sizeof(buf));
    pwrite(fd, buf, sizeof(buf), 4096);

    return 0;
}
iris ~/src/qemu $

This works just fine, thus the error path would also be fine.

Den


--
Best regards,
Vladimir




Re: [PATCH v2 1/3] nbd: CVE-XXX: Use cookie to track generation of nbd-server

2024-08-02 Thread Vladimir Sementsov-Ogievskiy

On 02.08.24 04:32, Eric Blake wrote:

As part of the QMP command nbd-server-start, the blockdev code was
creating a single global nbd_server object, and telling the qio code
to accept one or more client connections to the exposed listener
socket.  But even though we tear down the listener socket during a
subsequent QMP nbd-server-stop, the qio code has handed off to a
coroutine that may be executing asynchronously with the server
shutdown, such that a client that connects to the socket before
nbd-server-stop but delays disconnect or completion of the NBD
handshake until after the followup QMP command can result in the
nbd_blockdev_client_closed() callback running after nbd_server has
already been torn down, causing a SEGV.  Worse, if a second nbd_server
object is created (possibly on a different socket address), a late
client resolution from the first server can subtly interfere with the
second server.  This can be abused by a malicious client that does not
know TLS secrets to cause qemu to SEGV after a legitimate client has
concluded storage migration to a destination qemu, which can be turned
into a denial of service attack even when qemu is set up to require
only TLS clients.

For environments without this patch, the CVE can be mitigated by
ensuring (such as via a firewall) that only trusted clients can
connect to an NBD server; using frameworks like libvirt that ensure
that nbd-server-stop is not executed while any trusted clients are
still connected will only help if there is also no possibility for an
untrusted client to open a connection but then stall on the NBD
handshake.

Fix this by passing a cookie through to each client connection
(whether or not that client actually knows the TLS details to
successfully negotiate); then increment the cookie every time a server
is torn down so that we can recognize any late client actions
lingering from an old server.

This patch does not address the fact that client sockets can be left
open indefinitely after the server is torn down (possibly creating a
resource leak, if a malicious client intentionally leaves lots of open
sockets paused partway through NBD negotiation); the next patch will
add some code to forcefully close any lingering clients as soon as
possible when the server is torn down.

Reported-by: Alexander Ivanov 
Signed-off-by: Eric Blake 
---


[..]


-static void nbd_blockdev_client_closed(NBDClient *client, bool ignored)
+static void nbd_blockdev_client_closed(NBDClient *client, uint32_t cookie,
+   bool ignored)
  {
  nbd_client_put(client);
-assert(nbd_server->connections > 0);
-nbd_server->connections--;
-nbd_update_server_watch(nbd_server);
+/* Ignore any (late) connection made under a previous server */
+if (cookie == nbd_cookie) {


Creating a getter nbd_client_get_cookie(client), and using it instead of passing
the cookie together with the client, would simplify the patch a lot. [*]
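
Just to illustrate (such a getter doesn't exist yet, so the name is only a
suggestion), something like

uint32_t nbd_client_get_cookie(NBDClient *client)
{
    return client->close_cookie;
}

exported from nbd/server.c, so that callers don't need the cookie passed
to them separately.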

Hmm.. don't we need some atomic accessors for nbd_cookie? And for
nbd_server->connections.. The function is called from the client, which lives in
a coroutine and maybe in another thread? At least the client code does atomic
accesses of client->refcount..


+assert(nbd_server->connections > 0);
+nbd_server->connections--;
+nbd_update_server_watch(nbd_server);
+}
  }



[..]


@@ -1621,7 +1622,7 @@ static void client_close(NBDClient *client, bool 
negotiated)

  /* Also tell the client, so that they release their reference.  */
  if (client->close_fn) {
-client->close_fn(client, negotiated);
+client->close_fn(client, client->close_cookie, negotiated);


[*] passing client->close_cookie together with the client itself looks like we lack
a getter for .close_cookie


  }
  }



[..]


Hmm, instead of cookies and the additional NBDConn objects in the next patch, could
we simply have a list of connected NBDClient objects in NBDServer and a link to
NBDServer in NBDClient? (OK, we actually don't have NBDServer, but NBDServerData
in qemu and several global variables in qemu-nbd, so some refactoring is
needed to put the common state into NBDServer and add a clients list to it.)

This way, in nbd_server_free we'll just call client_close() in a loop. And in
client_close we'll have nbd_server_client_detach(client->server, client) instead
of client->close_fn(...). The server is freed only after all clients are closed,
and a client never tries to detach from another server.

This way, we could also implement several NBD servers working simultaneously if
we want.
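
Very roughly, something like this (a sketch with made-up names, not meant to
compile against the current code; NBDServer here is the common state mentioned
above, and NBDClient would grow a 'server' pointer plus a QTAILQ_ENTRY 'next'):

typedef struct NBDServer {
    QTAILQ_HEAD(, NBDClient) clients;  /* all clients accepted by this server */
    /* listener socket, tls-creds, max-connections, ... */
} NBDServer;

/* Called from client_close() instead of client->close_fn(...): */
static void nbd_server_client_detach(NBDServer *server, NBDClient *client)
{
    QTAILQ_REMOVE(&server->clients, client, next);
    nbd_client_put(client);
}

/* Server teardown closes only its own clients, so no cookie is needed: */
static void nbd_server_free(NBDServer *server)
{
    NBDClient *client, *next_client;

    QTAILQ_FOREACH_SAFE(client, &server->clients, next, next_client) {
        client_close(client, false);
    }
    g_free(server);
}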

--
Best regards,
Vladimir




Re: [PATCH v2 1/3] block/commit: implement final flush

2024-08-02 Thread Vladimir Sementsov-Ogievskiy

On 29.07.24 15:25, Kevin Wolf wrote:

Am 19.07.2024 um 12:35 hat Vladimir Sementsov-Ogievskiy geschrieben:

On 18.07.24 22:22, Kevin Wolf wrote:

Am 26.06.2024 um 16:50 hat Vladimir Sementsov-Ogievskiy geschrieben:

Actually block job is not completed without the final flush. It's
rather unexpected to have broken target when job was successfully
completed long ago and now we fail to flush or process just
crashed/killed.

Mirror job already has mirror_flush() for this. So, it's OK.

Do this for commit job too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
   block/commit.c | 41 +++--
   1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index 7c3fdcb0ca..81971692a2 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -134,6 +134,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
   int64_t n = 0; /* bytes */
   QEMU_AUTO_VFREE void *buf = NULL;
   int64_t len, base_len;
+bool need_final_flush = true;
   len = blk_co_getlength(s->top);
   if (len < 0) {
@@ -155,8 +156,8 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
   buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
-for (offset = 0; offset < len; offset += n) {
-bool copy;
+for (offset = 0; offset < len || need_final_flush; offset += n) {


In general, the control flow would be nicer to read if the final flush
weren't integrated into the loop, but just added after it.

But I assume this is pretty much required for pausing the job during
error handling in the final flush if you don't want to duplicate a lot
of the logic into a second loop?


Right, that's the reason.


This would probably be the right solution if it affected only commit.
But I've thought a bit more about this and given that the same thing
happens in all of the block jobs, I'm really wondering if introducing a
block job helper function wouldn't be better, so that each block job
could just add something like this after its main loop:

 do {
 ret = blk_co_flush();
 } while (block_job_handle_error(job, ret));

And the helper would call block_job_error_action(), stop the job if
necessary, check if it's cancelled, include a pause point etc.



Agree. Thanks, I'll try.
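
Something along these lines, perhaps (a rough sketch only; block_job_handle_error()
doesn't exist yet, its name comes from your suggestion, and I specialize it to
commit here just for illustration):

/* Returns true if the failed operation should be retried. */
static bool coroutine_fn block_job_handle_error(CommitBlockJob *s, int ret)
{
    BlockErrorAction action;

    if (ret >= 0) {
        return false;   /* success, nothing to retry */
    }

    action = block_job_error_action(&s->common, s->on_error, false, -ret);
    if (action == BLOCK_ERROR_ACTION_REPORT) {
        return false;   /* give up, the caller reports ret */
    }

    /* BLOCK_ERROR_ACTION_STOP pauses the job here; IGNORE just retries. */
    job_pause_point(&s->common.job);

    return !job_is_cancelled(&s->common.job);
}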

--
Best regards,
Vladimir




Re: [PATCH v2 5/7] qapi: add job-change

2024-08-02 Thread Vladimir Sementsov-Ogievskiy

On 18.07.24 13:59, Markus Armbruster wrote:

Vladimir Sementsov-Ogievskiy  writes:


Add a new-style command job-change, doing same thing as
block-job-change. The aim is finally deprecate block-job-* APIs and
move to job-* APIs.

We add a new command to qapi/block-core.json, not to
qapi/job.json to avoid resolving json file including loops for now.
This all would be a lot simple to refactor when we finally drop
deprecated block-job-* APIs.

@type argument of the new command immediately becomes deprecated.


Where?


Oops, that type-based union was dropped, so this sentence should be dropped as 
well.




Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  job-qmp.c| 14 ++
  qapi/block-core.json | 10 ++
  2 files changed, 24 insertions(+)

diff --git a/job-qmp.c b/job-qmp.c
index c764bd3801..248e68f554 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -139,6 +139,20 @@ void qmp_job_dismiss(const char *id, Error **errp)
  job_dismiss_locked(&job, errp);
  }
  
+void qmp_job_change(JobChangeOptions *opts, Error **errp)

+{
+Job *job;
+
+JOB_LOCK_GUARD();
+job = find_job_locked(opts->id, errp);
+
+if (!job) {
+return;
+}
+
+job_change_locked(job, opts, errp);
+}
+
  /* Called with job_mutex held. */
  static JobInfo *job_query_single_locked(Job *job, Error **errp)
  {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 660c7f4a48..9087ce300c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3104,6 +3104,16 @@
  { 'command': 'block-job-change',
'data': 'JobChangeOptions', 'boxed': true }
  
+##

+# @job-change:
+#
+# Change the block job's options.
+#
+# Since: 9.1
+##
+{ 'command': 'job-change',
+  'data': 'JobChangeOptions', 'boxed': true }
+
  ##
  # @BlockdevDiscardOptions:
  #




--
Best regards,
Vladimir




Re: [PATCH v2 6/7] qapi/block-core: derpecate block-job-change

2024-08-02 Thread Vladimir Sementsov-Ogievskiy

On 18.07.24 14:01, Markus Armbruster wrote:

Vladimir Sementsov-Ogievskiy  writes:


That's a first step towards moving to the newer job-* APIs.

The difference between block-job-change and job-change is in
find_block_job_locked() vs find_job_locked() functions. What's
different?

1. find_block_job_locked() checks that the found job is a block job. This is OK


Do you mean something like find_block_job_locked() finds only block
jobs, whereas find_job_locked() finds any kind of job?


Yes




when moving to the more generic API; there's no need to document this change.

2. find_block_job_locked() reports DeviceNotActive on failure, whereas
find_job_locked() reports GenericError. Still, for block-job-change
errors are not documented at all, so be silent in deprecated.txt as
well.

Signed-off-by: Vladimir Sementsov-Ogievskiy 




--
Best regards,
Vladimir




Re: [PATCH v2 3/4] block: allow commit to unmap zero blocks

2024-08-02 Thread Vladimir Sementsov-Ogievskiy

On 14.07.24 00:56, Vincent Vanlaer wrote:

Non-active block commits do not discard blocks only containing zeros,
causing images to lose sparseness after the commit. This commit fixes
that by writing zero blocks using blk_co_pwrite_zeroes rather than
writing them out as any other arbitrary data.

Signed-off-by: Vincent Vanlaer 
---
  block/commit.c | 19 +++
  1 file changed, 19 insertions(+)

diff --git a/block/commit.c b/block/commit.c
index fb54fc9560..6ce30927ac 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -130,6 +130,7 @@ static void commit_clean(Job *job)
  
  typedef enum CommitMethod {

  COMMIT_METHOD_COPY,
+COMMIT_METHOD_ZERO,
  COMMIT_METHOD_IGNORE,
  } CommitMethod;
  
@@ -185,6 +186,18 @@ static int coroutine_fn commit_run(Job *job, Error **errp)

  if (ret >= 0) {
  if (!(ret & BDRV_BLOCK_ALLOCATED)) {
  commit_method = COMMIT_METHOD_IGNORE;
+} else if (ret & BDRV_BLOCK_ZERO) {
+int64_t target_offset;
+int64_t target_bytes;
+WITH_GRAPH_RDLOCK_GUARD() {
+bdrv_round_to_subclusters(s->base_bs, offset, n,
+   &target_offset, &target_bytes);


indentation broken


+}
+
+if (target_offset == offset &&
+target_bytes == n) {
+commit_method = COMMIT_METHOD_ZERO;


Why is this needed? Could we blindly do write-zeroes at the original (offset, n)?
The underlying logic would use any possibility to write zeroes efficiently, and
unaligned tails (if any) would be written as data.


+}
  }
  
  switch (commit_method) {

@@ -198,6 +211,11 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
  }
  }
  break;
+case COMMIT_METHOD_ZERO:
+ret = blk_co_pwrite_zeroes(s->base, offset, n,
+BDRV_REQ_MAY_UNMAP);
+error_in_source = false;
+break;
  case COMMIT_METHOD_IGNORE:
  break;
  default:
@@ -216,6 +234,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
  continue;
  }
  }
+


extra unrelated hunk for style, I'd drop it


  /* Publish progress */
  job_progress_update(&s->common.job, n);
  


--
Best regards,
Vladimir




Re: [PATCH v2 2/4] block: refactor commit_run for multiple write types

2024-08-02 Thread Vladimir Sementsov-Ogievskiy

On 14.07.24 00:56, Vincent Vanlaer wrote:

Signed-off-by: Vincent Vanlaer


Reviewed-by: Vladimir Sementsov-Ogievskiy 

Honestly, I don't like this (mostly preexisting, but your patch makes the problem more
obvious) code for its "nested success path":

if (ret >= 0) {
    ret = ...
    if (ret >= 0) {
        ...


That's because we have the same complex error handling for all these errors. If I were to refactor
commit_run(), I'd move the get-block-status call together with the "if (copy)" block to a
separate function commit_iteration(), which would have a more readable style of error
reporting, like:

ret = ...
if (ret < 0) {
   return ret;
}

ret = ...
if (ret < 0) {
   return ret;
}
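
To be a bit more concrete, a sketch of such a commit_iteration() (untested, and
leaving out the error-action handling and progress reporting, so not a drop-in
replacement):

/* One chunk of the commit loop: returns 0 or a negative errno. */
static int coroutine_fn commit_iteration(CommitBlockJob *s, int64_t offset,
                                         int64_t *n, void *buf,
                                         bool *error_in_source)
{
    int ret;

    /* Copy only what is allocated above the base */
    ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
                                    offset, COMMIT_BUFFER_SIZE, n);
    if (ret < 0) {
        return ret;
    }
    if (ret == 0) {
        return 0;   /* nothing to copy in this chunk */
    }

    ret = blk_co_pread(s->top, offset, *n, buf, 0);
    if (ret < 0) {
        return ret;
    }

    ret = blk_co_pwrite(s->base, offset, *n, buf, 0);
    if (ret < 0) {
        *error_in_source = false;
        return ret;
    }

    return 0;
}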

 
--

Best regards,
Vladimir




Re: [PATCH v2 1/4] block: get type of block allocation in commit_run

2024-08-02 Thread Vladimir Sementsov-Ogievskiy

On 14.07.24 00:56, Vincent Vanlaer wrote:

bdrv_co_common_block_status_above not only returns whether the block is
allocated, but also if it contains zeroes.

Signed-off-by: Vincent Vanlaer


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs

2024-08-01 Thread Vladimir Sementsov-Ogievskiy

On 01.08.24 11:37, Michael S. Tsirkin wrote:

On Thu, Aug 01, 2024 at 11:35:19AM +0300, Vladimir Sementsov-Ogievskiy wrote:

On 01.07.24 23:55, Michael S. Tsirkin wrote:

On Mon, Jul 01, 2024 at 08:42:39AM -0400, Raphael Norwitz wrote:

I have no issues with these APIs, but I'm not a QMP expert so others
should review those bits.

For the vhost-user-blk code:

Acked-by: Raphael Norwitz 


Could the relevant bits get ack from qapi maintainers please?



We got them. Could you queue the patches please?



Tagged for after the freeze. Thanks!


Oh right, I missed the freeze. OK, thanks!







On Tue, Jun 25, 2024 at 8:19 AM Vladimir Sementsov-Ogievskiy
 wrote:


v5:
03: drop extra check on whether runstate is running


Vladimir Sementsov-Ogievskiy (3):
qdev-monitor: add option to report GenericError from find_device_state
vhost-user-blk: split vhost_user_blk_sync_config()
qapi: introduce device-sync-config

   hw/block/vhost-user-blk.c | 27 ++--
   hw/virtio/virtio-pci.c|  9 +++
   include/hw/qdev-core.h|  3 +++
   qapi/qdev.json| 24 ++
   system/qdev-monitor.c | 53 ---
   5 files changed, 105 insertions(+), 11 deletions(-)

--
2.34.1





--
Best regards,
Vladimir




--
Best regards,
Vladimir




Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs

2024-08-01 Thread Vladimir Sementsov-Ogievskiy

On 01.07.24 23:55, Michael S. Tsirkin wrote:

On Mon, Jul 01, 2024 at 08:42:39AM -0400, Raphael Norwitz wrote:

I have no issues with these APIs, but I'm not a QMP expert so others
should review those bits.

For the vhost-user-blk code:

Acked-by: Raphael Norwitz 


Could the relevant bits get ack from qapi maintainers please?



We got them. Could you queue the patches please?





On Tue, Jun 25, 2024 at 8:19 AM Vladimir Sementsov-Ogievskiy
 wrote:


v5:
03: drop extra check on whether runstate is running


Vladimir Sementsov-Ogievskiy (3):
   qdev-monitor: add option to report GenericError from find_device_state
   vhost-user-blk: split vhost_user_blk_sync_config()
   qapi: introduce device-sync-config

  hw/block/vhost-user-blk.c | 27 ++--
  hw/virtio/virtio-pci.c|  9 +++
  include/hw/qdev-core.h|  3 +++
  qapi/qdev.json| 24 ++
  system/qdev-monitor.c | 53 ---
  5 files changed, 105 insertions(+), 11 deletions(-)

--
2.34.1





--
Best regards,
Vladimir




Re: [PATCH v9 4/7] qapi: add blockdev-replace command

2024-07-19 Thread Vladimir Sementsov-Ogievskiy

On 18.07.24 15:00, Markus Armbruster wrote:

Vladimir Sementsov-Ogievskiy  writes:


Add a command that can replace bs in following BdrvChild structures:

  - qdev blk root child
  - block-export blk root child
  - any child of BlockDriverState selected by child-name

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  blockdev.c | 56 +++
  qapi/block-core.json   | 88 ++
  stubs/blk-by-qdev-id.c | 13 +++
  stubs/meson.build  |  1 +
  4 files changed, 158 insertions(+)
  create mode 100644 stubs/blk-by-qdev-id.c

diff --git a/blockdev.c b/blockdev.c
index ba7e90b06e..2190467022 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3559,6 +3559,62 @@ void qmp_x_blockdev_set_iothread(const char *node_name, 
StrOrNull *iothread,
  bdrv_try_change_aio_context(bs, new_context, NULL, errp);
  }
  
+void qmp_blockdev_replace(BlockdevReplace *repl, Error **errp)

+{
+BdrvChild *child = NULL;
+BlockDriverState *new_child_bs;
+
+if (repl->parent_type == BLOCK_PARENT_TYPE_DRIVER) {
+BlockDriverState *parent_bs;
+
+parent_bs = bdrv_find_node(repl->u.driver.node_name);
+if (!parent_bs) {
+error_setg(errp, "Block driver node with node-name '%s' not "
+   "found", repl->u.driver.node_name);
+return;
+}
+
+child = bdrv_find_child(parent_bs, repl->u.driver.child);
+if (!child) {
+error_setg(errp, "Block driver node '%s' doesn't have child "
+   "named '%s'", repl->u.driver.node_name,
+   repl->u.driver.child);
+return;
+}
+} else {
+/* Other types are similar, they work through blk */
+BlockBackend *blk;
+bool is_qdev = repl->parent_type == BLOCK_PARENT_TYPE_QDEV;
+const char *id =
+is_qdev ? repl->u.qdev.qdev_id : repl->u.export.export_id;
+
+assert(is_qdev || repl->parent_type == BLOCK_PARENT_TYPE_EXPORT);
+
+blk = is_qdev ? blk_by_qdev_id(id, errp) : blk_by_export_id(id, errp);


blk_by_export_id() finds export @exp, and returns the associated block
backend exp->blk.  Fine.

blk_by_qdev_id() finds the device, and then searches @block_backends for
a blk with blk->dev == blk.  If a device has more than one block
backend, you get the one first in @block_backends.  I figure that's the
one created first.

Interface issue: when a device has multiple block backends, only one of
them can be replaced, and which one is kind of random.

Do such devices exist?


Hmm.. I expect they don't. If there is a problem, it's pre-existing; all
callers of qmp_get_blk are affected. But honestly, I don't know.



If no, could they exist?

If yes, what should we do about it now?


Maybe continue the loop in blk_by_dev() up to the end, and if two blks are found
for the device, return an error? Or even abort?

And add a check to blk_attach_dev() that we don't attach the same device to several
blks.
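
For the first part, roughly something like this (just a sketch; whether to error
out or to assert is an open question, and the helper name is made up):

static BlockBackend *blk_by_dev_unique(void *dev, Error **errp)
{
    BlockBackend *blk = NULL;
    BlockBackend *found = NULL;

    while ((blk = blk_all_next(blk)) != NULL) {
        if (blk_get_attached_dev(blk) == dev) {
            if (found) {
                error_setg(errp, "Device has more than one block backend");
                return NULL;
            }
            found = blk;
        }
    }

    return found;
}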




+if (!blk) {
+return;
+}
+
+child = blk_root(blk);
+if (!child) {
+error_setg(errp, "%s '%s' is empty, nothing to replace",
+   is_qdev ? "Device" : "Export", id);
+return;
+}
+}
+
+assert(child);
+assert(child->bs);
+
+new_child_bs = bdrv_find_node(repl->new_child);
+if (!new_child_bs) {
+error_setg(errp, "Node '%s' not found", repl->new_child);
+return;
+}
+
+bdrv_replace_child_bs(child, new_child_bs, errp);
+}
+
  QemuOptsList qemu_common_drive_opts = {
  .name = "drive",
  .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
diff --git a/qapi/block-core.json b/qapi/block-core.json
index df5e07debd..0a6f08a6e0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -6148,3 +6148,91 @@
  ##
  { 'struct': 'DummyBlockCoreForceArrays',
'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } }
+
+##
+# @BlockParentType:
+#
+# @qdev: block device, such as created by device_add, and denoted by
+# qdev-id
+#
+# @driver: block driver node, such as created by blockdev-add, and
+# denoted by node-name


node-name and child?


Hmm. I'd say that the parent is denoted only by node-name, as the parent is the node itself
(the fact that we have a separate BdrvChild structure as a parent I'd keep as an
internal implementation detail). But the parent may have several children, and the concrete child
is denoted by @child.




+#
+# @export: block export, such created by block-export-add, and
+# denoted by export-id
+#
+# Since 9.1
+##


I'm kind of unhappy with this doc comment.  I feel it makes sense only in
the context of its use.

Re: [PATCH v9 2/7] block/export: add blk_by_export_id()

2024-07-19 Thread Vladimir Sementsov-Ogievskiy

On 18.07.24 14:48, Markus Armbruster wrote:

Vladimir Sementsov-Ogievskiy  writes:


We need it for further blockdev-replace functionality.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/export/export.c   | 18 ++
  include/sysemu/block-backend-global-state.h |  1 +
  2 files changed, 19 insertions(+)

diff --git a/block/export/export.c b/block/export/export.c
index 6d51ae8ed7..57beae7982 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -355,3 +355,21 @@ BlockExportInfoList *qmp_query_block_exports(Error **errp)
  
  return head;

  }
+
+BlockBackend *blk_by_export_id(const char *id, Error **errp)
+{
+BlockExport *exp;
+
+exp = blk_exp_find(id);
+if (exp == NULL) {
+error_setg(errp, "Export '%s' not found", id);
+return NULL;
+}
+
+if (!exp->blk) {
+error_setg(errp, "Export '%s' is empty", id);


Can this happen?



Hmm, looking at the code in blk_exp_add():

    assert(exp->blk != NULL);

    QLIST_INSERT_HEAD(&block_exports, exp, next);

    return exp;

it seems it can't. And I can't find anything changing exp->blk except for blk_exp_add().

Will switch to an assertion.
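
I.e. roughly:

BlockBackend *blk_by_export_id(const char *id, Error **errp)
{
    BlockExport *exp = blk_exp_find(id);

    if (exp == NULL) {
        error_setg(errp, "Export '%s' not found", id);
        return NULL;
    }

    /* blk_exp_add() asserts exp->blk != NULL, so this always holds */
    assert(exp->blk);

    return exp->blk;
}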


+return NULL;
+}
+
+return exp->blk;
+}
diff --git a/include/sysemu/block-backend-global-state.h 
b/include/sysemu/block-backend-global-state.h
index ccb35546a1..410d0cc5c7 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -74,6 +74,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
  DeviceState *blk_get_attached_dev(BlockBackend *blk);
  BlockBackend *blk_by_dev(void *dev);
  BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
+BlockBackend *blk_by_export_id(const char *id, Error **errp);
  void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
  
  void blk_activate(BlockBackend *blk, Error **errp);




--
Best regards,
Vladimir




Re: [PATCH v2 1/3] block/commit: implement final flush

2024-07-19 Thread Vladimir Sementsov-Ogievskiy

On 18.07.24 22:22, Kevin Wolf wrote:

Am 26.06.2024 um 16:50 hat Vladimir Sementsov-Ogievskiy geschrieben:

Actually block job is not completed without the final flush. It's
rather unexpected to have broken target when job was successfully
completed long ago and now we fail to flush or process just
crashed/killed.

Mirror job already has mirror_flush() for this. So, it's OK.

Do this for commit job too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/commit.c | 41 +++--
  1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index 7c3fdcb0ca..81971692a2 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -134,6 +134,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
  int64_t n = 0; /* bytes */
  QEMU_AUTO_VFREE void *buf = NULL;
  int64_t len, base_len;
+bool need_final_flush = true;
  
  len = blk_co_getlength(s->top);

  if (len < 0) {
@@ -155,8 +156,8 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
  
  buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
  
-for (offset = 0; offset < len; offset += n) {

-bool copy;
+for (offset = 0; offset < len || need_final_flush; offset += n) {


In general, the control flow would be nicer to read if the final flush
weren't integrated into the loop, but just added after it.

But I assume this is pretty much required for pausing the job during
error handling in the final flush if you don't want to duplicate a lot
of the logic into a second loop?


Right, that's the reason.




+bool copy = false;
  bool error_in_source = true;
  
  /* Note that even when no rate limit is applied we need to yield

@@ -166,22 +167,34 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
  if (job_is_cancelled(&s->common.job)) {
  break;
  }
-/* Copy if allocated above the base */
-ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
-offset, COMMIT_BUFFER_SIZE, &n);
-copy = (ret > 0);
-trace_commit_one_iteration(s, offset, n, ret);
-if (copy) {
-assert(n < SIZE_MAX);
  
-ret = blk_co_pread(s->top, offset, n, buf, 0);

-if (ret >= 0) {
-ret = blk_co_pwrite(s->base, offset, n, buf, 0);
-if (ret < 0) {
-error_in_source = false;
+if (offset < len) {
+/* Copy if allocated above the base */
+ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
+offset, COMMIT_BUFFER_SIZE, &n);
+copy = (ret > 0);
+trace_commit_one_iteration(s, offset, n, ret);
+if (copy) {
+assert(n < SIZE_MAX);
+
+ret = blk_co_pread(s->top, offset, n, buf, 0);
+if (ret >= 0) {
+ret = blk_co_pwrite(s->base, offset, n, buf, 0);
+if (ret < 0) {
+error_in_source = false;
+}
  }
  }
+} else {
+assert(need_final_flush);
+ret = blk_co_flush(s->base);
+if (ret < 0) {
+error_in_source = false;
+} else {
+need_final_flush = false;
+}


Should we set n = 0 in this block to avoid counting the last chunk twice
for the progress?


Oops, right. Will fix, thanks




  }
+
  if (ret < 0) {
  BlockErrorAction action =
  block_job_error_action(&s->common, s->on_error,


Kevin



--
Best regards,
Vladimir




Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs

2024-07-19 Thread Vladimir Sementsov-Ogievskiy

On 18.07.24 11:31, Markus Armbruster wrote:

Vladimir Sementsov-Ogievskiy  writes:


Ping. Markus, Eric, could someone give an ACK for the QAPI part?


I apologize for the delay.  It was pretty bad.



No problem, I myself cause worse delays these days when busy with other work. Thanks
for reviewing!

--
Best regards,
Vladimir




Re: [PATCH v5 3/3] qapi: introduce device-sync-config

2024-07-19 Thread Vladimir Sementsov-Ogievskiy

On 18.07.24 11:27, Markus Armbruster wrote:

Vladimir Sementsov-Ogievskiy  writes:


Add command to sync config from vhost-user backend to the device. It
may be helpful when VHOST_USER_SLAVE_CONFIG_CHANGE_MSG failed or not
triggered interrupt to the guest or just not available (not supported
by vhost-user server).

Command result is racy if allow it during migration. Let's allow the
sync only in RUNNING state.


Is this still accurate?  The runstate_is_running() check is gone in
v4, the migration_is_running() check remains.


Right, better to fix the commit message like:

Command result is racy if allow it during migration. Let's not allow it.




Signed-off-by: Vladimir Sementsov-Ogievskiy 


QAPI schema and QMP part:
Signed-off-by: Markus Armbruster 



--
Best regards,
Vladimir




Re: [PATCH v2] block/reqlist: allow adding overlapping requests

2024-07-12 Thread Vladimir Sementsov-Ogievskiy

On 12.07.24 17:07, Fiona Ebner wrote:

Allow overlapping request by removing the assert that made it
impossible. There are only two callers:

1. block_copy_task_create()

It already asserts the very same condition before calling
reqlist_init_req().

2. cbw_snapshot_read_lock()

There is no need to have read requests be non-overlapping in
copy-before-write when used for snapshot-access. In fact, there was no
protection against two callers of cbw_snapshot_read_lock() calling
reqlist_init_req() with overlapping ranges and this could lead to an
assertion failure [1].

In particular, with the reproducer script below [0], two
cbw_co_snapshot_block_status() callers could race, with the second
calling reqlist_init_req() before the first one finishes and removes
its conflicting request.

[0]:


#!/bin/bash -e
dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024
./qemu-img create /tmp/fleecing.raw -f raw 1G
(
./qemu-system-x86_64 --qmp stdio \
--blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \
--blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw 
\
<

[1]:


#5  0x71e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101
#6  0x615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23
#7  0x6152853e2d98 in cbw_snapshot_read_lock (...) at 
../block/copy-before-write.c:237
#8  0x6152853e3068 in cbw_co_snapshot_block_status (...) at 
../block/copy-before-write.c:304
#9  0x6152853f4d22 in bdrv_co_snapshot_block_status (...) at 
../block/io.c:3726
#10 0x61528543a63e in snapshot_access_co_block_status (...) at 
../block/snapshot-access.c:48
#11 0x6152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474
#12 0x6152853f2016 in bdrv_co_common_block_status_above (...) at 
../block/io.c:2652
#13 0x6152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732
#14 0x6152853d9a86 in blk_co_block_status_above (...) at 
../block/block-backend.c:1473
#15 0x61528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374
#16 0x61528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481
#17 0x61528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978
#18 0x61528538f906 in nbd_trip (...) at ../nbd/server.c:3121
#19 0x6152855a7caf in coroutine_trampoline (...) at 
../util/coroutine-ucontext.c:175


Cc: qemu-sta...@nongnu.org
Suggested-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Fiona Ebner 
---

Changes in v2:
* different approach, allowing overlapping requests for
   copy-before-write rather than waiting for them. block-copy already
   asserts there are no conflicts before adding a request.

  block/copy-before-write.c | 3 ++-
  block/reqlist.c   | 2 --
  2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 853e01a1eb..28f6a096cd 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -66,7 +66,8 @@ typedef struct BDRVCopyBeforeWriteState {
  
  /*

   * @frozen_read_reqs: current read requests for fleecing user in bs->file
- * node. These areas must not be rewritten by guest.
+ * node. These areas must not be rewritten by guest. There can be multiple
+ * overlapping read requests.
   */
  BlockReqList frozen_read_reqs;
  
diff --git a/block/reqlist.c b/block/reqlist.c

index 08cb57cfa4..098e807378 100644
--- a/block/reqlist.c
+++ b/block/reqlist.c
@@ -20,8 +20,6 @@
  void reqlist_init_req(BlockReqList *reqs, BlockReq *req, int64_t offset,
int64_t bytes)
  {
-assert(!reqlist_find_conflict(reqs, offset, bytes));
-
  *req = (BlockReq) {
  .offset = offset,
  .bytes = bytes,



Reviewed-by: Vladimir Sementsov-Ogievskiy 

Thanks, applied to my block branch

--
Best regards,
Vladimir




Re: [PATCH v3 0/2] backup: allow specifying minimum cluster size

2024-07-12 Thread Vladimir Sementsov-Ogievskiy

On 11.07.24 15:09, Fiona Ebner wrote:

Discussion for v2:
https://lore.kernel.org/qemu-devel/20240528120114.344416-1-f.eb...@proxmox.com/

Changes in v3:
* Pass min_cluster_size option directly without checking
   has_min_cluster_size, because the default is 0 anyways.
* Calculate maximum of passed-in argument and default once at the
   beginning of block_copy_calculate_cluster_size()
* Update warning message to reflect actual value used
* Do not leak qdict in error case
* Use PRI{i,u}64 macros

Discussion for v1:
https://lore.kernel.org/qemu-devel/20240308155158.830258-1-f.eb...@proxmox.com/
-
Changes in v2:
* Use 'size' type in QAPI.
* Remove option in cbw_parse_options(), i.e. before parsing generic
   blockdev options.
* Reword commit messages hoping to describe the issue in a more
   straight-forward way.

In the context of backup fleecing, discarding the source will not work
when the fleecing image has a larger granularity than the one used for
block-copy operations (can happen if the backup target has smaller
cluster size), because cbw_co_pdiscard_snapshot() will align down the
discard requests and thus effectively ignore then.

To make @discard-source work in such a scenario, allow specifying the
minimum cluster size used for block-copy operations and thus in
particular also the granularity for discard requests to the source.

Fiona Ebner (2):
   copy-before-write: allow specifying minimum cluster size
   backup: add minimum cluster size to performance options

  block/backup.c |  2 +-
  block/block-copy.c | 36 ++--
  block/copy-before-write.c  | 14 +-
  block/copy-before-write.h  |  1 +
  blockdev.c |  3 +++
  include/block/block-copy.h |  1 +
  qapi/block-core.json   | 17 ++---
  7 files changed, 59 insertions(+), 15 deletions(-)



Thanks, applied to my block branch.

--
Best regards,
Vladimir




Re: [PATCH v3 2/2] backup: add minimum cluster size to performance options

2024-07-12 Thread Vladimir Sementsov-Ogievskiy

On 11.07.24 15:09, Fiona Ebner wrote:

In the context of backup fleecing, discarding the source will not work
when the fleecing image has a larger granularity than the one used for
block-copy operations (can happen if the backup target has smaller
cluster size), because cbw_co_pdiscard_snapshot() will align down the
discard requests and thus effectively ignore then.

To make @discard-source work in such a scenario, allow specifying the
minimum cluster size used for block-copy operations and thus in
particular also the granularity for discard requests to the source.

Suggested-by: Vladimir Sementsov-Ogievskiy
Acked-by: Markus Armbruster  (QAPI schema)
Signed-off-by: Fiona Ebner


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH v3 1/2] copy-before-write: allow specifying minimum cluster size

2024-07-12 Thread Vladimir Sementsov-Ogievskiy

On 11.07.24 15:09, Fiona Ebner wrote:

In the context of backup fleecing, discarding the source will not work
when the fleecing image has a larger granularity than the one used for
block-copy operations (can happen if the backup target has smaller
cluster size), because cbw_co_pdiscard_snapshot() will align down the
discard requests and thus effectively ignore then.

To make @discard-source work in such a scenario, allow specifying the
minimum cluster size used for block-copy operations and thus in
particular also the granularity for discard requests to the source.

The type 'size' (corresponding to uint64_t in C) is used in QAPI to
rule out negative inputs and for consistency with already existing
@cluster-size parameters. Since block_copy_calculate_cluster_size()
uses int64_t for its result, a check that the input is not too large
is added in block_copy_state_new() before calling it. The calculation
in block_copy_calculate_cluster_size() is done in the target int64_t
type.

Suggested-by: Vladimir Sementsov-Ogievskiy
Acked-by: Markus Armbruster  (QAPI schema)
Signed-off-by: Fiona Ebner


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir




Re: [PATCH] block/copy-before-write: wait for conflicts when read locking to avoid assertion failure

2024-07-12 Thread Vladimir Sementsov-Ogievskiy

On 11.07.24 16:36, Fiona Ebner wrote:

There is no protection against two callers of cbw_snapshot_read_lock()
calling reqlist_init_req() with overlapping ranges, and
reqlist_init_req() asserts that there are no conflicting requests.

In particular, two cbw_co_snapshot_block_status() callers can race,
with the second calling reqlist_init_req() before the first one
finishes and removes its conflicting request, leading to an assertion
failure.

Reproducer script [0] and backtrace [1] are attached below.



Understood. But it seems that in the case of the CBW read-lock there is nothing bad about
intersecting read requests?

The reqlist is shared with backup, where we do care to avoid intersecting requests in
the list. What about just moving the assertion to block_copy_task_create()? And
adding a comment somewhere that we support intersecting reads in frozen_read_reqs.




[0]:


#!/bin/bash -e
dd if=/dev/urandom of=/tmp/disk.raw bs=1M count=1024
./qemu-img create /tmp/fleecing.raw -f raw 1G
(
./qemu-system-x86_64 --qmp stdio \
--blockdev raw,node-name=node0,file.driver=file,file.filename=/tmp/disk.raw \
--blockdev raw,node-name=node1,file.driver=file,file.filename=/tmp/fleecing.raw 
\
<

[1]:


#5  0x71e5f0088eb2 in __GI___assert_fail (...) at ./assert/assert.c:101
#6  0x615285438017 in reqlist_init_req (...) at ../block/reqlist.c:23
#7  0x6152853e2d98 in cbw_snapshot_read_lock (...) at 
../block/copy-before-write.c:237
#8  0x6152853e3068 in cbw_co_snapshot_block_status (...) at 
../block/copy-before-write.c:304
#9  0x6152853f4d22 in bdrv_co_snapshot_block_status (...) at 
../block/io.c:3726
#10 0x61528543a63e in snapshot_access_co_block_status (...) at 
../block/snapshot-access.c:48
#11 0x6152853f1a0a in bdrv_co_do_block_status (...) at ../block/io.c:2474
#12 0x6152853f2016 in bdrv_co_common_block_status_above (...) at 
../block/io.c:2652
#13 0x6152853f22cf in bdrv_co_block_status_above (...) at ../block/io.c:2732
#14 0x6152853d9a86 in blk_co_block_status_above (...) at 
../block/block-backend.c:1473
#15 0x61528538da6c in blockstatus_to_extents (...) at ../nbd/server.c:2374
#16 0x61528538deb1 in nbd_co_send_block_status (...) at ../nbd/server.c:2481
#17 0x61528538f424 in nbd_handle_request (...) at ../nbd/server.c:2978
#18 0x61528538f906 in nbd_trip (...) at ../nbd/server.c:3121
#19 0x6152855a7caf in coroutine_trampoline (...) at 
../util/coroutine-ucontext.c:175


Signed-off-by: Fiona Ebner 
---
  block/copy-before-write.c | 1 +
  1 file changed, 1 insertion(+)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 853e01a1eb..376ff3f3e1 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -234,6 +234,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t 
offset, int64_t bytes,
  *req = (BlockReq) {.offset = -1, .bytes = -1};
  *file = s->target;
  } else {
+reqlist_wait_all(&s->frozen_read_reqs, offset, bytes, &s->lock);
  reqlist_init_req(&s->frozen_read_reqs, req, offset, bytes);
  *file = bs->file;
  }


--
Best regards,
Vladimir




Re: [PATCH 1/2] block: zero data data corruption using prealloc-filter

2024-07-12 Thread Vladimir Sementsov-Ogievskiy

On 11.07.24 16:32, Andrey Drobyshev wrote:

From: "Denis V. Lunev" 

We have observed that some clusters in the QCOW2 files are zeroed
while preallocation filter is used.

We are able to trace down the following sequence when prealloc-filter
is used:
 co=0x55e7cbed7680 qcow2_co_pwritev_task()
 co=0x55e7cbed7680 preallocate_co_pwritev_part()
 co=0x55e7cbed7680 handle_write()
 co=0x55e7cbed7680 bdrv_co_do_pwrite_zeroes()
 co=0x55e7cbed7680 raw_do_pwrite_zeroes()
 co=0x7f9edb7fe500 do_fallocate()

Here coroutine 0x55e7cbed7680 is being blocked waiting while coroutine
0x7f9edb7fe500 will finish with fallocate of the file area. OK. It is
time to handle next coroutine, which
 co=0x55e7cbee91b0 qcow2_co_pwritev_task()
 co=0x55e7cbee91b0 preallocate_co_pwritev_part()
 co=0x55e7cbee91b0 handle_write()
 co=0x55e7cbee91b0 bdrv_co_do_pwrite_zeroes()
 co=0x55e7cbee91b0 raw_do_pwrite_zeroes()
 co=0x7f9edb7deb00 do_fallocate()

The trouble comes here. Coroutine 0x55e7cbed7680 has not advanced
file_end yet and coroutine 0x55e7cbee91b0 will start fallocate() for
the same area. This means that if (once fallocate is started inside
0x7f9edb7deb00) original fallocate could end and the real write will
be executed. In that case write() request is handled at the same time
as fallocate().

Normally we should protect s->file_end while it is detected that
preallocation is needed. The patch introduces file_end_lock for it to be
protected when run in the coroutine context.

Note: the lock is taken only once it is detected that the preallocation
is really required. This is not a frequent case due to the preallocation
nature, thus the patch should not have a performance impact.

Originally-by: Denis V. Lunev 
Co-authored-by: Andrey Drobyshev 
Signed-off-by: Denis V. Lunev 
Signed-off-by: Andrey Drobyshev 
---
  block/preallocate.c | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/block/preallocate.c b/block/preallocate.c
index d215bc5d6d..9cb2c97635 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -78,6 +78,8 @@ typedef struct BDRVPreallocateState {
  
  /* Gives up the resize permission on children when parents don't need it */

  QEMUBH *drop_resize_bh;
+
+CoMutex file_end_lock;
  } BDRVPreallocateState;
  
  static int preallocate_drop_resize(BlockDriverState *bs, Error **errp);

@@ -170,6 +172,8 @@ static int preallocate_open(BlockDriverState *bs, QDict 
*options, int flags,
  ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
  
+qemu_co_mutex_init(&s->file_end_lock);

+
  return 0;
  }
  
@@ -342,6 +346,7 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t bytes,

  return false;
  }
  
+QEMU_LOCK_GUARD(&s->file_end_lock);

  if (s->file_end < 0) {
  s->file_end = s->data_end;
  }
@@ -353,6 +358,8 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t 
bytes,
  
  /* We have valid s->data_end, and request writes beyond it. */
  
+QEMU_LOCK_GUARD(&s->file_end_lock);

+
  s->data_end = end;
  if (s->zero_start < 0 || !want_merge_zero) {
  s->zero_start = end;
@@ -428,6 +435,8 @@ preallocate_co_truncate(BlockDriverState *bs, int64_t 
offset,
  BDRVPreallocateState *s = bs->opaque;
  int ret;
  
+QEMU_LOCK_GUARD(&s->file_end_lock);

+
  if (s->data_end >= 0 && offset > s->data_end) {
  if (s->file_end < 0) {
  s->file_end = bdrv_co_getlength(bs->file->bs);
@@ -501,6 +510,8 @@ preallocate_co_getlength(BlockDriverState *bs)
  return s->data_end;
  }
  
+QEMU_LOCK_GUARD(&s->file_end_lock);

+
  ret = bdrv_co_getlength(bs->file->bs);
  
  if (has_prealloc_perms(bs)) {



Hmm, it seems the preallocate driver is neither thread-safe nor coroutine-safe. I think
a co-mutex is a good idea. Still, protecting only s->file_end may not be enough:

- we do want to keep the data_end / zero_start / file_end variables consistent
- probably, just make the whole preallocation code a critical section? Maybe with
atomic reads of the variables for the fast path (for writes which don't need
preallocation), along the lines of the sketch below
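
Very roughly (untested; helper names are made up, only to illustrate the suggested
structure, assuming the CoMutex from this patch):

/* Fast path: writes entirely below data_end need nothing; a single atomic
 * read, no lock (qatomic_read__nocheck() because data_end is 64 bit). */
static bool prealloc_covered(BDRVPreallocateState *s,
                             int64_t offset, int64_t bytes)
{
    int64_t data_end = qatomic_read__nocheck(&s->data_end);

    return data_end >= 0 && offset + bytes <= data_end;
}

static bool coroutine_fn handle_write(BlockDriverState *bs, int64_t offset,
                                      int64_t bytes, bool want_merge_zero)
{
    BDRVPreallocateState *s = bs->opaque;

    if (prealloc_covered(s, offset, bytes)) {
        return false;
    }

    /* Slow path: everything that reads or updates data_end, zero_start and
     * file_end runs under one critical section. */
    WITH_QEMU_LOCK_GUARD(&s->file_end_lock) {
        /* the current handle_write() body would go here, unchanged */
    }

    return want_merge_zero;
}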


--
Best regards,
Vladimir




Re: [PATCH v2 1/2] block: zero data data corruption using prealloc-filter

2024-07-12 Thread Vladimir Sementsov-Ogievskiy

On 12.07.24 12:46, Andrey Drobyshev wrote:

From: "Denis V. Lunev" 

We have observed that some clusters in the QCOW2 files are zeroed
while preallocation filter is used.

We are able to trace down the following sequence when prealloc-filter
is used:
 co=0x55e7cbed7680 qcow2_co_pwritev_task()
 co=0x55e7cbed7680 preallocate_co_pwritev_part()
 co=0x55e7cbed7680 handle_write()
 co=0x55e7cbed7680 bdrv_co_do_pwrite_zeroes()
 co=0x55e7cbed7680 raw_do_pwrite_zeroes()
 co=0x7f9edb7fe500 do_fallocate()

Here coroutine 0x55e7cbed7680 is being blocked waiting while coroutine
0x7f9edb7fe500 will finish with fallocate of the file area. OK. It is
time to handle next coroutine, which
 co=0x55e7cbee91b0 qcow2_co_pwritev_task()
 co=0x55e7cbee91b0 preallocate_co_pwritev_part()
 co=0x55e7cbee91b0 handle_write()
 co=0x55e7cbee91b0 bdrv_co_do_pwrite_zeroes()
 co=0x55e7cbee91b0 raw_do_pwrite_zeroes()
 co=0x7f9edb7deb00 do_fallocate()

The trouble comes here. Coroutine 0x55e7cbed7680 has not advanced
file_end yet and coroutine 0x55e7cbee91b0 will start fallocate() for
the same area. This means that if (once fallocate is started inside
0x7f9edb7deb00) original fallocate could end and the real write will
be executed. In that case write() request is handled at the same time
as fallocate().

The patch moves s->file_lock assignment before fallocate and that is


text needs to be updated


crucial. The idea is that all subsequent requests into the area
being preallocated will be issued as just writes without fallocate
to this area and they will not proceed thanks to overlapping
requests mechanics. If preallocation fails, we will just switch
to the normal expand-by-write behavior and that is not a problem
except for performance.

Signed-off-by: Denis V. Lunev 
Tested-by: Andrey Drobyshev 
---
  block/preallocate.c | 8 +++-
  1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/preallocate.c b/block/preallocate.c
index d215bc5d6d..ecf0aa4baa 100644
--- a/block/preallocate.c
+++ b/block/preallocate.c
@@ -383,6 +383,13 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t 
bytes,
  
  want_merge_zero = want_merge_zero && (prealloc_start <= offset);
  
+/*

+ * Assign file_end before making actual preallocation. This will ensure
+ * that next request performed while preallocation is in progress will
+ * be passed without preallocation.
+ */
+s->file_end = prealloc_end;
+
  ret = bdrv_co_pwrite_zeroes(
  bs->file, prealloc_start, prealloc_end - prealloc_start,
  BDRV_REQ_NO_FALLBACK | BDRV_REQ_SERIALISING | BDRV_REQ_NO_WAIT);
@@ -391,7 +398,6 @@ handle_write(BlockDriverState *bs, int64_t offset, int64_t 
bytes,
  return false;
  }
  
-s->file_end = prealloc_end;

  return want_merge_zero;
  }
  



Hmm. But this way we set both s->file_end and s->zero_start prior to the actual
write-zero operation. This means that the next write-zero operation may take the fast path (see
preallocate_co_pwrite_zeroes()) and return success even before the actual
preallocation write_zeroes operation finishes (which may also fail). It seems we need to update
the logic around s->zero_start too.

--
Best regards,
Vladimir




Re: [PATCH v5 0/3] vhost-user-blk: live resize additional APIs

2024-07-11 Thread Vladimir Sementsov-Ogievskiy

Ping. Markus, Eric, could someone give an ACK for the QAPI part?

On 25.06.24 15:18, Vladimir Sementsov-Ogievskiy wrote:

v5:
03: drop extra check on whether runstate is running


Vladimir Sementsov-Ogievskiy (3):
   qdev-monitor: add option to report GenericError from find_device_state
   vhost-user-blk: split vhost_user_blk_sync_config()
   qapi: introduce device-sync-config

  hw/block/vhost-user-blk.c | 27 ++--
  hw/virtio/virtio-pci.c|  9 +++
  include/hw/qdev-core.h|  3 +++
  qapi/qdev.json| 24 ++
  system/qdev-monitor.c | 53 ---
  5 files changed, 105 insertions(+), 11 deletions(-)



--
Best regards,
Vladimir




Re: [PATCH] block/curl: rewrite http header parsing function

2024-07-01 Thread Vladimir Sementsov-Ogievskiy

On 01.07.24 09:55, Michael Tokarev wrote:

01.07.2024 09:54, Vladimir Sementsov-Ogievskiy wrote:


+    const char *t = "accept-ranges : bytes "; /* A lowercase template */


Note: you make the parser less strict: you allow "bytes" to be uppercase (that was allowed
only for "accept-ranges"), and you allow whitespace before the colon.


Yes, exactly.

I should add this to the description (wanted to do that but forgot).
I'll update the patch (without re-sending) - hopefully it's okay to
keep your S-o-b :)



Of course!

--
Best regards,
Vladimir




Re: [PATCH] block/curl: rewrite http header parsing function

2024-06-30 Thread Vladimir Sementsov-Ogievskiy

On 29.06.24 17:25, Michael Tokarev wrote:

Existing code was long, unclear and twisty.

Signed-off-by: Michael Tokarev 


Reviewed-by: Vladimir Sementsov-Ogievskiy 


---
  block/curl.c | 44 ++--
  1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/block/curl.c b/block/curl.c
index 419f7c89ef..9802d0319d 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -210,37 +210,29 @@ static size_t curl_header_cb(void *ptr, size_t size, 
size_t nmemb, void *opaque)
  {
  BDRVCURLState *s = opaque;
  size_t realsize = size * nmemb;
-const char *header = (char *)ptr;
-const char *end = header + realsize;
-const char *accept_ranges = "accept-ranges:";
-const char *bytes = "bytes";
+const char *p = ptr;
+const char *end = p + realsize;
+const char *t = "accept-ranges : bytes "; /* A lowercase template */


Note: you make the parser less strict: you allow "bytes" to be uppercase (that was allowed
only for "accept-ranges"), and you allow whitespace before the colon.

  
-if (realsize >= strlen(accept_ranges)

-&& g_ascii_strncasecmp(header, accept_ranges,
-   strlen(accept_ranges)) == 0) {
-
-char *p = strchr(header, ':') + 1;
-
-/* Skip whitespace between the header name and value. */
-while (p < end && *p && g_ascii_isspace(*p)) {
-p++;
-}
-
-if (end - p >= strlen(bytes)
-&& strncmp(p, bytes, strlen(bytes)) == 0) {
-
-/* Check that there is nothing but whitespace after the value. */
-p += strlen(bytes);
-while (p < end && *p && g_ascii_isspace(*p)) {
-p++;
-}
-
-if (p == end || !*p) {
-s->accept_range = true;
+/* check if header matches the "t" template */
+for (;;) {
+if (*t == ' ') { /* space in t matches any amount of isspace in p */
+if (p < end && g_ascii_isspace(*p)) {
+++p;
+} else {
+++t;
  }
+} else if (*t && p < end && *t == g_ascii_tolower(*p)) {
+++p, ++t;
+} else {
+break;
  }
  }
  
+if (!*t && p == end) { /* if we managed to reach ends of both strings */

+s->accept_range = true;
+}
+
  return realsize;
  }
  


--
Best regards,
Vladimir




Re: [PATCH] block/curl: use strlen instead of strchr

2024-06-30 Thread Vladimir Sementsov-Ogievskiy

On 01.07.24 09:34, Vladimir Sementsov-Ogievskiy wrote:

On 29.06.24 09:20, Michael Tokarev wrote:

On 6/28/24 08:49, Vladimir Sementsov-Ogievskiy wrote:

We already know where colon is, so no reason to search for it. Also,
avoid a code, which looks like we forget to check return value of
strchr() to NULL.

Suggested-by: Kevin Wolf 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---

This replaces my patch
   [PATCH] block/curl: explicitly assert that strchr returns non-NULL value
Supersedes: <20240627153059.589070-1-vsement...@yandex-team.ru>

  block/curl.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/curl.c b/block/curl.c
index 419f7c89ef..d03bfe4817 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -219,7 +219,7 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t 
nmemb, void *opaque)
  && g_ascii_strncasecmp(header, accept_ranges,
 strlen(accept_ranges)) == 0) {
-    char *p = strchr(header, ':') + 1;
+    char *p = header + strlen(accept_ranges);
  /* Skip whitespace between the header name and value. */
  while (p < end && *p && g_ascii_isspace(*p)) {


Heck.  All these strlen()s look ugly, especially in the
loop iterations..


I expect that strlen of a string constant is computed at compile time.

My aim was to fix the Coverity complaint (I don't see this complaint in the public qemu
Coverity project, that's why I don't specify a CID in the commit message), not to
rewrite the whole function. So I'd prefer Kevin's suggestion, which is both
minimal and makes the code obviously correct.. The only simpler thing would be to
mark the problem as a false positive in Coverity.




Upd: I missed that you had sent a patch; this changes things. Of course, your code
looks nicer than the old one.

--
Best regards,
Vladimir




Re: [PATCH] block/curl: use strlen instead of strchr

2024-06-30 Thread Vladimir Sementsov-Ogievskiy

On 29.06.24 09:20, Michael Tokarev wrote:

On 6/28/24 08:49, Vladimir Sementsov-Ogievskiy wrote:

We already know where colon is, so no reason to search for it. Also,
avoid a code, which looks like we forget to check return value of
strchr() to NULL.

Suggested-by: Kevin Wolf 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---

This replaces my patch
   [PATCH] block/curl: explicitly assert that strchr returns non-NULL value
Supersedes: <20240627153059.589070-1-vsement...@yandex-team.ru>

  block/curl.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/curl.c b/block/curl.c
index 419f7c89ef..d03bfe4817 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -219,7 +219,7 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t 
nmemb, void *opaque)
  && g_ascii_strncasecmp(header, accept_ranges,
 strlen(accept_ranges)) == 0) {
-    char *p = strchr(header, ':') + 1;
+    char *p = header + strlen(accept_ranges);
  /* Skip whitespace between the header name and value. */
  while (p < end && *p && g_ascii_isspace(*p)) {


Heck.  All these strlen()s look ugly, especially in the
loop iterations..


I expect that strlen of a string constant is computed at compile time.

My aim was to fix a Coverity complaint (I don't see this complaint in the
public QEMU Coverity project, which is why I don't specify a CID in the
commit message), not to rewrite the whole function. So I'd prefer Kevin's
suggestion, which is both minimal and makes the code obviously correct.
The only simpler thing would be to mark the problem as a false positive
in Coverity.

--
Best regards,
Vladimir




[PATCH] block/curl: use strlen instead of strchr

2024-06-27 Thread Vladimir Sementsov-Ogievskiy
We already know where the colon is, so there is no reason to search
for it again. Also, avoid code that looks like we forgot to check the
return value of strchr() for NULL.

Suggested-by: Kevin Wolf 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---

This replaces my patch
  [PATCH] block/curl: explicitly assert that strchr returns non-NULL value
Supersedes: <20240627153059.589070-1-vsement...@yandex-team.ru>

 block/curl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/curl.c b/block/curl.c
index 419f7c89ef..d03bfe4817 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -219,7 +219,7 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t 
nmemb, void *opaque)
 && g_ascii_strncasecmp(header, accept_ranges,
strlen(accept_ranges)) == 0) {
 
-char *p = strchr(header, ':') + 1;
+char *p = header + strlen(accept_ranges);
 
 /* Skip whitespace between the header name and value. */
 while (p < end && *p && g_ascii_isspace(*p)) {
-- 
2.34.1




Re: [PATCH] block/curl: explicitly assert that strchr returns non-NULL value

2024-06-27 Thread Vladimir Sementsov-Ogievskiy

On 27.06.24 21:05, Kevin Wolf wrote:

Am 27.06.2024 um 17:30 hat Vladimir Sementsov-Ogievskiy geschrieben:

strchr() may return NULL if the colon is not found. It seems clearer to
assert explicitly that we don't expect that here than to add 1 to a
possibly NULL pointer and dereference it later.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/curl.c | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/curl.c b/block/curl.c
index 419f7c89ef..ccfffd6c12 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -219,7 +219,9 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t 
nmemb, void *opaque)
  && g_ascii_strncasecmp(header, accept_ranges,
 strlen(accept_ranges)) == 0) {
  
-char *p = strchr(header, ':') + 1;

+char *p = strchr(header, ':');
+assert(p != NULL);
+p += 1;


I'm not sure if this is actually much clearer because it doesn't say why
we don't expect NULL here. If you don't look at the context, it almost
looks like an assert() where proper error handling is needed. If you do,
then the original line is clear enough.

My first thought was that maybe what we want is a comment, but we
actually already know where the colon is. So how about this instead:

 char *p = header + strlen(accept_ranges);



Oh, right. That's better.




  /* Skip whitespace between the header name and value. */
  while (p < end && *p && g_ascii_isspace(*p)) {
--
2.34.1





--
Best regards,
Vladimir




[PATCH] block/curl: explicitly assert that strchr returns non-NULL value

2024-06-27 Thread Vladimir Sementsov-Ogievskiy
strchr() may return NULL if the colon is not found. It seems clearer to
assert explicitly that we don't expect that here than to add 1 to a
possibly NULL pointer and dereference it later.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/curl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/block/curl.c b/block/curl.c
index 419f7c89ef..ccfffd6c12 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -219,7 +219,9 @@ static size_t curl_header_cb(void *ptr, size_t size, size_t 
nmemb, void *opaque)
 && g_ascii_strncasecmp(header, accept_ranges,
strlen(accept_ranges)) == 0) {
 
-char *p = strchr(header, ':') + 1;
+char *p = strchr(header, ':');
+assert(p != NULL);
+p += 1;
 
 /* Skip whitespace between the header name and value. */
 while (p < end && *p && g_ascii_isspace(*p)) {
-- 
2.34.1




[PATCH v2 0/3] block-jobs: add final flush

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
Actually, a block job is not complete without a final flush. It's
rather unexpected to end up with a broken target when the job
completed successfully long ago and we then fail to flush, or the
process simply crashes or is killed.

The mirror job already has mirror_flush() for this, so it's OK.

Add a similar final flush to the other jobs: backup, stream and commit.
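
For reference, a simplified, self-contained sketch of the loop shape
shared by the three patches (toy code, not the QEMU functions): the copy
loop gets one extra iteration after the last chunk, which only flushes
the target, and a flush error is fed into the same error path as a copy
error.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the real copy and flush operations; always succeed here. */
static int copy_chunk(long offset, long n)
{
    printf("copy  %ld..%ld\n", offset, offset + n);
    return 0;
}

static int flush_target(void)
{
    printf("flush\n");
    return 0;
}

int main(void)
{
    const long len = 10, chunk = 4;
    bool need_final_flush = true;
    long n = 0;

    /* One extra iteration after the last chunk does only the flush. */
    for (long offset = 0; offset < len || need_final_flush; offset += n) {
        int ret;

        n = 0;
        if (offset < len) {
            n = (len - offset < chunk) ? len - offset : chunk;
            ret = copy_chunk(offset, n);
        } else {
            ret = flush_target();
            if (ret == 0) {
                need_final_flush = false;
            }
        }
        if (ret < 0) {
            /* The real jobs route this through block_job_error_action(),
             * which may retry the same iteration instead of failing. */
            return 1;
        }
    }
    return 0;
}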

v2: rework stream and commit, also split into 3 commits.

Vladimir Sementsov-Ogievskiy (3):
  block/commit: implement final flush
  block/stream: implement final flush
  block/backup: implement final flush

 block/backup.c |  2 +-
 block/block-copy.c |  7 
 block/commit.c | 41 +++
 block/stream.c | 67 +++---
 include/block/block-copy.h |  1 +
 5 files changed, 77 insertions(+), 41 deletions(-)

-- 
2.34.1




[PATCH v2 3/3] block/backup: implement final flush

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
Actually, a block job is not complete without a final flush. It's
rather unexpected to end up with a broken target when the job
completed successfully long ago and we then fail to flush, or the
process simply crashes or is killed.

The mirror job already has mirror_flush() for this, so it's OK.

Do this for the backup job too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/backup.c | 2 +-
 block/block-copy.c | 7 +++
 include/block/block-copy.h | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/block/backup.c b/block/backup.c
index 3dd2e229d2..fee78ba5ad 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -156,7 +156,7 @@ static int coroutine_fn backup_loop(BackupBlockJob *job)
 job->bg_bcs_call = s = block_copy_async(job->bcs, 0,
 QEMU_ALIGN_UP(job->len, job->cluster_size),
 job->perf.max_workers, job->perf.max_chunk,
-backup_block_copy_callback, job);
+true, backup_block_copy_callback, job);
 
 while (!block_copy_call_finished(s) &&
!job_is_cancelled(&job->common.job))
diff --git a/block/block-copy.c b/block/block-copy.c
index 7e3b378528..842b0383db 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -54,6 +54,7 @@ typedef struct BlockCopyCallState {
 int max_workers;
 int64_t max_chunk;
 bool ignore_ratelimit;
+bool need_final_flush;
 BlockCopyAsyncCallbackFunc cb;
 void *cb_opaque;
 /* Coroutine where async block-copy is running */
@@ -899,6 +900,10 @@ block_copy_common(BlockCopyCallState *call_state)
  */
 } while (ret > 0 && !qatomic_read(&call_state->cancelled));
 
+if (ret == 0 && call_state->need_final_flush) {
+ret = bdrv_co_flush(s->target->bs);
+}
+
 qatomic_store_release(&call_state->finished, true);
 
 if (call_state->cb) {
@@ -954,6 +959,7 @@ int coroutine_fn block_copy(BlockCopyState *s, int64_t 
start, int64_t bytes,
 BlockCopyCallState *block_copy_async(BlockCopyState *s,
  int64_t offset, int64_t bytes,
  int max_workers, int64_t max_chunk,
+ bool need_final_flush,
  BlockCopyAsyncCallbackFunc cb,
  void *cb_opaque)
 {
@@ -965,6 +971,7 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
 .bytes = bytes,
 .max_workers = max_workers,
 .max_chunk = max_chunk,
+.need_final_flush = need_final_flush,
 .cb = cb,
 .cb_opaque = cb_opaque,
 
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index bdc703bacd..6588ebaf77 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -62,6 +62,7 @@ int coroutine_fn block_copy(BlockCopyState *s, int64_t 
offset, int64_t bytes,
 BlockCopyCallState *block_copy_async(BlockCopyState *s,
  int64_t offset, int64_t bytes,
  int max_workers, int64_t max_chunk,
+ bool need_final_flush,
  BlockCopyAsyncCallbackFunc cb,
  void *cb_opaque);
 
-- 
2.34.1




[PATCH v2 2/3] block/stream: implement final flush

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
Actually, a block job is not complete without a final flush. It's
rather unexpected to end up with a broken target when the job
completed successfully long ago and we then fail to flush, or the
process simply crashes or is killed.

The mirror job already has mirror_flush() for this, so it's OK.

Do this for the stream job too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/stream.c | 67 ++
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/block/stream.c b/block/stream.c
index 7031eef12b..893db258d4 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -160,6 +160,7 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 int64_t offset = 0;
 int error = 0;
 int64_t n = 0; /* bytes */
+bool need_final_flush = true;
 
 WITH_GRAPH_RDLOCK_GUARD() {
 unfiltered_bs = bdrv_skip_filters(s->target_bs);
@@ -175,10 +176,13 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 }
 job_progress_set_remaining(&s->common.job, len);
 
-for ( ; offset < len; offset += n) {
-bool copy;
+for ( ; offset < len || need_final_flush; offset += n) {
+bool copy = false;
+bool error_is_read = true;
 int ret;
 
+n = 0;
+
 /* Note that even when no rate limit is applied we need to yield
  * with no pending I/O here so that bdrv_drain_all() returns.
  */
@@ -187,35 +191,46 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
 break;
 }
 
-copy = false;
-
-WITH_GRAPH_RDLOCK_GUARD() {
-ret = bdrv_co_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, 
&n);
-if (ret == 1) {
-/* Allocated in the top, no need to copy.  */
-} else if (ret >= 0) {
-/*
- * Copy if allocated in the intermediate images.  Limit to the
- * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE).
- */
-ret = bdrv_co_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
- s->base_overlay, true,
- offset, n, &n);
-/* Finish early if end of backing file has been reached */
-if (ret == 0 && n == 0) {
-n = len - offset;
+if (offset < len) {
+WITH_GRAPH_RDLOCK_GUARD() {
+ret = bdrv_co_is_allocated(unfiltered_bs, offset, STREAM_CHUNK,
+   &n);
+if (ret == 1) {
+/* Allocated in the top, no need to copy.  */
+} else if (ret >= 0) {
+/*
+ * Copy if allocated in the intermediate images.  Limit to
+ * the known-unallocated area
+ * [offset, offset+n*BDRV_SECTOR_SIZE).
+ */
+ret = 
bdrv_co_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
+ s->base_overlay, true,
+ offset, n, &n);
+/* Finish early if end of backing file has been reached */
+if (ret == 0 && n == 0) {
+n = len - offset;
+}
+
+copy = (ret > 0);
 }
-
-copy = (ret > 0);
 }
-}
-trace_stream_one_iteration(s, offset, n, ret);
-if (copy) {
-ret = stream_populate(s->blk, offset, n);
+trace_stream_one_iteration(s, offset, n, ret);
+if (copy) {
+ret = stream_populate(s->blk, offset, n);
+}
+} else {
+assert(need_final_flush);
+ret = blk_co_flush(s->blk);
+if (ret < 0) {
+error_is_read = false;
+} else {
+need_final_flush = false;
+}
 }
 if (ret < 0) {
 BlockErrorAction action =
-block_job_error_action(&s->common, s->on_error, true, -ret);
+block_job_error_action(&s->common, s->on_error,
+   error_is_read, -ret);
 if (action == BLOCK_ERROR_ACTION_STOP) {
 n = 0;
 continue;
-- 
2.34.1




[PATCH v2 1/3] block/commit: implement final flush

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
Actually, a block job is not complete without a final flush. It's
rather unexpected to end up with a broken target when the job
completed successfully long ago and we then fail to flush, or the
process simply crashes or is killed.

The mirror job already has mirror_flush() for this, so it's OK.

Do this for the commit job too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/commit.c | 41 +++--
 1 file changed, 27 insertions(+), 14 deletions(-)

diff --git a/block/commit.c b/block/commit.c
index 7c3fdcb0ca..81971692a2 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -134,6 +134,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 int64_t n = 0; /* bytes */
 QEMU_AUTO_VFREE void *buf = NULL;
 int64_t len, base_len;
+bool need_final_flush = true;
 
 len = blk_co_getlength(s->top);
 if (len < 0) {
@@ -155,8 +156,8 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 
 buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
 
-for (offset = 0; offset < len; offset += n) {
-bool copy;
+for (offset = 0; offset < len || need_final_flush; offset += n) {
+bool copy = false;
 bool error_in_source = true;
 
 /* Note that even when no rate limit is applied we need to yield
@@ -166,22 +167,34 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 if (job_is_cancelled(&s->common.job)) {
 break;
 }
-/* Copy if allocated above the base */
-ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
-offset, COMMIT_BUFFER_SIZE, &n);
-copy = (ret > 0);
-trace_commit_one_iteration(s, offset, n, ret);
-if (copy) {
-assert(n < SIZE_MAX);
 
-ret = blk_co_pread(s->top, offset, n, buf, 0);
-if (ret >= 0) {
-ret = blk_co_pwrite(s->base, offset, n, buf, 0);
-if (ret < 0) {
-error_in_source = false;
+if (offset < len) {
+/* Copy if allocated above the base */
+ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
+offset, COMMIT_BUFFER_SIZE, &n);
+copy = (ret > 0);
+trace_commit_one_iteration(s, offset, n, ret);
+if (copy) {
+assert(n < SIZE_MAX);
+
+ret = blk_co_pread(s->top, offset, n, buf, 0);
+if (ret >= 0) {
+ret = blk_co_pwrite(s->base, offset, n, buf, 0);
+if (ret < 0) {
+error_in_source = false;
+}
 }
 }
+} else {
+assert(need_final_flush);
+ret = blk_co_flush(s->base);
+if (ret < 0) {
+error_in_source = false;
+} else {
+need_final_flush = false;
+}
 }
+
 if (ret < 0) {
 BlockErrorAction action =
 block_job_error_action(&s->common, s->on_error,
-- 
2.34.1




[PATCH v9 3/7] block: make bdrv_find_child() function public

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
To be reused soon for blockdev-replace functionality.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c  | 13 +
 blockdev.c   | 14 --
 include/block/block_int-io.h |  2 ++
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/block.c b/block.c
index 468cf5e67d..f6292f459a 100644
--- a/block.c
+++ b/block.c
@@ -8174,6 +8174,19 @@ int bdrv_make_empty(BdrvChild *c, Error **errp)
 return 0;
 }
 
+BdrvChild *bdrv_find_child(BlockDriverState *parent_bs, const char *child_name)
+{
+BdrvChild *child;
+
+QLIST_FOREACH(child, &parent_bs->children, next) {
+if (strcmp(child->name, child_name) == 0) {
+return child;
+}
+}
+
+return NULL;
+}
+
 /*
  * Return the child that @bs acts as an overlay for, and from which data may be
  * copied in COW or COR operations.  Usually this is the backing file.
diff --git a/blockdev.c b/blockdev.c
index 835064ed03..ba7e90b06e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3452,20 +3452,6 @@ void qmp_blockdev_del(const char *node_name, Error 
**errp)
 bdrv_unref(bs);
 }
 
-static BdrvChild * GRAPH_RDLOCK
-bdrv_find_child(BlockDriverState *parent_bs, const char *child_name)
-{
-BdrvChild *child;
-
-QLIST_FOREACH(child, &parent_bs->children, next) {
-if (strcmp(child->name, child_name) == 0) {
-return child;
-}
-}
-
-return NULL;
-}
-
 void qmp_x_blockdev_change(const char *parent, const char *child,
const char *node, Error **errp)
 {
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index 4a7cf2b4fd..11ed4aa927 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -130,6 +130,8 @@ bdrv_co_refresh_total_sectors(BlockDriverState *bs, int64_t 
hint);
 int co_wrapper_mixed_bdrv_rdlock
 bdrv_refresh_total_sectors(BlockDriverState *bs, int64_t hint);
 
+BdrvChild * GRAPH_RDLOCK
+bdrv_find_child(BlockDriverState *parent_bs, const char *child_name);
 BdrvChild * GRAPH_RDLOCK bdrv_cow_child(BlockDriverState *bs);
 BdrvChild * GRAPH_RDLOCK bdrv_filter_child(BlockDriverState *bs);
 BdrvChild * GRAPH_RDLOCK bdrv_filter_or_cow_child(BlockDriverState *bs);
-- 
2.34.1




[PATCH v9 6/7] iotests.py: introduce VM.assert_edges_list() method

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
Add an alternative method to check the block graph, to be used in a
further commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/iotests.py | 17 +
 1 file changed, 17 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index ea48af4a7b..8a5fd01eac 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -1108,6 +1108,23 @@ def check_bitmap_status(self, node_name, bitmap_name, 
fields):
 
 return fields.items() <= ret.items()
 
+def get_block_graph(self):
+"""
+Returns block graph in form of edges list, where each edge is a tuple:
+  (parent_node_name, child_name, child_node_name)
+"""
+graph = self.qmp('x-debug-query-block-graph')['return']
+
+nodes = {n['id']: n['name'] for n in graph['nodes']}
+# Check that all names are unique:
+assert len(set(nodes.values())) == len(nodes)
+
+return [(nodes[e['parent']], e['name'], nodes[e['child']])
+for e in graph['edges']]
+
+def assert_edges_list(self, edges):
+assert sorted(edges) == sorted(self.get_block_graph())
+
 def assert_block_path(self, root, path, expected_node, graph=None):
 """
 Check whether the node under the given path in the block graph
-- 
2.34.1




[PATCH v9 4/7] qapi: add blockdev-replace command

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
Add a command that can replace the bs in the following BdrvChild structures:

 - qdev blk root child
 - block-export blk root child
 - any child of BlockDriverState selected by child-name

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 blockdev.c | 56 +++
 qapi/block-core.json   | 88 ++
 stubs/blk-by-qdev-id.c | 13 +++
 stubs/meson.build  |  1 +
 4 files changed, 158 insertions(+)
 create mode 100644 stubs/blk-by-qdev-id.c

diff --git a/blockdev.c b/blockdev.c
index ba7e90b06e..2190467022 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3559,6 +3559,62 @@ void qmp_x_blockdev_set_iothread(const char *node_name, 
StrOrNull *iothread,
 bdrv_try_change_aio_context(bs, new_context, NULL, errp);
 }
 
+void qmp_blockdev_replace(BlockdevReplace *repl, Error **errp)
+{
+BdrvChild *child = NULL;
+BlockDriverState *new_child_bs;
+
+if (repl->parent_type == BLOCK_PARENT_TYPE_DRIVER) {
+BlockDriverState *parent_bs;
+
+parent_bs = bdrv_find_node(repl->u.driver.node_name);
+if (!parent_bs) {
+error_setg(errp, "Block driver node with node-name '%s' not "
+   "found", repl->u.driver.node_name);
+return;
+}
+
+child = bdrv_find_child(parent_bs, repl->u.driver.child);
+if (!child) {
+error_setg(errp, "Block driver node '%s' doesn't have child "
+   "named '%s'", repl->u.driver.node_name,
+   repl->u.driver.child);
+return;
+}
+} else {
+/* Other types are similar, they work through blk */
+BlockBackend *blk;
+bool is_qdev = repl->parent_type == BLOCK_PARENT_TYPE_QDEV;
+const char *id =
+is_qdev ? repl->u.qdev.qdev_id : repl->u.export.export_id;
+
+assert(is_qdev || repl->parent_type == BLOCK_PARENT_TYPE_EXPORT);
+
+blk = is_qdev ? blk_by_qdev_id(id, errp) : blk_by_export_id(id, errp);
+if (!blk) {
+return;
+}
+
+child = blk_root(blk);
+if (!child) {
+error_setg(errp, "%s '%s' is empty, nothing to replace",
+   is_qdev ? "Device" : "Export", id);
+return;
+}
+}
+
+assert(child);
+assert(child->bs);
+
+new_child_bs = bdrv_find_node(repl->new_child);
+if (!new_child_bs) {
+error_setg(errp, "Node '%s' not found", repl->new_child);
+return;
+}
+
+bdrv_replace_child_bs(child, new_child_bs, errp);
+}
+
 QemuOptsList qemu_common_drive_opts = {
 .name = "drive",
 .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
diff --git a/qapi/block-core.json b/qapi/block-core.json
index df5e07debd..0a6f08a6e0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -6148,3 +6148,91 @@
 ##
 { 'struct': 'DummyBlockCoreForceArrays',
   'data': { 'unused-block-graph-info': ['BlockGraphInfo'] } }
+
+##
+# @BlockParentType:
+#
+# @qdev: block device, such as created by device_add, and denoted by
+# qdev-id
+#
+# @driver: block driver node, such as created by blockdev-add, and
+# denoted by node-name
+#
+# @export: block export, such as created by block-export-add, and
+# denoted by export-id
+#
+# Since 9.1
+##
+{ 'enum': 'BlockParentType',
+  'data': ['qdev', 'driver', 'export'] }
+
+##
+# @BdrvChildRefQdev:
+#
+# @qdev-id: the device's ID or QOM path
+#
+# Since 9.1
+##
+{ 'struct': 'BdrvChildRefQdev',
+  'data': { 'qdev-id': 'str' } }
+
+##
+# @BdrvChildRefExport:
+#
+# @export-id: block export identifier
+#
+# Since 9.1
+##
+{ 'struct': 'BdrvChildRefExport',
+  'data': { 'export-id': 'str' } }
+
+##
+# @BdrvChildRefDriver:
+#
+# @node-name: the node name of the parent block node
+#
+# @child: name of the child to be replaced, like "file" or "backing"
+#
+# Since 9.1
+##
+{ 'struct': 'BdrvChildRefDriver',
+  'data': { 'node-name': 'str', 'child': 'str' } }
+
+##
+# @BlockdevReplace:
+#
+# @parent-type: type of the parent, which child is to be replaced
+#
+# @new-child: new child for replacement
+#
+# Since 9.1
+##
+{ 'union': 'BlockdevReplace',
+  'base': {
+  'parent-type': 'BlockParentType',
+  'new-child': 'str'
+  },
+  'discriminator': 'parent-type',
+  'data': {
+  'qdev': 'BdrvChildRefQdev',
+  'export': 'BdrvChildRefExport'

[PATCH v9 1/7] block-backend: blk_root(): drop const specifier on return type

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
We'll need a non-const child pointer for graph modifications in
further commits.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/block-backend.c   | 2 +-
 include/sysemu/block-backend-global-state.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index db6f9b92a3..71d5002198 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2879,7 +2879,7 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, 
int64_t off_in,
   bytes, read_flags, write_flags);
 }
 
-const BdrvChild *blk_root(BlockBackend *blk)
+BdrvChild *blk_root(BlockBackend *blk)
 {
 GLOBAL_STATE_CODE();
 return blk->root;
diff --git a/include/sysemu/block-backend-global-state.h 
b/include/sysemu/block-backend-global-state.h
index 49c12b0fa9..ccb35546a1 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -126,7 +126,7 @@ void blk_set_force_allow_inactivate(BlockBackend *blk);
 bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error 
**errp);
 void blk_unregister_buf(BlockBackend *blk, void *host, size_t size);
 
-const BdrvChild *blk_root(BlockBackend *blk);
+BdrvChild *blk_root(BlockBackend *blk);
 
 int blk_make_empty(BlockBackend *blk, Error **errp);
 
-- 
2.34.1




[PATCH v9 2/7] block/export: add blk_by_export_id()

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
We need it for further blockdev-replace functionality.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/export/export.c   | 18 ++
 include/sysemu/block-backend-global-state.h |  1 +
 2 files changed, 19 insertions(+)

diff --git a/block/export/export.c b/block/export/export.c
index 6d51ae8ed7..57beae7982 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -355,3 +355,21 @@ BlockExportInfoList *qmp_query_block_exports(Error **errp)
 
 return head;
 }
+
+BlockBackend *blk_by_export_id(const char *id, Error **errp)
+{
+BlockExport *exp;
+
+exp = blk_exp_find(id);
+if (exp == NULL) {
+error_setg(errp, "Export '%s' not found", id);
+return NULL;
+}
+
+if (!exp->blk) {
+error_setg(errp, "Export '%s' is empty", id);
+return NULL;
+}
+
+return exp->blk;
+}
diff --git a/include/sysemu/block-backend-global-state.h 
b/include/sysemu/block-backend-global-state.h
index ccb35546a1..410d0cc5c7 100644
--- a/include/sysemu/block-backend-global-state.h
+++ b/include/sysemu/block-backend-global-state.h
@@ -74,6 +74,7 @@ void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
 DeviceState *blk_get_attached_dev(BlockBackend *blk);
 BlockBackend *blk_by_dev(void *dev);
 BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
+BlockBackend *blk_by_export_id(const char *id, Error **errp);
 void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
 
 void blk_activate(BlockBackend *blk, Error **errp);
-- 
2.34.1




[PATCH v9 7/7] iotests: add filter-insertion

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
Demonstrate new blockdev-replace API for filter insertion and removal.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/tests/filter-insertion | 236 ++
 tests/qemu-iotests/tests/filter-insertion.out |   5 +
 2 files changed, 241 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/filter-insertion
 create mode 100644 tests/qemu-iotests/tests/filter-insertion.out

diff --git a/tests/qemu-iotests/tests/filter-insertion 
b/tests/qemu-iotests/tests/filter-insertion
new file mode 100755
index 00..02a0978f96
--- /dev/null
+++ b/tests/qemu-iotests/tests/filter-insertion
@@ -0,0 +1,236 @@
+#!/usr/bin/env python3
+#
+# Tests for inserting and removing filters in a block graph.
+#
+# Copyright (c) 2022 Virtuozzo International GmbH.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+
+import iotests
+from iotests import qemu_img_create, try_remove
+
+
+disk = os.path.join(iotests.test_dir, 'disk')
+sock = os.path.join(iotests.sock_dir, 'sock')
+size = 1024 * 1024
+
+
+class TestFilterInsertion(iotests.QMPTestCase):
+def setUp(self):
+qemu_img_create('-f', iotests.imgfmt, disk, str(size))
+
+self.vm = iotests.VM()
+self.vm.launch()
+
+self.vm.cmd('blockdev-add', {
+'node-name': 'disk0',
+'driver': 'qcow2',
+'file': {
+'node-name': 'file0',
+'driver': 'file',
+'filename': disk
+}
+})
+
+def tearDown(self):
+self.vm.shutdown()
+os.remove(disk)
+try_remove(sock)
+
+def test_simple_insertion(self):
+vm = self.vm
+
+vm.cmd('blockdev-add', {
+'node-name': 'filter',
+'driver': 'blkdebug',
+'image': 'file0'
+})
+
+vm.cmd('blockdev-replace', {
+'parent-type': 'driver',
+'node-name': 'disk0',
+'child': 'file',
+'new-child': 'filter'
+})
+
+# Filter inserted:
+# disk0 -file-> filter -image-> file0
+vm.assert_edges_list([
+('disk0', 'file', 'filter'),
+('filter', 'image', 'file0')
+])
+
+vm.cmd('blockdev-replace', {
+'parent-type': 'driver',
+'node-name': 'disk0',
+'child': 'file',
+'new-child': 'file0'
+})
+
+# Filter replaced, but still exists:
+# disk0 -file-> file0 <-image- filter
+vm.assert_edges_list([
+('disk0', 'file', 'file0'),
+('filter', 'image', 'file0')
+])
+
+vm.cmd('blockdev-del', node_name='filter')
+
+# Filter removed
+# disk0 -file-> file0
+vm.assert_edges_list([
+('disk0', 'file', 'file0')
+])
+
+def test_insert_under_qdev(self):
+vm = self.vm
+
+vm.cmd('device_add', driver='virtio-scsi')
+vm.cmd('device_add', id='sda', driver='scsi-hd',
+ drive='disk0')
+
+vm.cmd('blockdev-add', {
+'node-name': 'filter',
+'driver': 'compress',
+'file': 'disk0'
+})
+
+vm.cmd('blockdev-replace', {
+'parent-type': 'qdev',
+'qdev-id': 'sda',
+'new-child': 'filter'
+})
+
+# Filter inserted:
+# sda -root-> filter -file-> disk0 -file-> file0
+vm.assert_edges_list([
+# parent_node_name, child_name, child_node_name
+('sda', 'root', 'filter'),
+('fil

[PATCH v9 5/7] block: bdrv_get_xdbg_block_graph(): report export ids

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
Currently, for block exports we report empty blk names. That's not good.
Let's try to find the corresponding block export and report its id.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c |  4 
 block/export/export.c   | 13 +
 include/block/export.h  |  1 +
 stubs/blk-exp-find-by-blk.c |  9 +
 stubs/meson.build   |  1 +
 5 files changed, 28 insertions(+)
 create mode 100644 stubs/blk-exp-find-by-blk.c

diff --git a/block.c b/block.c
index f6292f459a..601475e835 100644
--- a/block.c
+++ b/block.c
@@ -6326,7 +6326,11 @@ XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp)
 for (blk = blk_all_next(NULL); blk; blk = blk_all_next(blk)) {
 char *allocated_name = NULL;
 const char *name = blk_name(blk);
+BlockExport *exp = blk_exp_find_by_blk(blk);
 
+if (!*name && exp) {
+name = exp->id;
+}
 if (!*name) {
 name = allocated_name = blk_get_attached_dev_id(blk);
 }
diff --git a/block/export/export.c b/block/export/export.c
index 57beae7982..8744a1171e 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -60,6 +60,19 @@ BlockExport *blk_exp_find(const char *id)
 return NULL;
 }
 
+BlockExport *blk_exp_find_by_blk(BlockBackend *blk)
+{
+BlockExport *exp;
+
+QLIST_FOREACH(exp, &block_exports, next) {
+if (exp->blk == blk) {
+return exp;
+}
+}
+
+return NULL;
+}
+
 static const BlockExportDriver *blk_exp_find_driver(BlockExportType type)
 {
 int i;
diff --git a/include/block/export.h b/include/block/export.h
index f2fe0f8078..16863d37cf 100644
--- a/include/block/export.h
+++ b/include/block/export.h
@@ -82,6 +82,7 @@ struct BlockExport {
 
 BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp);
 BlockExport *blk_exp_find(const char *id);
+BlockExport *blk_exp_find_by_blk(BlockBackend *blk);
 void blk_exp_ref(BlockExport *exp);
 void blk_exp_unref(BlockExport *exp);
 void blk_exp_request_shutdown(BlockExport *exp);
diff --git a/stubs/blk-exp-find-by-blk.c b/stubs/blk-exp-find-by-blk.c
new file mode 100644
index 00..2fc1da953b
--- /dev/null
+++ b/stubs/blk-exp-find-by-blk.c
@@ -0,0 +1,9 @@
+#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
+#include "block/export.h"
+
+BlockExport *blk_exp_find_by_blk(BlockBackend *blk)
+{
+return NULL;
+}
+
diff --git a/stubs/meson.build b/stubs/meson.build
index 068998c1a5..cbe30f94e8 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -16,6 +16,7 @@ if have_block
   stub_ss.add(files('blk-commit-all.c'))
   stub_ss.add(files('blk-exp-close-all.c'))
   stub_ss.add(files('blk-by-qdev-id.c'))
+  stub_ss.add(files('blk-exp-find-by-blk.c'))
   stub_ss.add(files('blockdev-close-all-bdrv-states.c'))
   stub_ss.add(files('change-state-handler.c'))
   stub_ss.add(files('get-vm-name.c'))
-- 
2.34.1




[PATCH v9 0/7] blockdev-replace

2024-06-26 Thread Vladimir Sementsov-Ogievskiy
Hi all!

This series presents a new command blockdev-replace, which helps to
insert/remove filters anywhere in the block graph. It can:

 - replace qdev block-node by qdev-id
 - replace export block-node by export-id
 - replace any child of parent block-node by node-name and child name

So insertion is done in two steps:

1. blockdev_add (create filter node, unparented)

[some parent]  [new filter]
 |   |
 V   V
[some child   ]

2. blockdev-replace (replace child by the filter)

[some parent]
 | 
 V
[new filter]
 |
 V
[some child]

And removal is done in reverse order:

1. blockdev-replace (go back to picture 1)
2. blockdev_del (remove filter node)


Ideally, we would do both operations (add + replace or replace + del) in a
transaction, but that would be another series.

v9: rebase
drop x- prefix and use unstable feature
bump version to 9.1 in qapi spec
update error message in blk_by_qdev_id stub

v8: rebase. Also don't use "preallocate" filter in a test, as we don't
support removal of this filter for now. Preallocate filter is really
unusual, see discussion here:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg994945.html

Vladimir Sementsov-Ogievskiy (7):
  block-backend: blk_root(): drop const specifier on return type
  block/export: add blk_by_export_id()
  block: make bdrv_find_child() function public
  qapi: add blockdev-replace command
  block: bdrv_get_xdbg_block_graph(): report export ids
  iotests.py: introduce VM.assert_edges_list() method
  iotests: add filter-insertion

 block.c   |  17 ++
 block/block-backend.c |   2 +-
 block/export/export.c |  31 +++
 blockdev.c|  70 --
 include/block/block_int-io.h  |   2 +
 include/block/export.h|   1 +
 include/sysemu/block-backend-global-state.h   |   3 +-
 qapi/block-core.json  |  88 +++
 stubs/blk-by-qdev-id.c|  13 +
 stubs/blk-exp-find-by-blk.c   |   9 +
 stubs/meson.build |   2 +
 tests/qemu-iotests/iotests.py |  17 ++
 tests/qemu-iotests/tests/filter-insertion | 236 ++
 tests/qemu-iotests/tests/filter-insertion.out |   5 +
 14 files changed, 480 insertions(+), 16 deletions(-)
 create mode 100644 stubs/blk-by-qdev-id.c
 create mode 100644 stubs/blk-exp-find-by-blk.c
 create mode 100755 tests/qemu-iotests/tests/filter-insertion
 create mode 100644 tests/qemu-iotests/tests/filter-insertion.out

-- 
2.34.1




Re: [PATCH v2] block-backend: per-device throttling of BLOCK_IO_ERROR reports

2024-06-26 Thread Vladimir Sementsov-Ogievskiy

ping2

On 09.01.24 16:13, Vladimir Sementsov-Ogievskiy wrote:

From: Leonid Kaplan 

BLOCK_IO_ERROR events come from the guest, so we must throttle them.
We still want per-device throttling, so let's use the device id as a key.
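
A simplified model of the intended effect (not the monitor.c
implementation, which hashes the event state and delays/coalesces pending
events rather than dropping them): keying the limiter by device means a
flood of errors from one disk cannot consume the 1-per-second budget of
another.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* One throttle slot per device name (toy fixed-size table). */
struct slot { char device[32]; long last_ms; bool used; };
static struct slot table[16];

static bool should_emit(const char *device, long now_ms, long rate_ms)
{
    for (int i = 0; i < 16; i++) {
        if (table[i].used && strcmp(table[i].device, device) == 0) {
            if (now_ms - table[i].last_ms < rate_ms) {
                return false; /* within the interval: hold this event back */
            }
            table[i].last_ms = now_ms;
            return true;
        }
    }
    for (int i = 0; i < 16; i++) {
        if (!table[i].used) {
            table[i].used = true;
            snprintf(table[i].device, sizeof(table[i].device), "%s", device);
            table[i].last_ms = now_ms;
            return true; /* first event for this device is always emitted */
        }
    }
    return true; /* table full: fail open */
}

int main(void)
{
    /* Two devices flooding BLOCK_IO_ERROR at t = 0..3 ms:
     * each device still gets its first report through. */
    const char *devs[] = { "disk0", "disk1", "disk0", "disk1" };
    for (long t = 0; t < 4; t++) {
        printf("%-5s at %ldms -> %s\n", devs[t], t,
               should_emit(devs[t], t, 1000) ? "emit" : "throttle");
    }
    return 0;
}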

Signed-off-by: Leonid Kaplan 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---

v2: add Note: to QAPI doc

  monitor/monitor.c| 10 ++
  qapi/block-core.json |  2 ++
  2 files changed, 12 insertions(+)

diff --git a/monitor/monitor.c b/monitor/monitor.c
index 01ede1babd..ad0243e9d7 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -309,6 +309,7 @@ int error_printf_unless_qmp(const char *fmt, ...)
  static MonitorQAPIEventConf monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
  /* Limit guest-triggerable events to 1 per second */
  [QAPI_EVENT_RTC_CHANGE]= { 1000 * SCALE_MS },
+[QAPI_EVENT_BLOCK_IO_ERROR]= { 1000 * SCALE_MS },
  [QAPI_EVENT_WATCHDOG]  = { 1000 * SCALE_MS },
  [QAPI_EVENT_BALLOON_CHANGE]= { 1000 * SCALE_MS },
  [QAPI_EVENT_QUORUM_REPORT_BAD] = { 1000 * SCALE_MS },
@@ -498,6 +499,10 @@ static unsigned int qapi_event_throttle_hash(const void 
*key)
  hash += g_str_hash(qdict_get_str(evstate->data, "qom-path"));
  }
  
+if (evstate->event == QAPI_EVENT_BLOCK_IO_ERROR) {

+hash += g_str_hash(qdict_get_str(evstate->data, "device"));
+}
+
  return hash;
  }
  
@@ -525,6 +530,11 @@ static gboolean qapi_event_throttle_equal(const void *a, const void *b)

 qdict_get_str(evb->data, "qom-path"));
  }
  
+if (eva->event == QAPI_EVENT_BLOCK_IO_ERROR) {

+return !strcmp(qdict_get_str(eva->data, "device"),
+   qdict_get_str(evb->data, "device"));
+}
+
  return TRUE;
  }
  
diff --git a/qapi/block-core.json b/qapi/block-core.json

index ca390c5700..32c2c2f030 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -5559,6 +5559,8 @@
  # Note: If action is "stop", a STOP event will eventually follow the
  # BLOCK_IO_ERROR event
  #
+# Note: This event is rate-limited.
+#
  # Since: 0.13
  #
  # Example:


--
Best regards,
Vladimir



