Re: [PATCH v2 1/6] virtio/virtio-pci: Handle extra notification data

2024-03-13 Thread Jason Wang
On Wed, Mar 13, 2024 at 7:55 PM Jonah Palmer  wrote:
>
> Add support to virtio-pci devices for handling the extra data sent
> from the driver to the device when the VIRTIO_F_NOTIFICATION_DATA
> transport feature has been negotiated.
>
> The extra data that's passed to the virtio-pci device when this
> feature is enabled varies depending on the device's virtqueue
> layout.
>
> In a split virtqueue layout, this data includes:
>  - upper 16 bits: shadow_avail_idx
>  - lower 16 bits: virtqueue index
>
> In a packed virtqueue layout, this data includes:
>  - upper 16 bits: 1-bit wrap counter & 15-bit shadow_avail_idx
>  - lower 16 bits: virtqueue index
>
> Tested-by: Lei Yang 
> Reviewed-by: Eugenio Pérez 
> Signed-off-by: Jonah Palmer 
> ---
>  hw/virtio/virtio-pci.c | 10 +++---
>  hw/virtio/virtio.c | 18 ++
>  include/hw/virtio/virtio.h |  1 +
>  3 files changed, 26 insertions(+), 3 deletions(-)
>
> diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
> index cb6940fc0e..0f5c3c3b2f 100644
> --- a/hw/virtio/virtio-pci.c
> +++ b/hw/virtio/virtio-pci.c
> @@ -384,7 +384,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> addr, uint32_t val)
>  {
>  VirtIOPCIProxy *proxy = opaque;
>  VirtIODevice *vdev = virtio_bus_get_device(>bus);
> -uint16_t vector;
> +uint16_t vector, vq_idx;
>  hwaddr pa;
>
>  switch (addr) {
> @@ -408,8 +408,12 @@ static void virtio_ioport_write(void *opaque, uint32_t 
> addr, uint32_t val)
>  vdev->queue_sel = val;
>  break;
>  case VIRTIO_PCI_QUEUE_NOTIFY:
> -if (val < VIRTIO_QUEUE_MAX) {
> -virtio_queue_notify(vdev, val);
> +vq_idx = val;
> +if (vq_idx < VIRTIO_QUEUE_MAX) {
> +if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
> +virtio_queue_set_shadow_avail_data(vdev, val);
> +}
> +virtio_queue_notify(vdev, vq_idx);
>  }
>  break;
>  case VIRTIO_PCI_STATUS:
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index d229755eae..bcb9e09df0 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -2255,6 +2255,24 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, 
> int align)
>  }
>  }
>
> +void virtio_queue_set_shadow_avail_data(VirtIODevice *vdev, uint32_t data)
> +{
> +/* Lower 16 bits is the virtqueue index */
> +uint16_t i = data;
> +VirtQueue *vq = >vq[i];
> +
> +if (!vq->vring.desc) {
> +return;
> +}
> +
> +if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
> +vq->shadow_avail_wrap_counter = (data >> 31) & 0x1;
> +vq->shadow_avail_idx = (data >> 16) & 0x7FFF;
> +} else {
> +vq->shadow_avail_idx = (data >> 16);

Do we need to do a sanity check for this value?

Thanks

> +}
> +}
> +
>  static void virtio_queue_notify_vq(VirtQueue *vq)
>  {
>  if (vq->vring.desc && vq->handle_output) {
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index c8f72850bc..53915947a7 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -335,6 +335,7 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n);
>  void virtio_init_region_cache(VirtIODevice *vdev, int n);
>  void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
>  void virtio_queue_notify(VirtIODevice *vdev, int n);
> +void virtio_queue_set_shadow_avail_data(VirtIODevice *vdev, uint32_t data);
>  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
>  void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
>  int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
> --
> 2.39.3
>




Re: [PATCH for-9.0] mirror: Don't call job_pause_point() under graph lock

2024-03-13 Thread Eric Blake
On Wed, Mar 13, 2024 at 04:30:00PM +0100, Kevin Wolf wrote:
> Calling job_pause_point() while holding the graph reader lock
> potentially results in a deadlock: bdrv_graph_wrlock() first drains
> everything, including the mirror job, which pauses it. The job is only
> unpaused at the end of the drain section, which is when the graph writer
> lock has been successfully taken. However, if the job happens to be
> paused at a pause point where it still holds the reader lock, the writer
> lock can't be taken as long as the job is still paused.
> 
> Mark job_pause_point() as GRAPH_UNLOCKED and fix mirror accordingly.
> 
> Cc: qemu-sta...@nongnu.org
> Buglink: https://issues.redhat.com/browse/RHEL-28125
> Fixes: 004915a96a7a40e942ac85e6d22518cbcd283506
> Signed-off-by: Kevin Wolf 
> ---
>  include/qemu/job.h |  2 +-
>  block/mirror.c | 10 ++
>  2 files changed, 7 insertions(+), 5 deletions(-)

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org




Re: [PATCH-for-9.0 3/4] qtest/libqos: Un-inline size_to_prdtl()

2024-03-13 Thread Philippe Mathieu-Daudé

On 13/3/24 20:39, Peter Maydell wrote:

On Wed, 13 Mar 2024 at 18:50, Philippe Mathieu-Daudé  wrote:


See previous commit and commit 9de9fa5cf2 ("Avoid using inlined
functions with external linkage") for rationale.

Signed-off-by: Philippe Mathieu-Daudé 
---
  tests/qtest/libqos/ahci.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/libqos/ahci.c b/tests/qtest/libqos/ahci.c
index a2c94c6e06..135b23ffd9 100644
--- a/tests/qtest/libqos/ahci.c
+++ b/tests/qtest/libqos/ahci.c
@@ -662,7 +662,7 @@ unsigned ahci_pick_cmd(AHCIQState *ahci, uint8_t port)
  g_assert_not_reached();
  }

-inline unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd)
+unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd)
  {
  /* Each PRD can describe up to 4MiB */
  g_assert_cmphex(bytes_per_prd, <=, 4096 * 1024);


It looks like this function is only used in this file, so
we could make it static ?


Eh I did check for that, but sure how I missed it...




Re: [PATCH-for-9.0 4/4] meson: Enable -Wstatic-in-inline

2024-03-13 Thread Richard Henderson

On 3/13/24 08:49, Philippe Mathieu-Daudé wrote:

Compilers are clever enough to inline code when necessary.

The only case we accept an inline function is static in
header (we use C, not C++).

Add the -Wstatic-in-inline CPPFLAG to prevent public and
inline function to be added in the code base.

Signed-off-by: Philippe Mathieu-Daudé
---
  meson.build | 1 +
  1 file changed, 1 insertion(+)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH-for-9.0 2/4] accel/hvf: Un-inline hvf_arch_supports_guest_debug()

2024-03-13 Thread Richard Henderson

On 3/13/24 08:49, Philippe Mathieu-Daudé wrote:

See previous commit and commit 9de9fa5cf2 ("Avoid using inlined
functions with external linkage") for rationale.

Signed-off-by: Philippe Mathieu-Daudé
---
  target/arm/hvf/hvf.c  | 2 +-
  target/i386/hvf/hvf.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH-for-9.0 2/4] accel/hvf: Un-inline hvf_arch_supports_guest_debug()

2024-03-13 Thread Peter Maydell
On Wed, 13 Mar 2024 at 18:50, Philippe Mathieu-Daudé  wrote:
>
> See previous commit and commit 9de9fa5cf2 ("Avoid using inlined
> functions with external linkage") for rationale.
>
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Peter Maydell 

thanks
-- PMM



Re: [PATCH-for-9.0 3/4] qtest/libqos: Un-inline size_to_prdtl()

2024-03-13 Thread Peter Maydell
On Wed, 13 Mar 2024 at 18:50, Philippe Mathieu-Daudé  wrote:
>
> See previous commit and commit 9de9fa5cf2 ("Avoid using inlined
> functions with external linkage") for rationale.
>
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  tests/qtest/libqos/ahci.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tests/qtest/libqos/ahci.c b/tests/qtest/libqos/ahci.c
> index a2c94c6e06..135b23ffd9 100644
> --- a/tests/qtest/libqos/ahci.c
> +++ b/tests/qtest/libqos/ahci.c
> @@ -662,7 +662,7 @@ unsigned ahci_pick_cmd(AHCIQState *ahci, uint8_t port)
>  g_assert_not_reached();
>  }
>
> -inline unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd)
> +unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd)
>  {
>  /* Each PRD can describe up to 4MiB */
>  g_assert_cmphex(bytes_per_prd, <=, 4096 * 1024);

It looks like this function is only used in this file, so
we could make it static ?

-- PMM



[PATCH-for-9.0 2/4] accel/hvf: Un-inline hvf_arch_supports_guest_debug()

2024-03-13 Thread Philippe Mathieu-Daudé
See previous commit and commit 9de9fa5cf2 ("Avoid using inlined
functions with external linkage") for rationale.

Signed-off-by: Philippe Mathieu-Daudé 
---
 target/arm/hvf/hvf.c  | 2 +-
 target/i386/hvf/hvf.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
index e5f0f60093..65a5601804 100644
--- a/target/arm/hvf/hvf.c
+++ b/target/arm/hvf/hvf.c
@@ -2246,7 +2246,7 @@ void hvf_arch_update_guest_debug(CPUState *cpu)
 hvf_arch_set_traps();
 }
 
-inline bool hvf_arch_supports_guest_debug(void)
+bool hvf_arch_supports_guest_debug(void)
 {
 return true;
 }
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c
index 11ffdd4c69..1ed8ed5154 100644
--- a/target/i386/hvf/hvf.c
+++ b/target/i386/hvf/hvf.c
@@ -708,7 +708,7 @@ void hvf_arch_update_guest_debug(CPUState *cpu)
 {
 }
 
-inline bool hvf_arch_supports_guest_debug(void)
+bool hvf_arch_supports_guest_debug(void)
 {
 return false;
 }
-- 
2.41.0




[PATCH-for-9.0 1/4] hw/arm/smmu: Avoid using inlined functions with external linkage again

2024-03-13 Thread Philippe Mathieu-Daudé
Similarly to commit 9de9fa5cf2 ("hw/arm/smmu-common: Avoid using
inlined functions with external linkage"):

  None of our code base require / use inlined functions with external
  linkage. Some places use internal inlining in the hot path. These
  two functions are certainly not in any hot path and don't justify
  any inlining, so these are likely oversights rather than intentional.

Fix:

  C compiler for the host machine: clang (clang 15.0.0 "Apple clang version 
15.0.0 (clang-1500.3.9.4)")
  ...
  hw/arm/smmu-common.c:203:43: error: static function 
'smmu_hash_remove_by_vmid' is
  used in an inline function with external linkage [-Werror,-Wstatic-in-inline]
  g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, );
^
  include/hw/arm/smmu-common.h:197:1: note: use 'static' to give inline 
function 'smmu_iotlb_inv_vmid' internal linkage
  void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid);
  ^
  static
  hw/arm/smmu-common.c:139:17: note: 'smmu_hash_remove_by_vmid' declared here
  static gboolean smmu_hash_remove_by_vmid(gpointer key, gpointer value,
^

Fixes: ccc3ee3871 ("hw/arm/smmuv3: Add CMDs related to stage-2")
Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/arm/smmu-common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
index 4caedb4998..c4b540656c 100644
--- a/hw/arm/smmu-common.c
+++ b/hw/arm/smmu-common.c
@@ -197,7 +197,7 @@ void smmu_iotlb_inv_asid(SMMUState *s, uint16_t asid)
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_asid, );
 }
 
-inline void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid)
+void smmu_iotlb_inv_vmid(SMMUState *s, uint16_t vmid)
 {
 trace_smmu_iotlb_inv_vmid(vmid);
 g_hash_table_foreach_remove(s->iotlb, smmu_hash_remove_by_vmid, );
-- 
2.41.0




[PATCH-for-9.0 4/4] meson: Enable -Wstatic-in-inline

2024-03-13 Thread Philippe Mathieu-Daudé
Compilers are clever enough to inline code when necessary.

The only case we accept an inline function is static in
header (we use C, not C++).

Add the -Wstatic-in-inline CPPFLAG to prevent public and
inline function to be added in the code base.

Signed-off-by: Philippe Mathieu-Daudé 
---
 meson.build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/meson.build b/meson.build
index b375248a76..f57397aa53 100644
--- a/meson.build
+++ b/meson.build
@@ -591,6 +591,7 @@ warn_flags = [
   '-Wold-style-definition',
   '-Wredundant-decls',
   '-Wshadow=local',
+  '-Wstatic-in-inline',
   '-Wstrict-prototypes',
   '-Wtype-limits',
   '-Wundef',
-- 
2.41.0




[PATCH-for-9.0 3/4] qtest/libqos: Un-inline size_to_prdtl()

2024-03-13 Thread Philippe Mathieu-Daudé
See previous commit and commit 9de9fa5cf2 ("Avoid using inlined
functions with external linkage") for rationale.

Signed-off-by: Philippe Mathieu-Daudé 
---
 tests/qtest/libqos/ahci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qtest/libqos/ahci.c b/tests/qtest/libqos/ahci.c
index a2c94c6e06..135b23ffd9 100644
--- a/tests/qtest/libqos/ahci.c
+++ b/tests/qtest/libqos/ahci.c
@@ -662,7 +662,7 @@ unsigned ahci_pick_cmd(AHCIQState *ahci, uint8_t port)
 g_assert_not_reached();
 }
 
-inline unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd)
+unsigned size_to_prdtl(unsigned bytes, unsigned bytes_per_prd)
 {
 /* Each PRD can describe up to 4MiB */
 g_assert_cmphex(bytes_per_prd, <=, 4096 * 1024);
-- 
2.41.0




[PATCH-for-9.0 0/4] overall: Avoid using inlined functions with external linkage again

2024-03-13 Thread Philippe Mathieu-Daudé
Mostly as a C style cleanup, use -Wstatic-in-inline to avoid
using inlined function with external linkage.

Philippe Mathieu-Daudé (4):
  hw/arm/smmu: Avoid using inlined functions with external linkage again
  accel/hvf: Un-inline hvf_arch_supports_guest_debug()
  qtest/libqos: Un-inline size_to_prdtl()
  meson: Enable -Wstatic-in-inline

 meson.build   | 1 +
 hw/arm/smmu-common.c  | 2 +-
 target/arm/hvf/hvf.c  | 2 +-
 target/i386/hvf/hvf.c | 2 +-
 tests/qtest/libqos/ahci.c | 2 +-
 5 files changed, 5 insertions(+), 4 deletions(-)

-- 
2.41.0




Re: [PATCH v3] linux-aio: add IO_CMD_FDSYNC command support

2024-03-13 Thread Stefan Hajnoczi
On Wed, Mar 13, 2024 at 10:49:31PM +0530, Prasad Pandit wrote:
> On Wed, 13 Mar 2024 at 20:48, Stefan Hajnoczi  wrote:
> > > +extern bool laio_has_fdsync(int);
> > Please declare this in include/block/raw-aio.h alongside the other laio 
> > APIs.
> >
> > FDSYNC support should be probed at open() time and the result should be
> > stored in a new bool field like s->laio_supports_fdsync. That way the
> > cost of laio_has_fdsync() on every flush request is avoided.
> 
> * Okay. Here 's' is a BDRVRawState object and file open seems to
> happen in the raw_open_common() function? I'll move the
> laio_has_fdsync() call there and see how it works.

Yes. Thanks!

Stefan


signature.asc
Description: PGP signature


Re: [PATCH v3] linux-aio: add IO_CMD_FDSYNC command support

2024-03-13 Thread Prasad Pandit
On Wed, 13 Mar 2024 at 20:48, Stefan Hajnoczi  wrote:
> > +extern bool laio_has_fdsync(int);
> Please declare this in include/block/raw-aio.h alongside the other laio APIs.
>
> FDSYNC support should be probed at open() time and the result should be
> stored in a new bool field like s->laio_supports_fdsync. That way the
> cost of laio_has_fdsync() on every flush request is avoided.

* Okay. Here 's' is a BDRVRawState object and file open seems to
happen in the raw_open_common() function? I'll move the
laio_has_fdsync() call there and see how it works.

Thank you.
---
  - Prasad




Re: [PATCH v4 4/5] qapi: blockdev-backup: add discard-source parameter

2024-03-13 Thread Markus Armbruster
Vladimir Sementsov-Ogievskiy  writes:

> Add a parameter that enables discard-after-copy. That is mostly useful
> in "push backup with fleecing" scheme, when source is snapshot-access
> format driver node, based on copy-before-write filter snapshot-access
> API:
>
> [guest]  [snapshot-access] ~~ blockdev-backup ~~> [backup target]
>||
>| root   | file
>vv
> [copy-before-write]
>| |
>| file| target
>v v
> [active disk]   [temp.img]
>
> In this case discard-after-copy does two things:
>
>  - discard data in temp.img to save disk space
>  - avoid further copy-before-write operation in discarded area
>
> Note that we have to declare WRITE permission on source in
> copy-before-write filter, for discard to work. Still we can't take it
> unconditionally, as it will break normal backup from RO source. So, we
> have to add a parameter and pass it thorough bdrv_open flags.
>
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> Reviewed-by: Fiona Ebner 
> Tested-by: Fiona Ebner 

[...]

> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 1874f880a8..2ef52ae9a7 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -1610,6 +1610,9 @@
>  # node specified by @drive.  If this option is not given, a node
>  # name is autogenerated.  (Since: 4.2)
>  #
> +# @discard-source: Discard blocks on source which are already copied

"have been copied"?

> +# to the target.  (Since 9.1)
> +#
>  # @x-perf: Performance options.  (Since 6.0)
>  #
>  # Features:
> @@ -1631,6 +1634,7 @@
>  '*on-target-error': 'BlockdevOnError',
>  '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
>  '*filter-node-name': 'str',
> +'*discard-source': 'bool',
>  '*x-perf': { 'type': 'BackupPerf',
>   'features': [ 'unstable' ] } } }

QAPI schema
Acked-by: Markus Armbruster 




[PATCH v4 0/5] backup: discard-source parameter

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Hi all! The main patch is 04, please look at it for description and
diagram.

v4: add t-b by Fiona
add r-b by Fiona to 02-05 (patch 01 still lack an r-b)
05: fix copyrights and subject in the test
04: since 9.0 --> since 9.1 (we missed a soft freeze for 9.0)

Vladimir Sementsov-Ogievskiy (5):
  block/copy-before-write: fix permission
  block/copy-before-write: support unligned snapshot-discard
  block/copy-before-write: create block_copy bitmap in filter node
  qapi: blockdev-backup: add discard-source parameter
  iotests: add backup-discard-source

 block/backup.c|   5 +-
 block/block-copy.c|  12 +-
 block/copy-before-write.c |  39 -
 block/copy-before-write.h |   1 +
 block/replication.c   |   4 +-
 blockdev.c|   2 +-
 include/block/block-common.h  |   2 +
 include/block/block-copy.h|   2 +
 include/block/block_int-global-state.h|   2 +-
 qapi/block-core.json  |   4 +
 tests/qemu-iotests/257.out| 112 ++---
 .../qemu-iotests/tests/backup-discard-source  | 152 ++
 .../tests/backup-discard-source.out   |   5 +
 13 files changed, 272 insertions(+), 70 deletions(-)
 create mode 100755 tests/qemu-iotests/tests/backup-discard-source
 create mode 100644 tests/qemu-iotests/tests/backup-discard-source.out

-- 
2.34.1




[PATCH for-9.0] mirror: Don't call job_pause_point() under graph lock

2024-03-13 Thread Kevin Wolf
Calling job_pause_point() while holding the graph reader lock
potentially results in a deadlock: bdrv_graph_wrlock() first drains
everything, including the mirror job, which pauses it. The job is only
unpaused at the end of the drain section, which is when the graph writer
lock has been successfully taken. However, if the job happens to be
paused at a pause point where it still holds the reader lock, the writer
lock can't be taken as long as the job is still paused.

Mark job_pause_point() as GRAPH_UNLOCKED and fix mirror accordingly.

Cc: qemu-sta...@nongnu.org
Buglink: https://issues.redhat.com/browse/RHEL-28125
Fixes: 004915a96a7a40e942ac85e6d22518cbcd283506
Signed-off-by: Kevin Wolf 
---
 include/qemu/job.h |  2 +-
 block/mirror.c | 10 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/qemu/job.h b/include/qemu/job.h
index 9ea98b5927..2b873f2576 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -483,7 +483,7 @@ void job_enter(Job *job);
  *
  * Called with job_mutex *not* held.
  */
-void coroutine_fn job_pause_point(Job *job);
+void coroutine_fn GRAPH_UNLOCKED job_pause_point(Job *job);
 
 /**
  * @job: The job that calls the function.
diff --git a/block/mirror.c b/block/mirror.c
index 5145eb53e1..1bdce3b657 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -479,9 +479,9 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t 
offset,
 return bytes_handled;
 }
 
-static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s)
+static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s)
 {
-BlockDriverState *source = s->mirror_top_bs->backing->bs;
+BlockDriverState *source;
 MirrorOp *pseudo_op;
 int64_t offset;
 /* At least the first dirty chunk is mirrored in one iteration. */
@@ -489,6 +489,10 @@ static void coroutine_fn GRAPH_RDLOCK 
mirror_iteration(MirrorBlockJob *s)
 bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
 int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES);
 
+bdrv_graph_co_rdlock();
+source = s->mirror_top_bs->backing->bs;
+bdrv_graph_co_rdunlock();
+
 bdrv_dirty_bitmap_lock(s->dirty_bitmap);
 offset = bdrv_dirty_iter_next(s->dbi);
 if (offset < 0) {
@@ -1066,9 +1070,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 mirror_wait_for_free_in_flight_slot(s);
 continue;
 } else if (cnt != 0) {
-bdrv_graph_co_rdlock();
 mirror_iteration(s);
-bdrv_graph_co_rdunlock();
 }
 }
 
-- 
2.44.0




[PATCH v4 4/5] qapi: blockdev-backup: add discard-source parameter

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Add a parameter that enables discard-after-copy. That is mostly useful
in "push backup with fleecing" scheme, when source is snapshot-access
format driver node, based on copy-before-write filter snapshot-access
API:

[guest]  [snapshot-access] ~~ blockdev-backup ~~> [backup target]
   ||
   | root   | file
   vv
[copy-before-write]
   | |
   | file| target
   v v
[active disk]   [temp.img]

In this case discard-after-copy does two things:

 - discard data in temp.img to save disk space
 - avoid further copy-before-write operation in discarded area

Note that we have to declare WRITE permission on source in
copy-before-write filter, for discard to work. Still we can't take it
unconditionally, as it will break normal backup from RO source. So, we
have to add a parameter and pass it thorough bdrv_open flags.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Fiona Ebner 
Tested-by: Fiona Ebner 
---
 block/backup.c |  5 +++--
 block/block-copy.c |  9 +
 block/copy-before-write.c  | 15 +--
 block/copy-before-write.h  |  1 +
 block/replication.c|  4 ++--
 blockdev.c |  2 +-
 include/block/block-common.h   |  2 ++
 include/block/block-copy.h |  1 +
 include/block/block_int-global-state.h |  2 +-
 qapi/block-core.json   |  4 
 10 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index ec29d6b810..3dd2e229d2 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -356,7 +356,7 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
   BlockDriverState *target, int64_t speed,
   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
   BitmapSyncMode bitmap_mode,
-  bool compress,
+  bool compress, bool discard_source,
   const char *filter_node_name,
   BackupPerf *perf,
   BlockdevOnError on_source_error,
@@ -457,7 +457,8 @@ BlockJob *backup_job_create(const char *job_id, 
BlockDriverState *bs,
 goto error;
 }
 
-cbw = bdrv_cbw_append(bs, target, filter_node_name, , errp);
+cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
+  , errp);
 if (!cbw) {
 goto error;
 }
diff --git a/block/block-copy.c b/block/block-copy.c
index 8fca2c3698..7e3b378528 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -137,6 +137,7 @@ typedef struct BlockCopyState {
 CoMutex lock;
 int64_t in_flight_bytes;
 BlockCopyMethod method;
+bool discard_source;
 BlockReqList reqs;
 QLIST_HEAD(, BlockCopyCallState) calls;
 /*
@@ -353,6 +354,7 @@ static int64_t 
block_copy_calculate_cluster_size(BlockDriverState *target,
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
  BlockDriverState *copy_bitmap_bs,
  const BdrvDirtyBitmap *bitmap,
+ bool discard_source,
  Error **errp)
 {
 ERRP_GUARD();
@@ -418,6 +420,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 cluster_size),
 };
 
+s->discard_source = discard_source;
 block_copy_set_copy_opts(s, false, false);
 
 ratelimit_init(>rate_limit);
@@ -589,6 +592,12 @@ static coroutine_fn int block_copy_task_entry(AioTask 
*task)
 co_put_to_shres(s->mem, t->req.bytes);
 block_copy_task_end(t, ret);
 
+if (s->discard_source && ret == 0) {
+int64_t nbytes =
+MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
+bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
+}
+
 return ret;
 }
 
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index ed2c228da7..cd65524e26 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -44,6 +44,7 @@ typedef struct BDRVCopyBeforeWriteState {
 BdrvChild *target;
 OnCbwError on_cbw_error;
 uint32_t cbw_timeout_ns;
+bool discard_source;
 
 /*
  * @lock: protects access to @access_bitmap, @done_bitmap and
@@ -357,6 +358,8 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, 
BdrvChildRole role,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
 {
+BDRVCopyBeforeWriteState *s = bs->opaque;
+
 if (!(role & BDRV_CHILD_FILTERED)) {
 /*
  * Target child
@@ -381,6 +384,10 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, 
BdrvChildRole role,
  * start
  */
 *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+if (s->discard_source) {
+*nperm = *nperm | 

[PATCH v4 3/5] block/copy-before-write: create block_copy bitmap in filter node

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Currently block_copy creates copy_bitmap in source node. But that is in
bad relation with .independent_close=true of copy-before-write filter:
source node may be detached and removed before .bdrv_close() handler
called, which should call block_copy_state_free(), which in turn should
remove copy_bitmap.

That's all not ideal: it would be better if internal bitmap of
block-copy object is not attached to any node. But that is not possible
now.

The simplest solution is just create copy_bitmap in filter node, where
anyway two other bitmaps are created.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Fiona Ebner 
Tested-by: Fiona Ebner 
---
 block/block-copy.c |   3 +-
 block/copy-before-write.c  |   2 +-
 include/block/block-copy.h |   1 +
 tests/qemu-iotests/257.out | 112 ++---
 4 files changed, 60 insertions(+), 58 deletions(-)

diff --git a/block/block-copy.c b/block/block-copy.c
index 9ee3dd7ef5..8fca2c3698 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -351,6 +351,7 @@ static int64_t 
block_copy_calculate_cluster_size(BlockDriverState *target,
 }
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
  const BdrvDirtyBitmap *bitmap,
  Error **errp)
 {
@@ -367,7 +368,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, 
BdrvChild *target,
 return NULL;
 }
 
-copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
+copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
errp);
 if (!copy_bitmap) {
 return NULL;
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 6d89af0b29..ed2c228da7 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -468,7 +468,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
 ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
  bs->file->bs->supported_zero_flags);
 
-s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
+s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
 if (!s->bcs) {
 error_prepend(errp, "Cannot create block-copy-state: ");
 return -EINVAL;
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 0700953ab8..8b41643bfa 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
 typedef struct BlockCopyCallState BlockCopyCallState;
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
  const BdrvDirtyBitmap *bitmap,
  Error **errp);
 
diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
index aa76131ca9..c33dd7f3a9 100644
--- a/tests/qemu-iotests/257.out
+++ b/tests/qemu-iotests/257.out
@@ -120,16 +120,16 @@ write -P0x67 0x3fe 0x2
 "granularity": 65536,
 "persistent": false,
 "recording": false
-  }
-],
-"drive0": [
+  },
   {
 "busy": false,
 "count": 0,
 "granularity": 65536,
 "persistent": false,
 "recording": false
-  },
+  }
+],
+"drive0": [
   {
 "busy": false,
 "count": 458752,
@@ -596,16 +596,16 @@ write -P0x67 0x3fe 0x2
 "granularity": 65536,
 "persistent": false,
 "recording": false
-  }
-],
-"drive0": [
+  },
   {
 "busy": false,
 "count": 0,
 "granularity": 65536,
 "persistent": false,
 "recording": false
-  },
+  }
+],
+"drive0": [
   {
 "busy": false,
 "count": 458752,
@@ -865,16 +865,16 @@ write -P0x67 0x3fe 0x2
 "granularity": 65536,
 "persistent": false,
 "recording": false
-  }
-],
-"drive0": [
+  },
   {
 "busy": false,
 "count": 0,
 "granularity": 65536,
 "persistent": false,
 "recording": false
-  },
+  }
+],
+"drive0": [
   {
 "busy": false,
 "count": 458752,
@@ -1341,16 +1341,16 @@ write -P0x67 0x3fe 0x2
 "granularity": 65536,
 "persistent": false,
 "recording": false
-  }
-],
-"drive0": [
+  },
   {
 "busy": false,
 "count": 0,
 "granularity": 65536,
 "persistent": false,
 "recording": false
-  },
+  }
+],
+"drive0": [
   {
 "busy": false,
 "count": 458752,
@@ -1610,16 +1610,16 @@ write -P0x67 0x3fe 0x2
 "granularity": 65536,
 "persistent": 

[PATCH v4 5/5] iotests: add backup-discard-source

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Add test for a new backup option: discard-source.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Fiona Ebner 
Tested-by: Fiona Ebner 
---
 .../qemu-iotests/tests/backup-discard-source  | 152 ++
 .../tests/backup-discard-source.out   |   5 +
 2 files changed, 157 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/backup-discard-source
 create mode 100644 tests/qemu-iotests/tests/backup-discard-source.out

diff --git a/tests/qemu-iotests/tests/backup-discard-source 
b/tests/qemu-iotests/tests/backup-discard-source
new file mode 100755
index 00..2391b12acd
--- /dev/null
+++ b/tests/qemu-iotests/tests/backup-discard-source
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+#
+# Test backup discard-source parameter
+#
+# Copyright (c) Virtuozzo International GmbH.
+# Copyright (c) Yandex
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+
+import iotests
+from iotests import qemu_img_create, qemu_img_map, qemu_io
+
+
+temp_img = os.path.join(iotests.test_dir, 'temp')
+source_img = os.path.join(iotests.test_dir, 'source')
+target_img = os.path.join(iotests.test_dir, 'target')
+size = '1M'
+
+
+def get_actual_size(vm, node_name):
+nodes = vm.cmd('query-named-block-nodes', flat=True)
+node = next(n for n in nodes if n['node-name'] == node_name)
+return node['image']['actual-size']
+
+
+class TestBackup(iotests.QMPTestCase):
+def setUp(self):
+qemu_img_create('-f', iotests.imgfmt, source_img, size)
+qemu_img_create('-f', iotests.imgfmt, temp_img, size)
+qemu_img_create('-f', iotests.imgfmt, target_img, size)
+qemu_io('-c', 'write 0 1M', source_img)
+
+self.vm = iotests.VM()
+self.vm.launch()
+
+self.vm.cmd('blockdev-add', {
+'node-name': 'cbw',
+'driver': 'copy-before-write',
+'file': {
+'driver': iotests.imgfmt,
+'file': {
+'driver': 'file',
+'filename': source_img,
+}
+},
+'target': {
+'driver': iotests.imgfmt,
+'discard': 'unmap',
+'node-name': 'temp',
+'file': {
+'driver': 'file',
+'filename': temp_img
+}
+}
+})
+
+self.vm.cmd('blockdev-add', {
+'node-name': 'access',
+'discard': 'unmap',
+'driver': 'snapshot-access',
+'file': 'cbw'
+})
+
+self.vm.cmd('blockdev-add', {
+'driver': iotests.imgfmt,
+'node-name': 'target',
+'file': {
+'driver': 'file',
+'filename': target_img
+}
+})
+
+self.assertLess(get_actual_size(self.vm, 'temp'), 512 * 1024)
+
+def tearDown(self):
+# That should fail, because region is discarded
+self.vm.hmp_qemu_io('access', 'read 0 1M')
+
+self.vm.shutdown()
+
+self.assertTrue('read failed: Permission denied' in self.vm.get_log())
+
+# Final check that temp image is empty
+mapping = qemu_img_map(temp_img)
+self.assertEqual(len(mapping), 1)
+self.assertEqual(mapping[0]['start'], 0)
+self.assertEqual(mapping[0]['length'], 1024 * 1024)
+self.assertEqual(mapping[0]['data'], False)
+
+os.remove(temp_img)
+os.remove(source_img)
+os.remove(target_img)
+
+def do_backup(self):
+self.vm.cmd('blockdev-backup', device='access',
+sync='full', target='target',
+job_id='backup0',
+discard_source=True)
+
+self.vm.event_wait(name='BLOCK_JOB_COMPLETED')
+
+def test_discard_written(self):
+"""
+1. Guest writes
+2. copy-before-write operation, data is stored to temp
+3. start backup(discard_source=True), check that data is
+   removed from temp
+"""
+# Trigger copy-before-write operation
+result = self.vm.hmp_qemu_io('cbw', 'write 0 1M')
+self.assert_qmp(result, 'return', '')
+
+# Check that data is written to temporary image
+self.assertGreater(get_actual_size(self.vm, 'temp'), 1024 * 1024)
+
+self.do_backup()
+
+def test_discard_cbw(self):
+"""
+ 

[PATCH v4 1/5] block/copy-before-write: fix permission

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
In case when source node does not have any parents, the condition still
works as required: backup job do create the parent by

  block_job_create -> block_job_add_bdrv -> bdrv_root_attach_child

Still, in this case checking @perm variable doesn't work, as backup job
creates the root blk with empty permissions (as it rely on CBW filter
to require correct permissions and don't want to create extra
conflicts).

So, we should not check @perm.

The hack may be dropped entirely when transactional insertion of
filter (when we don't try to recalculate permissions in intermediate
state, when filter does conflict with original parent of the source
node) merged (old big series
"[PATCH v5 00/45] Transactional block-graph modifying API"[1] and it's
current in-flight part is "[PATCH v8 0/7] blockdev-replace"[2])

[1] https://patchew.org/QEMU/20220330212902.590099-1-vsement...@openvz.org/
[2] https://patchew.org/QEMU/2023101718.932733-1-vsement...@yandex-team.ru/

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Tested-by: Fiona Ebner 
---
 block/copy-before-write.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 8aba27a71d..3e3af30c08 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -364,9 +364,13 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, 
BdrvChildRole role,
perm, shared, nperm, nshared);
 
 if (!QLIST_EMPTY(>parents)) {
-if (perm & BLK_PERM_WRITE) {
-*nperm = *nperm | BLK_PERM_CONSISTENT_READ;
-}
+/*
+ * Note, that source child may be shared with backup job. Backup 
job
+ * does create own blk parent on copy-before-write node, so this
+ * works even if source node does not have any parents before 
backup
+ * start
+ */
+*nperm = *nperm | BLK_PERM_CONSISTENT_READ;
 *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
 }
 }
-- 
2.34.1




[PATCH v4 2/5] block/copy-before-write: support unligned snapshot-discard

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
First thing that crashes on unligned access here is
bdrv_reset_dirty_bitmap(). Correct way is to align-down the
snapshot-discard request.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Fiona Ebner 
Tested-by: Fiona Ebner 
---
 block/copy-before-write.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 3e3af30c08..6d89af0b29 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -325,14 +325,24 @@ static int coroutine_fn GRAPH_RDLOCK
 cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
 {
 BDRVCopyBeforeWriteState *s = bs->opaque;
+uint32_t cluster_size = block_copy_cluster_size(s->bcs);
+int64_t aligned_offset = QEMU_ALIGN_UP(offset, cluster_size);
+int64_t aligned_end = QEMU_ALIGN_DOWN(offset + bytes, cluster_size);
+int64_t aligned_bytes;
+
+if (aligned_end <= aligned_offset) {
+return 0;
+}
+aligned_bytes = aligned_end - aligned_offset;
 
 WITH_QEMU_LOCK_GUARD(>lock) {
-bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
+bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset,
+aligned_bytes);
 }
 
-block_copy_reset(s->bcs, offset, bytes);
+block_copy_reset(s->bcs, aligned_offset, aligned_bytes);
 
-return bdrv_co_pdiscard(s->target, offset, bytes);
+return bdrv_co_pdiscard(s->target, aligned_offset, aligned_bytes);
 }
 
 static void GRAPH_RDLOCK cbw_refresh_filename(BlockDriverState *bs)
-- 
2.34.1




Re: [PATCH v3] linux-aio: add IO_CMD_FDSYNC command support

2024-03-13 Thread Stefan Hajnoczi
On Wed, Mar 13, 2024 at 02:19:35PM +0530, Prasad Pandit wrote:
> From: Prasad Pandit 
> 
> Libaio defines IO_CMD_FDSYNC command to sync all outstanding
> asynchronous I/O operations, by flushing out file data to the
> disk storage.
> 
> Enable linux-aio to submit such aio request. This helps to
> reduce latency induced via pthread_create calls by
> thread-pool (aio=threads).
> 
> Signed-off-by: Prasad Pandit 
> ---
>  block/file-posix.c |  7 +++
>  block/linux-aio.c  | 22 +-
>  2 files changed, 28 insertions(+), 1 deletion(-)
> 
> v3: check if host kernel supports aio_fsync call
>   -> https://lists.nongnu.org/archive/html/qemu-devel/2024-03/msg03210.html
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index 35684f7e21..a30494d1a8 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -2453,6 +2453,8 @@ static inline bool 
> raw_check_linux_io_uring(BDRVRawState *s)
>  #endif
>  
>  #ifdef CONFIG_LINUX_AIO
> +extern bool laio_has_fdsync(int);

Please declare this in include/block/raw-aio.h alongside the other laio APIs.

> +
>  static inline bool raw_check_linux_aio(BDRVRawState *s)
>  {
>  Error *local_err = NULL;
> @@ -2599,6 +2601,11 @@ static int coroutine_fn 
> raw_co_flush_to_disk(BlockDriverState *bs)
>  if (raw_check_linux_io_uring(s)) {
>  return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
>  }
> +#endif
> +#ifdef CONFIG_LINUX_AIO
> +if (raw_check_linux_aio(s) && laio_has_fdsync(s->fd)) {

FDSYNC support should be probed at open() time and the result should be
stored in a new bool field like s->laio_supports_fdsync. That way the
cost of laio_has_fdsync() on every flush request is avoided.

> +return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
> +}
>  #endif
>  return raw_thread_pool_submit(handle_aiocb_flush, );
>  }
> diff --git a/block/linux-aio.c b/block/linux-aio.c
> index ec05d946f3..ed20a503e9 100644
> --- a/block/linux-aio.c
> +++ b/block/linux-aio.c
> @@ -67,6 +67,7 @@ struct LinuxAioState {
>  int event_max;
>  };
>  
> +bool laio_has_fdsync(int);
>  static void ioq_submit(LinuxAioState *s);
>  
>  static inline ssize_t io_event_ret(struct io_event *ev)
> @@ -384,6 +385,9 @@ static int laio_do_submit(int fd, struct qemu_laiocb 
> *laiocb, off_t offset,
>  case QEMU_AIO_READ:
>  io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
>  break;
> +case QEMU_AIO_FLUSH:
> +io_prep_fdsync(iocbs, fd);
> +break;
>  /* Currently Linux kernel does not support other operations */
>  default:
>  fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
> @@ -412,7 +416,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, 
> QEMUIOVector *qiov,
>  AioContext *ctx = qemu_get_current_aio_context();
>  struct qemu_laiocb laiocb = {
>  .co = qemu_coroutine_self(),
> -.nbytes = qiov->size,
> +.nbytes = qiov ? qiov->size : 0,
>  .ctx= aio_get_linux_aio(ctx),
>  .ret= -EINPROGRESS,
>  .is_read= (type == QEMU_AIO_READ),
> @@ -486,3 +490,19 @@ void laio_cleanup(LinuxAioState *s)
>  }
>  g_free(s);
>  }
> +
> +bool laio_has_fdsync(int fd)
> +{
> +struct iocb cb;
> +struct iocb *cbs[] = {, NULL};
> +
> +io_context_t ctx = 0;
> +io_setup(1, );
> +
> +/* check if host kernel supports IO_CMD_FDSYNC */
> +io_prep_fdsync(, fd);
> +int ret = io_submit(ctx, 1, cbs);
> +
> +io_destroy(ctx);
> +return (ret == -EINVAL) ? false : true;
> +}
> -- 
> 2.44.0
> 


signature.asc
Description: PGP signature


[RFC 10/15] qapi: query-jobs: add information specific for job type

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Duplicate the feature from query-block-jobs. It's a step to finally
deprecate query-block-jobs command and move to query-jobs.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/mirror.c | 14 --
 include/qemu/job.h |  5 +
 job-qmp.c  |  6 ++
 qapi/job.json  | 22 +++---
 4 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index e95c54fbc6..96dcbbc3e8 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1294,7 +1294,7 @@ static bool mirror_change(Job *job, JobChangeOptions 
*opts, Error **errp)
 return true;
 }
 
-static void mirror_query(BlockJob *job, BlockJobInfo *info)
+static void mirror_query_old(BlockJob *job, BlockJobInfo *info)
 {
 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
 
@@ -1303,6 +1303,15 @@ static void mirror_query(BlockJob *job, BlockJobInfo 
*info)
 };
 }
 
+static void mirror_query(Job *job, JobInfo *info)
+{
+MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
+
+info->u.mirror = (JobInfoMirror) {
+.actively_synced = qatomic_read(>actively_synced),
+};
+}
+
 static const BlockJobDriver mirror_job_driver = {
 .job_driver = {
 .instance_size  = sizeof(MirrorBlockJob),
@@ -1316,9 +1325,10 @@ static const BlockJobDriver mirror_job_driver = {
 .complete   = mirror_complete,
 .cancel = mirror_cancel,
 .change = mirror_change,
+.query  = mirror_query,
 },
 .drained_poll   = mirror_drained_poll,
-.query  = mirror_query,
+.query  = mirror_query_old,
 };
 
 static bool commit_active_change(Job *job, JobChangeOptions *opts, Error 
**errp)
diff --git a/include/qemu/job.h b/include/qemu/job.h
index eee1d5b50f..8a238b8658 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -315,6 +315,11 @@ struct JobDriver {
  */
 bool (*change)(Job *job, JobChangeOptions *opts, Error **errp);
 
+/*
+ * Query information specific to this kind of block job.
+ */
+void (*query)(Job *job, JobInfo *info);
+
 /**
  * Called when the job is freed.
  */
diff --git a/job-qmp.c b/job-qmp.c
index c048e03d9f..9643c5424d 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -177,6 +177,12 @@ static JobInfo *job_query_single_locked(Job *job, Error 
**errp)
   g_strdup(error_get_pretty(job->err)) : NULL,
 };
 
+if (job->driver->query) {
+job_unlock();
+job->driver->query(job, info);
+job_lock();
+}
+
 return info;
 }
 
diff --git a/qapi/job.json b/qapi/job.json
index 2f1b839cfc..036fec1b57 100644
--- a/qapi/job.json
+++ b/qapi/job.json
@@ -251,6 +251,20 @@
 ##
 { 'command': 'job-finalize', 'data': { 'id': 'str' } }
 
+##
+# @JobInfoMirror:
+#
+# Information specific to mirror block jobs.
+#
+# @actively-synced: Whether the source is actively synced to the
+# target, i.e. same data and new writes are done synchronously to
+# both.
+#
+# Since: 9.1
+##
+{ 'struct': 'JobInfoMirror',
+  'data': { 'actively-synced': 'bool' } }
+
 ##
 # @JobInfo:
 #
@@ -281,10 +295,12 @@
 #
 # Since: 3.0
 ##
-{ 'struct': 'JobInfo',
-  'data': { 'id': 'str', 'type': 'JobType', 'status': 'JobStatus',
+{ 'union': 'JobInfo',
+  'base': { 'id': 'str', 'type': 'JobType', 'status': 'JobStatus',
 'current-progress': 'int', 'total-progress': 'int',
-'*error': 'str' } }
+'*error': 'str' },
+  'discriminator': 'type',
+  'data': { 'mirror': 'JobInfoMirror' } }
 
 ##
 # @query-jobs:
-- 
2.34.1




[RFC 05/15] qapi: JobChangeOptions: make type member optional and deprecated

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Now QEMU can understand type directly from the job itself, type is
redundant.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 blockjob.c   |  2 +-
 docs/about/deprecated.rst|  6 ++
 job-qmp.c| 17 +
 qapi/block-core.json | 10 --
 stubs/meson.build|  1 +
 stubs/qapi-jobchangeoptions-mapper.c |  8 
 6 files changed, 41 insertions(+), 3 deletions(-)
 create mode 100644 stubs/qapi-jobchangeoptions-mapper.c

diff --git a/blockjob.c b/blockjob.c
index 788cb1e07d..33c40e7d25 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -319,7 +319,7 @@ void block_job_change_locked(BlockJob *job, 
JobChangeOptions *opts,
 
 GLOBAL_STATE_CODE();
 
-if (job_type(>job) != opts->type) {
+if (opts->has_type && job_type(>job) != opts->type) {
 error_setg(errp, "Job '%s' is '%s' job, not '%s'", job->job.id,
job_type_str(>job), JobType_str(opts->type));
 return;
diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index dfd681cd02..64981e5e4a 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -142,6 +142,12 @@ accepted incorrect commands will return an error. Users 
should make sure that
 all arguments passed to ``device_add`` are consistent with the documented
 property types.
 
+
+``block-job-change`` argument ``type`` (since 9.1)
+''
+
+QEMU can get job type from the job itself (by @id), @type field is redundant.
+
 QEMU Machine Protocol (QMP) events
 --
 
diff --git a/job-qmp.c b/job-qmp.c
index 9e26fa899f..c486df9579 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -26,6 +26,8 @@
 #include "qemu/osdep.h"
 #include "qemu/job.h"
 #include "qapi/qapi-commands-job.h"
+#include "qapi/qapi-types-block-core.h"
+#include "qapi/qapi-visit-block-core.h"
 #include "qapi/error.h"
 #include "trace/trace-root.h"
 
@@ -186,3 +188,18 @@ JobInfoList *qmp_query_jobs(Error **errp)
 
 return head;
 }
+
+bool JobChangeOptions_mapper(JobChangeOptions *opts, JobType *out, Error 
**errp)
+{
+Job *job;
+
+JOB_LOCK_GUARD();
+
+job = find_job_locked(opts->id, errp);
+if (!job) {
+return false;
+}
+
+*out = job_type(job);
+return true;
+}
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 6041e7bd8f..3d890dfcd0 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3082,11 +3082,17 @@
 #
 # @type: The job type
 #
+# Features:
+#
+# @deprecated: Members @type is deprecated. Qemu can get type from
+# the job itself, @type is redundant.
+#
 # Since: 8.2
 ##
 { 'union': 'JobChangeOptions',
-  'base': { 'id': 'str', 'type': 'JobType' },
-  'discriminator': 'type',
+  'base': { 'id': 'str',
+'*type': { 'type': 'JobType', 'features': ['deprecated'] } },
+  'discriminator': 'JobType',
   'data': { 'mirror': 'JobChangeOptionsMirror' } }
 
 ##
diff --git a/stubs/meson.build b/stubs/meson.build
index 0bf25e6ca5..e480400a6c 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -31,6 +31,7 @@ stub_ss.add(files('module-opts.c'))
 stub_ss.add(files('monitor.c'))
 stub_ss.add(files('monitor-core.c'))
 stub_ss.add(files('physmem.c'))
+stub_ss.add(files('qapi-jobchangeoptions-mapper.c'))
 stub_ss.add(files('qemu-timer-notify-cb.c'))
 stub_ss.add(files('memory_device.c'))
 stub_ss.add(files('qmp-command-available.c'))
diff --git a/stubs/qapi-jobchangeoptions-mapper.c 
b/stubs/qapi-jobchangeoptions-mapper.c
new file mode 100644
index 00..e4acfd91b3
--- /dev/null
+++ b/stubs/qapi-jobchangeoptions-mapper.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+#include "qapi/qapi-visit-block-core.h"
+#include "qapi/qapi-types-job.h"
+
+bool JobChangeOptions_mapper(JobChangeOptions *opts, JobType *out, Error 
**errp)
+{
+g_assert_not_reached();
+}
-- 
2.34.1




[RFC 01/15] scripts/qapi: support type-based unions

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Look at block-job-change command: we have to specify both 'id' to chose
the job to operate on and 'type' for QAPI union be parsed. But for user
this looks redundant: when we specify 'id', QEMU should be able to get
corresponding job's type.

This commit brings such a possibility: just specify some Enum type as
'discriminator', and define a function somewhere with prototype

bool YourUnionType_mapper(YourUnionType *, EnumType *out, Error **errp)

Further commits will use this functionality to upgrade block-job-change
interface and introduce some new interfaces.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 scripts/qapi/introspect.py |  5 +++-
 scripts/qapi/schema.py | 50 +++---
 scripts/qapi/types.py  |  3 ++-
 scripts/qapi/visit.py  | 43 ++--
 4 files changed, 73 insertions(+), 28 deletions(-)

diff --git a/scripts/qapi/introspect.py b/scripts/qapi/introspect.py
index 67c7d89aae..04d8d424f5 100644
--- a/scripts/qapi/introspect.py
+++ b/scripts/qapi/introspect.py
@@ -336,7 +336,10 @@ def visit_object_type_flat(self, name: str, info: 
Optional[QAPISourceInfo],
 'members': [self._gen_object_member(m) for m in members]
 }
 if variants:
-obj['tag'] = variants.tag_member.name
+if variants.tag_member:
+obj['tag'] = variants.tag_member.name
+else:
+obj['discriminator-type'] = variants.tag_type.name
 obj['variants'] = [self._gen_variant(v) for v in variants.variants]
 self._gen_tree(name, 'object', obj, ifcond, features)
 
diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py
index 8ba5665bc6..0efe8d815c 100644
--- a/scripts/qapi/schema.py
+++ b/scripts/qapi/schema.py
@@ -585,16 +585,16 @@ def visit(self, visitor):
 
 
 class QAPISchemaVariants:
-def __init__(self, tag_name, info, tag_member, variants):
+def __init__(self, discriminator, info, tag_member, variants):
 # Unions pass tag_name but not tag_member.
 # Alternates pass tag_member but not tag_name.
 # After check(), tag_member is always set.
-assert bool(tag_member) != bool(tag_name)
-assert (isinstance(tag_name, str) or
+assert bool(tag_member) != bool(discriminator)
+assert (isinstance(discriminator, str) or
 isinstance(tag_member, QAPISchemaObjectTypeMember))
 for v in variants:
 assert isinstance(v, QAPISchemaVariant)
-self._tag_name = tag_name
+self.discriminator = discriminator
 self.info = info
 self.tag_member = tag_member
 self.variants = variants
@@ -604,16 +604,24 @@ def set_defined_in(self, name):
 v.set_defined_in(name)
 
 def check(self, schema, seen):
-if self._tag_name:  # union
-self.tag_member = seen.get(c_name(self._tag_name))
+self.tag_type = None
+
+if self.discriminator:  # assume union with type discriminator
+self.tag_type = schema.lookup_type(self.discriminator)
+
+# union with discriminator field
+if self.discriminator and not self.tag_type:
+tag_name = self.discriminator
+self.tag_member = seen.get(c_name(tag_name))
+self.tag_type = self.tag_member.type
 base = "'base'"
 # Pointing to the base type when not implicit would be
 # nice, but we don't know it here
-if not self.tag_member or self._tag_name != self.tag_member.name:
+if not self.tag_member or tag_name != self.tag_member.name:
 raise QAPISemError(
 self.info,
 "discriminator '%s' is not a member of %s"
-% (self._tag_name, base))
+% (tag_name, base))
 # Here we do:
 base_type = schema.lookup_type(self.tag_member.defined_in)
 assert base_type
@@ -623,29 +631,33 @@ def check(self, schema, seen):
 raise QAPISemError(
 self.info,
 "discriminator member '%s' of %s must be of enum type"
-% (self._tag_name, base))
+% (tag_name, base))
 if self.tag_member.optional:
 raise QAPISemError(
 self.info,
 "discriminator member '%s' of %s must not be optional"
-% (self._tag_name, base))
+% (tag_name, base))
 if self.tag_member.ifcond.is_present():
 raise QAPISemError(
 self.info,
 "discriminator member '%s' of %s must not be conditional"
-% (self._tag_name, base))
-else:   # alternate
+% (tag_name, base))
+elif not self.tag_type:  # alternate
 assert isinstance(self.tag_member.type, 

[RFC 12/15] qapi: rename BlockDeviceIoStatus to IoStatus

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
This status is already shared between block-jobs and block-devices, so
structure name is misleading a bit. We also will need to use the
structure both in block-core.json and job.json. So give it more generic
name first.

The commit is made by commands:

git grep -l BlockDeviceIoStatus | \
xargs sed -i 's/BlockDeviceIoStatus/IoStatus/g'

git grep -l BLOCK_DEVICE_IO_STATUS | \
xargs sed -i 's/BLOCK_DEVICE_IO_STATUS/IO_STATUS/g'

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/block-backend.c   | 14 +++---
 block/mirror.c  |  4 ++--
 block/monitor/block-hmp-cmds.c  |  4 ++--
 blockjob.c  | 10 +-
 include/block/blockjob.h|  2 +-
 include/sysemu/block-backend-global-state.h |  2 +-
 qapi/block-core.json| 10 +-
 7 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 9c4de79e6b..ec19c50e96 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -65,7 +65,7 @@ struct BlockBackend {
 
 BlockdevOnError on_read_error, on_write_error;
 bool iostatus_enabled;
-BlockDeviceIoStatus iostatus;
+IoStatus iostatus;
 
 uint64_t perm;
 uint64_t shared_perm;
@@ -1198,7 +1198,7 @@ void blk_iostatus_enable(BlockBackend *blk)
 {
 GLOBAL_STATE_CODE();
 blk->iostatus_enabled = true;
-blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+blk->iostatus = IO_STATUS_OK;
 }
 
 /* The I/O status is only enabled if the drive explicitly
@@ -1212,7 +1212,7 @@ bool blk_iostatus_is_enabled(const BlockBackend *blk)
 blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
 }
 
-BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
+IoStatus blk_iostatus(const BlockBackend *blk)
 {
 GLOBAL_STATE_CODE();
 return blk->iostatus;
@@ -1228,7 +1228,7 @@ void blk_iostatus_reset(BlockBackend *blk)
 {
 GLOBAL_STATE_CODE();
 if (blk_iostatus_is_enabled(blk)) {
-blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+blk->iostatus = IO_STATUS_OK;
 }
 }
 
@@ -1236,9 +1236,9 @@ void blk_iostatus_set_err(BlockBackend *blk, int error)
 {
 IO_CODE();
 assert(blk_iostatus_is_enabled(blk));
-if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
-  BLOCK_DEVICE_IO_STATUS_FAILED;
+if (blk->iostatus == IO_STATUS_OK) {
+blk->iostatus = error == ENOSPC ? IO_STATUS_NOSPACE :
+  IO_STATUS_FAILED;
 }
 }
 
diff --git a/block/mirror.c b/block/mirror.c
index 96dcbbc3e8..8e672f3309 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -923,7 +923,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 MirrorBDSOpaque *mirror_top_opaque = s->mirror_top_bs->opaque;
 BlockDriverState *target_bs = blk_bs(s->target);
 bool need_drain = true;
-BlockDeviceIoStatus iostatus;
+IoStatus iostatus;
 int64_t length;
 int64_t target_length;
 BlockDriverInfo bdi;
@@ -1060,7 +1060,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
 iostatus = s->common.iostatus;
 }
 if (delta < BLOCK_JOB_SLICE_TIME &&
-iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+iostatus == IO_STATUS_OK) {
 if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
 (cnt == 0 && s->in_flight > 0)) {
 trace_mirror_yield(s, cnt, s->buf_free_count, s->in_flight);
diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
index d954bec6f1..64acb9cd6a 100644
--- a/block/monitor/block-hmp-cmds.c
+++ b/block/monitor/block-hmp-cmds.c
@@ -642,9 +642,9 @@ static void print_block_info(Monitor *mon, BlockInfo *info,
 if (info->qdev) {
 monitor_printf(mon, "Attached to:  %s\n", info->qdev);
 }
-if (info->has_io_status && info->io_status != 
BLOCK_DEVICE_IO_STATUS_OK) {
+if (info->has_io_status && info->io_status != IO_STATUS_OK) {
 monitor_printf(mon, "I/O status:   %s\n",
-   BlockDeviceIoStatus_str(info->io_status));
+   IoStatus_str(info->io_status));
 }
 
 if (info->removable) {
diff --git a/blockjob.c b/blockjob.c
index d3cd4f4fbf..de1dd03b2d 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -394,9 +394,9 @@ BlockJobInfo *block_job_query_locked(BlockJob *job, Error 
**errp)
 /* Called with job lock held */
 static void block_job_iostatus_set_err_locked(BlockJob *job, int error)
 {
-if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
-  BLOCK_DEVICE_IO_STATUS_FAILED;
+if (job->iostatus == IO_STATUS_OK) {
+

[RFC 11/15] job-qmp: job_query_single_locked: add assertion on job ret

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
job->ret must be always set together with job->err. Let's assert this.
Reproting no-error to the user, when job->err is unset and job->ret is
somehow set would be a bug.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 job-qmp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/job-qmp.c b/job-qmp.c
index 9643c5424d..3e2172c26a 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -163,6 +163,7 @@ static JobInfo *job_query_single_locked(Job *job, Error 
**errp)
 uint64_t progress_total;
 
 assert(!job_is_internal(job));
+assert(!job->err == !job->ret);
 progress_get_snapshot(>progress, _current,
   _total);
 
-- 
2.34.1




[RFC 07/15] qapi: add job-change

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Add a new-style command job-change, doing same thing as
block-job-change. The aim is finally deprecate block-job-* APIs and
move to job-* APIs.

We add a new command to qapi/block-core.json, not to
qapi/job.json to avoid resolving json file including loops for now.
This all would be a lot simple to refactor when we finally drop
deprecated block-job-* APIs.

@type argument of the new command immediately becomes deprecated.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 docs/about/deprecated.rst |  4 ++--
 job-qmp.c | 14 ++
 qapi/block-core.json  | 10 ++
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 64981e5e4a..5ff98ef95f 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -143,8 +143,8 @@ all arguments passed to ``device_add`` are consistent with 
the documented
 property types.
 
 
-``block-job-change`` argument ``type`` (since 9.1)
-''
+``block-job-change`` and ``job-change``  argument ``type`` (since 9.1)
+''
 
 QEMU can get job type from the job itself (by @id), @type field is redundant.
 
diff --git a/job-qmp.c b/job-qmp.c
index abe9b59487..011a8736ea 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -141,6 +141,20 @@ void qmp_job_dismiss(const char *id, Error **errp)
 job_dismiss_locked(, errp);
 }
 
+void qmp_job_change(JobChangeOptions *opts, Error **errp)
+{
+Job *job;
+
+JOB_LOCK_GUARD();
+job = find_job_locked(opts->id, errp);
+
+if (!job) {
+return;
+}
+
+job_change_locked(job, opts, errp);
+}
+
 /* Called with job_mutex held. */
 static JobInfo *job_query_single_locked(Job *job, Error **errp)
 {
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 3d890dfcd0..f5cefa441b 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3105,6 +3105,16 @@
 { 'command': 'block-job-change',
   'data': 'JobChangeOptions', 'boxed': true }
 
+##
+# @job-change:
+#
+# Change the block job's options.
+#
+# Since: 9.1
+##
+{ 'command': 'job-change',
+  'data': 'JobChangeOptions', 'boxed': true }
+
 ##
 # @BlockdevDiscardOptions:
 #
-- 
2.34.1




[RFC 14/15] qapi: query-job: add block-job specific information

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Add io-status and speed, which make sense only for block-jobs. This
allows us to finally deprecate old query-block-jobs API in the next
commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/backup.c   |  6 ++
 block/commit.c   |  6 ++
 block/mirror.c   |  8 
 block/stream.c   |  6 ++
 blockjob.c   | 10 ++
 include/block/blockjob.h |  6 ++
 qapi/job.json| 21 -
 7 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/block/backup.c b/block/backup.c
index bf086dc5f9..55bbe85bf6 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -343,6 +343,11 @@ static bool backup_change(Job *job, JobChangeOptions 
*opts, Error **errp)
 return block_job_change(bjob, >u.backup, errp);
 }
 
+static void backup_query(Job *job, JobInfo *info)
+{
+block_job_query(job, >u.backup);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
@@ -356,6 +361,7 @@ static const BlockJobDriver backup_job_driver = {
 .pause  = backup_pause,
 .cancel = backup_cancel,
 .change = backup_change,
+.query  = backup_query,
 },
 .set_speed = backup_set_speed,
 };
diff --git a/block/commit.c b/block/commit.c
index ccb6097679..9199a6adc8 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -211,6 +211,11 @@ static bool commit_change(Job *job, JobChangeOptions 
*opts, Error **errp)
 return block_job_change(bjob, >u.commit, errp);
 }
 
+static void commit_query(Job *job, JobInfo *info)
+{
+block_job_query(job, >u.commit);
+}
+
 static const BlockJobDriver commit_job_driver = {
 .job_driver = {
 .instance_size = sizeof(CommitBlockJob),
@@ -222,6 +227,7 @@ static const BlockJobDriver commit_job_driver = {
 .abort = commit_abort,
 .clean = commit_clean,
 .change= commit_change,
+.query = commit_query,
 },
 };
 
diff --git a/block/mirror.c b/block/mirror.c
index 8e672f3309..e8092d56be 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1310,6 +1310,8 @@ static void mirror_query(Job *job, JobInfo *info)
 info->u.mirror = (JobInfoMirror) {
 .actively_synced = qatomic_read(>actively_synced),
 };
+
+block_job_query(job, qapi_JobInfoMirror_base(>u.mirror));
 }
 
 static const BlockJobDriver mirror_job_driver = {
@@ -1338,6 +1340,11 @@ static bool commit_active_change(Job *job, 
JobChangeOptions *opts, Error **errp)
 return block_job_change(bjob, >u.commit, errp);
 }
 
+static void commit_active_query(Job *job, JobInfo *info)
+{
+block_job_query(job, >u.commit);
+}
+
 static const BlockJobDriver commit_active_job_driver = {
 .job_driver = {
 .instance_size  = sizeof(MirrorBlockJob),
@@ -1351,6 +1358,7 @@ static const BlockJobDriver commit_active_job_driver = {
 .complete   = mirror_complete,
 .cancel = commit_active_cancel,
 .change = commit_active_change,
+.query  = commit_active_query,
 },
 .drained_poll   = mirror_drained_poll,
 };
diff --git a/block/stream.c b/block/stream.c
index 34f4588537..e5e4d0bc77 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -246,6 +246,11 @@ static bool stream_change(Job *job, JobChangeOptions 
*opts, Error **errp)
 return block_job_change(bjob, >u.stream, errp);
 }
 
+static void stream_query(Job *job, JobInfo *info)
+{
+block_job_query(job, >u.stream);
+}
+
 static const BlockJobDriver stream_job_driver = {
 .job_driver = {
 .instance_size = sizeof(StreamBlockJob),
@@ -256,6 +261,7 @@ static const BlockJobDriver stream_job_driver = {
 .clean = stream_clean,
 .user_resume   = block_job_user_resume,
 .change= stream_change,
+.query = stream_query,
 },
 };
 
diff --git a/blockjob.c b/blockjob.c
index de1dd03b2d..7dd1ed3ff2 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -306,6 +306,16 @@ bool block_job_set_speed_locked(BlockJob *job, int64_t 
speed, Error **errp)
 return true;
 }
 
+void block_job_query(Job *job, JobInfoBlockJob *info)
+{
+BlockJob *bjob = container_of(job, BlockJob, job);
+
+JOB_LOCK_GUARD();
+
+info->speed = bjob->speed;
+info->io_status = bjob->iostatus;
+}
+
 static bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
 {
 JOB_LOCK_GUARD();
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index fd7ba1a285..bc33c2ba77 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -231,4 +231,10 @@ const BlockJobDriver *block_job_driver(BlockJob *job);
 bool block_job_change(BlockJob *job, JobChangeOptionsBlockJob *opts,
   Error **errp);
 
+/**
+ * Common part of .query handler for block-jobs.
+ * Adds 

[RFC 06/15] blockjob: move change action implementation to job from block-job

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Like for other block-job-* APIs we want have the actual functionality
in job layer and make block-job-change to be a deprecated duplication
of job-change in the following commit.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/mirror.c   |  7 +++
 blockdev.c   |  2 +-
 blockjob.c   | 26 --
 include/block/blockjob.h | 11 ---
 include/block/blockjob_int.h |  7 ---
 include/qemu/job.h   | 12 
 job-qmp.c|  1 +
 job.c| 23 +++
 8 files changed, 40 insertions(+), 49 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 2d0cd22c06..e670d0dd4f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1251,10 +1251,9 @@ static bool commit_active_cancel(Job *job, bool force)
 return force || !job_is_ready(job);
 }
 
-static void mirror_change(BlockJob *job, JobChangeOptions *opts,
-  Error **errp)
+static void mirror_change(Job *job, JobChangeOptions *opts, Error **errp)
 {
-MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
 JobChangeOptionsMirror *change_opts = >u.mirror;
 MirrorCopyMode current;
 
@@ -1310,9 +1309,9 @@ static const BlockJobDriver mirror_job_driver = {
 .pause  = mirror_pause,
 .complete   = mirror_complete,
 .cancel = mirror_cancel,
+.change = mirror_change,
 },
 .drained_poll   = mirror_drained_poll,
-.change = mirror_change,
 .query  = mirror_query,
 };
 
diff --git a/blockdev.c b/blockdev.c
index 7881f6e5a6..7e13213040 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3256,7 +3256,7 @@ void qmp_block_job_change(JobChangeOptions *opts, Error 
**errp)
 return;
 }
 
-block_job_change_locked(job, opts, errp);
+job_change_locked(>job, opts, errp);
 }
 
 void qmp_change_backing_file(const char *device,
diff --git a/blockjob.c b/blockjob.c
index 33c40e7d25..2769722b37 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -312,32 +312,6 @@ static bool block_job_set_speed(BlockJob *job, int64_t 
speed, Error **errp)
 return block_job_set_speed_locked(job, speed, errp);
 }
 
-void block_job_change_locked(BlockJob *job, JobChangeOptions *opts,
- Error **errp)
-{
-const BlockJobDriver *drv = block_job_driver(job);
-
-GLOBAL_STATE_CODE();
-
-if (opts->has_type && job_type(>job) != opts->type) {
-error_setg(errp, "Job '%s' is '%s' job, not '%s'", job->job.id,
-   job_type_str(>job), JobType_str(opts->type));
-return;
-}
-
-if (job_apply_verb_locked(>job, JOB_VERB_CHANGE, errp)) {
-return;
-}
-
-if (drv->change) {
-job_unlock();
-drv->change(job, opts, errp);
-job_lock();
-} else {
-error_setg(errp, "Job type does not support change");
-}
-}
-
 void block_job_ratelimit_processed_bytes(BlockJob *job, uint64_t n)
 {
 IO_CODE();
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 5dd1b08909..72e849a140 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -173,17 +173,6 @@ bool block_job_has_bdrv(BlockJob *job, BlockDriverState 
*bs);
  */
 bool block_job_set_speed_locked(BlockJob *job, int64_t speed, Error **errp);
 
-/**
- * block_job_change_locked:
- * @job: The job to change.
- * @opts: The new options.
- * @errp: Error object.
- *
- * Change the job according to opts.
- */
-void block_job_change_locked(BlockJob *job, JobChangeOptions *opts,
- Error **errp);
-
 /**
  * block_job_query_locked:
  * @job: The job to get information about.
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index d9c3b911d0..58bc7a5cea 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -68,13 +68,6 @@ struct BlockJobDriver {
 
 void (*set_speed)(BlockJob *job, int64_t speed);
 
-/*
- * Change the @job's options according to @opts.
- *
- * Note that this can already be called before the job coroutine is 
running.
- */
-void (*change)(BlockJob *job, JobChangeOptions *opts, Error **errp);
-
 /*
  * Query information specific to this kind of block job.
  */
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 9ea98b5927..d44cdb0cc8 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -27,6 +27,7 @@
 #define JOB_H
 
 #include "qapi/qapi-types-job.h"
+#include "qapi/qapi-types-block-core.h"
 #include "qemu/queue.h"
 #include "qemu/progress_meter.h"
 #include "qemu/coroutine.h"
@@ -307,6 +308,12 @@ struct JobDriver {
  */
 bool (*cancel)(Job *job, bool force);
 
+/**
+ * Change the @job's options according to @opts.
+ *
+ * Note that this can already 

[RFC 13/15] qapi: move IoStatus to common.json

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
We will need to use the structure both in block-core.json and job.json.
So, move it to common.json, which could be then included to job.json.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 qapi/block-core.json | 16 
 qapi/common.json | 16 
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1f00d4f031..75c02e1586 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -570,22 +570,6 @@
 '*iops_size': 'int', '*group': 'str', 'cache': 'BlockdevCacheInfo',
 'write_threshold': 'int', '*dirty-bitmaps': ['BlockDirtyInfo'] } }
 
-##
-# @IoStatus:
-#
-# An enumeration of block device I/O status.
-#
-# @ok: The last I/O operation has succeeded
-#
-# @failed: The last I/O operation has failed
-#
-# @nospace: The last I/O operation has failed due to a no-space
-# condition
-#
-# Since: 1.0
-##
-{ 'enum': 'IoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
-
 ##
 # @BlockDirtyInfo:
 #
diff --git a/qapi/common.json b/qapi/common.json
index f1bb841951..3becca1300 100644
--- a/qapi/common.json
+++ b/qapi/common.json
@@ -19,6 +19,22 @@
 { 'enum': 'IoOperationType',
   'data': [ 'read', 'write' ] }
 
+##
+# @IoStatus:
+#
+# An enumeration of block device I/O status.
+#
+# @ok: The last I/O operation has succeeded
+#
+# @failed: The last I/O operation has failed
+#
+# @nospace: The last I/O operation has failed due to a no-space
+# condition
+#
+# Since: 1.0
+##
+{ 'enum': 'IoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
+
 ##
 # @OnOffAuto:
 #
-- 
2.34.1




[RFC 09/15] qapi: job-complete: introduce no-block-replace option for mirror

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
That's an alternative to block-job-cancel(hard=false) behavior for
mirror, which exactly is: complete the job, wait for in-flight
requests, but don't do any block graph modifications.

Why:

1. We move to deprecating block-job-* APIs and use job-* APIs instead.
   So we need to port somehow the functionality to job- API.

2. The specified behavior was also a kind of "quirk". It's strange to
   "cancel" something in a normal success path of user scenario. This
   block-job-cancel(hard=false) results in BLOCK_JOB_COMPLETE event, so
   definitely it's just another mode of "complete", not "cancel".

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/mirror.c   |  8 +---
 include/qemu/job.h   |  8 +++-
 job-qmp.c| 20 +++-
 job.c| 12 ++--
 qapi/job.json| 28 +---
 stubs/qapi-jobchangeoptions-mapper.c |  5 +
 tests/unit/test-bdrv-drain.c |  2 +-
 tests/unit/test-block-iothread.c |  2 +-
 tests/unit/test-blockjob.c   |  2 +-
 9 files changed, 70 insertions(+), 17 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index f474b87079..e95c54fbc6 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -69,6 +69,7 @@ typedef struct MirrorBlockJob {
  */
 bool actively_synced;
 bool should_complete;
+bool no_block_replace;
 int64_t granularity;
 size_t buf_size;
 int64_t bdev_length;
@@ -740,7 +741,7 @@ static int mirror_exit_common(Job *job)
 }
 bdrv_graph_rdunlock_main_loop();
 
-if (s->should_complete && !abort) {
+if (s->should_complete && !abort && !s->no_block_replace) {
 BlockDriverState *to_replace = s->to_replace ?: src;
 bool ro = bdrv_is_read_only(to_replace);
 
@@ -1167,7 +1168,7 @@ immediate_exit:
 return ret;
 }
 
-static void mirror_complete(Job *job, Error **errp)
+static void mirror_complete(Job *job, JobComplete *opts, Error **errp)
 {
 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
 
@@ -1178,7 +1179,7 @@ static void mirror_complete(Job *job, Error **errp)
 }
 
 /* block all operations on to_replace bs */
-if (s->replaces) {
+if (s->replaces && !opts->u.mirror.no_block_replace) {
 s->to_replace = bdrv_find_node(s->replaces);
 if (!s->to_replace) {
 error_setg(errp, "Node name '%s' not found", s->replaces);
@@ -1193,6 +1194,7 @@ static void mirror_complete(Job *job, Error **errp)
 }
 
 s->should_complete = true;
+s->no_block_replace = opts->u.mirror.no_block_replace;
 
 /* If the job is paused, it will be re-entered when it is resumed */
 WITH_JOB_LOCK_GUARD() {
diff --git a/include/qemu/job.h b/include/qemu/job.h
index 1c9da74a0c..eee1d5b50f 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -254,7 +254,7 @@ struct JobDriver {
  * Optional callback for job types whose completion must be triggered
  * manually.
  */
-void (*complete)(Job *job, Error **errp);
+void (*complete)(Job *job, JobComplete *opts, Error **errp);
 
 /**
  * If the callback is not NULL, prepare will be invoked when all the jobs
@@ -634,6 +634,12 @@ void job_early_fail(Job *job);
  */
 void job_transition_to_ready(Job *job);
 
+/**
+ * Asynchronously complete the specified @job.
+ * Called with job lock held, but might release it temporarily.
+ */
+void job_complete_opts_locked(Job *job, JobComplete *opts, Error **errp);
+
 /**
  * Asynchronously complete the specified @job.
  * Called with job lock held, but might release it temporarily.
diff --git a/job-qmp.c b/job-qmp.c
index 011a8736ea..c048e03d9f 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -93,19 +93,19 @@ void qmp_job_resume(const char *id, Error **errp)
 job_user_resume_locked(job, errp);
 }
 
-void qmp_job_complete(const char *id, Error **errp)
+void qmp_job_complete(JobComplete *opts, Error **errp)
 {
 Job *job;
 
 JOB_LOCK_GUARD();
-job = find_job_locked(id, errp);
+job = find_job_locked(opts->id, errp);
 
 if (!job) {
 return;
 }
 
 trace_qmp_job_complete(job);
-job_complete_locked(job, errp);
+job_complete_opts_locked(job, opts, errp);
 }
 
 void qmp_job_finalize(const char *id, Error **errp)
@@ -204,13 +204,13 @@ JobInfoList *qmp_query_jobs(Error **errp)
 return head;
 }
 
-bool JobChangeOptions_mapper(JobChangeOptions *opts, JobType *out, Error 
**errp)
+static bool job_mapper(const char *id, JobType *out, Error **errp)
 {
 Job *job;
 
 JOB_LOCK_GUARD();
 
-job = find_job_locked(opts->id, errp);
+job = find_job_locked(id, errp);
 if (!job) {
 return false;
 }
@@ -218,3 +218,13 @@ bool JobChangeOptions_mapper(JobChangeOptions *opts, 
JobType *out, Error **errp)
 *out = job_type(job);
 return true;
 }
+
+bool JobChangeOptions_mapper(JobChangeOptions *opts, JobType *out, Error 
**errp)
+{
+

[RFC 04/15] qapi: block-job-change: make copy-mode parameter optional

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
We are going to add more parameters to change. We want to make possible
to change only one or any subset of available options. So all the
options should be optional.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/mirror.c   | 5 +
 qapi/block-core.json | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/mirror.c b/block/mirror.c
index a177502e4f..2d0cd22c06 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1265,6 +1265,11 @@ static void mirror_change(BlockJob *job, 
JobChangeOptions *opts,
 
 GLOBAL_STATE_CODE();
 
+if (!change_opts->has_copy_mode) {
+/* Nothing to do */
+return;
+}
+
 if (qatomic_read(>copy_mode) == change_opts->copy_mode) {
 return;
 }
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 67dd0ef038..6041e7bd8f 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3071,7 +3071,7 @@
 # Since: 8.2
 ##
 { 'struct': 'JobChangeOptionsMirror',
-  'data': { 'copy-mode' : 'MirrorCopyMode' } }
+  'data': { '*copy-mode' : 'MirrorCopyMode' } }
 
 ##
 # @JobChangeOptions:
-- 
2.34.1




[RFC 02/15] qapi: rename BlockJobChangeOptions to JobChangeOptions

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
We are going to move change action from block-job to job
implementation, and then move to job-* extenral APIs, deprecating
block-job-* APIs. This commit simplifies further transition.

The commit is made by command

git grep -l BlockJobChangeOptions | \
xargs sed -i 's/BlockJobChangeOptions/JobChangeOptions/g'

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/mirror.c   |  4 ++--
 blockdev.c   |  2 +-
 blockjob.c   |  2 +-
 include/block/blockjob.h |  2 +-
 include/block/blockjob_int.h |  2 +-
 qapi/block-core.json | 12 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 5145eb53e1..a177502e4f 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1251,11 +1251,11 @@ static bool commit_active_cancel(Job *job, bool force)
 return force || !job_is_ready(job);
 }
 
-static void mirror_change(BlockJob *job, BlockJobChangeOptions *opts,
+static void mirror_change(BlockJob *job, JobChangeOptions *opts,
   Error **errp)
 {
 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-BlockJobChangeOptionsMirror *change_opts = >u.mirror;
+JobChangeOptionsMirror *change_opts = >u.mirror;
 MirrorCopyMode current;
 
 /*
diff --git a/blockdev.c b/blockdev.c
index d8fb3399f5..7881f6e5a6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3245,7 +3245,7 @@ void qmp_block_job_dismiss(const char *id, Error **errp)
 job_dismiss_locked(, errp);
 }
 
-void qmp_block_job_change(BlockJobChangeOptions *opts, Error **errp)
+void qmp_block_job_change(JobChangeOptions *opts, Error **errp)
 {
 BlockJob *job;
 
diff --git a/blockjob.c b/blockjob.c
index d5f29e14af..8cfbb15543 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -312,7 +312,7 @@ static bool block_job_set_speed(BlockJob *job, int64_t 
speed, Error **errp)
 return block_job_set_speed_locked(job, speed, errp);
 }
 
-void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts,
+void block_job_change_locked(BlockJob *job, JobChangeOptions *opts,
  Error **errp)
 {
 const BlockJobDriver *drv = block_job_driver(job);
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 7061ab7201..5dd1b08909 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -181,7 +181,7 @@ bool block_job_set_speed_locked(BlockJob *job, int64_t 
speed, Error **errp);
  *
  * Change the job according to opts.
  */
-void block_job_change_locked(BlockJob *job, BlockJobChangeOptions *opts,
+void block_job_change_locked(BlockJob *job, JobChangeOptions *opts,
  Error **errp);
 
 /**
diff --git a/include/block/blockjob_int.h b/include/block/blockjob_int.h
index 4c3d2e25a2..d9c3b911d0 100644
--- a/include/block/blockjob_int.h
+++ b/include/block/blockjob_int.h
@@ -73,7 +73,7 @@ struct BlockJobDriver {
  *
  * Note that this can already be called before the job coroutine is 
running.
  */
-void (*change)(BlockJob *job, BlockJobChangeOptions *opts, Error **errp);
+void (*change)(BlockJob *job, JobChangeOptions *opts, Error **errp);
 
 /*
  * Query information specific to this kind of block job.
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 1874f880a8..67dd0ef038 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3063,18 +3063,18 @@
   'allow-preconfig': true }
 
 ##
-# @BlockJobChangeOptionsMirror:
+# @JobChangeOptionsMirror:
 #
 # @copy-mode: Switch to this copy mode.  Currently, only the switch
 # from 'background' to 'write-blocking' is implemented.
 #
 # Since: 8.2
 ##
-{ 'struct': 'BlockJobChangeOptionsMirror',
+{ 'struct': 'JobChangeOptionsMirror',
   'data': { 'copy-mode' : 'MirrorCopyMode' } }
 
 ##
-# @BlockJobChangeOptions:
+# @JobChangeOptions:
 #
 # Block job options that can be changed after job creation.
 #
@@ -3084,10 +3084,10 @@
 #
 # Since: 8.2
 ##
-{ 'union': 'BlockJobChangeOptions',
+{ 'union': 'JobChangeOptions',
   'base': { 'id': 'str', 'type': 'JobType' },
   'discriminator': 'type',
-  'data': { 'mirror': 'BlockJobChangeOptionsMirror' } }
+  'data': { 'mirror': 'JobChangeOptionsMirror' } }
 
 ##
 # @block-job-change:
@@ -3097,7 +3097,7 @@
 # Since: 8.2
 ##
 { 'command': 'block-job-change',
-  'data': 'BlockJobChangeOptions', 'boxed': true }
+  'data': 'JobChangeOptions', 'boxed': true }
 
 ##
 # @BlockdevDiscardOptions:
-- 
2.34.1




[RFC 08/15] qapi: job-change: support speed parameter

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Allow change job speed through job-change command. Old
block-job-set-speed would be deprecated soon.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/backup.c   |  8 ++
 block/commit.c   | 10 ++-
 block/mirror.c   | 60 
 block/stream.c   |  8 ++
 blockjob.c   | 13 +
 include/block/blockjob.h |  7 +
 include/qemu/job.h   |  2 +-
 qapi/block-core.json | 23 +--
 8 files changed, 103 insertions(+), 28 deletions(-)

diff --git a/block/backup.c b/block/backup.c
index ec29d6b810..bf086dc5f9 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -336,6 +336,13 @@ static bool backup_cancel(Job *job, bool force)
 return true;
 }
 
+static bool backup_change(Job *job, JobChangeOptions *opts, Error **errp)
+{
+BlockJob *bjob = container_of(job, BlockJob, job);
+
+return block_job_change(bjob, >u.backup, errp);
+}
+
 static const BlockJobDriver backup_job_driver = {
 .job_driver = {
 .instance_size  = sizeof(BackupBlockJob),
@@ -348,6 +355,7 @@ static const BlockJobDriver backup_job_driver = {
 .clean  = backup_clean,
 .pause  = backup_pause,
 .cancel = backup_cancel,
+.change = backup_change,
 },
 .set_speed = backup_set_speed,
 };
diff --git a/block/commit.c b/block/commit.c
index 7c3fdcb0ca..ccb6097679 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -204,6 +204,13 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 return 0;
 }
 
+static bool commit_change(Job *job, JobChangeOptions *opts, Error **errp)
+{
+BlockJob *bjob = container_of(job, BlockJob, job);
+
+return block_job_change(bjob, >u.commit, errp);
+}
+
 static const BlockJobDriver commit_job_driver = {
 .job_driver = {
 .instance_size = sizeof(CommitBlockJob),
@@ -213,7 +220,8 @@ static const BlockJobDriver commit_job_driver = {
 .run   = commit_run,
 .prepare   = commit_prepare,
 .abort = commit_abort,
-.clean = commit_clean
+.clean = commit_clean,
+.change= commit_change,
 },
 };
 
diff --git a/block/mirror.c b/block/mirror.c
index e670d0dd4f..f474b87079 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1251,41 +1251,45 @@ static bool commit_active_cancel(Job *job, bool force)
 return force || !job_is_ready(job);
 }
 
-static void mirror_change(Job *job, JobChangeOptions *opts, Error **errp)
+static bool mirror_change(Job *job, JobChangeOptions *opts, Error **errp)
 {
+BlockJob *bjob = container_of(job, BlockJob, job);
 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common.job);
 JobChangeOptionsMirror *change_opts = >u.mirror;
-MirrorCopyMode current;
-
-/*
- * The implementation relies on the fact that copy_mode is only written
- * under the BQL. Otherwise, further synchronization would be required.
- */
+MirrorCopyMode old_mode;
 
 GLOBAL_STATE_CODE();
 
-if (!change_opts->has_copy_mode) {
-/* Nothing to do */
-return;
-}
+if (change_opts->has_copy_mode) {
+old_mode = qatomic_read(>copy_mode);
 
-if (qatomic_read(>copy_mode) == change_opts->copy_mode) {
-return;
-}
+if (old_mode != change_opts->copy_mode) {
+if (change_opts->copy_mode != MIRROR_COPY_MODE_WRITE_BLOCKING) {
+error_setg(errp, "Change to copy mode '%s' is not implemented",
+   MirrorCopyMode_str(change_opts->copy_mode));
+return false;
+}
 
-if (change_opts->copy_mode != MIRROR_COPY_MODE_WRITE_BLOCKING) {
-error_setg(errp, "Change to copy mode '%s' is not implemented",
-   MirrorCopyMode_str(change_opts->copy_mode));
-return;
+if (old_mode != MIRROR_COPY_MODE_BACKGROUND) {
+error_setg(errp, "Expected current copy mode '%s', got '%s'",
+   MirrorCopyMode_str(MIRROR_COPY_MODE_BACKGROUND),
+   MirrorCopyMode_str(old_mode));
+return false;
+}
+}
 }
 
-current = qatomic_cmpxchg(>copy_mode, MIRROR_COPY_MODE_BACKGROUND,
-  change_opts->copy_mode);
-if (current != MIRROR_COPY_MODE_BACKGROUND) {
-error_setg(errp, "Expected current copy mode '%s', got '%s'",
-   MirrorCopyMode_str(MIRROR_COPY_MODE_BACKGROUND),
-   MirrorCopyMode_str(current));
+if (!block_job_change(bjob, qapi_JobChangeOptionsMirror_base(change_opts),
+  errp))
+{
+return false;
 }
+
+old_mode = qatomic_cmpxchg(>copy_mode, MIRROR_COPY_MODE_BACKGROUND,
+   change_opts->copy_mode);
+assert(old_mode == MIRROR_COPY_MODE_BACKGROUND);
+
+return 

[RFC 15/15] qapi/block-core: derpecate block-job- APIs

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
For change, pause, resume, complete, dismiss and finalize actions
corresponding job- and block-job commands are almost equal. The
difference is in find_block_job_locked() vs find_job_locked()
functions. What's different?

1. find_block_job_locked() do check, is found job a block-job. This OK
   when moving to more generic API, no needs to document this change.

2. find_block_job_locked() reports DeviceNotActive on failure, when
   find_job_locked() reports GenericError. So, lets document this
   difference in deprecated.txt. Still, for dismiss and finalize errors
   are not documented at all, so be silent in deprecated.txt as well.

For cancel, we have a new solution about soft-cancelling mirror:
job-complete(no-block-replace=true)

For set-speed, the action is supported by job-change.

For query, query-jobs is not equal to query-block-jobs, but now it has
enough information (see documentation changes for details).

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 docs/about/deprecated.rst | 73 +--
 qapi/block-core.json  | 59 ++-
 2 files changed, 129 insertions(+), 3 deletions(-)

diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst
index 5ff98ef95f..7db3ba983b 100644
--- a/docs/about/deprecated.rst
+++ b/docs/about/deprecated.rst
@@ -128,6 +128,75 @@ options are removed in favor of using explicit 
``blockdev-create`` and
 ``blockdev-add`` calls. See :doc:`/interop/live-block-operations` for
 details.
 
+``block-job-pause`` (since 9.1)
+'''
+
+Use ``job-pause`` instead. The only difference is that ``job-pause``
+always reports GenericError on failure when ``block-job-pause`` reports
+DeviceNotActive when block-job is not found.
+
+``block-job-resume`` (since 9.1)
+
+
+Use ``job-resume`` instead. The only difference is that ``job-resume``
+always reports GenericError on failure when ``block-job-resume`` reports
+DeviceNotActive when block-job is not found.
+
+``block-job-complete`` (since 9.1)
+''
+
+Use ``job-complete`` instead. The only difference is that ``job-complete``
+always reports GenericError on failure when ``block-job-complete`` reports
+DeviceNotActive when block-job is not found.
+
+``block-job-dismiss`` (since 9.1)
+'
+
+Use ``job-dismiss`` instead.
+
+``block-job-finalize`` (since 9.1)
+''
+
+Use ``job-finalize`` instead.
+
+``block-job-set-speed`` (since 9.1)
+'''
+
+Use ``job-change`` instead.
+
+``block-job-change`` (since 9.1)
+
+
+Use ``job-change`` instead.
+
+``block-job-cancel`` (since 9.1)
+
+
+Use ``job-cancel`` instead which correspond to
+``block-job-cancel`` (``force`` = true ).  For special case of
+soft-cancelling mirror in ready state, use ``job-complete``
+(``no-block-replace`` = true ).
+
+``query-block-jobs`` (since 9.1)
+
+Use ``query-jobs`` instead. Per-field recommendations:
+
+``query-block-jobs`` ``device`` field: use ``query-jobs`` ``id`` field.
+
+``query-block-jobs`` ``len`` and ``offset`` fields: use ``query-jobs``
+``current-progress`` and ``total-progress`` fields.
+
+``query-block-jobs`` ``paused``, ``ready``:
+use ``qeury-jobs`` ``status``.
+
+``auto-finalize``, ``auto-dismiss``: these are parameters set by user
+on job creation and never changed. So, no reason to query them. No
+support in ``query-jobs``.
+
+``busy``: it actually means nothing for user, it's a mistake to rely on
+it. No support in ``query-jobs``.
+
+
 Incorrectly typed ``device_add`` arguments (since 6.2)
 ''
 
@@ -143,8 +212,8 @@ all arguments passed to ``device_add`` are consistent with 
the documented
 property types.
 
 
-``block-job-change`` and ``job-change``  argument ``type`` (since 9.1)
-''
+``job-change`` argument ``type`` (since 9.1)
+
 
 QEMU can get job type from the job itself (by @id), @type field is redundant.
 
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 75c02e1586..793155f174 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1422,9 +1422,15 @@
 #
 # Returns: a list of @BlockJobInfo for each active block job
 #
+# Features:
+#
+# @deprecated: This command is deprecated.  Use @query-jobs
+# instead.
+#
 # Since: 1.1
 ##
 { 'command': 'query-block-jobs', 'returns': ['BlockJobInfo'],
+  'features': ['deprecated'],
   'allow-preconfig': true }
 
 ##
@@ -2875,6 +2881,11 @@
 # @speed: the maximum speed, in bytes per second, or 0 for unlimited.
 # Defaults to 0.
 #
+# Features:
+#
+# @deprecated: This command is deprecated.  Use @job-change
+# instead.
+#
 # Errors:
 # - If no background operation is active on this device,
 #   

[RFC 03/15] blockjob: block_job_change_locked(): check job type

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
User may specify wrong type for the job id. Let's check it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 blockjob.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/blockjob.c b/blockjob.c
index 8cfbb15543..788cb1e07d 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -319,6 +319,12 @@ void block_job_change_locked(BlockJob *job, 
JobChangeOptions *opts,
 
 GLOBAL_STATE_CODE();
 
+if (job_type(>job) != opts->type) {
+error_setg(errp, "Job '%s' is '%s' job, not '%s'", job->job.id,
+   job_type_str(>job), JobType_str(opts->type));
+return;
+}
+
 if (job_apply_verb_locked(>job, JOB_VERB_CHANGE, errp)) {
 return;
 }
-- 
2.34.1




[RFC 00/15] block job API

2024-03-13 Thread Vladimir Sementsov-Ogievskiy
Hi all!

The series aims to reach feature parity between job-* and block-job-*
APIs and finally deprecate block-job-* APIs.

01: new type-based unions for QAPI

02-07: rework block-job-change and add similar job-change command

08: support set-speed in new API (as an option to job-change)

09: support soft-cancelling ready mirror job in new API (as an
option to job-complete)

10-14: prepare query-jobs to substitute query-block-jobs

15: finally, deprecate old APIs

RFC: the series are untested. I plan to move tests to using new APIs
instead of deprecated ones in v2.

Vladimir Sementsov-Ogievskiy (15):
  scripts/qapi: support type-based unions
  qapi: rename BlockJobChangeOptions to JobChangeOptions
  blockjob: block_job_change_locked(): check job type
  qapi: block-job-change: make copy-mode parameter optional
  qapi: JobChangeOptions: make type member optional and deprecated
  blockjob: move change action implementation to job from block-job
  qapi: add job-change
  qapi: job-change: support speed parameter
  qapi: job-complete: introduce no-block-replace option for mirror
  qapi: query-jobs: add information specific for job type
  job-qmp: job_query_single_locked: add assertion on job ret
  qapi: rename BlockDeviceIoStatus to IoStatus
  qapi: move IoStatus to common.json
  qapi: query-job: add block-job specific information
  qapi/block-core: derpecate block-job- APIs

 block/backup.c  |  14 ++
 block/block-backend.c   |  14 +-
 block/commit.c  |  16 ++-
 block/mirror.c  |  98 +-
 block/monitor/block-hmp-cmds.c  |   4 +-
 block/stream.c  |  14 ++
 blockdev.c  |   4 +-
 blockjob.c  |  39 +++---
 docs/about/deprecated.rst   |  75 +++
 include/block/blockjob.h|  26 ++--
 include/block/blockjob_int.h|   7 -
 include/qemu/job.h  |  25 +++-
 include/sysemu/block-backend-global-state.h |   2 +-
 job-qmp.c   |  55 +++-
 job.c   |  35 -
 qapi/block-core.json| 136 +++-
 qapi/common.json|  16 +++
 qapi/job.json   |  69 +-
 scripts/qapi/introspect.py  |   5 +-
 scripts/qapi/schema.py  |  50 ---
 scripts/qapi/types.py   |   3 +-
 scripts/qapi/visit.py   |  43 ++-
 stubs/meson.build   |   1 +
 stubs/qapi-jobchangeoptions-mapper.c|  13 ++
 tests/unit/test-bdrv-drain.c|   2 +-
 tests/unit/test-block-iothread.c|   2 +-
 tests/unit/test-blockjob.c  |   2 +-
 27 files changed, 616 insertions(+), 154 deletions(-)
 create mode 100644 stubs/qapi-jobchangeoptions-mapper.c

-- 
2.34.1




Re: [PATCH v2 2/6] virtio: Prevent creation of device using notification-data with ioeventfd

2024-03-13 Thread Jonah Palmer




On 3/13/24 10:35 AM, Eugenio Perez Martin wrote:

On Wed, Mar 13, 2024 at 12:55 PM Jonah Palmer  wrote:


Prevent the realization of a virtio device that attempts to use the
VIRTIO_F_NOTIFICATION_DATA transport feature without disabling
ioeventfd.

Due to ioeventfd not being able to carry the extra data associated with
this feature, having both enabled is a functional mismatch and therefore
Qemu should not continue the device's realization process.

Although the device does not yet know if the feature will be
successfully negotiated, many devices using this feature wont actually
work without this extra data and would fail FEATURES_OK anyway.

If ioeventfd is able to work with the extra notification data in the
future, this compatibility check can be removed.

Signed-off-by: Jonah Palmer 
---
  hw/virtio/virtio.c | 22 ++
  include/hw/virtio/virtio.h |  2 ++
  2 files changed, 24 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index bcb9e09df0..d0a433b465 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2971,6 +2971,20 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val)
  return ret;
  }

+void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
+Error **errp)
+{
+VirtioBusState *bus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
+VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
+DeviceState *proxy = DEVICE(BUS(bus)->parent);
+
+if (virtio_host_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA) &&
+k->ioeventfd_enabled(proxy)) {
+error_setg(errp,
+   "notification_data=on without ioeventfd=off is not 
supported");
+}
+}
+
  size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
uint64_t host_features)
  {
@@ -3731,6 +3745,14 @@ static void virtio_device_realize(DeviceState *dev, 
Error **errp)
  }
  }

+/* Devices should not use both ioeventfd and notification data feature */
+virtio_device_check_notification_compatibility(vdev, );
+if (err != NULL) {
+error_propagate(errp, err);
+vdc->unrealize(dev);
+return;
+}
+
  virtio_bus_device_plugged(vdev, );
  if (err != NULL) {
  error_propagate(errp, err);
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 53915947a7..e0325d84d0 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -346,6 +346,8 @@ void virtio_queue_reset(VirtIODevice *vdev, uint32_t 
queue_index);
  void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
  void virtio_update_irq(VirtIODevice *vdev);
  int virtio_set_features(VirtIODevice *vdev, uint64_t val);
+void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
+Error **errp);


Why not make it static?



Great question with no good answer! Will fix this.



  /* Base devices.  */
  typedef struct VirtIOBlkConf VirtIOBlkConf;
--
2.39.3







Re: [PATCH v2 2/6] virtio: Prevent creation of device using notification-data with ioeventfd

2024-03-13 Thread Eugenio Perez Martin
On Wed, Mar 13, 2024 at 12:55 PM Jonah Palmer  wrote:
>
> Prevent the realization of a virtio device that attempts to use the
> VIRTIO_F_NOTIFICATION_DATA transport feature without disabling
> ioeventfd.
>
> Due to ioeventfd not being able to carry the extra data associated with
> this feature, having both enabled is a functional mismatch and therefore
> Qemu should not continue the device's realization process.
>
> Although the device does not yet know if the feature will be
> successfully negotiated, many devices using this feature wont actually
> work without this extra data and would fail FEATURES_OK anyway.
>
> If ioeventfd is able to work with the extra notification data in the
> future, this compatibility check can be removed.
>
> Signed-off-by: Jonah Palmer 
> ---
>  hw/virtio/virtio.c | 22 ++
>  include/hw/virtio/virtio.h |  2 ++
>  2 files changed, 24 insertions(+)
>
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index bcb9e09df0..d0a433b465 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -2971,6 +2971,20 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t 
> val)
>  return ret;
>  }
>
> +void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
> +Error **errp)
> +{
> +VirtioBusState *bus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
> +VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
> +DeviceState *proxy = DEVICE(BUS(bus)->parent);
> +
> +if (virtio_host_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA) &&
> +k->ioeventfd_enabled(proxy)) {
> +error_setg(errp,
> +   "notification_data=on without ioeventfd=off is not 
> supported");
> +}
> +}
> +
>  size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
>uint64_t host_features)
>  {
> @@ -3731,6 +3745,14 @@ static void virtio_device_realize(DeviceState *dev, 
> Error **errp)
>  }
>  }
>
> +/* Devices should not use both ioeventfd and notification data feature */
> +virtio_device_check_notification_compatibility(vdev, );
> +if (err != NULL) {
> +error_propagate(errp, err);
> +vdc->unrealize(dev);
> +return;
> +}
> +
>  virtio_bus_device_plugged(vdev, );
>  if (err != NULL) {
>  error_propagate(errp, err);
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 53915947a7..e0325d84d0 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -346,6 +346,8 @@ void virtio_queue_reset(VirtIODevice *vdev, uint32_t 
> queue_index);
>  void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
>  void virtio_update_irq(VirtIODevice *vdev);
>  int virtio_set_features(VirtIODevice *vdev, uint64_t val);
> +void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
> +Error **errp);

Why not make it static?

>
>  /* Base devices.  */
>  typedef struct VirtIOBlkConf VirtIOBlkConf;
> --
> 2.39.3
>




Re: [PULL v2 0/6] hw/nvme updates

2024-03-13 Thread Peter Maydell
On Tue, 12 Mar 2024 at 17:26, Klaus Jensen  wrote:
>
> From: Klaus Jensen 
>
> Hi,
>
> Sorry about the compilation error in v1. Did a full CI run for v2.
>
>   https://gitlab.com/birkelund/qemu/-/pipelines/1210559370
>
>
>
> The following changes since commit 8f3f329f5e0117bd1a23a79ab751f8a7d3471e4b:
>
>   Merge tag 'migration-20240311-pull-request' of 
> https://gitlab.com/peterx/qemu into staging (2024-03-12 11:35:41 +)
>
> are available in the Git repository at:
>
>   https://gitlab.com/birkelund/qemu.git tags/nvme-next-pull-request
>
> for you to fetch changes up to fa905f65c5549703279f68c253914799b10ada47:
>
>   hw/nvme: add machine compatibility parameter to enable msix exclusive bar 
> (2024-03-12 16:05:53 +0100)
>


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/9.0
for any user-visible changes.

-- PMM



[PATCH v2 6/6] virtio: Add VIRTIO_F_NOTIFICATION_DATA property definition

2024-03-13 Thread Jonah Palmer
Extend the virtio device property definitions to include the
VIRTIO_F_NOTIFICATION_DATA feature.

The default state of this feature is disabled, allowing it to be
explicitly enabled where it's supported.

Tested-by: Lei Yang 
Reviewed-by: Eugenio Pérez 
Signed-off-by: Jonah Palmer 
---
 include/hw/virtio/virtio.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index e0325d84d0..bc54c5e037 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -371,7 +371,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
 DEFINE_PROP_BIT64("packed", _state, _field, \
   VIRTIO_F_RING_PACKED, false), \
 DEFINE_PROP_BIT64("queue_reset", _state, _field, \
-  VIRTIO_F_RING_RESET, true)
+  VIRTIO_F_RING_RESET, true), \
+DEFINE_PROP_BIT64("notification_data", _state, _field, \
+  VIRTIO_F_NOTIFICATION_DATA, false)
 
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 bool virtio_queue_enabled_legacy(VirtIODevice *vdev, int n);
-- 
2.39.3




[PATCH v2 0/6] virtio,vhost: Add VIRTIO_F_NOTIFICATION_DATA support

2024-03-13 Thread Jonah Palmer
The goal of these patches are to add support to a variety of virtio and
vhost devices for the VIRTIO_F_NOTIFICATION_DATA transport feature. This
feature indicates that a driver will pass extra data (instead of just a
virtqueue's index) when notifying the corresponding device.

The data passed in by the driver when this feature is enabled varies in
format depending on if the device is using a split or packed virtqueue
layout:

 Split VQ
  - Upper 16 bits: shadow_avail_idx
  - Lower 16 bits: virtqueue index

 Packed VQ
  - Upper 16 bits: 1-bit wrap counter & 15-bit shadow_avail_idx
  - Lower 16 bits: virtqueue index

Also, due to the limitations of ioeventfd not being able to carry the
extra provided by the driver, having both VIRTIO_F_NOTIFICATION_DATA
feature and ioeventfd enabled is a functional mismatch. The user must
explicitly disable ioeventfd for the device in the Qemu arguments when
using this feature, else the device will fail to complete realization.

For example, a device must explicitly enable notification_data as well
as disable ioeventfd:

-device virtio-scsi-pci,...,ioeventfd=off,notification_data=on

A significant aspect of this effort has been to maintain compatibility
across different backends. As such, the feature is offered by backend
devices only when supported, with fallback mechanisms where backend
support is absent.

v2: Don't disable ioeventfd by default, user must disable it
Drop tags on patch 2/6

Jonah Palmer (6):
  virtio/virtio-pci: Handle extra notification data
  virtio: Prevent creation of device using notification-data with ioeventfd
  virtio-mmio: Handle extra notification data
  virtio-ccw: Handle extra notification data
  vhost/vhost-user: Add VIRTIO_F_NOTIFICATION_DATA to vhost feature bits
  virtio: Add VIRTIO_F_NOTIFICATION_DATA property definition

 hw/block/vhost-user-blk.c|  1 +
 hw/net/vhost_net.c   |  2 ++
 hw/s390x/s390-virtio-ccw.c   | 16 +++
 hw/scsi/vhost-scsi.c |  1 +
 hw/scsi/vhost-user-scsi.c|  1 +
 hw/virtio/vhost-user-fs.c|  2 +-
 hw/virtio/vhost-user-vsock.c |  1 +
 hw/virtio/virtio-mmio.c  |  9 ++--
 hw/virtio/virtio-pci.c   | 10 ++---
 hw/virtio/virtio.c   | 40 
 include/hw/virtio/virtio.h   |  7 ++-
 net/vhost-vdpa.c |  1 +
 12 files changed, 80 insertions(+), 11 deletions(-)

-- 
2.39.3




[PATCH v2 5/6] vhost/vhost-user: Add VIRTIO_F_NOTIFICATION_DATA to vhost feature bits

2024-03-13 Thread Jonah Palmer
Add support for the VIRTIO_F_NOTIFICATION_DATA feature across a variety
of vhost devices.

The inclusion of VIRTIO_F_NOTIFICATION_DATA in the feature bits arrays
for these devices ensures that the backend is capable of offering and
providing support for this feature, and that it can be disabled if the
backend does not support it.

Tested-by: Lei Yang 
Reviewed-by: Eugenio Pérez 
Signed-off-by: Jonah Palmer 
---
 hw/block/vhost-user-blk.c| 1 +
 hw/net/vhost_net.c   | 2 ++
 hw/scsi/vhost-scsi.c | 1 +
 hw/scsi/vhost-user-scsi.c| 1 +
 hw/virtio/vhost-user-fs.c| 2 +-
 hw/virtio/vhost-user-vsock.c | 1 +
 net/vhost-vdpa.c | 1 +
 7 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 6a856ad51a..983c0657da 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -51,6 +51,7 @@ static const int user_feature_bits[] = {
 VIRTIO_F_RING_PACKED,
 VIRTIO_F_IOMMU_PLATFORM,
 VIRTIO_F_RING_RESET,
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e8e1661646..bb1f975b39 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -48,6 +48,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_F_IOMMU_PLATFORM,
 VIRTIO_F_RING_PACKED,
 VIRTIO_F_RING_RESET,
+VIRTIO_F_NOTIFICATION_DATA,
 VIRTIO_NET_F_HASH_REPORT,
 VHOST_INVALID_FEATURE_BIT
 };
@@ -55,6 +56,7 @@ static const int kernel_feature_bits[] = {
 /* Features supported by others. */
 static const int user_feature_bits[] = {
 VIRTIO_F_NOTIFY_ON_EMPTY,
+VIRTIO_F_NOTIFICATION_DATA,
 VIRTIO_RING_F_INDIRECT_DESC,
 VIRTIO_RING_F_EVENT_IDX,
 
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index ae26bc19a4..3d5fe0994d 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -38,6 +38,7 @@ static const int kernel_feature_bits[] = {
 VIRTIO_RING_F_EVENT_IDX,
 VIRTIO_SCSI_F_HOTPLUG,
 VIRTIO_F_RING_RESET,
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index a63b1f4948..0b050805a8 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -36,6 +36,7 @@ static const int user_feature_bits[] = {
 VIRTIO_RING_F_EVENT_IDX,
 VIRTIO_SCSI_F_HOTPLUG,
 VIRTIO_F_RING_RESET,
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
index cca2cd41be..ae48cc1c96 100644
--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
@@ -33,7 +33,7 @@ static const int user_feature_bits[] = {
 VIRTIO_F_RING_PACKED,
 VIRTIO_F_IOMMU_PLATFORM,
 VIRTIO_F_RING_RESET,
-
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c
index 9431b9792c..802b44a07d 100644
--- a/hw/virtio/vhost-user-vsock.c
+++ b/hw/virtio/vhost-user-vsock.c
@@ -21,6 +21,7 @@ static const int user_feature_bits[] = {
 VIRTIO_RING_F_INDIRECT_DESC,
 VIRTIO_RING_F_EVENT_IDX,
 VIRTIO_F_NOTIFY_ON_EMPTY,
+VIRTIO_F_NOTIFICATION_DATA,
 VHOST_INVALID_FEATURE_BIT
 };
 
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 2a9ddb4552..5583ce5279 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -61,6 +61,7 @@ const int vdpa_feature_bits[] = {
 VIRTIO_F_RING_PACKED,
 VIRTIO_F_RING_RESET,
 VIRTIO_F_VERSION_1,
+VIRTIO_F_NOTIFICATION_DATA,
 VIRTIO_NET_F_CSUM,
 VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
 VIRTIO_NET_F_CTRL_MAC_ADDR,
-- 
2.39.3




[PATCH v2 3/6] virtio-mmio: Handle extra notification data

2024-03-13 Thread Jonah Palmer
Add support to virtio-mmio devices for handling the extra data sent from
the driver to the device when the VIRTIO_F_NOTIFICATION_DATA transport
feature has been negotiated.

The extra data that's passed to the virtio-mmio device when this feature
is enabled varies depending on the device's virtqueue layout.

The data passed to the virtio-mmio device is in the same format as the
data passed to virtio-pci devices.

Tested-by: Lei Yang 
Acked-by: Eugenio Pérez 
Signed-off-by: Jonah Palmer 
---
 hw/virtio/virtio-mmio.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 22f9fbcf5a..f99d5851a2 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -248,6 +248,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, 
uint64_t value,
 {
 VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque;
 VirtIODevice *vdev = virtio_bus_get_device(>bus);
+uint16_t vq_idx;
 
 trace_virtio_mmio_write_offset(offset, value);
 
@@ -407,8 +408,12 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, 
uint64_t value,
 }
 break;
 case VIRTIO_MMIO_QUEUE_NOTIFY:
-if (value < VIRTIO_QUEUE_MAX) {
-virtio_queue_notify(vdev, value);
+vq_idx = value;
+if (vq_idx < VIRTIO_QUEUE_MAX) {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
+virtio_queue_set_shadow_avail_data(vdev, value);
+}
+virtio_queue_notify(vdev, vq_idx);
 }
 break;
 case VIRTIO_MMIO_INTERRUPT_ACK:
-- 
2.39.3




[PATCH v2 2/6] virtio: Prevent creation of device using notification-data with ioeventfd

2024-03-13 Thread Jonah Palmer
Prevent the realization of a virtio device that attempts to use the
VIRTIO_F_NOTIFICATION_DATA transport feature without disabling
ioeventfd.

Due to ioeventfd not being able to carry the extra data associated with
this feature, having both enabled is a functional mismatch and therefore
Qemu should not continue the device's realization process.

Although the device does not yet know if the feature will be
successfully negotiated, many devices using this feature wont actually
work without this extra data and would fail FEATURES_OK anyway.

If ioeventfd is able to work with the extra notification data in the
future, this compatibility check can be removed.

Signed-off-by: Jonah Palmer 
---
 hw/virtio/virtio.c | 22 ++
 include/hw/virtio/virtio.h |  2 ++
 2 files changed, 24 insertions(+)

diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index bcb9e09df0..d0a433b465 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2971,6 +2971,20 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val)
 return ret;
 }
 
+void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
+Error **errp)
+{
+VirtioBusState *bus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
+VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus);
+DeviceState *proxy = DEVICE(BUS(bus)->parent);
+
+if (virtio_host_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA) &&
+k->ioeventfd_enabled(proxy)) {
+error_setg(errp,
+   "notification_data=on without ioeventfd=off is not 
supported");
+}
+}
+
 size_t virtio_get_config_size(const VirtIOConfigSizeParams *params,
   uint64_t host_features)
 {
@@ -3731,6 +3745,14 @@ static void virtio_device_realize(DeviceState *dev, 
Error **errp)
 }
 }
 
+/* Devices should not use both ioeventfd and notification data feature */
+virtio_device_check_notification_compatibility(vdev, );
+if (err != NULL) {
+error_propagate(errp, err);
+vdc->unrealize(dev);
+return;
+}
+
 virtio_bus_device_plugged(vdev, );
 if (err != NULL) {
 error_propagate(errp, err);
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 53915947a7..e0325d84d0 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -346,6 +346,8 @@ void virtio_queue_reset(VirtIODevice *vdev, uint32_t 
queue_index);
 void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index);
 void virtio_update_irq(VirtIODevice *vdev);
 int virtio_set_features(VirtIODevice *vdev, uint64_t val);
+void virtio_device_check_notification_compatibility(VirtIODevice *vdev,
+Error **errp);
 
 /* Base devices.  */
 typedef struct VirtIOBlkConf VirtIOBlkConf;
-- 
2.39.3




[PATCH v2 1/6] virtio/virtio-pci: Handle extra notification data

2024-03-13 Thread Jonah Palmer
Add support to virtio-pci devices for handling the extra data sent
from the driver to the device when the VIRTIO_F_NOTIFICATION_DATA
transport feature has been negotiated.

The extra data that's passed to the virtio-pci device when this
feature is enabled varies depending on the device's virtqueue
layout.

In a split virtqueue layout, this data includes:
 - upper 16 bits: shadow_avail_idx
 - lower 16 bits: virtqueue index

In a packed virtqueue layout, this data includes:
 - upper 16 bits: 1-bit wrap counter & 15-bit shadow_avail_idx
 - lower 16 bits: virtqueue index

Tested-by: Lei Yang 
Reviewed-by: Eugenio Pérez 
Signed-off-by: Jonah Palmer 
---
 hw/virtio/virtio-pci.c | 10 +++---
 hw/virtio/virtio.c | 18 ++
 include/hw/virtio/virtio.h |  1 +
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index cb6940fc0e..0f5c3c3b2f 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -384,7 +384,7 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
 {
 VirtIOPCIProxy *proxy = opaque;
 VirtIODevice *vdev = virtio_bus_get_device(>bus);
-uint16_t vector;
+uint16_t vector, vq_idx;
 hwaddr pa;
 
 switch (addr) {
@@ -408,8 +408,12 @@ static void virtio_ioport_write(void *opaque, uint32_t 
addr, uint32_t val)
 vdev->queue_sel = val;
 break;
 case VIRTIO_PCI_QUEUE_NOTIFY:
-if (val < VIRTIO_QUEUE_MAX) {
-virtio_queue_notify(vdev, val);
+vq_idx = val;
+if (vq_idx < VIRTIO_QUEUE_MAX) {
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
+virtio_queue_set_shadow_avail_data(vdev, val);
+}
+virtio_queue_notify(vdev, vq_idx);
 }
 break;
 case VIRTIO_PCI_STATUS:
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index d229755eae..bcb9e09df0 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -2255,6 +2255,24 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, 
int align)
 }
 }
 
+void virtio_queue_set_shadow_avail_data(VirtIODevice *vdev, uint32_t data)
+{
+/* Lower 16 bits is the virtqueue index */
+uint16_t i = data;
+VirtQueue *vq = >vq[i];
+
+if (!vq->vring.desc) {
+return;
+}
+
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) {
+vq->shadow_avail_wrap_counter = (data >> 31) & 0x1;
+vq->shadow_avail_idx = (data >> 16) & 0x7FFF;
+} else {
+vq->shadow_avail_idx = (data >> 16);
+}
+}
+
 static void virtio_queue_notify_vq(VirtQueue *vq)
 {
 if (vq->vring.desc && vq->handle_output) {
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index c8f72850bc..53915947a7 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -335,6 +335,7 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n);
 void virtio_init_region_cache(VirtIODevice *vdev, int n);
 void virtio_queue_set_align(VirtIODevice *vdev, int n, int align);
 void virtio_queue_notify(VirtIODevice *vdev, int n);
+void virtio_queue_set_shadow_avail_data(VirtIODevice *vdev, uint32_t data);
 uint16_t virtio_queue_vector(VirtIODevice *vdev, int n);
 void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector);
 int virtio_queue_set_host_notifier_mr(VirtIODevice *vdev, int n,
-- 
2.39.3




[PATCH v2 4/6] virtio-ccw: Handle extra notification data

2024-03-13 Thread Jonah Palmer
Add support to virtio-ccw devices for handling the extra data sent from
the driver to the device when the VIRTIO_F_NOTIFICATION_DATA transport
feature has been negotiated.

The extra data that's passed to the virtio-ccw device when this feature
is enabled varies depending on the device's virtqueue layout.

That data passed to the virtio-ccw device is in the same format as the
data passed to virtio-pci devices.

Tested-by: Lei Yang 
Acked-by: Eric Farman 
Acked-by: Thomas Huth 
Signed-off-by: Jonah Palmer 
---
 hw/s390x/s390-virtio-ccw.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index b1dcb3857f..7631e4aa41 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -140,9 +140,11 @@ static void subsystem_reset(void)
 static int virtio_ccw_hcall_notify(const uint64_t *args)
 {
 uint64_t subch_id = args[0];
-uint64_t queue = args[1];
+uint64_t data = args[1];
 SubchDev *sch;
+VirtIODevice *vdev;
 int cssid, ssid, schid, m;
+uint16_t vq_idx = data;
 
 if (ioinst_disassemble_sch_ident(subch_id, , , , )) {
 return -EINVAL;
@@ -151,12 +153,18 @@ static int virtio_ccw_hcall_notify(const uint64_t *args)
 if (!sch || !css_subch_visible(sch)) {
 return -EINVAL;
 }
-if (queue >= VIRTIO_QUEUE_MAX) {
+
+if (vq_idx >= VIRTIO_QUEUE_MAX) {
 return -EINVAL;
 }
-virtio_queue_notify(virtio_ccw_get_vdev(sch), queue);
-return 0;
 
+vdev = virtio_ccw_get_vdev(sch);
+if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) {
+virtio_queue_set_shadow_avail_data(vdev, data);
+}
+
+virtio_queue_notify(vdev, vq_idx);
+return 0;
 }
 
 static int virtio_ccw_hcall_early_printk(const uint64_t *args)
-- 
2.39.3




[PATCH v2] block: Use LVM tools for LV block device truncation

2024-03-13 Thread Alexander Ivanov
If a block device is an LVM logical volume we can resize it using
standard LVM tools.

Add a helper to detect if a device is a DM device. In raw_co_truncate()
check if the block device is DM and resize it executing lvresize.

Signed-off-by: Alexander Ivanov 
---
 block/file-posix.c | 61 ++
 1 file changed, 61 insertions(+)

diff --git a/block/file-posix.c b/block/file-posix.c
index 35684f7e21..5f07d98aa5 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2642,6 +2642,38 @@ raw_regular_truncate(BlockDriverState *bs, int fd, 
int64_t offset,
 return raw_thread_pool_submit(handle_aiocb_truncate, );
 }
 
+static bool device_is_dm(struct stat *st)
+{
+unsigned int maj, maj2;
+char line[32], devname[16];
+bool ret = false;
+FILE *f;
+
+if (!S_ISBLK(st->st_mode)) {
+return false;
+}
+
+f = fopen("/proc/devices", "r");
+if (!f) {
+return false;
+}
+
+maj = major(st->st_rdev);
+
+while (fgets(line, sizeof(line), f)) {
+if (sscanf(line, "%u %15s", , devname) != 2) {
+continue;
+}
+if (strcmp(devname, "device-mapper") == 0) {
+ret = (maj == maj2);
+break;
+}
+}
+
+fclose(f);
+return ret;
+}
+
 static int coroutine_fn raw_co_truncate(BlockDriverState *bs, int64_t offset,
 bool exact, PreallocMode prealloc,
 BdrvRequestFlags flags, Error **errp)
@@ -2670,6 +2702,35 @@ static int coroutine_fn raw_co_truncate(BlockDriverState 
*bs, int64_t offset,
 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
 int64_t cur_length = raw_getlength(bs);
 
+/*
+ * Try to resize an LVM device using LVM tools.
+ */
+if (device_is_dm() && offset > 0) {
+int spawn_flags = G_SPAWN_SEARCH_PATH | G_SPAWN_STDOUT_TO_DEV_NULL;
+int status;
+bool success;
+char *err;
+GError *gerr = NULL;
+g_autofree char *size_str = g_strdup_printf("%ldB", offset);
+const char *cmd[] = {"lvresize", "-f", "-L",
+ size_str, bs->filename, NULL};
+
+success = g_spawn_sync(NULL, (gchar **)cmd, NULL, spawn_flags,
+   NULL, NULL, NULL, , , );
+
+if (success && WEXITSTATUS(status) == 0) {
+return 0;
+}
+
+if (!success) {
+error_setg(errp, "lvresize execution error: %s", 
gerr->message);
+} else {
+error_setg(errp, "%s", err);
+}
+
+return -EINVAL;
+}
+
 if (offset != cur_length && exact) {
 error_setg(errp, "Cannot resize device files");
 return -ENOTSUP;
-- 
2.40.1




Re: [PATCH for 9.0] migration: Skip only empty block devices

2024-03-13 Thread Kevin Wolf
Am 12.03.2024 um 13:27 hat Peter Xu geschrieben:
> On Tue, Mar 12, 2024 at 01:04:31PM +0100, Cédric Le Goater wrote:
> > The block .save_setup() handler calls a helper routine
> > init_blk_migration() which builds a list of block devices to take into
> > account for migration. When one device is found to be empty (sectors
> > == 0), the loop exits and all the remaining devices are ignored. This
> > is a regression introduced when bdrv_iterate() was removed.
> > 
> > Change that by skipping only empty devices.
> > 
> > Cc: Markus Armbruster 
> > Suggested: Kevin Wolf 
> 
> This should be:
> 
> Suggested-by: Kevin Wolf 
> 
> I think the missed "by" caused Kevin not in the cc list, I added Kevin in.
> 
> I'll hold a bit for Kevin to ACK, no repost needed just for above; I can
> fix it.

Thanks.

Reviewed-by: Kevin Wolf 




[PATCH v3] linux-aio: add IO_CMD_FDSYNC command support

2024-03-13 Thread Prasad Pandit
From: Prasad Pandit 

Libaio defines IO_CMD_FDSYNC command to sync all outstanding
asynchronous I/O operations, by flushing out file data to the
disk storage.

Enable linux-aio to submit such aio request. This helps to
reduce latency induced via pthread_create calls by
thread-pool (aio=threads).

Signed-off-by: Prasad Pandit 
---
 block/file-posix.c |  7 +++
 block/linux-aio.c  | 22 +-
 2 files changed, 28 insertions(+), 1 deletion(-)

v3: check if host kernel supports aio_fsync call
  -> https://lists.nongnu.org/archive/html/qemu-devel/2024-03/msg03210.html

diff --git a/block/file-posix.c b/block/file-posix.c
index 35684f7e21..a30494d1a8 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -2453,6 +2453,8 @@ static inline bool raw_check_linux_io_uring(BDRVRawState 
*s)
 #endif
 
 #ifdef CONFIG_LINUX_AIO
+extern bool laio_has_fdsync(int);
+
 static inline bool raw_check_linux_aio(BDRVRawState *s)
 {
 Error *local_err = NULL;
@@ -2599,6 +2601,11 @@ static int coroutine_fn 
raw_co_flush_to_disk(BlockDriverState *bs)
 if (raw_check_linux_io_uring(s)) {
 return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
 }
+#endif
+#ifdef CONFIG_LINUX_AIO
+if (raw_check_linux_aio(s) && laio_has_fdsync(s->fd)) {
+return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
+}
 #endif
 return raw_thread_pool_submit(handle_aiocb_flush, );
 }
diff --git a/block/linux-aio.c b/block/linux-aio.c
index ec05d946f3..ed20a503e9 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -67,6 +67,7 @@ struct LinuxAioState {
 int event_max;
 };
 
+bool laio_has_fdsync(int);
 static void ioq_submit(LinuxAioState *s);
 
 static inline ssize_t io_event_ret(struct io_event *ev)
@@ -384,6 +385,9 @@ static int laio_do_submit(int fd, struct qemu_laiocb 
*laiocb, off_t offset,
 case QEMU_AIO_READ:
 io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
 break;
+case QEMU_AIO_FLUSH:
+io_prep_fdsync(iocbs, fd);
+break;
 /* Currently Linux kernel does not support other operations */
 default:
 fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
@@ -412,7 +416,7 @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, 
QEMUIOVector *qiov,
 AioContext *ctx = qemu_get_current_aio_context();
 struct qemu_laiocb laiocb = {
 .co = qemu_coroutine_self(),
-.nbytes = qiov->size,
+.nbytes = qiov ? qiov->size : 0,
 .ctx= aio_get_linux_aio(ctx),
 .ret= -EINPROGRESS,
 .is_read= (type == QEMU_AIO_READ),
@@ -486,3 +490,19 @@ void laio_cleanup(LinuxAioState *s)
 }
 g_free(s);
 }
+
+bool laio_has_fdsync(int fd)
+{
+struct iocb cb;
+struct iocb *cbs[] = {, NULL};
+
+io_context_t ctx = 0;
+io_setup(1, );
+
+/* check if host kernel supports IO_CMD_FDSYNC */
+io_prep_fdsync(, fd);
+int ret = io_submit(ctx, 1, cbs);
+
+io_destroy(ctx);
+return (ret == -EINVAL) ? false : true;
+}
-- 
2.44.0




Re: [PATCH for 9.0] migration: Skip only empty block devices

2024-03-13 Thread Markus Armbruster
Peter Xu  writes:

> On Tue, Mar 12, 2024 at 05:34:26PM -0400, Stefan Hajnoczi wrote:
>> I understand now. I missed that returning from init_blk_migration_it()
>> did not abort iteration. Thank you!
>
> I queued it, thanks both!

Thanks for cleaning up the mess I made!