date:20230309

[PATCH v2] block: replace TABs with space

2023-03-09 Thread Yeqi Fu

Bring the block files in line with the QEMU coding style, with spaces
for indentation.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/371

Signed-off-by: Yeqi Fu 
---
 block/bochs.c   | 14 +++
 block/file-posix.c  | 92 ++---
 block/file-win32.c  | 53 +-
 block/parallels.c   | 28 +++---
 block/qcow.c| 44 +++---
 include/block/nbd.h |  2 +-
 6 files changed, 116 insertions(+), 117 deletions(-)

diff --git a/block/bochs.c b/block/bochs.c
index 2f5ae52c90..85d06f3315 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -293,15 +293,15 @@ static void bochs_close(BlockDriverState *bs)
 }
 
 static BlockDriver bdrv_bochs = {
-.format_name   = "bochs",
-.instance_size = sizeof(BDRVBochsState),
-.bdrv_probe= bochs_probe,
-.bdrv_open = bochs_open,
-.bdrv_child_perm = bdrv_default_perms,
+.format_name = "bochs",
+.instance_size = sizeof(BDRVBochsState),
+.bdrv_probe = bochs_probe,
+.bdrv_open = bochs_open,
+.bdrv_child_perm = bdrv_default_perms,
 .bdrv_refresh_limits = bochs_refresh_limits,
 .bdrv_co_preadv = bochs_co_preadv,
-.bdrv_close= bochs_close,
-.is_format  = true,
+.bdrv_close = bochs_close,
+.is_format = true,
 };
 
 static void bdrv_bochs_init(void)
diff --git a/block/file-posix.c b/block/file-posix.c
index 5760cf22d1..18159b6d83 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -124,7 +124,7 @@
 #define FTYPE_FILE   0
 #define FTYPE_CD 1
 
-#define MAX_BLOCKSIZE  4096
+#define MAX_BLOCKSIZE   4096
 
 /* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
  * leaving a few more bytes for its future use. */
@@ -3819,42 +3819,42 @@ static void coroutine_fn 
cdrom_co_lock_medium(BlockDriverState *bs, bool locked)
 }
 
 static BlockDriver bdrv_host_cdrom = {
-.format_name= "host_cdrom",
-.protocol_name  = "host_cdrom",
-.instance_size  = sizeof(BDRVRawState),
+.format_name = "host_cdrom",
+.protocol_name = "host_cdrom",
+.instance_size = sizeof(BDRVRawState),
 .bdrv_needs_filename = true,
-.bdrv_probe_device = cdrom_probe_device,
+.bdrv_probe_device = cdrom_probe_device,
 .bdrv_parse_filename = cdrom_parse_filename,
-.bdrv_file_open = cdrom_open,
-.bdrv_close = raw_close,
+.bdrv_file_open = cdrom_open,
+.bdrv_close = raw_close,
 .bdrv_reopen_prepare = raw_reopen_prepare,
-.bdrv_reopen_commit  = raw_reopen_commit,
-.bdrv_reopen_abort   = raw_reopen_abort,
+.bdrv_reopen_commit = raw_reopen_commit,
+.bdrv_reopen_abort = raw_reopen_abort,
 .bdrv_co_create_opts = bdrv_co_create_opts_simple,
-.create_opts = _create_opts_simple,
-.mutable_opts= mutable_opts,
+.create_opts = _create_opts_simple,
+.mutable_opts = mutable_opts,
 .bdrv_co_invalidate_cache = raw_co_invalidate_cache,
 
-.bdrv_co_preadv = raw_co_preadv,
-.bdrv_co_pwritev= raw_co_pwritev,
-.bdrv_co_flush_to_disk  = raw_co_flush_to_disk,
+.bdrv_co_preadv = raw_co_preadv,
+.bdrv_co_pwritev = raw_co_pwritev,
+.bdrv_co_flush_to_disk = raw_co_flush_to_disk,
 .bdrv_refresh_limits = raw_refresh_limits,
-.bdrv_co_io_plug= raw_co_io_plug,
-.bdrv_co_io_unplug  = raw_co_io_unplug,
+.bdrv_co_io_plug = raw_co_io_plug,
+.bdrv_co_io_unplug = raw_co_io_unplug,
 .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
-.bdrv_co_truncate   = raw_co_truncate,
-.bdrv_co_getlength  = raw_co_getlength,
-.has_variable_length= true,
-.bdrv_co_get_allocated_file_size= raw_co_get_allocated_file_size,
+.bdrv_co_truncate = raw_co_truncate,
+.bdrv_co_getlength = raw_co_getlength,
+.has_variable_length = true,
+.bdrv_co_get_allocated_file_size = raw_co_get_allocated_file_size,
 
 /* removable device support */
-.bdrv_co_is_inserted= cdrom_co_is_inserted,
-.bdrv_co_eject  = cdrom_co_eject,
-.bdrv_co_lock_medium= cdrom_co_lock_medium,
+.bdrv_co_is_inserted = cdrom_co_is_inserted,
+.bdrv_co_eject = cdrom_co_eject,
+.bdrv_co_lock_medium = cdrom_co_lock_medium,
 
 /* generic scsi device */
-.bdrv_co_ioctl  = hdev_co_ioctl,
+.bdrv_co_ioctl = hdev_co_ioctl,
 };
 #endif /* __linux__ */
 
@@ -3949,38 +3949,38 @@ static void coroutine_fn 
cdrom_co_lock_medium(BlockDriverState *bs, bool locked)
 }
 
 static BlockDriver bdrv_host_cdrom = {
-.format_name= "host_cdrom",
-.protocol_name  = "host_cdrom",
-.instance_size  = sizeof(BDRVRawState),
+.format_name = "host_cdrom",
+.protocol_name = "host_cdrom",
+.instance_size = sizeof(BDRVRawState),
 .bdrv_needs_filename = true,
-.bdrv_probe_device = cdrom_probe_device,
+

[PATCH v2 5/6] hmp: convert handle_hmp_command() to AIO_WAIT_WHILE_UNLOCKED()

2023-03-09 Thread Stefan Hajnoczi

The HMP monitor runs in the main loop thread. Calling
AIO_WAIT_WHILE(qemu_get_aio_context(), ...) from the main loop thread is
equivalent to AIO_WAIT_WHILE_UNLOCKED(NULL, ...) because neither unlocks
the AioContext and the latter's assertion that we're in the main loop
succeeds.

Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Markus Armbruster 
Reviewed-by: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
---
 monitor/hmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/monitor/hmp.c b/monitor/hmp.c
index fee410362f..5cab56d355 100644
--- a/monitor/hmp.c
+++ b/monitor/hmp.c
@@ -1167,7 +1167,7 @@ void handle_hmp_command(MonitorHMP *mon, const char 
*cmdline)
 Coroutine *co = qemu_coroutine_create(handle_hmp_command_co, );
 monitor_set_cur(co, >common);
 aio_co_enter(qemu_get_aio_context(), co);
-AIO_WAIT_WHILE(qemu_get_aio_context(), !data.done);
+AIO_WAIT_WHILE_UNLOCKED(NULL, !data.done);
 }
 
 qobject_unref(qdict);
-- 
2.39.2

[PATCH v2 4/6] block: convert bdrv_drain_all_begin() to AIO_WAIT_WHILE_UNLOCKED()

2023-03-09 Thread Stefan Hajnoczi

Since the AioContext argument was already NULL, AIO_WAIT_WHILE() was
never going to unlock the AioContext. Therefore it is possible to
replace AIO_WAIT_WHILE() with AIO_WAIT_WHILE_UNLOCKED().

Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
---
 block/io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/io.c b/block/io.c
index 8974d46941..db438c7657 100644
--- a/block/io.c
+++ b/block/io.c
@@ -520,7 +520,7 @@ void bdrv_drain_all_begin(void)
 bdrv_drain_all_begin_nopoll();
 
 /* Now poll the in-flight requests */
-AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
+AIO_WAIT_WHILE_UNLOCKED(NULL, bdrv_drain_all_poll());
 
 while ((bs = bdrv_next_all_states(bs))) {
 bdrv_drain_assert_idle(bs);
-- 
2.39.2

[PATCH v2 6/6] monitor: convert monitor_cleanup() to AIO_WAIT_WHILE_UNLOCKED()

2023-03-09 Thread Stefan Hajnoczi

monitor_cleanup() is called from the main loop thread. Calling
AIO_WAIT_WHILE(qemu_get_aio_context(), ...) from the main loop thread is
equivalent to AIO_WAIT_WHILE_UNLOCKED(NULL, ...) because neither unlocks
the AioContext and the latter's assertion that we're in the main loop
succeeds.

Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Markus Armbruster 
Reviewed-by: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
---
 monitor/monitor.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/monitor/monitor.c b/monitor/monitor.c
index 8dc96f6af9..602535696c 100644
--- a/monitor/monitor.c
+++ b/monitor/monitor.c
@@ -666,7 +666,7 @@ void monitor_cleanup(void)
  * We need to poll both qemu_aio_context and iohandler_ctx to make
  * sure that the dispatcher coroutine keeps making progress and
  * eventually terminates.  qemu_aio_context is automatically
- * polled by calling AIO_WAIT_WHILE on it, but we must poll
+ * polled by calling AIO_WAIT_WHILE_UNLOCKED on it, but we must poll
  * iohandler_ctx manually.
  *
  * Letting the iothread continue while shutting down the dispatcher
@@ -679,7 +679,7 @@ void monitor_cleanup(void)
 aio_co_wake(qmp_dispatcher_co);
 }
 
-AIO_WAIT_WHILE(qemu_get_aio_context(),
+AIO_WAIT_WHILE_UNLOCKED(NULL,
(aio_poll(iohandler_get_aio_context(), false),
 qatomic_mb_read(_dispatcher_co_busy)));
 
-- 
2.39.2

[PATCH v2 1/6] block: don't acquire AioContext lock in bdrv_drain_all()

2023-03-09 Thread Stefan Hajnoczi

There is no need for the AioContext lock in bdrv_drain_all() because
nothing in AIO_WAIT_WHILE() needs the lock and the condition is atomic.

AIO_WAIT_WHILE_UNLOCKED() has no use for the AioContext parameter other
than performing a check that is nowadays already done by the
GLOBAL_STATE_CODE()/IO_CODE() macros. Set the ctx argument to NULL here
to help us keep track of all converted callers. Eventually all callers
will have been converted and then the argument can be dropped entirely.

Reviewed-by: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
---
 block/block-backend.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 278b04ce69..d2b6b3652d 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1835,14 +1835,8 @@ void blk_drain_all(void)
 bdrv_drain_all_begin();
 
 while ((blk = blk_all_next(blk)) != NULL) {
-AioContext *ctx = blk_get_aio_context(blk);
-
-aio_context_acquire(ctx);
-
 /* We may have -ENOMEDIUM completions in flight */
-AIO_WAIT_WHILE(ctx, qatomic_mb_read(>in_flight) > 0);
-
-aio_context_release(ctx);
+AIO_WAIT_WHILE_UNLOCKED(NULL, qatomic_mb_read(>in_flight) > 0);
 }
 
 bdrv_drain_all_end();
-- 
2.39.2

[PATCH v2 0/6] block: switch to AIO_WAIT_WHILE_UNLOCKED() where possible

2023-03-09 Thread Stefan Hajnoczi

v2:
- Clarify NULL ctx argument in Patch 1 commit description [Kevin]

AIO_WAIT_WHILE_UNLOCKED() is the future replacement for AIO_WAIT_WHILE(). Most
callers haven't been converted yet because they rely on the AioContext lock. I
looked through the code and found the easy cases that can be converted today.

Stefan Hajnoczi (6):
  block: don't acquire AioContext lock in bdrv_drain_all()
  block: convert blk_exp_close_all_type() to AIO_WAIT_WHILE_UNLOCKED()
  block: convert bdrv_graph_wrlock() to AIO_WAIT_WHILE_UNLOCKED()
  block: convert bdrv_drain_all_begin() to AIO_WAIT_WHILE_UNLOCKED()
  hmp: convert handle_hmp_command() to AIO_WAIT_WHILE_UNLOCKED()
  monitor: convert monitor_cleanup() to AIO_WAIT_WHILE_UNLOCKED()

 block/block-backend.c | 8 +---
 block/export/export.c | 2 +-
 block/graph-lock.c| 2 +-
 block/io.c| 2 +-
 monitor/hmp.c | 2 +-
 monitor/monitor.c | 4 ++--
 6 files changed, 7 insertions(+), 13 deletions(-)

-- 
2.39.2

[PATCH v2 2/6] block: convert blk_exp_close_all_type() to AIO_WAIT_WHILE_UNLOCKED()

2023-03-09 Thread Stefan Hajnoczi

There is no change in behavior. Switch to AIO_WAIT_WHILE_UNLOCKED()
instead of AIO_WAIT_WHILE() to document that this code has already been
audited and converted. The AioContext argument is already NULL so
aio_context_release() is never called anyway.

Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
---
 block/export/export.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/export/export.c b/block/export/export.c
index 28a91c9c42..e3fee60611 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -306,7 +306,7 @@ void blk_exp_close_all_type(BlockExportType type)
 blk_exp_request_shutdown(exp);
 }
 
-AIO_WAIT_WHILE(NULL, blk_exp_has_type(type));
+AIO_WAIT_WHILE_UNLOCKED(NULL, blk_exp_has_type(type));
 }
 
 void blk_exp_close_all(void)
-- 
2.39.2

[PATCH v2 3/6] block: convert bdrv_graph_wrlock() to AIO_WAIT_WHILE_UNLOCKED()

2023-03-09 Thread Stefan Hajnoczi

The following conversion is safe and does not change behavior:

 GLOBAL_STATE_CODE();
 ...
  -  AIO_WAIT_WHILE(qemu_get_aio_context(), ...);
  +  AIO_WAIT_WHILE_UNLOCKED(NULL, ...);

Since we're in GLOBAL_STATE_CODE(), qemu_get_aio_context() is our home
thread's AioContext. Thus AIO_WAIT_WHILE() does not unlock the
AioContext:

  if (ctx_ && in_aio_context_home_thread(ctx_)) {\
  while ((cond)) {   \
  aio_poll(ctx_, true);  \
  waited_ = true;\
  }  \

And that means AIO_WAIT_WHILE_UNLOCKED(NULL, ...) can be substituted.

Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
Reviewed-by: Kevin Wolf 
Signed-off-by: Stefan Hajnoczi 
---
 block/graph-lock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/graph-lock.c b/block/graph-lock.c
index 454c31e691..639526608f 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -127,7 +127,7 @@ void bdrv_graph_wrlock(void)
  * reader lock.
  */
 qatomic_set(_writer, 0);
-AIO_WAIT_WHILE(qemu_get_aio_context(), reader_count() >= 1);
+AIO_WAIT_WHILE_UNLOCKED(NULL, reader_count() >= 1);
 qatomic_set(_writer, 1);
 
 /*
-- 
2.39.2

Re: [PATCH] block: replace TABs with space

2023-03-09 Thread Eric Blake

On Fri, Mar 10, 2023 at 12:10:08AM +0800, Yeqi Fu wrote:
> Bring the block files in line with the QEMU coding style, with spaces
> for indentation.
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/371
> 
> Signed-off-by: Yeqi Fu 
> ---
>  block/bochs.c   | 12 +--
>  block/file-posix.c  | 52 ++---
>  block/file-win32.c  | 38 -
>  block/parallels.c   | 10 -
>  block/qcow.c| 10 -
>  include/block/nbd.h |  2 +-
>  6 files changed, 62 insertions(+), 62 deletions(-)
> 
> diff --git a/block/bochs.c b/block/bochs.c
> index 2f5ae52c90..8ff38ac0d9 100644
> --- a/block/bochs.c
> +++ b/block/bochs.c
> @@ -293,15 +293,15 @@ static void bochs_close(BlockDriverState *bs)
>  }
>  
>  static BlockDriver bdrv_bochs = {
> -.format_name = "bochs",
> -.instance_size   = sizeof(BDRVBochsState),
> -.bdrv_probe  = bochs_probe,
> -.bdrv_open   = bochs_open,
> +.format_name= "bochs",
> +.instance_size  = sizeof(BDRVBochsState),
> +.bdrv_probe = bochs_probe,
> +.bdrv_open  = bochs_open,
>  .bdrv_child_perm = bdrv_default_perms,
>  .bdrv_refresh_limits = bochs_refresh_limits,

Hmm. When shown in the diff, it looks like you are changing the
alignment of the .bdrv_probe line; but in reality, the extra prefix
from diff changes which tabstop the '=' goes out to, so you are
preserving the original column.  Still, it looks really awkward that
we have one alignment for .format_name through .bdrv_open, and another
alignment for .bdrv_child_perm and .bdrv_refresh_limits.

My personal preference: '=' alignment is a lost cause.  It causes
pointless churn if we later add a field with a longer name (all other
lines have to reindent to match the new length).  I'd prefer exactly
one space before '=' on all lines that you touch (and maybe touch a
few more lines in the process).  But if we DO stick with alignment,
I'd much rather that ALL '=' be in the same column, rather than the
pre-existing half-and-half mix, even though your patch kept that
visually undisturbed (other than when diff injects a prefix that
rejiggers the tab stops).

I wouldn't mind a v2 if we can reach consensus on whether to
consistently align '=' or always use just one space; but I don't know
if that consensus will be easy, so I can also live with:

Reviewed-by: Eric Blake 

even if we end up using v1 as-is.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH] qed: remove spurious BDRV_POLL_WHILE()

2023-03-09 Thread Kevin Wolf

Am 09.03.2023 um 17:31 hat Stefan Hajnoczi geschrieben:
> This looks like a copy-paste or merge error. BDRV_POLL_WHILE() is
> already called above. It's not needed in the qemu_in_coroutine() case.
> 
> Fixes: 9fb4dfc570ce ("qed: make bdrv_qed_do_open a coroutine_fn")
> Signed-off-by: Stefan Hajnoczi 

Thanks, applied to the block branch.

Kevin

Re: [PATCH for-8.0] ide: Fix manual in-flight count for TRIM BH

2023-03-09 Thread Kevin Wolf

Am 09.03.2023 um 14:59 hat Paolo Bonzini geschrieben:
> On 3/9/23 13:31, Hanna Czenczek wrote:
> > On 09.03.23 13:08, Paolo Bonzini wrote:
> > > On Thu, Mar 9, 2023 at 1:05 PM Paolo Bonzini  wrote:
> > > > I think having to do this is problematic, because the blk_drain should
> > > > leave no pending operation.
> > > > 
> > > > Here it seems okay because you do it in a controlled situation, but the
> > > > main thread can also issue blk_drain(), or worse bdrv_drained_begin(),
> > > > and there would be pending I/O operations when it returns.
> > 
> > Not really.  We would stop in the middle of a trim that processes a list
> > of discard requests.  So I see it more like stopping in the middle of
> > anything that processes guest requests.  Once drain ends, we continue
> > processing them, and that’s not exactly pending I/O.
> > 
> > There is a pending object in s->bus->dma->aiocb on the IDE side, so
> > there is a pending DMA operation, but naïvely, I don’t see that as a
> > problem.
> 
> What about the bdrv_drain_all() when a VM stops, would the guest continue to
> access memory and disks after bdrv_drain() return?

That one shouldn't be a problem because the devices are stopped before
the backends.

> Migration could also be a problem, because the partial TRIM would not be
> recorded in the s->bus->error_status field of IDEState (no surprise there,
> it's not an error).  Also, errors happening after bdrv_drain() might not be
> migrated correctly.

Yes, I think it would be good to have the I/O operation fully completed
on the IDE level rather than just in the block layer.

> > Or the issue is generally that IDE uses dma_* functions, which might
> > cause I/O functions to be run from new BHs (I guess through
> > reschedule_dma()?).

None of those complicated scenarios actually. The problem solved by the
request queuing is simply that nothing else stops the guest from
submitting new requests to drained nodes if the CPUs are still running.

Drain uses aio_disable_external() to disable processing of external
events, in particular the ioeventfd used by virtio-blk and virtio-scsi.
But IDE submits requests through MMIO or port I/O, i.e. the vcpu thread
exits to userspace and calls directly into the IDE code, so it's
completely unaffected by aio_disable_external().

> Ah, you mean that you can have pending I/O operations while blk->in_flight
> is zero?  That would be a problem indeed.  We already have BlockDevOps for
> ide-cd and ide-hd, should we add a .drained_poll callback there?

To be more precise, you suggested in the call that .drained_poll should
return that IDE is still busy while aiocb != NULL. Without having looked
at the code in detail yet, that seems to make sense to me. And maybe
even the blk_inc/dec_in_flight() pair can then go completely away
because IDE takes care of its drain state itself then.

Kevin

[PATCH] block: replace TABs with space

2023-03-09 Thread Yeqi Fu

Bring the block files in line with the QEMU coding style, with spaces
for indentation.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/371

Signed-off-by: Yeqi Fu 
---
 block/bochs.c   | 12 +--
 block/file-posix.c  | 52 ++---
 block/file-win32.c  | 38 -
 block/parallels.c   | 10 -
 block/qcow.c| 10 -
 include/block/nbd.h |  2 +-
 6 files changed, 62 insertions(+), 62 deletions(-)

diff --git a/block/bochs.c b/block/bochs.c
index 2f5ae52c90..8ff38ac0d9 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -293,15 +293,15 @@ static void bochs_close(BlockDriverState *bs)
 }
 
 static BlockDriver bdrv_bochs = {
-.format_name   = "bochs",
-.instance_size = sizeof(BDRVBochsState),
-.bdrv_probe= bochs_probe,
-.bdrv_open = bochs_open,
+.format_name= "bochs",
+.instance_size  = sizeof(BDRVBochsState),
+.bdrv_probe = bochs_probe,
+.bdrv_open  = bochs_open,
 .bdrv_child_perm = bdrv_default_perms,
 .bdrv_refresh_limits = bochs_refresh_limits,
 .bdrv_co_preadv = bochs_co_preadv,
-.bdrv_close= bochs_close,
-.is_format  = true,
+.bdrv_close = bochs_close,
+.is_format  = true,
 };
 
 static void bdrv_bochs_init(void)
diff --git a/block/file-posix.c b/block/file-posix.c
index 5760cf22d1..9ba1600a77 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -124,7 +124,7 @@
 #define FTYPE_FILE   0
 #define FTYPE_CD 1
 
-#define MAX_BLOCKSIZE  4096
+#define MAX_BLOCKSIZE   4096
 
 /* Posix file locking bytes. Libvirt takes byte 0, we start from higher bytes,
  * leaving a few more bytes for its future use. */
@@ -3819,14 +3819,14 @@ static void coroutine_fn 
cdrom_co_lock_medium(BlockDriverState *bs, bool locked)
 }
 
 static BlockDriver bdrv_host_cdrom = {
-.format_name= "host_cdrom",
-.protocol_name  = "host_cdrom",
-.instance_size  = sizeof(BDRVRawState),
+.format_name = "host_cdrom",
+.protocol_name   = "host_cdrom",
+.instance_size   = sizeof(BDRVRawState),
 .bdrv_needs_filename = true,
-.bdrv_probe_device = cdrom_probe_device,
+.bdrv_probe_device   = cdrom_probe_device,
 .bdrv_parse_filename = cdrom_parse_filename,
-.bdrv_file_open = cdrom_open,
-.bdrv_close = raw_close,
+.bdrv_file_open  = cdrom_open,
+.bdrv_close  = raw_close,
 .bdrv_reopen_prepare = raw_reopen_prepare,
 .bdrv_reopen_commit  = raw_reopen_commit,
 .bdrv_reopen_abort   = raw_reopen_abort,
@@ -3835,12 +3835,12 @@ static BlockDriver bdrv_host_cdrom = {
 .mutable_opts= mutable_opts,
 .bdrv_co_invalidate_cache = raw_co_invalidate_cache,
 
-.bdrv_co_preadv = raw_co_preadv,
-.bdrv_co_pwritev= raw_co_pwritev,
-.bdrv_co_flush_to_disk  = raw_co_flush_to_disk,
-.bdrv_refresh_limits = raw_refresh_limits,
-.bdrv_co_io_plug= raw_co_io_plug,
-.bdrv_co_io_unplug  = raw_co_io_unplug,
+.bdrv_co_preadv  = raw_co_preadv,
+.bdrv_co_pwritev = raw_co_pwritev,
+.bdrv_co_flush_to_disk   = raw_co_flush_to_disk,
+.bdrv_refresh_limits = raw_refresh_limits,
+.bdrv_co_io_plug = raw_co_io_plug,
+.bdrv_co_io_unplug   = raw_co_io_unplug,
 .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
 .bdrv_co_truncate   = raw_co_truncate,
@@ -3949,27 +3949,27 @@ static void coroutine_fn 
cdrom_co_lock_medium(BlockDriverState *bs, bool locked)
 }
 
 static BlockDriver bdrv_host_cdrom = {
-.format_name= "host_cdrom",
-.protocol_name  = "host_cdrom",
-.instance_size  = sizeof(BDRVRawState),
+.format_name = "host_cdrom",
+.protocol_name   = "host_cdrom",
+.instance_size   = sizeof(BDRVRawState),
 .bdrv_needs_filename = true,
-.bdrv_probe_device = cdrom_probe_device,
+.bdrv_probe_device   = cdrom_probe_device,
 .bdrv_parse_filename = cdrom_parse_filename,
-.bdrv_file_open = cdrom_open,
-.bdrv_close = raw_close,
+.bdrv_file_open  = cdrom_open,
+.bdrv_close  = raw_close,
 .bdrv_reopen_prepare = raw_reopen_prepare,
 .bdrv_reopen_commit  = raw_reopen_commit,
 .bdrv_reopen_abort   = raw_reopen_abort,
 .bdrv_co_create_opts = bdrv_co_create_opts_simple,
 .create_opts = _create_opts_simple,
-.mutable_opts   = mutable_opts,
+.mutable_opts= mutable_opts,
 
-.bdrv_co_preadv = raw_co_preadv,
-.bdrv_co_pwritev= raw_co_pwritev,
-.bdrv_co_flush_to_disk  = raw_co_flush_to_disk,
-.bdrv_refresh_limits = raw_refresh_limits,
-.bdrv_co_io_plug= raw_co_io_plug,
-.bdrv_co_io_unplug  = raw_co_io_unplug,
+.bdrv_co_preadv  = raw_co_preadv,
+.bdrv_co_pwritev =

[PATCH] qed: remove spurious BDRV_POLL_WHILE()

2023-03-09 Thread Stefan Hajnoczi

This looks like a copy-paste or merge error. BDRV_POLL_WHILE() is
already called above. It's not needed in the qemu_in_coroutine() case.

Fixes: 9fb4dfc570ce ("qed: make bdrv_qed_do_open a coroutine_fn")
Signed-off-by: Stefan Hajnoczi 
---
 block/qed.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/block/qed.c b/block/qed.c
index ed94bb61ca..0705a7b4e2 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -594,7 +594,6 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict 
*options, int flags,
 qemu_coroutine_enter(qemu_coroutine_create(bdrv_qed_open_entry, ));
 BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
 }
-BDRV_POLL_WHILE(bs, qoc.ret == -EINPROGRESS);
 return qoc.ret;
 }
 
-- 
2.39.2

Re: [PATCH 4/9] nbd: mark more coroutine_fns, do not use co_wrappers

2023-03-09 Thread Eric Blake

On Thu, Mar 09, 2023 at 09:44:51AM +0100, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini 
> ---
>  nbd/server.c | 48 
>  1 file changed, 24 insertions(+), 24 deletions(-)
> 
> diff --git a/nbd/server.c b/nbd/server.c
> index a4750e41880a..6f5fcade2a54 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -1409,8 +1409,8 @@ nbd_read_eof(NBDClient *client, void *buffer, size_t 
> size, Error **errp)
>  return 1;
>  }
>  
> -static int nbd_receive_request(NBDClient *client, NBDRequest *request,
> -   Error **errp)
> +static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest 
> *request,
> +Error **errp)
>  {

Should we rename this nbd_co_receive_request() while at it?

...
> @@ -2198,9 +2198,9 @@ static int coroutine_fn 
> blockalloc_to_extents(BlockBackend *blk,
>   * @ea is converted to BE by the function
>   * @last controls whether NBD_REPLY_FLAG_DONE is sent.
>   */
> -static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
> -   NBDExtentArray *ea,
> -   bool last, uint32_t context_id, Error **errp)
> +static int coroutine_fn nbd_co_send_extents(NBDClient *client, uint64_t 
> handle,
> +NBDExtentArray *ea,
> +   bool  last, uint32_t context_id, 
> Error **errp)

Whitespace damage.

...
> @@ -2297,8 +2297,8 @@ static int nbd_co_send_bitmap(NBDClient *client, 
> uint64_t handle,
>   * to the client (although the caller may still need to disconnect after
>   * reporting the error).
>   */
> -static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
> -  Error **errp)
> +static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, 
> NBDRequest *request,
> +   Error **errp)
>  {
>  NBDClient *client = req->client;
>  int valid_flags;
> @@ -2446,7 +2446,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient 
> *client, NBDRequest *request,

Most uses of coroutine_fn in this patch occur after the return type,
but in this and later hunks, the function has it the other way around.
Should we touch that up in this patch?  Likewise, should we add _co_
in the name of these pre-existing coroutine_fn functions
nbd_do_cmd_read and nbd_handle_request?

But I'm liking the efforts to use our annotations more consistently,
particularly if it is a result of you making progress on having the
compiler point out inconsistencies.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [PATCH for-8.0] ide: Fix manual in-flight count for TRIM BH

2023-03-09 Thread Paolo Bonzini


On 3/9/23 13:31, Hanna Czenczek wrote:

On 09.03.23 13:08, Paolo Bonzini wrote:

On Thu, Mar 9, 2023 at 1:05 PM Paolo Bonzini  wrote:

I think having to do this is problematic, because the blk_drain should
leave no pending operation.

Here it seems okay because you do it in a controlled situation, but the
main thread can also issue blk_drain(), or worse bdrv_drained_begin(),
and there would be pending I/O operations when it returns.


Not really.  We would stop in the middle of a trim that processes a list 
of discard requests.  So I see it more like stopping in the middle of 
anything that processes guest requests.  Once drain ends, we continue 
processing them, and that’s not exactly pending I/O.


There is a pending object in s->bus->dma->aiocb on the IDE side, so 
there is a pending DMA operation, but naïvely, I don’t see that as a 
problem.


What about the bdrv_drain_all() when a VM stops, would the guest 
continue to access memory and disks after bdrv_drain() return?


Migration could also be a problem, because the partial TRIM would not be 
recorded in the s->bus->error_status field of IDEState (no surprise 
there, it's not an error).  Also, errors happening after bdrv_drain() 
might not be migrated correctly.


Or the issue is generally that IDE uses dma_* functions, which might 
cause I/O functions to be run from new BHs (I guess through 
reschedule_dma()?).


Ah, you mean that you can have pending I/O operations while 
blk->in_flight is zero?  That would be a problem indeed.  We already 
have BlockDevOps for ide-cd and ide-hd, should we add a .drained_poll 
callback there?



Hmm, what about making blk_aio_prwv non-static and calling
bdrv_co_pdiscard directly from IDE?


You mean transforming ide_issue_trim_cb() into an iterative coroutine 
(instead of being recursive and using AIO) and invoking it via 
blk_aio_prwv()?


It doesn’t feel right to call a bdrv_* function directly from a user 
external to the core block layer, so in this case I’d rather fall back 
to Fiona’s idea of invoking all discards concurrently.


Yeah, honestly it doesn't feel very much right to me either.

Paolo

Re: [PATCH v2 2/3] block: make BlockBackend->disable_request_queuing atomic

2023-03-09 Thread Paolo Bonzini


On 3/9/23 13:31, Stefan Hajnoczi wrote:

On Thu, Mar 09, 2023 at 10:07:40AM +0100, Paolo Bonzini wrote:

On 3/7/23 22:04, Stefan Hajnoczi wrote:

This field is accessed by multiple threads without a lock. Use explicit
qatomic_read()/qatomic_set() calls. There is no need for acquire/release
because blk_set_disable_request_queuing() doesn't provide any
guarantees (it helps that it's used at BlockBackend creation time and
not when there is I/O in flight).


This in turn means itdoes not need to be atomic - atomics are only needed if
there are concurrent writes.  No big deal; I am now resurrecting the series
from the time I had noticed the queued_requests thread-safety problem, so
this will be simplified in 8.1.  For now your version is okay, thanks for
fixing it!


I was under the impression that variables accessed by multiple threads
outside a lock or similar primitive need memory_order_relaxed both as
documentation and to tell the compiler that they should indeed be atomic
(but without ordering guarantees).


Atomic accesses are needed to avoid data races.  Data races are 
concurrent accesses, of which at least one is a non-atomic write.  (A is 
concurrent with B is you can't be sure that A happens before B or vice 
versa; this intuitively is the "lock or similar primitive" that you 
mentioned.  Happens-before changes from one execution to the other, but 
it is enough to somehow prove there _is_ an ordering; for example, given 
two accesses that are done while a mutex is taken, one will always 
happen before the other).


In this case all writes to disable_request_queuing happen not just 
outside I/O, but even *before* the first I/O.  No writes that are 
concurrent with reads => no need to use atomic for reads.


For example the stdin global variable is accessed from multiple threads 
and you would never use atomics to read the pointer.  Just don't write 
to it and there won't be data races.



I think memory_order_relaxed also tells the compiler to do a bit more,
like to generate just a single store to the variable for each occurrence
in the code ("speculative" and "out-of-thin air" stores).


The correspondence is not necessarily 1:1, some optimizations are 
possible; for example this:


  qatomic_set(, 0);
  a = qatomic_read();
  qatomic_set(, a + 1);

can be changed to

  a = 0;
  qatomic_set(, 1);

(because it is safe to assume that no other thread sees the state where 
x==0).  Or the first read here:


  a = qatomic_read();
  a = qatomic_read();

can be optimized out, unlike Linux's READ_ONCE().

I have no idea if compilers actually perform these optimizations, but if 
they do they are neither frequent (maybe in languages that inline a lot 
more, but not in QEMU) nor problematic.  Even though there's some 
freedom in removing/consolidating code, it's true as you said that 
speculation is a no-no!



It's the documentation part that's most interesting in this case. Do we
not want to identify variables that are accessed outside a lock and
therefore require some thought?


I think it depends, see the stdin example before.  As the API is now, I 
agree that using qatomic_read() is the right thing to do.  In principle 
the flag could bounce back and forth many times. :/


With a better API, the balance may tilt on the side of not using 
atomics.  We'll see when I post the patch. :)


Paolo

Re: [PATCH 1/6] block: don't acquire AioContext lock in bdrv_drain_all()

2023-03-09 Thread Stefan Hajnoczi

On Wed, Mar 08, 2023 at 06:25:43PM +0100, Kevin Wolf wrote:
> Am 08.03.2023 um 15:26 hat Stefan Hajnoczi geschrieben:
> > On Wed, Mar 08, 2023 at 09:48:17AM +0100, Kevin Wolf wrote:
> > > Am 07.03.2023 um 20:20 hat Stefan Hajnoczi geschrieben:
> > > > On Tue, Mar 07, 2023 at 06:17:22PM +0100, Kevin Wolf wrote:
> > > > > Am 01.03.2023 um 21:57 hat Stefan Hajnoczi geschrieben:
> > > > > > There is no need for the AioContext lock in bdrv_drain_all() because
> > > > > > nothing in AIO_WAIT_WHILE() needs the lock and the condition is 
> > > > > > atomic.
> > > > > > 
> > > > > > Note that the NULL AioContext argument to AIO_WAIT_WHILE() is odd. 
> > > > > > In
> > > > > > the future it can be removed.
> > > > > 
> > > > > It can be removed for all callers that run in the main loop context. 
> > > > > For
> > > > > code running in an iothread, it's still important to pass a non-NULL
> > > > > context. This makes me doubt that the ctx parameter can really be
> > > > > removed without changing more.
> > > > > 
> > > > > Is your plan to remove the if from AIO_WAIT_WHILE_INTERNAL(), too, and
> > > > > to poll qemu_get_current_aio_context() instead of ctx_ or the main
> > > > > context?
> > > > 
> > > > This is what I'd like once everything has been converted to
> > > > AIO_WAIT_WHILE_UNLOCKED() - and at this point we might as well call it
> > > > AIO_WAIT_WHILE() again:
> > > > 
> > > >   #define AIO_WAIT_WHILE(cond) ({\
> > > >   bool waited_ = false;  \
> > > >   AioWait *wait_ = _aio_wait; \
> > > >   /* Increment wait_->num_waiters before evaluating cond. */ \
> > > >   qatomic_inc(_->num_waiters);  \
> > > >   /* Paired with smp_mb in aio_wait_kick(). */   \
> > > >   smp_mb();  \
> > > >   while ((cond)) {   \
> > > >   aio_poll(qemu_get_current_aio_context(), true);\
> > > >   waited_ = true;\
> > > >   }  \
> > > >   qatomic_dec(_->num_waiters);  \
> > > >   waited_; })
> > > 
> > > Ok, yes, this is what I tried to describe above.
> > > 
> > > > However, I just realized this only works in the main loop thread because
> > > > that's where aio_wait_kick() notifications are received. An IOThread
> > > > running AIO_WAIT_WHILE() won't be woken when another thread (including
> > > > the main loop thread) calls aio_wait_kick().
> > > 
> > > Which is of course a limitation we already have today. You can wait for
> > > things in your own iothread, or for all threads from the main loop.
> > > 
> > > However, in the future multiqueue world, the first case probably becomes
> > > pretty much useless because even for the same node, you could get
> > > activity in any thread.
> > > 
> > > So essentially AIO_WAIT_WHILE() becomes GLOBAL_STATE_CODE(). Which is
> > > probably a good idea anyway, but I'm not entirely sure how many places
> > > we currently have where it's called from an iothread. I know the drain
> > > in mirror_run(), but Emanuele already had a patch in his queue where
> > > bdrv_co_yield_to_drain() schedules drain in the main context, so if that
> > > works, mirror_run() would be solved.
> > > 
> > > https://gitlab.com/eesposit/qemu/-/commit/63562351aca4fb05d5711eb410feb96e64b5d4ad
> > > 
> > > > I would propose introducing a QemuCond for each condition that we wait
> > > > on, but QemuCond lacks event loop integration. The current thread would
> > > > be unable to run aio_poll() while also waiting on a QemuCond.
> > > > 
> > > > Life outside coroutines is hard, man! I need to think about this more.
> > > > Luckily this problem doesn't block this patch series.
> > > 
> > > I hope that we don't really need all of this if we can limit running
> > > synchronous code to the main loop.
> > 
> > Great idea, I think you're right.
> > 
> > I'll audit the code to find the IOThread AIO_WAIT_WHILE() callers and
> > maybe a future patch series can work on that.
> > 
> > > > > > There is an assertion in
> > > > > > AIO_WAIT_WHILE() that checks that we're in the main loop AioContext 
> > > > > > and
> > > > > > we would lose that check by dropping the argument. However, that 
> > > > > > was a
> > > > > > precursor to the GLOBAL_STATE_CODE()/IO_CODE() macros and is now a
> > > > > > duplicate check. So I think we won't lose much by dropping it, but 
> > > > > > let's
> > > > > > do a few more AIO_WAIT_WHILE_UNLOCKED() coversions of this sort to
> > > > > > confirm this is the case.
> > > > > > 
> > > > > > Signed-off-by: Stefan Hajnoczi 
> > > > > 
> > > > > Yes, it seems that we don't lose much, except maybe some consistency 
> > > > > in
> > > > > the intermediate state. The

Re: [PATCH for-8.0] ide: Fix manual in-flight count for TRIM BH

2023-03-09 Thread Hanna Czenczek


On 09.03.23 13:08, Paolo Bonzini wrote:

On Thu, Mar 9, 2023 at 1:05 PM Paolo Bonzini  wrote:

I think having to do this is problematic, because the blk_drain should
leave no pending operation.

Here it seems okay because you do it in a controlled situation, but the
main thread can also issue blk_drain(), or worse bdrv_drained_begin(),
and there would be pending I/O operations when it returns.


Not really.  We would stop in the middle of a trim that processes a list 
of discard requests.  So I see it more like stopping in the middle of 
anything that processes guest requests.  Once drain ends, we continue 
processing them, and that’s not exactly pending I/O.


There is a pending object in s->bus->dma->aiocb on the IDE side, so 
there is a pending DMA operation, but naïvely, I don’t see that as a 
problem.



Unfortunately I don't have a solution (I'm not considering the idea of
using disable_request_queuing and having even more atomics magic in
block/block-backend.c), but I'll read the thread.


I wouldn’t disable request queuing, because its introducing commit 
message (cf3129323f900ef5ddbccbe86e4fa801e88c566e) specifically says it 
fixes IDE.  I suppose the reason might actually be this problem here, in 
that before request queuing, it was possible that IDE would continue 
issuing discard requests even while drained, because processing the list 
didn’t stop.  Maybe.


Or the issue is generally that IDE uses dma_* functions, which might 
cause I/O functions to be run from new BHs (I guess through 
reschedule_dma()?).



Hmm, what about making blk_aio_prwv non-static and calling
bdrv_co_pdiscard directly from IDE?


You mean transforming ide_issue_trim_cb() into an iterative coroutine 
(instead of being recursive and using AIO) and invoking it via 
blk_aio_prwv()?


It doesn’t feel right to call a bdrv_* function directly from a user 
external to the core block layer, so in this case I’d rather fall back 
to Fiona’s idea of invoking all discards concurrently.


Hanna

Re: [PATCH v2 2/3] block: make BlockBackend->disable_request_queuing atomic

2023-03-09 Thread Stefan Hajnoczi

On Thu, Mar 09, 2023 at 10:07:40AM +0100, Paolo Bonzini wrote:
> On 3/7/23 22:04, Stefan Hajnoczi wrote:
> > This field is accessed by multiple threads without a lock. Use explicit
> > qatomic_read()/qatomic_set() calls. There is no need for acquire/release
> > because blk_set_disable_request_queuing() doesn't provide any
> > guarantees (it helps that it's used at BlockBackend creation time and
> > not when there is I/O in flight).
> 
> This in turn means itdoes not need to be atomic - atomics are only needed if
> there are concurrent writes.  No big deal; I am now resurrecting the series
> from the time I had noticed the queued_requests thread-safety problem, so
> this will be simplified in 8.1.  For now your version is okay, thanks for
> fixing it!

I was under the impression that variables accessed by multiple threads
outside a lock or similar primitive need memory_order_relaxed both as
documentation and to tell the compiler that they should indeed be atomic
(but without ordering guarantees).

I think memory_order_relaxed also tells the compiler to do a bit more,
like to generate just a single store to the variable for each occurrence
in the code ("speculative" and "out-of-thin air" stores).

It's the documentation part that's most interesting in this case. Do we
not want to identify variables that are accessed outside a lock and
therefore require some thought?

Stefan

signature.asc
Description: PGP signature

Re: [PATCH v2 2/3] block: make BlockBackend->disable_request_queuing atomic

2023-03-09 Thread Stefan Hajnoczi

On Thu, Mar 09, 2023 at 12:18:00PM +0100, Paolo Bonzini wrote:
> On 3/7/23 22:04, Stefan Hajnoczi wrote:
> >   static int coroutine_fn GRAPH_RDLOCK
> > @@ -1271,7 +1271,8 @@ static void coroutine_fn 
> > blk_wait_while_drained(BlockBackend *blk)
> >   {
> >   assert(blk->in_flight > 0);
> > -if (qatomic_read(>quiesce_counter) && 
> > !blk->disable_request_queuing) {
> > +if (qatomic_read(>quiesce_counter) &&
> > +!qatomic_read(>disable_request_queuing)) {
> 
> The qatomic_inc in blk_inc_in_flight() made me a bit nervous that
> smp_mb__after_rmw() was needed there, but it's okay.

Yes. I wrote it under the assumption that sequentially consistent
operations like qatomic_inc() are implicit barriers.

> First, anyway blk_wait_while_drained() has to _eventually_ pause the device,
> not immediately.  Even if it misses that blk->quiesce_counter == 1, the I/O
> will proceed and it'll just take a little more polling before
> bdrv_drained_begin() exits.
> 
> Second, I checked with CPPMEM the barriers in AIO_WAIT_WHILE() and
> aio_wait_kick() save the day, even if loading blk->quiesce_counter is
> reordered before the incremented value (1) is stored to blk->in_flight.
> 
> The CPPMEM model here uses mo_relaxed to force all possible kinds of havoc:
> 
> int main() {
>   atomic_int quiesce_counter = 0;
>   atomic_int waiters = 0;
>   atomic_int in_flight = 0;
> 
>   {{{ { quiesce_counter.store(1, mo_relaxed);
> waiters.store(1, mo_relaxed);// AIO_WAIT_WHILE starts here
> atomic_thread_fence(mo_seq_cst);
> in_flight.load(mo_relaxed).readsvalue(1); } // if 1, sleep
> 
>   ||| { in_flight.store(1, mo_relaxed);  // bdrv_inc_in_flight
> quiesce_counter.load(mo_relaxed).readsvalue(1); // go down "if"
> in_flight.store(0, mo_release);  // bdrv_dec_in_flight
> atomic_thread_fence(mo_seq_cst); // aio_wait_kick starts here
> waiters.load(mo_relaxed).readsvalue(0); }   // if 0, do not wake
>   }}};
> 
>   return 0;
> }
> 
> 
> Because CPPMEM shows no execution consistent with the buggy .readsvalue(),
> either AIO_WAIT_WHILE will not go to sleep or it will be woken up with
> in_flight == 0.  The polling loop ends and drained_end restarts the
> coroutine from blk->queued_requests.

Okay.

Stefan


signature.asc
Description: PGP signature

Re: [PATCH for-8.0] ide: Fix manual in-flight count for TRIM BH

2023-03-09 Thread Paolo Bonzini

On Thu, Mar 9, 2023 at 1:05 PM Paolo Bonzini  wrote:
> I think having to do this is problematic, because the blk_drain should
> leave no pending operation.
>
> Here it seems okay because you do it in a controlled situation, but the
> main thread can also issue blk_drain(), or worse bdrv_drained_begin(),
> and there would be pending I/O operations when it returns.
>
> Unfortunately I don't have a solution (I'm not considering the idea of
> using disable_request_queuing and having even more atomics magic in
> block/block-backend.c), but I'll read the thread.

Hmm, what about making blk_aio_prwv non-static and calling
bdrv_co_pdiscard directly from IDE?

Paolo

Re: [PATCH for-8.0] ide: Fix manual in-flight count for TRIM BH

2023-03-09 Thread Paolo Bonzini


On 3/9/23 12:44, Hanna Czenczek wrote:

+ *
+ * Note that TRIM operations call blk_aio_pdiscard() multiple
+ * times (and finally increment s->blk's in-flight counter while
+ * ide_trim_bh_cb() is scheduled), so we have to loop blk_drain()
+ * until the whole operation is done.
   */
  if (s->bus->dma->aiocb) {
  trace_ide_cancel_dma_sync_remaining();
-blk_drain(s->blk);
-assert(s->bus->dma->aiocb == NULL);
+while (s->bus->dma->aiocb) {
+blk_drain(s->blk);
+}


I think having to do this is problematic, because the blk_drain should 
leave no pending operation.


Here it seems okay because you do it in a controlled situation, but the 
main thread can also issue blk_drain(), or worse bdrv_drained_begin(), 
and there would be pending I/O operations when it returns.


Unfortunately I don't have a solution (I'm not considering the idea of 
using disable_request_queuing and having even more atomics magic in 
block/block-backend.c), but I'll read the thread.


Paolo

[PATCH for-8.0] ide: Fix manual in-flight count for TRIM BH

2023-03-09 Thread Hanna Czenczek

Commit 7e5cdb345f77d76cb4877fe6230c4e17a7d0d0ca ("ide: Increment BB
in-flight counter for TRIM BH") fixed a problem when cancelling trims:
ide_cancel_dma_sync() drains the BB to stop a potentially ongoing
request, and then asserts that no request is active anymore.  When
trying to cancel a trim, this would only drain a single
blk_aio_pdiscard() operation, but not necessarily the trim as a whole.
Said commit fixed that by counting the whole trim as an operation,
incrementing the in-flight counter for it, so the blk_drain() in
ide_cancel_dma_sync() would wait for it.

Fiona found a problem with this, specifically that draining a BB while
an IDE trim operation is going on can produce a deadlock: An ongoing
blk_aio_pdiscard() will be settled, but any further discard in the same
trim will go into the BB's queued_request list and wait there until the
drain stops.  Meanwhile, the whole trim keeps the BB's in_flight_counter
incremented, so the BB will never be considered drained, and qemu hangs.

Therefore, we cannot keep the in-flight counter incremented throughout
the trim operation.  We should still increment it before the completion
BH (ide_trim_bh_cb()) is scheduled and decrement it there, so that the
blk_drain() in ide_cancel_dma_sync() can await completion of this
scheduled BH.  With that change, however, it can take multiple
iterations of blk_drain() to really settle the whole trim operation, and
so ide_cancel_dma_sync() must run it in a loop.

Reported-by: Fiona Ebner 
Fixes: 7e5cdb345f77d76cb4877fe6230c4e17a7d0d0ca
   ("ide: Increment BB in-flight counter for TRIM BH")
Signed-off-by: Hanna Czenczek 
---
Tested with a small test boot sector that issues a trim operation with
any number of discards from 0 to 64.  Before this patch, doing so hangs
starting from 2 discards; and before 7e5cdb345f, doing so crashes qemu
with any number of discards.  With this patch, it always works.
---
 hw/ide/core.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 2d034731cf..54c51084d2 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -444,7 +444,7 @@ static void ide_trim_bh_cb(void *opaque)
 iocb->bh = NULL;
 qemu_aio_unref(iocb);
 
-/* Paired with an increment in ide_issue_trim() */
+/* Paired with an increment in ide_issue_trim_cb() */
 blk_dec_in_flight(blk);
 }
 
@@ -504,6 +504,14 @@ static void ide_issue_trim_cb(void *opaque, int ret)
 done:
 iocb->aiocb = NULL;
 if (iocb->bh) {
+/*
+ * Paired with a decrement in ide_trim_bh_cb(): Ensure we have
+ * an in-flight count while this TrimAIOCB object lives.
+ * There is no ongoing blk_aio_pdiscard() operation anymore,
+ * so here we increment the counter manually before returning.
+ */
+blk_inc_in_flight(s->blk);
+
 replay_bh_schedule_event(iocb->bh);
 }
 }
@@ -515,9 +523,6 @@ BlockAIOCB *ide_issue_trim(
 IDEState *s = opaque;
 TrimAIOCB *iocb;
 
-/* Paired with a decrement in ide_trim_bh_cb() */
-blk_inc_in_flight(s->blk);
-
 iocb = blk_aio_get(_aiocb_info, s->blk, cb, cb_opaque);
 iocb->s = s;
 iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
@@ -737,11 +742,17 @@ void ide_cancel_dma_sync(IDEState *s)
  * In the future we'll be able to safely cancel the I/O if the
  * whole DMA operation will be submitted to disk with a single
  * aio operation with preadv/pwritev.
+ *
+ * Note that TRIM operations call blk_aio_pdiscard() multiple
+ * times (and finally increment s->blk's in-flight counter while
+ * ide_trim_bh_cb() is scheduled), so we have to loop blk_drain()
+ * until the whole operation is done.
  */
 if (s->bus->dma->aiocb) {
 trace_ide_cancel_dma_sync_remaining();
-blk_drain(s->blk);
-assert(s->bus->dma->aiocb == NULL);
+while (s->bus->dma->aiocb) {
+blk_drain(s->blk);
+}
 }
 }
 
-- 
2.39.1

[PATCH nbd 1/4] nbd: Add multi-conn option

2023-03-09 Thread Richard W.M. Jones

Add multi-conn option to the NBD client.  This commit just adds the
option, it is not functional.

Setting this to a value > 1 permits multiple connections to the NBD
server; a typical value might be 4.  The default is 1, meaning only a
single connection is made.  If the NBD server does not advertise that
it is safe for multi-conn then this setting is forced to 1.

Signed-off-by: Richard W.M. Jones 
---
 block/nbd.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/block/nbd.c b/block/nbd.c
index bf2894ad5c..5ffae0b798 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -49,6 +49,7 @@
 
 #define EN_OPTSTR ":exportname="
 #define MAX_NBD_REQUESTS16
+#define MAX_MULTI_CONN  16
 
 #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
 #define INDEX_TO_HANDLE(bs, index)  ((index)  ^ (uint64_t)(intptr_t)(bs))
@@ -98,6 +99,7 @@ typedef struct BDRVNBDState {
 /* Connection parameters */
 uint32_t reconnect_delay;
 uint32_t open_timeout;
+uint32_t multi_conn;
 SocketAddress *saddr;
 char *export;
 char *tlscredsid;
@@ -1803,6 +1805,15 @@ static QemuOptsList nbd_runtime_opts = {
 "attempts until successful or until @open-timeout seconds "
 "have elapsed. Default 0",
 },
+{
+.name = "multi-conn",
+.type = QEMU_OPT_NUMBER,
+.help = "If > 1 permit up to this number of connections to the "
+"server. The server must also advertise multi-conn "
+"support.  If <= 1, only a single connection is made "
+"to the server even if the server advertises multi-conn. "
+"Default 1",
+},
 { /* end of list */ }
 },
 };
@@ -1858,6 +1869,10 @@ static int nbd_process_options(BlockDriverState *bs, 
QDict *options,
 
 s->reconnect_delay = qemu_opt_get_number(opts, "reconnect-delay", 0);
 s->open_timeout = qemu_opt_get_number(opts, "open-timeout", 0);
+s->multi_conn = qemu_opt_get_number(opts, "multi-conn", 1);
+if (s->multi_conn > MAX_MULTI_CONN) {
+s->multi_conn = MAX_MULTI_CONN;
+}
 
 ret = 0;
 
@@ -1912,6 +1927,15 @@ static int nbd_open(BlockDriverState *bs, QDict 
*options, int flags,
 
 nbd_client_connection_enable_retry(s->conn);
 
+/*
+ * We set s->multi_conn in nbd_process_options above, but now that
+ * we have connected if the server doesn't advertise that it is
+ * safe for multi-conn, force it to 1.
+ */
+if (!(s->info.flags & NBD_FLAG_CAN_MULTI_CONN)) {
+s->multi_conn = 1;
+}
+
 return 0;
 
 fail:
-- 
2.39.2

[PATCH nbd 2/4] nbd: Split out block device state from underlying NBD connections

2023-03-09 Thread Richard W.M. Jones

To implement multi-conn, we will put multiple underlying NBD
connections (ie. NBDClientConnection) inside the NBD block device
handle (BDRVNBDState).  This requires first breaking the one-to-one
relationship between NBDClientConnection and BDRVNBDState.

To do this a new structure (NBDConnState) is implemented.
NBDConnState takes all the per-connection state out of the block
driver struct.  BDRVNBDState now contains a conns[] array of pointers
to NBDConnState, one for each underlying multi-conn connection.

After this change there is still a one-to-one relationship because we
only ever use the zeroth slot in the conns[] array.  Thus this does
not implement multi-conn yet.

Signed-off-by: Richard W.M. Jones 
---
 block/coroutines.h |   5 +-
 block/nbd.c| 674 -
 2 files changed, 358 insertions(+), 321 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index dd9f3d449b..14b01d8591 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -62,7 +62,7 @@ int coroutine_fn GRAPH_RDLOCK
 bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 
 int coroutine_fn
-nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking,
+nbd_co_do_establish_connection(BlockDriverState *bs, void *cs, bool blocking,
Error **errp);
 
 
@@ -86,6 +86,7 @@ bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState **file,
int *depth);
 int co_wrapper_mixed
-nbd_do_establish_connection(BlockDriverState *bs, bool blocking, Error **errp);
+nbd_do_establish_connection(BlockDriverState *bs, void *cs, bool blocking,
+Error **errp);
 
 #endif /* BLOCK_COROUTINES_H */
diff --git a/block/nbd.c b/block/nbd.c
index 5ffae0b798..84e8a1add0 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -51,8 +51,8 @@
 #define MAX_NBD_REQUESTS16
 #define MAX_MULTI_CONN  16
 
-#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
-#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ (uint64_t)(intptr_t)(bs))
+#define HANDLE_TO_INDEX(cs, handle) ((handle) ^ (uint64_t)(intptr_t)(cs))
+#define INDEX_TO_HANDLE(cs, index)  ((index)  ^ (uint64_t)(intptr_t)(cs))
 
 typedef struct {
 Coroutine *coroutine;
@@ -67,7 +67,10 @@ typedef enum NBDClientState {
 NBD_CLIENT_QUIT
 } NBDClientState;
 
-typedef struct BDRVNBDState {
+/* A single client connection. */
+typedef struct NBDConnState {
+struct BDRVNBDState *s; /* Points to enclosing BDRVNBDState */
+
 QIOChannel *ioc; /* The current I/O channel */
 NBDExportInfo info;
 
@@ -94,7 +97,12 @@ typedef struct BDRVNBDState {
 
 QEMUTimer *open_timer;
 
-BlockDriverState *bs;
+NBDClientConnection *conn;
+} NBDConnState;
+
+typedef struct BDRVNBDState {
+/* The underlying NBD connections */
+NBDConnState *conns[MAX_MULTI_CONN];
 
 /* Connection parameters */
 uint32_t reconnect_delay;
@@ -108,7 +116,7 @@ typedef struct BDRVNBDState {
 char *x_dirty_bitmap;
 bool alloc_depth;
 
-NBDClientConnection *conn;
+BlockDriverState *bs;
 } BDRVNBDState;
 
 static void nbd_yank(void *opaque);
@@ -117,14 +125,16 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs)
 {
 BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
-nbd_client_connection_release(s->conn);
-s->conn = NULL;
+nbd_client_connection_release(s->conns[0]->conn);
+s->conns[0]->conn = NULL;
 
 yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name));
 
 /* Must not leave timers behind that would access freed data */
-assert(!s->reconnect_delay_timer);
-assert(!s->open_timer);
+assert(!s->conns[0]->reconnect_delay_timer);
+assert(!s->conns[0]->open_timer);
+
+g_free(s->conns[0]);
 
 object_unref(OBJECT(s->tlscreds));
 qapi_free_SocketAddress(s->saddr);
@@ -151,139 +161,143 @@ static bool coroutine_fn 
nbd_recv_coroutine_wake_one(NBDClientRequest *req)
 return false;
 }
 
-static void coroutine_fn nbd_recv_coroutines_wake(BDRVNBDState *s)
+static void coroutine_fn nbd_recv_coroutines_wake(NBDConnState *cs)
 {
 int i;
 
-QEMU_LOCK_GUARD(>receive_mutex);
+QEMU_LOCK_GUARD(>receive_mutex);
 for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-if (nbd_recv_coroutine_wake_one(>requests[i])) {
+if (nbd_recv_coroutine_wake_one(>requests[i])) {
 return;
 }
 }
 }
 
 /* Called with s->requests_lock held.  */
-static void coroutine_fn nbd_channel_error_locked(BDRVNBDState *s, int ret)
+static void coroutine_fn nbd_channel_error_locked(NBDConnState *cs, int ret)
 {
-if (s->state == NBD_CLIENT_CONNECTED) {
-qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
+if (cs->state == NBD_CLIENT_CONNECTED) {
+qio_channel_shutdown(cs->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
 }
 
 if (ret == -EIO) {
-if (s->state ==

[PATCH nbd 3/4] nbd: Open multiple NBD connections if multi-conn is set

2023-03-09 Thread Richard W.M. Jones

If the user opts in to multi-conn and the server advertises it, open
multiple NBD connections.  They are opened with the same parameters as
the initial connection.  Similarly on the close path the connections
are closed.

However only the zeroth connection is used, so this commit does not
actually enable multi-conn capabilities.

(XXX) The strategy here is very naive.  Firstly if you were going to
open them, you'd probably want to open them in parallel.  Secondly it
would make sense to delay opening until multiple parallel requests are
seen (perhaps above some threshold), so that simple or shortlived NBD
operations do not require multiple connections to be made.

Signed-off-by: Richard W.M. Jones 
---
 block/nbd.c | 128 
 1 file changed, 90 insertions(+), 38 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 84e8a1add0..4c99c3f865 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -124,18 +124,23 @@ static void nbd_yank(void *opaque);
 static void nbd_clear_bdrvstate(BlockDriverState *bs)
 {
 BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+size_t i;
 
-nbd_client_connection_release(s->conns[0]->conn);
-s->conns[0]->conn = NULL;
+for (i = 0; i < MAX_MULTI_CONN; ++i) {
+if (s->conns[i]) {
+nbd_client_connection_release(s->conns[i]->conn);
+s->conns[i]->conn = NULL;
+
+/* Must not leave timers behind that would access freed data */
+assert(!s->conns[i]->reconnect_delay_timer);
+assert(!s->conns[i]->open_timer);
+
+g_free(s->conns[i]);
+}
+}
 
 yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name));
 
-/* Must not leave timers behind that would access freed data */
-assert(!s->conns[0]->reconnect_delay_timer);
-assert(!s->conns[0]->open_timer);
-
-g_free(s->conns[0]);
-
 object_unref(OBJECT(s->tlscreds));
 qapi_free_SocketAddress(s->saddr);
 s->saddr = NULL;
@@ -1905,43 +1910,39 @@ static int nbd_process_options(BlockDriverState *bs, 
QDict *options,
 return ret;
 }
 
-static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
-Error **errp)
+static NBDConnState *init_conn_state(BDRVNBDState *s)
 {
+NBDConnState *cs;
+
+cs = g_new0(NBDConnState, 1);
+cs->s = s;
+qemu_mutex_init(>requests_lock);
+qemu_co_queue_init(>free_sema);
+qemu_co_mutex_init(>send_mutex);
+qemu_co_mutex_init(>receive_mutex);
+
+return cs;
+}
+
+static int conn_state_connect(BlockDriverState *bs, NBDConnState *cs,
+  Error **errp)
+{
+BDRVNBDState *s = cs->s;
 int ret;
-BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 
-s->bs = bs;
-
-s->conns[0] = g_new0(NBDConnState, 1);
-s->conns[0]->s = s;
-qemu_mutex_init(>conns[0]->requests_lock);
-qemu_co_queue_init(>conns[0]->free_sema);
-qemu_co_mutex_init(>conns[0]->send_mutex);
-qemu_co_mutex_init(>conns[0]->receive_mutex);
-
-if (!yank_register_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name), errp)) {
-return -EEXIST;
-}
-
-ret = nbd_process_options(bs, options, errp);
-if (ret < 0) {
-goto fail;
-}
-
-s->conns[0]->conn =
+cs->conn =
 nbd_client_connection_new(s->saddr, true, s->export,
   s->x_dirty_bitmap, s->tlscreds,
   s->tlshostname);
 
 if (s->open_timeout) {
-nbd_client_connection_enable_retry(s->conns[0]->conn);
-open_timer_init(s->conns[0], qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
+nbd_client_connection_enable_retry(cs->conn);
+open_timer_init(cs, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) +
 s->open_timeout * NANOSECONDS_PER_SECOND);
 }
 
-s->conns[0]->state = NBD_CLIENT_CONNECTING_WAIT;
-ret = nbd_do_establish_connection(bs, s->conns[0], true, errp);
+cs->state = NBD_CLIENT_CONNECTING_WAIT;
+ret = nbd_do_establish_connection(bs, cs, true, errp);
 if (ret < 0) {
 goto fail;
 }
@@ -1951,9 +1952,44 @@ static int nbd_open(BlockDriverState *bs, QDict 
*options, int flags,
  * Delete it, because we do not want it to be around when this node
  * is drained or closed.
  */
-open_timer_del(s->conns[0]);
+open_timer_del(cs);
 
-nbd_client_connection_enable_retry(s->conns[0]->conn);
+nbd_client_connection_enable_retry(cs->conn);
+
+return 0;
+
+fail:
+open_timer_del(cs);
+return ret;
+}
+
+static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
+Error **errp)
+{
+int ret;
+BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
+size_t i;
+
+s->bs = bs;
+
+if (!yank_register_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name), errp)) {
+return -EEXIST;
+}
+
+ret = nbd_process_options(bs, options, errp);
+if (ret < 0) {
+goto fail;
+}
+
+/*
+

[PATCH nbd 4/4] nbd: Enable multi-conn using round-robin

2023-03-09 Thread Richard W.M. Jones

Enable NBD multi-conn by spreading operations across multiple
connections.

(XXX) This uses a naive round-robin approach which could be improved.
For example we could look at how many requests are in flight and
assign operations to the connections with fewest.  Or we could try to
estimate (based on size of requests outstanding) the load on each
connection.  But this implementation doesn't do any of that.

Signed-off-by: Richard W.M. Jones 
---
 block/nbd.c | 67 +++--
 1 file changed, 49 insertions(+), 18 deletions(-)

diff --git a/block/nbd.c b/block/nbd.c
index 4c99c3f865..df32ba67ed 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1232,6 +1232,26 @@ static int coroutine_fn nbd_co_request(NBDConnState *cs, 
NBDRequest *request,
 return ret ? ret : request_ret;
 }
 
+/*
+ * If multi-conn, choose a connection for this operation.
+ */
+static NBDConnState *choose_connection(BDRVNBDState *s)
+{
+static size_t next;
+size_t i;
+
+if (s->multi_conn <= 1) {
+return s->conns[0];
+}
+
+/* XXX Stupid simple round robin. */
+i = qatomic_fetch_inc();
+i %= s->multi_conn;
+
+assert(s->conns[i] != NULL);
+return s->conns[i];
+}
+
 static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t 
offset,
  int64_t bytes, QEMUIOVector *qiov,
  BdrvRequestFlags flags)
@@ -1244,7 +1264,7 @@ static int coroutine_fn 
nbd_client_co_preadv(BlockDriverState *bs, int64_t offse
 .from = offset,
 .len = bytes,
 };
-NBDConnState * const cs = s->conns[0];
+NBDConnState * const cs = choose_connection(s);
 
 assert(bytes <= NBD_MAX_BUFFER_SIZE);
 
@@ -1301,7 +1321,7 @@ static int coroutine_fn 
nbd_client_co_pwritev(BlockDriverState *bs, int64_t offs
 .from = offset,
 .len = bytes,
 };
-NBDConnState * const cs = s->conns[0];
+NBDConnState * const cs = choose_connection(s);
 
 assert(!(cs->info.flags & NBD_FLAG_READ_ONLY));
 if (flags & BDRV_REQ_FUA) {
@@ -1326,7 +1346,7 @@ static int coroutine_fn 
nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_
 .from = offset,
 .len = bytes,  /* .len is uint32_t actually */
 };
-NBDConnState * const cs = s->conns[0];
+NBDConnState * const cs = choose_connection(s);
 
 assert(bytes <= UINT32_MAX); /* rely on max_pwrite_zeroes */
 
@@ -1357,7 +1377,13 @@ static int coroutine_fn 
nbd_client_co_flush(BlockDriverState *bs)
 {
 BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 NBDRequest request = { .type = NBD_CMD_FLUSH };
-NBDConnState * const cs = s->conns[0];
+
+/*
+ * Multi-conn (if used) guarantees that flushing on any connection
+ * flushes caches on all connections, so we can perform this
+ * operation on any.
+ */
+NBDConnState * const cs = choose_connection(s);
 
 if (!(cs->info.flags & NBD_FLAG_SEND_FLUSH)) {
 return 0;
@@ -1378,7 +1404,7 @@ static int coroutine_fn 
nbd_client_co_pdiscard(BlockDriverState *bs, int64_t off
 .from = offset,
 .len = bytes, /* len is uint32_t */
 };
-NBDConnState * const cs = s->conns[0];
+NBDConnState * const cs = choose_connection(s);
 
 assert(bytes <= UINT32_MAX); /* rely on max_pdiscard */
 
@@ -1398,7 +1424,7 @@ static int coroutine_fn nbd_client_co_block_status(
 NBDExtent extent = { 0 };
 BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
 Error *local_err = NULL;
-NBDConnState * const cs = s->conns[0];
+NBDConnState * const cs = choose_connection(s);
 
 NBDRequest request = {
 .type = NBD_CMD_BLOCK_STATUS,
@@ -2027,7 +2053,7 @@ static int coroutine_fn nbd_co_flush(BlockDriverState *bs)
 static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-NBDConnState * const cs = s->conns[0];
+NBDConnState * const cs = choose_connection(s);
 uint32_t min = cs->info.min_block;
 uint32_t max = MIN_NON_ZERO(NBD_MAX_BUFFER_SIZE, cs->info.max_block);
 
@@ -2085,7 +2111,7 @@ static int coroutine_fn nbd_co_truncate(BlockDriverState 
*bs, int64_t offset,
 BdrvRequestFlags flags, Error **errp)
 {
 BDRVNBDState *s = bs->opaque;
-NBDConnState * const cs = s->conns[0];
+NBDConnState * const cs = choose_connection(s);
 
 if (offset != cs->info.size && exact) {
 error_setg(errp, "Cannot resize NBD nodes");
@@ -2168,24 +2194,29 @@ static const char *const nbd_strong_runtime_opts[] = {
 static void nbd_cancel_in_flight(BlockDriverState *bs)
 {
 BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
-NBDConnState * const cs = s->conns[0];
+size_t i;
+NBDConnState *cs;
 
-reconnect_delay_timer_del(cs);
+for (i = 0; i < MAX_MULTI_CONN; ++i) {
+cs = s->conns[i];
 
-qemu_mutex_lock(>requests_lock);
-if (cs->state ==

[PATCH nbd 0/4] Enable multi-conn NBD [for discussion only]

2023-03-09 Thread Richard W.M. Jones

[ Patch series also available here, along with this cover letter and the
  script used to generate test results:
  https://gitlab.com/rwmjones/qemu/-/commits/2023-nbd-multi-conn-v1 ]

This patch series adds multi-conn support to the NBD block driver in
qemu.  It is only meant for discussion and testing because it has a
number of obvious shortcomings (see "XXX" in commit messages and
code).  If we decided this was a good idea, we can work on a better
patch.

The Network Block Driver (NBD) protocol allows servers to advertise
that they are capable of multi-conn.  This means they obey certain
requirements around how data is cached, allowing multiple connections
to be safely opened to the NBD server.  For example, a flush or FUA
operation on one connection is guaranteed to flush the cache on all
connections.

Clients that use multi-conn can achieve better performance.  This
seems to be down to at least two factors:

 - Avoids "head of line blocking" of large requests.

 - With NBD over Unix domain sockets, more cores can be used.

qemu-nbd, nbdkit and libnbd have all supported multi-conn for ages,
but qemu's built in NBD client does not, which is what this patch
fixes.

Below I've produced a few benchmarks.

Note these are mostly concoted to try to test NBD performance and may
not make sense in their own terms (eg. qemu's disk image layer has a
curl client so you wouldn't need to run one separately).  In the real
world we use long pipelines of NBD operations where different tools
are mixed together to achieve efficient downloads, conversions, disk
modifications and sparsification, and they would exercise different
aspects of this.

I've also included nbdcopy as a client for comparison in some tests.

All tests were run 4 times, the first result discarded, and the last 3
averaged.  If any of the last 3 were > 10% away from the average then
the test was stopped.

My summary:

 - It works effectively for qemu client & nbdkit server, especially in
   cases where the server does large, heavyweight requests.  This is
   important for us because virt-v2v uses an nbdkit Python plugin and
   various other heavyweight plugins (eg. plugins that access remote
   servers for each request).

 - It seems to make little or no difference with qemu + qemu-nbd
   server.  I speculate that's because qemu-nbd doesn't support system
   threads, so networking is bottlenecked through a single core.  Even
   though there are coroutines handling different sockets, they must
   all wait in turn to issue send(3) or recv(3) calls on the same
   core.

 - qemu-img unfortunately uses a single thread for all coroutines so
   it suffers from a similar problem to qemu-nbd.  This change would
   be much more effective if we could distribute coroutines across
   threads.

 - For tests which are highly bottlenecked on disk I/O (eg. the large
   local file test and null test) multi-conn doesn't make much
   difference.

 - Multi-conn even with only 2 connections can make up for the
   overhead of range requests, exceeding the performance of wget.

 - In the curlremote test, qemu-nbd is especially slow, for unknown
   reasons.


Integrity test (./multi-conn.pl integrity)
==

nbdkit-sparse-random-plugin
  | ^
  | nbd+unix| nbd+unix
  v |
   qemu-img convert

Reading from and writing the same data back to nbdkit sparse-random
plugin checks that the data written is the same as the data read.
This uses two Unix domain sockets, with or without multi-conn.  This
test is mainly here to check we don't crash or corrupt data with this
patch.

  server  clientmulti-conn
  ---
nbdkitqemu-img  [u/s]   9.07s   
nbdkitqemu-img  1   9.05s   
nbdkitqemu-img  2   9.02s   
nbdkitqemu-img  4   8.98s   

[u/s] = upstream qemu 7.2.0


Curl local server test (./multi-conn.pl curlhttp)
=

Localhost Apache serving a file over http
  |
  | http
  v
nbdkit-curl-plugin   or   qemu-nbd
  |
  | nbd+unix
  v
qemu-img convert   or   nbdcopy

We download an image from a local web server through
nbdkit-curl-plugin or qemu-nbd using the curl block driver, over NBD.
The image is copied to /dev/null.

  server  clientmulti-conn
  ---
  qemu-nbd nbdcopy  1   8.88s   
  qemu-nbd nbdcopy  2   8.64s   
  qemu-nbd nbdcopy  4   8.37s   
  qemu-nbdqemu-img  [u/s]   6.47s   
  qemu-nbdqemu-img  1   6.56s   
  qemu-nbdqemu-img  2   6.63s   
  qemu-nbdqemu-img  4   6.50s   
nbdkit nbdcopy  1   12.15s  
nbdkit

Re: [PATCH v2 2/3] block: make BlockBackend->disable_request_queuing atomic

2023-03-09 Thread Paolo Bonzini


On 3/7/23 22:04, Stefan Hajnoczi wrote:

  static int coroutine_fn GRAPH_RDLOCK
@@ -1271,7 +1271,8 @@ static void coroutine_fn 
blk_wait_while_drained(BlockBackend *blk)
  {
  assert(blk->in_flight > 0);
  
-if (qatomic_read(>quiesce_counter) && !blk->disable_request_queuing) {

+if (qatomic_read(>quiesce_counter) &&
+!qatomic_read(>disable_request_queuing)) {


The qatomic_inc in blk_inc_in_flight() made me a bit nervous that 
smp_mb__after_rmw() was needed there, but it's okay.


First, anyway blk_wait_while_drained() has to _eventually_ pause the 
device, not immediately.  Even if it misses that blk->quiesce_counter == 
1, the I/O will proceed and it'll just take a little more polling before 
bdrv_drained_begin() exits.


Second, I checked with CPPMEM the barriers in AIO_WAIT_WHILE() and 
aio_wait_kick() save the day, even if loading blk->quiesce_counter is 
reordered before the incremented value (1) is stored to blk->in_flight.


The CPPMEM model here uses mo_relaxed to force all possible kinds of havoc:

int main() {
  atomic_int quiesce_counter = 0;
  atomic_int waiters = 0;
  atomic_int in_flight = 0;

  {{{ { quiesce_counter.store(1, mo_relaxed);
waiters.store(1, mo_relaxed);// AIO_WAIT_WHILE starts here
atomic_thread_fence(mo_seq_cst);
in_flight.load(mo_relaxed).readsvalue(1); } // if 1, sleep

  ||| { in_flight.store(1, mo_relaxed);  // bdrv_inc_in_flight
quiesce_counter.load(mo_relaxed).readsvalue(1); // go down "if"
in_flight.store(0, mo_release);  // bdrv_dec_in_flight
atomic_thread_fence(mo_seq_cst); // aio_wait_kick starts here
waiters.load(mo_relaxed).readsvalue(0); }   // if 0, do not wake
  }}};

  return 0;
}


Because CPPMEM shows no execution consistent with the buggy 
.readsvalue(), either AIO_WAIT_WHILE will not go to sleep or it will be 
woken up with in_flight == 0.  The polling loop ends and drained_end 
restarts the coroutine from blk->queued_requests.


Paolo


  blk_dec_in_flight(blk);
  qemu_co_queue_wait(>queued_requests, NULL);
  blk_inc_in_flight(blk);
-- 2.39.2

Re: [PATCH 0/3] block: remove separate bdrv_file_open callback

2023-03-09 Thread Philippe Mathieu-Daudé


On 9/3/23 09:50, Paolo Bonzini wrote:

The value of the bdrv_file_open is sometimes checked to distinguish
protocol and format drivers, but apart from that there is no difference
between bdrv_file_open and bdrv_open.

However, they can all be distinguished by the non-NULL .protocol_name
member.  Change the checks to use .protocol_name instead of .bdrv_file_open,
and unify the two callbacks.


Nice cleanup. Series:

Reviewed-by: Philippe Mathieu-Daudé 


Paolo Bonzini (3):
   block: make assertion more generic
   block: do not check bdrv_file_open
   block: remove separate bdrv_file_open callback

Re: [PATCH 5/9] 9pfs: mark more coroutine_fns

2023-03-09 Thread Christian Schoenebeck

On Thursday, March 9, 2023 9:44:52 AM CET Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini 
> ---
>  hw/9pfs/9p.h| 4 ++--
>  hw/9pfs/codir.c | 6 +++---
>  2 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
> index 2fce4140d1e9..1b0d805b9c12 100644
> --- a/hw/9pfs/9p.h
> +++ b/hw/9pfs/9p.h
> @@ -203,7 +203,7 @@ typedef struct V9fsDir {
>  QemuMutex readdir_mutex_L;
>  } V9fsDir;
>  
> -static inline void v9fs_readdir_lock(V9fsDir *dir)
> +static inline void coroutine_fn v9fs_readdir_lock(V9fsDir *dir)
>  {
>  if (dir->proto_version == V9FS_PROTO_2000U) {
>  qemu_co_mutex_lock(>readdir_mutex_u);
> @@ -212,7 +212,7 @@ static inline void v9fs_readdir_lock(V9fsDir *dir)
>  }
>  }
>  
> -static inline void v9fs_readdir_unlock(V9fsDir *dir)
> +static inline void coroutine_fn v9fs_readdir_unlock(V9fsDir *dir)
>  {
>  if (dir->proto_version == V9FS_PROTO_2000U) {
>  qemu_co_mutex_unlock(>readdir_mutex_u);
> diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
> index 7ba63be489e7..0d0ffa1d2ba8 100644
> --- a/hw/9pfs/codir.c
> +++ b/hw/9pfs/codir.c
> @@ -68,9 +68,9 @@ int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState 
> *fidp,
>   *
>   * See v9fs_co_readdir_many() (as its only user) below for details.
>   */
> -static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
> -   struct V9fsDirEnt **entries, off_t offset,
> -   int32_t maxsize, bool dostat)
> +static int coroutine_fn do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
> +struct V9fsDirEnt **entries, off_t 
> offset,
> +int32_t maxsize, bool dostat)

You should probably fix wrapping here, as the line exceeds 80 characters.

Except of that:

Reviewed-by: Christian Schoenebeck 

>  {
>  V9fsState *s = pdu->s;
>  V9fsString name;
>

Re: [PATCH v2 2/3] block: make BlockBackend->disable_request_queuing atomic

2023-03-09 Thread Paolo Bonzini


On 3/7/23 22:04, Stefan Hajnoczi wrote:

This field is accessed by multiple threads without a lock. Use explicit
qatomic_read()/qatomic_set() calls. There is no need for acquire/release
because blk_set_disable_request_queuing() doesn't provide any
guarantees (it helps that it's used at BlockBackend creation time and
not when there is I/O in flight).


This in turn means itdoes not need to be atomic - atomics are only 
needed if there are concurrent writes.  No big deal; I am now 
resurrecting the series from the time I had noticed the queued_requests 
thread-safety problem, so this will be simplified in 8.1.  For now your 
version is okay, thanks for fixing it!


Paolo


Signed-off-by: Stefan Hajnoczi 
Reviewed-by: Hanna Czenczek 
---
  block/block-backend.c | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 68807be32b..0cba4add20 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -82,7 +82,7 @@ struct BlockBackend {
  
  int quiesce_counter; /* atomic: written under BQL, read by other threads */

  CoQueue queued_requests;
-bool disable_request_queuing;
+bool disable_request_queuing; /* atomic */
  
  VMChangeStateEntry *vmsh;

  bool force_allow_inactivate;
@@ -1232,7 +1232,7 @@ void blk_set_allow_aio_context_change(BlockBackend *blk, 
bool allow)
  void blk_set_disable_request_queuing(BlockBackend *blk, bool disable)
  {
  IO_CODE();
-blk->disable_request_queuing = disable;
+qatomic_set(>disable_request_queuing, disable);
  }
  
  static int coroutine_fn GRAPH_RDLOCK

@@ -1271,7 +1271,8 @@ static void coroutine_fn 
blk_wait_while_drained(BlockBackend *blk)
  {
  assert(blk->in_flight > 0);
  
-if (qatomic_read(>quiesce_counter) && !blk->disable_request_queuing) {

+if (qatomic_read(>quiesce_counter) &&
+!qatomic_read(>disable_request_queuing)) {
  blk_dec_in_flight(blk);
  qemu_co_queue_wait(>queued_requests, NULL);
  blk_inc_in_flight(blk);

[PATCH 3/3] block: remove separate bdrv_file_open callback

2023-03-09 Thread Paolo Bonzini

bdrv_file_open and bdrv_open are completely equivalent, they are
never checked except to see which one to invoke.  So merge them
into a single one.

Signed-off-by: Paolo Bonzini 
---
 block.c  | 2 --
 block/blkdebug.c | 2 +-
 block/blkio.c| 2 +-
 block/blkverify.c| 2 +-
 block/curl.c | 8 
 block/file-posix.c   | 8 
 block/file-win32.c   | 4 ++--
 block/gluster.c  | 8 
 block/iscsi.c| 4 ++--
 block/nbd.c  | 6 +++---
 block/nfs.c  | 2 +-
 block/null.c | 4 ++--
 block/nvme.c | 2 +-
 block/rbd.c  | 3 ++-
 block/ssh.c  | 2 +-
 block/vvfat.c| 2 +-
 include/block/block_int-common.h | 3 ---
 17 files changed, 30 insertions(+), 34 deletions(-)

diff --git a/block.c b/block.c
index 71f0ea24870e..e6d20f89ba82 100644
--- a/block.c
+++ b/block.c
@@ -1628,8 +1628,6 @@ static int bdrv_open_driver(BlockDriverState *bs, 
BlockDriver *drv,
 
 assert(!drv->bdrv_needs_filename || bs->filename[0]);
 if (drv->bdrv_open) {
-ret = drv->bdrv_file_open(bs, options, open_flags, _err);
-} else if (drv->bdrv_open) {
 ret = drv->bdrv_open(bs, options, open_flags, _err);
 } else {
 ret = 0;
diff --git a/block/blkdebug.c b/block/blkdebug.c
index addad914b3f7..c9ae3cb6ae3d 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -1072,7 +1072,7 @@ static BlockDriver bdrv_blkdebug = {
 .is_filter  = true,
 
 .bdrv_parse_filename= blkdebug_parse_filename,
-.bdrv_file_open = blkdebug_open,
+.bdrv_open  = blkdebug_open,
 .bdrv_close = blkdebug_close,
 .bdrv_reopen_prepare= blkdebug_reopen_prepare,
 .bdrv_child_perm= blkdebug_child_perm,
diff --git a/block/blkio.c b/block/blkio.c
index 0cdc99a72960..c2efe1a8feec 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -997,7 +997,7 @@ static void blkio_refresh_limits(BlockDriverState *bs, 
Error **errp)
 .format_name = name, \
 .protocol_name   = name, \
 .instance_size   = sizeof(BDRVBlkioState), \
-.bdrv_file_open  = blkio_file_open, \
+.bdrv_open   = blkio_open, \
 .bdrv_close  = blkio_close, \
 .bdrv_co_getlength   = blkio_co_getlength, \
 .bdrv_co_truncate= blkio_truncate, \
diff --git a/block/blkverify.c b/block/blkverify.c
index 1c16f86b2e70..9b59b34c02ba 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -312,7 +312,7 @@ static BlockDriver bdrv_blkverify = {
 .instance_size= sizeof(BDRVBlkverifyState),
 
 .bdrv_parse_filename  = blkverify_parse_filename,
-.bdrv_file_open   = blkverify_open,
+.bdrv_open= blkverify_open,
 .bdrv_close   = blkverify_close,
 .bdrv_child_perm  = bdrv_default_perms,
 .bdrv_co_getlength= blkverify_co_getlength,
diff --git a/block/curl.c b/block/curl.c
index 8bb39a134e4b..809858692652 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -1032,7 +1032,7 @@ static BlockDriver bdrv_http = {
 
 .instance_size  = sizeof(BDRVCURLState),
 .bdrv_parse_filename= curl_parse_filename,
-.bdrv_file_open = curl_open,
+.bdrv_open  = curl_open,
 .bdrv_close = curl_close,
 .bdrv_co_getlength  = curl_co_getlength,
 
@@ -1051,7 +1051,7 @@ static BlockDriver bdrv_https = {
 
 .instance_size  = sizeof(BDRVCURLState),
 .bdrv_parse_filename= curl_parse_filename,
-.bdrv_file_open = curl_open,
+.bdrv_open  = curl_open,
 .bdrv_close = curl_close,
 .bdrv_co_getlength  = curl_co_getlength,
 
@@ -1070,7 +1070,7 @@ static BlockDriver bdrv_ftp = {
 
 .instance_size  = sizeof(BDRVCURLState),
 .bdrv_parse_filename= curl_parse_filename,
-.bdrv_file_open = curl_open,
+.bdrv_open  = curl_open,
 .bdrv_close = curl_close,
 .bdrv_co_getlength  = curl_co_getlength,
 
@@ -1089,7 +1089,7 @@ static BlockDriver bdrv_ftps = {
 
 .instance_size  = sizeof(BDRVCURLState),
 .bdrv_parse_filename= curl_parse_filename,
-.bdrv_file_open = curl_open,
+.bdrv_open  = curl_open,
 .bdrv_close = curl_close,
 .bdrv_co_getlength  = curl_co_getlength,
 
diff --git a/block/file-posix.c b/block/file-posix.c
index 5760cf22d17d..578b512ed515 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -3323,7 +3323,7 @@ BlockDriver

[PATCH 0/3] block: remove separate bdrv_file_open callback

2023-03-09 Thread Paolo Bonzini

The value of the bdrv_file_open is sometimes checked to distinguish
protocol and format drivers, but apart from that there is no difference
between bdrv_file_open and bdrv_open.

However, they can all be distinguished by the non-NULL .protocol_name
member.  Change the checks to use .protocol_name instead of .bdrv_file_open,
and unify the two callbacks.

Paolo

Paolo Bonzini (3):
  block: make assertion more generic
  block: do not check bdrv_file_open
  block: remove separate bdrv_file_open callback

 block.c  | 17 +++--
 block/blkdebug.c |  2 +-
 block/blkio.c|  2 +-
 block/blkverify.c|  2 +-
 block/curl.c |  8 
 block/file-posix.c   |  8 
 block/file-win32.c   |  4 ++--
 block/gluster.c  |  8 
 block/iscsi.c|  4 ++--
 block/nbd.c  |  6 +++---
 block/nfs.c  |  2 +-
 block/null.c |  4 ++--
 block/nvme.c |  2 +-
 block/rbd.c  |  3 ++-
 block/ssh.c  |  2 +-
 block/vvfat.c|  2 +-
 include/block/block_int-common.h |  3 ---
 17 files changed, 37 insertions(+), 42 deletions(-)

-- 
2.39.2

[PATCH 2/3] block: do not check bdrv_file_open

2023-03-09 Thread Paolo Bonzini

The set of BlockDrivers that have .bdrv_file_open coincides with those
that have .protocol_name and guess what---checking drv->bdrv_file_open
is done to see if the driver is a protocol.  So check drv->protocol_name
instead.

Signed-off-by: Paolo Bonzini 
---
 block.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/block.c b/block.c
index 075da6517b7f..71f0ea24870e 100644
--- a/block.c
+++ b/block.c
@@ -914,7 +914,6 @@ BlockDriver *bdrv_find_protocol(const char *filename,
 int i;
 
 GLOBAL_STATE_CODE();
-/* TODO Drivers without bdrv_file_open must be specified explicitly */
 
 /*
  * XXX(hch): we really should not let host device detection
@@ -1628,7 +1627,7 @@ static int bdrv_open_driver(BlockDriverState *bs, 
BlockDriver *drv,
 bs->opaque = g_malloc0(drv->instance_size);
 
 assert(!drv->bdrv_needs_filename || bs->filename[0]);
-if (drv->bdrv_file_open) {
+if (drv->bdrv_open) {
 ret = drv->bdrv_file_open(bs, options, open_flags, _err);
 } else if (drv->bdrv_open) {
 ret = drv->bdrv_open(bs, options, open_flags, _err);
@@ -1940,7 +1939,7 @@ static int bdrv_open_common(BlockDriverState *bs, 
BlockBackend *file,
 open_flags = bdrv_open_flags(bs, bs->open_flags);
 node_name = qemu_opt_get(opts, "node-name");
 
-assert(!drv->bdrv_file_open || file == NULL);
+assert(!drv->protocol_name || file == NULL);
 ret = bdrv_open_driver(bs, drv, node_name, options, open_flags, errp);
 if (ret < 0) {
 goto fail_opts;
@@ -2040,7 +2039,7 @@ static int bdrv_fill_options(QDict **options, const char 
*filename,
 }
 /* If the user has explicitly specified the driver, this choice should
  * override the BDRV_O_PROTOCOL flag */
-protocol = drv->bdrv_file_open;
+protocol = drv->protocol_name;
 }
 
 if (protocol) {
@@ -4000,7 +3999,7 @@ bdrv_open_inherit(const char *filename, const char 
*reference, QDict *options,
 }
 
 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
-assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
+assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->protocol_name);
 /* file must be NULL if a protocol BDS is about to be created
  * (the inverse results in an error message from bdrv_open_common()) */
 assert(!(flags & BDRV_O_PROTOCOL) || !file);
@@ -5785,7 +5784,7 @@ int64_t coroutine_fn 
bdrv_co_get_allocated_file_size(BlockDriverState *bs)
 return drv->bdrv_co_get_allocated_file_size(bs);
 }
 
-if (drv->bdrv_file_open) {
+if (drv->protocol_name) {
 /*
  * Protocol drivers default to -ENOTSUP (most of their data is
  * not stored in any of their children (if they even have any),
@@ -7888,7 +7887,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
  *   Both of these conditions are represented by 
generate_json_filename.
  */
 if (primary_child_bs->exact_filename[0] &&
-primary_child_bs->drv->bdrv_file_open &&
+primary_child_bs->drv->protocol_name &&
 !drv->is_filter && !generate_json_filename)
 {
 strcpy(bs->exact_filename, primary_child_bs->exact_filename);
-- 
2.39.2

[PATCH 1/3] block: make assertion more generic

2023-03-09 Thread Paolo Bonzini

.bdrv_needs_filename is only set for drivers that also set bdrv_file_open,
i.e. protocol drivers.

So we can make the assertion always, it will always pass for those drivers
that use bdrv_open.

Signed-off-by: Paolo Bonzini 
---
 block.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block.c b/block.c
index 0dd604d0f6a8..075da6517b7f 100644
--- a/block.c
+++ b/block.c
@@ -1627,8 +1627,8 @@ static int bdrv_open_driver(BlockDriverState *bs, 
BlockDriver *drv,
 bs->drv = drv;
 bs->opaque = g_malloc0(drv->instance_size);
 
+assert(!drv->bdrv_needs_filename || bs->filename[0]);
 if (drv->bdrv_file_open) {
-assert(!drv->bdrv_needs_filename || bs->filename[0]);
 ret = drv->bdrv_file_open(bs, options, open_flags, _err);
 } else if (drv->bdrv_open) {
 ret = drv->bdrv_open(bs, options, open_flags, _err);
-- 
2.39.2

[PATCH 6/9] qemu-pr-helper: mark more coroutine_fns

2023-03-09 Thread Paolo Bonzini

do_sgio can suspend via the coroutine function thread_pool_submit_co, so it
has to be coroutine_fn as well---and the same is true of all its direct and
indirect callers.

Signed-off-by: Paolo Bonzini 
---
 scsi/qemu-pr-helper.c | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index 199227a556e6..9df82d93a7d2 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -177,8 +177,8 @@ static int do_sgio_worker(void *opaque)
 return status;
 }
 
-static int do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
-uint8_t *buf, int *sz, int dir)
+static int coroutine_fn do_sgio(int fd, const uint8_t *cdb, uint8_t *sense,
+uint8_t *buf, int *sz, int dir)
 {
 ThreadPool *pool = aio_get_thread_pool(qemu_get_aio_context());
 int r;
@@ -320,7 +320,7 @@ static SCSISense mpath_generic_sense(int r)
 }
 }
 
-static int mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
+static int coroutine_fn mpath_reconstruct_sense(int fd, int r, uint8_t *sense)
 {
 switch (r) {
 case MPATH_PR_SUCCESS:
@@ -372,8 +372,8 @@ static int mpath_reconstruct_sense(int fd, int r, uint8_t 
*sense)
 }
 }
 
-static int multipath_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
-   uint8_t *data, int sz)
+static int coroutine_fn multipath_pr_in(int fd, const uint8_t *cdb, uint8_t 
*sense,
+uint8_t *data, int sz)
 {
 int rq_servact = cdb[1];
 struct prin_resp resp;
@@ -427,8 +427,8 @@ static int multipath_pr_in(int fd, const uint8_t *cdb, 
uint8_t *sense,
 return mpath_reconstruct_sense(fd, r, sense);
 }
 
-static int multipath_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
-const uint8_t *param, int sz)
+static int coroutine_fn multipath_pr_out(int fd, const uint8_t *cdb, uint8_t 
*sense,
+ const uint8_t *param, int sz)
 {
 int rq_servact = cdb[1];
 int rq_scope = cdb[2] >> 4;
@@ -545,8 +545,8 @@ static int multipath_pr_out(int fd, const uint8_t *cdb, 
uint8_t *sense,
 }
 #endif
 
-static int do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
-uint8_t *data, int *resp_sz)
+static int coroutine_fn do_pr_in(int fd, const uint8_t *cdb, uint8_t *sense,
+ uint8_t *data, int *resp_sz)
 {
 #ifdef CONFIG_MPATH
 if (is_mpath(fd)) {
@@ -563,8 +563,8 @@ static int do_pr_in(int fd, const uint8_t *cdb, uint8_t 
*sense,
SG_DXFER_FROM_DEV);
 }
 
-static int do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
- const uint8_t *param, int sz)
+static int coroutine_fn do_pr_out(int fd, const uint8_t *cdb, uint8_t *sense,
+  const uint8_t *param, int sz)
 {
 int resp_sz;
 
-- 
2.39.2

[PATCH 7/9] tests: mark more coroutine_fns

2023-03-09 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 tests/unit/test-thread-pool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/test-thread-pool.c b/tests/unit/test-thread-pool.c
index 6020e65d6986..493b966b94f9 100644
--- a/tests/unit/test-thread-pool.c
+++ b/tests/unit/test-thread-pool.c
@@ -71,7 +71,7 @@ static void test_submit_aio(void)
 g_assert_cmpint(data.ret, ==, 0);
 }
 
-static void co_test_cb(void *opaque)
+static void coroutine_fn co_test_cb(void *opaque)
 {
 WorkerTestData *data = opaque;
 
-- 
2.39.2

[PATCH 8/9] qcow2: mark various functions as coroutine_fn and GRAPH_RDLOCK

2023-03-09 Thread Paolo Bonzini

Functions that can do I/O (including calling bdrv_is_allocated
and bdrv_block_status functions) are prime candidates for being
coroutine_fns.  Make the change for those that are themselves called
only from coroutine_fns.  Also annotate that they are called with the
graph rdlock taken, thus allowing them to call bdrv_co_*() functions
for I/O.

Signed-off-by: Paolo Bonzini 
---
 block/qcow2-bitmap.c   |  2 +-
 block/qcow2-cluster.c  | 20 
 block/qcow2-refcount.c |  8 
 block/qcow2-snapshot.c | 25 +
 block/qcow2.c  | 26 +-
 block/qcow2.h  | 15 ---
 6 files changed, 51 insertions(+), 45 deletions(-)

diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 5f456a2785c6..a952fd58d85e 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -1221,7 +1221,7 @@ out:
 }
 
 /* Checks to see if it's safe to resize bitmaps */
-int qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error **errp)
+int coroutine_fn qcow2_truncate_bitmaps_check(BlockDriverState *bs, Error 
**errp)
 {
 BDRVQcow2State *s = bs->opaque;
 Qcow2BitmapList *bm_list;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index a9e6622fe300..92526dea504c 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1126,7 +1126,7 @@ err:
  * Frees the allocated clusters because the request failed and they won't
  * actually be linked.
  */
-void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
+void coroutine_fn qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta 
*m)
 {
 BDRVQcow2State *s = bs->opaque;
 if (!has_data_file(bs) && !m->keep_old_clusters) {
@@ -1156,9 +1156,11 @@ void qcow2_alloc_cluster_abort(BlockDriverState *bs, 
QCowL2Meta *m)
  *
  * Returns 0 on success, -errno on failure.
  */
-static int calculate_l2_meta(BlockDriverState *bs, uint64_t 
host_cluster_offset,
- uint64_t guest_offset, unsigned bytes,
- uint64_t *l2_slice, QCowL2Meta **m, bool keep_old)
+static int coroutine_fn calculate_l2_meta(BlockDriverState *bs,
+ uint64_t host_cluster_offset,
+  uint64_t guest_offset, unsigned 
bytes,
+  uint64_t *l2_slice, QCowL2Meta **m,
+ bool keep_old)
 {
 BDRVQcow2State *s = bs->opaque;
 int sc_index, l2_index = offset_to_l2_slice_index(s, guest_offset);
@@ -1599,8 +1601,10 @@ out:
  * function has been waiting for another request and the allocation must be
  * restarted, but the whole request should not be failed.
  */
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
-   uint64_t *host_offset, uint64_t 
*nb_clusters)
+static int coroutine_fn do_alloc_cluster_offset(BlockDriverState *bs,
+   uint64_t guest_offset,
+uint64_t *host_offset,
+   uint64_t *nb_clusters)
 {
 BDRVQcow2State *s = bs->opaque;
 
@@ -2065,8 +2069,8 @@ static int zero_in_l2_slice(BlockDriverState *bs, 
uint64_t offset,
 return nb_clusters;
 }
 
-static int zero_l2_subclusters(BlockDriverState *bs, uint64_t offset,
-   unsigned nb_subclusters)
+static int coroutine_fn zero_l2_subclusters(BlockDriverState *bs, uint64_t 
offset,
+unsigned nb_subclusters)
 {
 BDRVQcow2State *s = bs->opaque;
 uint64_t *l2_slice;
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index b092f89da98b..b2a81ff707ab 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1030,8 +1030,8 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, 
uint64_t size)
 return offset;
 }
 
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
-int64_t nb_clusters)
+int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t 
offset,
+ int64_t nb_clusters)
 {
 BDRVQcow2State *s = bs->opaque;
 uint64_t cluster_index, refcount;
@@ -1069,7 +1069,7 @@ int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, 
uint64_t offset,
 
 /* only used to allocate compressed sectors. We try to allocate
contiguous sectors. size must be <= cluster_size */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
+int64_t coroutine_fn qcow2_alloc_bytes(BlockDriverState *bs, int size)
 {
 BDRVQcow2State *s = bs->opaque;
 int64_t offset;
@@ -3685,7 +3685,7 @@ out:
 return ret;
 }
 
-int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
+int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
 {
 BDRVQcow2State *s = bs->opaque;
 int64_t i;
diff --git

[PATCH 4/9] nbd: mark more coroutine_fns, do not use co_wrappers

2023-03-09 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 nbd/server.c | 48 
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index a4750e41880a..6f5fcade2a54 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1409,8 +1409,8 @@ nbd_read_eof(NBDClient *client, void *buffer, size_t 
size, Error **errp)
 return 1;
 }
 
-static int nbd_receive_request(NBDClient *client, NBDRequest *request,
-   Error **errp)
+static int coroutine_fn nbd_receive_request(NBDClient *client, NBDRequest 
*request,
+Error **errp)
 {
 uint8_t buf[NBD_REQUEST_SIZE];
 uint32_t magic;
@@ -1895,12 +1895,12 @@ static inline void set_be_simple_reply(NBDSimpleReply 
*reply, uint64_t error,
 stq_be_p(>handle, handle);
 }
 
-static int nbd_co_send_simple_reply(NBDClient *client,
-uint64_t handle,
-uint32_t error,
-void *data,
-size_t len,
-Error **errp)
+static int coroutine_fn nbd_co_send_simple_reply(NBDClient *client,
+ uint64_t handle,
+ uint32_t error,
+ void *data,
+ size_t len,
+ Error **errp)
 {
 NBDSimpleReply reply;
 int nbd_err = system_errno_to_nbd_errno(error);
@@ -2038,8 +2038,8 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient 
*client,
 stl_be_p(, pnum);
 ret = nbd_co_send_iov(client, iov, 1, errp);
 } else {
-ret = blk_pread(exp->common.blk, offset + progress, pnum,
-data + progress, 0);
+ret = blk_co_pread(exp->common.blk, offset + progress, pnum,
+   data + progress, 0);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "reading from file failed");
 break;
@@ -2198,9 +2198,9 @@ static int coroutine_fn 
blockalloc_to_extents(BlockBackend *blk,
  * @ea is converted to BE by the function
  * @last controls whether NBD_REPLY_FLAG_DONE is sent.
  */
-static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
-   NBDExtentArray *ea,
-   bool last, uint32_t context_id, Error **errp)
+static int coroutine_fn nbd_co_send_extents(NBDClient *client, uint64_t handle,
+NBDExtentArray *ea,
+   bool  last, uint32_t context_id, 
Error **errp)
 {
 NBDStructuredMeta chunk;
 struct iovec iov[] = {
@@ -2277,10 +2277,10 @@ static void bitmap_to_extents(BdrvDirtyBitmap *bitmap,
 bdrv_dirty_bitmap_unlock(bitmap);
 }
 
-static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
-  BdrvDirtyBitmap *bitmap, uint64_t offset,
-  uint32_t length, bool dont_fragment, bool last,
-  uint32_t context_id, Error **errp)
+static int coroutine_fn nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
+   BdrvDirtyBitmap *bitmap, uint64_t 
offset,
+   uint32_t length, bool 
dont_fragment, bool last,
+   uint32_t context_id, Error **errp)
 {
 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
 g_autoptr(NBDExtentArray) ea = nbd_extent_array_new(nb_extents);
@@ -2297,8 +2297,8 @@ static int nbd_co_send_bitmap(NBDClient *client, uint64_t 
handle,
  * to the client (although the caller may still need to disconnect after
  * reporting the error).
  */
-static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
-  Error **errp)
+static int coroutine_fn nbd_co_receive_request(NBDRequestData *req, NBDRequest 
*request,
+   Error **errp)
 {
 NBDClient *client = req->client;
 int valid_flags;
@@ -2446,7 +2446,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient 
*client, NBDRequest *request,
data, request->len, errp);
 }
 
-ret = blk_pread(exp->common.blk, request->from, request->len, data, 0);
+ret = blk_co_pread(exp->common.blk, request->from, request->len, data, 0);
 if (ret < 0) {
 return nbd_send_generic_reply(client, request->handle, ret,
   "reading from file failed", errp);
@@ -2513,8 +2513,8 @@ static coroutine_fn int nbd_handle_request(NBDClient 
*client,
 if (request->flags & NBD_CMD_FLAG_FUA) {
 flags |=

[PATCH 9/9] vmdk: make vmdk_is_cid_valid a coroutine_fn

2023-03-09 Thread Paolo Bonzini

Functions that can do I/O are prime candidates for being coroutine_fns.  Make 
the
change for the one that is itself called only from coroutine_fns.  Unfortunately
vmdk does not use a coroutine_fn for the bulk of the open (like qcow2 does) so
vmdk_read_cid cannot have the same treatment.

Signed-off-by: Paolo Bonzini 
---
 block/vmdk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index f5f49018fe4a..3f8c731e32e8 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -376,7 +376,7 @@ out:
 return ret;
 }
 
-static int vmdk_is_cid_valid(BlockDriverState *bs)
+static int coroutine_fn vmdk_is_cid_valid(BlockDriverState *bs)
 {
 BDRVVmdkState *s = bs->opaque;
 uint32_t cur_pcid;
-- 
2.39.2

[PATCH 0/9] (mostly) block: add more coroutine_fn annotations, use bdrv/blk_co_*

2023-03-09 Thread Paolo Bonzini

"Mostly" because there is a 9pfs patch in here too.

The series was developed with the help of vrc and the clang TSA annotations.

Paolo

Paolo Bonzini (9):
  vvfat: mark various functions as coroutine_fn
  blkdebug: add missing coroutine_fn annotation
  mirror: make mirror_flush a coroutine_fn
  nbd: mark more coroutine_fns
  9pfs: mark more coroutine_fns
  qemu-pr-helper: mark more coroutine_fns
  tests: mark more coroutine_fns
  qcow2: mark various functions as coroutine_fn and GRAPH_RDLOCK
  vmdk: make vmdk_is_cid_valid a coroutine_fn

 block/blkdebug.c  |  4 +--
 block/mirror.c|  4 +--
 block/qcow2-bitmap.c  |  2 +-
 block/qcow2-cluster.c | 20 +++-
 block/qcow2-refcount.c|  8 ++---
 block/qcow2-snapshot.c| 25 +++
 block/qcow2.c | 26 
 block/qcow2.h | 15 -
 block/vmdk.c  |  2 +-
 block/vvfat.c | 58 ++-
 hw/9pfs/9p.h  |  4 +--
 hw/9pfs/codir.c   |  6 ++--
 nbd/server.c  | 48 ++---
 scsi/qemu-pr-helper.c | 22 ++---
 tests/unit/test-thread-pool.c |  2 +-
 15 files changed, 127 insertions(+), 119 deletions(-)

-- 
2.39.2

[PATCH 1/9] vvfat: mark various functions as coroutine_fn

2023-03-09 Thread Paolo Bonzini

Functions that can do I/O are prime candidates for being coroutine_fns.  Make 
the
change for those that are themselves called only from coroutine_fns.

In addition, coroutine_fns should do I/O using bdrv_co_*() functions, for
which it is required to hold the BlockDriverState graph lock.  So also nnotate
functions on the I/O path with TSA attributes, making it possible to
switch them to use bdrv_co_*() functions.

Signed-off-by: Paolo Bonzini 
---
 block/vvfat.c | 58 ++-
 1 file changed, 30 insertions(+), 28 deletions(-)

diff --git a/block/vvfat.c b/block/vvfat.c
index fd45e86416b2..0ddc91fc096a 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1053,7 +1053,7 @@ static BDRVVVFATState *vvv = NULL;
 #endif
 
 static int enable_write_target(BlockDriverState *bs, Error **errp);
-static int is_consistent(BDRVVVFATState *s);
+static int coroutine_fn is_consistent(BDRVVVFATState *s);
 
 static QemuOptsList runtime_opts = {
 .name = "vvfat",
@@ -1469,8 +1469,8 @@ static void print_mapping(const mapping_t* mapping)
 }
 #endif
 
-static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
-uint8_t *buf, int nb_sectors)
+static int coroutine_fn GRAPH_RDLOCK
+vvfat_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int 
nb_sectors)
 {
 BDRVVVFATState *s = bs->opaque;
 int i;
@@ -1490,8 +1490,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t 
sector_num,
 DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64
  " allocated\n", sector_num,
  n >> BDRV_SECTOR_BITS));
-if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, n,
-   buf + i * 0x200, 0) < 0) {
+if (bdrv_co_pread(s->qcow, sector_num * BDRV_SECTOR_SIZE, n,
+  buf + i * 0x200, 0) < 0) {
 return -1;
 }
 i += (n >> BDRV_SECTOR_BITS) - 1;
@@ -1532,7 +1532,7 @@ static int vvfat_read(BlockDriverState *bs, int64_t 
sector_num,
 return 0;
 }
 
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
 vvfat_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
 QEMUIOVector *qiov, BdrvRequestFlags flags)
 {
@@ -1796,8 +1796,8 @@ static inline uint32_t modified_fat_get(BDRVVVFATState* s,
 }
 }
 
-static inline bool cluster_was_modified(BDRVVVFATState *s,
-uint32_t cluster_num)
+static inline bool coroutine_fn GRAPH_RDLOCK
+cluster_was_modified(BDRVVVFATState *s, uint32_t cluster_num)
 {
 int was_modified = 0;
 int i;
@@ -1852,8 +1852,8 @@ typedef enum {
  * Further, the files/directories handled by this function are
  * assumed to be *not* deleted (and *only* those).
  */
-static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
-direntry_t* direntry, const char* path)
+static uint32_t coroutine_fn GRAPH_RDLOCK
+get_cluster_count_for_direntry(BDRVVVFATState* s, direntry_t* direntry, const 
char* path)
 {
 /*
  * This is a little bit tricky:
@@ -1979,9 +1979,9 @@ static uint32_t 
get_cluster_count_for_direntry(BDRVVVFATState* s,
 if (res) {
 return -1;
 }
-res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE,
-  BDRV_SECTOR_SIZE, s->cluster_buffer,
-  0);
+res = bdrv_co_pwrite(s->qcow, offset * 
BDRV_SECTOR_SIZE,
+ BDRV_SECTOR_SIZE, 
s->cluster_buffer,
+ 0);
 if (res < 0) {
 return -2;
 }
@@ -2011,8 +2011,8 @@ static uint32_t 
get_cluster_count_for_direntry(BDRVVVFATState* s,
  * It returns 0 upon inconsistency or error, and the number of clusters
  * used by the directory, its subdirectories and their files.
  */
-static int check_directory_consistency(BDRVVVFATState *s,
-int cluster_num, const char* path)
+static int coroutine_fn GRAPH_RDLOCK
+check_directory_consistency(BDRVVVFATState *s, int cluster_num, const char* 
path)
 {
 int ret = 0;
 unsigned char* cluster = g_malloc(s->cluster_size);
@@ -2138,7 +2138,8 @@ DLOG(fprintf(stderr, "check direntry %d:\n", i); 
print_direntry(direntries + i))
 }
 
 /* returns 1 on success */
-static int is_consistent(BDRVVVFATState* s)
+static int coroutine_fn GRAPH_RDLOCK
+is_consistent(BDRVVVFATState* s)
 {
 int i, check;
 int used_clusters_count = 0;
@@ -2414,8 +2415,8 @@ static int commit_mappings(BDRVVVFATState* s,
 return 0;
 }
 
-static int commit_direntries(BDRVVVFATState* s,
-int dir_index, int parent_mapping_index)
+static int coroutine_fn GRAPH_RDLOCK
+commit_direntries(BDRVVVFATState* s, int dir_index,

[PATCH 3/9] mirror: make mirror_flush a coroutine_fn, do not use co_wrappers

2023-03-09 Thread Paolo Bonzini

mirror_flush calls a mixed function blk_flush but it is only called
from mirror_run; so call the coroutine version and make mirror_flush
a coroutine_fn too.

Signed-off-by: Paolo Bonzini 
---
 block/mirror.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 663e2b700241..af9bbd23d4cf 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -886,9 +886,9 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
 /* Called when going out of the streaming phase to flush the bulk of the
  * data to the medium, or just before completing.
  */
-static int mirror_flush(MirrorBlockJob *s)
+static int coroutine_fn mirror_flush(MirrorBlockJob *s)
 {
-int ret = blk_flush(s->target);
+int ret = blk_co_flush(s->target);
 if (ret < 0) {
 if (mirror_error_action(s, false, -ret) == BLOCK_ERROR_ACTION_REPORT) {
 s->ret = ret;
-- 
2.39.2

[PATCH 2/9] blkdebug: add missing coroutine_fn annotation

2023-03-09 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 block/blkdebug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blkdebug.c b/block/blkdebug.c
index 978c8cff9e33..addad914b3f7 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -583,8 +583,8 @@ out:
 return ret;
 }
 
-static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
-  BlkdebugIOType iotype)
+static int coroutine_fn rule_check(BlockDriverState *bs, uint64_t offset,
+   uint64_t bytes, BlkdebugIOType iotype)
 {
 BDRVBlkdebugState *s = bs->opaque;
 BlkdebugRule *rule = NULL;
-- 
2.39.2

[PATCH 5/9] 9pfs: mark more coroutine_fns

2023-03-09 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 hw/9pfs/9p.h| 4 ++--
 hw/9pfs/codir.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
index 2fce4140d1e9..1b0d805b9c12 100644
--- a/hw/9pfs/9p.h
+++ b/hw/9pfs/9p.h
@@ -203,7 +203,7 @@ typedef struct V9fsDir {
 QemuMutex readdir_mutex_L;
 } V9fsDir;
 
-static inline void v9fs_readdir_lock(V9fsDir *dir)
+static inline void coroutine_fn v9fs_readdir_lock(V9fsDir *dir)
 {
 if (dir->proto_version == V9FS_PROTO_2000U) {
 qemu_co_mutex_lock(>readdir_mutex_u);
@@ -212,7 +212,7 @@ static inline void v9fs_readdir_lock(V9fsDir *dir)
 }
 }
 
-static inline void v9fs_readdir_unlock(V9fsDir *dir)
+static inline void coroutine_fn v9fs_readdir_unlock(V9fsDir *dir)
 {
 if (dir->proto_version == V9FS_PROTO_2000U) {
 qemu_co_mutex_unlock(>readdir_mutex_u);
diff --git a/hw/9pfs/codir.c b/hw/9pfs/codir.c
index 7ba63be489e7..0d0ffa1d2ba8 100644
--- a/hw/9pfs/codir.c
+++ b/hw/9pfs/codir.c
@@ -68,9 +68,9 @@ int coroutine_fn v9fs_co_readdir(V9fsPDU *pdu, V9fsFidState 
*fidp,
  *
  * See v9fs_co_readdir_many() (as its only user) below for details.
  */
-static int do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
-   struct V9fsDirEnt **entries, off_t offset,
-   int32_t maxsize, bool dostat)
+static int coroutine_fn do_readdir_many(V9fsPDU *pdu, V9fsFidState *fidp,
+struct V9fsDirEnt **entries, off_t 
offset,
+int32_t maxsize, bool dostat)
 {
 V9fsState *s = pdu->s;
 V9fsString name;
-- 
2.39.2

46 matches

Mail list logo