Re: [PULL 00/51] Block layer patches

2022-12-14 Thread Peter Maydell
On Wed, 14 Dec 2022 at 13:45, Kevin Wolf  wrote:
>
> The following changes since commit 5204b499a6cae4dfd9fe762d5e6e82224892383b:
>
>   mailmap: Fix Stefan Weil author email (2022-12-13 15:56:57 -0500)
>
> are available in the Git repository at:
>
>   https://repo.or.cz/qemu/kevin.git tags/for-upstream
>
> for you to fetch changes up to 2ad19e5dc950d4b340894846b9e71c0b20f9a1cc:
>
>   block: GRAPH_RDLOCK for functions only called by co_wrappers (2022-12-14 
> 13:13:07 +0100)
>
> 
> Block layer patches
>
> - Code cleanups around block graph modification
> - Simplify drain
> - coroutine_fn correctness fixes, including splitting generated
>   coroutine wrappers into co_wrapper (to be called only from
>   non-coroutine context) and co_wrapper_mixed (both coroutine and
>   non-coroutine context)
> - Introduce a block graph rwlock
>
> 

Fails to build on the tsan-build job:
https://gitlab.com/qemu-project/qemu/-/jobs/3476176683

In file included from ../hw/nvram/fw_cfg-interface.c:10:
In file included from /builds/qemu-project/qemu/include/hw/nvram/fw_cfg.h:7:
In file included from /builds/qemu-project/qemu/include/sysemu/dma.h:15:
In file included from /builds/qemu-project/qemu/include/block/block.h:27:
In file included from
/builds/qemu-project/qemu/include/block/block-global-state.h:27:
In file included from /builds/qemu-project/qemu/include/block/block-common.h:27:
In file included from /builds/qemu-project/qemu/include/block/aio.h:25:
/builds/qemu-project/qemu/include/block/graph-lock.h:62:31: error:
invalid capability name 'graph-lock'; capability name must be 'mutex'
or 'role' [-Werror,-Wthread-safety-attributes]
typedef struct TSA_CAPABILITY("graph-lock") BdrvGraphLock {
   ^

(I see the same error on my x86 macos system.)

thanks
-- PMM



[PULL v2 10/30] qapi block: Elide redundant has_FOO in generated C

2022-12-14 Thread Markus Armbruster
The has_FOO for pointer-valued FOO are redundant, except for arrays.
They are also a nuisance to work with.  Recent commit "qapi: Start to
elide redundant has_FOO in generated C" provided the means to elide
them step by step.  This is the step for qapi/block*.json.

Said commit explains the transformation in more detail.

There is one instance of the invariant violation mentioned there:
qcow2_signal_corruption() passes false, "" when node_name is an empty
string.  Take care to pass NULL then.

The previous two commits cleaned up two more.

Additionally, helper bdrv_latency_histogram_stats() loses its output
parameters and returns a value instead.

Cc: Kevin Wolf 
Cc: Hanna Reitz 
Cc: qemu-block@nongnu.org
Signed-off-by: Markus Armbruster 
Message-Id: <20221104160712.3005652-11-arm...@redhat.com>
[Fixes for #ifndef LIBRBD_SUPPORTS_ENCRYPTION and MacOS squashed in]
---
 block/block-backend.c  |   2 +-
 block/copy-before-write.c  |   2 +-
 block/dirty-bitmap.c   |   1 -
 block/export/export.c  |   2 +-
 block/export/vduse-blk.c   |   3 +-
 block/gluster.c|   3 -
 block/monitor/block-hmp-cmds.c |  48 --
 block/qapi-sysemu.c|  73 +--
 block/qapi.c   |  62 +
 block/qcow.c   |  10 +-
 block/qcow2.c  |  18 ++--
 block/qed.c|   2 +-
 block/quorum.c |   2 +-
 block/rbd.c|  17 ++--
 block/ssh.c|   2 +-
 blockdev-nbd.c |   9 +-
 blockdev.c | 164 +
 blockjob.c |   2 -
 monitor/hmp-cmds.c |   3 +-
 qemu-img.c |  13 ++-
 qemu-nbd.c |   2 -
 scripts/qapi/schema.py |   3 -
 ui/cocoa.m |  11 +--
 23 files changed, 173 insertions(+), 281 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index d98a96ff37..bf0ea3cfed 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1860,7 +1860,7 @@ static void send_qmp_error_event(BlockBackend *blk,
 BlockDriverState *bs = blk_bs(blk);
 
 optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-qapi_event_send_block_io_error(blk_name(blk), !!bs,
+qapi_event_send_block_io_error(blk_name(blk),
bs ? bdrv_get_node_name(bs) : NULL, optype,
action, blk_iostatus_is_enabled(blk),
error == ENOSPC, strerror(error));
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index 4abaa7339e..b28ae25ec1 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -432,7 +432,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, 
int flags,
 return -EINVAL;
 }
 
-if (opts->has_bitmap) {
+if (opts->bitmap) {
 bitmap = block_dirty_bitmap_lookup(opts->bitmap->node,
opts->bitmap->name, NULL, errp);
 if (!bitmap) {
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index bf3dc0512a..9c39550698 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -541,7 +541,6 @@ BlockDirtyInfoList 
*bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 
 info->count = bdrv_get_dirty_count(bm);
 info->granularity = bdrv_dirty_bitmap_granularity(bm);
-info->has_name = !!bm->name;
 info->name = g_strdup(bm->name);
 info->recording = bdrv_dirty_bitmap_recording(bm);
 info->busy = bdrv_dirty_bitmap_busy(bm);
diff --git a/block/export/export.c b/block/export/export.c
index 7cc0c25c1c..28a91c9c42 100644
--- a/block/export/export.c
+++ b/block/export/export.c
@@ -114,7 +114,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error 
**errp)
 ctx = bdrv_get_aio_context(bs);
 aio_context_acquire(ctx);
 
-if (export->has_iothread) {
+if (export->iothread) {
 IOThread *iothread;
 AioContext *new_ctx;
 Error **set_context_errp;
diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
index f101c24c3f..350d6fdaf0 100644
--- a/block/export/vduse-blk.c
+++ b/block/export/vduse-blk.c
@@ -265,8 +265,7 @@ static int vduse_blk_exp_create(BlockExport *exp, 
BlockExportOptions *opts,
 }
 vblk_exp->num_queues = num_queues;
 vblk_exp->handler.blk = exp->blk;
-vblk_exp->handler.serial = g_strdup(vblk_opts->has_serial ?
-vblk_opts->serial : "");
+vblk_exp->handler.serial = g_strdup(vblk_opts->serial ?: "");
 vblk_exp->handler.logical_block_size = logical_block_size;
 vblk_exp->handler.writable = opts->writable;
 
diff --git a/block/gluster.c b/block/gluster.c
index 7c90f7ba4b..7efc296399 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -830,7 +830,6 @@ static int qemu_gluster_open(BlockDriverState *bs,  QDict 

Re: [PATCH v4 2/4] hw/nvme: rename shadow doorbell related trace events

2022-12-14 Thread Keith Busch
On Mon, Dec 12, 2022 at 12:44:07PM +0100, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Rename the trace events related to writing the event index and reading
> the doorbell value to make it more clear that the event is associated
> with an actual update (write or read respectively).
> 
> Reviewed-by: Philippe Mathieu-Daudé 
> Signed-off-by: Klaus Jensen 

Looks good.

Reviewed-by: Keith Busch 



Re: [PATCH v4 3/4] hw/nvme: fix missing endian conversions for doorbell buffers

2022-12-14 Thread Keith Busch
On Mon, Dec 12, 2022 at 12:44:08PM +0100, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> The eventidx and doorbell value are not handling endianness correctly.
> Fix this.
> 
> Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
> Cc: qemu-sta...@nongnu.org
> Reported-by: Guenter Roeck 
> Signed-off-by: Klaus Jensen 

Looks good.

Reviewed-by: Keith Busch 



Re: [PATCH v4 4/4] hw/nvme: fix missing cq eventidx update

2022-12-14 Thread Keith Busch
On Mon, Dec 12, 2022 at 12:44:09PM +0100, Klaus Jensen wrote:
> From: Klaus Jensen 
> 
> Prior to reading the shadow doorbell cq head, we have to update the
> eventidx. Otherwise, we risk that the driver will skip an mmio doorbell
> write. This happens on riscv64, as reported by Guenter.
> 
> Adding the missing update to the cq eventidx fixes the issue.
> 
> Fixes: 3f7fe8de3d49 ("hw/nvme: Implement shadow doorbell buffer support")
> Cc: qemu-sta...@nongnu.org
> Cc: qemu-ri...@nongnu.org
> Reported-by: Guenter Roeck 
> Signed-off-by: Klaus Jensen 


Looks good.

Reviewed-by: Keith Busch 



[PULL 13/14] block/vmdk: Simplify vmdk_co_create() to return directly

2022-12-14 Thread Markus Armbruster
Cc: Fam Zheng 
Cc: Kevin Wolf 
Cc: Hanna Reitz 
Cc: qemu-block@nongnu.org
Signed-off-by: Markus Armbruster 
Message-Id: <20221122134917.1217307-3-arm...@redhat.com>
Reviewed-by: Peter Maydell 
Reviewed-by: Philippe Mathieu-Daudé 
---
 block/vmdk.c | 28 +++-
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 26376352b9..bac3d8db50 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2821,7 +2821,6 @@ static BlockBackend *vmdk_co_create_cb(int64_t size, int 
idx,
 static int coroutine_fn vmdk_co_create(BlockdevCreateOptions *create_options,
Error **errp)
 {
-int ret;
 BlockdevCreateOptionsVmdk *opts;
 
 opts = &create_options->u.vmdk;
@@ -2829,24 +2828,19 @@ static int coroutine_fn 
vmdk_co_create(BlockdevCreateOptions *create_options,
 /* Validate options */
 if (!QEMU_IS_ALIGNED(opts->size, BDRV_SECTOR_SIZE)) {
 error_setg(errp, "Image size must be a multiple of 512 bytes");
-ret = -EINVAL;
-goto out;
+return -EINVAL;
 }
 
-ret = vmdk_co_do_create(opts->size,
-opts->subformat,
-opts->adapter_type,
-opts->backing_file,
-opts->hwversion,
-opts->toolsversion,
-false,
-opts->zeroed_grain,
-vmdk_co_create_cb,
-opts, errp);
-return ret;
-
-out:
-return ret;
+return vmdk_co_do_create(opts->size,
+ opts->subformat,
+ opts->adapter_type,
+ opts->backing_file,
+ opts->hwversion,
+ opts->toolsversion,
+ false,
+ opts->zeroed_grain,
+ vmdk_co_create_cb,
+ opts, errp);
 }
 
 static void vmdk_close(BlockDriverState *bs)
-- 
2.37.3




[PULL 42/51] test-bdrv-drain: Fix incorrrect drain assumptions

2022-12-14 Thread Kevin Wolf
The test case assumes that a drain only happens in one specific place
where it drains explicitly. This assumption happened to hold true until
now, but block layer functions may drain interally (any graph
modifications are going to do that through bdrv_graph_wrlock()), so this
is incorrect. Make sure that the test code in .drained_begin only runs
where we actually want it to run.

When scheduling a BH from .drained_begin, we also need to increase the
in_flight counter to make sure that the operation is actually completed
in time before the node that it works on goes away.

Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-10-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 tests/unit/test-bdrv-drain.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 2686a8acee..8cedea4959 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -1107,6 +1107,7 @@ struct detach_by_parent_data {
 BlockDriverState *c;
 BdrvChild *child_c;
 bool by_parent_cb;
+bool detach_on_drain;
 };
 static struct detach_by_parent_data detach_by_parent_data;
 
@@ -1114,6 +1115,7 @@ static void detach_indirect_bh(void *opaque)
 {
 struct detach_by_parent_data *data = opaque;
 
+bdrv_dec_in_flight(data->child_b->bs);
 bdrv_unref_child(data->parent_b, data->child_b);
 
 bdrv_ref(data->c);
@@ -1128,12 +1130,21 @@ static void detach_by_parent_aio_cb(void *opaque, int 
ret)
 
 g_assert_cmpint(ret, ==, 0);
 if (data->by_parent_cb) {
+bdrv_inc_in_flight(data->child_b->bs);
 detach_indirect_bh(data);
 }
 }
 
 static void detach_by_driver_cb_drained_begin(BdrvChild *child)
 {
+struct detach_by_parent_data *data = &detach_by_parent_data;
+
+if (!data->detach_on_drain) {
+return;
+}
+data->detach_on_drain = false;
+
+bdrv_inc_in_flight(data->child_b->bs);
 aio_bh_schedule_oneshot(qemu_get_current_aio_context(),
 detach_indirect_bh, &detach_by_parent_data);
 child_of_bds.drained_begin(child);
@@ -1174,8 +1185,14 @@ static void test_detach_indirect(bool by_parent_cb)
 detach_by_driver_cb_class = child_of_bds;
 detach_by_driver_cb_class.drained_begin =
 detach_by_driver_cb_drained_begin;
+detach_by_driver_cb_class.drained_end = NULL;
+detach_by_driver_cb_class.drained_poll = NULL;
 }
 
+detach_by_parent_data = (struct detach_by_parent_data) {
+.detach_on_drain = false,
+};
+
 /* Create all involved nodes */
 parent_a = bdrv_new_open_driver(&bdrv_test, "parent-a", BDRV_O_RDWR,
 &error_abort);
@@ -1227,6 +1244,7 @@ static void test_detach_indirect(bool by_parent_cb)
 .child_b = child_b,
 .c = c,
 .by_parent_cb = by_parent_cb,
+.detach_on_drain = true,
 };
 acb = blk_aio_preadv(blk, 0, &qiov, 0, detach_by_parent_aio_cb, NULL);
 g_assert(acb != NULL);
-- 
2.38.1




[PULL 40/51] clang-tsa: Add macros for shared locks

2022-12-14 Thread Kevin Wolf
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-8-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 include/qemu/clang-tsa.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h
index 211ee0ae73..ba06fb8c92 100644
--- a/include/qemu/clang-tsa.h
+++ b/include/qemu/clang-tsa.h
@@ -62,6 +62,7 @@
  * void Foo(void) TSA_REQUIRES(mutex);
  */
 #define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__))
+#define TSA_REQUIRES_SHARED(...) TSA(requires_shared_capability(__VA_ARGS__))
 
 /* TSA_EXCLUDES() is used to annotate functions: the caller of the
  * function MUST NOT hold resource, the function first acquires the
@@ -82,6 +83,7 @@
  * void Foo(void) TSA_ACQUIRE(mutex);
  */
 #define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__))
+#define TSA_ACQUIRE_SHARED(...) TSA(acquire_shared_capability(__VA_ARGS__))
 
 /* TSA_RELEASE() is used to annotate functions: the caller of the
  * function MUST hold the resource, but the function will then release it.
@@ -91,6 +93,7 @@
  * void Foo(void) TSA_RELEASE(mutex);
  */
 #define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__))
+#define TSA_RELEASE_SHARED(...) TSA(release_shared_capability(__VA_ARGS__))
 
 /* TSA_NO_TSA is used to annotate functions.  Use only when you need to.
  *
@@ -106,5 +109,6 @@
  * More than one mutex may be specified, comma-separated.
  */
 #define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__))
+#define TSA_ASSERT_SHARED(...) TSA(assert_shared_capability(__VA_ARGS__))
 
 #endif /* #ifndef CLANG_TSA_H */
-- 
2.38.1




[PULL 30/51] block-coroutine-wrapper.py: support functions without bs arg

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Right now, we take the first parameter of the function to get the
BlockDriverState to pass to bdrv_poll_co(), that internally calls
functions that figure in which aiocontext the coroutine should run.

However, it is useless to pass a bs just to get its own AioContext,
so instead pass it directly, and default to the main loop if no
BlockDriverState is passed as parameter.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-12-eespo...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/block-gen.h  |  6 +++---
 scripts/block-coroutine-wrapper.py | 16 
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/block/block-gen.h b/block/block-gen.h
index f80cf4897d..08d977f493 100644
--- a/block/block-gen.h
+++ b/block/block-gen.h
@@ -30,7 +30,7 @@
 
 /* Base structure for argument packing structures */
 typedef struct BdrvPollCo {
-BlockDriverState *bs;
+AioContext *ctx;
 bool in_progress;
 int ret;
 Coroutine *co; /* Keep pointer here for debugging */
@@ -40,8 +40,8 @@ static inline int bdrv_poll_co(BdrvPollCo *s)
 {
 assert(!qemu_in_coroutine());
 
-bdrv_coroutine_enter(s->bs, s->co);
-BDRV_POLL_WHILE(s->bs, s->in_progress);
+aio_co_enter(s->ctx, s->co);
+AIO_WAIT_WHILE(s->ctx, s->in_progress);
 
 return s->ret;
 }
diff --git a/scripts/block-coroutine-wrapper.py 
b/scripts/block-coroutine-wrapper.py
index 2090c3bf73..f540003af1 100644
--- a/scripts/block-coroutine-wrapper.py
+++ b/scripts/block-coroutine-wrapper.py
@@ -75,12 +75,14 @@ def __init__(self, return_type: str, name: str, args: str,
 
 t = self.args[0].type
 if t == 'BlockDriverState *':
-bs = 'bs'
+ctx = 'bdrv_get_aio_context(bs)'
 elif t == 'BdrvChild *':
-bs = 'child->bs'
+ctx = 'bdrv_get_aio_context(child->bs)'
+elif t == 'BlockBackend *':
+ctx = 'blk_get_aio_context(blk)'
 else:
-bs = 'blk_bs(blk)'
-self.bs = bs
+ctx = 'qemu_get_aio_context()'
+self.ctx = ctx
 
 def gen_list(self, format: str) -> str:
 return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
@@ -127,7 +129,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
 return {name}({ func.gen_list('{name}') });
 }} else {{
 {struct_name} s = {{
-.poll_state.bs = {func.bs},
+.poll_state.ctx = {func.ctx},
 .poll_state.in_progress = true,
 
 { func.gen_block('.{name} = {name},') }
@@ -150,7 +152,7 @@ def create_co_wrapper(func: FuncDecl) -> str:
 int {func.name}({ func.gen_list('{decl}') })
 {{
 {struct_name} s = {{
-.poll_state.bs = {func.bs},
+.poll_state.ctx = {func.ctx},
 .poll_state.in_progress = true,
 
 { func.gen_block('.{name} = {name},') }
@@ -166,8 +168,6 @@ def create_co_wrapper(func: FuncDecl) -> str:
 def gen_wrapper(func: FuncDecl) -> str:
 assert not '_co_' in func.name
 assert func.return_type == 'int'
-assert func.args[0].type in ['BlockDriverState *', 'BdrvChild *',
- 'BlockBackend *']
 
 name = func.co_name
 struct_name = func.struct_name
-- 
2.38.1




[PULL 05/51] qed: Don't yield in bdrv_qed_co_drain_begin()

2022-12-14 Thread Kevin Wolf
We want to change .bdrv_co_drained_begin() back to be a non-coroutine
callback, so in preparation, avoid yielding in its implementation.

Because we increase bs->in_flight and bdrv_drained_begin() polls, the
behaviour is unchanged.

Signed-off-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Hanna Reitz 
Message-Id: <20221118174110.55183-2-kw...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block/qed.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/block/qed.c b/block/qed.c
index 2f36ad342c..013f826c44 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -282,9 +282,8 @@ static void coroutine_fn 
qed_unplug_allocating_write_reqs(BDRVQEDState *s)
 qemu_co_mutex_unlock(&s->table_lock);
 }
 
-static void coroutine_fn qed_need_check_timer_entry(void *opaque)
+static void coroutine_fn qed_need_check_timer(BDRVQEDState *s)
 {
-BDRVQEDState *s = opaque;
 int ret;
 
 trace_qed_need_check_timer_cb(s);
@@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void 
*opaque)
 (void) ret;
 }
 
+static void coroutine_fn qed_need_check_timer_entry(void *opaque)
+{
+BDRVQEDState *s = opaque;
+
+qed_need_check_timer(opaque);
+bdrv_dec_in_flight(s->bs);
+}
+
 static void qed_need_check_timer_cb(void *opaque)
 {
+BDRVQEDState *s = opaque;
 Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque);
+
+bdrv_inc_in_flight(s->bs);
 qemu_coroutine_enter(co);
 }
 
@@ -363,8 +373,12 @@ static void coroutine_fn 
bdrv_qed_co_drain_begin(BlockDriverState *bs)
  * header is flushed.
  */
 if (s->need_check_timer && timer_pending(s->need_check_timer)) {
+Coroutine *co;
+
 qed_cancel_need_check_timer(s);
-qed_need_check_timer_entry(s);
+co = qemu_coroutine_create(qed_need_check_timer_entry, s);
+bdrv_inc_in_flight(bs);
+aio_co_enter(bdrv_get_aio_context(bs), co);
 }
 }
 
-- 
2.38.1




[PULL 22/51] nbd/server.c: add coroutine_fn annotations

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

These functions end up calling bdrv_*() implemented as generated_co_wrapper
functions.
In addition, they also happen to be always called in coroutine context,
meaning all callers are coroutine_fn.
This means that the g_c_w function will enter the qemu_in_coroutine()
case and eventually suspend (or in other words call qemu_coroutine_yield()).
Therefore we can mark such functions coroutine_fn too.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-4-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 nbd/server.c | 29 -
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index ada16089f3..4af5c793a7 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -2141,14 +2141,15 @@ static int nbd_extent_array_add(NBDExtentArray *ea,
 return 0;
 }
 
-static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
-  uint64_t bytes, NBDExtentArray *ea)
+static int coroutine_fn blockstatus_to_extents(BlockDriverState *bs,
+   uint64_t offset, uint64_t bytes,
+   NBDExtentArray *ea)
 {
 while (bytes) {
 uint32_t flags;
 int64_t num;
-int ret = bdrv_block_status_above(bs, NULL, offset, bytes, &num,
-  NULL, NULL);
+int ret = bdrv_co_block_status_above(bs, NULL, offset, bytes, &num,
+ NULL, NULL);
 
 if (ret < 0) {
 return ret;
@@ -2168,13 +2169,14 @@ static int blockstatus_to_extents(BlockDriverState *bs, 
uint64_t offset,
 return 0;
 }
 
-static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, NBDExtentArray *ea)
+static int coroutine_fn blockalloc_to_extents(BlockDriverState *bs,
+  uint64_t offset, uint64_t bytes,
+  NBDExtentArray *ea)
 {
 while (bytes) {
 int64_t num;
-int ret = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
-  &num);
+int ret = bdrv_co_is_allocated_above(bs, NULL, false, offset, bytes,
+ &num);
 
 if (ret < 0) {
 return ret;
@@ -2220,11 +,12 @@ static int nbd_co_send_extents(NBDClient *client, 
uint64_t handle,
 }
 
 /* Get block status from the exported device and send it to the client */
-static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
-BlockDriverState *bs, uint64_t offset,
-uint32_t length, bool dont_fragment,
-bool last, uint32_t context_id,
-Error **errp)
+static int
+coroutine_fn nbd_co_send_block_status(NBDClient *client, uint64_t handle,
+  BlockDriverState *bs, uint64_t offset,
+  uint32_t length, bool dont_fragment,
+  bool last, uint32_t context_id,
+  Error **errp)
 {
 int ret;
 unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
-- 
2.38.1




[PULL 45/51] block: remove unnecessary assert_bdrv_graph_writable()

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

We don't protect bdrv->aio_context with the graph rwlock,
so these assertions are not needed

Signed-off-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-13-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/block.c b/block.c
index df52c6b012..bdffadcdaa 100644
--- a/block.c
+++ b/block.c
@@ -7214,7 +7214,6 @@ static void bdrv_detach_aio_context(BlockDriverState *bs)
 if (bs->quiesce_counter) {
 aio_enable_external(bs->aio_context);
 }
-assert_bdrv_graph_writable(bs);
 bs->aio_context = NULL;
 }
 
@@ -7228,7 +7227,6 @@ static void bdrv_attach_aio_context(BlockDriverState *bs,
 aio_disable_external(new_context);
 }
 
-assert_bdrv_graph_writable(bs);
 bs->aio_context = new_context;
 
 if (bs->drv && bs->drv->bdrv_attach_aio_context) {
@@ -7309,7 +7307,6 @@ static void bdrv_set_aio_context_commit(void *opaque)
 BlockDriverState *bs = (BlockDriverState *) state->bs;
 AioContext *new_context = state->new_ctx;
 AioContext *old_context = bdrv_get_aio_context(bs);
-assert_bdrv_graph_writable(bs);
 
 /*
  * Take the old AioContex when detaching it from bs.
-- 
2.38.1




[PULL 47/51] graph-lock: TSA annotations for lock/unlock functions

2022-12-14 Thread Kevin Wolf
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-15-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 include/block/graph-lock.h | 80 +-
 block/graph-lock.c |  3 ++
 2 files changed, 73 insertions(+), 10 deletions(-)

diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
index 85e8a53b73..50b7e7b1b6 100644
--- a/include/block/graph-lock.h
+++ b/include/block/graph-lock.h
@@ -21,6 +21,7 @@
 #define GRAPH_LOCK_H
 
 #include "qemu/osdep.h"
+#include "qemu/clang-tsa.h"
 
 #include "qemu/coroutine.h"
 
@@ -57,6 +58,35 @@
  */
 typedef struct BdrvGraphRWlock BdrvGraphRWlock;
 
+/* Dummy lock object to use for Thread Safety Analysis (TSA) */
+typedef struct TSA_CAPABILITY("graph-lock") BdrvGraphLock {
+} BdrvGraphLock;
+
+extern BdrvGraphLock graph_lock;
+
+/*
+ * clang doesn't check consistency in locking annotations between forward
+ * declarations and the function definition. Having the annotation on the
+ * definition, but not the declaration in a header file, may give the reader
+ * a false sense of security because the condition actually remains unchecked
+ * for callers in other source files.
+ *
+ * Therefore, as a convention, for public functions, GRAPH_RDLOCK and
+ * GRAPH_WRLOCK annotations should be present only in the header file.
+ */
+#define GRAPH_WRLOCK TSA_REQUIRES(graph_lock)
+#define GRAPH_RDLOCK TSA_REQUIRES_SHARED(graph_lock)
+
+/*
+ * TSA annotations are not part of function types, so checks are defeated when
+ * using a function pointer. As a workaround, annotate function pointers with
+ * this macro that will require that the lock is at least taken while reading
+ * the pointer. In most cases this is equivalent to actually protecting the
+ * function call.
+ */
+#define GRAPH_RDLOCK_PTR TSA_GUARDED_BY(graph_lock)
+#define GRAPH_WRLOCK_PTR TSA_GUARDED_BY(graph_lock)
+
 /*
  * register_aiocontext:
  * Add AioContext @ctx to the list of AioContext.
@@ -85,14 +115,14 @@ void unregister_aiocontext(AioContext *ctx);
  * This function polls. Callers must not hold the lock of any AioContext other
  * than the current one.
  */
-void bdrv_graph_wrlock(void);
+void bdrv_graph_wrlock(void) TSA_ACQUIRE(graph_lock) TSA_NO_TSA;
 
 /*
  * bdrv_graph_wrunlock:
  * Write finished, reset global has_writer to 0 and restart
  * all readers that are waiting.
  */
-void bdrv_graph_wrunlock(void);
+void bdrv_graph_wrunlock(void) TSA_RELEASE(graph_lock) TSA_NO_TSA;
 
 /*
  * bdrv_graph_co_rdlock:
@@ -116,7 +146,8 @@ void bdrv_graph_wrunlock(void);
  * loop) to take it and wait that the coroutine ends, so that
  * we always signal that a reader is running.
  */
-void coroutine_fn bdrv_graph_co_rdlock(void);
+void coroutine_fn TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA
+bdrv_graph_co_rdlock(void);
 
 /*
  * bdrv_graph_rdunlock:
@@ -124,7 +155,8 @@ void coroutine_fn bdrv_graph_co_rdlock(void);
  * If the writer is waiting for reads to finish (has_writer == 1), signal
  * the writer that we are done via aio_wait_kick() to let it continue.
  */
-void coroutine_fn bdrv_graph_co_rdunlock(void);
+void coroutine_fn TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA
+bdrv_graph_co_rdunlock(void);
 
 /*
  * bdrv_graph_rd{un}lock_main_loop:
@@ -132,8 +164,11 @@ void coroutine_fn bdrv_graph_co_rdunlock(void);
  * in the main loop. It is just asserting that we are not
  * in a coroutine and in GLOBAL_STATE_CODE.
  */
-void bdrv_graph_rdlock_main_loop(void);
-void bdrv_graph_rdunlock_main_loop(void);
+void TSA_ACQUIRE_SHARED(graph_lock) TSA_NO_TSA
+bdrv_graph_rdlock_main_loop(void);
+
+void TSA_RELEASE_SHARED(graph_lock) TSA_NO_TSA
+bdrv_graph_rdunlock_main_loop(void);
 
 /*
  * assert_bdrv_graph_readable:
@@ -150,6 +185,17 @@ void assert_bdrv_graph_readable(void);
  */
 void assert_bdrv_graph_writable(void);
 
+/*
+ * Calling this function tells TSA that we know that the lock is effectively
+ * taken even though we cannot prove it (yet) with GRAPH_RDLOCK. This can be
+ * useful in intermediate stages of a conversion to using the GRAPH_RDLOCK
+ * macro.
+ */
+static inline void TSA_ASSERT_SHARED(graph_lock) TSA_NO_TSA
+assume_graph_lock(void)
+{
+}
+
 typedef struct GraphLockable { } GraphLockable;
 
 /*
@@ -159,13 +205,21 @@ typedef struct GraphLockable { } GraphLockable;
  */
 #define GML_OBJ_() (&(GraphLockable) { })
 
-static inline GraphLockable *graph_lockable_auto_lock(GraphLockable *x)
+/*
+ * This is not marked as TSA_ACQUIRE() because TSA doesn't understand the
+ * cleanup attribute and would therefore complain that the graph is never
+ * unlocked. TSA_ASSERT() makes sure that the following calls know that we
+ * hold the lock while unlocking is left unchecked.
+ */
+static inline GraphLockable * TSA_ASSERT(graph_lock) TSA_NO_TSA
+graph_lockable_auto_lock(GraphLockable *x)
 {
 bdrv_graph_co_rdlock();
 return x;
 }
 
-static inline void graph_lockable_auto_unlock(GraphLockable *x)
+static inline void TSA_NO_T

[PULL 02/51] block: drop bdrv_remove_filter_or_cow_child

2022-12-14 Thread Kevin Wolf
From: Vladimir Sementsov-Ogievskiy 

Drop this simple wrapper used only in one place. We have too many graph
modifying functions even without it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Message-Id: <20221107163558.618889-3-vsement...@yandex-team.ru>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c | 15 +--
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/block.c b/block.c
index c0c1b3df91..dc761209ac 100644
--- a/block.c
+++ b/block.c
@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
 static void bdrv_replace_child_noperm(BdrvChild *child,
   BlockDriverState *new_bs);
 static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
-Transaction *tran);
 
 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue,
@@ -5065,17 +5063,6 @@ static void bdrv_remove_child(BdrvChild *child, 
Transaction *tran)
 tran_add(tran, &bdrv_remove_child_drv, child);
 }
 
-/*
- * A function to remove backing-chain child of @bs if exists: cow child for
- * format nodes (always .backing) and filter child for filters (may be .file or
- * .backing)
- */
-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
-Transaction *tran)
-{
-bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran);
-}
-
 static int bdrv_replace_node_noperm(BlockDriverState *from,
 BlockDriverState *to,
 bool auto_skip, Transaction *tran,
@@ -5160,7 +5147,7 @@ static int bdrv_replace_node_common(BlockDriverState 
*from,
 }
 
 if (detach_subchain) {
-bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
+bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
 }
 
 found = g_hash_table_new(NULL, NULL);
-- 
2.38.1




[PULL 09/51] block: Inline bdrv_drain_invoke()

2022-12-14 Thread Kevin Wolf
bdrv_drain_invoke() has now two entirely separate cases that share no
code any more and are selected depending on a bool parameter. Each case
has only one caller. Just inline the function.

Signed-off-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Hanna Reitz 
Message-Id: <20221118174110.55183-6-kw...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block/io.c | 23 ++-
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/block/io.c b/block/io.c
index f4ca62b034..a25103be6f 100644
--- a/block/io.c
+++ b/block/io.c
@@ -242,21 +242,6 @@ typedef struct {
 bool ignore_bds_parents;
 } BdrvCoDrainData;
 
-/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
-{
-if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
-(!begin && !bs->drv->bdrv_drain_end)) {
-return;
-}
-
-if (begin) {
-bs->drv->bdrv_drain_begin(bs);
-} else {
-bs->drv->bdrv_drain_end(bs);
-}
-}
-
 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
 bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
  BdrvChild *ignore_parent, bool ignore_bds_parents)
@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
 }
 
 bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
-bdrv_drain_invoke(bs, true);
+if (bs->drv && bs->drv->bdrv_drain_begin) {
+bs->drv->bdrv_drain_begin(bs);
+}
 }
 
 static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool 
recursive,
 assert(bs->quiesce_counter > 0);
 
 /* Re-enable things in child-to-parent order */
-bdrv_drain_invoke(bs, false);
+if (bs->drv && bs->drv->bdrv_drain_end) {
+bs->drv->bdrv_drain_end(bs);
+}
 bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
 
 old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
-- 
2.38.1




[PULL 41/51] configure: Enable -Wthread-safety if present

2022-12-14 Thread Kevin Wolf
This enables clang's thread safety analysis (TSA), which we'll use to
statically check the block graph locking.

Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-9-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 configure | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure b/configure
index 26c7bc5154..45d693a241 100755
--- a/configure
+++ b/configure
@@ -1208,6 +1208,7 @@ add_to warn_flags -Wnested-externs
 add_to warn_flags -Wendif-labels
 add_to warn_flags -Wexpansion-to-defined
 add_to warn_flags -Wimplicit-fallthrough=2
+add_to warn_flags -Wthread-safety
 
 nowarn_flags=
 add_to nowarn_flags -Wno-initializer-overrides
-- 
2.38.1




[PULL 23/51] block-backend: replace bdrv_*_above with blk_*_above

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Avoid mixing bdrv_* functions with blk_*, so create blk_* counterparts
for bdrv_block_status_above and bdrv_is_allocated_above.

Note that since blk_co_block_status_above only calls the g_c_w function
bdrv_common_block_status_above and is marked as coroutine_fn, call
directly bdrv_co_common_block_status_above() to avoid using a g_c_w.
Same applies to blk_co_is_allocated_above.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-5-eespo...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 include/sysemu/block-backend-io.h |  9 +
 block/block-backend.c | 21 
 block/commit.c|  4 ++--
 nbd/server.c  | 32 +++
 4 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/include/sysemu/block-backend-io.h 
b/include/sysemu/block-backend-io.h
index 50f5aa2e07..ee3eb12610 100644
--- a/include/sysemu/block-backend-io.h
+++ b/include/sysemu/block-backend-io.h
@@ -92,6 +92,15 @@ int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, 
int64_t off_in,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
 
+int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
+   BlockDriverState *base,
+   int64_t offset, int64_t bytes,
+   int64_t *pnum, int64_t *map,
+   BlockDriverState **file);
+int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
+   BlockDriverState *base,
+   bool include_base, int64_t offset,
+   int64_t bytes, int64_t *pnum);
 
 /*
  * "I/O or GS" API functions. These functions can run without
diff --git a/block/block-backend.c b/block/block-backend.c
index feaf2181fa..2852a892de 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1424,6 +1424,27 @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, 
int64_t offset,
 return blk_co_pwritev_part(blk, offset, bytes, qiov, 0, flags);
 }
 
+int coroutine_fn blk_co_block_status_above(BlockBackend *blk,
+   BlockDriverState *base,
+   int64_t offset, int64_t bytes,
+   int64_t *pnum, int64_t *map,
+   BlockDriverState **file)
+{
+IO_CODE();
+return bdrv_co_block_status_above(blk_bs(blk), base, offset, bytes, pnum,
+  map, file);
+}
+
+int coroutine_fn blk_co_is_allocated_above(BlockBackend *blk,
+   BlockDriverState *base,
+   bool include_base, int64_t offset,
+   int64_t bytes, int64_t *pnum)
+{
+IO_CODE();
+return bdrv_co_is_allocated_above(blk_bs(blk), base, include_base, offset,
+  bytes, pnum);
+}
+
 typedef struct BlkRwCo {
 BlockBackend *blk;
 int64_t offset;
diff --git a/block/commit.c b/block/commit.c
index 0029b31944..b346341767 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -155,8 +155,8 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
 break;
 }
 /* Copy if allocated above the base */
-ret = bdrv_is_allocated_above(blk_bs(s->top), s->base_overlay, true,
-  offset, COMMIT_BUFFER_SIZE, &n);
+ret = blk_co_is_allocated_above(s->top, s->base_overlay, true,
+offset, COMMIT_BUFFER_SIZE, &n);
 copy = (ret > 0);
 trace_commit_one_iteration(s, offset, n, ret);
 if (copy) {
diff --git a/nbd/server.c b/nbd/server.c
index 4af5c793a7..c53c39560e 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1991,7 +1991,7 @@ static int coroutine_fn 
nbd_co_send_structured_error(NBDClient *client,
 }
 
 /* Do a sparse read and send the structured reply to the client.
- * Returns -errno if sending fails. bdrv_block_status_above() failure is
+ * Returns -errno if sending fails. blk_co_block_status_above() failure is
  * reported to the client, at which point this function succeeds.
  */
 static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
@@ -2007,10 +2007,10 @@ static int coroutine_fn 
nbd_co_send_sparse_read(NBDClient *client,
 
 while (progress < size) {
 int64_t pnum;
-int status = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
- offset + progress,
- size - progress, &pnum, NULL,
-

[PULL 51/51] block: GRAPH_RDLOCK for functions only called by co_wrappers

2022-12-14 Thread Kevin Wolf
The generated coroutine wrappers already take care to take the lock in
the non-coroutine path, and assume that the lock is already taken in the
coroutine path.

The only thing we need to do for the wrapped function is adding the
GRAPH_RDLOCK annotation. Doing so also allows us to mark the
corresponding callbacks in BlockDriver as GRAPH_RDLOCK_PTR.

Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-19-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 block/coroutines.h   | 17 ++---
 include/block/block_int-common.h | 20 +---
 block.c  |  2 ++
 block/io.c   |  2 ++
 4 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index 48e9081aa1..2a1e0b3c9d 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -37,9 +37,11 @@
  * the I/O API.
  */
 
-int coroutine_fn bdrv_co_check(BlockDriverState *bs,
-   BdrvCheckResult *res, BdrvCheckMode fix);
-int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
 
 int coroutine_fn
 bdrv_co_common_block_status_above(BlockDriverState *bs,
@@ -53,10 +55,11 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
   BlockDriverState **file,
   int *depth);
 
-int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
-   QEMUIOVector *qiov, int64_t pos);
-int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
-QEMUIOVector *qiov, int64_t pos);
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+int coroutine_fn GRAPH_RDLOCK
+bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 
 int coroutine_fn
 nbd_co_do_establish_connection(BlockDriverState *bs, bool blocking,
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index b1f0d88307..c34c525fa6 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -641,8 +641,8 @@ struct BlockDriver {
 /*
  * Invalidate any cached meta-data.
  */
-void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
-  Error **errp);
+void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_invalidate_cache)(
+BlockDriverState *bs, Error **errp);
 
 /*
  * Flushes all data for all layers by calling bdrv_co_flush for underlying
@@ -701,12 +701,11 @@ struct BlockDriver {
  Error **errp);
 BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
 
-int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
-  QEMUIOVector *qiov,
-  int64_t pos);
-int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
-  QEMUIOVector *qiov,
-  int64_t pos);
+int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_save_vmstate)(
+BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_load_vmstate)(
+BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 
 /* removable device specific */
 bool (*bdrv_is_inserted)(BlockDriverState *bs);
@@ -724,9 +723,8 @@ struct BlockDriver {
  * Returns 0 for completed check, -errno for internal errors.
  * The check results are stored in result.
  */
-int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
-  BdrvCheckResult *result,
-  BdrvCheckMode fix);
+int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_check)(
+BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix);
 
 void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
 
diff --git a/block.c b/block.c
index 1a82fd101a..9c2ac757e4 100644
--- a/block.c
+++ b/block.c
@@ -5402,6 +5402,7 @@ int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix)
 {
 IO_CODE();
+assert_bdrv_graph_readable();
 if (bs->drv == NULL) {
 return -ENOMEDIUM;
 }
@@ -6617,6 +6618,7 @@ int coroutine_fn 
bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp)
 IO_CODE();
 
 assert(!(bs->open_flags & BDRV_O_INACTIVE));
+assert_bdrv_graph_readable();
 
 if (bs->drv->bdrv_co_invalidate_cache) {
 bs->drv->bdrv_co_invalidate_cache(bs, &local_err);
diff --git a/block/io.c

[PULL 00/51] Block layer patches

2022-12-14 Thread Kevin Wolf
The following changes since commit 5204b499a6cae4dfd9fe762d5e6e82224892383b:

  mailmap: Fix Stefan Weil author email (2022-12-13 15:56:57 -0500)

are available in the Git repository at:

  https://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 2ad19e5dc950d4b340894846b9e71c0b20f9a1cc:

  block: GRAPH_RDLOCK for functions only called by co_wrappers (2022-12-14 
13:13:07 +0100)


Block layer patches

- Code cleanups around block graph modification
- Simplify drain
- coroutine_fn correctness fixes, including splitting generated
  coroutine wrappers into co_wrapper (to be called only from
  non-coroutine context) and co_wrapper_mixed (both coroutine and
  non-coroutine context)
- Introduce a block graph rwlock


Emanuele Giuseppe Esposito (21):
  block-io: introduce coroutine_fn duplicates for 
bdrv_common_block_status_above callers
  block-copy: add coroutine_fn annotations
  nbd/server.c: add coroutine_fn annotations
  block-backend: replace bdrv_*_above with blk_*_above
  block/vmdk: add coroutine_fn annotations
  block: avoid duplicating filename string in bdrv_create
  block: distinguish between bdrv_create running in coroutine and not
  block: bdrv_create_file is a coroutine_fn
  block: rename generated_co_wrapper in co_wrapper_mixed
  block-coroutine-wrapper.py: introduce co_wrapper
  block-coroutine-wrapper.py: support functions without bs arg
  block-coroutine-wrapper.py: support also basic return types
  block: convert bdrv_create to co_wrapper
  block/dirty-bitmap: convert coroutine-only functions to co_wrapper
  graph-lock: Implement guard macros
  async: Register/unregister aiocontext in graph lock list
  block: wrlock in bdrv_replace_child_noperm
  block: remove unnecessary assert_bdrv_graph_writable()
  block: assert that graph read and writes are performed correctly
  block-coroutine-wrapper.py: introduce annotations that take the graph 
rdlock
  block: use co_wrapper_mixed_bdrv_rdlock in functions taking the rdlock

Kevin Wolf (25):
  qed: Don't yield in bdrv_qed_co_drain_begin()
  test-bdrv-drain: Don't yield in .bdrv_co_drained_begin/end()
  block: Revert .bdrv_drained_begin/end to non-coroutine_fn
  block: Remove drained_end_counter
  block: Inline bdrv_drain_invoke()
  block: Fix locking for bdrv_reopen_queue_child()
  block: Drain individual nodes during reopen
  block: Don't use subtree drains in bdrv_drop_intermediate()
  stream: Replace subtree drain with a single node drain
  block: Remove subtree drains
  block: Call drain callbacks only once
  block: Remove ignore_bds_parents parameter from drain_begin/end.
  block: Drop out of coroutine in bdrv_do_drained_begin_quiesce()
  block: Don't poll in bdrv_replace_child_noperm()
  block: Remove poll parameter from bdrv_parent_drained_begin_single()
  block: Factor out bdrv_drain_all_begin_nopoll()
  Import clang-tsa.h
  clang-tsa: Add TSA_ASSERT() macro
  clang-tsa: Add macros for shared locks
  configure: Enable -Wthread-safety if present
  test-bdrv-drain: Fix incorrrect drain assumptions
  block: Fix locking in external_snapshot_prepare()
  graph-lock: TSA annotations for lock/unlock functions
  Mark assert_bdrv_graph_readable/writable() GRAPH_RD/WRLOCK
  block: GRAPH_RDLOCK for functions only called by co_wrappers

Paolo Bonzini (1):
  graph-lock: Introduce a lock to protect block graph operations

Vladimir Sementsov-Ogievskiy (4):
  block: Inline bdrv_detach_child()
  block: drop bdrv_remove_filter_or_cow_child
  block: bdrv_refresh_perms(): allow external tran
  block: refactor bdrv_list_refresh_perms to allow any list of nodes

 docs/devel/block-coroutine-wrapper.rst |   6 +-
 configure  |   1 +
 block/block-gen.h  |  11 +-
 block/coroutines.h |  21 +-
 include/block/aio.h|   9 +
 include/block/block-common.h   |  27 ++-
 include/block/block-copy.h |   5 +-
 include/block/block-global-state.h |  15 +-
 include/block/block-io.h   | 136 +--
 include/block/block_int-common.h   |  49 ++--
 include/block/block_int-global-state.h |  17 --
 include/block/block_int-io.h   |  12 -
 include/block/block_int.h  |   1 +
 include/block/dirty-bitmap.h   |  10 +-
 include/block/graph-lock.h | 280 +++
 include/qemu/clang-tsa.h   | 114 ++
 include/sysemu/block-backend-io.h  |  77 ---
 block.c| 404 ++---
 block/block-backend.c  |  25 +-
 block/block-copy.c |  21 +-
 block/commit.c  

[PULL 35/51] graph-lock: Introduce a lock to protect block graph operations

2022-12-14 Thread Kevin Wolf
From: Paolo Bonzini 

Block layer graph operations are always run under BQL in the main loop.
This is proved by the assertion qemu_in_main_thread() and its wrapper
macro GLOBAL_STATE_CODE.

However, there are also concurrent coroutines running in other iothreads
that always try to traverse the graph. Currently this is protected
(among various other things) by the AioContext lock, but once this is
removed, we need to make sure that reads do not happen while modifying
the graph.

We distinguish between writer (main loop, under BQL) that modifies the
graph, and readers (all other coroutines running in various AioContext),
that go through the graph edges, reading ->parents and->children.

The writer (main loop) has "exclusive" access, so it first waits for any
current read to finish, and then prevents incoming ones from entering
while it has the exclusive access.

The readers (coroutines in multiple AioContext) are free to access the
graph as long the writer is not modifying the graph. In case it is, they
go in a CoQueue and sleep until the writer is done.

If a coroutine changes AioContext, the counter in the original and new
AioContext are left intact, since the writer does not care where the
reader is, but only if there is one.

As a result, some AioContexts might have a negative reader count, to
balance the positive count of the AioContext that took the lock.  This
also means that when an AioContext is deleted it may have a nonzero
reader count. In that case we transfer the count to a global shared
counter so that the writer is always aware of all readers.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-3-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 include/block/aio.h|   9 ++
 include/block/block_int.h  |   1 +
 include/block/graph-lock.h | 139 
 block/graph-lock.c | 261 +
 block/meson.build  |   1 +
 5 files changed, 411 insertions(+)
 create mode 100644 include/block/graph-lock.h
 create mode 100644 block/graph-lock.c

diff --git a/include/block/aio.h b/include/block/aio.h
index d128558f1d..0f65a3cc9e 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -22,6 +22,7 @@
 #include "qemu/event_notifier.h"
 #include "qemu/thread.h"
 #include "qemu/timer.h"
+#include "block/graph-lock.h"
 
 typedef struct BlockAIOCB BlockAIOCB;
 typedef void BlockCompletionFunc(void *opaque, int ret);
@@ -127,6 +128,14 @@ struct AioContext {
 /* Used by AioContext users to protect from multi-threaded access.  */
 QemuRecMutex lock;
 
+/*
+ * Keep track of readers and writers of the block layer graph.
+ * This is essential to avoid performing additions and removal
+ * of nodes and edges from block graph while some
+ * other thread is traversing it.
+ */
+BdrvGraphRWlock *bdrv_graph;
+
 /* The list of registered AIO handlers.  Protected by ctx->list_lock. */
 AioHandlerList aio_handlers;
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7d50b6bbd1..b35b0138ed 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -26,6 +26,7 @@
 
 #include "block_int-global-state.h"
 #include "block_int-io.h"
+#include "block/graph-lock.h"
 
 /* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
 
diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
new file mode 100644
index 00..82edb62cfa
--- /dev/null
+++ b/include/block/graph-lock.h
@@ -0,0 +1,139 @@
+/*
+ * Graph lock: rwlock to protect block layer graph manipulations (add/remove
+ * edges and nodes)
+ *
+ *  Copyright (c) 2022 Red Hat
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see .
+ */
+#ifndef GRAPH_LOCK_H
+#define GRAPH_LOCK_H
+
+#include "qemu/osdep.h"
+
+#include "qemu/coroutine.h"
+
+/**
+ * Graph Lock API
+ * This API provides a rwlock used to protect block layer
+ * graph modifications like edge (BdrvChild) and node (BlockDriverState)
+ * addition and removal.
+ * Currently we have 1 writer only, the Main loop, and many
+ * readers, mostly coroutines running in other AioContext thus other threads.
+ *
+ * We distinguish between writer (main loop, under BQL) that modifies the
+ * graph,

[PULL 26/51] block: distinguish between bdrv_create running in coroutine and not

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Call two different functions depending on whether bdrv_create
is in coroutine or not, following the same pattern as
generated_co_wrapper functions.

This allows to also call the coroutine function directly,
without using CreateCo or relying in bdrv_create().

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-8-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block.c | 69 -
 1 file changed, 34 insertions(+), 35 deletions(-)

diff --git a/block.c b/block.c
index 9d77ec8818..0a1d484a27 100644
--- a/block.c
+++ b/block.c
@@ -526,63 +526,62 @@ typedef struct CreateCo {
 Error *err;
 } CreateCo;
 
-static void coroutine_fn bdrv_create_co_entry(void *opaque)
+static int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
+   QemuOpts *opts, Error **errp)
 {
-Error *local_err = NULL;
 int ret;
+GLOBAL_STATE_CODE();
+ERRP_GUARD();
 
+if (!drv->bdrv_co_create_opts) {
+error_setg(errp, "Driver '%s' does not support image creation",
+   drv->format_name);
+return -ENOTSUP;
+}
+
+ret = drv->bdrv_co_create_opts(drv, filename, opts, errp);
+if (ret < 0 && !*errp) {
+error_setg_errno(errp, -ret, "Could not create image");
+}
+
+return ret;
+}
+
+static void coroutine_fn bdrv_create_co_entry(void *opaque)
+{
 CreateCo *cco = opaque;
-assert(cco->drv);
 GLOBAL_STATE_CODE();
 
-ret = cco->drv->bdrv_co_create_opts(cco->drv,
-cco->filename, cco->opts, &local_err);
-error_propagate(&cco->err, local_err);
-cco->ret = ret;
+cco->ret = bdrv_co_create(cco->drv, cco->filename, cco->opts, &cco->err);
+aio_wait_kick();
 }
 
 int bdrv_create(BlockDriver *drv, const char* filename,
 QemuOpts *opts, Error **errp)
 {
-int ret;
-
 GLOBAL_STATE_CODE();
 
-Coroutine *co;
-CreateCo cco = {
-.drv = drv,
-.filename = filename,
-.opts = opts,
-.ret = NOT_DONE,
-.err = NULL,
-};
-
-if (!drv->bdrv_co_create_opts) {
-error_setg(errp, "Driver '%s' does not support image creation", 
drv->format_name);
-return -ENOTSUP;
-}
-
 if (qemu_in_coroutine()) {
 /* Fast-path if already in coroutine context */
-bdrv_create_co_entry(&cco);
+return bdrv_co_create(drv, filename, opts, errp);
 } else {
+Coroutine *co;
+CreateCo cco = {
+.drv = drv,
+.filename = filename,
+.opts = opts,
+.ret = NOT_DONE,
+.err = NULL,
+};
+
 co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
 qemu_coroutine_enter(co);
 while (cco.ret == NOT_DONE) {
 aio_poll(qemu_get_aio_context(), true);
 }
+error_propagate(errp, cco.err);
+return cco.ret;
 }
-
-ret = cco.ret;
-if (ret < 0) {
-if (cco.err) {
-error_propagate(errp, cco.err);
-} else {
-error_setg_errno(errp, -ret, "Could not create image");
-}
-}
-
-return ret;
 }
 
 /**
-- 
2.38.1




[PULL 18/51] block: Don't poll in bdrv_replace_child_noperm()

2022-12-14 Thread Kevin Wolf
In order to make sure that bdrv_replace_child_noperm() doesn't have to
poll any more, get rid of the bdrv_parent_drained_begin_single() call.

This is possible now because we can require that the parent is already
drained through the child in question when the function is called and we
don't call the parent drain callbacks more than once.

The additional drain calls needed in callers cause the test case to run
its code in the drain handler too early (bdrv_attach_child() drains
now), so modify it to only enable the code after the test setup has
completed.

Signed-off-by: Kevin Wolf 
Message-Id: <20221118174110.55183-15-kw...@redhat.com>
Reviewed-by: Hanna Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 include/block/block-io.h |   8 +++
 block.c  | 103 ++-
 block/io.c   |   2 +-
 tests/unit/test-bdrv-drain.c |  10 
 4 files changed, 108 insertions(+), 15 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index 8f5e75756a..65e6d2569b 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector 
*qiov, int64_t pos);
  */
 void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
 
+/**
+ * bdrv_parent_drained_poll_single:
+ *
+ * Returns true if there is any pending activity to cease before @c can be
+ * called quiesced, false otherwise.
+ */
+bool bdrv_parent_drained_poll_single(BdrvChild *c);
+
 /**
  * bdrv_parent_drained_end_single:
  *
diff --git a/block.c b/block.c
index 1a2a8d9de9..faaeca8472 100644
--- a/block.c
+++ b/block.c
@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque)
 
 GLOBAL_STATE_CODE();
 /* old_bs reference is transparently moved from @s to @s->child */
+if (!s->child->bs) {
+/*
+ * The parents were undrained when removing old_bs from the child. New
+ * requests can't have been made, though, because the child was empty.
+ *
+ * TODO Make bdrv_replace_child_noperm() transactionable to avoid
+ * undraining the parent in the first place. Once this is done, having
+ * new_bs drained when calling bdrv_replace_child_tran() is not a
+ * requirement any more.
+ */
+bdrv_parent_drained_begin_single(s->child, false);
+assert(!bdrv_parent_drained_poll_single(s->child));
+}
+assert(s->child->quiesced_parent);
 bdrv_replace_child_noperm(s->child, s->old_bs);
 bdrv_unref(new_bs);
 }
@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = {
  *
  * Note: real unref of old_bs is done only on commit.
  *
+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
+ * kept drained until the transaction is completed.
+ *
  * The function doesn't update permissions, caller is responsible for this.
  */
 static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
 Transaction *tran)
 {
 BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
+
+assert(child->quiesced_parent);
+assert(!new_bs || new_bs->quiesce_counter);
+
 *s = (BdrvReplaceChildState) {
 .child = child,
 .old_bs = child->bs,
@@ -2850,6 +2871,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission 
qapi_perm)
 return permissions[qapi_perm];
 }
 
+/*
+ * Replaces the node that a BdrvChild points to without updating permissions.
+ *
+ * If @new_bs is non-NULL, the parent of @child must already be drained through
+ * @child.
+ *
+ * This function does not poll.
+ */
 static void bdrv_replace_child_noperm(BdrvChild *child,
   BlockDriverState *new_bs)
 {
@@ -2857,6 +2886,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 int new_bs_quiesce_counter;
 
 assert(!child->frozen);
+
+/*
+ * If we want to change the BdrvChild to point to a drained node as its new
+ * child->bs, we need to make sure that its new parent is drained, too. In
+ * other words, either child->quiesce_parent must already be true or we 
must
+ * be able to set it and keep the parent's quiesce_counter consistent with
+ * that, but without polling or starting new requests (this function
+ * guarantees that it doesn't poll, and starting new requests would be
+ * against the invariants of drain sections).
+ *
+ * To keep things simple, we pick the first option (child->quiesce_parent
+ * must already be true). We also generalise the rule a bit to make it
+ * easier to verify in callers and more likely to be covered in test cases:
+ * The parent must be quiesced through this child even if new_bs isn't
+ * currently drained.
+ *
+ * The only exception is for callers that always pass new_bs == NULL. In
+ * this case, we obviously never need to consider the case of a drain

[PULL 08/51] block: Remove drained_end_counter

2022-12-14 Thread Kevin Wolf
drained_end_counter is unused now, nobody changes its value any more. It
can be removed.

In cases where we had two almost identical functions that only differed
in whether the caller passes drained_end_counter, or whether they would
poll for a local drained_end_counter to reach 0, these become a single
function.

Signed-off-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Emanuele Giuseppe Esposito 
Message-Id: <20221118174110.55183-5-kw...@redhat.com>
Reviewed-by: Hanna Reitz 
Signed-off-by: Kevin Wolf 
---
 include/block/block-io.h | 24 
 include/block/block_int-common.h |  6 +-
 block.c  |  5 +-
 block/block-backend.c|  4 +-
 block/io.c   | 98 
 blockjob.c   |  2 +-
 6 files changed, 30 insertions(+), 109 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index b099d7db45..054e964c9b 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, 
int64_t src_offset,
 int64_t bytes, BdrvRequestFlags read_flags,
 BdrvRequestFlags write_flags);
 
-/**
- * bdrv_drained_end_no_poll:
- *
- * Same as bdrv_drained_end(), but do not poll for the subgraph to
- * actually become unquiesced.  Therefore, no graph changes will occur
- * with this function.
- *
- * *drained_end_counter is incremented for every background operation
- * that is scheduled, and will be decremented for every operation once
- * it settles.  The caller must poll until it reaches 0.  The counter
- * should be accessed using atomic operations only.
- */
-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
-
-
 /*
  * "I/O or GS" API functions. These functions can run without
  * the BQL, but only in one specific iothread/main loop.
@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool 
poll);
  * bdrv_parent_drained_end_single:
  *
  * End a quiesced section for the parent of @c.
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled, which may result in graph changes.
  */
 void bdrv_parent_drained_end_single(BdrvChild *c);
 
@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
  * bdrv_drained_end:
  *
  * End a quiescent section started by bdrv_drained_begin().
- *
- * This polls @bs's AioContext until all scheduled sub-drained_ends
- * have settled.  On one hand, that may result in graph changes.  On
- * the other, this requires that the caller either runs in the main
- * loop; or that all involved nodes (@bs and all of its parents) are
- * in the caller's AioContext.
  */
 void bdrv_drained_end(BlockDriverState *bs);
 
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 40d646d1ed..2b97576f6d 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -939,15 +939,11 @@ struct BdrvChildClass {
  * These functions must not change the graph (and therefore also must not
  * call aio_poll(), which could change the graph indirectly).
  *
- * If drained_end() schedules background operations, it must atomically
- * increment *drained_end_counter for each such operation and atomically
- * decrement it once the operation has settled.
- *
  * Note that this can be nested. If drained_begin() was called twice, new
  * I/O is allowed only after drained_end() was called twice, too.
  */
 void (*drained_begin)(BdrvChild *child);
-void (*drained_end)(BdrvChild *child, int *drained_end_counter);
+void (*drained_end)(BdrvChild *child);
 
 /*
  * Returns whether the parent has pending requests for the child. This
diff --git a/block.c b/block.c
index 466770b9ac..ce71545551 100644
--- a/block.c
+++ b/block.c
@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child)
 return bdrv_drain_poll(bs, false, NULL, false);
 }
 
-static void bdrv_child_cb_drained_end(BdrvChild *child,
-  int *drained_end_counter)
+static void bdrv_child_cb_drained_end(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
-bdrv_drained_end_no_poll(bs, drained_end_counter);
+bdrv_drained_end(bs);
 }
 
 static int bdrv_child_cb_inactivate(BdrvChild *child)
diff --git a/block/block-backend.c b/block/block-backend.c
index d98a96ff37..feaf2181fa 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, 
bool parent_is_format,
 }
 static void blk_root_drained_begin(BdrvChild *child);
 static bool blk_root_drained_poll(BdrvChild *child);
-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
+static void blk_root_drained_end(BdrvChild *child);
 
 static void blk_r

[PULL 24/51] block/vmdk: add coroutine_fn annotations

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

These functions end up calling bdrv_create() implemented as generated_co_wrapper
functions.
In addition, they also happen to be always called in coroutine context,
meaning all callers are coroutine_fn.
This means that the g_c_w function will enter the qemu_in_coroutine()
case and eventually suspend (or in other words call qemu_coroutine_yield()).
Therefore we can mark such functions coroutine_fn too.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-6-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block/vmdk.c | 36 +++-
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/block/vmdk.c b/block/vmdk.c
index 26376352b9..0c32bf2e83 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -2285,10 +2285,11 @@ exit:
 return ret;
 }
 
-static int vmdk_create_extent(const char *filename, int64_t filesize,
-  bool flat, bool compress, bool zeroed_grain,
-  BlockBackend **pbb,
-  QemuOpts *opts, Error **errp)
+static int coroutine_fn vmdk_create_extent(const char *filename,
+   int64_t filesize, bool flat,
+   bool compress, bool zeroed_grain,
+   BlockBackend **pbb,
+   QemuOpts *opts, Error **errp)
 {
 int ret;
 BlockBackend *blk = NULL;
@@ -2366,14 +2367,14 @@ static int filename_decompose(const char *filename, 
char *path, char *prefix,
  *   non-split format.
  * idx >= 1: get the n-th extent if in a split subformat
  */
-typedef BlockBackend *(*vmdk_create_extent_fn)(int64_t size,
-   int idx,
-   bool flat,
-   bool split,
-   bool compress,
-   bool zeroed_grain,
-   void *opaque,
-   Error **errp);
+typedef BlockBackend * coroutine_fn (*vmdk_create_extent_fn)(int64_t size,
+ int idx,
+ bool flat,
+ bool split,
+ bool compress,
+ bool zeroed_grain,
+ void *opaque,
+ Error **errp);
 
 static void vmdk_desc_add_extent(GString *desc,
  const char *extent_line_fmt,
@@ -2616,7 +2617,7 @@ typedef struct {
 QemuOpts *opts;
 } VMDKCreateOptsData;
 
-static BlockBackend *vmdk_co_create_opts_cb(int64_t size, int idx,
+static BlockBackend * coroutine_fn vmdk_co_create_opts_cb(int64_t size, int 
idx,
 bool flat, bool split, bool 
compress,
 bool zeroed_grain, void *opaque,
 Error **errp)
@@ -2768,10 +2769,11 @@ exit:
 return ret;
 }
 
-static BlockBackend *vmdk_co_create_cb(int64_t size, int idx,
-   bool flat, bool split, bool compress,
-   bool zeroed_grain, void *opaque,
-   Error **errp)
+static BlockBackend * coroutine_fn vmdk_co_create_cb(int64_t size, int idx,
+ bool flat, bool split,
+ bool compress,
+ bool zeroed_grain,
+ void *opaque, Error 
**errp)
 {
 int ret;
 BlockDriverState *bs;
-- 
2.38.1




[PULL 39/51] clang-tsa: Add TSA_ASSERT() macro

2022-12-14 Thread Kevin Wolf
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-7-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 include/qemu/clang-tsa.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h
index 0a3361dfc8..211ee0ae73 100644
--- a/include/qemu/clang-tsa.h
+++ b/include/qemu/clang-tsa.h
@@ -98,4 +98,13 @@
  */
 #define TSA_NO_TSA TSA(no_thread_safety_analysis)
 
+/*
+ * TSA_ASSERT() is used to annotate functions: This function will assert that
+ * the lock is held. When it returns, the caller of the function is assumed to
+ * already hold the resource.
+ *
+ * More than one mutex may be specified, comma-separated.
+ */
+#define TSA_ASSERT(...) TSA(assert_capability(__VA_ARGS__))
+
 #endif /* #ifndef CLANG_TSA_H */
-- 
2.38.1




[PULL 04/51] block: refactor bdrv_list_refresh_perms to allow any list of nodes

2022-12-14 Thread Kevin Wolf
From: Vladimir Sementsov-Ogievskiy 

We are going to increase usage of collecting nodes in a list to then
update, and calling bdrv_topological_dfs() each time is not convenient,
and not correct as we are going to interleave graph modifying with
filling the node list.

So, let's switch to a function that takes any list of nodes, adds all
their subtrees and do topological sort. And finally, refresh
permissions.

While being here, make the function public, as we'll want to use it
from blockdev.c in near future.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Message-Id: <20221107163558.618889-5-vsement...@yandex-team.ru>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c | 51 ---
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/block.c b/block.c
index f2f9178832..385ed3cd53 100644
--- a/block.c
+++ b/block.c
@@ -2521,8 +2521,12 @@ static int bdrv_node_refresh_perm(BlockDriverState *bs, 
BlockReopenQueue *q,
 return 0;
 }
 
-static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
-   Transaction *tran, Error **errp)
+/*
+ * @list is a product of bdrv_topological_dfs() (may be called several times) -
+ * a topologically sorted subgraph.
+ */
+static int bdrv_do_refresh_perms(GSList *list, BlockReopenQueue *q,
+ Transaction *tran, Error **errp)
 {
 int ret;
 BlockDriverState *bs;
@@ -2544,6 +2548,24 @@ static int bdrv_list_refresh_perms(GSList *list, 
BlockReopenQueue *q,
 return 0;
 }
 
+/*
+ * @list is any list of nodes. List is completed by all subtrees and
+ * topologically sorted. It's not a problem if some node occurs in the @list
+ * several times.
+ */
+static int bdrv_list_refresh_perms(GSList *list, BlockReopenQueue *q,
+   Transaction *tran, Error **errp)
+{
+g_autoptr(GHashTable) found = g_hash_table_new(NULL, NULL);
+g_autoptr(GSList) refresh_list = NULL;
+
+for ( ; list; list = list->next) {
+refresh_list = bdrv_topological_dfs(refresh_list, found, list->data);
+}
+
+return bdrv_do_refresh_perms(refresh_list, q, tran, errp);
+}
+
 void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
   uint64_t *shared_perm)
 {
@@ -2604,7 +2626,7 @@ static int bdrv_refresh_perms(BlockDriverState *bs, 
Transaction *tran,
 tran = local_tran = tran_new();
 }
 
-ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
+ret = bdrv_do_refresh_perms(list, NULL, tran, errp);
 
 if (local_tran) {
 tran_finalize(local_tran, ret);
@@ -4360,7 +4382,6 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, 
Error **errp)
 BlockReopenQueueEntry *bs_entry, *next;
 AioContext *ctx;
 Transaction *tran = tran_new();
-g_autoptr(GHashTable) found = NULL;
 g_autoptr(GSList) refresh_list = NULL;
 
 assert(qemu_get_current_aio_context() == qemu_get_aio_context());
@@ -4390,18 +4411,15 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, 
Error **errp)
 bs_entry->prepared = true;
 }
 
-found = g_hash_table_new(NULL, NULL);
 QTAILQ_FOREACH(bs_entry, bs_queue, entry) {
 BDRVReopenState *state = &bs_entry->state;
 
-refresh_list = bdrv_topological_dfs(refresh_list, found, state->bs);
+refresh_list = g_slist_prepend(refresh_list, state->bs);
 if (state->old_backing_bs) {
-refresh_list = bdrv_topological_dfs(refresh_list, found,
-state->old_backing_bs);
+refresh_list = g_slist_prepend(refresh_list, 
state->old_backing_bs);
 }
 if (state->old_file_bs) {
-refresh_list = bdrv_topological_dfs(refresh_list, found,
-state->old_file_bs);
+refresh_list = g_slist_prepend(refresh_list, state->old_file_bs);
 }
 }
 
@@ -5118,7 +5136,6 @@ static int bdrv_replace_node_common(BlockDriverState 
*from,
 Error **errp)
 {
 Transaction *tran = tran_new();
-g_autoptr(GHashTable) found = NULL;
 g_autoptr(GSList) refresh_list = NULL;
 BlockDriverState *to_cow_parent = NULL;
 int ret;
@@ -5159,10 +5176,8 @@ static int bdrv_replace_node_common(BlockDriverState 
*from,
 bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
 }
 
-found = g_hash_table_new(NULL, NULL);
-
-refresh_list = bdrv_topological_dfs(refresh_list, found, to);
-refresh_list = bdrv_topological_dfs(refresh_list, found, from);
+refresh_list = g_slist_prepend(refresh_list, to);
+refresh_list = g_slist_prepend(refresh_list, from);
 
 ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
 if (ret < 0) {
@@ -5247,7 +5262,6 @@ int bdrv_replace_child_bs(BdrvChild *child, 
BlockDriverState *new_bs,
 {
 int ret;
 Trans

[PULL 01/51] block: Inline bdrv_detach_child()

2022-12-14 Thread Kevin Wolf
From: Vladimir Sementsov-Ogievskiy 

The only caller is bdrv_root_unref_child(), let's just do the logic
directly in it. It simplifies further conversion of
bdrv_root_unref_child() to transaction actions.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Message-Id: <20221107163558.618889-2-vsement...@yandex-team.ru>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c | 46 +++---
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/block.c b/block.c
index a18f052374..c0c1b3df91 100644
--- a/block.c
+++ b/block.c
@@ -3070,30 +3070,6 @@ static BdrvChild 
*bdrv_attach_child_noperm(BlockDriverState *parent_bs,
 tran, errp);
 }
 
-static void bdrv_detach_child(BdrvChild *child)
-{
-BlockDriverState *old_bs = child->bs;
-
-GLOBAL_STATE_CODE();
-bdrv_replace_child_noperm(child, NULL);
-bdrv_child_free(child);
-
-if (old_bs) {
-/*
- * Update permissions for old node. We're just taking a parent away, so
- * we're loosening restrictions. Errors of permission update are not
- * fatal in this case, ignore them.
- */
-bdrv_refresh_perms(old_bs, NULL);
-
-/*
- * When the parent requiring a non-default AioContext is removed, the
- * node moves back to the main AioContext
- */
-bdrv_try_change_aio_context(old_bs, qemu_get_aio_context(), NULL, 
NULL);
-}
-}
-
 /*
  * This function steals the reference to child_bs from the caller.
  * That reference is later dropped by bdrv_root_unref_child().
@@ -3182,12 +3158,28 @@ out:
 /* Callers must ensure that child->frozen is false. */
 void bdrv_root_unref_child(BdrvChild *child)
 {
-BlockDriverState *child_bs;
+BlockDriverState *child_bs = child->bs;
 
 GLOBAL_STATE_CODE();
+bdrv_replace_child_noperm(child, NULL);
+bdrv_child_free(child);
+
+if (child_bs) {
+/*
+ * Update permissions for old node. We're just taking a parent away, so
+ * we're loosening restrictions. Errors of permission update are not
+ * fatal in this case, ignore them.
+ */
+bdrv_refresh_perms(child_bs, NULL);
+
+/*
+ * When the parent requiring a non-default AioContext is removed, the
+ * node moves back to the main AioContext
+ */
+bdrv_try_change_aio_context(child_bs, qemu_get_aio_context(), NULL,
+NULL);
+}
 
-child_bs = child->bs;
-bdrv_detach_child(child);
 bdrv_unref(child_bs);
 }
 
-- 
2.38.1




[PULL 20/51] block-io: introduce coroutine_fn duplicates for bdrv_common_block_status_above callers

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

bdrv_common_block_status_above() is a g_c_w, and it is being called by
many "wrapper" functions like bdrv_is_allocated(),
bdrv_is_allocated_above() and bdrv_block_status_above().

Because we want to eventually split the coroutine from non-coroutine
case in g_c_w, create duplicate wrappers that take care of directly
calling the same coroutine functions called in the g_c_w.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-2-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/block/block-io.h | 15 +++
 block/io.c   | 58 +---
 2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index 92aaa7c1e9..72919254cd 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -94,14 +94,29 @@ bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
 int bdrv_block_status(BlockDriverState *bs, int64_t offset,
   int64_t bytes, int64_t *pnum, int64_t *map,
   BlockDriverState **file);
+
+int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
+BlockDriverState *base,
+int64_t offset, int64_t bytes,
+int64_t *pnum, int64_t *map,
+BlockDriverState **file);
 int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
 int64_t offset, int64_t bytes, int64_t *pnum,
 int64_t *map, BlockDriverState **file);
+
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
+  int64_t bytes, int64_t *pnum);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
   int64_t *pnum);
+
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+BlockDriverState *base,
+bool include_base, int64_t offset,
+int64_t bytes, int64_t *pnum);
 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
 bool include_base, int64_t offset, int64_t bytes,
 int64_t *pnum);
+
 int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
   int64_t bytes);
 
diff --git a/block/io.c b/block/io.c
index 38e57d1f67..fb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2533,6 +2533,17 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
 return ret;
 }
 
+int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
+BlockDriverState *base,
+int64_t offset, int64_t bytes,
+int64_t *pnum, int64_t *map,
+BlockDriverState **file)
+{
+IO_CODE();
+return bdrv_co_common_block_status_above(bs, base, false, true, offset,
+ bytes, pnum, map, file, NULL);
+}
+
 int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
 int64_t offset, int64_t bytes, int64_t *pnum,
 int64_t *map, BlockDriverState **file)
@@ -2578,6 +2589,22 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState 
*bs, int64_t offset,
 return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
 }
 
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
+  int64_t bytes, int64_t *pnum)
+{
+int ret;
+int64_t dummy;
+IO_CODE();
+
+ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset,
+bytes, pnum ? pnum : &dummy, NULL,
+NULL, NULL);
+if (ret < 0) {
+return ret;
+}
+return !!(ret & BDRV_BLOCK_ALLOCATED);
+}
+
 int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
   int64_t *pnum)
 {
@@ -2594,6 +2621,29 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t 
offset, int64_t bytes,
 return !!(ret & BDRV_BLOCK_ALLOCATED);
 }
 
+/* See bdrv_is_allocated_above for documentation */
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+BlockDriverState *base,
+bool include_base, int64_t offset,
+int64_t bytes, int64_t *pnum)
+{
+int depth;
+int ret;
+IO_CODE();
+
+ret = bdrv_co_common_block_sta

[PULL 49/51] block-coroutine-wrapper.py: introduce annotations that take the graph rdlock

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Add co_wrapper_bdrv_rdlock and co_wrapper_mixed_bdrv_rdlock option to
the block-coroutine-wrapper.py script.

This "_bdrv_rdlock" option takes and releases the graph rdlock when a
coroutine function is created.

This means that when used together with "_mixed", the function marked
with co_wrapper_mixed_bdrv_rdlock will support both coroutine and
non-coroutine case, and in the latter case it will create a coroutine
that takes and releases the rdlock. When called from a coroutine, the
caller must already hold the graph lock.

Example:
void co_wrapper_mixed_bdrv_rdlock bdrv_f1();

Becomes

static void bdrv_co_enter_f1()
{
bdrv_graph_co_rdlock();
bdrv_co_function();
bdrv_graph_co_rdunlock();
}

void bdrv_f1()
{
if (qemu_in_coroutine) {
assume_graph_lock();
bdrv_co_function();
} else {
qemu_co_enter(bdrv_co_enter_f1);
...
}
}

When used alone, the function will not work in coroutine context, and
when called in non-coroutine context it will create a new coroutine that
takes care of taking and releasing the rdlock automatically.

Example:
void co_wrapper_bdrv_rdlock bdrv_f1();

Becomes

static void bdrv_co_enter_f1()
{
bdrv_graph_co_rdlock();
bdrv_co_function();
bdrv_graph_co_rdunlock();
}

void bdrv_f1()
{
assert(!qemu_in_coroutine());
qemu_co_enter(bdrv_co_enter_f1);
...
}

About their usage:
- co_wrapper does not take the rdlock, so it can be used also outside
  the block layer.
- co_wrapper_mixed will be used by many blk_* functions, since the
  coroutine function needs to call blk_wait_while_drained() and
  the rdlock *must* be taken afterwards, otherwise it's a deadlock.
  In the future this annotation will go away, and blk_* will use
  co_wrapper directly.
- co_wrapper_bdrv_rdlock will be used by BlockDriver callbacks, ideally
  by all of them in the future.
- co_wrapper_mixed_bdrv_rdlock will be used by the remaining functions
  that are still called by coroutine and non-coroutine context. In the
  future this annotation will go away, as we will split such mixed
  functions.

Signed-off-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-17-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 include/block/block-common.h   |  9 -
 scripts/block-coroutine-wrapper.py | 12 
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/block/block-common.h b/include/block/block-common.h
index 6cf603ab06..4749c46a5e 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -40,14 +40,21 @@
  *
  * Usage: read docs/devel/block-coroutine-wrapper.rst
  *
- * There are 2 kind of specifiers:
+ * There are 4 kind of specifiers:
  * - co_wrapper functions can be called by only non-coroutine context, because
  *   they always generate a new coroutine.
  * - co_wrapper_mixed functions can be called by both coroutine and
  *   non-coroutine context.
+ * - co_wrapper_bdrv_rdlock are co_wrapper functions but automatically take and
+ *   release the graph rdlock when creating a new coroutine
+ * - co_wrapper_mixed_bdrv_rdlock are co_wrapper_mixed functions but
+ *   automatically take and release the graph rdlock when creating a new
+ *   coroutine.
  */
 #define co_wrapper
 #define co_wrapper_mixed
+#define co_wrapper_bdrv_rdlock
+#define co_wrapper_mixed_bdrv_rdlock
 
 #include "block/dirty-bitmap.h"
 #include "block/blockjob.h"
diff --git a/scripts/block-coroutine-wrapper.py 
b/scripts/block-coroutine-wrapper.py
index 71a06e917a..6e087fa0b7 100644
--- a/scripts/block-coroutine-wrapper.py
+++ b/scripts/block-coroutine-wrapper.py
@@ -69,6 +69,7 @@ def __init__(self, return_type: str, name: str, args: str,
 self.struct_name = snake_to_camel(self.name)
 self.args = [ParamDecl(arg.strip()) for arg in args.split(',')]
 self.create_only_co = 'mixed' not in variant
+self.graph_rdlock = 'bdrv_rdlock' in variant
 
 subsystem, subname = self.name.split('_', 1)
 self.co_name = f'{subsystem}_co_{subname}'
@@ -123,10 +124,13 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
 """
 name = func.co_name
 struct_name = func.struct_name
+graph_assume_lock = 'assume_graph_lock();' if func.graph_rdlock else ''
+
 return f"""\
 {func.return_type} {func.name}({ func.gen_list('{decl}') })
 {{
 if (qemu_in_coroutine()) {{
+{graph_assume_lock}
 return {name}({ func.gen_list('{name}') });
 }} else {{
 {struct_name} s = {{
@@ -174,6 +178,12 @@ def gen_wrapper(func: FuncDecl) -> str:
 name = func.co_name
 struct_name = func.struct_name
 
+graph_lock=''
+graph_unlock=''
+if func.graph_rdlock:
+graph_lock='bdrv_graph_co_rdlock();'
+graph_unlock='bdrv_graph_co_rdunlock();'
+
 creation_function = create_mixed_wrapper
 if func.creat

[PULL 21/51] block-copy: add coroutine_fn annotations

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

These functions end up calling bdrv_common_block_status_above(), a
generated_co_wrapper function.
In addition, they also happen to be always called in coroutine context,
meaning all callers are coroutine_fn.
This means that the g_c_w function will enter the qemu_in_coroutine()
case and eventually suspend (or in other words call qemu_coroutine_yield()).
Therefore we can mark such functions coroutine_fn too.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Paolo Bonzini 
Reviewed-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-3-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/block/block-copy.h |  5 +++--
 block/block-copy.c | 21 -
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index ba0b425d78..8cea4f9b90 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -36,8 +36,9 @@ void block_copy_set_progress_meter(BlockCopyState *s, 
ProgressMeter *pm);
 void block_copy_state_free(BlockCopyState *s);
 
 void block_copy_reset(BlockCopyState *s, int64_t offset, int64_t bytes);
-int64_t block_copy_reset_unallocated(BlockCopyState *s,
- int64_t offset, int64_t *count);
+int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
+  int64_t offset,
+  int64_t *count);
 
 int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
 bool ignore_ratelimit, uint64_t timeout_ns,
diff --git a/block/block-copy.c b/block/block-copy.c
index bb947afdda..5e59d6262f 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -577,8 +577,9 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
 return ret;
 }
 
-static int block_copy_block_status(BlockCopyState *s, int64_t offset,
-   int64_t bytes, int64_t *pnum)
+static coroutine_fn int block_copy_block_status(BlockCopyState *s,
+int64_t offset,
+int64_t bytes, int64_t *pnum)
 {
 int64_t num;
 BlockDriverState *base;
@@ -590,8 +591,8 @@ static int block_copy_block_status(BlockCopyState *s, 
int64_t offset,
 base = NULL;
 }
 
-ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
-  NULL, NULL);
+ret = bdrv_co_block_status_above(s->source->bs, base, offset, bytes, &num,
+ NULL, NULL);
 if (ret < 0 || num < s->cluster_size) {
 /*
  * On error or if failed to obtain large enough chunk just fallback to
@@ -613,8 +614,9 @@ static int block_copy_block_status(BlockCopyState *s, 
int64_t offset,
  * Check if the cluster starting at offset is allocated or not.
  * return via pnum the number of contiguous clusters sharing this allocation.
  */
-static int block_copy_is_cluster_allocated(BlockCopyState *s, int64_t offset,
-   int64_t *pnum)
+static int coroutine_fn block_copy_is_cluster_allocated(BlockCopyState *s,
+int64_t offset,
+int64_t *pnum)
 {
 BlockDriverState *bs = s->source->bs;
 int64_t count, total_count = 0;
@@ -624,7 +626,7 @@ static int block_copy_is_cluster_allocated(BlockCopyState 
*s, int64_t offset,
 assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
 
 while (true) {
-ret = bdrv_is_allocated(bs, offset, bytes, &count);
+ret = bdrv_co_is_allocated(bs, offset, bytes, &count);
 if (ret < 0) {
 return ret;
 }
@@ -669,8 +671,9 @@ void block_copy_reset(BlockCopyState *s, int64_t offset, 
int64_t bytes)
  * @return 0 when the cluster at @offset was unallocated,
  * 1 otherwise, and -ret on error.
  */
-int64_t block_copy_reset_unallocated(BlockCopyState *s,
- int64_t offset, int64_t *count)
+int64_t coroutine_fn block_copy_reset_unallocated(BlockCopyState *s,
+  int64_t offset,
+  int64_t *count)
 {
 int ret;
 int64_t clusters, bytes;
-- 
2.38.1




[PULL 48/51] Mark assert_bdrv_graph_readable/writable() GRAPH_RD/WRLOCK

2022-12-14 Thread Kevin Wolf
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-16-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 include/block/block_int-common.h | 4 ++--
 include/block/graph-lock.h   | 4 ++--
 block.c  | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index a6bc6b7fe9..b1f0d88307 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -898,8 +898,8 @@ struct BdrvChildClass {
 void (*activate)(BdrvChild *child, Error **errp);
 int (*inactivate)(BdrvChild *child);
 
-void (*attach)(BdrvChild *child);
-void (*detach)(BdrvChild *child);
+void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child);
+void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child);
 
 /*
  * Notifies the parent that the filename of its child has changed (e.g.
diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
index 50b7e7b1b6..0c66386167 100644
--- a/include/block/graph-lock.h
+++ b/include/block/graph-lock.h
@@ -176,14 +176,14 @@ bdrv_graph_rdunlock_main_loop(void);
  * or there is at least a reader helding the rdlock.
  * In this way an incoming writer is aware of the read and waits.
  */
-void assert_bdrv_graph_readable(void);
+void GRAPH_RDLOCK assert_bdrv_graph_readable(void);
 
 /*
  * assert_bdrv_graph_writable:
  * Make sure that the writer is the main loop and has set @has_writer,
  * so that incoming readers will pause.
  */
-void assert_bdrv_graph_writable(void);
+void GRAPH_WRLOCK assert_bdrv_graph_writable(void);
 
 /*
  * Calling this function tells TSA that we know that the lock is effectively
diff --git a/block.c b/block.c
index ff53b41af3..1a82fd101a 100644
--- a/block.c
+++ b/block.c
@@ -1402,7 +1402,7 @@ static void bdrv_inherited_options(BdrvChildRole role, 
bool parent_is_format,
 *child_flags = flags;
 }
 
-static void bdrv_child_cb_attach(BdrvChild *child)
+static void GRAPH_WRLOCK bdrv_child_cb_attach(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
 
@@ -1444,7 +1444,7 @@ static void bdrv_child_cb_attach(BdrvChild *child)
 }
 }
 
-static void bdrv_child_cb_detach(BdrvChild *child)
+static void GRAPH_WRLOCK bdrv_child_cb_detach(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
 
-- 
2.38.1




[PULL 15/51] block: Call drain callbacks only once

2022-12-14 Thread Kevin Wolf
We only need to call both the BlockDriver's callback and the parent
callbacks when going from undrained to drained or vice versa. A second
drain section doesn't make a difference for the driver or the parent,
they weren't supposed to send new requests before and after the second
drain.

One thing that gets in the way is the 'ignore_bds_parents' parameter in
bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that
bdrv_drain_all_begin() increases bs->quiesce_counter, but does not
quiesce the parent through BdrvChildClass callbacks. If an additional
drain section is started now, bs->quiesce_counter will be non-zero, but
we would still need to quiesce the parent through BdrvChildClass in
order to keep things consistent (and unquiesce it on the matching
bdrv_drained_end(), even though the counter would not reach 0 yet as
long as the bdrv_drain_all() section is still active).

Instead of keeping track of this, let's just get rid of the parameter.
It was introduced in commit 6cd5c9d7b2d as an optimisation so that
during bdrv_drain_all(), we wouldn't recursively drain all parents up to
the root for each node, resulting in quadratic complexity. As it happens,
calling the callbacks only once solves the same problem, so as of this
patch, we'll still have O(n) complexity and ignore_bds_parents is not
needed any more.

This patch only ignores the 'ignore_bds_parents' parameter. It will be
removed in a separate patch.

Signed-off-by: Kevin Wolf 
Reviewed-by: Hanna Reitz 
Message-Id: <20221118174110.55183-12-kw...@redhat.com>
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 include/block/block_int-common.h |  8 
 block.c  | 25 +++--
 block/io.c   | 30 ++
 tests/unit/test-bdrv-drain.c | 16 ++--
 4 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 791dddfd7d..a6bc6b7fe9 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -980,13 +980,13 @@ struct BdrvChild {
 bool frozen;
 
 /*
- * How many times the parent of this child has been drained
+ * True if the parent of this child has been drained by this BdrvChild
  * (through klass->drained_*).
- * Usually, this is equal to bs->quiesce_counter (potentially
- * reduced by bdrv_drain_all_count).  It may differ while the
+ *
+ * It is generally true if bs->quiesce_counter > 0. It may differ while the
  * child is entering or leaving a drained section.
  */
-int parent_quiesce_counter;
+bool quiesced_parent;
 
 QLIST_ENTRY(BdrvChild) next;
 QLIST_ENTRY(BdrvChild) next_parent;
diff --git a/block.c b/block.c
index 7ea0b82049..b8bab06e55 100644
--- a/block.c
+++ b/block.c
@@ -2855,7 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 {
 BlockDriverState *old_bs = child->bs;
 int new_bs_quiesce_counter;
-int drain_saldo;
 
 assert(!child->frozen);
 assert(old_bs != new_bs);
@@ -2865,16 +2864,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
 }
 
-new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
-drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
-
 /*
  * If the new child node is drained but the old one was not, flush
  * all outstanding requests to the old child node.
  */
-while (drain_saldo > 0 && child->klass->drained_begin) {
+new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+if (new_bs_quiesce_counter && !child->quiesced_parent) {
 bdrv_parent_drained_begin_single(child, true);
-drain_saldo--;
 }
 
 if (old_bs) {
@@ -2890,16 +2886,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 if (new_bs) {
 assert_bdrv_graph_writable(new_bs);
 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
-
-/*
- * Polling in bdrv_parent_drained_begin_single() may have led to the 
new
- * node's quiesce_counter having been decreased.  Not a problem, we 
just
- * need to recognize this here and then invoke drained_end 
appropriately
- * more often.
- */
-assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
-drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
-
 if (child->klass->attach) {
 child->klass->attach(child);
 }
@@ -2908,10 +2894,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 /*
  * If the old child node was drained but the new one is not, allow
  * requests to come in only after the new node has been attached.
+ *
+ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
+ * polls, which could have changed the value.
 

[PULL 03/51] block: bdrv_refresh_perms(): allow external tran

2022-12-14 Thread Kevin Wolf
From: Vladimir Sementsov-Ogievskiy 

Allow passing external Transaction pointer, stop creating extra
Transaction objects.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Message-Id: <20221107163558.618889-4-vsement...@yandex-team.ru>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c | 31 ---
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/block.c b/block.c
index dc761209ac..f2f9178832 100644
--- a/block.c
+++ b/block.c
@@ -2591,15 +2591,24 @@ char *bdrv_perm_names(uint64_t perm)
 }
 
 
-static int bdrv_refresh_perms(BlockDriverState *bs, Error **errp)
+/* @tran is allowed to be NULL. In this case no rollback is possible */
+static int bdrv_refresh_perms(BlockDriverState *bs, Transaction *tran,
+  Error **errp)
 {
 int ret;
-Transaction *tran = tran_new();
+Transaction *local_tran = NULL;
 g_autoptr(GSList) list = bdrv_topological_dfs(NULL, NULL, bs);
 GLOBAL_STATE_CODE();
 
+if (!tran) {
+tran = local_tran = tran_new();
+}
+
 ret = bdrv_list_refresh_perms(list, NULL, tran, errp);
-tran_finalize(tran, ret);
+
+if (local_tran) {
+tran_finalize(local_tran, ret);
+}
 
 return ret;
 }
@@ -2615,7 +2624,7 @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, 
uint64_t shared,
 
 bdrv_child_set_perm(c, perm, shared, tran);
 
-ret = bdrv_refresh_perms(c->bs, &local_err);
+ret = bdrv_refresh_perms(c->bs, tran, &local_err);
 
 tran_finalize(tran, ret);
 
@@ -3099,7 +3108,7 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState 
*child_bs,
 goto out;
 }
 
-ret = bdrv_refresh_perms(child_bs, errp);
+ret = bdrv_refresh_perms(child_bs, tran, errp);
 
 out:
 tran_finalize(tran, ret);
@@ -3140,7 +3149,7 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
 goto out;
 }
 
-ret = bdrv_refresh_perms(parent_bs, errp);
+ret = bdrv_refresh_perms(parent_bs, tran, errp);
 if (ret < 0) {
 goto out;
 }
@@ -3168,7 +3177,7 @@ void bdrv_root_unref_child(BdrvChild *child)
  * we're loosening restrictions. Errors of permission update are not
  * fatal in this case, ignore them.
  */
-bdrv_refresh_perms(child_bs, NULL);
+bdrv_refresh_perms(child_bs, NULL, NULL);
 
 /*
  * When the parent requiring a non-default AioContext is removed, the
@@ -3410,7 +3419,7 @@ int bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd,
 goto out;
 }
 
-ret = bdrv_refresh_perms(bs, errp);
+ret = bdrv_refresh_perms(bs, tran, errp);
 out:
 tran_finalize(tran, ret);
 
@@ -5223,7 +5232,7 @@ int bdrv_append(BlockDriverState *bs_new, 
BlockDriverState *bs_top,
 goto out;
 }
 
-ret = bdrv_refresh_perms(bs_new, errp);
+ret = bdrv_refresh_perms(bs_new, tran, errp);
 out:
 tran_finalize(tran, ret);
 
@@ -6523,7 +6532,7 @@ int bdrv_activate(BlockDriverState *bs, Error **errp)
  */
 if (bs->open_flags & BDRV_O_INACTIVE) {
 bs->open_flags &= ~BDRV_O_INACTIVE;
-ret = bdrv_refresh_perms(bs, errp);
+ret = bdrv_refresh_perms(bs, NULL, errp);
 if (ret < 0) {
 bs->open_flags |= BDRV_O_INACTIVE;
 return ret;
@@ -6668,7 +6677,7 @@ static int bdrv_inactivate_recurse(BlockDriverState *bs)
  * We only tried to loosen restrictions, so errors are not fatal, ignore
  * them.
  */
-bdrv_refresh_perms(bs, NULL);
+bdrv_refresh_perms(bs, NULL, NULL);
 
 /* Recursively inactivate children */
 QLIST_FOREACH(child, &bs->children, next) {
-- 
2.38.1




[PULL 33/51] block/dirty-bitmap: convert coroutine-only functions to co_wrapper

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

bdrv_can_store_new_dirty_bitmap and bdrv_remove_persistent_dirty_bitmap
check if they are running in a coroutine, directly calling the
coroutine callback if it's the case.
Except that no coroutine calls such functions, therefore that check
can be removed, and function creation can be offloaded to
c_w.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-15-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/block/block-common.h |  5 +-
 include/block/block-io.h | 10 +++-
 include/block/dirty-bitmap.h | 10 +++-
 block/dirty-bitmap.c | 88 +---
 block/meson.build|  1 +
 5 files changed, 22 insertions(+), 92 deletions(-)

diff --git a/include/block/block-common.h b/include/block/block-common.h
index 847e4d4626..6cf603ab06 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -29,8 +29,6 @@
 #include "qemu/iov.h"
 #include "qemu/coroutine.h"
 #include "block/accounting.h"
-#include "block/dirty-bitmap.h"
-#include "block/blockjob.h"
 #include "qemu/hbitmap.h"
 #include "qemu/transactions.h"
 
@@ -51,6 +49,9 @@
 #define co_wrapper
 #define co_wrapper_mixed
 
+#include "block/dirty-bitmap.h"
+#include "block/blockjob.h"
+
 /* block.c */
 typedef struct BlockDriver BlockDriver;
 typedef struct BdrvChild BdrvChild;
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 72cf45975b..52869ea08e 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -215,8 +215,14 @@ AioContext *child_of_bds_get_parent_aio_context(BdrvChild 
*c);
 void bdrv_io_plug(BlockDriverState *bs);
 void bdrv_io_unplug(BlockDriverState *bs);
 
-bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
- uint32_t granularity, Error **errp);
+bool coroutine_fn bdrv_co_can_store_new_dirty_bitmap(BlockDriverState *bs,
+ const char *name,
+ uint32_t granularity,
+ Error **errp);
+bool co_wrapper bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs,
+const char *name,
+uint32_t granularity,
+Error **errp);
 
 /**
  *
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 6528336c4c..c3700cec76 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -34,8 +34,14 @@ int bdrv_dirty_bitmap_check(const BdrvDirtyBitmap *bitmap, 
uint32_t flags,
 Error **errp);
 void bdrv_release_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
-int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
-Error **errp);
+
+int coroutine_fn bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+const char *name,
+Error **errp);
+int co_wrapper bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs,
+   const char *name,
+   Error **errp);
+
 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
 void bdrv_enable_dirty_bitmap_locked(BdrvDirtyBitmap *bitmap);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index bf3dc0512a..21cf592889 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -388,7 +388,7 @@ void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
  * not fail.
  * This function doesn't release corresponding BdrvDirtyBitmap.
  */
-static int coroutine_fn
+int coroutine_fn
 bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
Error **errp)
 {
@@ -399,45 +399,6 @@ bdrv_co_remove_persistent_dirty_bitmap(BlockDriverState 
*bs, const char *name,
 return 0;
 }
 
-typedef struct BdrvRemovePersistentDirtyBitmapCo {
-BlockDriverState *bs;
-const char *name;
-Error **errp;
-int ret;
-} BdrvRemovePersistentDirtyBitmapCo;
-
-static void coroutine_fn
-bdrv_co_remove_persistent_dirty_bitmap_entry(void *opaque)
-{
-BdrvRemovePersistentDirtyBitmapCo *s = opaque;
-
-s->ret = bdrv_co_remove_persistent_dirty_bitmap(s->bs, s->name, s->errp);
-aio_wait_kick();
-}
-
-int bdrv_remove_persistent_dirty_bitmap(BlockDriverState *bs, const char *name,
-Error **errp)
-{
-if (qemu_in_coroutine()) {
-return bdrv_co_remove_persistent_dirty_bitmap(bs, name, errp);
-} else {

[PULL 16/51] block: Remove ignore_bds_parents parameter from drain_begin/end.

2022-12-14 Thread Kevin Wolf
ignore_bds_parents is now ignored during drain_begin and drain_end, so
we can just remove it there. It is still a valid optimisation for
drain_all in bdrv_drained_poll(), so leave it around there.

Signed-off-by: Kevin Wolf 
Message-Id: <20221118174110.55183-13-kw...@redhat.com>
Reviewed-by: Hanna Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 include/block/block-io.h |  3 +--
 block.c  |  2 +-
 block/io.c   | 58 +++-
 3 files changed, 24 insertions(+), 39 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index 9c36a16a1f..8f5e75756a 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs);
  * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
  * running requests to complete.
  */
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
-   BdrvChild *parent, bool ignore_bds_parents);
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent);
 
 /**
  * bdrv_drained_end:
diff --git a/block.c b/block.c
index b8bab06e55..1a2a8d9de9 100644
--- a/block.c
+++ b/block.c
@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
 static void bdrv_child_cb_drained_begin(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
-bdrv_do_drained_begin_quiesce(bs, NULL, false);
+bdrv_do_drained_begin_quiesce(bs, NULL);
 }
 
 static bool bdrv_child_cb_drained_poll(BdrvChild *child)
diff --git a/block/io.c b/block/io.c
index 87d6f22ec4..2e9503df6a 100644
--- a/block/io.c
+++ b/block/io.c
@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs);
 static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
 int64_t offset, int64_t bytes, BdrvRequestFlags flags);
 
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
-  bool ignore_bds_parents)
+static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
 {
 BdrvChild *c, *next;
 
 QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
-if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+if (c == ignore) {
 continue;
 }
 bdrv_parent_drained_begin_single(c, false);
@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
 }
 }
 
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
-bool ignore_bds_parents)
+static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
 {
 BdrvChild *c;
 
 QLIST_FOREACH(c, &bs->parents, next_parent) {
-if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+if (c == ignore) {
 continue;
 }
 bdrv_parent_drained_end_single(c);
@@ -242,7 +240,6 @@ typedef struct {
 bool begin;
 bool poll;
 BdrvChild *parent;
-bool ignore_bds_parents;
 } BdrvCoDrainData;
 
 /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
 }
 
 static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
-  bool ignore_bds_parents, bool poll);
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
-bool ignore_bds_parents);
+  bool poll);
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
 
 static void bdrv_co_drain_bh_cb(void *opaque)
 {
@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 aio_context_acquire(ctx);
 bdrv_dec_in_flight(bs);
 if (data->begin) {
-bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
-  data->poll);
+bdrv_do_drained_begin(bs, data->parent, data->poll);
 } else {
 assert(!data->poll);
-bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
+bdrv_do_drained_end(bs, data->parent);
 }
 aio_context_release(ctx);
 } else {
@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque)
 static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
 bool begin,
 BdrvChild *parent,
-bool ignore_bds_parents,
 bool poll)
 {
 BdrvCoDrainData data;
@@ -321,7 +315,6 @@ static void coroutine_fn 
bdrv_co_yield_to_drain(BlockDriverState *bs,
 .done = false,
 .begin = begin,
 .parent = parent,
-.ignore_bds_parents = ignore_bds_pare

[PULL 27/51] block: bdrv_create_file is a coroutine_fn

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

It is always called in coroutine_fn callbacks, therefore
it can directly call bdrv_co_create().

Rename it to bdrv_co_create_file too.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-9-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/block/block-global-state.h | 3 ++-
 block.c| 5 +++--
 block/crypto.c | 2 +-
 block/parallels.c  | 2 +-
 block/qcow.c   | 2 +-
 block/qcow2.c  | 4 ++--
 block/qed.c| 2 +-
 block/raw-format.c | 2 +-
 block/vdi.c| 2 +-
 block/vhdx.c   | 2 +-
 block/vmdk.c   | 2 +-
 block/vpc.c| 2 +-
 12 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/include/block/block-global-state.h 
b/include/block/block-global-state.h
index 00e0cf8aea..387a7cbb2e 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -57,7 +57,8 @@ BlockDriver *bdrv_find_protocol(const char *filename,
 BlockDriver *bdrv_find_format(const char *format_name);
 int bdrv_create(BlockDriver *drv, const char* filename,
 QemuOpts *opts, Error **errp);
-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
+int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
+ Error **errp);
 
 BlockDriverState *bdrv_new(void);
 int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
diff --git a/block.c b/block.c
index 0a1d484a27..c8ac91eb63 100644
--- a/block.c
+++ b/block.c
@@ -719,7 +719,8 @@ out:
 return ret;
 }
 
-int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
+int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
+ Error **errp)
 {
 QemuOpts *protocol_opts;
 BlockDriver *drv;
@@ -760,7 +761,7 @@ int bdrv_create_file(const char *filename, QemuOpts *opts, 
Error **errp)
 goto out;
 }
 
-ret = bdrv_create(drv, filename, protocol_opts, errp);
+ret = bdrv_co_create(drv, filename, protocol_opts, errp);
 out:
 qemu_opts_del(protocol_opts);
 qobject_unref(qdict);
diff --git a/block/crypto.c b/block/crypto.c
index 2fb8add458..bbeb9f437c 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -703,7 +703,7 @@ static int coroutine_fn 
block_crypto_co_create_opts_luks(BlockDriver *drv,
 }
 
 /* Create protocol layer */
-ret = bdrv_create_file(filename, opts, errp);
+ret = bdrv_co_create_file(filename, opts, errp);
 if (ret < 0) {
 goto fail;
 }
diff --git a/block/parallels.c b/block/parallels.c
index fa08c1104b..bbea2f2221 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -646,7 +646,7 @@ static int coroutine_fn 
parallels_co_create_opts(BlockDriver *drv,
 }
 
 /* Create and open the file (protocol layer) */
-ret = bdrv_create_file(filename, opts, errp);
+ret = bdrv_co_create_file(filename, opts, errp);
 if (ret < 0) {
 goto done;
 }
diff --git a/block/qcow.c b/block/qcow.c
index daa38839ab..18e17a5b12 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -973,7 +973,7 @@ static int coroutine_fn qcow_co_create_opts(BlockDriver 
*drv,
 }
 
 /* Create and open the file (protocol layer) */
-ret = bdrv_create_file(filename, opts, errp);
+ret = bdrv_co_create_file(filename, opts, errp);
 if (ret < 0) {
 goto fail;
 }
diff --git a/block/qcow2.c b/block/qcow2.c
index 4dd3ff..7cc49a3a6c 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3871,7 +3871,7 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver 
*drv,
 }
 
 /* Create and open the file (protocol layer) */
-ret = bdrv_create_file(filename, opts, errp);
+ret = bdrv_co_create_file(filename, opts, errp);
 if (ret < 0) {
 goto finish;
 }
@@ -3886,7 +3886,7 @@ static int coroutine_fn qcow2_co_create_opts(BlockDriver 
*drv,
 /* Create and open an external data file (protocol layer) */
 val = qdict_get_try_str(qdict, BLOCK_OPT_DATA_FILE);
 if (val) {
-ret = bdrv_create_file(val, opts, errp);
+ret = bdrv_co_create_file(val, opts, errp);
 if (ret < 0) {
 goto finish;
 }
diff --git a/block/qed.c b/block/qed.c
index c2691a85b1..9d54c8eec5 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -778,7 +778,7 @@ static int coroutine_fn bdrv_qed_co_create_opts(BlockDriver 
*drv,
 }
 
 /* Create and open the file (protocol layer) */
-ret = bdrv_create_file(filename, opts, errp);
+ret = bdrv_co_create_file(filename, opts, errp);
 if (ret < 0) {
 goto fail;
 }
diff --git a/block/raw-format.c b/block/raw-format.c
index a68014ef0b..28905b09ee 100644
--- a/block/raw-f

[PULL 11/51] block: Drain individual nodes during reopen

2022-12-14 Thread Kevin Wolf
bdrv_reopen() and friends use subtree drains as a lazy way of covering
all the nodes they touch. Turns out that this lazy way is a lot more
complicated than just draining the nodes individually, even not
accounting for the additional complexity in the drain mechanism itself.

Simplify the code by switching to draining the individual nodes that are
already managed in the BlockReopenQueue anyway.

Signed-off-by: Kevin Wolf 
Message-Id: <20221118174110.55183-8-kw...@redhat.com>
Reviewed-by: Hanna Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 block.c | 16 +---
 block/replication.c |  6 --
 blockdev.c  | 13 -
 3 files changed, 9 insertions(+), 26 deletions(-)

diff --git a/block.c b/block.c
index 3266519455..dd329a16ce 100644
--- a/block.c
+++ b/block.c
@@ -4173,7 +4173,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
  * returns a pointer to bs_queue, which is either the newly allocated
  * bs_queue, or the existing bs_queue being used.
  *
- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
+ * bs is drained here and undrained by bdrv_reopen_queue_free().
  *
  * To be called with bs->aio_context locked.
  */
@@ -4195,12 +4195,10 @@ static BlockReopenQueue 
*bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
 int flags;
 QemuOpts *opts;
 
-/* Make sure that the caller remembered to use a drained section. This is
- * important to avoid graph changes between the recursive queuing here and
- * bdrv_reopen_multiple(). */
-assert(bs->quiesce_counter > 0);
 GLOBAL_STATE_CODE();
 
+bdrv_drained_begin(bs);
+
 if (bs_queue == NULL) {
 bs_queue = g_new0(BlockReopenQueue, 1);
 QTAILQ_INIT(bs_queue);
@@ -4351,6 +4349,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
 if (bs_queue) {
 BlockReopenQueueEntry *bs_entry, *next;
 QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
+
+aio_context_acquire(ctx);
+bdrv_drained_end(bs_entry->state.bs);
+aio_context_release(ctx);
+
 qobject_unref(bs_entry->state.explicit_options);
 qobject_unref(bs_entry->state.options);
 g_free(bs_entry);
@@ -4494,7 +4498,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool 
keep_old_opts,
 
 GLOBAL_STATE_CODE();
 
-bdrv_subtree_drained_begin(bs);
 queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
 
 if (ctx != qemu_get_aio_context()) {
@@ -4505,7 +4508,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool 
keep_old_opts,
 if (ctx != qemu_get_aio_context()) {
 aio_context_acquire(ctx);
 }
-bdrv_subtree_drained_end(bs);
 
 return ret;
 }
diff --git a/block/replication.c b/block/replication.c
index f1eed25e43..c62f48a874 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool 
writable,
 s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
 }
 
-bdrv_subtree_drained_begin(hidden_disk->bs);
-bdrv_subtree_drained_begin(secondary_disk->bs);
-
 if (s->orig_hidden_read_only) {
 QDict *opts = qdict_new();
 qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool 
writable,
 aio_context_acquire(ctx);
 }
 }
-
-bdrv_subtree_drained_end(hidden_disk->bs);
-bdrv_subtree_drained_end(secondary_disk->bs);
 }
 
 static void backup_job_cleanup(BlockDriverState *bs)
diff --git a/blockdev.c b/blockdev.c
index 3f1dec6242..8ffb3d9537 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -3547,8 +3547,6 @@ fail:
 void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
 {
 BlockReopenQueue *queue = NULL;
-GSList *drained = NULL;
-GSList *p;
 
 /* Add each one of the BDS that we want to reopen to the queue */
 for (; reopen_list != NULL; reopen_list = reopen_list->next) {
@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList 
*reopen_list, Error **errp)
 ctx = bdrv_get_aio_context(bs);
 aio_context_acquire(ctx);
 
-bdrv_subtree_drained_begin(bs);
 queue = bdrv_reopen_queue(queue, bs, qdict, false);
-drained = g_slist_prepend(drained, bs);
 
 aio_context_release(ctx);
 }
@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList 
*reopen_list, Error **errp)
 
 fail:
 bdrv_reopen_queue_free(queue);
-for (p = drained; p; p = p->next) {
-BlockDriverState *bs = p->data;
-AioContext *ctx = bdrv_get_aio_context(bs);
-
-aio_context_acquire(ctx);
-bdrv_subtree_drained_end(bs);
-aio_context_release(ctx);
-}
-g_slist_free(drained);
 }
 
 void qmp_blockdev_del(const char *node_na

[PULL 12/51] block: Don't use subtree drains in bdrv_drop_intermediate()

2022-12-14 Thread Kevin Wolf
Instead of using a subtree drain from the top node (which also drains
child nodes of base that we're not even interested in), use a normal
drain for base, which automatically drains all of the parents, too.

Signed-off-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Message-Id: <20221118174110.55183-9-kw...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index dd329a16ce..db043346d8 100644
--- a/block.c
+++ b/block.c
@@ -5600,7 +5600,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, 
BlockDriverState *base,
 GLOBAL_STATE_CODE();
 
 bdrv_ref(top);
-bdrv_subtree_drained_begin(top);
+bdrv_drained_begin(base);
 
 if (!top->drv || !base->drv) {
 goto exit;
@@ -5673,7 +5673,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, 
BlockDriverState *base,
 
 ret = 0;
 exit:
-bdrv_subtree_drained_end(top);
+bdrv_drained_end(base);
 bdrv_unref(top);
 return ret;
 }
-- 
2.38.1




[PULL 14/51] block: Remove subtree drains

2022-12-14 Thread Kevin Wolf
Subtree drains are not used any more. Remove them.

After this, BdrvChildClass.attach/detach() don't poll any more.

Signed-off-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Message-Id: <20221118174110.55183-11-kw...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/block/block-io.h |  18 +--
 include/block/block_int-common.h |   1 -
 include/block/block_int-io.h |  12 --
 block.c  |  20 +--
 block/io.c   | 121 +++---
 tests/unit/test-bdrv-drain.c | 261 ++-
 6 files changed, 44 insertions(+), 389 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index 054e964c9b..9c36a16a1f 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
 /**
  * bdrv_drain_poll:
  *
- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
- * and if @recursive is true its children as well (used for subtree drain).
+ * Poll for pending requests in @bs and its parents (except for 
@ignore_parent).
  *
  * If @ignore_bds_parents is true, parents that are BlockDriverStates must
  * ignore the drain request because they will be drained separately (used for
@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
  *
  * This is part of bdrv_drained_begin.
  */
-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
- BdrvChild *ignore_parent, bool ignore_bds_parents);
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+ bool ignore_bds_parents);
 
 /**
  * bdrv_drained_begin:
@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs);
 void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents);
 
-/**
- * Like bdrv_drained_begin, but recursively begins a quiesced section for
- * exclusive access to all child nodes as well.
- */
-void bdrv_subtree_drained_begin(BlockDriverState *bs);
-
 /**
  * bdrv_drained_end:
  *
@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
  */
 void bdrv_drained_end(BlockDriverState *bs);
 
-/**
- * End a quiescent section started by bdrv_subtree_drained_begin().
- */
-void bdrv_subtree_drained_end(BlockDriverState *bs);
-
 #endif /* BLOCK_IO_H */
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 2b97576f6d..791dddfd7d 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -1184,7 +1184,6 @@ struct BlockDriverState {
 
 /* Accessed with atomic ops.  */
 int quiesce_counter;
-int recursive_quiesce_counter;
 
 unsigned int write_gen;   /* Current data generation */
 
diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
index 4b0b3e17ef..8bc061ebb8 100644
--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs,
  */
 void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
 
-
-/*
- * "I/O or GS" API functions. These functions can run without
- * the BQL, but only in one specific iothread/main loop.
- *
- * See include/block/block-io.h for more information about
- * the "I/O or GS" API.
- */
-
-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState 
*old_parent);
-
 #endif /* BLOCK_INT_IO_H */
diff --git a/block.c b/block.c
index 97bfb1494f..7ea0b82049 100644
--- a/block.c
+++ b/block.c
@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
 static bool bdrv_child_cb_drained_poll(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
-return bdrv_drain_poll(bs, false, NULL, false);
+return bdrv_drain_poll(bs, NULL, false);
 }
 
 static void bdrv_child_cb_drained_end(BdrvChild *child)
@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child)
 assert(!bs->file);
 bs->file = child;
 }
-
-bdrv_apply_subtree_drain(child, bs);
 }
 
 static void bdrv_child_cb_detach(BdrvChild *child)
@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child)
 bdrv_backing_detach(child);
 }
 
-bdrv_unapply_subtree_drain(child, bs);
-
 assert_bdrv_graph_writable(bs);
 QLIST_REMOVE(child, next);
 if (child == bs->backing) {
@@ -2882,9 +2878,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 }
 
 if (old_bs) {
-/* Detach first so that the recursive drain sections coming from @child
- * are already gone and we only end the drain sections that came from
- * elsewhere. */
 if (child->klass->detach) {
 child->klass->detach(child);
 }
@@ -2899,17 +2892,14 @@ static void bdrv_replace_child_noperm(B

[PULL 43/51] block: Fix locking in external_snapshot_prepare()

2022-12-14 Thread Kevin Wolf
bdrv_img_create() polls internally (when calling bdrv_create(), which is
a co_wrapper), so it can't be called while holding the lock of any
AioContext except the current one without causing deadlocks. Drop the
lock around the call in external_snapshot_prepare().

Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-11-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 block.c| 4 
 blockdev.c | 4 
 2 files changed, 8 insertions(+)

diff --git a/block.c b/block.c
index 6191ac1f44..44d59362d6 100644
--- a/block.c
+++ b/block.c
@@ -6924,6 +6924,10 @@ bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
 return true;
 }
 
+/*
+ * Must not be called while holding the lock of an AioContext other than the
+ * current one.
+ */
 void bdrv_img_create(const char *filename, const char *fmt,
  const char *base_filename, const char *base_fmt,
  char *options, uint64_t img_size, int flags, bool quiet,
diff --git a/blockdev.c b/blockdev.c
index 8ffb3d9537..011e48df7b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1524,10 +1524,14 @@ static void external_snapshot_prepare(BlkActionState 
*common,
 goto out;
 }
 bdrv_refresh_filename(state->old_bs);
+
+aio_context_release(aio_context);
 bdrv_img_create(new_image_file, format,
 state->old_bs->filename,
 state->old_bs->drv->format_name,
 NULL, size, flags, false, &local_err);
+aio_context_acquire(aio_context);
+
 if (local_err) {
 error_propagate(errp, local_err);
 goto out;
-- 
2.38.1




[PULL 06/51] test-bdrv-drain: Don't yield in .bdrv_co_drained_begin/end()

2022-12-14 Thread Kevin Wolf
We want to change .bdrv_co_drained_begin/end() back to be non-coroutine
callbacks, so in preparation, avoid yielding in their implementation.

This does almost the same as the existing logic in bdrv_drain_invoke(),
by creating and entering coroutines internally. However, since the test
case is by far the heaviest user of coroutine code in drain callbacks,
it is preferable to have the complexity in the test case rather than the
drain core, which is already complicated enough without this.

The behaviour for bdrv_drain_begin() is unchanged because we increase
bs->in_flight and this is still polled. However, bdrv_drain_end()
doesn't wait for the spawned coroutine to complete any more. This is
fine, we don't rely on bdrv_drain_end() restarting all operations
immediately before the next aio_poll().

Signed-off-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Hanna Reitz 
Message-Id: <20221118174110.55183-3-kw...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 tests/unit/test-bdrv-drain.c | 64 ++--
 1 file changed, 46 insertions(+), 18 deletions(-)

diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
index 09dc4a4891..24f34e24ad 100644
--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
@@ -38,12 +38,22 @@ typedef struct BDRVTestState {
 bool sleep_in_drain_begin;
 } BDRVTestState;
 
+static void coroutine_fn sleep_in_drain_begin(void *opaque)
+{
+BlockDriverState *bs = opaque;
+
+qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 10);
+bdrv_dec_in_flight(bs);
+}
+
 static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
 {
 BDRVTestState *s = bs->opaque;
 s->drain_count++;
 if (s->sleep_in_drain_begin) {
-qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 10);
+Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs);
+bdrv_inc_in_flight(bs);
+aio_co_enter(bdrv_get_aio_context(bs), co);
 }
 }
 
@@ -1916,6 +1926,21 @@ static int coroutine_fn 
bdrv_replace_test_co_preadv(BlockDriverState *bs,
 return 0;
 }
 
+static void coroutine_fn bdrv_replace_test_drain_co(void *opaque)
+{
+BlockDriverState *bs = opaque;
+BDRVReplaceTestState *s = bs->opaque;
+
+/* Keep waking io_co up until it is done */
+while (s->io_co) {
+aio_co_wake(s->io_co);
+s->io_co = NULL;
+qemu_coroutine_yield();
+}
+s->drain_co = NULL;
+bdrv_dec_in_flight(bs);
+}
+
 /**
  * If .drain_count is 0, wake up .io_co if there is one; and set
  * .was_drained.
@@ -1926,20 +1951,27 @@ static void coroutine_fn 
bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
 BDRVReplaceTestState *s = bs->opaque;
 
 if (!s->drain_count) {
-/* Keep waking io_co up until it is done */
-s->drain_co = qemu_coroutine_self();
-while (s->io_co) {
-aio_co_wake(s->io_co);
-s->io_co = NULL;
-qemu_coroutine_yield();
-}
-s->drain_co = NULL;
-
+s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
+bdrv_inc_in_flight(bs);
+aio_co_enter(bdrv_get_aio_context(bs), s->drain_co);
 s->was_drained = true;
 }
 s->drain_count++;
 }
 
+static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
+{
+BlockDriverState *bs = opaque;
+char data;
+QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
+int ret;
+
+/* Queue a read request post-drain */
+ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
+g_assert(ret >= 0);
+bdrv_dec_in_flight(bs);
+}
+
 /**
  * Reduce .drain_count, set .was_undrained once it reaches 0.
  * If .drain_count reaches 0 and the node has a backing file, issue a
@@ -1951,17 +1983,13 @@ static void coroutine_fn 
bdrv_replace_test_co_drain_end(BlockDriverState *bs)
 
 g_assert(s->drain_count > 0);
 if (!--s->drain_count) {
-int ret;
-
 s->was_undrained = true;
 
 if (bs->backing) {
-char data;
-QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
-
-/* Queue a read request post-drain */
-ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
-g_assert(ret >= 0);
+Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry,
+  bs);
+bdrv_inc_in_flight(bs);
+aio_co_enter(bdrv_get_aio_context(bs), co);
 }
 }
 }
-- 
2.38.1




[PULL 31/51] block-coroutine-wrapper.py: support also basic return types

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Extend the regex to cover also return type, pointers included.
This implies that the value returned by the function cannot be
a simple "int" anymore, but the custom return type.
Therefore remove poll_state->ret and instead use a per-function
custom "ret" field.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-13-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block/block-gen.h  |  5 +
 scripts/block-coroutine-wrapper.py | 19 +++
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/block/block-gen.h b/block/block-gen.h
index 08d977f493..89b7daaa1f 100644
--- a/block/block-gen.h
+++ b/block/block-gen.h
@@ -32,18 +32,15 @@
 typedef struct BdrvPollCo {
 AioContext *ctx;
 bool in_progress;
-int ret;
 Coroutine *co; /* Keep pointer here for debugging */
 } BdrvPollCo;
 
-static inline int bdrv_poll_co(BdrvPollCo *s)
+static inline void bdrv_poll_co(BdrvPollCo *s)
 {
 assert(!qemu_in_coroutine());
 
 aio_co_enter(s->ctx, s->co);
 AIO_WAIT_WHILE(s->ctx, s->in_progress);
-
-return s->ret;
 }
 
 #endif /* BLOCK_BLOCK_GEN_H */
diff --git a/scripts/block-coroutine-wrapper.py 
b/scripts/block-coroutine-wrapper.py
index f540003af1..71a06e917a 100644
--- a/scripts/block-coroutine-wrapper.py
+++ b/scripts/block-coroutine-wrapper.py
@@ -92,7 +92,8 @@ def gen_block(self, format: str) -> str:
 
 
 # Match wrappers declared with a co_wrapper mark
-func_decl_re = re.compile(r'^int\s*co_wrapper'
+func_decl_re = re.compile(r'^(?P[a-zA-Z][a-zA-Z0-9_]* [\*]?)'
+  r'\s*co_wrapper'
   r'(?P(_[a-z][a-z0-9_]*)?)\s*'
   r'(?P[a-z][a-z0-9_]*)'
   r'\((?P[^)]*)\);$', re.MULTILINE)
@@ -100,7 +101,7 @@ def gen_block(self, format: str) -> str:
 
 def func_decl_iter(text: str) -> Iterator:
 for m in func_decl_re.finditer(text):
-yield FuncDecl(return_type='int',
+yield FuncDecl(return_type=m.group('return_type'),
name=m.group('wrapper_name'),
args=m.group('args'),
variant=m.group('variant'))
@@ -123,7 +124,7 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
 name = func.co_name
 struct_name = func.struct_name
 return f"""\
-int {func.name}({ func.gen_list('{decl}') })
+{func.return_type} {func.name}({ func.gen_list('{decl}') })
 {{
 if (qemu_in_coroutine()) {{
 return {name}({ func.gen_list('{name}') });
@@ -137,7 +138,8 @@ def create_mixed_wrapper(func: FuncDecl) -> str:
 
 s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
 
-return bdrv_poll_co(&s.poll_state);
+bdrv_poll_co(&s.poll_state);
+return s.ret;
 }}
 }}"""
 
@@ -149,7 +151,7 @@ def create_co_wrapper(func: FuncDecl) -> str:
 name = func.co_name
 struct_name = func.struct_name
 return f"""\
-int {func.name}({ func.gen_list('{decl}') })
+{func.return_type} {func.name}({ func.gen_list('{decl}') })
 {{
 {struct_name} s = {{
 .poll_state.ctx = {func.ctx},
@@ -161,13 +163,13 @@ def create_co_wrapper(func: FuncDecl) -> str:
 
 s.poll_state.co = qemu_coroutine_create({name}_entry, &s);
 
-return bdrv_poll_co(&s.poll_state);
+bdrv_poll_co(&s.poll_state);
+return s.ret;
 }}"""
 
 
 def gen_wrapper(func: FuncDecl) -> str:
 assert not '_co_' in func.name
-assert func.return_type == 'int'
 
 name = func.co_name
 struct_name = func.struct_name
@@ -183,6 +185,7 @@ def gen_wrapper(func: FuncDecl) -> str:
 
 typedef struct {struct_name} {{
 BdrvPollCo poll_state;
+{func.return_type} ret;
 { func.gen_block('{decl};') }
 }} {struct_name};
 
@@ -190,7 +193,7 @@ def gen_wrapper(func: FuncDecl) -> str:
 {{
 {struct_name} *s = opaque;
 
-s->poll_state.ret = {name}({ func.gen_list('s->{name}') });
+s->ret = {name}({ func.gen_list('s->{name}') });
 s->poll_state.in_progress = false;
 
 aio_wait_kick();
-- 
2.38.1




[PULL 46/51] block: assert that graph read and writes are performed correctly

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Remove the old assert_bdrv_graph_writable, and replace it with
the new version using graph-lock API.

See the function documentation for more information.

Signed-off-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-14-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 include/block/block_int-global-state.h | 17 -
 include/block/graph-lock.h | 15 +++
 block.c|  4 ++--
 block/graph-lock.c | 11 +++
 4 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/include/block/block_int-global-state.h 
b/include/block/block_int-global-state.h
index b49f4eb35b..2f0993f6e9 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -310,21 +310,4 @@ void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
  */
 void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
 
-/**
- * Make sure that the function is running under both drain and BQL.
- * The latter protects from concurrent writings
- * from the GS API, while the former prevents concurrent reads
- * from I/O.
- */
-static inline void assert_bdrv_graph_writable(BlockDriverState *bs)
-{
-/*
- * TODO: this function is incomplete. Because the users of this
- * assert lack the necessary drains, check only for BQL.
- * Once the necessary drains are added,
- * assert also for qatomic_read(&bs->quiesce_counter) > 0
- */
-assert(qemu_in_main_thread());
-}
-
 #endif /* BLOCK_INT_GLOBAL_STATE_H */
diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
index b27d8a5fb1..85e8a53b73 100644
--- a/include/block/graph-lock.h
+++ b/include/block/graph-lock.h
@@ -135,6 +135,21 @@ void coroutine_fn bdrv_graph_co_rdunlock(void);
 void bdrv_graph_rdlock_main_loop(void);
 void bdrv_graph_rdunlock_main_loop(void);
 
+/*
+ * assert_bdrv_graph_readable:
+ * Make sure that the reader is either the main loop,
+ * or there is at least a reader helding the rdlock.
+ * In this way an incoming writer is aware of the read and waits.
+ */
+void assert_bdrv_graph_readable(void);
+
+/*
+ * assert_bdrv_graph_writable:
+ * Make sure that the writer is the main loop and has set @has_writer,
+ * so that incoming readers will pause.
+ */
+void assert_bdrv_graph_writable(void);
+
 typedef struct GraphLockable { } GraphLockable;
 
 /*
diff --git a/block.c b/block.c
index bdffadcdaa..ff53b41af3 100644
--- a/block.c
+++ b/block.c
@@ -1406,7 +1406,7 @@ static void bdrv_child_cb_attach(BdrvChild *child)
 {
 BlockDriverState *bs = child->opaque;
 
-assert_bdrv_graph_writable(bs);
+assert_bdrv_graph_writable();
 QLIST_INSERT_HEAD(&bs->children, child, next);
 if (bs->drv->is_filter || (child->role & BDRV_CHILD_FILTERED)) {
 /*
@@ -1452,7 +1452,7 @@ static void bdrv_child_cb_detach(BdrvChild *child)
 bdrv_backing_detach(child);
 }
 
-assert_bdrv_graph_writable(bs);
+assert_bdrv_graph_writable();
 QLIST_REMOVE(child, next);
 if (child == bs->backing) {
 assert(child != bs->file);
diff --git a/block/graph-lock.c b/block/graph-lock.c
index e033c6d9ac..c4d9d2c274 100644
--- a/block/graph-lock.c
+++ b/block/graph-lock.c
@@ -259,3 +259,14 @@ void bdrv_graph_rdunlock_main_loop(void)
 GLOBAL_STATE_CODE();
 assert(!qemu_in_coroutine());
 }
+
+void assert_bdrv_graph_readable(void)
+{
+assert(qemu_in_main_thread() || reader_count());
+}
+
+void assert_bdrv_graph_writable(void)
+{
+assert(qemu_in_main_thread());
+assert(qatomic_read(&has_writer));
+}
-- 
2.38.1




[PULL 19/51] block: Remove poll parameter from bdrv_parent_drained_begin_single()

2022-12-14 Thread Kevin Wolf
All callers of bdrv_parent_drained_begin_single() pass poll=false now,
so we don't need the parameter any more.

Signed-off-by: Kevin Wolf 
Message-Id: <20221118174110.55183-16-kw...@redhat.com>
Reviewed-by: Hanna Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 include/block/block-io.h | 5 ++---
 block.c  | 4 ++--
 block/io.c   | 8 ++--
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/include/block/block-io.h b/include/block/block-io.h
index 65e6d2569b..92aaa7c1e9 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector 
*qiov, int64_t pos);
 /**
  * bdrv_parent_drained_begin_single:
  *
- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
- * any pending activity to cease.
+ * Begin a quiesced section for the parent of @c.
  */
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+void bdrv_parent_drained_begin_single(BdrvChild *c);
 
 /**
  * bdrv_parent_drained_poll_single:
diff --git a/block.c b/block.c
index faaeca8472..97073092c4 100644
--- a/block.c
+++ b/block.c
@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque)
  * new_bs drained when calling bdrv_replace_child_tran() is not a
  * requirement any more.
  */
-bdrv_parent_drained_begin_single(s->child, false);
+bdrv_parent_drained_begin_single(s->child);
 assert(!bdrv_parent_drained_poll_single(s->child));
 }
 assert(s->child->quiesced_parent);
@@ -3090,7 +3090,7 @@ static BdrvChild 
*bdrv_attach_child_common(BlockDriverState *child_bs,
  * a problem, we already did this), but it will still poll until the parent
  * is fully quiesced, so it will not be negatively affected either.
  */
-bdrv_parent_drained_begin_single(new_child, false);
+bdrv_parent_drained_begin_single(new_child);
 bdrv_replace_child_noperm(new_child, child_bs);
 
 BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
diff --git a/block/io.c b/block/io.c
index ae64830eac..38e57d1f67 100644
--- a/block/io.c
+++ b/block/io.c
@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, 
BdrvChild *ignore)
 if (c == ignore) {
 continue;
 }
-bdrv_parent_drained_begin_single(c, false);
+bdrv_parent_drained_begin_single(c);
 }
 }
 
@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, 
BdrvChild *ignore,
 return busy;
 }
 
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
+void bdrv_parent_drained_begin_single(BdrvChild *c)
 {
-AioContext *ctx = bdrv_child_get_parent_aio_context(c);
 IO_OR_GS_CODE();
 
 assert(!c->quiesced_parent);
@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool 
poll)
 if (c->klass->drained_begin) {
 c->klass->drained_begin(c);
 }
-if (poll) {
-AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
-}
 }
 
 static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
-- 
2.38.1




[PULL 44/51] block: wrlock in bdrv_replace_child_noperm

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Protect the main function where graph is modified.

Signed-off-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-12-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/block.c b/block.c
index 44d59362d6..df52c6b012 100644
--- a/block.c
+++ b/block.c
@@ -2836,8 +2836,6 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission 
qapi_perm)
  *
  * If @new_bs is non-NULL, the parent of @child must already be drained through
  * @child.
- *
- * This function does not poll.
  */
 static void bdrv_replace_child_noperm(BdrvChild *child,
   BlockDriverState *new_bs)
@@ -2875,23 +2873,24 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
 assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
 }
 
+/* TODO Pull this up into the callers to avoid polling here */
+bdrv_graph_wrlock();
 if (old_bs) {
 if (child->klass->detach) {
 child->klass->detach(child);
 }
-assert_bdrv_graph_writable(old_bs);
 QLIST_REMOVE(child, next_parent);
 }
 
 child->bs = new_bs;
 
 if (new_bs) {
-assert_bdrv_graph_writable(new_bs);
 QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
 if (child->klass->attach) {
 child->klass->attach(child);
 }
 }
+bdrv_graph_wrunlock();
 
 /*
  * If the parent was drained through this BdrvChild previously, but new_bs
-- 
2.38.1




[PULL 28/51] block: rename generated_co_wrapper in co_wrapper_mixed

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

In preparation to the incoming new function specifiers,
rename g_c_w with a more meaningful name and document it.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-10-eespo...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 docs/devel/block-coroutine-wrapper.rst |  6 +--
 block/coroutines.h |  4 +-
 include/block/block-common.h   | 11 +++--
 include/block/block-io.h   | 44 -
 include/sysemu/block-backend-io.h  | 68 +-
 scripts/block-coroutine-wrapper.py |  6 +--
 6 files changed, 71 insertions(+), 68 deletions(-)

diff --git a/docs/devel/block-coroutine-wrapper.rst 
b/docs/devel/block-coroutine-wrapper.rst
index 412851986b..64acc8d65d 100644
--- a/docs/devel/block-coroutine-wrapper.rst
+++ b/docs/devel/block-coroutine-wrapper.rst
@@ -26,12 +26,12 @@ called ``bdrv_foo()``. In this case the script 
can help. To
 trigger the generation:
 
 1. You need ``bdrv_foo`` declaration somewhere (for example, in
-   ``block/coroutines.h``) with the ``generated_co_wrapper`` mark,
+   ``block/coroutines.h``) with the ``co_wrapper_mixed`` mark,
like this:
 
 .. code-block:: c
 
-int generated_co_wrapper bdrv_foo();
+int co_wrapper_mixed bdrv_foo();
 
 2. You need to feed this declaration to block-coroutine-wrapper script.
For this, add the .h (or .c) file with the declaration to the
@@ -46,7 +46,7 @@ Links
 
 1. The script location is ``scripts/block-coroutine-wrapper.py``.
 
-2. Generic place for private ``generated_co_wrapper`` declarations is
+2. Generic place for private ``co_wrapper_mixed`` declarations is
``block/coroutines.h``, for public declarations:
``include/block/block.h``
 
diff --git a/block/coroutines.h b/block/coroutines.h
index 3a2bad564f..17da4db963 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -71,7 +71,7 @@ nbd_co_do_establish_connection(BlockDriverState *bs, bool 
blocking,
  * the "I/O or GS" API.
  */
 
-int generated_co_wrapper
+int co_wrapper_mixed
 bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool include_base,
@@ -82,7 +82,7 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file,
int *depth);
-int generated_co_wrapper
+int co_wrapper_mixed
 nbd_do_establish_connection(BlockDriverState *bs, bool blocking, Error **errp);
 
 #endif /* BLOCK_COROUTINES_H */
diff --git a/include/block/block-common.h b/include/block/block-common.h
index 297704c1e9..ec2309055b 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -35,14 +35,17 @@
 #include "qemu/transactions.h"
 
 /*
- * generated_co_wrapper
+ * co_wrapper{*}: Function specifiers used by block-coroutine-wrapper.py
  *
- * Function specifier, which does nothing but mark functions to be
+ * Function specifiers, which do nothing but mark functions to be
  * generated by scripts/block-coroutine-wrapper.py
  *
- * Read more in docs/devel/block-coroutine-wrapper.rst
+ * Usage: read docs/devel/block-coroutine-wrapper.rst
+ *
+ * co_wrapper_mixed functions can be called by both coroutine and
+ * non-coroutine context.
  */
-#define generated_co_wrapper
+#define co_wrapper_mixed
 
 /* block.c */
 typedef struct BlockDriver BlockDriver;
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 72919254cd..72cf45975b 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -39,19 +39,19 @@
  * to catch when they are accidentally called by the wrong API.
  */
 
-int generated_co_wrapper bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-int64_t bytes,
-BdrvRequestFlags flags);
+int co_wrapper_mixed bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+int64_t bytes,
+BdrvRequestFlags flags);
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int generated_co_wrapper bdrv_pread(BdrvChild *child, int64_t offset,
-int64_t bytes, void *buf,
-BdrvRequestFlags flags);
-int generated_co_wrapper bdrv_pwrite(BdrvChild *child, int64_t offset,
- int64_t bytes, const void *buf,
- BdrvRequestFlags flags);
-int generated_co_wrapper bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
-  int64_t bytes, const void *buf,
-  BdrvRequestFlags flags);
+int co_wrapper_mixed bdrv_pread(BdrvChild *child, int64_t offset,
+int64_t

[PULL 36/51] graph-lock: Implement guard macros

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Similar to the implementation in lockable.h, implement macros to
automatically take and release the rdlock.

Create the empty GraphLockable and GraphLockableMainloop structs only to
use it as a type for G_DEFINE_AUTOPTR_CLEANUP_FUNC.

Signed-off-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-4-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 include/block/graph-lock.h | 66 ++
 1 file changed, 66 insertions(+)

diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
index 82edb62cfa..b27d8a5fb1 100644
--- a/include/block/graph-lock.h
+++ b/include/block/graph-lock.h
@@ -135,5 +135,71 @@ void coroutine_fn bdrv_graph_co_rdunlock(void);
 void bdrv_graph_rdlock_main_loop(void);
 void bdrv_graph_rdunlock_main_loop(void);
 
+typedef struct GraphLockable { } GraphLockable;
+
+/*
+ * In C, compound literals have the lifetime of an automatic variable.
+ * In C++ it would be different, but then C++ wouldn't need QemuLockable
+ * either...
+ */
+#define GML_OBJ_() (&(GraphLockable) { })
+
+static inline GraphLockable *graph_lockable_auto_lock(GraphLockable *x)
+{
+bdrv_graph_co_rdlock();
+return x;
+}
+
+static inline void graph_lockable_auto_unlock(GraphLockable *x)
+{
+bdrv_graph_co_rdunlock();
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockable, graph_lockable_auto_unlock)
+
+#define WITH_GRAPH_RDLOCK_GUARD_(var) \
+for (g_autoptr(GraphLockable) var = graph_lockable_auto_lock(GML_OBJ_()); \
+ var; \
+ graph_lockable_auto_unlock(var), var = NULL)
+
+#define WITH_GRAPH_RDLOCK_GUARD() \
+WITH_GRAPH_RDLOCK_GUARD_(glue(graph_lockable_auto, __COUNTER__))
+
+#define GRAPH_RDLOCK_GUARD(x)   \
+g_autoptr(GraphLockable)\
+glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED =  \
+graph_lockable_auto_lock(GML_OBJ_())
+
+
+typedef struct GraphLockableMainloop { } GraphLockableMainloop;
+
+/*
+ * In C, compound literals have the lifetime of an automatic variable.
+ * In C++ it would be different, but then C++ wouldn't need QemuLockable
+ * either...
+ */
+#define GMLML_OBJ_() (&(GraphLockableMainloop) { })
+
+static inline GraphLockableMainloop *
+graph_lockable_auto_lock_mainloop(GraphLockableMainloop *x)
+{
+bdrv_graph_rdlock_main_loop();
+return x;
+}
+
+static inline void
+graph_lockable_auto_unlock_mainloop(GraphLockableMainloop *x)
+{
+bdrv_graph_rdunlock_main_loop();
+}
+
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(GraphLockableMainloop,
+  graph_lockable_auto_unlock_mainloop)
+
+#define GRAPH_RDLOCK_GUARD_MAINLOOP(x)  \
+g_autoptr(GraphLockableMainloop)\
+glue(graph_lockable_auto, __COUNTER__) G_GNUC_UNUSED =  \
+graph_lockable_auto_lock_mainloop(GMLML_OBJ_())
+
 #endif /* GRAPH_LOCK_H */
 
-- 
2.38.1




[PULL 29/51] block-coroutine-wrapper.py: introduce co_wrapper

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

This new annotation starts just a function wrapper that creates
a new coroutine. It assumes the caller is not a coroutine.
It will be the default annotation to be used in the future.

This is much better as c_w_mixed, because it is clear if the caller
is a coroutine or not, and provides the advantage of automating
the code creation. In the future all c_w_mixed functions will be
substituted by co_wrapper.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-11-eespo...@redhat.com>
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 docs/devel/block-coroutine-wrapper.rst |   6 +-
 include/block/block-common.h   |   8 +-
 scripts/block-coroutine-wrapper.py | 110 +
 3 files changed, 86 insertions(+), 38 deletions(-)

diff --git a/docs/devel/block-coroutine-wrapper.rst 
b/docs/devel/block-coroutine-wrapper.rst
index 64acc8d65d..6dd2cdcab3 100644
--- a/docs/devel/block-coroutine-wrapper.rst
+++ b/docs/devel/block-coroutine-wrapper.rst
@@ -26,12 +26,12 @@ called ``bdrv_foo()``. In this case the script 
can help. To
 trigger the generation:
 
 1. You need ``bdrv_foo`` declaration somewhere (for example, in
-   ``block/coroutines.h``) with the ``co_wrapper_mixed`` mark,
+   ``block/coroutines.h``) with the ``co_wrapper`` mark,
like this:
 
 .. code-block:: c
 
-int co_wrapper_mixed bdrv_foo();
+int co_wrapper bdrv_foo();
 
 2. You need to feed this declaration to block-coroutine-wrapper script.
For this, add the .h (or .c) file with the declaration to the
@@ -46,7 +46,7 @@ Links
 
 1. The script location is ``scripts/block-coroutine-wrapper.py``.
 
-2. Generic place for private ``co_wrapper_mixed`` declarations is
+2. Generic place for private ``co_wrapper`` declarations is
``block/coroutines.h``, for public declarations:
``include/block/block.h``
 
diff --git a/include/block/block-common.h b/include/block/block-common.h
index ec2309055b..847e4d4626 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -42,9 +42,13 @@
  *
  * Usage: read docs/devel/block-coroutine-wrapper.rst
  *
- * co_wrapper_mixed functions can be called by both coroutine and
- * non-coroutine context.
+ * There are 2 kind of specifiers:
+ * - co_wrapper functions can be called by only non-coroutine context, because
+ *   they always generate a new coroutine.
+ * - co_wrapper_mixed functions can be called by both coroutine and
+ *   non-coroutine context.
  */
+#define co_wrapper
 #define co_wrapper_mixed
 
 /* block.c */
diff --git a/scripts/block-coroutine-wrapper.py 
b/scripts/block-coroutine-wrapper.py
index 56e6425356..2090c3bf73 100644
--- a/scripts/block-coroutine-wrapper.py
+++ b/scripts/block-coroutine-wrapper.py
@@ -2,7 +2,7 @@
 """Generate coroutine wrappers for block subsystem.
 
 The program parses one or several concatenated c files from stdin,
-searches for functions with the 'co_wrapper_mixed' specifier
+searches for functions with the 'co_wrapper' specifier
 and generates corresponding wrappers on stdout.
 
 Usage: block-coroutine-wrapper.py generated-file.c FILE.[ch]...
@@ -62,10 +62,25 @@ def __init__(self, param_decl: str) -> None:
 
 
 class FuncDecl:
-def __init__(self, return_type: str, name: str, args: str) -> None:
+def __init__(self, return_type: str, name: str, args: str,
+ variant: str) -> None:
 self.return_type = return_type.strip()
 self.name = name.strip()
+self.struct_name = snake_to_camel(self.name)
 self.args = [ParamDecl(arg.strip()) for arg in args.split(',')]
+self.create_only_co = 'mixed' not in variant
+
+subsystem, subname = self.name.split('_', 1)
+self.co_name = f'{subsystem}_co_{subname}'
+
+t = self.args[0].type
+if t == 'BlockDriverState *':
+bs = 'bs'
+elif t == 'BdrvChild *':
+bs = 'child->bs'
+else:
+bs = 'blk_bs(blk)'
+self.bs = bs
 
 def gen_list(self, format: str) -> str:
 return ', '.join(format.format_map(arg.__dict__) for arg in self.args)
@@ -74,8 +89,9 @@ def gen_block(self, format: str) -> str:
 return '\n'.join(format.format_map(arg.__dict__) for arg in self.args)
 
 
-# Match wrappers declared with a co_wrapper_mixed mark
-func_decl_re = re.compile(r'^int\s*co_wrapper_mixed\s*'
+# Match wrappers declared with a co_wrapper mark
+func_decl_re = re.compile(r'^int\s*co_wrapper'
+  r'(?P(_[a-z][a-z0-9_]*)?)\s*'
   r'(?P[a-z][a-z0-9_]*)'
   r'\((?P[^)]*)\);$', re.MULTILINE)
 
@@ -84,7 +100,8 @@ def func_decl_iter(text: str) -> Iterator:
 for m in func_decl_re.finditer(text):
 yield FuncDecl(return_type='int',
name=m.group('wrapper_name'),
-   args=m.group('args'))
+   args=m.group('a

[PULL 32/51] block: convert bdrv_create to co_wrapper

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

This function is never called in coroutine context, therefore
instead of manually creating a new coroutine, delegate it to the
block-coroutine-wrapper script, defining it as co_wrapper.

Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-14-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/block/block-global-state.h |  8 --
 block.c| 41 ++
 2 files changed, 8 insertions(+), 41 deletions(-)

diff --git a/include/block/block-global-state.h 
b/include/block/block-global-state.h
index 387a7cbb2e..1f8b54f2df 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -55,8 +55,12 @@ BlockDriver *bdrv_find_protocol(const char *filename,
 bool allow_protocol_prefix,
 Error **errp);
 BlockDriver *bdrv_find_format(const char *format_name);
-int bdrv_create(BlockDriver *drv, const char* filename,
-QemuOpts *opts, Error **errp);
+
+int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
+QemuOpts *opts, Error **errp);
+int co_wrapper bdrv_create(BlockDriver *drv, const char *filename,
+   QemuOpts *opts, Error **errp);
+
 int coroutine_fn bdrv_co_create_file(const char *filename, QemuOpts *opts,
  Error **errp);
 
diff --git a/block.c b/block.c
index c8ac91eb63..6191ac1f44 100644
--- a/block.c
+++ b/block.c
@@ -526,8 +526,8 @@ typedef struct CreateCo {
 Error *err;
 } CreateCo;
 
-static int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
-   QemuOpts *opts, Error **errp)
+int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
+QemuOpts *opts, Error **errp)
 {
 int ret;
 GLOBAL_STATE_CODE();
@@ -547,43 +547,6 @@ static int coroutine_fn bdrv_co_create(BlockDriver *drv, 
const char *filename,
 return ret;
 }
 
-static void coroutine_fn bdrv_create_co_entry(void *opaque)
-{
-CreateCo *cco = opaque;
-GLOBAL_STATE_CODE();
-
-cco->ret = bdrv_co_create(cco->drv, cco->filename, cco->opts, &cco->err);
-aio_wait_kick();
-}
-
-int bdrv_create(BlockDriver *drv, const char* filename,
-QemuOpts *opts, Error **errp)
-{
-GLOBAL_STATE_CODE();
-
-if (qemu_in_coroutine()) {
-/* Fast-path if already in coroutine context */
-return bdrv_co_create(drv, filename, opts, errp);
-} else {
-Coroutine *co;
-CreateCo cco = {
-.drv = drv,
-.filename = filename,
-.opts = opts,
-.ret = NOT_DONE,
-.err = NULL,
-};
-
-co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
-qemu_coroutine_enter(co);
-while (cco.ret == NOT_DONE) {
-aio_poll(qemu_get_aio_context(), true);
-}
-error_propagate(errp, cco.err);
-return cco.ret;
-}
-}
-
 /**
  * Helper function for bdrv_create_file_fallback(): Resize @blk to at
  * least the given @minimum_size.
-- 
2.38.1




[PULL 37/51] async: Register/unregister aiocontext in graph lock list

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Add/remove the AioContext in aio_context_list in graph-lock.c when it is
created/destroyed. This allows using the graph locking operations from
this AioContext.

In order to allow linking util/async.c with binaries that don't include
the block layer, introduce stubs for (un)register_aiocontext().

Signed-off-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-5-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 stubs/graph-lock.c | 10 ++
 util/async.c   |  4 
 stubs/meson.build  |  1 +
 3 files changed, 15 insertions(+)
 create mode 100644 stubs/graph-lock.c

diff --git a/stubs/graph-lock.c b/stubs/graph-lock.c
new file mode 100644
index 00..177aa0a8ba
--- /dev/null
+++ b/stubs/graph-lock.c
@@ -0,0 +1,10 @@
+#include "qemu/osdep.h"
+#include "block/graph-lock.h"
+
+void register_aiocontext(AioContext *ctx)
+{
+}
+
+void unregister_aiocontext(AioContext *ctx)
+{
+}
diff --git a/util/async.c b/util/async.c
index 63434ddae4..14d63b3091 100644
--- a/util/async.c
+++ b/util/async.c
@@ -27,6 +27,7 @@
 #include "qapi/error.h"
 #include "block/aio.h"
 #include "block/thread-pool.h"
+#include "block/graph-lock.h"
 #include "qemu/main-loop.h"
 #include "qemu/atomic.h"
 #include "qemu/rcu_queue.h"
@@ -376,6 +377,7 @@ aio_ctx_finalize(GSource *source)
 qemu_rec_mutex_destroy(&ctx->lock);
 qemu_lockcnt_destroy(&ctx->list_lock);
 timerlistgroup_deinit(&ctx->tlg);
+unregister_aiocontext(ctx);
 aio_context_destroy(ctx);
 }
 
@@ -574,6 +576,8 @@ AioContext *aio_context_new(Error **errp)
 ctx->thread_pool_min = 0;
 ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
 
+register_aiocontext(ctx);
+
 return ctx;
 fail:
 g_source_destroy(&ctx->source);
diff --git a/stubs/meson.build b/stubs/meson.build
index c96a74f095..981585cbdf 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -13,6 +13,7 @@ stub_ss.add(files('error-printf.c'))
 stub_ss.add(files('fdset.c'))
 stub_ss.add(files('gdbstub.c'))
 stub_ss.add(files('get-vm-name.c'))
+stub_ss.add(files('graph-lock.c'))
 if linux_io_uring.found()
   stub_ss.add(files('io_uring.c'))
 endif
-- 
2.38.1




[PULL 17/51] block: Drop out of coroutine in bdrv_do_drained_begin_quiesce()

2022-12-14 Thread Kevin Wolf
The next patch adds a parent drain to bdrv_attach_child_common(), which
shouldn't be, but is currently called from coroutines in some cases (e.g.
.bdrv_co_create implementations generally open new nodes). Therefore,
the assertion that we're not in a coroutine doesn't hold true any more.

We could just remove the assertion because there is nothing in the
function that should be in conflict with running in a coroutine, but
just to be on the safe side, we can reverse the caller relationship
between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so
that the latter also just drops out of coroutine context and we can
still be certain in the future that any drain code doesn't run in
coroutines.

As a nice side effect, the structure of bdrv_do_drained_begin() is now
symmetrical with bdrv_do_drained_end().

Signed-off-by: Kevin Wolf 
Message-Id: <20221118174110.55183-14-kw...@redhat.com>
Reviewed-by: Hanna Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 block/io.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/block/io.c b/block/io.c
index 2e9503df6a..5e9150d92c 100644
--- a/block/io.c
+++ b/block/io.c
@@ -346,10 +346,15 @@ static void coroutine_fn 
bdrv_co_yield_to_drain(BlockDriverState *bs,
 }
 }
 
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+  bool poll)
 {
 IO_OR_GS_CODE();
-assert(!qemu_in_coroutine());
+
+if (qemu_in_coroutine()) {
+bdrv_co_yield_to_drain(bs, true, parent, poll);
+return;
+}
 
 /* Stop things in parent-to-child order */
 if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, 
BdrvChild *parent)
 bs->drv->bdrv_drain_begin(bs);
 }
 }
-}
-
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
-  bool poll)
-{
-if (qemu_in_coroutine()) {
-bdrv_co_yield_to_drain(bs, true, parent, poll);
-return;
-}
-
-bdrv_do_drained_begin_quiesce(bs, parent);
 
 /*
  * Wait for drained requests to finish.
@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, 
BdrvChild *parent,
 }
 }
 
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+{
+bdrv_do_drained_begin(bs, parent, false);
+}
+
 void bdrv_drained_begin(BlockDriverState *bs)
 {
 IO_OR_GS_CODE();
-- 
2.38.1




[PULL 38/51] Import clang-tsa.h

2022-12-14 Thread Kevin Wolf
This defines macros that allow clang to perform Thread Safety Analysis
based on function and variable annotations that specify the locking
rules. On non-clang compilers, the annotations are ignored.

Imported tsa.h from the original repository with the pthread_mutex_t
wrapper removed:

https://github.com/jhi/clang-thread-safety-analysis-for-c.git

Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-6-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 include/qemu/clang-tsa.h | 101 +++
 1 file changed, 101 insertions(+)
 create mode 100644 include/qemu/clang-tsa.h

diff --git a/include/qemu/clang-tsa.h b/include/qemu/clang-tsa.h
new file mode 100644
index 00..0a3361dfc8
--- /dev/null
+++ b/include/qemu/clang-tsa.h
@@ -0,0 +1,101 @@
+#ifndef CLANG_TSA_H
+#define CLANG_TSA_H
+
+/*
+ * Copyright 2018 Jarkko Hietaniemi 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without
+ * limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* http://clang.llvm.org/docs/ThreadSafetyAnalysis.html
+ *
+ * TSA is available since clang 3.6-ish.
+ */
+#ifdef __clang__
+#  define TSA(x)   __attribute__((x))
+#else
+#  define TSA(x)   /* No TSA, make TSA attributes no-ops. */
+#endif
+
+/* TSA_CAPABILITY() is used to annotate typedefs:
+ *
+ * typedef pthread_mutex_t TSA_CAPABILITY("mutex") tsa_mutex;
+ */
+#define TSA_CAPABILITY(x) TSA(capability(x))
+
+/* TSA_GUARDED_BY() is used to annotate global variables,
+ * the data is guarded:
+ *
+ * Foo foo TSA_GUARDED_BY(mutex);
+ */
+#define TSA_GUARDED_BY(x) TSA(guarded_by(x))
+
+/* TSA_PT_GUARDED_BY() is used to annotate global pointers, the data
+ * behind the pointer is guarded.
+ *
+ * Foo* ptr TSA_PT_GUARDED_BY(mutex);
+ */
+#define TSA_PT_GUARDED_BY(x) TSA(pt_guarded_by(x))
+
+/* The TSA_REQUIRES() is used to annotate functions: the caller of the
+ * function MUST hold the resource, the function will NOT release it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_REQUIRES(mutex);
+ */
+#define TSA_REQUIRES(...) TSA(requires_capability(__VA_ARGS__))
+
+/* TSA_EXCLUDES() is used to annotate functions: the caller of the
+ * function MUST NOT hold resource, the function first acquires the
+ * resource, and then releases it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_EXCLUDES(mutex);
+ */
+#define TSA_EXCLUDES(...) TSA(locks_excluded(__VA_ARGS__))
+
+/* TSA_ACQUIRE() is used to annotate functions: the caller of the
+ * function MUST NOT hold the resource, the function will acquire the
+ * resource, but NOT release it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_ACQUIRE(mutex);
+ */
+#define TSA_ACQUIRE(...) TSA(acquire_capability(__VA_ARGS__))
+
+/* TSA_RELEASE() is used to annotate functions: the caller of the
+ * function MUST hold the resource, but the function will then release it.
+ *
+ * More than one mutex may be specified, comma-separated.
+ *
+ * void Foo(void) TSA_RELEASE(mutex);
+ */
+#define TSA_RELEASE(...) TSA(release_capability(__VA_ARGS__))
+
+/* TSA_NO_TSA is used to annotate functions.  Use only when you need to.
+ *
+ * void Foo(void) TSA_NO_TSA;
+ */
+#define TSA_NO_TSA TSA(no_thread_safety_analysis)
+
+#endif /* #ifndef CLANG_TSA_H */
-- 
2.38.1




[PULL 34/51] block: Factor out bdrv_drain_all_begin_nopoll()

2022-12-14 Thread Kevin Wolf
Provide a separate function that just quiesces the users of a node to
prevent new requests from coming in, but without waiting for the already
in-flight I/O to complete.

This function can be used in contexts where polling is not allowed.

Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-2-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
---
 include/block/block-global-state.h |  1 +
 block/io.c | 19 +--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/include/block/block-global-state.h 
b/include/block/block-global-state.h
index 1f8b54f2df..b0a3cfe6b8 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -152,6 +152,7 @@ int bdrv_inactivate_all(void);
 int bdrv_flush_all(void);
 void bdrv_close_all(void);
 void bdrv_drain_all_begin(void);
+void bdrv_drain_all_begin_nopoll(void);
 void bdrv_drain_all_end(void);
 void bdrv_drain_all(void);
 
diff --git a/block/io.c b/block/io.c
index fb..d160d2e273 100644
--- a/block/io.c
+++ b/block/io.c
@@ -466,16 +466,11 @@ static bool bdrv_drain_all_poll(void)
  * NOTE: no new block jobs or BlockDriverStates can be created between
  * the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
  */
-void bdrv_drain_all_begin(void)
+void bdrv_drain_all_begin_nopoll(void)
 {
 BlockDriverState *bs = NULL;
 GLOBAL_STATE_CODE();
 
-if (qemu_in_coroutine()) {
-bdrv_co_yield_to_drain(NULL, true, NULL, true);
-return;
-}
-
 /*
  * bdrv queue is managed by record/replay,
  * waiting for finishing the I/O requests may
@@ -500,6 +495,18 @@ void bdrv_drain_all_begin(void)
 bdrv_do_drained_begin(bs, NULL, false);
 aio_context_release(aio_context);
 }
+}
+
+void bdrv_drain_all_begin(void)
+{
+BlockDriverState *bs = NULL;
+
+if (qemu_in_coroutine()) {
+bdrv_co_yield_to_drain(NULL, true, NULL, true);
+return;
+}
+
+bdrv_drain_all_begin_nopoll();
 
 /* Now poll the in-flight requests */
 AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
-- 
2.38.1




[PULL 10/51] block: Fix locking for bdrv_reopen_queue_child()

2022-12-14 Thread Kevin Wolf
Callers don't agree whether bdrv_reopen_queue_child() should be called
with the AioContext lock held or not. Standardise on holding the lock
(as done by QMP blockdev-reopen and the replication block driver) and
fix bdrv_reopen() to do the same.

Signed-off-by: Kevin Wolf 
Message-Id: <20221118174110.55183-7-kw...@redhat.com>
Reviewed-by: Hanna Reitz 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Kevin Wolf 
---
 block.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/block.c b/block.c
index ce71545551..3266519455 100644
--- a/block.c
+++ b/block.c
@@ -4174,6 +4174,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
  * bs_queue, or the existing bs_queue being used.
  *
  * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
+ *
+ * To be called with bs->aio_context locked.
  */
 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
  BlockDriverState *bs,
@@ -4332,6 +4334,7 @@ static BlockReopenQueue 
*bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
 return bs_queue;
 }
 
+/* To be called with bs->aio_context locked */
 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
 BlockDriverState *bs,
 QDict *options, bool keep_old_opts)
@@ -4492,11 +4495,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool 
keep_old_opts,
 GLOBAL_STATE_CODE();
 
 bdrv_subtree_drained_begin(bs);
+queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
+
 if (ctx != qemu_get_aio_context()) {
 aio_context_release(ctx);
 }
-
-queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
 ret = bdrv_reopen_multiple(queue, errp);
 
 if (ctx != qemu_get_aio_context()) {
-- 
2.38.1




[PULL 50/51] block: use co_wrapper_mixed_bdrv_rdlock in functions taking the rdlock

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

Take the rdlock already, before we add the assertions.

All these functions either read the graph recursively, or call
BlockDriver callbacks that will eventually need to be protected by the
graph rdlock.

Do it now to all functions together, because many of these recursively
call each other.

For example, bdrv_co_truncate calls BlockDriver->bdrv_co_truncate, and
some driver callbacks implement their own .bdrv_co_truncate by calling
bdrv_flush inside. So if bdrv_flush asserts but bdrv_truncate does not
take the rdlock yet, the assertion will always fail.

Signed-off-by: Emanuele Giuseppe Esposito 
Signed-off-by: Kevin Wolf 
Message-Id: <20221207131838.239125-18-kw...@redhat.com>
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Signed-off-by: Kevin Wolf 
---
 block/coroutines.h   |  2 +-
 include/block/block-io.h | 53 +++-
 2 files changed, 32 insertions(+), 23 deletions(-)

diff --git a/block/coroutines.h b/block/coroutines.h
index 17da4db963..48e9081aa1 100644
--- a/block/coroutines.h
+++ b/block/coroutines.h
@@ -71,7 +71,7 @@ nbd_co_do_establish_connection(BlockDriverState *bs, bool 
blocking,
  * the "I/O or GS" API.
  */
 
-int co_wrapper_mixed
+int co_wrapper_mixed_bdrv_rdlock
 bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
bool include_base,
diff --git a/include/block/block-io.h b/include/block/block-io.h
index 52869ea08e..2ed6214909 100644
--- a/include/block/block-io.h
+++ b/include/block/block-io.h
@@ -39,19 +39,24 @@
  * to catch when they are accidentally called by the wrong API.
  */
 
-int co_wrapper_mixed bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
-int64_t bytes,
-BdrvRequestFlags flags);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset, int64_t bytes,
+   BdrvRequestFlags flags);
+
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
-int co_wrapper_mixed bdrv_pread(BdrvChild *child, int64_t offset,
-int64_t bytes, void *buf,
-BdrvRequestFlags flags);
-int co_wrapper_mixed bdrv_pwrite(BdrvChild *child, int64_t offset,
- int64_t bytes, const void *buf,
- BdrvRequestFlags flags);
-int co_wrapper_mixed bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
-  int64_t bytes, const void *buf,
-  BdrvRequestFlags flags);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pread(BdrvChild *child, int64_t offset, int64_t bytes, void *buf,
+   BdrvRequestFlags flags);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pwrite(BdrvChild *child, int64_t offset,int64_t bytes,
+const void *buf, BdrvRequestFlags flags);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pwrite_sync(BdrvChild *child, int64_t offset, int64_t bytes,
+ const void *buf, BdrvRequestFlags flags);
+
 int coroutine_fn bdrv_co_pwrite_sync(BdrvChild *child, int64_t offset,
  int64_t bytes, const void *buf,
  BdrvRequestFlags flags);
@@ -287,22 +292,26 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, 
int64_t src_offset,
 
 void bdrv_drain(BlockDriverState *bs);
 
-int co_wrapper_mixed
+int co_wrapper_mixed_bdrv_rdlock
 bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
   PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
 
-int co_wrapper_mixed bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
-BdrvCheckMode fix);
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
 
 /* Invalidate any cached metadata used by image formats */
-int co_wrapper_mixed bdrv_invalidate_cache(BlockDriverState *bs,
-   Error **errp);
-int co_wrapper_mixed bdrv_flush(BlockDriverState *bs);
-int co_wrapper_mixed bdrv_pdiscard(BdrvChild *child, int64_t offset,
-   int64_t bytes);
-int co_wrapper_mixed
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
+
+int co_wrapper_mixed_bdrv_rdlock bdrv_flush(BlockDriverState *bs);
+
+int co_wrapper_mixed_bdrv_rdlock
+bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+
+int co_wrapper_mixed_bdrv_rdlock
 bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
-int co_wrapper_mixed
+
+int co_wrapper_mixed_bdrv_rdlock
 bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
 
 /**
-- 
2.38.1




[PULL 25/51] block: avoid duplicating filename string in bdrv_create

2022-12-14 Thread Kevin Wolf
From: Emanuele Giuseppe Esposito 

We know that the string will stay around until the function
returns, and the parameter of drv->bdrv_co_create_opts is const char*,
so it must not be modified either.

Suggested-by: Kevin Wolf 
Signed-off-by: Emanuele Giuseppe Esposito 
Reviewed-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20221128142337.657646-7-eespo...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 block.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/block.c b/block.c
index 97073092c4..9d77ec8818 100644
--- a/block.c
+++ b/block.c
@@ -551,7 +551,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
 Coroutine *co;
 CreateCo cco = {
 .drv = drv,
-.filename = g_strdup(filename),
+.filename = filename,
 .opts = opts,
 .ret = NOT_DONE,
 .err = NULL,
@@ -559,8 +559,7 @@ int bdrv_create(BlockDriver *drv, const char* filename,
 
 if (!drv->bdrv_co_create_opts) {
 error_setg(errp, "Driver '%s' does not support image creation", 
drv->format_name);
-ret = -ENOTSUP;
-goto out;
+return -ENOTSUP;
 }
 
 if (qemu_in_coroutine()) {
@@ -583,8 +582,6 @@ int bdrv_create(BlockDriver *drv, const char* filename,
 }
 }
 
-out:
-g_free(cco.filename);
 return ret;
 }
 
-- 
2.38.1




[PULL 07/51] block: Revert .bdrv_drained_begin/end to non-coroutine_fn

2022-12-14 Thread Kevin Wolf
Polling during bdrv_drained_end() can be problematic (and in the future,
we may get cases for bdrv_drained_begin() where polling is forbidden,
and we don't care about already in-flight requests, but just want to
prevent new requests from arriving).

The .bdrv_drained_begin/end callbacks running in a coroutine is the only
reason why we have to do this polling, so make them non-coroutine
callbacks again. None of the callers actually yield any more.

This means that bdrv_drained_end() effectively doesn't poll any more,
even if AIO_WAIT_WHILE() loops are still there (their condition is false
from the beginning). This is generally not a problem, but in
test-bdrv-drain, some additional explicit aio_poll() calls need to be
added because the test case wants to verify the final state after BHs
have executed.

Signed-off-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Emanuele Giuseppe Esposito 
Reviewed-by: Hanna Reitz 
Message-Id: <20221118174110.55183-4-kw...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/block/block_int-common.h | 10 ---
 block.c  |  4 +--
 block/io.c   | 49 +---
 block/qed.c  |  6 ++--
 block/throttle.c |  8 +++---
 tests/unit/test-bdrv-drain.c | 18 ++--
 6 files changed, 32 insertions(+), 63 deletions(-)

diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
index 31ae91e56e..40d646d1ed 100644
--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
@@ -735,17 +735,19 @@ struct BlockDriver {
 void (*bdrv_io_unplug)(BlockDriverState *bs);
 
 /**
- * bdrv_co_drain_begin is called if implemented in the beginning of a
+ * bdrv_drain_begin is called if implemented in the beginning of a
  * drain operation to drain and stop any internal sources of requests in
  * the driver.
- * bdrv_co_drain_end is called if implemented at the end of the drain.
+ * bdrv_drain_end is called if implemented at the end of the drain.
  *
  * They should be used by the driver to e.g. manage scheduled I/O
  * requests, or toggle an internal state. After the end of the drain new
  * requests will continue normally.
+ *
+ * Implementations of both functions must not call aio_poll().
  */
-void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
-void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
+void (*bdrv_drain_begin)(BlockDriverState *bs);
+void (*bdrv_drain_end)(BlockDriverState *bs);
 
 bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
 bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
diff --git a/block.c b/block.c
index 385ed3cd53..466770b9ac 100644
--- a/block.c
+++ b/block.c
@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, 
BlockDriver *drv,
 assert(is_power_of_2(bs->bl.request_alignment));
 
 for (i = 0; i < bs->quiesce_counter; i++) {
-if (drv->bdrv_co_drain_begin) {
-drv->bdrv_co_drain_begin(bs);
+if (drv->bdrv_drain_begin) {
+drv->bdrv_drain_begin(bs);
 }
 }
 
diff --git a/block/io.c b/block/io.c
index b9424024f9..c2ed4b2af9 100644
--- a/block/io.c
+++ b/block/io.c
@@ -252,55 +252,20 @@ typedef struct {
 int *drained_end_counter;
 } BdrvCoDrainData;
 
-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
-{
-BdrvCoDrainData *data = opaque;
-BlockDriverState *bs = data->bs;
-
-if (data->begin) {
-bs->drv->bdrv_co_drain_begin(bs);
-} else {
-bs->drv->bdrv_co_drain_end(bs);
-}
-
-/* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
-qatomic_mb_set(&data->done, true);
-if (!data->begin) {
-qatomic_dec(data->drained_end_counter);
-}
-bdrv_dec_in_flight(bs);
-
-g_free(data);
-}
-
-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
+/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
 static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
   int *drained_end_counter)
 {
-BdrvCoDrainData *data;
-
-if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
-(!begin && !bs->drv->bdrv_co_drain_end)) {
+if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
+(!begin && !bs->drv->bdrv_drain_end)) {
 return;
 }
 
-data = g_new(BdrvCoDrainData, 1);
-*data = (BdrvCoDrainData) {
-.bs = bs,
-.done = false,
-.begin = begin,
-.drained_end_counter = drained_end_counter,
-};
-
-if (!begin) {
-qatomic_inc(drained_end_counter);
+if (begin) {
+bs->drv->bdrv_drain_begin(bs);
+} else {
+bs->drv->bdrv_drain_end(bs);
 }
-
-/* Make sure the driver callback completes during the polling phase for
- * drain_begin. */
-bdrv_

[PULL 13/51] stream: Replace subtree drain with a single node drain

2022-12-14 Thread Kevin Wolf
The subtree drain was introduced in commit b1e1af394d9 as a way to avoid
graph changes between finding the base node and changing the block graph
as necessary on completion of the image streaming job.

The block graph could change between these two points because
bdrv_set_backing_hd() first drains the parent node, which involved
polling and can do anything.

Subtree draining was an imperfect way to make this less likely (because
with it, fewer callbacks are called during this window). Everyone agreed
that it's not really the right solution, and it was only committed as a
stopgap solution.

This replaces the subtree drain with a solution that simply drains the
parent node before we try to find the base node, and then call a version
of bdrv_set_backing_hd() that doesn't drain, but just asserts that the
parent node is already drained.

This way, any graph changes caused by draining happen before we start
looking at the graph and things stay consistent between finding the base
node and changing the graph.

Signed-off-by: Kevin Wolf 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Hanna Reitz 
Message-Id: <20221118174110.55183-10-kw...@redhat.com>
Signed-off-by: Kevin Wolf 
---
 include/block/block-global-state.h |  3 +++
 block.c| 17 ++---
 block/stream.c | 26 --
 3 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/include/block/block-global-state.h 
b/include/block/block-global-state.h
index c7bd4a2088..00e0cf8aea 100644
--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename,
 BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
 Error **errp);
+int bdrv_set_backing_hd_drained(BlockDriverState *bs,
+BlockDriverState *backing_hd,
+Error **errp);
 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
 BlockDriverState *bdrv_open(const char *filename, const char *reference,
diff --git a/block.c b/block.c
index db043346d8..97bfb1494f 100644
--- a/block.c
+++ b/block.c
@@ -3426,14 +3426,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs,
 return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
 }
 
-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
-Error **errp)
+int bdrv_set_backing_hd_drained(BlockDriverState *bs,
+BlockDriverState *backing_hd,
+Error **errp)
 {
 int ret;
 Transaction *tran = tran_new();
 
 GLOBAL_STATE_CODE();
-bdrv_drained_begin(bs);
+assert(bs->quiesce_counter > 0);
 
 ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
 if (ret < 0) {
@@ -3443,7 +3444,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd,
 ret = bdrv_refresh_perms(bs, tran, errp);
 out:
 tran_finalize(tran, ret);
+return ret;
+}
 
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+Error **errp)
+{
+int ret;
+GLOBAL_STATE_CODE();
+
+bdrv_drained_begin(bs);
+ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
 bdrv_drained_end(bs);
 
 return ret;
diff --git a/block/stream.c b/block/stream.c
index 694709bd25..8744ad103f 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -64,13 +64,16 @@ static int stream_prepare(Job *job)
 bdrv_cor_filter_drop(s->cor_filter_bs);
 s->cor_filter_bs = NULL;
 
-bdrv_subtree_drained_begin(s->above_base);
+/*
+ * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain
+ * already here and use bdrv_set_backing_hd_drained() instead because
+ * the polling during drained_begin() might change the graph, and if we do
+ * this only later, we may end up working with the wrong base node (or it
+ * might even have gone away by the time we want to use it).
+ */
+bdrv_drained_begin(unfiltered_bs);
 
 base = bdrv_filter_or_cow_bs(s->above_base);
-if (base) {
-bdrv_ref(base);
-}
-
 unfiltered_base = bdrv_skip_filters(base);
 
 if (bdrv_cow_child(unfiltered_bs)) {
@@ -82,7 +85,13 @@ static int stream_prepare(Job *job)
 }
 }
 
-bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
+bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err);
+
+/*
+ * This call will do I/O, so the graph can change again from here on.
+ * We have already completed the graph change, so we are not in danger
+ * of operating on the wrong node any more if this happens.
+ */
 ret = bdrv_change_backing_file(unfiltered

[PULL 09/30] nbd/server: Clean up abuse of BlockExportOptionsNbd member @arg

2022-12-14 Thread Markus Armbruster
block-export-add argument @name defaults to the value of argument
@node-name.

nbd_export_create() implements this by copying @node_name to @name.
It leaves @has_node_name false, violating the "has_node_name ==
!!node_name" invariant.  Unclean.  Falls apart when we elide
@has_node_name (next commit): then QAPI frees the same value twice,
once for @node_name and once @name.  iotest 307 duly explodes.

Goes back to commit c62d24e906 "blockdev-nbd: Boxed argument type for
nbd-server-add" (v5.0.0).  Got moved from qmp_nbd_server_add() to
nbd_export_create() (commit 56ee86261e), then copied back (commit
b6076afcab).  Commit 8675cbd68b "nbd: Utilize QAPI_CLONE for type
conversion" (v5.2.0) cleaned up the copy in qmp_nbd_server_add()
noting

Second, our assignment to arg->name is fishy: the generated QAPI code
for qapi_free_NbdServerAddOptions does not visit arg->name if
arg->has_name is false, but if it DID visit it, we would have
introduced a double-free situation when arg is finally freed.

Exactly.  However, the copy in nbd_export_create() remained dirty.

Clean it up.  Since the value stored in member @name is not actually
used outside this function, use a local variable instead of modifying
the QAPI object.

Signed-off-by: Markus Armbruster 
Cc: Eric Blake 
Cc: Vladimir Sementsov-Ogievskiy 
Cc: qemu-block@nongnu.org
Message-Id: <20221104160712.3005652-10-arm...@redhat.com>
Reviewed-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 nbd/server.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index ada16089f3..0570596312 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1638,6 +1638,7 @@ static int nbd_export_create(BlockExport *blk_exp, 
BlockExportOptions *exp_args,
 {
 NBDExport *exp = container_of(blk_exp, NBDExport, common);
 BlockExportOptionsNbd *arg = &exp_args->u.nbd;
+const char *name = arg->name ?: exp_args->node_name;
 BlockBackend *blk = blk_exp->blk;
 int64_t size;
 uint64_t perm, shared_perm;
@@ -1653,12 +1654,8 @@ static int nbd_export_create(BlockExport *blk_exp, 
BlockExportOptions *exp_args,
 return -EINVAL;
 }
 
-if (!arg->has_name) {
-arg->name = exp_args->node_name;
-}
-
-if (strlen(arg->name) > NBD_MAX_STRING_SIZE) {
-error_setg(errp, "export name '%s' too long", arg->name);
+if (strlen(name) > NBD_MAX_STRING_SIZE) {
+error_setg(errp, "export name '%s' too long", name);
 return -EINVAL;
 }
 
@@ -1667,8 +1664,8 @@ static int nbd_export_create(BlockExport *blk_exp, 
BlockExportOptions *exp_args,
 return -EINVAL;
 }
 
-if (nbd_export_find(arg->name)) {
-error_setg(errp, "NBD server already has export named '%s'", 
arg->name);
+if (nbd_export_find(name)) {
+error_setg(errp, "NBD server already has export named '%s'", name);
 return -EEXIST;
 }
 
@@ -1688,7 +1685,7 @@ static int nbd_export_create(BlockExport *blk_exp, 
BlockExportOptions *exp_args,
 }
 
 QTAILQ_INIT(&exp->clients);
-exp->name = g_strdup(arg->name);
+exp->name = g_strdup(name);
 exp->description = g_strdup(arg->description);
 exp->nbdflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_FLUSH |
  NBD_FLAG_SEND_FUA | NBD_FLAG_SEND_CACHE);
-- 
2.37.3




[PULL 26/30] qapi transaction: Elide redundant has_FOO in generated C

2022-12-14 Thread Markus Armbruster
The has_FOO for pointer-valued FOO are redundant, except for arrays.
They are also a nuisance to work with.  Recent commit "qapi: Start to
elide redundant has_FOO in generated C" provided the means to elide
them step by step.  This is the step for qapi/transaction.json.

Said commit explains the transformation in more detail.  The invariant
violations mentioned there do not occur here.

In qmp_transaction(), we can't just drop parameter @has_props, since
it's used to track whether @props needs to be freed.  Replace it by a
local variable.

Cc: Kevin Wolf 
Cc: Hanna Reitz 
Cc: qemu-block@nongnu.org
Signed-off-by: Markus Armbruster 
Message-Id: <20221104160712.3005652-27-arm...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
---
 blockdev.c | 4 ++--
 scripts/qapi/schema.py | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 59753400e9..75eef8535e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1048,7 +1048,7 @@ static void blockdev_do_action(TransactionAction *action, 
Error **errp)
 
 list.value = action;
 list.next = NULL;
-qmp_transaction(&list, false, NULL, errp);
+qmp_transaction(&list, NULL, errp);
 }
 
 void qmp_blockdev_snapshot_sync(const char *device, const char *node_name,
@@ -2289,11 +2289,11 @@ static TransactionProperties 
*get_transaction_properties(
  * Always run under BQL.
  */
 void qmp_transaction(TransactionActionList *dev_list,
- bool has_props,
  struct TransactionProperties *props,
  Error **errp)
 {
 TransactionActionList *dev_entry = dev_list;
+bool has_props = !!props;
 JobTxn *block_job_txn = NULL;
 BlkActionState *state, *next;
 Error *local_err = NULL;
diff --git a/scripts/qapi/schema.py b/scripts/qapi/schema.py
index f0726af876..3673296ad8 100644
--- a/scripts/qapi/schema.py
+++ b/scripts/qapi/schema.py
@@ -759,7 +759,6 @@ def need_has(self):
 assert self.type
 # Temporary hack to support dropping the has_FOO in reviewable chunks
 opt_out = [
-'qapi/transaction.json',
 'qapi/ui.json',
 'qapi/virtio.json',
 'qga/qapi-schema.json']
-- 
2.37.3




[PULL 08/30] blockdev: Clean up abuse of DriveBackup member format

2022-12-14 Thread Markus Armbruster
drive-backup argument @format defaults to the format of the source
unless @mode is "existing".

drive_backup_prepare() implements this by copying the source's
@format_name to DriveBackup member @format.  It leaves @has_format
false, violating the "has_format == !!format" invariant.  Unclean.
Falls apart when we elide @has_format (commit after next): then QAPI
passes @format, which is a string constant, to g_free().  iotest 056
duly explodes.

Clean it up.  Since the value stored in member @format is not actually
used outside this function, use a local variable instead of modifying
the QAPI object.

Signed-off-by: Markus Armbruster 
Cc: Kevin Wolf 
Cc: Hanna Reitz 
Cc: qemu-block@nongnu.org
Message-Id: <20221104160712.3005652-9-arm...@redhat.com>
Reviewed-by: Eric Blake 
---
 blockdev.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 3f1dec6242..d6550e0dc8 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1686,6 +1686,7 @@ static void drive_backup_prepare(BlkActionState *common, 
Error **errp)
 BlockDriverState *source = NULL;
 AioContext *aio_context;
 AioContext *old_context;
+const char *format;
 QDict *options;
 Error *local_err = NULL;
 int flags;
@@ -1717,9 +1718,9 @@ static void drive_backup_prepare(BlkActionState *common, 
Error **errp)
 /* Paired with .clean() */
 bdrv_drained_begin(bs);
 
-if (!backup->has_format) {
-backup->format = backup->mode == NEW_IMAGE_MODE_EXISTING ?
- NULL : (char *) bs->drv->format_name;
+format = backup->format;
+if (!format && backup->mode != NEW_IMAGE_MODE_EXISTING) {
+format = bs->drv->format_name;
 }
 
 /* Early check to avoid creating target */
@@ -1758,19 +1759,19 @@ static void drive_backup_prepare(BlkActionState 
*common, Error **errp)
 }
 
 if (backup->mode != NEW_IMAGE_MODE_EXISTING) {
-assert(backup->format);
+assert(format);
 if (source) {
 /* Implicit filters should not appear in the filename */
 BlockDriverState *explicit_backing =
 bdrv_skip_implicit_filters(source);
 
 bdrv_refresh_filename(explicit_backing);
-bdrv_img_create(backup->target, backup->format,
+bdrv_img_create(backup->target, format,
 explicit_backing->filename,
 explicit_backing->drv->format_name, NULL,
 size, flags, false, &local_err);
 } else {
-bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL,
+bdrv_img_create(backup->target, format, NULL, NULL, NULL,
 size, flags, false, &local_err);
 }
 }
@@ -1783,8 +1784,8 @@ static void drive_backup_prepare(BlkActionState *common, 
Error **errp)
 options = qdict_new();
 qdict_put_str(options, "discard", "unmap");
 qdict_put_str(options, "detect-zeroes", "unmap");
-if (backup->format) {
-qdict_put_str(options, "driver", backup->format);
+if (format) {
+qdict_put_str(options, "driver", format);
 }
 
 target_bs = bdrv_open(backup->target, NULL, options, flags, errp);
-- 
2.37.3




Re: [PATCH 1/3] include/block: Untangle inclusion loops

2022-12-14 Thread Markus Armbruster
Paolo Bonzini  writes:

> On 12/8/22 15:39, Markus Armbruster wrote:
>>* Global state (GS) API. These functions run under the BQL.
>>*
>>* See include/block/block-global-state.h for more information about
>> - * the GS API.
>> + * the GS API.b
>>*/
>
> One-character typo.

Thanks!