On Fri, Oct 27, 2023 at 05:53:25PM +0200, Kevin Wolf wrote:
> Instead of taking the writer lock internally, require callers to already
> hold it when calling bdrv_replace_node(). Its callers may already want
> to hold the graph lock and so wouldn't be able to call functions that
> take it internally.
> 
> Signed-off-by: Kevin Wolf <kw...@redhat.com>
> ---
>  include/block/block-global-state.h |  6 ++++--
>  block.c                            | 26 +++++++-------------------
>  block/commit.c                     | 13 +++++++++++--
>  block/mirror.c                     | 26 ++++++++++++++++----------
>  blockdev.c                         |  5 +++++
>  tests/unit/test-bdrv-drain.c       |  6 ++++++
>  tests/unit/test-bdrv-graph-mod.c   | 13 +++++++++++--
>  7 files changed, 60 insertions(+), 35 deletions(-)
> 
> +++ b/block.c
> @@ -5484,25 +5484,7 @@ out:
>  int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
>                        Error **errp)
>  {
> -    int ret;
> -
> -    GLOBAL_STATE_CODE();
> -
> -    /* Make sure that @from doesn't go away until we have successfully 
> attached
> -     * all of its parents to @to. */

Useful comment that you just moved here in the previous patch...

> -    bdrv_ref(from);
> -    bdrv_drained_begin(from);
> -    bdrv_drained_begin(to);
> -    bdrv_graph_wrlock(to);
> -
> -    ret = bdrv_replace_node_common(from, to, true, false, errp);
> -
> -    bdrv_graph_wrunlock();
> -    bdrv_drained_end(to);
> -    bdrv_drained_end(from);
> -    bdrv_unref(from);
> -
> -    return ret;
> +    return bdrv_replace_node_common(from, to, true, false, errp);
>  }
>  
>  int bdrv_drop_filter(BlockDriverState *bs, Error **errp)
> @@ -5717,9 +5699,15 @@ BlockDriverState *bdrv_insert_node(BlockDriverState 
> *bs, QDict *options,
>          goto fail;
>      }
>  
> +    bdrv_ref(bs);

...but now it is gone.  Intentional?

>      bdrv_drained_begin(bs);
> +    bdrv_drained_begin(new_node_bs);
> +    bdrv_graph_wrlock(new_node_bs);
>      ret = bdrv_replace_node(bs, new_node_bs, errp);
> +    bdrv_graph_wrunlock();
> +    bdrv_drained_end(new_node_bs);
>      bdrv_drained_end(bs);
> +    bdrv_unref(bs);
>  
>      if (ret < 0) {
>          error_prepend(errp, "Could not replace node: ");
> diff --git a/block/commit.c b/block/commit.c
> index d92af02ead..2fecdce86f 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -68,6 +68,7 @@ static void commit_abort(Job *job)
>  {
>      CommitBlockJob *s = container_of(job, CommitBlockJob, common.job);
>      BlockDriverState *top_bs = blk_bs(s->top);
> +    BlockDriverState *commit_top_backing_bs;
>  
>      if (s->chain_frozen) {
>          bdrv_graph_rdlock_main_loop();
> @@ -94,8 +95,12 @@ static void commit_abort(Job *job)
>       * XXX Can (or should) we somehow keep 'consistent read' blocked even
>       * after the failed/cancelled commit job is gone? If we already wrote
>       * something to base, the intermediate images aren't valid any more. */
> -    bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
> -                      &error_abort);
> +    commit_top_backing_bs = s->commit_top_bs->backing->bs;
> +    bdrv_drained_begin(commit_top_backing_bs);
> +    bdrv_graph_wrlock(commit_top_backing_bs);

Here, and elsewhere in the patch, drained_begin/end is outside
wr(un)lock...

> +    bdrv_replace_node(s->commit_top_bs, commit_top_backing_bs, &error_abort);
> +    bdrv_graph_wrunlock();
> +    bdrv_drained_end(commit_top_backing_bs);
>  
>      bdrv_unref(s->commit_top_bs);
>      bdrv_unref(top_bs);
> @@ -425,7 +430,11 @@ fail:
>      /* commit_top_bs has to be replaced after deleting the block job,
>       * otherwise this would fail because of lack of permissions. */
>      if (commit_top_bs) {
> +        bdrv_graph_wrlock(top);
> +        bdrv_drained_begin(top);
>          bdrv_replace_node(commit_top_bs, top, &error_abort);
> +        bdrv_drained_end(top);
> +        bdrv_graph_wrunlock();

...but here you do it in the opposite order.  Intentional?

> +++ b/tests/unit/test-bdrv-drain.c
> @@ -2000,7 +2000,13 @@ static void do_test_replace_child_mid_drain(int 
> old_drain_count,
>      parent_s->was_undrained = false;
>  
>      g_assert(parent_bs->quiesce_counter == old_drain_count);
> +    bdrv_drained_begin(old_child_bs);
> +    bdrv_drained_begin(new_child_bs);
> +    bdrv_graph_wrlock(NULL);

Why is this locking on NULL instead of new_child_bs?

>      bdrv_replace_node(old_child_bs, new_child_bs, &error_abort);
> +    bdrv_graph_wrunlock();
> +    bdrv_drained_end(new_child_bs);
> +    bdrv_drained_end(old_child_bs);
>      g_assert(parent_bs->quiesce_counter == new_drain_count);
>  
>      if (!old_drain_count && !new_drain_count) {

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.
Virtualization:  qemu.org | libguestfs.org


Reply via email to