Am 08/11/2022 um 13:37 schrieb Kevin Wolf:
> In order to make sure that bdrv_replace_child_noperm() doesn't have to
> poll any more, get rid of the bdrv_parent_drained_begin_single() call.
>
> This is possible now because we can require that the child is already
> drained when the function is called (it better be, having in-flight
> requests while modifying the graph isn't going to end well!) and we
> don't call the parent drain callbacks more than once.
>
> The additional drain calls needed in callers cause the test case to run
> its code in the drain handler too early (bdrv_attach_child() drains
> now), so modify it to only enable the code after the test setup has
> completed.
>
> Signed-off-by: Kevin Wolf <kw...@redhat.com>
> ---
> include/block/block-io.h | 8 ++++
> block.c | 72 +++++++++++++++++++++++++-----------
> block/io.c | 2 +-
> tests/unit/test-bdrv-drain.c | 10 +++++
> 4 files changed, 70 insertions(+), 22 deletions(-)
>
> diff --git a/include/block/block-io.h b/include/block/block-io.h
> index 5b54ed4672..ddce8550a9 100644
> --- a/include/block/block-io.h
> +++ b/include/block/block-io.h
> @@ -290,6 +290,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector
> *qiov, int64_t pos);
> */
> void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
>
> +/**
> + * bdrv_parent_drained_poll_single:
> + *
> + * Returns true if there is any pending activity to cease before @c can be
> + * called quiesced, false otherwise.
> + */
> +bool bdrv_parent_drained_poll_single(BdrvChild *c);
> +
> /**
> * bdrv_parent_drained_end_single:
> *
> diff --git a/block.c b/block.c
> index 5f5f79cd16..12039e9b8a 100644
> --- a/block.c
> +++ b/block.c
> @@ -2399,6 +2399,20 @@ static void bdrv_replace_child_abort(void *opaque)
>
> GLOBAL_STATE_CODE();
> /* old_bs reference is transparently moved from @s to @s->child */
> + if (!s->child->bs) {
> + /*
> + * The parents were undrained when removing old_bs from the child.
> New
> + * requests can't have been made, though, because the child was
> empty.
> + *
> + * TODO Make bdrv_replace_child_noperm() transactionable to avoid
> + * undraining the parent in the first place. Once this is done,
> having
> + * new_bs drained when calling bdrv_replace_child_tran() is not a
> + * requirement any more.
> + */
> + bdrv_parent_drained_begin_single(s->child, false);
> + assert(!bdrv_parent_drained_poll_single(s->child));
> + }
> + assert(s->child->parent_quiesce_counter);
> bdrv_replace_child_noperm(s->child, s->old_bs);
> bdrv_unref(new_bs);
> }
> @@ -2414,12 +2428,20 @@ static TransactionActionDrv bdrv_replace_child_drv = {
> *
> * Note: real unref of old_bs is done only on commit.
> *
> + * Both child and new_bs (if non-NULL) must be drained. new_bs must be kept
> + * drained until the transaction is completed (this automatically implies
> that
> + * child remains drained, too).
> + *
> * The function doesn't update permissions, caller is responsible for this.
> */
> static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState
> *new_bs,
> Transaction *tran)
> {
> BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
> +
> + assert(child->parent_quiesce_counter);
> + assert(!new_bs || new_bs->quiesce_counter);
> +
> *s = (BdrvReplaceChildState) {
> .child = child,
> .old_bs = child->bs,
> @@ -2818,6 +2840,12 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
> int new_bs_quiesce_counter;
>
> assert(!child->frozen);
> + /*
> + * When removing the child, it's the callers responsibility to make sure
> + * that no requests are in flight any more. Usually the parent is
> drained,
> + * but not through child->parent_quiesce_counter.
> + */
> + assert(!new_bs || child->parent_quiesce_counter);
> assert(old_bs != new_bs);
> GLOBAL_STATE_CODE();
>
> @@ -2825,16 +2853,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
> assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
> }
>
> - new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
> -
> - /*
> - * If the new child node is drained but the old one was not, flush
> - * all outstanding requests to the old child node.
> - */
> - if (new_bs_quiesce_counter && !child->parent_quiesce_counter) {
> - bdrv_parent_drained_begin_single(child, true);
> - }
> -
> if (old_bs) {
> if (child->klass->detach) {
> child->klass->detach(child);
> @@ -2849,14 +2867,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
> assert_bdrv_graph_writable(new_bs);
> QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
>
> - /*
> - * Polling in bdrv_parent_drained_begin_single() may have led to the
> new
> - * node's quiesce_counter having been decreased. Not a problem, we
> just
> - * need to recognize this here and then invoke drained_end
> appropriately
> - * more often.
> - */
> - assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
> -
> if (child->klass->attach) {
> child->klass->attach(child);
> }
> @@ -2865,9 +2875,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
> /*
> * If the old child node was drained but the new one is not, allow
> * requests to come in only after the new node has been attached.
> - *
> - * Update new_bs_quiesce_counter because
> bdrv_parent_drained_begin_single()
> - * polls, which could have changed the value.
> */
> new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
> if (!new_bs_quiesce_counter && child->parent_quiesce_counter) {
> @@ -3004,6 +3011,12 @@ static BdrvChild
> *bdrv_attach_child_common(BlockDriverState *child_bs,
> }
>
> bdrv_ref(child_bs);
> + /*
> + * Let every new BdrvChild start drained, inserting it in the graph with
> + * bdrv_replace_child_noperm() will undrain it if the child node is not
> + * drained. The child was only just created, so polling is not necessary.
> + */
I think there's a better way to write this, I find it complicated to read.
Also I don't really understand how you cover the case where we are
replacing a child with another one (so both old and new are not-null and
not newly created), and `old` for example could (?) have a drained
counter greater than `new`.
Before we had all the draining saldo stuff, but now it's gone.
Thank you,
Emanuele
> + bdrv_parent_drained_begin_single(new_child, false);
> bdrv_replace_child_noperm(new_child, child_bs);
>
> BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
> @@ -5053,7 +5066,10 @@ static void bdrv_remove_child(BdrvChild *child,
> Transaction *tran)
> }
>
> if (child->bs) {
> + BlockDriverState *bs = child->bs;
> + bdrv_drained_begin(bs);
> bdrv_replace_child_tran(child, NULL, tran);
> + bdrv_drained_end(bs);
> }
>
> tran_add(tran, &bdrv_remove_child_drv, child);
> @@ -5070,6 +5086,15 @@ static void
> bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
> bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran);
> }
>
> +static void undrain_on_clean_cb(void *opaque)
> +{
> + bdrv_drained_end(opaque);
> +}
> +
> +static TransactionActionDrv undrain_on_clean = {
> + .clean = undrain_on_clean_cb,
> +};
> +
> static int bdrv_replace_node_noperm(BlockDriverState *from,
> BlockDriverState *to,
> bool auto_skip, Transaction *tran,
> @@ -5079,6 +5104,11 @@ static int bdrv_replace_node_noperm(BlockDriverState
> *from,
>
> GLOBAL_STATE_CODE();
>
> + bdrv_drained_begin(from);
> + bdrv_drained_begin(to);
> + tran_add(tran, &undrain_on_clean, from);
> + tran_add(tran, &undrain_on_clean, to);
> +
> QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
> assert(c->bs == from);
> if (!should_update_child(c, to)) {
> diff --git a/block/io.c b/block/io.c
> index 4a83359a8f..d0f641926f 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -80,7 +80,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs,
> BdrvChild *ignore)
> }
> }
>
> -static bool bdrv_parent_drained_poll_single(BdrvChild *c)
> +bool bdrv_parent_drained_poll_single(BdrvChild *c)
> {
> if (c->klass->drained_poll) {
> return c->klass->drained_poll(c);
> diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
> index 172bc6debc..2686a8acee 100644
> --- a/tests/unit/test-bdrv-drain.c
> +++ b/tests/unit/test-bdrv-drain.c
> @@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void)
>
>
> typedef struct BDRVReplaceTestState {
> + bool setup_completed;
> bool was_drained;
> bool was_undrained;
> bool has_read;
> @@ -1738,6 +1739,10 @@ static void
> bdrv_replace_test_drain_begin(BlockDriverState *bs)
> {
> BDRVReplaceTestState *s = bs->opaque;
>
> + if (!s->setup_completed) {
> + return;
> + }
> +
> if (!s->drain_count) {
> s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
> bdrv_inc_in_flight(bs);
> @@ -1769,6 +1774,10 @@ static void
> bdrv_replace_test_drain_end(BlockDriverState *bs)
> {
> BDRVReplaceTestState *s = bs->opaque;
>
> + if (!s->setup_completed) {
> + return;
> + }
> +
> g_assert(s->drain_count > 0);
> if (!--s->drain_count) {
> s->was_undrained = true;
> @@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int
> old_drain_count,
> bdrv_ref(old_child_bs);
> bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
> BDRV_CHILD_COW, &error_abort);
> + parent_s->setup_completed = true;
>
> for (i = 0; i < old_drain_count; i++) {
> bdrv_drained_begin(old_child_bs);
>