Am 28/04/2022 um 15:55 schrieb Stefan Hajnoczi:
> On Tue, Apr 26, 2022 at 04:51:11AM -0400, Emanuele Giuseppe Esposito wrote:
>> The only problem here is ->attach and ->detach callbacks
>> could call bdrv_{un}apply_subtree_drain(), which itself
>> will use a rdlock to navigate through all nodes.
>> To avoid deadlocks, take the lock only outside the drains,
>> and if we need to both attach and detach, do it in a single
>> critical section.
>>
>> Therefore change ->attach and ->detach to return true if they
>> are modifying the lock, so that we don't take it twice or release
>> temporarly.
>>
>> Signed-off-by: Emanuele Giuseppe Esposito <eespo...@redhat.com>
>> ---
>> block.c | 31 +++++++++++++++++++++++++++----
>> block/block-backend.c | 6 ++++--
>> include/block/block_int-common.h | 8 ++++++--
>> 3 files changed, 37 insertions(+), 8 deletions(-)
>>
>> diff --git a/block.c b/block.c
>> index b2eb679abb..6cd87e8dd3 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -1434,21 +1434,26 @@ static void bdrv_inherited_options(BdrvChildRole
>> role, bool parent_is_format,
>> *child_flags = flags;
>> }
>>
>> -static void bdrv_child_cb_attach(BdrvChild *child)
>> +static bool bdrv_child_cb_attach(BdrvChild *child)
>> {
>> BlockDriverState *bs = child->opaque;
>>
>> assert_bdrv_graph_writable(bs);
>> QLIST_INSERT_HEAD(&bs->children, child, next);
>>
>> + /* Paired with bdrv_graph_wrlock() in bdrv_replace_child_noperm */
>> + bdrv_graph_wrunlock();
>> +
>> if (child->role & BDRV_CHILD_COW) {
>> bdrv_backing_attach(child);
>> }
>>
>> bdrv_apply_subtree_drain(child, bs);
>> +
>> + return true;
>> }
>>
>> -static void bdrv_child_cb_detach(BdrvChild *child)
>> +static bool bdrv_child_cb_detach(BdrvChild *child)
>> {
>> BlockDriverState *bs = child->opaque;
>>
>> @@ -1458,8 +1463,13 @@ static void bdrv_child_cb_detach(BdrvChild *child)
>>
>> bdrv_unapply_subtree_drain(child, bs);
>>
>> + /* Paired with bdrv_graph_wrunlock() in bdrv_replace_child_noperm */
>> + bdrv_graph_wrlock();
>> +
>> assert_bdrv_graph_writable(bs);
>> QLIST_REMOVE(child, next);
>> +
>> + return true;
>> }
>>
>> static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState
>> *base,
>> @@ -2842,6 +2852,7 @@ static void bdrv_replace_child_noperm(BdrvChild
>> **childp,
>> BlockDriverState *old_bs = child->bs;
>> int new_bs_quiesce_counter;
>> int drain_saldo;
>> + bool locked = false;
>>
>> assert(!child->frozen);
>> assert(old_bs != new_bs);
>> @@ -2868,8 +2879,12 @@ static void bdrv_replace_child_noperm(BdrvChild
>> **childp,
>> * are already gone and we only end the drain sections that came
>> from
>> * elsewhere. */
>> if (child->klass->detach) {
>> - child->klass->detach(child);
>> + locked = child->klass->detach(child);
>> + }
>> + if (!locked) {
>> + bdrv_graph_wrlock();
>> }
>> + locked = true;
>> assert_bdrv_graph_writable(old_bs);
>> QLIST_REMOVE(child, next_parent);
>> }
>> @@ -2880,6 +2895,10 @@ static void bdrv_replace_child_noperm(BdrvChild
>> **childp,
>> }
>>
>> if (new_bs) {
>> + if (!locked) {
>> + bdrv_graph_wrlock();
>> + locked = true;
>> + }
>> assert_bdrv_graph_writable(new_bs);
>> QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
>>
>> @@ -2896,10 +2915,14 @@ static void bdrv_replace_child_noperm(BdrvChild
>> **childp,
>> * drain sections coming from @child don't get an extra
>> .drained_begin
>> * callback. */
>> if (child->klass->attach) {
>> - child->klass->attach(child);
>> + locked = !(child->klass->attach(child));
>
> O_O I don't understand what the return value of ->attach() means. It has
> the opposite meaning to the return value of ->detach()?
It means "state of the lock changed". So for ->attach(), if it is
changed (went to unlock), we want locked = false.
I will probably switch to Paolo's suggestion, it's cleaner.
>
>> }
>> }
>>
>> + if (locked) {
>> + bdrv_graph_wrunlock();
>> + }
>> +
>> /*
>> * If the old child node was drained but the new one is not, allow
>> * requests to come in only after the new node has been attached.
>> diff --git a/block/block-backend.c b/block/block-backend.c
>> index e0e1aff4b1..5dbd9fceae 100644
>> --- a/block/block-backend.c
>> +++ b/block/block-backend.c
>> @@ -282,7 +282,7 @@ static int blk_root_inactivate(BdrvChild *child)
>> return 0;
>> }
>>
>> -static void blk_root_attach(BdrvChild *child)
>> +static bool blk_root_attach(BdrvChild *child)
>> {
>> BlockBackend *blk = child->opaque;
>> BlockBackendAioNotifier *notifier;
>> @@ -295,9 +295,10 @@ static void blk_root_attach(BdrvChild *child)
>> notifier->detach_aio_context,
>> notifier->opaque);
>> }
>> + return false;
>> }
>>
>> -static void blk_root_detach(BdrvChild *child)
>> +static bool blk_root_detach(BdrvChild *child)
>> {
>> BlockBackend *blk = child->opaque;
>> BlockBackendAioNotifier *notifier;
>> @@ -310,6 +311,7 @@ static void blk_root_detach(BdrvChild *child)
>> notifier->detach_aio_context,
>> notifier->opaque);
>> }
>> + return false;
>> }
>>
>> static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
>> diff --git a/include/block/block_int-common.h
>> b/include/block/block_int-common.h
>> index 5a04c778e4..dd058c1fd8 100644
>> --- a/include/block/block_int-common.h
>> +++ b/include/block/block_int-common.h
>> @@ -857,8 +857,12 @@ struct BdrvChildClass {
>> void (*activate)(BdrvChild *child, Error **errp);
>> int (*inactivate)(BdrvChild *child);
>>
>> - void (*attach)(BdrvChild *child);
>> - void (*detach)(BdrvChild *child);
>> + /*
>> + * Return true if the graph wrlock is taken/released,
>
> What does "taken/released" mean? Does it mean released by attach and
> taken by detach?
Yes
>
> Also, please document which locks are held when these callbacks are
> invoked.
>
>> + * false if the wrlock state is not changed.
>> + */
>> + bool (*attach)(BdrvChild *child);
>> + bool (*detach)(BdrvChild *child);
>>
>> /*
>> * Notifies the parent that the filename of its child has changed (e.g.
>> --
>> 2.31.1
>>