[Qemu-devel] [PATCH 02/18] block: BDS deletion during bdrv_drain_recurse

2017-09-13 Thread Max Reitz
Draining a BDS child may lead to both the original BDS and/or its other
children being deleted (e.g. if the original BDS represents a block
job).  We should prepare for this in both bdrv_drain_recurse() and
bdrv_drained_begin() by monitoring whether the BDS we are about to drain
still exists at all.

Signed-off-by: Max Reitz 
---
 block/io.c | 72 +-
 1 file changed, 52 insertions(+), 20 deletions(-)

diff --git a/block/io.c b/block/io.c
index 4378ae4c7d..8ec1a564ad 100644
--- a/block/io.c
+++ b/block/io.c
@@ -182,33 +182,57 @@ static void bdrv_drain_invoke(BlockDriverState *bs)
 
 static bool bdrv_drain_recurse(BlockDriverState *bs)
 {
-BdrvChild *child, *tmp;
+BdrvChild *child;
 bool waited;
+struct BDSToDrain {
+BlockDriverState *bs;
+BdrvDeletedStatus del_stat;
+QLIST_ENTRY(BDSToDrain) next;
+};
+QLIST_HEAD(, BDSToDrain) bs_list = QLIST_HEAD_INITIALIZER(bs_list);
+bool in_main_loop =
+qemu_get_current_aio_context() == qemu_get_aio_context();
 
 waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
 
 /* Ensure any pending metadata writes are submitted to bs->file.  */
 bdrv_drain_invoke(bs);
 
-QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
-BlockDriverState *bs = child->bs;
-bool in_main_loop =
-qemu_get_current_aio_context() == qemu_get_aio_context();
-assert(bs->refcnt > 0);
-if (in_main_loop) {
-/* In case the recursive bdrv_drain_recurse processes a
- * block_job_defer_to_main_loop BH and modifies the graph,
- * let's hold a reference to bs until we are done.
- *
- * IOThread doesn't have such a BH, and it is not safe to call
- * bdrv_unref without BQL, so skip doing it there.
- */
-bdrv_ref(bs);
-}
-waited |= bdrv_drain_recurse(bs);
-if (in_main_loop) {
-bdrv_unref(bs);
+/* Draining children may result in other children being removed and maybe
+ * even deleted, so copy the children list first */
+QLIST_FOREACH(child, &bs->children, next) {
+struct BDSToDrain *bs2d = g_new0(struct BDSToDrain, 1);
+
+bs2d->bs = child->bs;
+QLIST_INSERT_HEAD(&bs->deleted_status, &bs2d->del_stat, next);
+
+QLIST_INSERT_HEAD(&bs_list, bs2d, next);
+}
+
+while (!QLIST_EMPTY(&bs_list)) {
+struct BDSToDrain *bs2d = QLIST_FIRST(&bs_list);
+QLIST_REMOVE(bs2d, next);
+
+if (!bs2d->del_stat.deleted) {
+QLIST_REMOVE(&bs2d->del_stat, next);
+
+if (in_main_loop) {
+/* In case the recursive bdrv_drain_recurse processes a
+ * block_job_defer_to_main_loop BH and modifies the graph,
+ * let's hold a reference to the BDS until we are done.
+ *
+ * IOThread doesn't have such a BH, and it is not safe to call
+ * bdrv_unref without BQL, so skip doing it there.
+ */
+bdrv_ref(bs2d->bs);
+}
+waited |= bdrv_drain_recurse(bs2d->bs);
+if (in_main_loop) {
+bdrv_unref(bs2d->bs);
+}
 }
+
+g_free(bs2d);
 }
 
 return waited;
@@ -252,17 +276,25 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
 
 void bdrv_drained_begin(BlockDriverState *bs)
 {
+BdrvDeletedStatus del_stat = { .deleted = false };
+
 if (qemu_in_coroutine()) {
 bdrv_co_yield_to_drain(bs);
 return;
 }
 
+QLIST_INSERT_HEAD(&bs->deleted_status, &del_stat, next);
+
 if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
 aio_disable_external(bdrv_get_aio_context(bs));
 bdrv_parent_drained_begin(bs);
 }
 
-bdrv_drain_recurse(bs);
+if (!del_stat.deleted) {
+QLIST_REMOVE(&del_stat, next);
+
+bdrv_drain_recurse(bs);
+}
 }
 
 void bdrv_drained_end(BlockDriverState *bs)
-- 
2.13.5




Re: [Qemu-devel] [PATCH 02/18] block: BDS deletion during bdrv_drain_recurse

2017-09-17 Thread Fam Zheng
On Wed, 09/13 20:18, Max Reitz wrote:
> Draining a BDS child may lead to both the original BDS and/or its other
> children being deleted (e.g. if the original BDS represents a block
> job).  We should prepare for this in both bdrv_drain_recurse() and
> bdrv_drained_begin() by monitoring whether the BDS we are about to drain
> still exists at all.

Can the deletion happen when IOThread calls
bdrv_drain_recurse/bdrv_drained_begin?  If not, is it enough to do

...
if (in_main_loop) {
bdrv_ref(bs);
}
...
if (in_main_loop) {
bdrv_unref(bs);
}

to protect the main loop case? So the BdrvDeletedStatus state is not needed.

Fam

> 
> Signed-off-by: Max Reitz 
> ---
>  block/io.c | 72 
> +-
>  1 file changed, 52 insertions(+), 20 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index 4378ae4c7d..8ec1a564ad 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -182,33 +182,57 @@ static void bdrv_drain_invoke(BlockDriverState *bs)
>  
>  static bool bdrv_drain_recurse(BlockDriverState *bs)
>  {
> -BdrvChild *child, *tmp;
> +BdrvChild *child;
>  bool waited;
> +struct BDSToDrain {
> +BlockDriverState *bs;
> +BdrvDeletedStatus del_stat;
> +QLIST_ENTRY(BDSToDrain) next;
> +};
> +QLIST_HEAD(, BDSToDrain) bs_list = QLIST_HEAD_INITIALIZER(bs_list);
> +bool in_main_loop =
> +qemu_get_current_aio_context() == qemu_get_aio_context();
>  
>  waited = BDRV_POLL_WHILE(bs, atomic_read(&bs->in_flight) > 0);
>  
>  /* Ensure any pending metadata writes are submitted to bs->file.  */
>  bdrv_drain_invoke(bs);
>  
> -QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
> -BlockDriverState *bs = child->bs;
> -bool in_main_loop =
> -qemu_get_current_aio_context() == qemu_get_aio_context();
> -assert(bs->refcnt > 0);
> -if (in_main_loop) {
> -/* In case the recursive bdrv_drain_recurse processes a
> - * block_job_defer_to_main_loop BH and modifies the graph,
> - * let's hold a reference to bs until we are done.
> - *
> - * IOThread doesn't have such a BH, and it is not safe to call
> - * bdrv_unref without BQL, so skip doing it there.
> - */
> -bdrv_ref(bs);
> -}
> -waited |= bdrv_drain_recurse(bs);
> -if (in_main_loop) {
> -bdrv_unref(bs);
> +/* Draining children may result in other children being removed and maybe
> + * even deleted, so copy the children list first */
> +QLIST_FOREACH(child, &bs->children, next) {
> +struct BDSToDrain *bs2d = g_new0(struct BDSToDrain, 1);
> +
> +bs2d->bs = child->bs;
> +QLIST_INSERT_HEAD(&bs->deleted_status, &bs2d->del_stat, next);
> +
> +QLIST_INSERT_HEAD(&bs_list, bs2d, next);
> +}
> +
> +while (!QLIST_EMPTY(&bs_list)) {
> +struct BDSToDrain *bs2d = QLIST_FIRST(&bs_list);
> +QLIST_REMOVE(bs2d, next);
> +
> +if (!bs2d->del_stat.deleted) {
> +QLIST_REMOVE(&bs2d->del_stat, next);
> +
> +if (in_main_loop) {
> +/* In case the recursive bdrv_drain_recurse processes a
> + * block_job_defer_to_main_loop BH and modifies the graph,
> + * let's hold a reference to the BDS until we are done.
> + *
> + * IOThread doesn't have such a BH, and it is not safe to call
> + * bdrv_unref without BQL, so skip doing it there.
> + */
> +bdrv_ref(bs2d->bs);
> +}
> +waited |= bdrv_drain_recurse(bs2d->bs);
> +if (in_main_loop) {
> +bdrv_unref(bs2d->bs);
> +}
>  }
> +
> +g_free(bs2d);
>  }
>  
>  return waited;
> @@ -252,17 +276,25 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
>  
>  void bdrv_drained_begin(BlockDriverState *bs)
>  {
> +BdrvDeletedStatus del_stat = { .deleted = false };
> +
>  if (qemu_in_coroutine()) {
>  bdrv_co_yield_to_drain(bs);
>  return;
>  }
>  
> +QLIST_INSERT_HEAD(&bs->deleted_status, &del_stat, next);
> +
>  if (atomic_fetch_inc(&bs->quiesce_counter) == 0) {
>  aio_disable_external(bdrv_get_aio_context(bs));
>  bdrv_parent_drained_begin(bs);
>  }
>  
> -bdrv_drain_recurse(bs);
> +if (!del_stat.deleted) {
> +QLIST_REMOVE(&del_stat, next);
> +
> +bdrv_drain_recurse(bs);
> +}
>  }
>  
>  void bdrv_drained_end(BlockDriverState *bs)
> -- 
> 2.13.5
> 



Re: [Qemu-devel] [PATCH 02/18] block: BDS deletion during bdrv_drain_recurse

2017-09-18 Thread Max Reitz
On 2017-09-18 05:44, Fam Zheng wrote:
> On Wed, 09/13 20:18, Max Reitz wrote:
>> Draining a BDS child may lead to both the original BDS and/or its other
>> children being deleted (e.g. if the original BDS represents a block
>> job).  We should prepare for this in both bdrv_drain_recurse() and
>> bdrv_drained_begin() by monitoring whether the BDS we are about to drain
>> still exists at all.
> 
> Can the deletion happen when IOThread calls
> bdrv_drain_recurse/bdrv_drained_begin?

I don't think so, because (1) my issue was draining a block job and that
can only be completed in the main loop, and (2) I would like to think
it's always impossible, considering that bdrv_unref() may only be called
with the BQL.

> If not, is it enough to do
> 
> ...
> if (in_main_loop) {
> bdrv_ref(bs);
> }
> ...
> if (in_main_loop) {
> bdrv_unref(bs);
> }
> 
> to protect the main loop case? So the BdrvDeletedStatus state is not needed.

We already have that in bdrv_drain_recurse(), don't we?

The issue here is, though, that QLIST_FOREACH_SAFE() stores the next
child pointer to @tmp.  However, once the current child @child is
drained, @tmp may no longer be valid -- it may have been detached from
@bs, and it may even have been deleted.

We could work around the latter by increasing the next child's reference
somehow (but BdrvChild doesn't really have a refcount, and in order to
do so, we would probably have to emulate being a parent or
something...), but then you'd still have the issue of @tmp being
detached from the children list we're trying to iterate over.  So
tmp->next is no longer valid.

Anyway, so the latter is the reason why I decided to introduce the bs_list.

But maybe that actually saves us from having to fiddle with BdrvChild...
Since it's just a list of BDSs now, it may be enough to simply
bdrv_ref() all of the BDSs in that list before draining any of them.  So
we'd keep creating the bs_list and then we'd move the existing
bdrv_ref() from the drain loop into the loop filling bs_list.

And adding a bdrv_ref()/bdrv_unref() pair to bdrv_drained_begin() should
hopefully work there, too.

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 02/18] block: BDS deletion during bdrv_drain_recurse

2017-10-09 Thread Max Reitz
On 2017-09-18 18:13, Max Reitz wrote:
> On 2017-09-18 05:44, Fam Zheng wrote:
>> On Wed, 09/13 20:18, Max Reitz wrote:
>>> Draining a BDS child may lead to both the original BDS and/or its other
>>> children being deleted (e.g. if the original BDS represents a block
>>> job).  We should prepare for this in both bdrv_drain_recurse() and
>>> bdrv_drained_begin() by monitoring whether the BDS we are about to drain
>>> still exists at all.
>>
>> Can the deletion happen when IOThread calls
>> bdrv_drain_recurse/bdrv_drained_begin?
> 
> I don't think so, because (1) my issue was draining a block job and that
> can only be completed in the main loop, and (2) I would like to think
> it's always impossible, considering that bdrv_unref() may only be called
> with the BQL.
> 
>> If not, is it enough to do
>>
>> ...
>> if (in_main_loop) {
>> bdrv_ref(bs);
>> }
>> ...
>> if (in_main_loop) {
>> bdrv_unref(bs);
>> }
>>
>> to protect the main loop case? So the BdrvDeletedStatus state is not needed.
> 
> We already have that in bdrv_drain_recurse(), don't we?
> 
> The issue here is, though, that QLIST_FOREACH_SAFE() stores the next
> child pointer to @tmp.  However, once the current child @child is
> drained, @tmp may no longer be valid -- it may have been detached from
> @bs, and it may even have been deleted.
> 
> We could work around the latter by increasing the next child's reference
> somehow (but BdrvChild doesn't really have a refcount, and in order to
> do so, we would probably have to emulate being a parent or
> something...), but then you'd still have the issue of @tmp being
> detached from the children list we're trying to iterate over.  So
> tmp->next is no longer valid.
> 
> Anyway, so the latter is the reason why I decided to introduce the bs_list.
> 
> But maybe that actually saves us from having to fiddle with BdrvChild...
> Since it's just a list of BDSs now, it may be enough to simply
> bdrv_ref() all of the BDSs in that list before draining any of them.  So
> we'd keep creating the bs_list and then we'd move the existing
> bdrv_ref() from the drain loop into the loop filling bs_list.
> 
> And adding a bdrv_ref()/bdrv_unref() pair to bdrv_drained_begin() should
> hopefully work there, too.

It turns out it isn't so simple after all... because bdrv_close()
invokes bdrv_drained_begin(). So we may end up with an endless recursion
here.

One way to fix this would be to skip the bdrv_drained_begin() in
bdrv_close() if this would result in such a recursion...  But any
solution that comes quickly to my mind would require another BDS field,
too -- just checking the quiesce_counter is probably not enough because
this might just indicate concurrent drainage that stops before
bdrv_close() wants it to stop.

So maybe BdrvDeletedStatus is the simplest solution after all...?

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 02/18] block: BDS deletion during bdrv_drain_recurse

2017-10-10 Thread Kevin Wolf
Am 13.09.2017 um 20:18 hat Max Reitz geschrieben:
> Draining a BDS child may lead to both the original BDS and/or its other
> children being deleted (e.g. if the original BDS represents a block
> job).  We should prepare for this in both bdrv_drain_recurse() and
> bdrv_drained_begin() by monitoring whether the BDS we are about to drain
> still exists at all.
> 
> Signed-off-by: Max Reitz 

How hard would it be to write a test case for this? qemu-iotests
probably isn't the right tool, but I feel a C unit test would be
possible.

> -QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
> -BlockDriverState *bs = child->bs;
> -bool in_main_loop =
> -qemu_get_current_aio_context() == qemu_get_aio_context();
> -assert(bs->refcnt > 0);

Would it make sense to keep this assertion for the !deleted case?

> -if (in_main_loop) {
> -/* In case the recursive bdrv_drain_recurse processes a
> - * block_job_defer_to_main_loop BH and modifies the graph,
> - * let's hold a reference to bs until we are done.
> - *
> - * IOThread doesn't have such a BH, and it is not safe to call
> - * bdrv_unref without BQL, so skip doing it there.
> - */
> -bdrv_ref(bs);
> -}
> -waited |= bdrv_drain_recurse(bs);
> -if (in_main_loop) {
> -bdrv_unref(bs);
> +/* Draining children may result in other children being removed and maybe
> + * even deleted, so copy the children list first */

Maybe it's just me, but I failed to understand this correctly at first.
How about "being removed from their parent" to clarify that it's not the
BDS that is removed, but just the reference?

Kevin



Re: [Qemu-devel] [PATCH 02/18] block: BDS deletion during bdrv_drain_recurse

2017-10-11 Thread Max Reitz
On 2017-10-10 10:36, Kevin Wolf wrote:
> Am 13.09.2017 um 20:18 hat Max Reitz geschrieben:
>> Draining a BDS child may lead to both the original BDS and/or its other
>> children being deleted (e.g. if the original BDS represents a block
>> job).  We should prepare for this in both bdrv_drain_recurse() and
>> bdrv_drained_begin() by monitoring whether the BDS we are about to drain
>> still exists at all.
>>
>> Signed-off-by: Max Reitz 
> 
> How hard would it be to write a test case for this? qemu-iotests
> probably isn't the right tool, but I feel a C unit test would be
> possible.

I can look into it, but I can't promise anything.

>> -QLIST_FOREACH_SAFE(child, &bs->children, next, tmp) {
>> -BlockDriverState *bs = child->bs;
>> -bool in_main_loop =
>> -qemu_get_current_aio_context() == qemu_get_aio_context();
>> -assert(bs->refcnt > 0);
> 
> Would it make sense to keep this assertion for the !deleted case?

Sure, why not.

>> -if (in_main_loop) {
>> -/* In case the recursive bdrv_drain_recurse processes a
>> - * block_job_defer_to_main_loop BH and modifies the graph,
>> - * let's hold a reference to bs until we are done.
>> - *
>> - * IOThread doesn't have such a BH, and it is not safe to call
>> - * bdrv_unref without BQL, so skip doing it there.
>> - */
>> -bdrv_ref(bs);
>> -}
>> -waited |= bdrv_drain_recurse(bs);
>> -if (in_main_loop) {
>> -bdrv_unref(bs);
>> +/* Draining children may result in other children being removed and maybe
>> + * even deleted, so copy the children list first */
> 
> Maybe it's just me, but I failed to understand this correctly at first.
> How about "being removed from their parent" to clarify that it's not the
> BDS that is removed, but just the reference?

Well, it's the BdrvChild that's removed, that's what I meant by
"children".  But then the comment speaks of "children list" and means
creation of a list of BDSs, sooo...  Yes, some change necessary.

Max



signature.asc
Description: OpenPGP digital signature