How are nodes removed during block-graph update transactions now? Look
at bdrv_replace_child_tran(): bdrv_unref() is simply postponed to the
commit phase.

What is the problem with it?

We want to make copy-before-write permissions strict: the filter should
always unshare write, not only when it has at least one parent. But if
so, we can neither insert the filter nor remove it:

To insert the filter, we should first do blockdev-add, and the filter
will unshare write on the child, so blockdev-add will fail if the disk
is in use by the guest.

To remove the filter, we should first do a replace operation, which
again leads to a situation where the filter and the old parent share
one child, and the old parent wants write permission while the filter
unshares it.

The solution is to first do both graph-modifying operations (add &
replace, or replace & remove) and only then update permissions. But
that is not possible with the current method of transactionally
removing a block node: if we just postpone bdrv_unref() to the commit
phase, then in the prepare phase the node is not removed, and it still
keeps all permissions on its children.

What to do? In general, I don't know. But it's possible to solve the
problem for block drivers that don't need access to their children in
.bdrv_close(). For such drivers we can detach their children in the
prepare stage (while still postponing the bdrv_close() call to the
commit phase). For this to work we of course should effectively reduce
bs->refcnt in the prepare phase as well.

So, the logic of new bdrv_unref_tran() is:

prepare:
  decrease refcnt and detach children if possible (and if refcnt is 0)

commit:
  do bdrv_delete() if refcnt is 0

abort:
  restore children and refcnt

What's the difficulty with it? If we want to remove nodes
transactionally (and with no permission change), we should understand
that some nodes may be removed recursively, and in the end we get
several possibly not-deleted leaves whose permissions should be
updated. How will the caller know what to update? That leads to an
additional transaction-wide refresh_list variable, which is filled by
the various graph-modifying functions. So, the user should declare a
refresh_list variable and do one or several block-graph modifying
operations (that may remove some nodes), then call
bdrv_list_refresh_perms() on the resulting refresh_list.

Signed-off-by: Vladimir Sementsov-Ogievskiy <v.sementsov...@mail.ru>
---
 block.c                   | 224 +++++++++++++++++++++++++-------------
 include/block/block.h     |   5 +-
 include/block/block_int.h |   7 ++
 3 files changed, 157 insertions(+), 79 deletions(-)

diff --git a/block.c b/block.c
index 22c5010c4d..7c22b31259 100644
--- a/block.c
+++ b/block.c
@@ -89,10 +89,12 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
 
 static void bdrv_replace_child_noperm(BdrvChild *child,
                                       BlockDriverState *new_bs);
-static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
+static void bdrv_remove_child(BdrvChild *child, GSList **refresh_list,
+                              Transaction *tran);
 
 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
                                BlockReopenQueue *queue,
+                               GSList **refresh_list,
                                Transaction *change_child_tran, Error **errp);
 static void bdrv_reopen_commit(BDRVReopenState *reopen_state);
 static void bdrv_reopen_abort(BDRVReopenState *reopen_state);
@@ -2285,38 +2287,23 @@ typedef struct BdrvReplaceChildState {
     BlockDriverState *old_bs;
 } BdrvReplaceChildState;
 
-static void bdrv_replace_child_commit(void *opaque)
-{
-    BdrvReplaceChildState *s = opaque;
-
-    bdrv_unref(s->old_bs);
-}
-
 static void bdrv_replace_child_abort(void *opaque)
 {
     BdrvReplaceChildState *s = opaque;
     BlockDriverState *new_bs = s->child->bs;
 
-    /* old_bs reference is transparently moved from @s to @s->child */
     bdrv_replace_child_noperm(s->child, s->old_bs);
     bdrv_unref(new_bs);
 }
 
 static TransactionActionDrv bdrv_replace_child_drv = {
-    .commit = bdrv_replace_child_commit,
     .abort = bdrv_replace_child_abort,
     .clean = g_free,
 };
 
-/*
- * bdrv_replace_child_tran
- *
- * Note: real unref of old_bs is done only on commit.
- *
- * The function doesn't update permissions, caller is responsible for this.
- */
+/* Caller is responsible to refresh permissions in @refresh_list */
 static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
-                                    Transaction *tran)
+                                    GSList **refresh_list, Transaction *tran)
 {
     BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
     *s = (BdrvReplaceChildState) {
@@ -2327,9 +2314,15 @@ static void bdrv_replace_child_tran(BdrvChild *child, 
BlockDriverState *new_bs,
 
     if (new_bs) {
         bdrv_ref(new_bs);
+        *refresh_list = g_slist_prepend(*refresh_list, new_bs);
     }
     bdrv_replace_child_noperm(child, new_bs);
-    /* old_bs reference is transparently moved from @child to @s */
+    if (s->old_bs) {
+        bdrv_unref_tran(s->old_bs, refresh_list, tran);
+        if (s->old_bs->refcnt > 0) {
+            *refresh_list = g_slist_prepend(*refresh_list, s->old_bs);
+        }
+    }
 }
 
 /*
@@ -2829,7 +2822,6 @@ static TransactionActionDrv bdrv_try_set_aio_context_drv 
= {
     .clean = g_free,
 };
 
-__attribute__((unused))
 static int bdrv_try_set_aio_context_tran(BlockDriverState *bs,
                                          AioContext *new_ctx,
                                          Transaction *tran,
@@ -3103,30 +3095,40 @@ out:
     return ret < 0 ? NULL : child;
 }
 
-/* Callers must ensure that child->frozen is false. */
-void bdrv_root_unref_child(BdrvChild *child)
+/* Caller is responsible to refresh permissions in @refresh_list */
+static void bdrv_root_unref_child_tran(BdrvChild *child, GSList **refresh_list,
+                                       Transaction *tran)
 {
     BlockDriverState *child_bs = child->bs;
 
-    bdrv_replace_child_noperm(child, NULL);
-    bdrv_child_free(child);
-
-    if (child_bs) {
-        /*
-         * Update permissions for old node. We're just taking a parent away, so
-         * we're loosening restrictions. Errors of permission update are not
-         * fatal in this case, ignore them.
-         */
-        bdrv_refresh_perms(child_bs, NULL, NULL);
+    bdrv_remove_child(child, refresh_list, tran);
 
+    if (child_bs && child_bs->refcnt > 0) {
         /*
          * When the parent requiring a non-default AioContext is removed, the
          * node moves back to the main AioContext
          */
-        bdrv_try_set_aio_context(child_bs, qemu_get_aio_context(), NULL);
+        bdrv_try_set_aio_context_tran(child_bs, qemu_get_aio_context(),
+                                      tran, NULL);
     }
+}
 
-    bdrv_unref(child_bs);
+/* Callers must ensure that child->frozen is false. */
+void bdrv_root_unref_child(BdrvChild *child)
+{
+    Transaction *tran = tran_new();
+    g_autoptr(GSList) refresh_list = NULL;
+
+    bdrv_root_unref_child_tran(child, &refresh_list, tran);
+
+    /*
+     * Update permissions for old node. We're just taking a parent away, so
+     * we're loosening restrictions. Errors of permission update are not
+     * fatal in this case, ignore them.
+     */
+    bdrv_list_refresh_perms(refresh_list, NULL, tran, NULL);
+
+    tran_commit(tran);
 }
 
 typedef struct BdrvSetInheritsFrom {
@@ -3195,15 +3197,27 @@ static void bdrv_unset_inherits_from(BlockDriverState 
*root, BdrvChild *child,
     }
 }
 
-/* Callers must ensure that child->frozen is false. */
-void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
+/* Caller is responsible to refresh permissions in @refresh_list */
+static void bdrv_unref_child_tran(BlockDriverState *parent, BdrvChild *child,
+                                    GSList **refresh_list, Transaction *tran)
 {
     if (child == NULL) {
         return;
     }
 
-    bdrv_unset_inherits_from(parent, child, NULL);
-    bdrv_root_unref_child(child);
+    bdrv_unset_inherits_from(parent, child, tran);
+    bdrv_root_unref_child_tran(child, refresh_list, tran);
+}
+
+/* Callers must ensure that child->frozen is false. */
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
+{
+    Transaction *tran = tran_new();
+    g_autoptr(GSList) refresh_list = NULL;
+
+    bdrv_unref_child_tran(parent, child, &refresh_list, tran);
+    bdrv_list_refresh_perms(refresh_list, NULL, tran, NULL);
+    tran_commit(tran);
 }
 
 
@@ -3247,11 +3261,12 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState 
*bs)
  * Sets the bs->backing or bs->file link of a BDS. A new reference is created;
  * callers which don't need their own reference any more must call 
bdrv_unref().
  *
- * Function doesn't update permissions, caller is responsible for this.
+ * Caller is responsible to refresh permissions in @refresh_list.
  */
 static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
                                            BlockDriverState *child_bs,
                                            bool is_backing,
+                                           GSList **refresh_list,
                                            Transaction *tran, Error **errp)
 {
     int ret = 0;
@@ -3303,13 +3318,15 @@ static int 
bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
 
     if (child) {
         bdrv_unset_inherits_from(parent_bs, child, tran);
-        bdrv_remove_child(child, tran);
+        bdrv_remove_child(child, refresh_list, tran);
     }
 
     if (!child_bs) {
         goto out;
     }
 
+    *refresh_list = g_slist_prepend(*refresh_list, parent_bs);
+
     ret = bdrv_attach_child_noperm(parent_bs, child_bs,
                                    is_backing ? "backing" : "file",
                                    &child_of_bds, role,
@@ -3333,11 +3350,14 @@ out:
     return 0;
 }
 
+/* Caller is responsible to refresh permissions in @refresh_list */
 static int bdrv_set_backing_noperm(BlockDriverState *bs,
                                    BlockDriverState *backing_hd,
+                                   GSList **refresh_list,
                                    Transaction *tran, Error **errp)
 {
-    return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
+    return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, refresh_list,
+                                           tran, errp);
 }
 
 int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
@@ -3345,15 +3365,16 @@ int bdrv_set_backing_hd(BlockDriverState *bs, 
BlockDriverState *backing_hd,
 {
     int ret;
     Transaction *tran = tran_new();
+    g_autoptr(GSList) refresh_list = NULL;
 
     bdrv_drained_begin(bs);
 
-    ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
+    ret = bdrv_set_backing_noperm(bs, backing_hd, &refresh_list, tran, errp);
     if (ret < 0) {
         goto out;
     }
 
-    ret = bdrv_refresh_perms(bs, tran, errp);
+    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
 out:
     tran_finalize(tran, ret);
 
@@ -4302,7 +4323,8 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, 
Error **errp)
         assert(bs_entry->state.bs->quiesce_counter > 0);
         ctx = bdrv_get_aio_context(bs_entry->state.bs);
         aio_context_acquire(ctx);
-        ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, tran, errp);
+        ret = bdrv_reopen_prepare(&bs_entry->state, bs_queue, &refresh_list,
+                                  tran, errp);
         aio_context_release(ctx);
         if (ret < 0) {
             goto abort;
@@ -4314,14 +4336,7 @@ int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, 
Error **errp)
         BDRVReopenState *state = &bs_entry->state;
 
         refresh_list = g_slist_prepend(refresh_list, state->bs);
-        if (state->old_backing_bs) {
-            refresh_list = g_slist_prepend(refresh_list, 
state->old_backing_bs);
-        }
-        if (state->old_file_bs) {
-            refresh_list = g_slist_prepend(refresh_list, state->old_file_bs);
-        }
     }
-
     /*
      * Note that file-posix driver rely on permission update done during reopen
      * (even if no permission changed), because it wants "new" permissions for
@@ -4430,10 +4445,14 @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, 
bool read_only,
  * true and reopen_state->new_backing_bs contains a pointer to the new
  * backing BlockDriverState (or NULL).
  *
+ * Caller is responsible to refresh permissions in @refresh_list.
+ *
  * Return 0 on success, otherwise return < 0 and set @errp.
  */
 static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
-                                             bool is_backing, Transaction 
*tran,
+                                             bool is_backing,
+                                             GSList **refresh_list,
+                                             Transaction *tran,
                                              Error **errp)
 {
     BlockDriverState *bs = reopen_state->bs;
@@ -4499,14 +4518,8 @@ static int 
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
         return -EINVAL;
     }
 
-    if (is_backing) {
-        reopen_state->old_backing_bs = old_child_bs;
-    } else {
-        reopen_state->old_file_bs = old_child_bs;
-    }
-
     return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
-                                           tran, errp);
+                                           refresh_list, tran, errp);
 }
 
 /*
@@ -4518,6 +4531,8 @@ static int 
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
  * flags are the new open flags
  * queue is the reopen queue
  *
+ * Caller is responsible to refresh permissions in @refresh_list.
+ *
  * Returns 0 on success, non-zero on error.  On error errp will be set
  * as well.
  *
@@ -4528,6 +4543,7 @@ static int 
bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
  */
 static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
                                BlockReopenQueue *queue,
+                               GSList **refresh_list,
                                Transaction *change_child_tran, Error **errp)
 {
     int ret = -1;
@@ -4648,7 +4664,7 @@ static int bdrv_reopen_prepare(BDRVReopenState 
*reopen_state,
      * either a reference to an existing node (using its node name)
      * or NULL to simply detach the current backing file.
      */
-    ret = bdrv_reopen_parse_file_or_backing(reopen_state, true,
+    ret = bdrv_reopen_parse_file_or_backing(reopen_state, true, refresh_list,
                                             change_child_tran, errp);
     if (ret < 0) {
         goto error;
@@ -4656,7 +4672,7 @@ static int bdrv_reopen_prepare(BDRVReopenState 
*reopen_state,
     qdict_del(reopen_state->options, "backing");
 
     /* Allow changing the 'file' option. In this case NULL is not allowed */
-    ret = bdrv_reopen_parse_file_or_backing(reopen_state, false,
+    ret = bdrv_reopen_parse_file_or_backing(reopen_state, false, refresh_list,
                                             change_child_tran, errp);
     if (ret < 0) {
         goto error;
@@ -4965,24 +4981,28 @@ static TransactionActionDrv bdrv_remove_child_drv = {
 
 /*
  * A function to remove backing or file child of @bs.
- * Function doesn't update permissions, caller is responsible for this.
+ * Caller is responsible to refresh permissions in @refresh_list.
  */
-static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
+static void bdrv_remove_child(BdrvChild *child, GSList **refresh_list,
+                              Transaction *tran)
 {
     if (!child) {
         return;
     }
 
     if (child->bs) {
-        bdrv_replace_child_tran(child, NULL, tran);
+        bdrv_replace_child_tran(child, NULL, refresh_list, tran);
     }
 
     tran_add(tran, &bdrv_remove_child_drv, child);
 }
 
+/* Caller is responsible to refresh permissions in @refresh_list */
 static int bdrv_replace_node_noperm(BlockDriverState *from,
                                     BlockDriverState *to,
-                                    bool auto_skip, Transaction *tran,
+                                    bool auto_skip,
+                                    GSList **refresh_list,
+                                    Transaction *tran,
                                     Error **errp)
 {
     BdrvChild *c, *next;
@@ -5002,7 +5022,7 @@ static int bdrv_replace_node_noperm(BlockDriverState 
*from,
                        c->name, from->node_name);
             return -EPERM;
         }
-        bdrv_replace_child_tran(c, to, tran);
+        bdrv_replace_child_tran(c, to, refresh_list, tran);
     }
 
     return 0;
@@ -5053,18 +5073,17 @@ static int bdrv_replace_node_common(BlockDriverState 
*from,
      * permissions based on new graph. If we fail, we'll roll-back the
      * replacement.
      */
-    ret = bdrv_replace_node_noperm(from, to, auto_skip, tran, errp);
+    ret = bdrv_replace_node_noperm(from, to, auto_skip, &refresh_list, tran,
+                                   errp);
     if (ret < 0) {
         goto out;
     }
 
     if (detach_subchain) {
-        bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
+        bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent),
+                          &refresh_list, tran);
     }
 
-    refresh_list = g_slist_prepend(refresh_list, to);
-    refresh_list = g_slist_prepend(refresh_list, from);
-
     ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
     if (ret < 0) {
         goto out;
@@ -5110,6 +5129,7 @@ int bdrv_append(BlockDriverState *bs_new, 
BlockDriverState *bs_top,
 {
     int ret;
     Transaction *tran = tran_new();
+    g_autoptr(GSList) refresh_list = NULL;
 
     assert(!bs_new->backing);
 
@@ -5120,12 +5140,13 @@ int bdrv_append(BlockDriverState *bs_new, 
BlockDriverState *bs_top,
         goto out;
     }
 
-    ret = bdrv_replace_node_noperm(bs_top, bs_new, true, tran, errp);
+    ret = bdrv_replace_node_noperm(bs_top, bs_new, true, &refresh_list, tran,
+                                   errp);
     if (ret < 0) {
         goto out;
     }
 
-    ret = bdrv_refresh_perms(bs_new, tran, errp);
+    ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
 out:
     tran_finalize(tran, ret);
 
@@ -5147,10 +5168,7 @@ int bdrv_replace_child_bs(BdrvChild *child, 
BlockDriverState *new_bs,
     bdrv_drained_begin(old_bs);
     bdrv_drained_begin(new_bs);
 
-    bdrv_replace_child_tran(child, new_bs, tran);
-
-    refresh_list = g_slist_prepend(refresh_list, old_bs);
-    refresh_list = g_slist_prepend(refresh_list, new_bs);
+    bdrv_replace_child_tran(child, new_bs, &refresh_list, tran);
 
     ret = bdrv_list_refresh_perms(refresh_list, NULL, tran, errp);
 
@@ -6589,6 +6607,58 @@ void bdrv_ref(BlockDriverState *bs)
     bs->refcnt++;
 }
 
+static void bdrv_unref_commit(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+
+    if (bs->refcnt == 0) {
+        bdrv_delete(bs);
+    }
+}
+
+static void bdrv_unref_abort(void *opaque)
+{
+    bdrv_ref(opaque);
+}
+
+static TransactionActionDrv bdrv_unref_drv = {
+    .commit = bdrv_unref_commit,
+    .abort = bdrv_unref_abort,
+};
+
+/*
+ * Transactional unref
+ *   - deletion is postponed to transaction commit
+ *   - where possible children are detached now, and permissions are not
+ *     updated. @refresh_list is filled with nodes, to call
+ *     bdrv_list_refresh_perms() on.
+ */
+void bdrv_unref_tran(BlockDriverState *bs, GSList **refresh_list,
+                     Transaction *tran)
+{
+    BdrvChild *child, *next;
+
+    if (!bs) {
+        return;
+    }
+
+    assert(bs->refcnt > 0);
+    bs->refcnt--;
+
+    tran_add(tran, &bdrv_unref_drv, bs);
+
+    if (bs->drv && (!bs->drv->bdrv_close || bs->drv->indepenent_close) &&
+        refresh_list && bs->refcnt == 0)
+    {
+        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+            if (child->bs && child->bs->refcnt > 1) {
+                *refresh_list = g_slist_prepend(*refresh_list, child->bs);
+            }
+            bdrv_unref_child_tran(bs, child, refresh_list, tran);
+        }
+    }
+}
+
 /* Release a previously grabbed reference to bs.
  * If after releasing, reference count is zero, the BlockDriverState is
  * deleted. */
diff --git a/include/block/block.h b/include/block/block.h
index 09c254db9b..92fe31bd13 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -208,8 +208,6 @@ typedef struct BDRVReopenState {
     int flags;
     BlockdevDetectZeroesOptions detect_zeroes;
     bool backing_missing;
-    BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
-    BlockDriverState *old_file_bs; /* keep pointer for permissions update */
     QDict *options;
     QDict *explicit_options;
     void *opaque;
@@ -671,7 +669,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs);
 
 void bdrv_ref(BlockDriverState *bs);
 void bdrv_unref(BlockDriverState *bs);
+void bdrv_unref_tran(BlockDriverState *bs, GSList **refresh_list,
+                     Transaction *tran);
 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
+int bdrv_try_unref(BlockDriverState *bs, Error **errp);
 BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
                              BlockDriverState *child_bs,
                              const char *child_name,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 767825aec4..e2bb936451 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -173,6 +173,13 @@ struct BlockDriver {
      */
     bool supports_backing;
 
+    /*
+     * If true that guarantees that .bdrv_close doesn't access any bdrv 
children
+     * and is safe to be called in commit phase of block-graph modifying
+     * transaction.
+     */
+    bool indepenent_close;
+
     /* For handling image reopen for split or non-split files */
     int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
                                BlockReopenQueue *queue, Error **errp);
-- 
2.35.1


Reply via email to