Remember all parent nodes and just change the pointers there instead of swapping the contents of the BlockDriverState.
Handling of snapshot=on must be moved further down in bdrv_open() because *pbs (which is the bs pointer in the BlockBackend) must already be set before bdrv_append() is called. Otherwise bdrv_append() changes the BB's pointer to the temporary snapshot, but bdrv_open() overwrites it with the read-only original image. We also need to be careful to update callers as the interface changes (becomes less insane): Previously, the meaning of the two parameters was inverted when bdrv_append() returns. Now any BDS pointers keep pointing to the same node. Signed-off-by: Kevin Wolf <kw...@redhat.com> Reviewed-by: Max Reitz <mre...@redhat.com> Reviewed-by: Fam Zheng <f...@redhat.com> --- block.c | 112 +++++++++++++++++++++++++++++++++++++++++++++---------------- blockdev.c | 2 +- 2 files changed, 85 insertions(+), 29 deletions(-) diff --git a/block.c b/block.c index 980437f..b4d2313 100644 --- a/block.c +++ b/block.c @@ -1516,15 +1516,6 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, bdrv_refresh_filename(bs); - /* For snapshot=on, create a temporary qcow2 overlay. bs points to the - * temporary snapshot afterwards. */ - if (snapshot_flags) { - ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); - if (local_err) { - goto close_and_fail; - } - } - /* Check if any unknown options were used */ if (options && (qdict_size(options) != 0)) { const QDictEntry *entry = qdict_first(options); @@ -1556,6 +1547,16 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename, QDECREF(options); *pbs = bs; + + /* For snapshot=on, create a temporary qcow2 overlay. bs points to the + * temporary snapshot afterwards. */ + if (snapshot_flags) { + ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); + if (local_err) { + goto close_and_fail; + } + } + return 0; fail: @@ -2000,20 +2001,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; - /* i/o throttled req */ - bs_dest->throttle_state = bs_src->throttle_state, - bs_dest->io_limits_enabled = bs_src->io_limits_enabled; - bs_dest->pending_reqs[0] = bs_src->pending_reqs[0]; - bs_dest->pending_reqs[1] = bs_src->pending_reqs[1]; - bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; - bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; - memcpy(&bs_dest->round_robin, - &bs_src->round_robin, - sizeof(bs_dest->round_robin)); - memcpy(&bs_dest->throttle_timers, - &bs_src->throttle_timers, - sizeof(ThrottleTimers)); - /* r/w error */ bs_dest->on_read_error = bs_src->on_read_error; bs_dest->on_write_error = bs_src->on_write_error; @@ -2027,10 +2014,25 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, } /* Fields that only need to be swapped if the contents of BDSes is swapped - * rather than pointers being changed in the parents. */ + * rather than pointers being changed in the parents, and throttling fields + * because only bdrv_swap() messes with internals of throttling. */ static void bdrv_move_reference_fields(BlockDriverState *bs_dest, BlockDriverState *bs_src) { + /* i/o throttled req */ + bs_dest->throttle_state = bs_src->throttle_state, + bs_dest->io_limits_enabled = bs_src->io_limits_enabled; + bs_dest->pending_reqs[0] = bs_src->pending_reqs[0]; + bs_dest->pending_reqs[1] = bs_src->pending_reqs[1]; + bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; + bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; + memcpy(&bs_dest->round_robin, + &bs_src->round_robin, + sizeof(bs_dest->round_robin)); + memcpy(&bs_dest->throttle_timers, + &bs_src->throttle_timers, + sizeof(ThrottleTimers)); + /* reference count */ bs_dest->refcnt = bs_src->refcnt; @@ -2156,6 +2158,45 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) bdrv_rebind(bs_old); } +static void change_parent_backing_link(BlockDriverState *from, + BlockDriverState *to) +{ + BdrvChild *c, *next; + + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + assert(c->role != &child_backing); + c->bs = to; + QLIST_REMOVE(c, next_parent); + QLIST_INSERT_HEAD(&to->parents, c, next_parent); + bdrv_ref(to); + bdrv_unref(from); + } + if (from->blk) { + blk_set_bs(from->blk, to); + if (!to->device_list.tqe_prev) { + QTAILQ_INSERT_BEFORE(from, to, device_list); + } + QTAILQ_REMOVE(&bdrv_states, from, device_list); + } +} + +static void swap_feature_fields(BlockDriverState *bs_top, + BlockDriverState *bs_new) +{ + BlockDriverState tmp; + + bdrv_move_feature_fields(&tmp, bs_top); + bdrv_move_feature_fields(bs_top, bs_new); + bdrv_move_feature_fields(bs_new, &tmp); + + assert(!bs_new->throttle_state); + if (bs_top->throttle_state) { + assert(bs_top->io_limits_enabled); + bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top)); + bdrv_io_limits_disable(bs_top); + } +} + /* * Add new bs contents at the top of an image chain while the chain is * live, while keeping required fields on the top layer. @@ -2166,14 +2207,29 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) * bs_new must not be attached to a BlockBackend. * * This function does not create any image files. + * + * bdrv_append() takes ownership of a bs_new reference and unrefs it because + * that's what the callers commonly need. bs_new will be referenced by the old + * parents of bs_top after bdrv_append() returns. If the caller needs to keep a + * reference of its own, it must call bdrv_ref(). */ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) { - bdrv_swap(bs_new, bs_top); + assert(!bdrv_requests_pending(bs_top)); + assert(!bdrv_requests_pending(bs_new)); + + bdrv_ref(bs_top); + change_parent_backing_link(bs_top, bs_new); + + /* Some fields always stay on top of the backing file chain */ + swap_feature_fields(bs_top, bs_new); + + bdrv_set_backing_hd(bs_new, bs_top); + bdrv_unref(bs_top); - /* The contents of 'tmp' will become bs_top, as we are - * swapping bs_new and bs_top contents. */ - bdrv_set_backing_hd(bs_top, bs_new); + /* bs_new is now referenced by its new parents, we don't need the + * additional reference any more. */ + bdrv_unref(bs_new); } static void bdrv_delete(BlockDriverState *bs) diff --git a/blockdev.c b/blockdev.c index b633212..6c8cce4 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1546,7 +1546,7 @@ static void external_snapshot_commit(BlkTransactionState *common) /* We don't need (or want) to use the transactional * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ - bdrv_reopen(state->new_bs, state->new_bs->open_flags & ~BDRV_O_RDWR, + bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR, NULL); aio_context_release(state->aio_context); -- 1.8.3.1