We have a bug in qcow2: assume we've started a data write into host cluster A, and s->lock is unlocked. During the write, the refcount of cluster A may become zero, the cluster may be reallocated for other needs, and our in-flight write becomes a use-after-free. More details will be given in a later commit, which actually fixes the bug.
For now, let's prepare the infrastructure for the following fix. We are going to track these in-flight data writes and other operations. So, we create a hash map cluster_index -> HostCluster, and for each HostCluster we count the number of in-flight operations on it (each of which calls qcow2_host_range_ref(), of course). Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com> --- block/qcow2.h | 12 ++++ block/qcow2-host-range-refs.c | 127 ++++++++++++++++++++++++++++++++++ block/qcow2.c | 3 + block/meson.build | 1 + 4 files changed, 143 insertions(+) create mode 100644 block/qcow2-host-range-refs.c diff --git a/block/qcow2.h b/block/qcow2.h index 511db948ec..d6de9543c4 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -420,6 +420,9 @@ typedef struct BDRVQcow2State { * is to convert the image with the desired compression type set. */ Qcow2CompressionType compression_type; + + /* For qcow2-host-range-refs.c */ + GHashTable *host_range_refs; } BDRVQcow2State; typedef struct Qcow2COWRegion { @@ -899,6 +902,15 @@ int qcow2_detect_metadata_preallocation(BlockDriverState *bs); void qcow2_cache_host_discard(BlockDriverState *bs, uint64_t offset, uint64_t length); +void qcow2_init_host_range_refs(BDRVQcow2State *s); +void qcow2_release_host_range_refs(BDRVQcow2State *s); +void qcow2_host_range_ref(BlockDriverState *bs, int64_t offset, + int64_t length); +void qcow2_host_range_unref(BlockDriverState *bs, int64_t offset, + int64_t length); +uint64_t qcow2_get_host_range_refcnt(BlockDriverState *bs, + int64_t cluster_index); + /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); diff --git a/block/qcow2-host-range-refs.c b/block/qcow2-host-range-refs.c new file mode 100644 index 0000000000..54f0be27a4 --- /dev/null +++ b/block/qcow2-host-range-refs.c @@ -0,0 +1,127 @@ +/* + * Block driver for the QCOW version 2 format + * + * Copyright (c) 2021 Virtuozzo International GmbH. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qcow2.h" + +typedef struct HostCluster { + uint64_t host_range_refcnt; + + /* For convenience, keep cluster_index here */ + int64_t cluster_index; +} HostCluster; + +void qcow2_init_host_range_refs(BDRVQcow2State *s) +{ + s->host_range_refs = + g_hash_table_new_full(g_int64_hash, g_int64_equal, g_free, g_free); +} + +void qcow2_release_host_range_refs(BDRVQcow2State *s) +{ + assert(g_hash_table_size(s->host_range_refs) == 0); + g_hash_table_unref(s->host_range_refs); +} + +static HostCluster *find_host_cluster(BDRVQcow2State *s, int64_t cluster_index) +{ + HostCluster *cl; + + if (!s->host_range_refs) { + return NULL; + } + + cl = g_hash_table_lookup(s->host_range_refs, &cluster_index); + + if (cl) { + assert(cl->host_range_refcnt > 0); + } + + return cl; +} + +uint64_t qcow2_get_host_range_refcnt(BlockDriverState *bs, + int64_t cluster_index) +{ + BDRVQcow2State *s = bs->opaque; + HostCluster *cl = find_host_cluster(s, cluster_index); + + if (!cl) { + return 0; + } + + return cl->host_range_refcnt; +} + +/* Increase host_range_refcnt of clusters intersecting with range */ +void coroutine_fn +qcow2_host_range_ref(BlockDriverState *bs, int64_t offset, int64_t length) +{ + BDRVQcow2State *s = bs->opaque; + int64_t start, last, cluster_index; + + start = start_of_cluster(s, offset) >> s->cluster_bits; + last = start_of_cluster(s, offset + length - 1) >> s->cluster_bits; + for (cluster_index = start; cluster_index <= last; cluster_index++) { + HostCluster *cl = find_host_cluster(s, cluster_index); + + if (!cl) { + cl = g_new(HostCluster, 1); + *cl = (HostCluster) { + .cluster_index = cluster_index, + .host_range_refcnt = 1, + }; + g_hash_table_insert(s->host_range_refs, + g_memdup(&cluster_index, + sizeof(cluster_index)), cl); + } else { + cl->host_range_refcnt++; + } + continue; + } +} + +/* Decrease host_range_refcnt of clusters intersecting with range */ +void coroutine_fn +qcow2_host_range_unref(BlockDriverState *bs, 
+ int64_t offset, int64_t length) +{ + BDRVQcow2State *s = bs->opaque; + int64_t start, last, cluster_index; + + start = start_of_cluster(s, offset) >> s->cluster_bits; + last = start_of_cluster(s, offset + length - 1) >> s->cluster_bits; + for (cluster_index = start; cluster_index <= last; cluster_index++) { + HostCluster *cl = find_host_cluster(s, cluster_index); + + assert(cl); + assert(cl->host_range_refcnt >= 1); + + if (cl->host_range_refcnt > 1) { + cl->host_range_refcnt--; + continue; + } + + g_hash_table_remove(s->host_range_refs, &cluster_index); + } +} diff --git a/block/qcow2.c b/block/qcow2.c index be62585e03..aa298c9e42 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1834,6 +1834,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, #endif qemu_co_queue_init(&s->thread_task_queue); + qcow2_init_host_range_refs(s); return ret; @@ -2714,6 +2715,8 @@ static void qcow2_close(BlockDriverState *bs) g_free(s->image_backing_file); g_free(s->image_backing_format); + qcow2_release_host_range_refs(s); + if (has_data_file(bs)) { bdrv_unref_child(bs, s->data_file); s->data_file = NULL; diff --git a/block/meson.build b/block/meson.build index d21990ec95..a9bf6fde0c 100644 --- a/block/meson.build +++ b/block/meson.build @@ -25,6 +25,7 @@ block_ss.add(files( 'qcow2-bitmap.c', 'qcow2-cache.c', 'qcow2-cluster.c', + 'qcow2-host-range-refs.c', 'qcow2-refcount.c', 'qcow2-snapshot.c', 'qcow2-threads.c', -- 2.29.2