This adds an adjustable limit for the total memory usage of the overlap prevention structures.
Signed-off-by: Max Reitz <mre...@redhat.com> --- block/qcow2-overlap.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++--- block/qcow2.c | 2 +- block/qcow2.h | 2 +- 3 files changed, 139 insertions(+), 10 deletions(-) diff --git a/block/qcow2-overlap.c b/block/qcow2-overlap.c index 92097dd..654d5c3 100644 --- a/block/qcow2-overlap.c +++ b/block/qcow2-overlap.c @@ -23,9 +23,11 @@ */ #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu-common.h" #include "qemu/range.h" #include "qcow2.h" +#include "qapi-event.h" /* Number of clusters which are covered by each metadata window; * note that this may not exceed 2^16 as long as @@ -68,12 +70,50 @@ struct Qcow2MetadataList { unsigned current_age; + size_t mem_usage, max_mem_usage; + /* Index into the windows array */ int *cached_windows; size_t nb_cached_windows; }; /** + * Logs newly allocated memory in the @mdl->mem_usage field. Returns true iff + * @mdl->mem_usage + @size * @nmemb <= @mdl->max_mem_usage, that is, if + * allocating that memory is still within limits. In that case, @mdl->mem_usage + * is increased by @size * @nmemb; otherwise, it is left unchanged. + */ +static bool increase_mdl_mem_usage(Qcow2MetadataList *mdl, size_t size, + size_t nmemb) +{ + if (nmemb && SIZE_MAX / nmemb < size) { + /* Overflow */ + return false; + } + + size *= nmemb; + if (mdl->mem_usage + size < mdl->mem_usage || + mdl->mem_usage + size > mdl->max_mem_usage) + { + return false; + } + + mdl->mem_usage += size; + return true; +} + +static void signal_memory_excess(BlockDriverState *bs, uint64_t start_cluster, + uint64_t nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + const char *ref = bs->blk ? blk_name(bs->blk) : bs->node_name; + + qapi_event_send_qcow2_overlap_check_memory_limit_reached(ref, true, + start_cluster * s->cluster_size, true, nb_clusters * s->cluster_size, + &error_abort); +} + +/** * Destroys the cached window bitmap. If it has been modified, the fragment list * will be rebuilt accordingly. */ @@ -81,11 +121,16 @@ static void destroy_window_bitmap(Qcow2MetadataList *mdl, Qcow2MetadataWindow *window) { Qcow2MetadataFragment *new_fragments; + bool increase_ret; if (!window->bitmap) { return; } + /* Treat the bitmap as freed already; while not really correct, this gives + * us space for the fragment list */ + mdl->mem_usage -= WINDOW_SIZE; + if (window->bitmap_modified) { int bitmap_i, fragment_i = 0; QCow2MetadataOverlap current_types = 0; @@ -105,6 +150,9 @@ static void destroy_window_bitmap(Qcow2MetadataList *mdl, } else { if (current_types && current_nb_clusters) { if (fragment_i >= window->fragments_array_size) { + mdl->mem_usage -= sizeof(Qcow2MetadataFragment) + * window->fragments_array_size; + window->fragments_array_size = 3 * window->fragments_array_size / 2 + 1; if (sizeof(Qcow2MetadataFragment) @@ -115,6 +163,16 @@ static void destroy_window_bitmap(Qcow2MetadataList *mdl, goto fail; } + increase_ret = increase_mdl_mem_usage(mdl, + sizeof(Qcow2MetadataFragment), + window->fragments_array_size); + /* This can never be false: Because mem_usage was + * reduced by WINDOW_SIZE at the beginning of this + * function and the size of the fragment list is certain + * to be less than WINDOW_SIZE, there must be enough + * memory available. */ + assert(increase_ret); + new_fragments = g_try_renew(Qcow2MetadataFragment, window->fragments, @@ -148,6 +206,7 @@ static void destroy_window_bitmap(Qcow2MetadataList *mdl, new_fragments = g_try_renew(Qcow2MetadataFragment, window->fragments, window->nb_fragments); if (new_fragments) { + mdl->mem_usage -= window->fragments_array_size - window->nb_fragments; window->fragments = new_fragments; window->fragments_array_size = window->nb_fragments; } @@ -163,12 +222,20 @@ fail: window->fragments = NULL; window->nb_fragments = 0; window->fragments_array_size = 0; + + /* window->bitmap is not destroyed, so we need to account for it (because + * we "borrowed" the space before) */ + increase_ret = increase_mdl_mem_usage(mdl, sizeof(uint8_t), WINDOW_SIZE); + /* If we land here, no additional memory was allocated (only + * window->fragments may grow in this function, but at this point it has + * been freed), so we must be able to reclaim the space */ + assert(increase_ret); } /** * Creates a bitmap from the fragment list. */ -static void build_window_bitmap(Qcow2MetadataList *mdl, +static bool build_window_bitmap(Qcow2MetadataList *mdl, Qcow2MetadataWindow *window) { int cache_i, oldest_cache_i = -1, i; @@ -202,9 +269,13 @@ static void build_window_bitmap(Qcow2MetadataList *mdl, /* Maybe there already is a bitmap because it was more space-efficient than * the range list representation */ if (window->bitmap) { - return; + return true; } + if (!increase_mdl_mem_usage(mdl, sizeof(uint8_t), WINDOW_SIZE)) { + window->bitmap = NULL; + return false; + } window->bitmap = g_new0(uint8_t, WINDOW_SIZE); for (i = 0; i < window->nb_fragments; i++) { @@ -215,6 +286,8 @@ static void build_window_bitmap(Qcow2MetadataList *mdl, } window->bitmap_modified = false; + + return true; } /** @@ -251,10 +324,24 @@ void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset, * the array to exactly the required size */ Qcow2MetadataWindow *new_windows; + if (!increase_mdl_mem_usage(s->metadata_list, + sizeof(Qcow2MetadataWindow), + window_i + 1 + - s->metadata_list->nb_windows)) + { + /* This will happen for any cluster from here until end_cluster, + * so we can just abort immediately */ + signal_memory_excess(bs, window_i * WINDOW_SIZE, + end_cluster - current_cluster); + return; + } + new_windows = g_try_renew(Qcow2MetadataWindow, s->metadata_list->windows, window_i + 1); if (!new_windows) { + signal_memory_excess(bs, window_i * WINDOW_SIZE, + end_cluster - current_cluster); return; } @@ -268,7 +355,10 @@ void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset, window = &s->metadata_list->windows[window_i]; if (!window->bitmap) { - build_window_bitmap(s->metadata_list, window); + if (!build_window_bitmap(s->metadata_list, window)) { + signal_memory_excess(bs, window_i * WINDOW_SIZE, WINDOW_SIZE); + goto next_window; + } } for (bitmap_i = bitmap_i_start; bitmap_i < bitmap_i_end; bitmap_i++) { @@ -278,6 +368,7 @@ void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset, window->age = s->metadata_list->current_age++; window->bitmap_modified = true; +next_window: /* Go to the next window */ current_cluster += WINDOW_SIZE - bitmap_i_start; } @@ -320,7 +411,17 @@ void qcow2_metadata_list_remove(BlockDriverState *bs, uint64_t offset, window = &s->metadata_list->windows[window_i]; if (!window->bitmap) { - build_window_bitmap(s->metadata_list, window); + if (!build_window_bitmap(s->metadata_list, window)) { + signal_memory_excess(bs, window_i * WINDOW_SIZE, WINDOW_SIZE); + /* We *must* drop the given metadata types from the list, no + * matter what; therefore, the best thing we can do is empty + * the fragment list */ + g_free(window->fragments); + window->fragments = NULL; + window->nb_fragments = 0; + window->fragments_array_size = 0; + goto next_window; + } } for (bitmap_i = bitmap_i_start; bitmap_i < bitmap_i_end; bitmap_i++) { @@ -330,12 +431,14 @@ void qcow2_metadata_list_remove(BlockDriverState *bs, uint64_t offset, window->age = s->metadata_list->current_age++; window->bitmap_modified = true; +next_window: /* Go to the next window */ current_cluster += WINDOW_SIZE - bitmap_i_start; } } -static int single_check_metadata_overlap(Qcow2MetadataList *mdl, int ign, +static int single_check_metadata_overlap(BlockDriverState *bs, + Qcow2MetadataList *mdl, int ign, uint64_t cluster) { uint64_t window_i = cluster / WINDOW_SIZE; @@ -348,7 +451,10 @@ static int single_check_metadata_overlap(Qcow2MetadataList *mdl, int ign, window = &mdl->windows[window_i]; if (!window->bitmap) { - build_window_bitmap(mdl, window); + if (!build_window_bitmap(mdl, window)) { + signal_memory_excess(bs, window_i * WINDOW_SIZE, WINDOW_SIZE); + return 0; + } } window->age = mdl->current_age++; @@ -368,7 +474,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, for (current_cluster = start_cluster; current_cluster < end_cluster; current_cluster++) { - ret |= single_check_metadata_overlap(s->metadata_list, ign, + ret |= single_check_metadata_overlap(bs, s->metadata_list, ign, current_cluster); } @@ -376,16 +482,33 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, } int qcow2_create_empty_metadata_list(BlockDriverState *bs, size_t cache_size, - Error **errp) + size_t max_total_mem_size, Error **errp) { BDRVQcowState *s = bs->opaque; int ret; size_t cache_entries, i; + if (max_total_mem_size < sizeof(Qcow2MetadataList)) { + error_setg(errp, "Cannot allocate metadata list"); + ret = -ENOMEM; + goto fail; + } + s->metadata_list = g_new0(Qcow2MetadataList, 1); + s->metadata_list->max_mem_usage = max_total_mem_size; + s->metadata_list->mem_usage = sizeof(Qcow2MetadataList); + s->metadata_list->nb_windows = DIV_ROUND_UP(bdrv_nb_sectors(bs->file), (uint64_t)s->cluster_sectors * WINDOW_SIZE); + if (!increase_mdl_mem_usage(s->metadata_list, sizeof(Qcow2MetadataWindow), + s->metadata_list->nb_windows)) + { + error_setg(errp, "Cannot allocate metadata overlap check windows"); + ret = -ENOMEM; + goto fail; + } + s->metadata_list->windows = g_try_new0(Qcow2MetadataWindow, s->metadata_list->nb_windows); if (s->metadata_list->nb_windows && !s->metadata_list->windows) { @@ -404,6 +527,12 @@ int qcow2_create_empty_metadata_list(BlockDriverState *bs, size_t cache_size, } s->metadata_list->nb_cached_windows = cache_entries; + if (!increase_mdl_mem_usage(s->metadata_list, sizeof(int), cache_entries)) { + error_setg(errp, "Cannot allocate metadata overlap cache pointers"); + ret = -ENOMEM; + goto fail; + } + s->metadata_list->cached_windows = g_try_new(int, cache_entries); if (!s->metadata_list->cached_windows) { error_setg(errp, "Could not allocate metadata overlap cache pointers"); diff --git a/block/qcow2.c b/block/qcow2.c index 4e7005e..fbf4d6f 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -760,7 +760,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, if (s->overlap_check) { /* TODO: Let the user override this default */ - ret = qcow2_create_empty_metadata_list(bs, 65536, errp); + ret = qcow2_create_empty_metadata_list(bs, 65536, SIZE_MAX, errp); if (ret < 0) { goto fail; } diff --git a/block/qcow2.h b/block/qcow2.h index 4a5530b..cbb932a 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -594,7 +594,7 @@ int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); /* qcow2-overlap.c functions */ int qcow2_create_empty_metadata_list(BlockDriverState *bs, size_t cache_size, - Error **errp); + size_t max_total_mem_size, Error **errp); void qcow2_metadata_list_destroy(BlockDriverState *bs); void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset, int nb_clusters, QCow2MetadataOverlap type); -- 2.3.7