The sequence of ram_block_add()/qemu_ram_free()/ram_block_add() function calls leads to leaking some memory.
ram_block_add() calls dirty_memory_extend() to allocate bitmap blocks for new memory. These blocks only grow but never shrink. So the qemu_ram_free() restores RAM size back to it's original stat but doesn't touch dirty memory bitmaps. After qemu_ram_free() there is no way of knowing that we have allocated dirty memory bitmaps beyond current RAM size. So the next ram_block_add() will call dirty_memory_extend() again to to allocate new bitmaps and rewrite pointers to bitmaps left after the first ram_block_add()/dirty_memory_extend() calls. Rework dirty_memory_extend() to be able to shrink dirty maps, also rename it to dirty_memory_resize(). And fix the leak by shrinking dirty memory maps on qemu_ram_free() if needed. Fixes: 5b82b703b69a ("memory: RCU ram_list.dirty_memory[] for safe RAM hotplug") Cc: qemu-sta...@nongnu.org Signed-off-by: Andrey Ryabinin <a...@yandex-team.com> --- include/exec/ramlist.h | 2 ++ softmmu/physmem.c | 38 ++++++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h index 2ad2a81acc..019e238e7c 100644 --- a/include/exec/ramlist.h +++ b/include/exec/ramlist.h @@ -41,6 +41,8 @@ typedef struct RAMBlockNotifier RAMBlockNotifier; #define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8) typedef struct { struct rcu_head rcu; + unsigned int nr_blocks; + unsigned int nr_blocks_inuse; unsigned long *blocks[]; } DirtyMemoryBlocks; diff --git a/softmmu/physmem.c b/softmmu/physmem.c index 32f76362bf..073ab37351 100644 --- a/softmmu/physmem.c +++ b/softmmu/physmem.c @@ -1919,8 +1919,23 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length) } } +static void dirty_memory_free(DirtyMemoryBlocks *blocks) +{ + int i; + + /* + *'nr_blocks_inuse' is more than nr_blocks (memory was extended) + * or it's less than 'nr_blocks' (memory shrunk). In the second case + * we free all the blocks above the nr_blocks_inuse. + */ + for (i = blocks->nr_blocks_inuse; i < blocks->nr_blocks; i++) { + g_free(blocks->blocks[i]); + } + g_free(blocks); +} + /* Called with ram_list.mutex held */ -static void dirty_memory_extend(ram_addr_t old_ram_size, +static void dirty_memory_resize(ram_addr_t old_ram_size, ram_addr_t new_ram_size) { ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size, @@ -1929,25 +1944,28 @@ static void dirty_memory_extend(ram_addr_t old_ram_size, DIRTY_MEMORY_BLOCK_SIZE); int i; - /* Only need to extend if block count increased */ - if (new_num_blocks <= old_num_blocks) { + /* Only need to resize if block count changed */ + if (new_num_blocks == old_num_blocks) { return; } for (i = 0; i < DIRTY_MEMORY_NUM; i++) { DirtyMemoryBlocks *old_blocks; DirtyMemoryBlocks *new_blocks; + unsigned int num_blocks = MAX(old_num_blocks, new_num_blocks); int j; old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]); new_blocks = g_malloc(sizeof(*new_blocks) + - sizeof(new_blocks->blocks[0]) * new_num_blocks); + sizeof(new_blocks->blocks[0]) * num_blocks); + new_blocks->nr_blocks = new_num_blocks; if (old_num_blocks) { memcpy(new_blocks->blocks, old_blocks->blocks, old_num_blocks * sizeof(old_blocks->blocks[0])); } + /* memory extend case (new>old): allocate new blocks*/ for (j = old_num_blocks; j < new_num_blocks; j++) { new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE); } @@ -1955,7 +1973,8 @@ static void dirty_memory_extend(ram_addr_t old_ram_size, qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks); if (old_blocks) { - g_free_rcu(old_blocks, rcu); + old_blocks->nr_blocks_inuse = new_num_blocks; + call_rcu(old_blocks, dirty_memory_free, rcu); } } } @@ -2001,7 +2020,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp) new_ram_size = MAX(old_ram_size, (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS); if (new_ram_size > old_ram_size) { - dirty_memory_extend(old_ram_size, new_ram_size); + dirty_memory_resize(old_ram_size, new_ram_size); } /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ, * QLIST (which has an RCU-friendly variant) does not have insertion at @@ -2218,6 +2237,8 @@ static void reclaim_ramblock(RAMBlock *block) void qemu_ram_free(RAMBlock *block) { + ram_addr_t old_ram_size, new_ram_size; + if (!block) { return; } @@ -2228,12 +2249,17 @@ void qemu_ram_free(RAMBlock *block) } qemu_mutex_lock_ramlist(); + old_ram_size = last_ram_page(); + QLIST_REMOVE_RCU(block, next); ram_list.mru_block = NULL; /* Write list before version */ smp_wmb(); ram_list.version++; call_rcu(block, reclaim_ramblock, rcu); + + new_ram_size = last_ram_page(); + dirty_memory_resize(old_ram_size, new_ram_size); qemu_mutex_unlock_ramlist(); } -- 2.34.1