On Tue, 27 Aug 2024 at 04:38, David Hildenbrand <da...@redhat.com> wrote:
>
> As reported by Peter, we might be leaking memory when removing the
> highest RAMBlock (in the weird ram_addr_t space), and adding a new one.
>
> We will fail to realize that we already allocated bitmaps for more
> dirty memory blocks, and effectively discard the pointers to them.
>
> Fix it by getting rid of last_ram_page() and simply storing the number
> of dirty memory blocks that have been allocated. We'll store the number
> of blocks along with the actual pointer to keep it simple.
>
> Looks like this leak was introduced as we switched from using a single
> bitmap_zero_extend() to allocating multiple bitmaps:
> bitmap_zero_extend() relies on g_renew() which should have taken care of
> this.
>
> Resolves: 
> https://lkml.kernel.org/r/CAFEAcA-k7a+VObGAfCFNygQNfCKL=AfX6A4kScq=vssk0pe...@mail.gmail.com
> Reported-by: Peter Maydell <peter.mayd...@linaro.org>
> Fixes: 5b82b703b69a ("memory: RCU ram_list.dirty_memory[] for safe RAM 
> hotplug")
> Cc: qemu-sta...@nongnu.org
> Cc: Stefan Hajnoczi <stefa...@redhat.com>
> Cc: Paolo Bonzini <pbonz...@redhat.com>
> Cc: Peter Xu <pet...@redhat.com>
> Cc: "Philippe Mathieu-Daudé" <phi...@linaro.org>
> Signed-off-by: David Hildenbrand <da...@redhat.com>
> ---
>  include/exec/ramlist.h |  1 +
>  system/physmem.c       | 44 ++++++++++++++----------------------------
>  2 files changed, 16 insertions(+), 29 deletions(-)
>
> diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
> index 2ad2a81acc..f2a965f293 100644
> --- a/include/exec/ramlist.h
> +++ b/include/exec/ramlist.h
> @@ -41,6 +41,7 @@ typedef struct RAMBlockNotifier RAMBlockNotifier;
>  #define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8)
>  typedef struct {
>      struct rcu_head rcu;
> +    unsigned int num_blocks;

The maximum amount of guest memory that an unsigned int num_blocks can
represent is:
(2 ^ 32 - 1) * DIRTY_MEMORY_BLOCK_SIZE pages * 4KB per page
= ~32 exbibytes

This should be fine. Maximum guest RAM sizes are currently in the TB range
(source: https://access.redhat.com/articles/rhel-kvm-limits).

Reviewed-by: Stefan Hajnoczi <stefa...@redhat.com>

>      unsigned long *blocks[];
>  } DirtyMemoryBlocks;
>
> diff --git a/system/physmem.c b/system/physmem.c
> index 94600a33ec..fa48ff8333 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -1534,18 +1534,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
>      return offset;
>  }
>
> -static unsigned long last_ram_page(void)
> -{
> -    RAMBlock *block;
> -    ram_addr_t last = 0;
> -
> -    RCU_READ_LOCK_GUARD();
> -    RAMBLOCK_FOREACH(block) {
> -        last = MAX(last, block->offset + block->max_length);
> -    }
> -    return last >> TARGET_PAGE_BITS;
> -}
> -
>  static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
>  {
>      int ret;
> @@ -1799,28 +1787,31 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t 
> start, ram_addr_t length)
>  }
>
>  /* Called with ram_list.mutex held */
> -static void dirty_memory_extend(ram_addr_t old_ram_size,
> -                                ram_addr_t new_ram_size)
> +static void dirty_memory_extend(ram_addr_t new_ram_size)
>  {
> -    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
> -                                             DIRTY_MEMORY_BLOCK_SIZE);
>      ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
>                                               DIRTY_MEMORY_BLOCK_SIZE);
>      int i;
>
> -    /* Only need to extend if block count increased */
> -    if (new_num_blocks <= old_num_blocks) {
> -        return;
> -    }
> -
>      for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
>          DirtyMemoryBlocks *old_blocks;
>          DirtyMemoryBlocks *new_blocks;
> +        ram_addr_t old_num_blocks = 0;
>          int j;
>
>          old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
> +        if (old_blocks) {
> +            old_num_blocks = old_blocks->num_blocks;
> +
> +            /* Only need to extend if block count increased */
> +            if (new_num_blocks <= old_num_blocks) {
> +                return;
> +            }
> +        }
> +
>          new_blocks = g_malloc(sizeof(*new_blocks) +
>                                sizeof(new_blocks->blocks[0]) * 
> new_num_blocks);
> +        new_blocks->num_blocks = new_num_blocks;
>
>          if (old_num_blocks) {
>              memcpy(new_blocks->blocks, old_blocks->blocks,
> @@ -1846,11 +1837,9 @@ static void ram_block_add(RAMBlock *new_block, Error 
> **errp)
>      RAMBlock *block;
>      RAMBlock *last_block = NULL;
>      bool free_on_error = false;
> -    ram_addr_t old_ram_size, new_ram_size;
> +    ram_addr_t ram_size;
>      Error *err = NULL;
>
> -    old_ram_size = last_ram_page();
> -
>      qemu_mutex_lock_ramlist();
>      new_block->offset = find_ram_offset(new_block->max_length);
>
> @@ -1901,11 +1890,8 @@ static void ram_block_add(RAMBlock *new_block, Error 
> **errp)
>          }
>      }
>
> -    new_ram_size = MAX(old_ram_size,
> -              (new_block->offset + new_block->max_length) >> 
> TARGET_PAGE_BITS);
> -    if (new_ram_size > old_ram_size) {
> -        dirty_memory_extend(old_ram_size, new_ram_size);
> -    }
> +    ram_size = (new_block->offset + new_block->max_length) >> 
> TARGET_PAGE_BITS;
> +    dirty_memory_extend(ram_size);
>      /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
>       * QLIST (which has an RCU-friendly variant) does not have insertion at
>       * tail, so save the last element in last_block.
> --
> 2.46.0
>
>

Reply via email to