hash: Avoid resizing-down HPT on first memory hotplug

Leonardo Bras Thu, 11 Mar 2021 23:32:29 -0800

Because hypervisors may need to create HPTs without knowing the guest
page size, the smallest used page-size (4k) may be chosen, resulting in
a HPT that is possibly bigger than needed.


On a guest with bigger page-sizes, the number of entries for the HPT may be
too high, causing the guest to ask for a HPT resize-down on the first
hotplug.

This becomes a problem when the HPT resize-down fails, and causes the
HPT resize to be performed on every LMB added, until the HPT size is
compatible with the guest memory size, causing a major slowdown.

So, avoiding HPT resizing-down on hot-add significantly improves memory
hotplug times.

As an example, hotplugging 256GB on a 129GB guest took 710s without this
patch, and 21s with it applied.

Signed-off-by: Leonardo Bras <leobra...@gmail.com>
---
 arch/powerpc/mm/book3s64/hash_utils.c | 36 ++++++++++++++++-----------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 73b06adb6eeb..cfb3ec164f56 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -794,7 +794,7 @@ static unsigned long __init htab_get_table_size(void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static int resize_hpt_for_hotplug(unsigned long new_mem_size)
+static int resize_hpt_for_hotplug(unsigned long new_mem_size, bool shrinking)
 {
        unsigned target_hpt_shift;
 
@@ -803,19 +803,25 @@ static int resize_hpt_for_hotplug(unsigned long new_mem_size)
 
        target_hpt_shift = htab_shift_for_mem_size(new_mem_size);
 
-       /*
-        * To avoid lots of HPT resizes if memory size is fluctuating
-        * across a boundary, we deliberately have some hysterisis
-        * here: we immediately increase the HPT size if the target
-        * shift exceeds the current shift, but we won't attempt to
-        * reduce unless the target shift is at least 2 below the
-        * current shift
-        */
-       if (target_hpt_shift > ppc64_pft_size ||
-           target_hpt_shift < ppc64_pft_size - 1)
-               return mmu_hash_ops.resize_hpt(target_hpt_shift);
+       if (shrinking) {
 
-       return 0;
+               /*
+                * To avoid lots of HPT resizes if memory size is fluctuating
+                * across a boundary, we deliberately have some hysterisis
+                * here: we immediately increase the HPT size if the target
+                * shift exceeds the current shift, but we won't attempt to
+                * reduce unless the target shift is at least 2 below the
+                * current shift
+                */
+
+               if (target_hpt_shift >= ppc64_pft_size - 1)
+                       return 0;
+
+       } else if (target_hpt_shift <= ppc64_pft_size) {
+               return 0;
+       }
+
+       return mmu_hash_ops.resize_hpt(target_hpt_shift);
 }
 
 int hash__create_section_mapping(unsigned long start, unsigned long end,
@@ -828,7 +834,7 @@ int hash__create_section_mapping(unsigned long start, unsigned long end,
                return -1;
        }
 
-       resize_hpt_for_hotplug(memblock_phys_mem_size());
+       resize_hpt_for_hotplug(memblock_phys_mem_size(), false);
 
        rc = htab_bolt_mapping(start, end, __pa(start),
                               pgprot_val(prot), mmu_linear_psize,
@@ -847,7 +853,7 @@ int hash__remove_section_mapping(unsigned long start, unsigned long end)
        int rc = htab_remove_mapping(start, end, mmu_linear_psize,
                                     mmu_kernel_ssize);
 
-       if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
+       if (resize_hpt_for_hotplug(memblock_phys_mem_size(), true) == -ENOSPC)
                pr_warn("Hash collision while resizing HPT\n");
 
        return rc;
-- 
2.29.2

[PATCH 1/3] powerpc/mm/hash: Avoid resizing-down HPT on first memory hotplug

Reply via email to