This reduces TLB misses by nearly 30x on a `git diff` workload on a
2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due
to vfs hashes being allocated with 2MB pages.

Cc: linuxppc-...@lists.ozlabs.org
Acked-by: Michael Ellerman <m...@ellerman.id.au>
Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 .../admin-guide/kernel-parameters.txt         |  2 ++
 arch/powerpc/Kconfig                          |  1 +
 arch/powerpc/kernel/module.c                  | 21 +++++++++++++++----
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a10b545c2070..d62df53e5200 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3225,6 +3225,8 @@
 
        nohugeiomap     [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
 
+       nohugevmalloc   [PPC] Disable kernel huge vmalloc mappings.
+
        nosmt           [KNL,S390] Disable symmetric multithreading (SMT).
                        Equivalent to smt=1.
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 107bb4319e0e..781da6829ab7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -181,6 +181,7 @@ config PPC
        select GENERIC_GETTIMEOFDAY
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_HUGE_VMAP              if PPC_BOOK3S_64 && PPC_RADIX_MMU
+       select HAVE_ARCH_HUGE_VMALLOC           if HAVE_ARCH_HUGE_VMAP
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_KASAN                  if PPC32 && PPC_PAGE_SHIFT <= 14
        select HAVE_ARCH_KASAN_VMALLOC          if PPC32 && PPC_PAGE_SHIFT <= 14
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index a211b0253cdb..07026335d24d 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -87,13 +87,26 @@ int module_finalize(const Elf_Ehdr *hdr,
        return 0;
 }
 
-#ifdef MODULES_VADDR
 void *module_alloc(unsigned long size)
 {
+       unsigned long start = VMALLOC_START;
+       unsigned long end = VMALLOC_END;
+
+#ifdef MODULES_VADDR
        BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+       start = MODULES_VADDR;
+       end = MODULES_END;
+#endif
+
+       /*
+        * Don't do huge page allocations for modules yet until more testing
+        * is done. STRICT_MODULE_RWX may require extra work to support this
+        * too.
+        */
 
-       return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
-                                   PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+       return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
+                                   PAGE_KERNEL_EXEC,
+                                   VM_NO_HUGE_VMAP | VM_FLUSH_RESET_PERMS,
+                                   NUMA_NO_NODE,
                                    __builtin_return_address(0));
 }
-#endif
-- 
2.23.0

Reply via email to