This reduces TLB misses by nearly 30x on a `git diff` workload on a 2-node POWER9 (59,800 -> 2,100) and reduces CPU cycles by 0.54%, due to vfs hashes being allocated with 2MB pages.
Cc: linuxppc-...@lists.ozlabs.org Acked-by: Michael Ellerman <m...@ellerman.id.au> Signed-off-by: Nicholas Piggin <npig...@gmail.com> --- .../admin-guide/kernel-parameters.txt | 2 ++ arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/module.c | 21 +++++++++++++++---- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a10b545c2070..d62df53e5200 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3225,6 +3225,8 @@ nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings. + nohugevmalloc [PPC] Disable kernel huge vmalloc mappings. + nosmt [KNL,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 107bb4319e0e..781da6829ab7 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -181,6 +181,7 @@ config PPC select GENERIC_GETTIMEOFDAY select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index a211b0253cdb..07026335d24d 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -87,13 +87,26 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } -#ifdef MODULES_VADDR void *module_alloc(unsigned long size) { + unsigned long start = VMALLOC_START; + unsigned long end = VMALLOC_END; + +#ifdef MODULES_VADDR BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); + start = MODULES_VADDR; + end = MODULES_END; +#endif + + /* + * Don't do huge page allocations for modules yet until more testing + * is done. STRICT_MODULE_RWX may require extra work to support this + * too. + */ - return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, + PAGE_KERNEL_EXEC, + VM_NO_HUGE_VMAP | VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, __builtin_return_address(0)); } -#endif -- 2.23.0