The no_slb_preload cmdline option is useful for quickly disabling userspace SLB preloads and/or testing their performance impact. Recently there was an SLB multi-hit issue caused by the SLB preload cache which was very difficult to triage. This cmdline option allows preloads to be disabled quickly, to verify whether an issue lies in the preload cache or somewhere else. It is also a useful way to see the effect of SLB preloads on any application workload, e.g. the number of SLB faults with and without SLB preloads.
with slb_preload: slb_faults (minimal initrd boot): 15 slb_faults (full systemd boot): 300 with no_slb_preload: slb_faults (minimal initrd boot): 33 slb_faults (full systemd boot): 138180 Cc: Madhavan Srinivasan <[email protected]> Cc: Michael Ellerman <[email protected]> Cc: Nicholas Piggin <[email protected]> Cc: Christophe Leroy <[email protected]> Cc: Paul Mackerras <[email protected]> Cc: Aneesh Kumar K.V <[email protected]> Cc: Donet Tom <[email protected]> Cc: <[email protected]> Signed-off-by: Ritesh Harjani (IBM) <[email protected]> --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ arch/powerpc/mm/book3s64/hash_utils.c | 3 +++ arch/powerpc/mm/book3s64/internal.h | 7 +++++++ arch/powerpc/mm/book3s64/slb.c | 15 +++++++++++++++ 4 files changed, 28 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6c42061ca20e..0b0bb73d1cc1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7192,6 +7192,9 @@ them frequently to increase the rate of SLB faults on kernel addresses. + no_slb_preload [PPC,EARLY] + Disables slb preloading for userspace. 
+ sunrpc.min_resvport= sunrpc.max_resvport= [NFS,SUNRPC] diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 31162dbad05c..9dc5889d6ecb 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1329,6 +1329,9 @@ static void __init htab_initialize(void) if (stress_slb_enabled) static_branch_enable(&stress_slb_key); + if (no_slb_preload) + static_branch_enable(&no_slb_preload_key); + if (stress_hpt_enabled) { unsigned long tmp; static_branch_enable(&stress_hpt_key); diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index c26a6f0c90fc..cad08d83369c 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -22,6 +22,13 @@ static inline bool stress_hpt(void) return static_branch_unlikely(&stress_hpt_key); } +extern bool no_slb_preload; +DECLARE_STATIC_KEY_FALSE(no_slb_preload_key); +static inline bool slb_preload_disabled(void) +{ + return static_branch_unlikely(&no_slb_preload_key); +} + void hpt_do_stress(unsigned long ea, unsigned long hpte_group); void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 042b762fc0d2..15f73abd1506 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -42,6 +42,15 @@ early_param("stress_slb", parse_stress_slb); __ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key); +bool no_slb_preload __initdata; +static int __init parse_no_slb_preload(char *p) +{ + no_slb_preload = true; + return 0; +} +early_param("no_slb_preload", parse_no_slb_preload); +__ro_after_init DEFINE_STATIC_KEY_FALSE(no_slb_preload_key); + static void assert_slb_presence(bool present, unsigned long ea) { #ifdef CONFIG_DEBUG_VM @@ -299,6 +308,9 @@ static void preload_add(struct thread_info *ti, unsigned long ea) unsigned char idx; unsigned long esid; + if (slb_preload_disabled()) + return; + 
if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) { /* EAs are stored >> 28 so 256MB segments don't need clearing */ if (ea & ESID_MASK_1T) @@ -412,6 +424,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) copy_mm_to_paca(mm); + if (slb_preload_disabled()) + return; + /* * We gradually age out SLBs after a number of context switches to * reduce reload overhead of unused entries (like we do with FP/VEC -- 2.51.0
