Re: [PATCH v2 6/7] mm, slab: add static key for should_failslab()
On 6/25/24 7:12 PM, Alexei Starovoitov wrote: > On Tue, Jun 25, 2024 at 7:24 AM Vlastimil Babka wrote: >> >> On 6/20/24 12:49 AM, Vlastimil Babka wrote: >> > --- a/mm/slub.c >> > +++ b/mm/slub.c >> > @@ -3874,13 +3874,37 @@ static __always_inline void >> > maybe_wipe_obj_freeptr(struct kmem_cache *s, >> > 0, sizeof(void *)); >> > } >> > >> > -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) >> > +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB) >> > +DEFINE_STATIC_KEY_FALSE(should_failslab_active); >> > + >> > +#ifdef CONFIG_FUNCTION_ERROR_INJECTION >> > +noinline >> > +#else >> > +static inline >> > +#endif >> > +int should_failslab(struct kmem_cache *s, gfp_t gfpflags) >> >> Note that it has been found that (regardless of this series) gcc may clone >> this to a should_failslab.constprop.0 in case the function is empty because >> __should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline" >> doesn't help - the original function stays but only the clone is actually >> being called, thus overriding the original function achieves nothing, see: >> https://github.com/bpftrace/bpftrace/issues/3258 >> >> So we could use __noclone to prevent that, and I was thinking by adding >> something this to error-injection.h: >> >> #ifdef CONFIG_FUNCTION_ERROR_INJECTION >> #define __error_injectable(alternative) noinline __noclone > > To prevent such compiler transformations we typically use > __used noinline > > We didn't have a need for __noclone yet. If __used is enough I'd stick to > that. __used made no difference here (gcc 13.3), __noclone did
Re: [PATCH v2 6/7] mm, slab: add static key for should_failslab()
On Tue, Jun 25, 2024 at 7:24 AM Vlastimil Babka wrote: > > On 6/20/24 12:49 AM, Vlastimil Babka wrote: > > --- a/mm/slub.c > > +++ b/mm/slub.c > > @@ -3874,13 +3874,37 @@ static __always_inline void > > maybe_wipe_obj_freeptr(struct kmem_cache *s, > > 0, sizeof(void *)); > > } > > > > -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) > > +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB) > > +DEFINE_STATIC_KEY_FALSE(should_failslab_active); > > + > > +#ifdef CONFIG_FUNCTION_ERROR_INJECTION > > +noinline > > +#else > > +static inline > > +#endif > > +int should_failslab(struct kmem_cache *s, gfp_t gfpflags) > > Note that it has been found that (regardless of this series) gcc may clone > this to a should_failslab.constprop.0 in case the function is empty because > __should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline" > doesn't help - the original function stays but only the clone is actually > being called, thus overriding the original function achieves nothing, see: > https://github.com/bpftrace/bpftrace/issues/3258 > > So we could use __noclone to prevent that, and I was thinking by adding > something this to error-injection.h: > > #ifdef CONFIG_FUNCTION_ERROR_INJECTION > #define __error_injectable(alternative) noinline __noclone To prevent such compiler transformations we typically use __used noinline We didn't have a need for __noclone yet. If __used is enough I'd stick to that.
Re: [PATCH v2 6/7] mm, slab: add static key for should_failslab()
On 6/20/24 12:49 AM, Vlastimil Babka wrote: > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -3874,13 +3874,37 @@ static __always_inline void > maybe_wipe_obj_freeptr(struct kmem_cache *s, > 0, sizeof(void *)); > } > > -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) > +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB) > +DEFINE_STATIC_KEY_FALSE(should_failslab_active); > + > +#ifdef CONFIG_FUNCTION_ERROR_INJECTION > +noinline > +#else > +static inline > +#endif > +int should_failslab(struct kmem_cache *s, gfp_t gfpflags) Note that it has been found that (regardless of this series) gcc may clone this to a should_failslab.constprop.0 in case the function is empty because __should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline" doesn't help - the original function stays but only the clone is actually being called, thus overriding the original function achieves nothing, see: https://github.com/bpftrace/bpftrace/issues/3258 So we could use __noclone to prevent that, and I was thinking by adding something like this to error-injection.h: #ifdef CONFIG_FUNCTION_ERROR_INJECTION #define __error_injectable(alternative) noinline __noclone #else #define __error_injectable(alternative) alternative #endif and the usage here would be: __error_injectable(static inline) int should_failslab(...) Does that look acceptable, or is it too confusing that "static inline" is specified there as the storage class to use when error injection is actually disabled? 
> { > if (__should_failslab(s, gfpflags)) > return -ENOMEM; > return 0; > } > -ALLOW_ERROR_INJECTION(should_failslab, ERRNO); > +ALLOW_ERROR_INJECTION_KEY(should_failslab, ERRNO, _failslab_active); > + > +static __always_inline int should_failslab_wrapped(struct kmem_cache *s, > +gfp_t gfp) > +{ > + if (static_branch_unlikely(_failslab_active)) > + return should_failslab(s, gfp); > + else > + return 0; > +} > +#else > +static __always_inline int should_failslab_wrapped(struct kmem_cache *s, > +gfp_t gfp) > +{ > + return false; > +} > +#endif > > static __fastpath_inline > struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags) > @@ -3889,7 +3913,7 @@ struct kmem_cache *slab_pre_alloc_hook(struct > kmem_cache *s, gfp_t flags) > > might_alloc(flags); > > - if (unlikely(should_failslab(s, flags))) > + if (should_failslab_wrapped(s, flags)) > return NULL; > > return s; >
[PATCH v2 6/7] mm, slab: add static key for should_failslab()
Since commit 4f6923fbb352 ("mm: make should_failslab always available for fault injection") should_failslab() is unconditionally a noinline function. This adds visible overhead to the slab allocation hotpath, even if the function is empty. With CONFIG_FAILSLAB=y there's additional overhead, even when the functionality is not activated by a boot parameter or via debugfs. The overhead can be eliminated with a static key around the callsite. Fault injection and error injection frameworks including bpf can now be told that this function has a static key associated, and are able to enable and disable it accordingly. Additionally, compile out all relevant code if neither CONFIG_FAILSLAB nor CONFIG_FUNCTION_ERROR_INJECTION is enabled. When only the latter is not enabled, make should_failslab() static inline instead of noinline. To demonstrate the reduced overhead of calling an empty should_failslab() function, a kernel build with CONFIG_FUNCTION_ERROR_INJECTION enabled but CONFIG_FAILSLAB disabled, and CPU mitigations enabled, was used in a qemu-kvm (virtme-ng) on AMD Ryzen 7 2700 machine, and execution of a program trying to open() a non-existent file was measured 3 times: for (int i = 0; i < 1000; i++) { open("non_existent", O_RDONLY); } After this patch, the measured real time was 4.3% smaller. Using perf profiling it was verified that should_failslab was gone from the profile. With CONFIG_FAILSLAB also enabled, the patched kernel performance was unaffected, as expected, while unpatched kernel's performance was worse, resulting in the relative speedup being 10.5%. This means it no longer needs to be an option suitable only for debug kernel builds. 
Acked-by: Alexei Starovoitov Reviewed-by: Roman Gushchin Signed-off-by: Vlastimil Babka --- include/linux/fault-inject.h | 4 +++- mm/failslab.c| 2 +- mm/slab.h| 3 +++ mm/slub.c| 30 +++--- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index cfe75cc1bac4..0d0fa94dc1c8 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -107,9 +107,11 @@ static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order) } #endif /* CONFIG_FAIL_PAGE_ALLOC */ +#ifdef CONFIG_FUNCTION_ERROR_INJECTION int should_failslab(struct kmem_cache *s, gfp_t gfpflags); +#endif #ifdef CONFIG_FAILSLAB -extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); +bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags); #else static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags) { diff --git a/mm/failslab.c b/mm/failslab.c index ffc420c0e767..878fd08e5dac 100644 --- a/mm/failslab.c +++ b/mm/failslab.c @@ -9,7 +9,7 @@ static struct { bool ignore_gfp_reclaim; bool cache_filter; } failslab = { - .attr = FAULT_ATTR_INITIALIZER, + .attr = FAULT_ATTR_INITIALIZER_KEY(&should_failslab_active.key), .ignore_gfp_reclaim = true, .cache_filter = false, }; diff --git a/mm/slab.h b/mm/slab.h index 5f8f47c5bee0..792e19cb37b8 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -11,6 +11,7 @@ #include #include #include +#include /* * Internal slab definitions @@ -160,6 +161,8 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t) */ #define slab_page(s) folio_page(slab_folio(s), 0) +DECLARE_STATIC_KEY_FALSE(should_failslab_active); + /* * If network-based swap is enabled, sl*b must keep track of whether pages * were allocated from pfmemalloc reserves. 
diff --git a/mm/slub.c b/mm/slub.c index 0809760cf789..11980aa94631 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3874,13 +3874,37 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, 0, sizeof(void *)); } -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags) +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB) +DEFINE_STATIC_KEY_FALSE(should_failslab_active); + +#ifdef CONFIG_FUNCTION_ERROR_INJECTION +noinline +#else +static inline +#endif +int should_failslab(struct kmem_cache *s, gfp_t gfpflags) { if (__should_failslab(s, gfpflags)) return -ENOMEM; return 0; } -ALLOW_ERROR_INJECTION(should_failslab, ERRNO); +ALLOW_ERROR_INJECTION_KEY(should_failslab, ERRNO, &should_failslab_active); + +static __always_inline int should_failslab_wrapped(struct kmem_cache *s, + gfp_t gfp) +{ + if (static_branch_unlikely(&should_failslab_active)) + return should_failslab(s, gfp); + else + return 0; +} +#else +static __always_inline int should_failslab_wrapped(struct kmem_cache *s, + gfp_t gfp) +{ + return false; +} +#endif static __fastpath_inline struct kmem_cache