Re: [PATCH v2 6/7] mm, slab: add static key for should_failslab()

2024-06-25 Thread Vlastimil Babka
On 6/25/24 7:12 PM, Alexei Starovoitov wrote:
> On Tue, Jun 25, 2024 at 7:24 AM Vlastimil Babka  wrote:
>>
>> On 6/20/24 12:49 AM, Vlastimil Babka wrote:
>> > --- a/mm/slub.c
>> > +++ b/mm/slub.c
>> > @@ -3874,13 +3874,37 @@ static __always_inline void 
>> > maybe_wipe_obj_freeptr(struct kmem_cache *s,
>> >   0, sizeof(void *));
>> >  }
>> >
>> > -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>> > +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
>> > +DEFINE_STATIC_KEY_FALSE(should_failslab_active);
>> > +
>> > +#ifdef CONFIG_FUNCTION_ERROR_INJECTION
>> > +noinline
>> > +#else
>> > +static inline
>> > +#endif
>> > +int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>>
>> Note that it has been found that (regardless of this series) gcc may clone
>> this to a should_failslab.constprop.0 in case the function is empty because
>> __should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline"
>> doesn't help - the original function stays but only the clone is actually
>> being called, thus overriding the original function achieves nothing, see:
>> https://github.com/bpftrace/bpftrace/issues/3258
>>
>> So we could use __noclone to prevent that, and I was thinking by adding
>> something like this to error-injection.h:
>>
>> #ifdef CONFIG_FUNCTION_ERROR_INJECTION
>> #define __error_injectable(alternative) noinline __noclone
> 
> To prevent such compiler transformations we typically use
> __used noinline
> 
> We didn't have a need for __noclone yet. If __used is enough I'd stick to 
> that.

__used made no difference here (gcc 13.3), __noclone did



Re: [PATCH v2 6/7] mm, slab: add static key for should_failslab()

2024-06-25 Thread Alexei Starovoitov
On Tue, Jun 25, 2024 at 7:24 AM Vlastimil Babka  wrote:
>
> On 6/20/24 12:49 AM, Vlastimil Babka wrote:
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -3874,13 +3874,37 @@ static __always_inline void 
> > maybe_wipe_obj_freeptr(struct kmem_cache *s,
> >   0, sizeof(void *));
> >  }
> >
> > -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> > +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
> > +DEFINE_STATIC_KEY_FALSE(should_failslab_active);
> > +
> > +#ifdef CONFIG_FUNCTION_ERROR_INJECTION
> > +noinline
> > +#else
> > +static inline
> > +#endif
> > +int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
>
> Note that it has been found that (regardless of this series) gcc may clone
> this to a should_failslab.constprop.0 in case the function is empty because
> __should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline"
> doesn't help - the original function stays but only the clone is actually
> being called, thus overriding the original function achieves nothing, see:
> https://github.com/bpftrace/bpftrace/issues/3258
>
> So we could use __noclone to prevent that, and I was thinking by adding
> something like this to error-injection.h:
>
> #ifdef CONFIG_FUNCTION_ERROR_INJECTION
> #define __error_injectable(alternative) noinline __noclone

To prevent such compiler transformations we typically use
__used noinline

We didn't have a need for __noclone yet. If __used is enough I'd stick to that.



Re: [PATCH v2 6/7] mm, slab: add static key for should_failslab()

2024-06-25 Thread Vlastimil Babka
On 6/20/24 12:49 AM, Vlastimil Babka wrote:
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3874,13 +3874,37 @@ static __always_inline void 
> maybe_wipe_obj_freeptr(struct kmem_cache *s,
>   0, sizeof(void *));
>  }
>  
> -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
> +DEFINE_STATIC_KEY_FALSE(should_failslab_active);
> +
> +#ifdef CONFIG_FUNCTION_ERROR_INJECTION
> +noinline
> +#else
> +static inline
> +#endif
> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags)

Note that it has been found that (regardless of this series) gcc may clone
this to a should_failslab.constprop.0 in case the function is empty because
__should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline"
doesn't help - the original function stays but only the clone is actually
being called, thus overriding the original function achieves nothing, see:
https://github.com/bpftrace/bpftrace/issues/3258

So we could use __noclone to prevent that, and I was thinking by adding
something like this to error-injection.h:

#ifdef CONFIG_FUNCTION_ERROR_INJECTION
#define __error_injectable(alternative) noinline __noclone
#else
#define __error_injectable(alternative) alternative
#endif

and the usage here would be:

__error_injectable(static inline) int should_failslab(...)

Does that look acceptable, or is it too confusing that "static inline" is
specified there as the storage class to use when error injection is actually
disabled?

>  {
>   if (__should_failslab(s, gfpflags))
>   return -ENOMEM;
>   return 0;
>  }
> -ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
> > +ALLOW_ERROR_INJECTION_KEY(should_failslab, ERRNO, &should_failslab_active);
> +
> +static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
> +gfp_t gfp)
> +{
> > + if (static_branch_unlikely(&should_failslab_active))
> + return should_failslab(s, gfp);
> + else
> + return 0;
> +}
> +#else
> +static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
> +gfp_t gfp)
> +{
> + return false;
> +}
> +#endif
>  
>  static __fastpath_inline
>  struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
> @@ -3889,7 +3913,7 @@ struct kmem_cache *slab_pre_alloc_hook(struct 
> kmem_cache *s, gfp_t flags)
>  
>   might_alloc(flags);
>  
> - if (unlikely(should_failslab(s, flags)))
> + if (should_failslab_wrapped(s, flags))
>   return NULL;
>  
>   return s;
> 




[PATCH v2 6/7] mm, slab: add static key for should_failslab()

2024-06-19 Thread Vlastimil Babka
Since commit 4f6923fbb352 ("mm: make should_failslab always available for
fault injection") should_failslab() is unconditionally a noinline
function. This adds visible overhead to the slab allocation hotpath,
even if the function is empty. With CONFIG_FAILSLAB=y there's additional
overhead, even when the functionality is not activated by a boot
parameter or via debugfs.

The overhead can be eliminated with a static key around the callsite.
Fault injection and error injection frameworks including bpf can now be
told that this function has a static key associated, and are able to
enable and disable it accordingly.

Additionally, compile out all relevant code if neither CONFIG_FAILSLAB
nor CONFIG_FUNCTION_ERROR_INJECTION is enabled. When only the latter is
not enabled, make should_failslab() static inline instead of noinline.

To demonstrate the reduced overhead of calling an empty
should_failslab() function, a kernel build with
CONFIG_FUNCTION_ERROR_INJECTION enabled but CONFIG_FAILSLAB disabled,
and CPU mitigations enabled, was used in a qemu-kvm (virtme-ng) on AMD
Ryzen 7 2700 machine, and execution of a program trying to open() a
non-existent file was measured 3 times:

for (int i = 0; i < 1000; i++) {
open("non_existent", O_RDONLY);
}

After this patch, the measured real time was 4.3% smaller. Using perf
profiling it was verified that should_failslab was gone from the
profile.

With CONFIG_FAILSLAB also enabled, the patched kernel performance was
unaffected, as expected, while unpatched kernel's performance was worse,
resulting in the relative speedup being 10.5%. This means it no longer
needs to be an option suitable only for debug kernel builds.

Acked-by: Alexei Starovoitov 
Reviewed-by: Roman Gushchin 
Signed-off-by: Vlastimil Babka 
---
 include/linux/fault-inject.h |  4 +++-
 mm/failslab.c|  2 +-
 mm/slab.h|  3 +++
 mm/slub.c| 30 +++---
 4 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index cfe75cc1bac4..0d0fa94dc1c8 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -107,9 +107,11 @@ static inline bool __should_fail_alloc_page(gfp_t 
gfp_mask, unsigned int order)
 }
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
 int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+#endif
 #ifdef CONFIG_FAILSLAB
-extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
 #else
 static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
diff --git a/mm/failslab.c b/mm/failslab.c
index ffc420c0e767..878fd08e5dac 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -9,7 +9,7 @@ static struct {
bool ignore_gfp_reclaim;
bool cache_filter;
 } failslab = {
-   .attr = FAULT_ATTR_INITIALIZER,
+   .attr = FAULT_ATTR_INITIALIZER_KEY(&should_failslab_active.key),
.ignore_gfp_reclaim = true,
.cache_filter = false,
 };
diff --git a/mm/slab.h b/mm/slab.h
index 5f8f47c5bee0..792e19cb37b8 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Internal slab definitions
@@ -160,6 +161,8 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), 
sizeof(freelist_aba_t)
  */
 #define slab_page(s) folio_page(slab_folio(s), 0)
 
+DECLARE_STATIC_KEY_FALSE(should_failslab_active);
+
 /*
  * If network-based swap is enabled, sl*b must keep track of whether pages
  * were allocated from pfmemalloc reserves.
diff --git a/mm/slub.c b/mm/slub.c
index 0809760cf789..11980aa94631 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3874,13 +3874,37 @@ static __always_inline void 
maybe_wipe_obj_freeptr(struct kmem_cache *s,
0, sizeof(void *));
 }
 
-noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
+DEFINE_STATIC_KEY_FALSE(should_failslab_active);
+
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+noinline
+#else
+static inline
+#endif
+int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
 {
if (__should_failslab(s, gfpflags))
return -ENOMEM;
return 0;
 }
-ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
+ALLOW_ERROR_INJECTION_KEY(should_failslab, ERRNO, &should_failslab_active);
+
+static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
+  gfp_t gfp)
+{
+   if (static_branch_unlikely(&should_failslab_active))
+   return should_failslab(s, gfp);
+   else
+   return 0;
+}
+#else
+static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
+  gfp_t gfp)
+{
+   return false;
+}
+#endif
 
 static __fastpath_inline
 struct kmem_cache