On 6/15/26 13:05, Harry Yoo (Oracle) wrote:
> Teach kfree_rcu_sheaf() how to handle the !allow_spin case. Try to get
> an empty sheaf from pcs->spare or the barn even when spinning is not
> allowed. Unlike __pcs_replace_full_main(), try harder to allocate
> an empty sheaf because the fallback path will be more expensive than
> kfree_nolock().

You mean that it will try to allocate an empty sheaf, while
__pcs_replace_full_main() doesn't with allow_spin == false.
I just noticed the comment there is a bit stale... nevermind.

> 
> When trylock fails or the kernel observes non-NULL pcs->rcu_free after
> lock acquisition, free the sheaf instead of putting it to the barn.
> This is rare and not worth complicating the code.
> 
> Since call_rcu() cannot be called in an unknown context,
> kfree_rcu_sheaf() fails when the rcu sheaf becomes full.
> 
> Signed-off-by: Harry Yoo (Oracle) <[email protected]>
> ---
>  mm/slab.h        |  2 +-
>  mm/slab_common.c |  2 +-
>  mm/slub.c        | 39 ++++++++++++++++++++++++++++++---------
>  3 files changed, 32 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/slab.h b/mm/slab.h
> index 509f330654b8..b1bd33a16544 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -429,7 +429,7 @@ static inline bool is_kmalloc_normal(struct kmem_cache *s)
>       return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT));
>  }
>  
> -bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj);
> +bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin);
>  void flush_all_rcu_sheaves(void);
>  void flush_rcu_sheaves_on_cache(struct kmem_cache *s);
>  
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index b6426d7ceec9..bc1a8ec938d9 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1605,7 +1605,7 @@ static bool kfree_rcu_sheaf(void *obj)
>  
>       s = slab->slab_cache;
>       if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()))
> -             return __kfree_rcu_sheaf(s, obj);
> +             return __kfree_rcu_sheaf(s, obj, /* allow_spin = */ true);
>  
>       return false;
>  }
> diff --git a/mm/slub.c b/mm/slub.c
> index 87ca154ccd80..b0d38d515386 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -2815,7 +2815,8 @@ static inline struct slab_sheaf 
> *alloc_empty_sheaf(struct kmem_cache *s,
>       return __alloc_empty_sheaf(s, gfp, alloc_flags, s->sheaf_capacity);
>  }
>  
> -static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf)
> +static void __free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf 
> *sheaf,
> +                            bool allow_spin)
>  {
>       /*
>        * If the sheaf was created with SLAB_ALLOC_NO_RECURSE flag then its
> @@ -2827,11 +2828,20 @@ static void free_empty_sheaf(struct kmem_cache *s, 
> struct slab_sheaf *sheaf)
>               mark_obj_codetag_empty(sheaf);
>  
>       VM_WARN_ON_ONCE(sheaf->size > 0);
> -     kfree(sheaf);
> +
> +     if (likely(allow_spin))
> +             kfree(sheaf);
> +     else
> +             kfree_nolock(sheaf);

Hmm what if the sheaf wasn't allocated with kmalloc_nolock()?

>  
>       stat(s, SHEAF_FREE);
>  }
>  
> +static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf)
> +{
> +     __free_empty_sheaf(s, sheaf, /* allow_spin = */ true);
> +}
> +
>  static unsigned int
>  refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,
>              unsigned int max);
> @@ -3132,7 +3142,6 @@ static struct slab_sheaf *barn_get_empty_sheaf(struct 
> node_barn *barn,
>   * intended action due to a race or cpu migration. Thus they do not check the
>   * empty or full sheaf limits for simplicity.
>   */
> -
>  static void barn_put_empty_sheaf(struct node_barn *barn, struct slab_sheaf 
> *sheaf)
>  {
>       unsigned long flags;
> @@ -6065,7 +6074,7 @@ static void rcu_free_sheaf(struct rcu_head *head)
>   */
>  static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG);
>  
> -bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
> +bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin)
>  {
>       struct slub_percpu_sheaves *pcs;
>       struct slab_sheaf *rcu_sheaf;
> @@ -6081,9 +6090,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
>       pcs = this_cpu_ptr(s->cpu_sheaves);
>  
>       if (unlikely(!pcs->rcu_free)) {
> -
>               struct slab_sheaf *empty;
>               struct node_barn *barn;
> +             unsigned int alloc_flags = SLAB_ALLOC_DEFAULT;
> +             gfp_t gfp = GFP_NOWAIT;
>  
>               /* Bootstrap or debug cache, fall back */
>               if (unlikely(!cache_has_sheaves(s))) {
> @@ -6103,7 +6113,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
>                       goto fail;
>               }
>  
> -             empty = barn_get_empty_sheaf(barn, true);
> +             empty = barn_get_empty_sheaf(barn, allow_spin);

Here we might be getting a sheaf allocated previously with kmalloc().

>  
>               if (empty) {
>                       pcs->rcu_free = empty;
> @@ -6112,20 +6122,25 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void 
> *obj)
>  
>               local_unlock(&s->cpu_sheaves->lock);
>  
> -             empty = alloc_empty_sheaf(s, GFP_NOWAIT, SLAB_ALLOC_DEFAULT);
> +             if (unlikely(!allow_spin)) {
> +                     alloc_flags = SLAB_ALLOC_TRYLOCK;
> +                     gfp = 0;

Maybe __GFP_NOWARN?
Here we would get a sheaf with kmalloc_nolock() so that's ok even if it's
later freed by someone else by kfree(), right.

> +             }
> +
> +             empty = alloc_empty_sheaf(s, gfp, alloc_flags);
>  
>               if (!empty)
>                       goto fail;
>  
>               if (!local_trylock(&s->cpu_sheaves->lock)) {
> -                     barn_put_empty_sheaf(barn, empty);
> +                     __free_empty_sheaf(s, empty, allow_spin);

Well we could still use the barn with allow_spin == true.
But more crucially, here we might be freeing with kfree_nolock() a sheaf
from the barn previously allocated with kmalloc()?
Maybe we need to track if it's the case and defer-free it or something.

Also maybe there could be a wrapper kfree_maybe_nolock() (~better name?)
That means "I want to kfree safely in kfree_nolock() context something that
MIGHT have been kmalloc()"
And maybe depending on the debugging options that make kmalloc() ->
kfree_nolock() incompatible, if those are not enabled, it wouldn't have to
defer, but proceed normally?

>                       goto fail;
>               }
>  
>               pcs = this_cpu_ptr(s->cpu_sheaves);
>  
>               if (unlikely(pcs->rcu_free))
> -                     barn_put_empty_sheaf(barn, empty);
> +                     __free_empty_sheaf(s, empty, allow_spin);
>               else
>                       pcs->rcu_free = empty;
>       }
> @@ -6143,6 +6158,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
>       if (likely(rcu_sheaf->size < s->sheaf_capacity)) {
>               rcu_sheaf = NULL;
>       } else {
> +             if (unlikely(!allow_spin)) {
> +                     /* call_rcu() cannot be called in an unknown context */
> +                     rcu_sheaf->size--;
> +                     local_unlock(&s->cpu_sheaves->lock);
> +                     goto fail;
> +             }
>               pcs->rcu_free = NULL;
>               rcu_sheaf->node = numa_node_id();
>       }
> 


Reply via email to