On 6/15/26 13:05, Harry Yoo (Oracle) wrote: > Teach kfree_rcu_sheaf() how to handle the !allow_spin case. Try to get > an empty sheaf from pcs->spare or the barn even when spinning is not > allowed. Unlike __pcs_replace_full_main(), try harder to allocate > an empty sheaf because the fallback path will be more expensive than > kfree_nolock().
You mean that it will try to allocate an empty sheaf, while __pcs_replace_full_main() doesn't with allow_spin == false. I just noticed the comment there is a bit stale... nevermind. > > When trylock fails or the kernel observes non-NULL pcs->rcu_free after > lock acquisition, free the sheaf instead of putting it to the barn. > This is rare and not worth complicating the code. > > Since call_rcu() cannot be called in an unknown context, > kfree_rcu_sheaf() fails when the rcu sheaf becomes full. > > Signed-off-by: Harry Yoo (Oracle) <[email protected]> > --- > mm/slab.h | 2 +- > mm/slab_common.c | 2 +- > mm/slub.c | 39 ++++++++++++++++++++++++++++++--------- > 3 files changed, 32 insertions(+), 11 deletions(-) > > diff --git a/mm/slab.h b/mm/slab.h > index 509f330654b8..b1bd33a16544 100644 > --- a/mm/slab.h > +++ b/mm/slab.h > @@ -429,7 +429,7 @@ static inline bool is_kmalloc_normal(struct kmem_cache *s) > return !(s->flags & (SLAB_CACHE_DMA|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT)); > } > > -bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj); > +bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin); > void flush_all_rcu_sheaves(void); > void flush_rcu_sheaves_on_cache(struct kmem_cache *s); > > diff --git a/mm/slab_common.c b/mm/slab_common.c > index b6426d7ceec9..bc1a8ec938d9 100644 > --- a/mm/slab_common.c > +++ b/mm/slab_common.c > @@ -1605,7 +1605,7 @@ static bool kfree_rcu_sheaf(void *obj) > > s = slab->slab_cache; > if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())) > - return __kfree_rcu_sheaf(s, obj); > + return __kfree_rcu_sheaf(s, obj, /* allow_spin = */ true); > > return false; > } > diff --git a/mm/slub.c b/mm/slub.c > index 87ca154ccd80..b0d38d515386 100644 > --- a/mm/slub.c > +++ b/mm/slub.c > @@ -2815,7 +2815,8 @@ static inline struct slab_sheaf > *alloc_empty_sheaf(struct kmem_cache *s, > return __alloc_empty_sheaf(s, gfp, alloc_flags, s->sheaf_capacity); > } > > -static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf) > +static void __free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf > *sheaf, > + bool allow_spin) > { > /* > * If the sheaf was created with SLAB_ALLOC_NO_RECURSE flag then its > @@ -2827,11 +2828,20 @@ static void free_empty_sheaf(struct kmem_cache *s, > struct slab_sheaf *sheaf) > mark_obj_codetag_empty(sheaf); > > VM_WARN_ON_ONCE(sheaf->size > 0); > - kfree(sheaf); > + > + if (likely(allow_spin)) > + kfree(sheaf); > + else > + kfree_nolock(sheaf); Hmm what if the sheaf wasn't allocated with kmalloc_nolock()? > > stat(s, SHEAF_FREE); > } > > +static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf) > +{ > + __free_empty_sheaf(s, sheaf, /* allow_spin = */ true); > +} > + > static unsigned int > refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min, > unsigned int max); > @@ -3132,7 +3142,6 @@ static struct slab_sheaf *barn_get_empty_sheaf(struct > node_barn *barn, > * intended action due to a race or cpu migration. Thus they do not check the > * empty or full sheaf limits for simplicity. > */ > - > static void barn_put_empty_sheaf(struct node_barn *barn, struct slab_sheaf > *sheaf) > { > unsigned long flags; > @@ -6065,7 +6074,7 @@ static void rcu_free_sheaf(struct rcu_head *head) > */ > static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG); > > -bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) > +bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin) > { > struct slub_percpu_sheaves *pcs; > struct slab_sheaf *rcu_sheaf; > @@ -6081,9 +6090,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) > pcs = this_cpu_ptr(s->cpu_sheaves); > > if (unlikely(!pcs->rcu_free)) { > - > struct slab_sheaf *empty; > struct node_barn *barn; > + unsigned int alloc_flags = SLAB_ALLOC_DEFAULT; > + gfp_t gfp = GFP_NOWAIT; > > /* Bootstrap or debug cache, fall back */ > if (unlikely(!cache_has_sheaves(s))) { > @@ -6103,7 +6113,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) > goto fail; > } > > - empty = barn_get_empty_sheaf(barn, true); > + empty = barn_get_empty_sheaf(barn, allow_spin); Here we might be getting a sheaf allocated previously with kmalloc(). > > if (empty) { > pcs->rcu_free = empty; > @@ -6112,20 +6122,25 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void > *obj) > > local_unlock(&s->cpu_sheaves->lock); > > - empty = alloc_empty_sheaf(s, GFP_NOWAIT, SLAB_ALLOC_DEFAULT); > + if (unlikely(!allow_spin)) { > + alloc_flags = SLAB_ALLOC_TRYLOCK; > + gfp = 0; Maybe __GFP_NOWARN? Here we would get a sheaf with kmalloc_nolock() so that's ok even if it's later freed by someone else by kfree(), right. > + } > + > + empty = alloc_empty_sheaf(s, gfp, alloc_flags); > > if (!empty) > goto fail; > > if (!local_trylock(&s->cpu_sheaves->lock)) { > - barn_put_empty_sheaf(barn, empty); > + __free_empty_sheaf(s, empty, allow_spin); Well we could still use the barn with allow_spin == true. But more crucially, here we might be freeing with kfree_nolock() a sheaf from the barn previously allocated with kmalloc()? Maybe we need to track if it's the case and defer-free it or something. Also maybe there could be a wrapper kfree_maybe_nolock() (~better name?) That means "I want to kfree safely in kfree_nolock() context something that MIGHT have been kmalloc()" And maybe depending on the debugging options that make kmalloc() -> kfree_nolock() incompatible, if those are not enabled, it wouldn't have to defer, but proceed normally? > goto fail; > } > > pcs = this_cpu_ptr(s->cpu_sheaves); > > if (unlikely(pcs->rcu_free)) > - barn_put_empty_sheaf(barn, empty); > + __free_empty_sheaf(s, empty, allow_spin); > else > pcs->rcu_free = empty; > } > @@ -6143,6 +6158,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) > if (likely(rcu_sheaf->size < s->sheaf_capacity)) { > rcu_sheaf = NULL; > } else { > + if (unlikely(!allow_spin)) { > + /* call_rcu() cannot be called in an unknown context */ > + rcu_sheaf->size--; > + local_unlock(&s->cpu_sheaves->lock); > + goto fail; > + } > pcs->rcu_free = NULL; > rcu_sheaf->node = numa_node_id(); > } >

