On 6/15/26 13:06, Harry Yoo (Oracle) wrote:
> As suggested by Vlastimil Babka, kfree_rcu_sheaf() can be used
> on PREEMPT_RT if we always assume spinning is not allowed on PREEMPT_RT.
> This is because local_trylock and spinlock_t are safe to use with
> trylock variant as long as the kernel does not spin and the context is
> not NMI and not hardirq.
>
> Now that __kfree_rcu_sheaf() knows how to handle allow_spin = false,
> relax the limitation and try the sheaves path on PREEMPT_RT as well.
>
> Keep the lockdep map on non RT kernels. However, do not use the lockdep
> map on PREEMPT_RT to avoid suppressing valid lockdep warnings.
>
> Link:
> https://lore.kernel.org/linux-mm/[email protected]
> Suggested-by: Vlastimil Babka (SUSE) <[email protected]>
> Signed-off-by: Harry Yoo (Oracle) <[email protected]>
LGTM, but maybe unnecessary pessimistic wrt call_rcu() on PREEMPT_RT?
I thought (in the Link: above) we'd only need to downgrade allow_spin to
false on PREEMPT_RT for handling sheaves movement from/to barn and
alloc_empty_sheaf(), but call_rcu() would be safe from kfree_rcu() even on
RT? Or is the irqs_disabled() condition rare enough so we don't care?
> ---
> mm/slab_common.c | 11 +++++++++--
> mm/slub.c | 17 ++++++++++-------
> 2 files changed, 19 insertions(+), 9 deletions(-)
>
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 55546b8385ff..807924a94fb0 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1595,6 +1595,13 @@ static bool kfree_rcu_sheaf(void *obj)
> {
> struct kmem_cache *s;
> struct slab *slab;
> + bool allow_spin;
> +
> + /*
> + * It is not safe to spin on PREEMPT_RT because the kernel might be
> + * holding a raw spinlock and slab acquires sleeping locks.
> + */
> + allow_spin = !IS_ENABLED(CONFIG_PREEMPT_RT);
>
> if (is_vmalloc_addr(obj))
> return false;
> @@ -1605,7 +1612,7 @@ static bool kfree_rcu_sheaf(void *obj)
>
> s = slab->slab_cache;
> if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()))
> - return __kfree_rcu_sheaf(s, obj, /* allow_spin = */ true);
> + return __kfree_rcu_sheaf(s, obj, allow_spin);
>
> return false;
> }
> @@ -1954,7 +1961,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
> if (!head)
> might_sleep();
>
> - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && kfree_rcu_sheaf(ptr))
> + if (kfree_rcu_sheaf(ptr))
> return;
>
> // Queue the object but don't yet schedule the batch.
> diff --git a/mm/slub.c b/mm/slub.c
> index ba593c1c53d5..4850629774b2 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -6082,12 +6082,13 @@ static void rcu_free_sheaf(struct rcu_head *head)
> * kvfree_call_rcu() can be called while holding a raw_spinlock_t. Since
> * __kfree_rcu_sheaf() may acquire a spinlock_t (sleeping lock on
> PREEMPT_RT),
> * this would violate lock nesting rules. Therefore, kvfree_call_rcu() avoids
> - * this problem by bypassing the sheaves layer entirely on PREEMPT_RT.
> + * this problem by passing allow_spin = false on PREEMPT_RT.
> *
> * However, lockdep still complains that it is invalid to acquire spinlock_t
> * while holding raw_spinlock_t, even on !PREEMPT_RT where spinlock_t is a
> * spinning lock. Tell lockdep that acquiring spinlock_t is valid here
> - * by temporarily raising the wait-type to LD_WAIT_CONFIG.
> + * by temporarily raising the wait-type to LD_WAIT_CONFIG. Skip the lockdep
> map
> + * on PREEMPT_RT to avoid suppressing valid lockdep warnings.
> */
> static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG);
>
> @@ -6096,10 +6097,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void
> *obj, bool allow_spin)
> struct slub_percpu_sheaves *pcs;
> struct slab_sheaf *rcu_sheaf;
>
> - if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
> - return false;
> + VM_WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT) && allow_spin);
>
> - lock_map_acquire_try(&kfree_rcu_sheaf_map);
> + if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> + lock_map_acquire_try(&kfree_rcu_sheaf_map);
>
> if (!local_trylock(&s->cpu_sheaves->lock))
> goto fail;
> @@ -6199,12 +6200,14 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void
> *obj, bool allow_spin)
> local_unlock(&s->cpu_sheaves->lock);
>
> stat(s, FREE_RCU_SHEAF);
> - lock_map_release(&kfree_rcu_sheaf_map);
> + if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> + lock_map_release(&kfree_rcu_sheaf_map);
> return true;
>
> fail:
> stat(s, FREE_RCU_SHEAF_FAIL);
> - lock_map_release(&kfree_rcu_sheaf_map);
> + if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> + lock_map_release(&kfree_rcu_sheaf_map);
> return false;
> }
>
>