On 6/15/26 13:06, Harry Yoo (Oracle) wrote:
> As suggested by Vlastimil Babka, kfree_rcu_sheaf() can be used
> on PREEMPT_RT if we always assume spinning is not allowed on PREEMPT_RT.
> This is because local_trylock and spinlock_t are safe to use with
> trylock variant as long as the kernel does not spin and the context is
> not NMI and not hardirq.
> 
> Now that __kfree_rcu_sheaf() knows how to handle allow_spin = false,
> relax the limitation and try the sheaves path on PREEMPT_RT as well.
> 
> Keep the lockdep map on non RT kernels. However, do not use the lockdep
> map on PREEMPT_RT to avoid suppressing valid lockdep warnings.
> 
> Link: 
> https://lore.kernel.org/linux-mm/[email protected]
> Suggested-by: Vlastimil Babka (SUSE) <[email protected]>
> Signed-off-by: Harry Yoo (Oracle) <[email protected]>

LGTM, but maybe unnecessary pessimistic wrt call_rcu() on PREEMPT_RT?
I thought (in the Link: above) we'd only need to downgrade allow_spin to
false on PREEMPT_RT for handling sheaves movement from/to barn and
alloc_empty_sheaf(), but call_rcu() would be safe from kfree_rcu() even on
RT? Or is the irqs_disabled() condition rare enough so we don't care?

> ---
>  mm/slab_common.c | 11 +++++++++--
>  mm/slub.c        | 17 ++++++++++-------
>  2 files changed, 19 insertions(+), 9 deletions(-)
> 
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 55546b8385ff..807924a94fb0 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -1595,6 +1595,13 @@ static bool kfree_rcu_sheaf(void *obj)
>  {
>       struct kmem_cache *s;
>       struct slab *slab;
> +     bool allow_spin;
> +
> +     /*
> +      * It is not safe to spin on PREEMPT_RT because the kernel might be
> +      * holding a raw spinlock and slab acquires sleeping locks.
> +      */
> +     allow_spin = !IS_ENABLED(CONFIG_PREEMPT_RT);
>  
>       if (is_vmalloc_addr(obj))
>               return false;
> @@ -1605,7 +1612,7 @@ static bool kfree_rcu_sheaf(void *obj)
>  
>       s = slab->slab_cache;
>       if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()))
> -             return __kfree_rcu_sheaf(s, obj, /* allow_spin = */ true);
> +             return __kfree_rcu_sheaf(s, obj, allow_spin);
>  
>       return false;
>  }
> @@ -1954,7 +1961,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr)
>       if (!head)
>               might_sleep();
>  
> -     if (!IS_ENABLED(CONFIG_PREEMPT_RT) && kfree_rcu_sheaf(ptr))
> +     if (kfree_rcu_sheaf(ptr))
>               return;
>  
>       // Queue the object but don't yet schedule the batch.
> diff --git a/mm/slub.c b/mm/slub.c
> index ba593c1c53d5..4850629774b2 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -6082,12 +6082,13 @@ static void rcu_free_sheaf(struct rcu_head *head)
>   * kvfree_call_rcu() can be called while holding a raw_spinlock_t. Since
>   * __kfree_rcu_sheaf() may acquire a spinlock_t (sleeping lock on 
> PREEMPT_RT),
>   * this would violate lock nesting rules. Therefore, kvfree_call_rcu() avoids
> - * this problem by bypassing the sheaves layer entirely on PREEMPT_RT.
> + * this problem by passing allow_spin = false on PREEMPT_RT.
>   *
>   * However, lockdep still complains that it is invalid to acquire spinlock_t
>   * while holding raw_spinlock_t, even on !PREEMPT_RT where spinlock_t is a
>   * spinning lock. Tell lockdep that acquiring spinlock_t is valid here
> - * by temporarily raising the wait-type to LD_WAIT_CONFIG.
> + * by temporarily raising the wait-type to LD_WAIT_CONFIG. Skip the lockdep 
> map
> + * on PREEMPT_RT to avoid suppressing valid lockdep warnings.
>   */
>  static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG);
>  
> @@ -6096,10 +6097,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void 
> *obj, bool allow_spin)
>       struct slub_percpu_sheaves *pcs;
>       struct slab_sheaf *rcu_sheaf;
>  
> -     if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
> -             return false;
> +     VM_WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT) && allow_spin);
>  
> -     lock_map_acquire_try(&kfree_rcu_sheaf_map);
> +     if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> +             lock_map_acquire_try(&kfree_rcu_sheaf_map);
>  
>       if (!local_trylock(&s->cpu_sheaves->lock))
>               goto fail;
> @@ -6199,12 +6200,14 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void 
> *obj, bool allow_spin)
>       local_unlock(&s->cpu_sheaves->lock);
>  
>       stat(s, FREE_RCU_SHEAF);
> -     lock_map_release(&kfree_rcu_sheaf_map);
> +     if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> +             lock_map_release(&kfree_rcu_sheaf_map);
>       return true;
>  
>  fail:
>       stat(s, FREE_RCU_SHEAF_FAIL);
> -     lock_map_release(&kfree_rcu_sheaf_map);
> +     if (!IS_ENABLED(CONFIG_PREEMPT_RT))
> +             lock_map_release(&kfree_rcu_sheaf_map);
>       return false;
>  }
>  
> 


Reply via email to