As suggested by Vlastimil Babka, kfree_rcu_sheaf() can be used on PREEMPT_RT if we always assume spinning is not allowed on PREEMPT_RT. This is because local_trylock and spinlock_t are safe to use with trylock variant as long as the kernel does not spin and the context is not NMI and not hardirq.
Now that __kfree_rcu_sheaf() knows how to handle allow_spin = false, relax the limitation and try the sheaves path on PREEMPT_RT as well. Keep the lockdep map on non RT kernels. However, do not use the lockdep map on PREEMPT_RT to avoid suppressing valid lockdep warnings. Link: https://lore.kernel.org/linux-mm/[email protected] Suggested-by: Vlastimil Babka (SUSE) <[email protected]> Signed-off-by: Harry Yoo (Oracle) <[email protected]> --- mm/slab_common.c | 11 +++++++++-- mm/slub.c | 17 ++++++++++------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 55546b8385ff..807924a94fb0 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1595,6 +1595,13 @@ static bool kfree_rcu_sheaf(void *obj) { struct kmem_cache *s; struct slab *slab; + bool allow_spin; + + /* + * It is not safe to spin on PREEMPT_RT because the kernel might be + * holding a raw spinlock and slab acquires sleeping locks. + */ + allow_spin = !IS_ENABLED(CONFIG_PREEMPT_RT); if (is_vmalloc_addr(obj)) return false; @@ -1605,7 +1612,7 @@ static bool kfree_rcu_sheaf(void *obj) s = slab->slab_cache; if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())) - return __kfree_rcu_sheaf(s, obj, /* allow_spin = */ true); + return __kfree_rcu_sheaf(s, obj, allow_spin); return false; } @@ -1954,7 +1961,7 @@ void kvfree_call_rcu(struct rcu_head *head, void *ptr) if (!head) might_sleep(); - if (!IS_ENABLED(CONFIG_PREEMPT_RT) && kfree_rcu_sheaf(ptr)) + if (kfree_rcu_sheaf(ptr)) return; // Queue the object but don't yet schedule the batch. diff --git a/mm/slub.c b/mm/slub.c index ba593c1c53d5..4850629774b2 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6082,12 +6082,13 @@ static void rcu_free_sheaf(struct rcu_head *head) * kvfree_call_rcu() can be called while holding a raw_spinlock_t. Since * __kfree_rcu_sheaf() may acquire a spinlock_t (sleeping lock on PREEMPT_RT), * this would violate lock nesting rules. Therefore, kvfree_call_rcu() avoids - * this problem by bypassing the sheaves layer entirely on PREEMPT_RT. + * this problem by passing allow_spin = false on PREEMPT_RT. * * However, lockdep still complains that it is invalid to acquire spinlock_t * while holding raw_spinlock_t, even on !PREEMPT_RT where spinlock_t is a * spinning lock. Tell lockdep that acquiring spinlock_t is valid here - * by temporarily raising the wait-type to LD_WAIT_CONFIG. + * by temporarily raising the wait-type to LD_WAIT_CONFIG. Skip the lockdep map + * on PREEMPT_RT to avoid suppressing valid lockdep warnings. */ static DEFINE_WAIT_OVERRIDE_MAP(kfree_rcu_sheaf_map, LD_WAIT_CONFIG); @@ -6096,10 +6097,10 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin) struct slub_percpu_sheaves *pcs; struct slab_sheaf *rcu_sheaf; - if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT))) - return false; + VM_WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT) && allow_spin); - lock_map_acquire_try(&kfree_rcu_sheaf_map); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + lock_map_acquire_try(&kfree_rcu_sheaf_map); if (!local_trylock(&s->cpu_sheaves->lock)) goto fail; @@ -6199,12 +6200,14 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj, bool allow_spin) local_unlock(&s->cpu_sheaves->lock); stat(s, FREE_RCU_SHEAF); - lock_map_release(&kfree_rcu_sheaf_map); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + lock_map_release(&kfree_rcu_sheaf_map); return true; fail: stat(s, FREE_RCU_SHEAF_FAIL); - lock_map_release(&kfree_rcu_sheaf_map); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + lock_map_release(&kfree_rcu_sheaf_map); return false; } -- 2.53.0

