Currently, k[v]free_rcu() cannot be called from an unknown context, because doing so can deadlock when the call lands in the middle of another k[v]free_rcu() invocation.
Make users' lives easier by introducing kfree_rcu_nolock() variant, now that kfree_rcu_sheaf() is available on PREEMPT_RT and __kfree_rcu_sheaf() handles unknown context. Unlike k[v]free_rcu(), kfree_rcu_nolock() does not fall back to the kvfree_rcu batching when the sheaves path fails, and falls back to defer_kfree_rcu() instead. In most cases, the sheaves path is expected to succeed and it's unnecessary to add complexity to the existing kvfree_rcu batching. Since defer_kfree_rcu() can be called on caches without sheaves, move deferred_work_barrier() and rcu_barrier() outside the branch in kvfree_rcu_barrier_on_cache(). Signed-off-by: Harry Yoo (Oracle) <[email protected]> --- include/linux/rcupdate.h | 12 ++++++++++++ mm/slab.h | 1 + mm/slab_common.c | 22 ++++++++++++++++++++-- mm/slub.c | 23 ++++++++++++++++++++++- 4 files changed, 55 insertions(+), 3 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5e95acc33989..3025249bfcb5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1099,6 +1099,7 @@ static inline void rcu_read_unlock_migrate(void) * In mm/slab_common.c, no suitable header to include here. */ void kvfree_call_rcu(struct rcu_head *head, void *ptr); +void kfree_call_rcu_nolock(struct rcu_head *head, void *ptr); /* * The BUILD_BUG_ON() makes sure the rcu_head offset can be handled. See the @@ -1122,6 +1123,17 @@ do { \ kvfree_call_rcu(NULL, (void *) (___p)); \ } while (0) +/* kfree_rcu_nolock() supports 2-arg variant only */ +#define kfree_rcu_nolock(ptr, krhf) \ +do { \ + typeof (ptr) ___p = (ptr); \ + \ + if (___p) { \ + BUILD_BUG_ON(offsetof(typeof(*(ptr)), krhf) >= 4096); \ + kfree_call_rcu_nolock(&((___p)->krhf), (void *) (___p));\ + } \ +} while (0) + /* * Place this after a lock-acquisition primitive to guarantee that * an UNLOCK+LOCK pair acts as a full barrier. 
This guarantee applies diff --git a/mm/slab.h b/mm/slab.h index 961581e35ec8..a493c5201e96 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -745,6 +745,7 @@ void __check_heap_object(const void *ptr, unsigned long n, const struct slab *slab, bool to_user); void deferred_work_barrier(void); +void defer_kfree_rcu(struct rcu_head *head); static inline bool slub_debug_orig_size(struct kmem_cache *s) { diff --git a/mm/slab_common.c b/mm/slab_common.c index 807924a94fb0..5a39e6225160 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -1263,6 +1263,23 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); EXPORT_TRACEPOINT_SYMBOL(kfree); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); +void kfree_call_rcu_nolock(struct rcu_head *head, void *ptr) +{ + struct slab *slab; + struct kmem_cache *s; + + VM_WARN_ON_ONCE(is_vmalloc_addr(ptr) || !virt_to_slab(ptr)); + + slab = virt_to_slab(ptr); + s = slab->slab_cache; + + if (__kfree_rcu_sheaf(s, ptr, /* allow_spin = */ false)) + return; + + defer_kfree_rcu(head); +} +EXPORT_SYMBOL_GPL(kfree_call_rcu_nolock); + #ifndef CONFIG_KVFREE_RCU_BATCHED void kvfree_call_rcu(struct rcu_head *head, void *ptr) @@ -2120,10 +2137,11 @@ void kvfree_rcu_barrier_on_cache(struct kmem_cache *s) cpus_read_lock(); flush_rcu_sheaves_on_cache(s); cpus_read_unlock(); - deferred_work_barrier(); - rcu_barrier(); } + /* kfree_rcu_nolock() might have deferred frees even without sheaves */ + deferred_work_barrier(); + rcu_barrier(); __kvfree_rcu_barrier(); } EXPORT_SYMBOL_GPL(kvfree_rcu_barrier_on_cache); diff --git a/mm/slub.c b/mm/slub.c index 4850629774b2..19018a979445 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4075,6 +4075,7 @@ static void flush_all(struct kmem_cache *s) struct deferred_percpu_work { struct llist_head objects; + struct llist_head objects_by_rcu; struct llist_head rcu_sheaves; struct irq_work work; }; @@ -4083,6 +4084,7 @@ static void deferred_percpu_work_fn(struct irq_work *work); static DEFINE_PER_CPU(struct deferred_percpu_work, deferred_percpu_work) = 
{ .objects = LLIST_HEAD_INIT(objects), + .objects_by_rcu = LLIST_HEAD_INIT(objects_by_rcu), .rcu_sheaves = LLIST_HEAD_INIT(rcu_sheaves), .work = IRQ_WORK_INIT(deferred_percpu_work_fn), }; @@ -6392,12 +6394,13 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) static void deferred_percpu_work_fn(struct irq_work *work) { struct deferred_percpu_work *dpw; - struct llist_head *objs, *rcu_sheaves; + struct llist_head *objs, *objs_by_rcu, *rcu_sheaves; struct llist_node *llnode, *pos, *t; dpw = container_of(work, struct deferred_percpu_work, work); rcu_sheaves = &dpw->rcu_sheaves; objs = &dpw->objects; + objs_by_rcu = &dpw->objects_by_rcu; llnode = llist_del_all(objs); llist_for_each_safe(pos, t, llnode) { @@ -6428,6 +6431,13 @@ static void deferred_percpu_work_fn(struct irq_work *work) call_rcu(&rcu_sheaf->rcu_head, rcu_free_sheaf); } + + llnode = llist_del_all(objs_by_rcu); + llist_for_each_safe(pos, t, llnode) { + struct rcu_head *head = (struct rcu_head *)pos; + + call_rcu(head, kvfree_rcu_cb); + } } static void defer_free(struct kmem_cache *s, void *head) @@ -6443,6 +6453,17 @@ static void defer_free(struct kmem_cache *s, void *head) irq_work_queue(&dpw->work); } +void defer_kfree_rcu(struct rcu_head *head) +{ + struct deferred_percpu_work *dpw; + + guard(preempt)(); + + dpw = this_cpu_ptr(&deferred_percpu_work); + if (llist_add((struct llist_node *)head, &dpw->objects_by_rcu)) + irq_work_queue(&dpw->work); +} + void deferred_work_barrier(void) { int cpu; -- 2.53.0

