On Wed, 2021-04-14 at 08:15 +0200, Mike Galbraith wrote:
> On Wed, 2021-04-14 at 07:26 +0200, Dmitry Vyukov wrote:
> > On Wed, Apr 14, 2021 at 6:00 AM Mike Galbraith <efa...@gmx.de> wrote:
> >
> > > [    0.692437] BUG: sleeping function called from invalid context at 
> > > kernel/locking/rtmutex.c:943
> > > [    0.692439] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 1, 
> > > name: swapper/0
> > > [    0.692442] Preemption disabled at:
> > > [    0.692443] [<ffffffff811a1510>] on_each_cpu_cond_mask+0x30/0xb0
> > > [    0.692451] CPU: 5 PID: 1 Comm: swapper/0 Not tainted 
> > > 5.12.0.g2afefec-tip-rt #5
> > > [    0.692454] Hardware name: MEDION MS-7848/MS-7848, BIOS M7848W08.20C 
> > > 09/23/2013
> > > [    0.692456] Call Trace:
> > > [    0.692458]  ? on_each_cpu_cond_mask+0x30/0xb0
> > > [    0.692462]  dump_stack+0x8a/0xb5
> > > [    0.692467]  ___might_sleep.cold+0xfe/0x112
> > > [    0.692471]  rt_spin_lock+0x1c/0x60
> >
> > HI Mike,
> >
> > If freeing pages from smp_call_function is not OK, then perhaps we
> > need just to collect the objects to be freed to the task/CPU that
> > executes kasan_quarantine_remove_cache and it will free them (we know
> > it can free objects).
>
> Yeah, RT will have to shove freeing into preemptible context.

There's a very similar problem addressed in the RT patch set, so I used
the free samples on top of your *very* convenient hint that pesky
preallocation is optional, to seemingly make KASAN a happy RT camper.
Dunno if RT maintainers would prefer something like this over simply
disabling KASAN for RT configs, but what the heck, I'll show it.

kasan: make it RT aware

Skip preallocation when not possible for RT, and move cache removal
from IPI to synchronous work.

Signed-off-by: Mike Galbraith <efa...@gmx.de>
---
 lib/stackdepot.c      |   10 +++++-----
 mm/kasan/quarantine.c |   49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 5 deletions(-)

--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -71,7 +71,7 @@ static void *stack_slabs[STACK_ALLOC_MAX
 static int depot_index;
 static int next_slab_inited;
 static size_t depot_offset;
-static DEFINE_SPINLOCK(depot_lock);
+static DEFINE_RAW_SPINLOCK(depot_lock);

 static bool init_stack_slab(void **prealloc)
 {
@@ -265,7 +265,7 @@ depot_stack_handle_t stack_depot_save(un
        struct page *page = NULL;
        void *prealloc = NULL;
        unsigned long flags;
-       u32 hash;
+       u32 hash, may_prealloc = !IS_ENABLED(CONFIG_PREEMPT_RT) || 
preemptible();

        if (unlikely(nr_entries == 0) || stack_depot_disable)
                goto fast_exit;
@@ -291,7 +291,7 @@ depot_stack_handle_t stack_depot_save(un
         * The smp_load_acquire() here pairs with smp_store_release() to
         * |next_slab_inited| in depot_alloc_stack() and init_stack_slab().
         */
-       if (unlikely(!smp_load_acquire(&next_slab_inited))) {
+       if (unlikely(!smp_load_acquire(&next_slab_inited) && may_prealloc)) {
                /*
                 * Zero out zone modifiers, as we don't have specific zone
                 * requirements. Keep the flags related to allocation in atomic
@@ -305,7 +305,7 @@ depot_stack_handle_t stack_depot_save(un
                        prealloc = page_address(page);
        }

-       spin_lock_irqsave(&depot_lock, flags);
+       raw_spin_lock_irqsave(&depot_lock, flags);

        found = find_stack(*bucket, entries, nr_entries, hash);
        if (!found) {
@@ -329,7 +329,7 @@ depot_stack_handle_t stack_depot_save(un
                WARN_ON(!init_stack_slab(&prealloc));
        }

-       spin_unlock_irqrestore(&depot_lock, flags);
+       raw_spin_unlock_irqrestore(&depot_lock, flags);
 exit:
        if (prealloc) {
                /* Nobody used this memory, ok to free it. */
--- a/mm/kasan/quarantine.c
+++ b/mm/kasan/quarantine.c
@@ -19,6 +19,9 @@
 #include <linux/srcu.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/cpu.h>
+#include <linux/mutex.h>
+#include <linux/workqueue.h>
 #include <linux/cpuhotplug.h>

 #include "../slab.h"
@@ -308,6 +311,48 @@ static void per_cpu_remove_cache(void *a
        qlist_free_all(&to_free, cache);
 }

+#ifdef CONFIG_PREEMPT_RT
+struct remove_cache_work {
+       struct work_struct work;
+       struct kmem_cache *cache;
+};
+
+static DEFINE_MUTEX(remove_caches_lock);
+static DEFINE_PER_CPU(struct remove_cache_work, remove_cache_work);
+
+static void per_cpu_remove_cache_work(struct work_struct *w)
+{
+       struct remove_cache_work *rcw;
+
+       rcw = container_of(w, struct remove_cache_work, work);
+       per_cpu_remove_cache(rcw->cache);
+}
+
+static void per_cpu_remove_caches_sync(struct kmem_cache *cache)
+{
+       struct remove_cache_work *rcw;
+       unsigned int cpu;
+
+       cpus_read_lock();
+       mutex_lock(&remove_caches_lock);
+
+       for_each_online_cpu(cpu) {
+               rcw = &per_cpu(remove_cache_work, cpu);
+               INIT_WORK(&rcw->work, per_cpu_remove_cache_work);
+               rcw->cache = cache;
+               schedule_work_on(cpu, &rcw->work);
+       }
+
+       for_each_online_cpu(cpu) {
+               rcw = &per_cpu(remove_cache_work, cpu);
+               flush_work(&rcw->work);
+       }
+
+       mutex_unlock(&remove_caches_lock);
+       cpus_read_unlock();
+}
+#endif
+
 /* Free all quarantined objects belonging to cache. */
 void kasan_quarantine_remove_cache(struct kmem_cache *cache)
 {
@@ -321,7 +366,11 @@ void kasan_quarantine_remove_cache(struc
         * achieves the first goal, while synchronize_srcu() achieves the
         * second.
         */
+#ifndef CONFIG_PREEMPT_RT
        on_each_cpu(per_cpu_remove_cache, cache, 1);
+#else
+       per_cpu_remove_caches_sync(cache);
+#endif

        raw_spin_lock_irqsave(&quarantine_lock, flags);
        for (i = 0; i < QUARANTINE_BATCHES; i++) {


Reply via email to