Dear RT folks!

I'm pleased to announce the v5.0.21-rt13 patch set. 

Changes since v5.0.21-rt12:

  - A patch by Kirill Smelkov to avoid deadlock in the switchtec driver.

  - Rework of the hrtimer, timer and posix-timer cancellation interface
    on -RT. Instead of the swait/schedule interface we now have locks
    which are taken while a timer is active. During the cancellation of an
    active timer the lock is acquired. The lock will then either
    PI-boost the timer or block and wait until the timer has completed.
    The new code looks simpler and no longer triggers a warning from
    rcu_note_context_switch(), as reported by Grygorii Strashko
    and Daniel Wagner.
    The patches were contributed by Anna-Maria Gleixner.

  - Drop a preempt_disable_rt() statement in get_nohz_timer_target().
    The caller holds a lock which already disables preemption.

  - Fix a deadlock in tasklet_kill(): since the softirq rework it could
    deadlock if the task invoking tasklet_kill() preempted the active
    tasklet.

  - Fix in_softirq() (and related functions), which did not work as
    expected since the softirq rework.

  - RCU_FAST_NO_HZ was disabled on RT because a timer was used in a bad
    context. After double checking, this is no longer the case and the
    option can be enabled (but it depends on RCU_EXPERT so be careful).

  - The option "rcu.rcu_normal_after_boot=1" is set by default on RT.
    It is now no longer possible to disable it on the command line.
    Suggested by Paul E. McKenney.

  - Backport a patch from upstream to introduce
    user_access_{save,restore}() which is needed due to a backport made
    by stable.

Known issues
     - rcutorture is currently broken on -RT. Reported by Juri Lelli.

The delta patch against v5.0.21-rt12 is appended below and can be found here:

     https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/incr/patch-5.0.21-rt12-rt13.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.0.21-rt13

The RT patch against v5.0.21 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/older/patch-5.0.21-rt13.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/older/patches-5.0.21-rt13.tar.xz

Sebastian

diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index db333300bd4be..6cfe431710203 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -58,6 +58,23 @@ static __always_inline void stac(void)
        alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
 }
 
+static __always_inline unsigned long smap_save(void)
+{
+       unsigned long flags;
+
+       asm volatile (ALTERNATIVE("", "pushf; pop %0; " __stringify(__ASM_CLAC),
+                                 X86_FEATURE_SMAP)
+                     : "=rm" (flags) : : "memory", "cc");
+
+       return flags;
+}
+
+static __always_inline void smap_restore(unsigned long flags)
+{
+       asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+                     : : "g" (flags) : "memory", "cc");
+}
+
 /* These macros can be used in asm() statements */
 #define ASM_CLAC \
        ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
@@ -69,6 +86,9 @@ static __always_inline void stac(void)
 static inline void clac(void) { }
 static inline void stac(void) { }
 
+static inline unsigned long smap_save(void) { return 0; }
+static inline void smap_restore(unsigned long flags) { }
+
 #define ASM_CLAC
 #define ASM_STAC
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 2b0dd1b9c2087..743e1a96cd6ea 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -720,6 +720,9 @@ static __must_check inline bool user_access_begin(const 
void __user *ptr, size_t
 #define user_access_begin(a,b) user_access_begin(a,b)
 #define user_access_end()      __uaccess_end()
 
+#define user_access_save()     smap_save()
+#define user_access_restore(x) smap_restore(x)
+
 #define unsafe_put_user(x, ptr, label) \
        __put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
 
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 80823ad221ba5..ba17eaa410f96 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -392,7 +392,7 @@ static int switchtec_dev_open(struct inode *inode, struct 
file *filp)
                return PTR_ERR(stuser);
 
        filp->private_data = stuser;
-       nonseekable_open(inode, filp);
+       stream_open(inode, filp);
 
        dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
 
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 190cb85044112..86ce98700f323 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -471,10 +471,11 @@ static int do_timerfd_settime(int ufd, int flags,
                                break;
                }
                spin_unlock_irq(&ctx->wqh.lock);
+
                if (isalarm(ctx))
-                       hrtimer_wait_for_timer(&ctx->t.alarm.timer);
+                       hrtimer_grab_expiry_lock(&ctx->t.alarm.timer);
                else
-                       hrtimer_wait_for_timer(&ctx->t.tmr);
+                       hrtimer_grab_expiry_lock(&ctx->t.tmr);
        }
 
        /*
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 34e9c6b74ae0a..c737e15ea6536 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -19,7 +19,6 @@
 #include <linux/percpu.h>
 #include <linux/timer.h>
 #include <linux/timerqueue.h>
-#include <linux/wait.h>
 
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
@@ -190,6 +189,8 @@ enum  hrtimer_base_type {
  * @nr_retries:                Total number of hrtimer interrupt retries
  * @nr_hangs:          Total number of hrtimer interrupt hangs
  * @max_hang_time:     Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ *                      expired
  * @expires_next:      absolute time of the next event, is required for remote
  *                     hrtimer enqueue; it is the total first expiry time (hard
  *                     and soft hrtimer are taken into account)
@@ -217,12 +218,10 @@ struct hrtimer_cpu_base {
        unsigned short                  nr_hangs;
        unsigned int                    max_hang_time;
 #endif
+       spinlock_t                      softirq_expiry_lock;
        ktime_t                         expires_next;
        struct hrtimer                  *next_timer;
        ktime_t                         softirq_expires_next;
-#ifdef CONFIG_PREEMPT_RT_BASE
-       wait_queue_head_t               wait;
-#endif
        struct hrtimer                  *softirq_next_timer;
        struct hrtimer_clock_base       clock_base[HRTIMER_MAX_CLOCK_BASES];
 } ____cacheline_aligned;
@@ -423,6 +422,7 @@ static inline void hrtimer_start(struct hrtimer *timer, 
ktime_t tim,
 
 extern int hrtimer_cancel(struct hrtimer *timer);
 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
+extern void hrtimer_grab_expiry_lock(const struct hrtimer *timer);
 
 static inline void hrtimer_start_expires(struct hrtimer *timer,
                                         enum hrtimer_mode mode)
@@ -440,13 +440,6 @@ static inline void hrtimer_restart(struct hrtimer *timer)
        hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
-/* Softirq preemption could deadlock timer removal */
-#ifdef CONFIG_PREEMPT_RT_BASE
-  extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
-#else
-# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0)
-#endif
-
 /* Query timers: */
 extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool 
adjust);
 
@@ -472,7 +465,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
  * Helper function to check, whether the timer is running the callback
  * function
  */
-static inline int hrtimer_callback_running(const struct hrtimer *timer)
+static inline int hrtimer_callback_running(struct hrtimer *timer)
 {
        return timer->base->running == timer;
 }
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9329de0d8bfdd..64762225e1756 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -596,7 +596,10 @@ static inline void tasklet_unlock(struct tasklet_struct *t)
 
 static inline void tasklet_unlock_wait(struct tasklet_struct *t)
 {
-       while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
+       while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
+               local_bh_disable();
+               local_bh_enable();
+       }
 }
 #else
 #define tasklet_trylock(t) 1
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 05cd7466d10a8..d3552a5bcc8b2 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -15,6 +15,7 @@ struct cpu_timer_list {
        u64 expires, incr;
        struct task_struct *task;
        int firing;
+       int firing_cpu;
 };
 
 /*
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 6a4884268f4c9..d559e3a0379c2 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -80,14 +80,6 @@
 #define hardirq_count()        (preempt_count() & HARDIRQ_MASK)
 #define irq_count()    (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
                                 | NMI_MASK))
-#ifdef CONFIG_PREEMPT_RT_FULL
-
-long softirq_count(void);
-
-#else
-#define softirq_count()        (preempt_count() & SOFTIRQ_MASK)
-#endif
-
 /*
  * Are we doing bottom half or hardware interrupt processing?
  *
@@ -102,12 +94,23 @@ long softirq_count(void);
  *       should not be used in new code.
  */
 #define in_irq()               (hardirq_count())
-#define in_softirq()           (softirq_count())
 #define in_interrupt()         (irq_count())
-#define in_serving_softirq()   (softirq_count() & SOFTIRQ_OFFSET)
 #define in_nmi()               (preempt_count() & NMI_MASK)
 #define in_task()              (!(preempt_count() & \
                                   (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+#define softirq_count()                ((long)get_current()->softirq_count)
+#define in_softirq()           (softirq_count())
+#define in_serving_softirq()   (get_current()->softirq_count & SOFTIRQ_OFFSET)
+
+#else
+
+#define softirq_count()                (preempt_count() & SOFTIRQ_MASK)
+#define in_softirq()           (softirq_count())
+#define in_serving_softirq()   (softirq_count() & SOFTIRQ_OFFSET)
+
+#endif
 
 /*
  * The preempt_count offset after preempt_disable();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e1ea2ea52feb0..8c5bc47f934c3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -961,6 +961,9 @@ struct task_struct {
        int                             softirqs_enabled;
        int                             softirq_context;
 #endif
+#ifdef CONFIG_PREEMPT_RT_FULL
+       int                             softirq_count;
+#endif
 
 #ifdef CONFIG_LOCKDEP
 # define MAX_LOCK_DEPTH                        48UL
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index d3afc0f018147..7f7356e151ce3 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -270,6 +270,8 @@ extern long strncpy_from_unsafe(char *dst, const void 
*unsafe_addr, long count);
 #define user_access_end() do { } while (0)
 #define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) 
goto err; } while (0)
 #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) 
goto err; } while (0)
+static inline unsigned long user_access_save(void) { return 0UL; }
+static inline void user_access_restore(unsigned long flags) { }
 #endif
 
 #ifdef CONFIG_HARDENED_USERCOPY
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 7e36ea9b7b720..5f5a54714a7a3 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -172,7 +172,7 @@ config RCU_FANOUT_LEAF
 
 config RCU_FAST_NO_HZ
        bool "Accelerate last non-dyntick-idle CPU's grace periods"
-       depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL
+       depends on NO_HZ_COMMON && SMP && RCU_EXPERT
        default n
        help
          This option permits CPUs to enter dynticks-idle state even if
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 3700b730ea55d..aae5968ec9ebb 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -69,7 +69,9 @@ module_param(rcu_expedited, int, 0);
 extern int rcu_normal; /* from sysctl */
 module_param(rcu_normal, int, 0);
 static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL);
+#ifndef CONFIG_PREEMPT_RT_FULL
 module_param(rcu_normal_after_boot, int, 0);
+#endif
 #endif /* #ifndef CONFIG_TINY_RCU */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a8493ff60b673..2bd114e788a10 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -570,14 +570,11 @@ void resched_cpu(int cpu)
  */
 int get_nohz_timer_target(void)
 {
-       int i, cpu;
+       int i, cpu = smp_processor_id();
        struct sched_domain *sd;
 
-       preempt_disable_rt();
-       cpu = smp_processor_id();
-
        if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER))
-               goto preempt_en_rt;
+               return cpu;
 
        rcu_read_lock();
        for_each_domain(cpu, sd) {
@@ -596,8 +593,6 @@ int get_nohz_timer_target(void)
                cpu = housekeeping_any_cpu(HK_FLAG_TIMER);
 unlock:
        rcu_read_unlock();
-preempt_en_rt:
-       preempt_enable_rt();
        return cpu;
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 473369122ddd0..c4fae96f23c54 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -109,12 +109,6 @@ static bool ksoftirqd_running(unsigned long pending)
 static DEFINE_LOCAL_IRQ_LOCK(bh_lock);
 static DEFINE_PER_CPU(long, softirq_counter);
 
-long softirq_count(void)
-{
-       return raw_cpu_read(softirq_counter);
-}
-EXPORT_SYMBOL(softirq_count);
-
 void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)
 {
        unsigned long __maybe_unused flags;
@@ -125,6 +119,7 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int 
cnt)
                local_lock(bh_lock);
        soft_cnt = this_cpu_inc_return(softirq_counter);
        WARN_ON_ONCE(soft_cnt == 0);
+       current->softirq_count += SOFTIRQ_DISABLE_OFFSET;
 
 #ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_save(flags);
@@ -155,6 +150,7 @@ void _local_bh_enable(void)
        local_irq_restore(flags);
 #endif
 
+       current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
        if (!in_atomic())
                local_unlock(bh_lock);
 }
@@ -192,6 +188,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int 
cnt)
        if (!in_atomic())
                local_unlock(bh_lock);
 
+       current->softirq_count -= SOFTIRQ_DISABLE_OFFSET;
        preempt_check_resched();
 }
 EXPORT_SYMBOL(__local_bh_enable_ip);
@@ -365,7 +362,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
        pending = local_softirq_pending();
        account_irq_enter_time(current);
 
-#ifndef CONFIG_PREEMPT_RT_FULL
+#ifdef CONFIG_PREEMPT_RT_FULL
+       current->softirq_count |= SOFTIRQ_OFFSET;
+#else
        __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
 #endif
        in_hardirq = lockdep_softirq_start();
@@ -418,7 +417,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 
        lockdep_softirq_end(in_hardirq);
        account_irq_exit_time(current);
-#ifndef CONFIG_PREEMPT_RT_FULL
+#ifdef CONFIG_PREEMPT_RT_FULL
+       current->softirq_count &= ~SOFTIRQ_OFFSET;
+#else
        __local_bh_enable(SOFTIRQ_OFFSET);
 #endif
        WARN_ON_ONCE(in_interrupt());
@@ -468,7 +469,7 @@ void irq_enter(void)
 
 static inline void invoke_softirq(void)
 {
-       if (softirq_count() == 0)
+       if (this_cpu_read(softirq_counter) == 0)
                wakeup_softirqd();
 }
 
@@ -552,7 +553,7 @@ void raise_softirq_irqoff(unsigned int nr)
         * If were are not in BH-disabled section then we have to wake
         * ksoftirqd.
         */
-       if (softirq_count() == 0)
+       if (this_cpu_read(softirq_counter) == 0)
                wakeup_softirqd();
 }
 
@@ -704,7 +705,8 @@ void tasklet_kill(struct tasklet_struct *t)
 
        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do {
-                       yield();
+                       local_bh_disable();
+                       local_bh_enable();
                } while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f6cd4bed61846..9f17e011087eb 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -433,7 +433,7 @@ int alarm_cancel(struct alarm *alarm)
                int ret = alarm_try_to_cancel(alarm);
                if (ret >= 0)
                        return ret;
-               hrtimer_wait_for_timer(&alarm->timer);
+               hrtimer_grab_expiry_lock(&alarm->timer);
        }
 }
 EXPORT_SYMBOL_GPL(alarm_cancel);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 6e5d62bdebf22..2067f461b12a3 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -930,33 +930,16 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, 
ktime_t interval)
 }
 EXPORT_SYMBOL_GPL(hrtimer_forward);
 
-#ifdef CONFIG_PREEMPT_RT_BASE
-# define wake_up_timer_waiters(b)      wake_up(&(b)->wait)
-
-/**
- * hrtimer_wait_for_timer - Wait for a running timer
- *
- * @timer:     timer to wait for
- *
- * The function waits in case the timers callback function is
- * currently executed on the waitqueue of the timer base. The
- * waitqueue is woken up after the timer callback function has
- * finished execution.
- */
-void hrtimer_wait_for_timer(const struct hrtimer *timer)
+void hrtimer_grab_expiry_lock(const struct hrtimer *timer)
 {
        struct hrtimer_clock_base *base = timer->base;
 
-       if (base && base->cpu_base &&
-           base->index >= HRTIMER_BASE_MONOTONIC_SOFT)
-               wait_event(base->cpu_base->wait,
-                               !(hrtimer_callback_running(timer)));
+       if (base && base->cpu_base) {
+               spin_lock(&base->cpu_base->softirq_expiry_lock);
+               spin_unlock(&base->cpu_base->softirq_expiry_lock);
+       }
 }
 
-#else
-# define wake_up_timer_waiters(b)      do { } while (0)
-#endif
-
 /*
  * enqueue_hrtimer - internal function to (re)start a timer
  *
@@ -1191,7 +1174,7 @@ int hrtimer_cancel(struct hrtimer *timer)
 
                if (ret >= 0)
                        return ret;
-               hrtimer_wait_for_timer(timer);
+               hrtimer_grab_expiry_lock(timer);
        }
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1495,6 +1478,7 @@ static __latent_entropy void hrtimer_run_softirq(struct 
softirq_action *h)
        unsigned long flags;
        ktime_t now;
 
+       spin_lock(&cpu_base->softirq_expiry_lock);
        raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
        now = hrtimer_update_base(cpu_base);
@@ -1504,7 +1488,7 @@ static __latent_entropy void hrtimer_run_softirq(struct 
softirq_action *h)
        hrtimer_update_softirq_timer(cpu_base, true);
 
        raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
-       wake_up_timer_waiters(cpu_base);
+       spin_unlock(&cpu_base->softirq_expiry_lock);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1914,9 +1898,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
        cpu_base->softirq_next_timer = NULL;
        cpu_base->expires_next = KTIME_MAX;
        cpu_base->softirq_expires_next = KTIME_MAX;
-#ifdef CONFIG_PREEMPT_RT_BASE
-       init_waitqueue_head(&cpu_base->wait);
-#endif
+       spin_lock_init(&cpu_base->softirq_expiry_lock);
        return 0;
 }
 
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index cb2b301d05490..d999294adeefd 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -213,7 +213,7 @@ int do_setitimer(int which, struct itimerval *value, struct 
itimerval *ovalue)
                /* We are sharing ->siglock with it_real_fn() */
                if (hrtimer_try_to_cancel(timer) < 0) {
                        spin_unlock_irq(&tsk->sighand->siglock);
-                       hrtimer_wait_for_timer(&tsk->signal->real_timer);
+                       hrtimer_grab_expiry_lock(timer);
                        goto again;
                }
                expires = timeval_to_ktime(value->it_value);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a436ee592737a..5bb1edffe0d05 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -789,6 +789,7 @@ check_timers_list(struct list_head *timers,
                        return t->expires;
 
                t->firing = 1;
+               t->firing_cpu = smp_processor_id();
                list_move_tail(&t->entry, firing);
        }
 
@@ -1131,6 +1132,20 @@ static inline int fastpath_timer_check(struct 
task_struct *tsk)
        return 0;
 }
 
+static DEFINE_PER_CPU(spinlock_t, cpu_timer_expiry_lock) = 
__SPIN_LOCK_UNLOCKED(cpu_timer_expiry_lock);
+
+void cpu_timers_grab_expiry_lock(struct k_itimer *timer)
+{
+       int cpu = timer->it.cpu.firing_cpu;
+
+       if (cpu >= 0) {
+               spinlock_t *expiry_lock = per_cpu_ptr(&cpu_timer_expiry_lock, 
cpu);
+
+               spin_lock_irq(expiry_lock);
+               spin_unlock_irq(expiry_lock);
+       }
+}
+
 /*
  * This is called from the timer interrupt handler.  The irq handler has
  * already updated our counts.  We need to check if any timers fire now.
@@ -1141,6 +1156,7 @@ static void __run_posix_cpu_timers(struct task_struct 
*tsk)
        LIST_HEAD(firing);
        struct k_itimer *timer, *next;
        unsigned long flags;
+       spinlock_t *expiry_lock;
 
        /*
         * The fast path checks that there are no expired thread or thread
@@ -1149,6 +1165,9 @@ static void __run_posix_cpu_timers(struct task_struct 
*tsk)
        if (!fastpath_timer_check(tsk))
                return;
 
+       expiry_lock = this_cpu_ptr(&cpu_timer_expiry_lock);
+       spin_lock(expiry_lock);
+
        if (!lock_task_sighand(tsk, &flags))
                return;
        /*
@@ -1183,6 +1202,7 @@ static void __run_posix_cpu_timers(struct task_struct 
*tsk)
                list_del_init(&timer->it.cpu.entry);
                cpu_firing = timer->it.cpu.firing;
                timer->it.cpu.firing = 0;
+               timer->it.cpu.firing_cpu = -1;
                /*
                 * The firing flag is -1 if we collided with a reset
                 * of the timer, which already reported this
@@ -1192,6 +1212,7 @@ static void __run_posix_cpu_timers(struct task_struct 
*tsk)
                        cpu_timer_fire(timer);
                spin_unlock(&timer->it_lock);
        }
+       spin_unlock(expiry_lock);
 }
 
 #ifdef CONFIG_PREEMPT_RT_BASE
@@ -1457,6 +1478,8 @@ static int do_cpu_nanosleep(const clockid_t which_clock, 
int flags,
                spin_unlock_irq(&timer.it_lock);
 
                while (error == TIMER_RETRY) {
+
+                       cpu_timers_grab_expiry_lock(&timer);
                        /*
                         * We need to handle case when timer was or is in the
                         * middle of firing. In other cases we already freed
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index a4f57a1ea0df2..2c1ca3cc391b2 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -800,27 +800,22 @@ static void common_hrtimer_arm(struct k_itimer *timr, 
ktime_t expires,
                hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
-/*
- * Protected by RCU!
- */
-static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer 
*timr)
-{
-#ifdef CONFIG_PREEMPT_RT_FULL
-       if (kc->timer_arm == common_hrtimer_arm)
-               hrtimer_wait_for_timer(&timr->it.real.timer);
-       else if (kc == &alarm_clock)
-               hrtimer_wait_for_timer(&timr->it.alarm.alarmtimer.timer);
-       else
-               /* FIXME: Whacky hack for posix-cpu-timers */
-               schedule_timeout(1);
-#endif
-}
-
 static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
 {
        return hrtimer_try_to_cancel(&timr->it.real.timer);
 }
 
+static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer 
*timer)
+{
+       if (kc->timer_arm == common_hrtimer_arm)
+               hrtimer_grab_expiry_lock(&timer->it.real.timer);
+       else if (kc == &alarm_clock)
+               hrtimer_grab_expiry_lock(&timer->it.alarm.alarmtimer.timer);
+       else
+               /* posix-cpu-timers */
+               cpu_timers_grab_expiry_lock(timer);
+}
+
 /* Set a POSIX.1b interval timer. */
 int common_timer_set(struct k_itimer *timr, int flags,
                     struct itimerspec64 *new_setting,
@@ -880,21 +875,21 @@ static int do_timer_settime(timer_t timer_id, int flags,
        if (!timr)
                return -EINVAL;
 
-       rcu_read_lock();
        kc = timr->kclock;
        if (WARN_ON_ONCE(!kc || !kc->timer_set))
                error = -EINVAL;
        else
                error = kc->timer_set(timr, flags, new_spec64, old_spec64);
 
-       unlock_timer(timr, flag);
        if (error == TIMER_RETRY) {
+               rcu_read_lock();
+               unlock_timer(timr, flag);
                timer_wait_for_callback(kc, timr);
-               old_spec64 = NULL;      // We already got the old time...
                rcu_read_unlock();
+               old_spec64 = NULL;      // We already got the old time...
                goto retry;
        }
-       rcu_read_unlock();
+       unlock_timer(timr, flag);
 
        return error;
 }
@@ -956,13 +951,21 @@ int common_timer_del(struct k_itimer *timer)
        return 0;
 }
 
-static inline int timer_delete_hook(struct k_itimer *timer)
+static int timer_delete_hook(struct k_itimer *timer)
 {
        const struct k_clock *kc = timer->kclock;
+       int ret;
 
        if (WARN_ON_ONCE(!kc || !kc->timer_del))
                return -EINVAL;
-       return kc->timer_del(timer);
+       ret = kc->timer_del(timer);
+       if (ret == TIMER_RETRY) {
+               rcu_read_lock();
+               spin_unlock_irq(&timer->it_lock);
+               timer_wait_for_callback(kc, timer);
+               rcu_read_unlock();
+       }
+       return ret;
 }
 
 /* Delete a POSIX.1b interval timer. */
@@ -976,15 +979,8 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
        if (!timer)
                return -EINVAL;
 
-       rcu_read_lock();
-       if (timer_delete_hook(timer) == TIMER_RETRY) {
-               unlock_timer(timer, flags);
-               timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
-                                       timer);
-               rcu_read_unlock();
+       if (timer_delete_hook(timer) == TIMER_RETRY)
                goto retry_delete;
-       }
-       rcu_read_unlock();
 
        spin_lock(&current->sighand->siglock);
        list_del(&timer->list);
@@ -1010,20 +1006,9 @@ static void itimer_delete(struct k_itimer *timer)
 retry_delete:
        spin_lock_irqsave(&timer->it_lock, flags);
 
-       /* On RT we can race with a deletion */
-       if (!timer->it_signal) {
-               unlock_timer(timer, flags);
-               return;
-       }
-
-       if (timer_delete_hook(timer) == TIMER_RETRY) {
-               rcu_read_lock();
-               unlock_timer(timer, flags);
-               timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
-                                       timer);
-               rcu_read_unlock();
+       if (timer_delete_hook(timer) == TIMER_RETRY)
                goto retry_delete;
-       }
+
        list_del(&timer->list);
        /*
         * This keeps any tasks waiting on the spin lock from thinking
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index ddb21145211a0..725bd230a8db4 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -32,6 +32,8 @@ extern const struct k_clock clock_process;
 extern const struct k_clock clock_thread;
 extern const struct k_clock alarm_clock;
 
+extern void cpu_timers_grab_expiry_lock(struct k_itimer *timer);
+
 int posix_timer_event(struct k_itimer *timr, int si_private);
 
 void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 227dba00dd0ef..0b5f07c2fa834 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -43,7 +43,6 @@
 #include <linux/sched/debug.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
-#include <linux/swait.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -197,9 +196,7 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
        raw_spinlock_t          lock;
        struct timer_list       *running_timer;
-#ifdef CONFIG_PREEMPT_RT_FULL
-       struct swait_queue_head wait_for_running_timer;
-#endif
+       spinlock_t              expiry_lock;
        unsigned long           clk;
        unsigned long           next_expiry;
        unsigned int            cpu;
@@ -1181,33 +1178,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
 }
 EXPORT_SYMBOL_GPL(add_timer_on);
 
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * Wait for a running timer
- */
-static void wait_for_running_timer(struct timer_list *timer)
-{
-       struct timer_base *base;
-       u32 tf = timer->flags;
-
-       if (tf & TIMER_MIGRATING)
-               return;
-
-       base = get_timer_base(tf);
-       swait_event_exclusive(base->wait_for_running_timer,
-                             base->running_timer != timer);
-}
-
-# define wakeup_timer_waiters(b)       
swake_up_all(&(b)->wait_for_running_timer)
-#else
-static inline void wait_for_running_timer(struct timer_list *timer)
-{
-       cpu_relax();
-}
-
-# define wakeup_timer_waiters(b)       do { } while (0)
-#endif
-
 /**
  * del_timer - deactivate a timer.
  * @timer: the timer to be deactivated
@@ -1237,6 +1207,25 @@ int del_timer(struct timer_list *timer)
 }
 EXPORT_SYMBOL(del_timer);
 
+static int __try_to_del_timer_sync(struct timer_list *timer,
+                                  struct timer_base **basep)
+{
+       struct timer_base *base;
+       unsigned long flags;
+       int ret = -1;
+
+       debug_assert_init(timer);
+
+       *basep = base = lock_timer_base(timer, &flags);
+
+       if (base->running_timer != timer)
+               ret = detach_if_pending(timer, base, true);
+
+       raw_spin_unlock_irqrestore(&base->lock, flags);
+
+       return ret;
+}
+
 /**
  * try_to_del_timer_sync - Try to deactivate a timer
  * @timer: timer to delete
@@ -1247,23 +1236,31 @@ EXPORT_SYMBOL(del_timer);
 int try_to_del_timer_sync(struct timer_list *timer)
 {
        struct timer_base *base;
-       unsigned long flags;
-       int ret = -1;
 
-       debug_assert_init(timer);
-
-       base = lock_timer_base(timer, &flags);
-
-       if (base->running_timer != timer)
-               ret = detach_if_pending(timer, base, true);
-
-       raw_spin_unlock_irqrestore(&base->lock, flags);
-
-       return ret;
+       return __try_to_del_timer_sync(timer, &base);
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
+static int __del_timer_sync(struct timer_list *timer)
+{
+       struct timer_base *base;
+       int ret;
+
+       for (;;) {
+               ret = __try_to_del_timer_sync(timer, &base);
+               if (ret >= 0)
+                       return ret;
+
+               /*
+                * When accessing the lock, timers of base are no longer expired
+                * and so timer is no longer running.
+                */
+               spin_lock(&base->expiry_lock);
+               spin_unlock(&base->expiry_lock);
+       }
+}
+
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1319,12 +1316,8 @@ int del_timer_sync(struct timer_list *timer)
         * could lead to deadlock.
         */
        WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-       for (;;) {
-               int ret = try_to_del_timer_sync(timer);
-               if (ret >= 0)
-                       return ret;
-               wait_for_running_timer(timer);
-       }
+
+       return __del_timer_sync(timer);
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1389,11 +1382,15 @@ static void expire_timers(struct timer_base *base, 
struct hlist_head *head)
                        raw_spin_unlock(&base->lock);
                        call_timer_fn(timer, fn);
                        base->running_timer = NULL;
+                       spin_unlock(&base->expiry_lock);
+                       spin_lock(&base->expiry_lock);
                        raw_spin_lock(&base->lock);
                } else {
                        raw_spin_unlock_irq(&base->lock);
                        call_timer_fn(timer, fn);
                        base->running_timer = NULL;
+                       spin_unlock(&base->expiry_lock);
+                       spin_lock(&base->expiry_lock);
                        raw_spin_lock_irq(&base->lock);
                }
        }
@@ -1688,6 +1685,7 @@ static inline void __run_timers(struct timer_base *base)
        if (!time_after_eq(jiffies, base->clk))
                return;
 
+       spin_lock(&base->expiry_lock);
        raw_spin_lock_irq(&base->lock);
 
        /*
@@ -1715,7 +1713,7 @@ static inline void __run_timers(struct timer_base *base)
                        expire_timers(base, heads + levels);
        }
        raw_spin_unlock_irq(&base->lock);
-       wakeup_timer_waiters(base);
+       spin_unlock(&base->expiry_lock);
 }
 
 /*
@@ -1962,9 +1960,7 @@ static void __init init_timer_cpu(int cpu)
                base->cpu = cpu;
                raw_spin_lock_init(&base->lock);
                base->clk = jiffies;
-#ifdef CONFIG_PREEMPT_RT_FULL
-               init_swait_queue_head(&base->wait_for_running_timer);
-#endif
+               spin_lock_init(&base->expiry_lock);
        }
 }
 
diff --git a/localversion-rt b/localversion-rt
index 6e44e540b927b..9f7d0bdbffb18 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt12
+-rt13

Reply via email to