Linus,

please pull the latest timers-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers-urgent-for-linus

A pile of fixes for long standing issues with the timer wheel and the NOHZ
code:

  - Prevent timer base confusion across the nohz switch, which can cause
    unlocked access and data corruption

  - Reinitialize the stale base clock on CPU hotplug to prevent subtle
    side effects including rollovers on 32-bit

  - Prevent an interrupt storm caused by tick_nohz_stop_sched_tick() when
    the timer softirq is already pending

  - Move the timer start tracepoint to a place where it actually makes sense
  
  - Add documentation to the timerqueue functions, as they have caused
    confusion several times.

Thanks,

        tglx

------------------>
Anna-Maria Gleixner (1):
      timers: Use deferrable base independent of base::nohz_active

Thomas Gleixner (4):
      timers: Reinitialize per cpu bases on hotplug
      nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick()
      timers: Invoke timer_start_debug() where it makes sense
      timerqueue: Document return values of timerqueue_add/del()


 include/linux/cpuhotplug.h |  2 +-
 include/linux/timer.h      |  4 +++-
 kernel/cpu.c               |  4 ++--
 kernel/time/tick-sched.c   | 19 +++++++++++++++++--
 kernel/time/timer.c        | 35 ++++++++++++++++++++++++-----------
 lib/timerqueue.c           |  8 +++++---
 6 files changed, 52 insertions(+), 20 deletions(-)

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 201ab7267986..1a32e558eb11 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -86,7 +86,7 @@ enum cpuhp_state {
        CPUHP_MM_ZSWP_POOL_PREPARE,
        CPUHP_KVM_PPC_BOOK3S_PREPARE,
        CPUHP_ZCOMP_PREPARE,
-       CPUHP_TIMERS_DEAD,
+       CPUHP_TIMERS_PREPARE,
        CPUHP_MIPS_SOC_PREPARE,
        CPUHP_BP_PREPARE_DYN,
        CPUHP_BP_PREPARE_DYN_END                = CPUHP_BP_PREPARE_DYN + 20,
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 04af640ea95b..2448f9cc48a3 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j);
 unsigned long round_jiffies_up_relative(unsigned long j);
 
 #ifdef CONFIG_HOTPLUG_CPU
+int timers_prepare_cpu(unsigned int cpu);
 int timers_dead_cpu(unsigned int cpu);
 #else
-#define timers_dead_cpu NULL
+#define timers_prepare_cpu     NULL
+#define timers_dead_cpu                NULL
 #endif
 
 #endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 41376c3ac93b..97858477e586 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1277,9 +1277,9 @@ static struct cpuhp_step cpuhp_bp_states[] = {
         * before blk_mq_queue_reinit_notify() from notify_dead(),
         * otherwise a RCU stall occurs.
         */
-       [CPUHP_TIMERS_DEAD] = {
+       [CPUHP_TIMERS_PREPARE] = {
                .name                   = "timers:dead",
-               .startup.single         = NULL,
+               .startup.single         = timers_prepare_cpu,
                .teardown.single        = timers_dead_cpu,
        },
        /* Kicks the plugged cpu into life */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 77555faf6fbc..f7cc7abfcf25 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -650,6 +650,11 @@ static void tick_nohz_restart(struct tick_sched *ts, 
ktime_t now)
        ts->next_tick = 0;
 }
 
+static inline bool local_timer_softirq_pending(void)
+{
+       return local_softirq_pending() & TIMER_SOFTIRQ;
+}
+
 static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
                                         ktime_t now, int cpu)
 {
@@ -666,8 +671,18 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched 
*ts,
        } while (read_seqretry(&jiffies_lock, seq));
        ts->last_jiffies = basejiff;
 
-       if (rcu_needs_cpu(basemono, &next_rcu) ||
-           arch_needs_cpu() || irq_work_needs_cpu()) {
+       /*
+        * Keep the periodic tick, when RCU, architecture or irq_work
+        * requests it.
+        * Aside of that check whether the local timer softirq is
+        * pending. If so it's a bad idea to call get_next_timer_interrupt()
+        * because there is an already expired timer, so it will request
+        * immediate expiry, which rearms the hardware timer with a
+        * minimal delta which brings us back to this place
+        * immediately. Lather, rinse and repeat...
+        */
+       if (rcu_needs_cpu(basemono, &next_rcu) || arch_needs_cpu() ||
+           irq_work_needs_cpu() || local_timer_softirq_pending()) {
                next_tick = basemono + TICK_NSEC;
        } else {
                /*
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index ffebcf878fba..89a9e1b4264a 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -823,11 +823,10 @@ static inline struct timer_base *get_timer_cpu_base(u32 
tflags, u32 cpu)
        struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);
 
        /*
-        * If the timer is deferrable and nohz is active then we need to use
-        * the deferrable base.
+        * If the timer is deferrable and NO_HZ_COMMON is set then we need
+        * to use the deferrable base.
         */
-       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
-           (tflags & TIMER_DEFERRABLE))
+       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
                base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
        return base;
 }
@@ -837,11 +836,10 @@ static inline struct timer_base 
*get_timer_this_cpu_base(u32 tflags)
        struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 
        /*
-        * If the timer is deferrable and nohz is active then we need to use
-        * the deferrable base.
+        * If the timer is deferrable and NO_HZ_COMMON is set then we need
+        * to use the deferrable base.
         */
-       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active &&
-           (tflags & TIMER_DEFERRABLE))
+       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
                base = this_cpu_ptr(&timer_bases[BASE_DEF]);
        return base;
 }
@@ -1009,8 +1007,6 @@ __mod_timer(struct timer_list *timer, unsigned long 
expires, unsigned int option
        if (!ret && (options & MOD_TIMER_PENDING_ONLY))
                goto out_unlock;
 
-       debug_activate(timer, expires);
-
        new_base = get_target_base(base, timer->flags);
 
        if (base != new_base) {
@@ -1034,6 +1030,8 @@ __mod_timer(struct timer_list *timer, unsigned long 
expires, unsigned int option
                }
        }
 
+       debug_activate(timer, expires);
+
        timer->expires = expires;
        /*
         * If 'idx' was calculated above and the base time did not advance
@@ -1684,7 +1682,7 @@ static __latent_entropy void run_timer_softirq(struct 
softirq_action *h)
        base->must_forward_clk = false;
 
        __run_timers(base);
-       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && base->nohz_active)
+       if (IS_ENABLED(CONFIG_NO_HZ_COMMON))
                __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF]));
 }
 
@@ -1855,6 +1853,21 @@ static void migrate_timer_list(struct timer_base 
*new_base, struct hlist_head *h
        }
 }
 
+int timers_prepare_cpu(unsigned int cpu)
+{
+       struct timer_base *base;
+       int b;
+
+       for (b = 0; b < NR_BASES; b++) {
+               base = per_cpu_ptr(&timer_bases[b], cpu);
+               base->clk = jiffies;
+               base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
+               base->is_idle = false;
+               base->must_forward_clk = true;
+       }
+       return 0;
+}
+
 int timers_dead_cpu(unsigned int cpu)
 {
        struct timer_base *old_base;
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index 4a720ed4fdaf..0d54bcbc8170 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -33,8 +33,9 @@
  * @head: head of timerqueue
  * @node: timer node to be added
  *
- * Adds the timer node to the timerqueue, sorted by the
- * node's expires value.
+ * Adds the timer node to the timerqueue, sorted by the node's expires
+ * value. Returns true if the newly added timer is the first expiring timer in
+ * the queue.
  */
 bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 {
@@ -70,7 +71,8 @@ EXPORT_SYMBOL_GPL(timerqueue_add);
  * @head: head of timerqueue
  * @node: timer node to be removed
  *
- * Removes the timer node from the timerqueue.
+ * Removes the timer node from the timerqueue. Returns true if the queue is
+ * not empty after the remove.
  */
 bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node)
 {

Reply via email to