We have a global offloading state and make the offloading
decision based on the printing task pointer and a timestamp. If
we keep seeing the same task printing for too long
(`atomic_print_limit') we request offloading. Conversely,
when we see that printing is now performed by another task,
we reset the timestamp counter.

This, however, will not work in the following case:

===============================================================================

CPU0                                            CPU1
//taskA                                         //taskB
preempt_disable()                               preempt_disable()

 printk()
  console_trylock()
  console_unlock()
   printing_task = taskA
  up()
                                                printk()
                                                 console_trylock()
                                                 console_unlock()
                                                  printing_task = taskB
                                                  ^^^ reset offloading control
                                                up()
 printk()
  console_trylock()
  console_unlock()
   printing_task = taskA
   ^^^ reset offloading control
  up()
                                                printk()
                                                 console_trylock()
                                                 console_unlock()
                                                  printing_task = taskB
                                                  ^^^ reset offloading control
                                                up()

/*
 * X seconds later
 */

 printk()
  console_trylock()
  console_unlock()
   printing_task = taskA
   ^^^ reset offloading control
  up()
                                                printk()
                                                 console_trylock()
                                                 console_unlock()
                                                  printing_task = taskB
                                                  ^^^ reset offloading control
                                                up()

 lockup!                                        lockup!

===============================================================================

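So this printk ping-pong confuses our offloading control logic:
each task keeps resetting the counters set up by the other one,
so the time limit is never reached and offloading is never
requested. Move the offloading state to the per-CPU area and
have a separate offloading control on every CPU.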

Signed-off-by: Sergey Senozhatsky <sergey.senozhat...@gmail.com>
---
 kernel/printk/printk.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index ba82152ce5d9..f9799616e9fc 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -558,13 +558,14 @@ static inline void adj_atomic_print_limit(void)
 #endif
 }
 
-static inline unsigned long emergency_timeout(unsigned long ts)
+static inline unsigned long emergency_timeout(unsigned long now,
+                                               unsigned long ts)
 {
 #ifdef CONFIG_LOCKUP_DETECTOR
        if (watchdog_thresh)
-               return ts + 2 * watchdog_thresh;
+               return time_after_eq(now, ts + 2 * watchdog_thresh);
 #endif
-       return ts + 10 * atomic_print_limit;
+       return time_after_eq(now, ts + 10 * atomic_print_limit);
 }
 
 /*
@@ -574,13 +575,13 @@ static inline unsigned long emergency_timeout(unsigned long ts)
  * amount of time a process can print from console_unlock().
  *
  * This function must be called from 'printk_safe' context under
- * console_sem lock.
+ * console_sem lock with preemption disabled.
  */
 static inline bool console_offload_printing(void)
 {
-       static struct task_struct *printing_task;
-       static unsigned long printing_start_ts;
-       static unsigned long saved_csw;
+       static DEFINE_PER_CPU(struct task_struct, *printing_task);
+       static DEFINE_PER_CPU(unsigned long, printing_start_ts);
+       static DEFINE_PER_CPU(unsigned long, saved_csw);
        unsigned long now = local_clock() >> 30LL; /* seconds */
 
        if (printk_kthread_should_stop())
@@ -600,16 +601,17 @@ static inline bool console_offload_printing(void)
                goto offload;
 
        /* A new task - reset the counters. */
-       if (printing_task != current) {
-               printing_start_ts = local_clock() >> 30LL;
-               saved_csw = current->nvcsw + current->nivcsw;
-               printing_task = current;
+       if (this_cpu_read(printing_task) != current) {
+               this_cpu_write(printing_start_ts, local_clock() >> 30LL);
+               this_cpu_write(saved_csw, current->nvcsw + current->nivcsw);
+               this_cpu_write(printing_task, current);
                return false;
        }
 
        adj_atomic_print_limit();
 
-       if (!time_after_eq(now, printing_start_ts + atomic_print_limit))
+       if (!time_after_eq(now, this_cpu_read(printing_start_ts) +
+                               atomic_print_limit))
                return false;
 
        if (current == printk_kthread) {
@@ -626,7 +628,7 @@ static inline bool console_offload_printing(void)
                 * back to console_unlock(), it will have another full
                 * `atomic_print_limit' time slice.
                 */
-               printing_start_ts = local_clock() >> 30LL;
+               this_cpu_write(printing_start_ts, local_clock() >> 30LL);
                return true;
        }
 
@@ -634,10 +636,12 @@ static inline bool console_offload_printing(void)
         * A trivial emergency enforcement - give up on printk_kthread if
         * we can't wake it up.
         */
-       if (time_after_eq(now, emergency_timeout(printing_start_ts)) &&
-                       saved_csw == (current->nvcsw + current->nivcsw)) {
+       if (this_cpu_read(saved_csw) == (current->nvcsw + current->nivcsw)
+               && emergency_timeout(now, this_cpu_read(printing_start_ts))) {
+
                printk_enforce_emergency = true;
-               pr_crit("Declaring printk emergency mode.\n");
+               pr_crit("CPU%d declared a printk emergency mode.\n",
+                               smp_processor_id());
                return true;
        }
 
-- 
2.14.1

Reply via email to