printk() is quite complex internally and, basically, it does two
slightly independent things:
 a) adds a new message to a kernel log buffer (log_store())
 b) prints kernel log messages to serial consoles (console_unlock())

while (a) is guaranteed to be executed by printk(), (b) is not, for a
variety of reasons, and, unlike log_store(), it comes at a price:

 1) console_unlock() attempts to flush all pending kernel log messages
to the console. Thus, it can loop indefinitely.

 2) while console_unlock() is executed on one particular CPU, printing
pending kernel log messages, other CPUs can simultaneously append new
messages to the kernel log buffer.

 3) the time it takes console_unlock() to print kernel messages also
depends on the speed of the console -- which may not be fast at all.

 4) console_unlock() is executed in the same context as printk(), so
it may be non-preemptible/atomic, which makes 1)-3) dangerous.

As a result, nobody knows how long a printk() call will take, so
it's not really safe to call printk() in a number of situations,
including atomic context, RCU critical sections, interrupt context,
and more.

This patch introduces a dedicated printing kernel thread - printk_kthread.
The main purpose of this kthread is to offload printing to a non-atomic
and always schedulable context, which eliminates 4) and makes 1)-3) less
critical. printk() now just appends log messages to the kernel log buffer
and wake_up()s printk_kthread instead of locking console_sem and calling
into potentially unsafe console_unlock().

This, however, is not always safe on its own. For example, we can't call
into the scheduler from panic(), because this may cause a deadlock. That's
why we introduce a concept of printk_emergency() mode, when printk()
switches back to the old behaviour (console_unlock() from vprintk_emit())
in those cases. In general, this switch happens automatically once an EMERG
log level message appears in the log buffer. Other cases where wake_up()
might not work as expected are suspend, hibernate, etc. For those situations
we provide two new functions:
 -- printk_emergency_begin()
    Disables printk offloading. All printk() calls will attempt
    to lock the console_sem and, if successful, flush kernel log
    messages.

 -- printk_emergency_end()
    Enables printk offloading.

Signed-off-by: Sergey Senozhatsky <[email protected]>
Signed-off-by: Jan Kara <[email protected]>
---
 include/linux/console.h |   3 ++
 kernel/printk/printk.c  | 107 ++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/include/linux/console.h b/include/linux/console.h
index 5949d1855589..f1a86944072e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -187,6 +187,9 @@ extern bool console_suspend_enabled;
 extern void suspend_console(void);
 extern void resume_console(void);
 
+extern void printk_emergency_begin(void);
+extern void printk_emergency_end(void);
+
 int mda_console_init(void);
 void prom_con_init(void);
 
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 2d07678e9ff9..ab6b3b2a68c6 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -48,6 +48,7 @@
 #include <linux/sched/clock.h>
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
+#include <linux/kthread.h>
 
 #include <linux/uaccess.h>
 #include <asm/sections.h>
@@ -402,7 +403,8 @@ DEFINE_RAW_SPINLOCK(logbuf_lock);
        } while (0)
 
 /*
- * Delayed printk version, for scheduler-internal messages:
+ * Used both for deferred printk version (scheduler-internal messages)
+ * and printk_kthread control.
  */
 #define PRINTK_PENDING_WAKEUP  0x01
 #define PRINTK_PENDING_OUTPUT  0x02
@@ -445,6 +447,42 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
+static struct task_struct *printk_kthread __read_mostly;
+/*
+ * We can't call into the scheduler (wake_up() printk kthread) during
+ * suspend/kexec/etc. This temporarily switches printk to old behaviour.
+ */
+static atomic_t printk_emergency __read_mostly;
+/*
+ * Disable printk_kthread permanently. Unlike `oops_in_progress'
+ * it doesn't go back to 0.
+ */
+static bool printk_kthread_disabled __read_mostly;
+
+static inline bool printk_kthread_enabled(void)
+{
+       return !printk_kthread_disabled &&
+               printk_kthread && atomic_read(&printk_emergency) == 0;
+}
+
+/*
+ * This disables printing offloading and instead attempts
+ * to do the usual console_trylock()->console_unlock().
+ *
+ * Note, this does not stop the printk_kthread if it's already
+ * printing logbuf messages.
+ */
+void printk_emergency_begin(void)
+{
+       atomic_inc(&printk_emergency);
+}
+
+/* This re-enables printk_kthread offloading. */
+void printk_emergency_end(void)
+{
+       atomic_dec(&printk_emergency);
+}
+
 /* Return log buffer address */
 char *log_buf_addr_get(void)
 {
@@ -1765,17 +1803,40 @@ asmlinkage int vprintk_emit(int facility, int level,
 
        printed_len += log_output(facility, level, lflags, dict, dictlen, text, 
text_len);
 
+       /*
+        * Emergency level indicates that the system is unstable and, thus,
+        * we better stop relying on wake_up(printk_kthread) and try to do
+        * a direct printing.
+        */
+       if (level == LOGLEVEL_EMERG)
+               printk_kthread_disabled = true;
+
+       set_bit(PRINTK_PENDING_OUTPUT, &printk_pending);
        logbuf_unlock_irqrestore(flags);
 
        /* If called from the scheduler, we can not call up(). */
        if (!in_sched) {
                /*
-                * Try to acquire and then immediately release the console
-                * semaphore.  The release will print out buffers and wake up
-                * /dev/kmsg and syslog() users.
+                * Under heavy printing load/slow serial console/etc
+                * console_unlock() can stall CPUs, which can result in
+                * soft/hard-lockups, lost interrupts, RCU stalls, etc.
+                * Therefore we attempt to print the messages to console
+                * from a dedicated printk_kthread, which always runs in
+                * schedulable context.
                 */
-               if (console_trylock())
-                       console_unlock();
+               if (printk_kthread_enabled()) {
+                       printk_safe_enter_irqsave(flags);
+                       wake_up_process(printk_kthread);
+                       printk_safe_exit_irqrestore(flags);
+               } else {
+                       /*
+                        * Try to acquire and then immediately release the
+                        * console semaphore. The release will print out
+                        * buffers and wake up /dev/kmsg and syslog() users.
+                        */
+                       if (console_trylock())
+                               console_unlock();
+               }
        }
 
        return printed_len;
@@ -1882,6 +1943,9 @@ static size_t msg_print_text(const struct printk_log *msg,
                             bool syslog, char *buf, size_t size) { return 0; }
 static bool suppress_message_printing(int level) { return false; }
 
+void printk_emergency_begin(void) {}
+void printk_emergency_end(void) {}
+
 #endif /* CONFIG_PRINTK */
 
 #ifdef CONFIG_EARLY_PRINTK
@@ -2164,6 +2228,13 @@ void console_unlock(void)
        bool do_cond_resched, retry;
 
        if (console_suspended) {
+               /*
+                * Avoid an infinite loop in printk_kthread function
+                * when console_unlock() cannot flush messages because
+                * we suspended consoles. Someone else will print the
+                * messages from resume_console().
+                */
+               clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending);
                up_console_sem();
                return;
        }
@@ -2182,6 +2253,7 @@ void console_unlock(void)
        console_may_schedule = 0;
 
 again:
+       clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending);
        /*
         * We released the console_sem lock, so we need to recheck if
         * cpu is online and (if not) is there at least one CON_ANYTIME
@@ -2664,8 +2736,11 @@ late_initcall(printk_late_init);
 #if defined CONFIG_PRINTK
 static void wake_up_klogd_work_func(struct irq_work *irq_work)
 {
-       if (test_and_clear_bit(PRINTK_PENDING_OUTPUT, &printk_pending)) {
-               /* If trylock fails, someone else is doing the printing */
+       if (test_bit(PRINTK_PENDING_OUTPUT, &printk_pending)) {
+               /*
+                * If trylock fails, someone else is doing the printing.
+                * PRINTK_PENDING_OUTPUT bit is cleared by console_unlock().
+                */
                if (console_trylock())
                        console_unlock();
        }
@@ -2679,6 +2754,22 @@ static DEFINE_PER_CPU(struct irq_work, 
wake_up_klogd_work) = {
        .flags = IRQ_WORK_LAZY,
 };
 
+static int printk_kthread_func(void *data)
+{
+       while (1) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (!test_bit(PRINTK_PENDING_OUTPUT, &printk_pending))
+                       schedule();
+
+               __set_current_state(TASK_RUNNING);
+
+               console_lock();
+               console_unlock();
+       }
+
+       return 0;
+}
+
 void wake_up_klogd(void)
 {
        preempt_disable();
-- 
2.12.2

Reply via email to