(adding Kay Sievers who wrote most of this)

On Tue, 2016-08-23 at 00:40 +0900, Sergey Senozhatsky wrote:
> Hello,
> 
>       RFC and POC
> 
>       Petr, I took a very quick look at your series [1]. I think it
> won't work on some of the setups I'm toying with, where multiple CPUs
> can do a simultaneous pr_cont() output. What do you about the following
> approach?
> 
> (it's not really tested, I just finished writing the code. Will test it
> more tomorrow. But I kinda like that at this point it removes almost
> twice the code it adds, which is probably because I missed some cases).
> 
> ===8<====
> 
>       This patch changes pr_cont buffer to be a per-cpu variable, so
> CPUs don't compete anymore for a single cont buffer. Thus we minimize
> the possibility of preliminary/forced flushes of incomplete pr_cont
> buffers.
>       The basic idea is that the first time CPU issues a pr_cont or
> !LOG_NEWLINE output it starts a non-preemptible region which ends when
> CPUs writes a NEW_LINE symbol.
> Schematically:
> 
>       pr_cont()                       /* preempt_disable() */
>               << write to this_cpu_ptr cont buffer
>       pr_cont()
>               << write to this_cpu_ptr cont buffer
>       ...
>       pr_cont("\n")                   /* log_store(). preempt_enable() */
> 
>       The only possible case (unless I'm missing something) for a
> race is when IRQ is issuing a pr_cont() output, interrupting the CPU
> which is already in the middle of pr_cont() printing.
> 
> 
> [1]: http://marc.info/?l=linux-kernel&m=146860197621876
> 
> Signed-off-by: Sergey Senozhatsky <sergey.senozhat...@gmail.com>
> ---
>  kernel/printk/printk.c | 193 
> ++++++++++++++++---------------------------------
>  1 file changed, 64 insertions(+), 129 deletions(-)
> 
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 0d3e026..2bf8b85 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -337,10 +337,9 @@ static int console_may_schedule;
>   */
>  
>  enum log_flags {
> -     LOG_NOCONS      = 1,    /* already flushed, do not print to console */
> -     LOG_NEWLINE     = 2,    /* text ended with a newline */
> -     LOG_PREFIX      = 4,    /* text started with a prefix */
> -     LOG_CONT        = 8,    /* text is a fragment of a continuation line */
> +     LOG_NEWLINE     = 1,    /* text ended with a newline */
> +     LOG_PREFIX      = 2,    /* text started with a prefix */
> +     LOG_CONT        = 4,    /* text is a fragment of a continuation line */
>  };
>  
>  struct printk_log {
> @@ -1661,108 +1660,62 @@ static inline void printk_delay(void)
>   * though, are printed immediately to the consoles to ensure everything has
>   * reached the console in case of a kernel crash.
>   */
> -static struct cont {
> +struct cont {
>       char buf[LOG_LINE_MAX];
>       size_t len;                     /* length == 0 means unused buffer */
> -     size_t cons;                    /* bytes written to console */
> -     struct task_struct *owner;      /* task of first print*/
>       u64 ts_nsec;                    /* time of first print */
>       u8 level;                       /* log level of first message */
>       u8 facility;                    /* log facility of first message */
>       enum log_flags flags;           /* prefix, newline flags */
> -     bool flushed:1;                 /* buffer sealed and committed */
> -} cont;
> +};
> +
> +static DEFINE_PER_CPU(struct cont, pcpu_cont);
> +static DEFINE_PER_CPU(bool, cont_printing);
>  
> -static void cont_flush(enum log_flags flags)
> +static void cont_flush(struct cont *cont, enum log_flags flags)
>  {
> -     if (cont.flushed)
> -             return;
> -     if (cont.len == 0)
> +     if (cont->len == 0)
>               return;
>  
> -     if (cont.cons) {
> -             /*
> -              * If a fragment of this line was directly flushed to the
> -              * console; wait for the console to pick up the rest of the
> -              * line. LOG_NOCONS suppresses a duplicated output.
> -              */
> -             log_store(cont.facility, cont.level, flags | LOG_NOCONS,
> -                       cont.ts_nsec, NULL, 0, cont.buf, cont.len);
> -             cont.flags = flags;
> -             cont.flushed = true;
> -     } else {
> -             /*
> -              * If no fragment of this line ever reached the console,
> -              * just submit it to the store and free the buffer.
> -              */
> -             log_store(cont.facility, cont.level, flags, 0,
> -                       NULL, 0, cont.buf, cont.len);
> -             cont.len = 0;
> -     }
> +     /*
> +      * If no fragment of this line ever reached the console,
> +      * just submit it to the store and free the buffer.
> +      */
> +     log_store(cont->facility, cont->level, flags, 0,
> +               NULL, 0, cont->buf, cont->len);
> +     cont->len = 0;
>  }
>  
>  static bool cont_add(int facility, int level, const char *text, size_t len)
>  {
> -     if (cont.len && cont.flushed)
> -             return false;
> +     struct cont *cont = this_cpu_ptr(&pcpu_cont);
>  
>       /*
>        * If ext consoles are present, flush and skip in-kernel
>        * continuation.  See nr_ext_console_drivers definition.  Also, if
>        * the line gets too long, split it up in separate records.
>        */
> -     if (nr_ext_console_drivers || cont.len + len > sizeof(cont.buf)) {
> -             cont_flush(LOG_CONT);
> +     if (nr_ext_console_drivers || cont->len + len > sizeof(cont->buf)) {
> +             cont_flush(cont, LOG_CONT);
>               return false;
>       }
>  
> -     if (!cont.len) {
> -             cont.facility = facility;
> -             cont.level = level;
> -             cont.owner = current;
> -             cont.ts_nsec = local_clock();
> -             cont.flags = 0;
> -             cont.cons = 0;
> -             cont.flushed = false;
> +     if (!cont->len) {
> +             cont->facility = facility;
> +             cont->level = level;
> +             cont->ts_nsec = local_clock();
> +             cont->flags = 0;
>       }
>  
> -     memcpy(cont.buf + cont.len, text, len);
> -     cont.len += len;
> +     memcpy(cont->buf + cont->len, text, len);
> +     cont->len += len;
>  
> -     if (cont.len > (sizeof(cont.buf) * 80) / 100)
> -             cont_flush(LOG_CONT);
> +     if (cont->len > (sizeof(cont->buf) * 80) / 100)
> +             cont_flush(cont, LOG_CONT);
>  
>       return true;
>  }
>  
> -static size_t cont_print_text(char *text, size_t size)
> -{
> -     size_t textlen = 0;
> -     size_t len;
> -
> -     if (cont.cons == 0 && (console_prev & LOG_NEWLINE)) {
> -             textlen += print_time(cont.ts_nsec, text);
> -             size -= textlen;
> -     }
> -
> -     len = cont.len - cont.cons;
> -     if (len > 0) {
> -             if (len+1 > size)
> -                     len = size-1;
> -             memcpy(text + textlen, cont.buf + cont.cons, len);
> -             textlen += len;
> -             cont.cons = cont.len;
> -     }
> -
> -     if (cont.flushed) {
> -             if (cont.flags & LOG_NEWLINE)
> -                     text[textlen++] = '\n';
> -             /* got everything, release buffer */
> -             cont.len = 0;
> -     }
> -     return textlen;
> -}
> -
>  asmlinkage int vprintk_emit(int facility, int level,
>                           const char *dict, size_t dictlen,
>                           const char *fmt, va_list args)
> @@ -1779,6 +1732,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>       int printed_len = 0;
>       int nmi_message_lost;
>       bool in_sched = false;
> +     struct cont *cont;
>  
>       if (level == LOGLEVEL_SCHED) {
>               level = LOGLEVEL_DEFAULT;
> @@ -1789,6 +1743,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>       printk_delay();
>  
>       local_irq_save(flags);
> +     cont = this_cpu_ptr(&pcpu_cont);
>       this_cpu = smp_processor_id();
>  
>       /*
> @@ -1878,12 +1833,22 @@ asmlinkage int vprintk_emit(int facility, int level,
>               lflags |= LOG_PREFIX|LOG_NEWLINE;
>  
>       if (!(lflags & LOG_NEWLINE)) {
> +             if (!this_cpu_read(cont_printing)) {
> +                     bool unsafe_pr_cont = preemptible() &&
> +                             !rcu_preempt_depth();
> +
> +                     if (unsafe_pr_cont) {
> +                             this_cpu_write(cont_printing, true);
> +                             preempt_disable();
> +                     }
> +             }
> +
>               /*
>                * Flush the conflicting buffer. An earlier newline was missing,
>                * or another task also prints continuation lines.
>                */
> -             if (cont.len && (lflags & LOG_PREFIX || cont.owner != current))
> -                     cont_flush(LOG_NEWLINE);
> +             if (cont->len && (lflags & LOG_PREFIX))
> +                     cont_flush(cont, LOG_NEWLINE);
>  
>               /* buffer line if possible, otherwise store it right away */
>               if (cont_add(facility, level, text, text_len))
> @@ -1895,6 +1860,11 @@ asmlinkage int vprintk_emit(int facility, int level,
>       } else {
>               bool stored = false;
>  
> +             if (this_cpu_read(cont_printing)) {
> +                     this_cpu_write(cont_printing, false);
> +                     preempt_enable();
> +             }
> +
>               /*
>                * If an earlier newline was missing and it was the same task,
>                * either merge it with the current buffer and flush, or if
> @@ -1903,11 +1873,11 @@ asmlinkage int vprintk_emit(int facility, int level,
>                * If the preceding printk was from a different task and missed
>                * a newline, flush and append the newline.
>                */
> -             if (cont.len) {
> -                     if (cont.owner == current && !(lflags & LOG_PREFIX))
> +             if (cont->len) {
> +                     if (!(lflags & LOG_PREFIX))
>                               stored = cont_add(facility, level, text,
>                                                 text_len);
> -                     cont_flush(LOG_NEWLINE);
> +                     cont_flush(cont, LOG_NEWLINE);
>               }
>  
>               if (stored)
> @@ -2051,7 +2021,6 @@ static struct cont {
>       size_t len;
>       size_t cons;
>       u8 level;
> -     bool flushed:1;
>  } cont;
>  static char *log_text(const struct printk_log *msg) { return NULL; }
>  static char *log_dict(const struct printk_log *msg) { return NULL; }
> @@ -2344,42 +2313,6 @@ static inline int can_use_console(void)
>       return cpu_online(raw_smp_processor_id()) || have_callable_console();
>  }
>  
> -static void console_cont_flush(char *text, size_t size)
> -{
> -     unsigned long flags;
> -     size_t len;
> -
> -     raw_spin_lock_irqsave(&logbuf_lock, flags);
> -
> -     if (!cont.len)
> -             goto out;
> -
> -     if (suppress_message_printing(cont.level)) {
> -             cont.cons = cont.len;
> -             if (cont.flushed)
> -                     cont.len = 0;
> -             goto out;
> -     }
> -
> -     /*
> -      * We still queue earlier records, likely because the console was
> -      * busy. The earlier ones need to be printed before this one, we
> -      * did not flush any fragment so far, so just let it queue up.
> -      */
> -     if (console_seq < log_next_seq && !cont.cons)
> -             goto out;
> -
> -     len = cont_print_text(text, size);
> -     raw_spin_unlock(&logbuf_lock);
> -     stop_critical_timings();
> -     call_console_drivers(cont.level, NULL, 0, text, len);
> -     start_critical_timings();
> -     local_irq_restore(flags);
> -     return;
> -out:
> -     raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> -}
> -
>  /**
>   * console_unlock - unlock the console system
>   *
> @@ -2433,9 +2366,6 @@ again:
>               return;
>       }
>  
> -     /* flush buffered message fragment immediately to console */
> -     console_cont_flush(text, sizeof(text));
> -
>       for (;;) {
>               struct printk_log *msg;
>               size_t ext_len = 0;
> @@ -2465,8 +2395,7 @@ skip:
>  
>               msg = log_from_idx(console_idx);
>               level = msg->level;
> -             if ((msg->flags & LOG_NOCONS) ||
> -                             suppress_message_printing(level)) {
> +             if (suppress_message_printing(level)) {
>                       /*
>                        * Skip record we have buffered and already printed
>                        * directly to the console when we received it, and
> @@ -2474,12 +2403,6 @@ skip:
>                        */
>                       console_idx = log_next(console_idx);
>                       console_seq++;
> -                     /*
> -                      * We will get here again when we register a new
> -                      * CON_PRINTBUFFER console. Clear the flag so we
> -                      * will properly dump everything later.
> -                      */
> -                     msg->flags &= ~LOG_NOCONS;
>                       console_prev = msg->flags;
>                       goto skip;
>               }
> @@ -2581,6 +2504,18 @@ void console_unblank(void)
>   */
>  void console_flush_on_panic(void)
>  {
> +     unsigned long flags;
> +     unsigned int cpu;
> +
> +     /*
> +      * Flush the cont messages; no matter complete
> +      * or incomplete.
> +      */
> +     local_irq_save(flags);
> +     for_each_online_cpu(cpu)
> +             cont_flush(per_cpu_ptr(&pcpu_cont, cpu), LOG_NEWLINE);
> +     local_irq_restore(flags);
> +
>       /*
>        * If someone else is holding the console lock, trylock will fail
>        * and may_schedule may be set.  Ignore and proceed to unlock so

Reply via email to