On 08/11/2015 07:23 AM, Pan Xinhui wrote:
> From: Pan Xinhui <xinhuix....@intel.com>
> 
> printk can be called in any context, It's very useful to output debug
> info.
> 
> But it might cause very bad issues on some special cases. For example,
> some driver hit errors, and it dumps many messages like reg values, etc. 
> 
> Sometimes, printk is called when irqs disabled. This is OKay if there is
> a few messages. But What would happen if many messages outputted by other
> drivers at same time.
> 
> Here is the scenario.
> CPUA                                  CPUB
>                                       local_irq_save(flags);
>                                       printk()
> while(..) {                                   --> console_unlock
>       printk(...);                    
>       //hundreds or thousands loops
> }                             //all messages flushed out to consoles
>                                       local_irq_restore(flags);
> 
> printk runs on CPUA just store the messages in the buf and return.
> printk runs on CPUB(who owns the console_sem lock) would take the duty
> to flush all messages to consoles. It would take a long time to flush
> messages out, IOW, irq would be disabled for a long time. Such case is
> too bad.  We hit many interrupt related panics, for example, cpu did not
> respond to IPI.
> 
> Here is the solution, if we detect such case above, try to rebalance it.
> Let CPUA take the duty to flush messages to consoles.
> 
> The idea is simple, but the implementation is a little difficult.
> Introduce many help functions to fix it.

Please describe the rebalance state machine/message passing in detail.

Regards,
Peter Hurley

> Signed-off-by: Pan Xinhui <xinhuix....@intel.com>
> ---
>  kernel/printk/printk.c | 101 
> ++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 100 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index cf8c242..3dc2f60 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -1652,6 +1652,65 @@ static size_t cont_print_text(char *text, size_t size)
>       return textlen;
>  }
>  
> +static struct {
> +     unsigned int request:1;/* set it with logbuf_lock and console_sem held
> +                             * clear it with logbuf_lock held.
> +                             */
> +     unsigned int reply:1; /* set/clear it with logbuf_lock held*/
> +     struct semaphore help_sem;
> +} help_t = {
> +     .request = 0,
> +     .reply = 0,
> +     .help_sem = __SEMAPHORE_INITIALIZER(help_t.help_sem, 0),
> +};
> +
> +/* protected by logbuf_lock*/
> +static int check_help(void)
> +{
> +     return !!help_t.request;
> +}
> +
> +/* protected by logbuf_lock and console_sem*/
> +static void request_help(void)
> +{
> +     help_t.request = 1;
> +}
> +
> +/* protected by logbuf_lock*/
> +static void cancel_help(void)
> +{
> +     help_t.request = 0;
> +}
> +
> +/* protected by logbuf_lock*/
> +static void reply_help(void)
> +{
> +     help_t.request = 0;
> +     help_t.reply = 1;
> +}
> +
> +/* protected by logbuf_lock*/
> +static int ack_help(void)
> +{
> +     return !!help_t.reply;
> +}
> +
> +/* protected by logbuf_lock*/
> +static void close_help(void)
> +{
> +     help_t.reply = 0;
> +}
> +
> +static void thanks_help(void)
> +{
> +     up(&help_t.help_sem);
> +}
> +
> +static int done_help(void)
> +{
> +     return !down_trylock(&help_t.help_sem);
> +}
> +
>  asmlinkage int vprintk_emit(int facility, int level,
>                           const char *dict, size_t dictlen,
>                           const char *fmt, va_list args)
> @@ -1667,6 +1726,8 @@ asmlinkage int vprintk_emit(int facility, int level,
>       bool in_sched = false;
>       /* cpu currently holding logbuf_lock in this function */
>       static unsigned int logbuf_cpu = UINT_MAX;
> +     bool help = 0;
> +     bool can_help = !(irqs_disabled() || in_sched || in_interrupt());
>  
>       if (level == LOGLEVEL_SCHED) {
>               level = LOGLEVEL_DEFAULT;
> @@ -1701,6 +1762,10 @@ asmlinkage int vprintk_emit(int facility, int level,
>  
>       lockdep_off();
>       raw_spin_lock(&logbuf_lock);
> +     if (can_help && check_help()) {
> +             reply_help();
> +             help = true;
> +     }
>       logbuf_cpu = this_cpu;
>  
>       if (unlikely(recursion_bug)) {
> @@ -1811,6 +1876,9 @@ asmlinkage int vprintk_emit(int facility, int level,
>                */
>               preempt_disable();
>  
> +             if (help)
> +                     while (!done_help())
> +                             cpu_relax();
>               /*
>                * Try to acquire and then immediately release the console
>                * semaphore.  The release will print out buffers and wake up
> @@ -2230,6 +2298,8 @@ void console_unlock(void)
>       unsigned long flags;
>       bool wake_klogd = false;
>       bool retry;
> +     bool help = !!(irqs_disabled() || in_interrupt());
> +     bool skip_ack = true;
>  
>       if (console_suspended) {
>               up_console_sem();
> @@ -2248,6 +2318,21 @@ again:
>               int level;
>  
>               raw_spin_lock_irqsave(&logbuf_lock, flags);
> +             if (help) {
> +                     if (!skip_ack) {
> +                             if (ack_help()) {
> +                                     /*
> +                                     * If request got replyed by another
> +                                     * printk-on-going, close the request.
> +                                     */
> +                                     close_help();
> +                                     break;
> +                             }
> +                             cancel_help();
> +                     }
> +                     request_help();
> +                     skip_ack = 0;
> +             }
>               if (seen_seq != log_next_seq) {
>                       wake_klogd = true;
>                       seen_seq = log_next_seq;
> @@ -2265,8 +2350,13 @@ again:
>                       len = 0;
>               }
>  skip:
> -             if (console_seq == log_next_seq)
> +             if (console_seq == log_next_seq) {
> +                     if (help) {
> +                             cancel_help();
> +                             help = 0;
> +                     }
>                       break;
> +             }
>  
>               msg = log_from_idx(console_idx);
>               if (msg->flags & LOG_NOCONS) {
> @@ -2318,6 +2408,14 @@ skip:
>  
>       up_console_sem();
>  
> +     if (help) {
> +             /*
> +             * Need call up_console_sem first.
> +             */
> +             thanks_help();
> +             goto wake_up;
> +     }
> +
>       /*
>        * Someone could have filled up the buffer again, so re-check if there's
>        * something to flush. In case we cannot trylock the console_sem again,
> @@ -2331,6 +2429,7 @@ skip:
>       if (retry && console_trylock())
>               goto again;
>  
> +wake_up:
>       if (wake_klogd)
>               wake_up_klogd();
>  }
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to