On Thu, Jan 11, 2018 at 9:35 PM, Frederic Weisbecker
<frede...@kernel.org> wrote:
> As we plan to be able to defer some specific softurq vector processing
> to workqueues when those vectors need more time than IRQs can offer,
> let's first count the time spent and the number of occurences per vector.
>
> For now we still defer to ksoftirqd when the per vector limits are reached
>
> Suggested-by: Linus Torvalds <torva...@linux-foundation.org>
> Signed-off-by: Frederic Weisbecker <frede...@kernel.org>
> Cc: Dmitry Safonov <d...@arista.com>
> Cc: Eric Dumazet <eduma...@google.com>
> Cc: Linus Torvalds <torva...@linux-foundation.org>
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: Andrew Morton <a...@linux-foundation.org>
> Cc: David Miller <da...@davemloft.net>
> Cc: Hannes Frederic Sowa <han...@stressinduktion.org>
> Cc: Ingo Molnar <mi...@kernel.org>
> Cc: Levin Alexander <alexander.le...@verizon.com>
> Cc: Paolo Abeni <pab...@redhat.com>
> Cc: Paul E. McKenney <paul...@linux.vnet.ibm.com>
> Cc: Radu Rendec <rren...@arista.com>
> Cc: Rik van Riel <r...@redhat.com>
> Cc: Stanislaw Gruszka <sgrus...@redhat.com>
> Cc: Thomas Gleixner <t...@linutronix.de>
> Cc: Wanpeng Li <wanpeng...@hotmail.com>
> ---
>  kernel/softirq.c | 37 +++++++++++++++++++++++++++++--------
>  1 file changed, 29 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/softirq.c b/kernel/softirq.c
> index 2f5e87f..fa267f7 100644
> --- a/kernel/softirq.c
> +++ b/kernel/softirq.c
> @@ -26,6 +26,7 @@
>  #include <linux/smpboot.h>
>  #include <linux/tick.h>
>  #include <linux/irq.h>
> +#include <linux/sched/clock.h>
>
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/irq.h>
> @@ -62,6 +63,17 @@ const char * const softirq_to_name[NR_SOFTIRQS] = {
>         "TASKLET", "SCHED", "HRTIMER", "RCU"
>  };
>
> +struct vector_stat {
> +       u64 time;
> +       int count;
> +};
> +
> +struct softirq_stat {
> +       struct vector_stat stat[NR_SOFTIRQS];
> +};
> +
> +static DEFINE_PER_CPU(struct softirq_stat, softirq_stat_cpu);
> +
>  /*
>   * we cannot loop indefinitely here to avoid userspace starvation,
>   * but we also don't want to introduce a worst case 1/HZ latency
> @@ -203,7 +215,7 @@ EXPORT_SYMBOL(__local_bh_enable_ip);
>   * we want to handle softirqs as soon as possible, but they
>   * should not be able to lock up the box.
>   */
> -#define MAX_SOFTIRQ_TIME  msecs_to_jiffies(2)
> +#define MAX_SOFTIRQ_TIME  (2 * NSEC_PER_MSEC)
>  #define MAX_SOFTIRQ_RESTART 10
>
>  #ifdef CONFIG_TRACE_IRQFLAGS
> @@ -241,12 +253,11 @@ static inline void lockdep_softirq_end(bool in_hardirq) 
> { }
>
>  asmlinkage __visible void __softirq_entry __do_softirq(void)
>  {
> -       unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
> +       struct softirq_stat *sstat = this_cpu_ptr(&softirq_stat_cpu);
>         unsigned long old_flags = current->flags;
> -       int max_restart = MAX_SOFTIRQ_RESTART;
>         struct softirq_action *h;
>         bool in_hardirq;
> -       __u32 pending;
> +       __u32 pending, overrun = 0;
>         int softirq_bit;
>
>         /*
> @@ -262,6 +273,7 @@ asmlinkage __visible void __softirq_entry 
> __do_softirq(void)
>         __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
>         in_hardirq = lockdep_softirq_start();
>
> +       memzero_explicit(sstat, sizeof(*sstat));

If you clear sstat here, it means it does not need to be a per cpu
variable, but an automatic one (defined on the stack)

I presume we need a per cpu var to track cpu usage on last time window.

( typical case of 99,000 IRQ per second, one packet delivered per IRQ,
10 usec spent per packet)



>  restart:
>         /* Reset the pending bitmask before enabling irqs */
>         set_softirq_pending(0);
> @@ -271,8 +283,10 @@ asmlinkage __visible void __softirq_entry 
> __do_softirq(void)
>         h = softirq_vec;
>
>         while ((softirq_bit = ffs(pending))) {
> +               struct vector_stat *vstat;
>                 unsigned int vec_nr;
>                 int prev_count;
> +               u64 startime;
>
>                 h += softirq_bit - 1;
>
> @@ -280,10 +294,18 @@ asmlinkage __visible void __softirq_entry 
> __do_softirq(void)
>                 prev_count = preempt_count();
>
>                 kstat_incr_softirqs_this_cpu(vec_nr);
> +               vstat = &sstat->stat[vec_nr];
>
>                 trace_softirq_entry(vec_nr);
> +               startime = local_clock();
>                 h->action(h);
> +               vstat->time += local_clock() - startime;

You might store local_clock() in a variable, so that we do not call
local_clock() two times per ->action() called.


> +               vstat->count++;
>                 trace_softirq_exit(vec_nr);
> +
> +               if (vstat->time > MAX_SOFTIRQ_TIME || vstat->count > 
> MAX_SOFTIRQ_RESTART)

If we trust local_clock() to be precise enough, we do not need to
track vstat->count anymore.

> +                       overrun |= 1 << vec_nr;
> +
>                 if (unlikely(prev_count != preempt_count())) {
>                         pr_err("huh, entered softirq %u %s %p with 
> preempt_count %08x, exited with %08x?\n",
>                                vec_nr, softirq_to_name[vec_nr], h->action,
> @@ -299,11 +321,10 @@ asmlinkage __visible void __softirq_entry 
> __do_softirq(void)
>
>         pending = local_softirq_pending();
>         if (pending) {
> -               if (time_before(jiffies, end) && !need_resched() &&
> -                   --max_restart)
> +               if (overrun || need_resched())
> +                       wakeup_softirqd();
> +               else
>                         goto restart;
> -
> -               wakeup_softirqd();
>         }
>
>         lockdep_softirq_end(in_hardirq);
> --
> 2.7.4
>

Reply via email to