Warning: non-merge-ready in any sense

Under CONFIG_FAIR_SOFTIRQ_SCHEDULE, each sched tick will account the CPU time spent processing softirqs to the ksoftirqd of the softirq's group, then update ksoftirqd->se.sum_exec_runtime and recalculate ksoftirqd->se.vruntime.
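For illustration only (not part of the patch): a minimal userspace sketch of the per-tick accounting described above, assuming the usual nice-0 weight of 1024 and the calc_delta_fair()-style scaling delta * NICE_0_LOAD / weight. The struct and names below are toy stand-ins, not the kernel's sched_entity.

/* Toy model of charging softirq time to a group's ksoftirqd (not kernel code). */
#include <stdint.h>
#include <stdio.h>

#define NICE_0_WEIGHT 1024ULL	/* CFS weight of a nice-0 task */

struct fake_se {
	uint64_t weight;		/* load weight of the ksoftirqd thread */
	uint64_t sum_exec_runtime;	/* total ns charged so far */
	uint64_t vruntime;		/* weighted virtual runtime */
};

/* Charge "delta" ns of softirq processing time to the group's ksoftirqd. */
static void charge_softirq_time(struct fake_se *se, uint64_t delta)
{
	se->sum_exec_runtime += delta;
	/* Same idea as calc_delta_fair(): heavier threads accrue vruntime slower. */
	se->vruntime += delta * NICE_0_WEIGHT / se->weight;
}

int main(void)
{
	struct fake_se ksoftirqd_net = { .weight = NICE_0_WEIGHT };

	charge_softirq_time(&ksoftirqd_net, 2000000);	/* 2 ms of softirq work */
	printf("runtime=%llu ns vruntime=%llu\n",
	       (unsigned long long)ksoftirqd_net.sum_exec_runtime,
	       (unsigned long long)ksoftirqd_net.vruntime);
	return 0;
}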
Use CFS's vruntime to decide whether a softirq needs to be served right away or deferred. This can be tuned with the ksoftirqd nice policy.

Signed-off-by: Dmitry Safonov <d...@arista.com>
---
 include/linux/interrupt.h |  1 +
 kernel/sched/fair.c       | 38 ++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h      | 19 +++++++++++++++++++
 kernel/softirq.c          | 45 +++++++++++++++++++++++++++++++++++++--------
 4 files changed, 95 insertions(+), 8 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 17e1a04445fa..a0b5c24c088a 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -512,6 +512,7 @@ extern struct task_struct *__percpu **ksoftirqd;
 extern unsigned nr_softirq_groups;
 
 extern bool servicing_softirq(unsigned nr);
+extern unsigned group_softirqs(unsigned nr);
 
 static inline bool current_is_ksoftirqd(void)
 {
 	unsigned i;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2fe3aa853e4d..d0105739551f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -813,6 +813,42 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
 }
 #endif /* CONFIG_SMP */
 
+static void update_ksoftirqd(struct cfs_rq *cfs_rq)
+{
+#ifdef CONFIG_FAIR_SOFTIRQ_SCHEDULE
+	int rq_cpu = cpu_of(rq_of(cfs_rq));
+	u64 si_times[NR_SOFTIRQS], delta[NR_SOFTIRQS];
+	unsigned i;
+
+	if (unlikely(!ksoftirqd))
+		return;
+
+	softirq_time_read(rq_cpu, si_times);
+
+	for (i = 0; i < NR_SOFTIRQS; i++) {
+		delta[i] = si_times[i] - cfs_rq->prev_si_time[i];
+		cfs_rq->prev_si_time[i] = si_times[i];
+		if (unlikely((s64)delta[i] < 0))
+			delta[i] = 0;
+	}
+
+	for (i = 0; i < nr_softirq_groups; i++) {
+		unsigned j, softirq = 0, group_mask = group_softirqs(i);
+		struct task_struct *tsk = *this_cpu_ptr(ksoftirqd[i]);
+		u64 sum_delta = 0;
+
+		while ((j = ffs(group_mask))) {
+			softirq += j - 1;
+			group_mask >>= j;
+			sum_delta += delta[softirq];
+		}
+
+		tsk->se.sum_exec_runtime += sum_delta;
+		tsk->se.vruntime += calc_delta_fair(sum_delta, &tsk->se);
+	}
+#endif
+}
+
 /*
  * Update the current task's runtime statistics.
  */
@@ -822,6 +858,8 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	u64 now = rq_clock_task(rq_of(cfs_rq));
 	u64 delta_exec;
 
+	update_ksoftirqd(cfs_rq);
+
 	if (unlikely(!curr))
 		return;
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 14e154c86dc5..e95d8d4f9146 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -487,6 +487,10 @@ struct cfs_rq {
 	struct list_head leaf_cfs_rq_list;
 	struct task_group *tg;	/* group that "owns" this runqueue */
 
+#ifdef CONFIG_FAIR_SOFTIRQ_SCHEDULE
+	u64 prev_si_time[NR_SOFTIRQS];
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 	int runtime_enabled;
 	u64 runtime_expires;
@@ -2081,6 +2085,21 @@ static inline u64 irq_time_read(int cpu)
 }
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
+static inline void softirq_time_read(int cpu, u64 si_times[NR_SOFTIRQS])
+{
+#ifdef CONFIG_FAIR_SOFTIRQ_SCHEDULE
+	struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
+	unsigned int seq, i;
+
+	for (i = 0; i < NR_SOFTIRQS; i++) {
+		do {
+			seq = __u64_stats_fetch_begin(&irqtime->sync);
+			si_times[i] = irqtime->total_si[i];
+		} while (__u64_stats_fetch_retry(&irqtime->sync, seq));
+	}
+#endif
+}
+
 #ifdef CONFIG_CPU_FREQ
 DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 516e31d3d5b4..a123bafa11c2 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -82,6 +82,11 @@ bool servicing_softirq(unsigned nr)
 	return false;
 }
 
+unsigned group_softirqs(unsigned nr)
+{
+	return group_to_softirqs[nr];
+}
+
 /*
  * we cannot loop indefinitely here to avoid userspace starvation,
  * but we also don't want to introduce a worst case 1/HZ latency
@@ -112,15 +117,10 @@ static void wakeup_softirqd(u32 softirq_mask)
  * If ksoftirqd is scheduled, we do not want to process pending softirqs
  * right now. Let ksoftirqd handle this at its own rate, to get fairness.
  */
-static bool ksoftirqd_running(void)
+static bool ksoftirqd_running(__u32 pending)
 {
-	/* We rely that there are pending softirqs */
-	__u32 pending = local_softirq_pending();
 	unsigned i;
 
-	if (!ksoftirqd)
-		return false;
-
 	for (i = 0; i < nr_softirq_groups && pending; i++) {
 		/* Interrupts are disabled: no need to stop preemption */
 		struct task_struct *tsk = *this_cpu_ptr(ksoftirqd[i]);
@@ -137,6 +137,33 @@ static bool ksoftirqd_running(void)
 	return !pending;
 }
 
+static __u32 softirqs_to_serve(__u32 pending)
+{
+	unsigned i;
+	__u32 unserve = pending;
+
+	if (!ksoftirqd || !current || is_idle_task(current))
+		return pending;
+
+	if (!IS_ENABLED(CONFIG_FAIR_SOFTIRQ_SCHEDULE))
+		return ksoftirqd_running(pending) ?
+			0 : pending;
+	for (i = 0; i < nr_softirq_groups && unserve; i++) {
+		/* Interrupts are disabled: no need to stop preemption */
+		struct task_struct *tsk = *this_cpu_ptr(ksoftirqd[i]);
+
+		if (tsk && (s64)(current->se.vruntime - tsk->se.vruntime) < 0) {
+			if (tsk->state != TASK_RUNNING)
+				wake_up_process(tsk);
+			continue;
+		}
+
+		unserve &= ~group_to_softirqs[i];
+	}
+
+	return pending & ~unserve;
+}
+
 /*
  * preempt_count and SOFTIRQ_OFFSET usage:
  * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
@@ -385,7 +412,8 @@ asmlinkage __visible void do_softirq(void)
 
 	local_irq_save(flags);
 
-	if (!ksoftirqd_running())
+	pending = softirqs_to_serve(pending);
+	if (pending)
 		do_softirq_own_stack(pending);
 
 	local_irq_restore(flags);
@@ -414,7 +442,8 @@ static inline void invoke_softirq(void)
 {
 	__u32 pending = local_softirq_pending();
 
-	if (!pending || !ksoftirqd_running())
+	pending = softirqs_to_serve(pending);
+	if (!pending)
 		return;
 
 	if (!force_irqthreads) {
-- 
2.13.6
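For illustration only (not part of the patch): a minimal userspace sketch of the serve-or-defer decision that softirqs_to_serve() makes. The two softirq groups and all names below are hypothetical; the signed vruntime comparison mirrors the check in the patch.

/* Toy model of the vruntime-based serve-or-defer policy (not kernel code). */
#include <stdint.h>
#include <stdio.h>

/* Hypothetical softirq groups, e.g. "net" and "everything else". */
enum { GROUP_NET, GROUP_OTHER, NR_GROUPS };

struct fake_task {
	const char *name;
	uint64_t vruntime;
};

/* Return the subset of pending groups that may be serviced inline now. */
static unsigned groups_to_serve(const struct fake_task *curr,
				const struct fake_task ksoftirqd[NR_GROUPS],
				unsigned pending_groups)
{
	unsigned serve = 0;

	for (unsigned i = 0; i < NR_GROUPS; i++) {
		if (!(pending_groups & (1u << i)))
			continue;
		/* Signed compare, as with vruntime in CFS, to tolerate wrap. */
		if ((int64_t)(curr->vruntime - ksoftirqd[i].vruntime) >= 0)
			serve |= 1u << i;	/* curr already had its share: pay inline */
		/* else: curr is behind in vruntime, defer to ksoftirqd instead */
	}
	return serve;
}

int main(void)
{
	struct fake_task curr = { .name = "app", .vruntime = 5000 };
	struct fake_task ksoftirqd[NR_GROUPS] = {
		{ .name = "ksoftirqd-net",   .vruntime = 4000 },	/* behind curr: serve */
		{ .name = "ksoftirqd-other", .vruntime = 9000 },	/* ahead of curr: defer */
	};
	unsigned pending = (1u << GROUP_NET) | (1u << GROUP_OTHER);

	printf("serve mask: 0x%x\n", groups_to_serve(&curr, ksoftirqd, pending));
	return 0;	/* expect 0x1: only the net group is served inline */
}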