IRQ time entry is currently accounted before HARDIRQ_OFFSET or
SOFTIRQ_OFFSET are incremented. This is convenient for deciding to which
index the accounted cputime should be dispatched.
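For context, the entry-side dispatch in irqtime_account_irq() currently
looks roughly like this (a simplified sketch of the existing
kernel/sched/cputime.c logic, with the sched_clock bookkeeping and the
ksoftirqd comment trimmed):

	/*
	 * The preempt counts still describe the interrupted context
	 * here, so no offset compensation is needed to classify delta.
	 */
	if (hardirq_count())
		irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);

Once the accounting moves after the preempt count update, these checks
must be rebased against the freshly added HARDIRQ_OFFSET/SOFTIRQ_OFFSET,
which is what the dispatch changes below do.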
Unfortunately it prevents tick_irq_enter() from being called under
HARDIRQ_OFFSET because tick_irq_enter() has to be called before the IRQ
entry accounting due to the necessary clock catch up. As a result we
don't benefit from appropriate lockdep coverage on tick_irq_enter().

To prepare for fixing this, move the IRQ entry cputime accounting after
the preempt offset is incremented. This requires the cputime dispatch
code to handle the extra offset.

Signed-off-by: Frederic Weisbecker <frede...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Tony Luck <tony.l...@intel.com>
Cc: Fenghua Yu <fenghua...@intel.com>
Cc: Michael Ellerman <m...@ellerman.id.au>
Cc: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Cc: Paul Mackerras <pau...@samba.org>
Cc: Heiko Carstens <h...@linux.ibm.com>
Cc: Vasily Gorbik <g...@linux.ibm.com>
Cc: Christian Borntraeger <borntrae...@de.ibm.com>
---
 include/linux/hardirq.h |  4 +--
 include/linux/vtime.h   | 10 +++++---
 kernel/sched/cputime.c  | 56 ++++++++++++++++++++++++++++++-----------
 kernel/softirq.c        |  2 +-
 4 files changed, 51 insertions(+), 21 deletions(-)

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 754f67ac4326..02499c10fbf7 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -32,9 +32,9 @@ static __always_inline void rcu_irq_enter_check_tick(void)
  */
 #define __irq_enter()					\
 	do {						\
+		preempt_count_add(HARDIRQ_OFFSET);	\
+		lockdep_hardirq_enter();		\
 		account_irq_enter_time(current);	\
-		preempt_count_add(HARDIRQ_OFFSET);	\
-		lockdep_hardirq_enter();		\
 	} while (0)
 
 /*
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index f827b38c3bb7..cad8ff530273 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -96,21 +96,23 @@ static inline void vtime_flush(struct task_struct *tsk) { }
 
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-extern void irqtime_account_irq(struct task_struct *tsk);
+extern void irqtime_account_enter(struct task_struct *tsk);
+extern void irqtime_account_exit(struct task_struct *tsk);
 #else
-static inline void irqtime_account_irq(struct task_struct *tsk) { }
+static inline void irqtime_account_enter(struct task_struct *tsk) { }
+static inline void irqtime_account_exit(struct task_struct *tsk) { }
 #endif
 
 static inline void account_irq_enter_time(struct task_struct *tsk)
 {
 	vtime_account_irq_enter(tsk);
-	irqtime_account_irq(tsk);
+	irqtime_account_enter(tsk);
 }
 
 static inline void account_irq_exit_time(struct task_struct *tsk)
 {
 	vtime_account_irq_exit(tsk);
-	irqtime_account_irq(tsk);
+	irqtime_account_exit(tsk);
 }
 
 #endif /* _LINUX_KERNEL_VTIME_H */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 6fa81cc33fec..82623d97667c 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -43,23 +43,49 @@ static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
 	u64_stats_update_end(&irqtime->sync);
 }
 
-/*
- * Called before incrementing preempt_count on {soft,}irq_enter
- * and before decrementing preempt_count on {soft,}irq_exit.
- */
-void irqtime_account_irq(struct task_struct *curr)
+static s64 irqtime_get_delta(struct irqtime *irqtime)
 {
-	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+	int cpu = smp_processor_id();
 	s64 delta;
-	int cpu;
 
-	if (!sched_clock_irqtime)
-		return;
-
-	cpu = smp_processor_id();
 	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
 	irqtime->irq_start_time += delta;
 
+	return delta;
+}
+
+/* Called after incrementing preempt_count on {soft,}irq_enter */
+void irqtime_account_enter(struct task_struct *curr)
+{
+	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+	u64 delta;
+
+	if (!sched_clock_irqtime)
+		return;
+
+	delta = irqtime_get_delta(irqtime);
+	/*
+	 * We do not account for softirq time from ksoftirqd here.
+	 * We want to continue accounting softirq time to ksoftirqd thread
+	 * in that case, so as not to confuse scheduler with a special task
+	 * that do not consume any time, but still wants to run.
+	 */
+	if ((irq_count() == (SOFTIRQ_OFFSET | HARDIRQ_OFFSET)) &&
+	    curr != this_cpu_ksoftirqd())
+		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
+}
+EXPORT_SYMBOL_GPL(irqtime_account_enter);
+
+/* Called before decrementing preempt_count on {soft,}irq_exit */
+void irqtime_account_exit(struct task_struct *curr)
+{
+	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
+	u64 delta;
+
+	if (!sched_clock_irqtime)
+		return;
+
+	delta = irqtime_get_delta(irqtime);
 	/*
 	 * We do not account for softirq time from ksoftirqd here.
 	 * We want to continue accounting softirq time to ksoftirqd thread
@@ -71,7 +97,7 @@ void irqtime_account_irq(struct task_struct *curr)
 	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
 		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
 }
-EXPORT_SYMBOL_GPL(irqtime_account_irq);
+EXPORT_SYMBOL_GPL(irqtime_account_exit);
 
 static u64 irqtime_tick_accounted(u64 maxtime)
 {
@@ -428,9 +454,11 @@ void vtime_task_switch(struct task_struct *prev)
  */
 void vtime_account_irq_enter(struct task_struct *tsk)
 {
-	if (hardirq_count()) {
+	WARN_ON_ONCE(in_task());
+
+	if (hardirq_count() > HARDIRQ_OFFSET) {
 		vtime_account_hardirq(tsk);
-	} else if (in_serving_softirq()) {
+	} else if (hardirq_count() && in_serving_softirq()) {
 		vtime_account_softirq(tsk);
 	} else if (is_idle_task(tsk)) {
 		vtime_account_idle(tsk);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 617009ccd82c..24254c41bb7c 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -315,9 +315,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void)
 	current->flags &= ~PF_MEMALLOC;
 
 	pending = local_softirq_pending();
-	account_irq_enter_time(current);
 
 	__local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET);
+	account_irq_enter_time(current);
 	in_hardirq = lockdep_softirq_start();
 
 restart:
-- 
2.25.1