Commit-ID:  ff9a9b4c4334b53b52ee9279f30bd5dd92ea9bdd
Gitweb:     http://git.kernel.org/tip/ff9a9b4c4334b53b52ee9279f30bd5dd92ea9bdd
Author:     Rik van Riel <r...@redhat.com>
AuthorDate: Wed, 10 Feb 2016 20:08:27 -0500
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Mon, 29 Feb 2016 09:53:10 +0100

sched, time: Switch VIRT_CPU_ACCOUNTING_GEN to jiffy granularity

When profiling syscall overhead on nohz-full kernels,
after removing __acct_update_integrals() from the profile,
native_sched_clock() remains as the top CPU user. This can be
reduced by moving VIRT_CPU_ACCOUNTING_GEN to jiffy granularity.

This will reduce timing accuracy on nohz_full CPUs to jiffy
based sampling, just like on normal CPUs. It results in
totally removing native_sched_clock() from the profile, and
significantly speeding up the syscall entry and exit path,
as well as irq entry and exit, and KVM guest entry & exit.

Additionally, only call the more expensive functions (and
advance the seqlock) when jiffies actually changed.

This code relies on another CPU advancing jiffies when the
system is busy. On a nohz_full system, this is done by a
housekeeping CPU.

A microbenchmark calling an invalid syscall number 10 million
times in a row speeds up an additional 30% over the numbers
with just the previous patches, for a total speedup of about
40% over 4.4 and 4.5-rc1.

Run times for the microbenchmark:

 4.4                            3.8 seconds
 4.5-rc1                        3.7 seconds
 4.5-rc1 + first patch          3.3 seconds
 4.5-rc1 + first 3 patches      3.1 seconds
 4.5-rc1 + all patches          2.3 seconds

On a non-NOHZ_FULL CPU (not the housekeeping CPU):

 all kernels                    1.86 seconds

Signed-off-by: Rik van Riel <r...@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Reviewed-by: Thomas Gleixner <t...@linutronix.de>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Mike Galbraith <efa...@gmx.de>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: cl...@redhat.com
Cc: eric.duma...@gmail.com
Cc: fweis...@gmail.com
Cc: l...@amacapital.net
Link: http://lkml.kernel.org/r/1455152907-18495-5-git-send-email-r...@redhat.com
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 kernel/sched/cputime.c | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b2ab2ff..01d9898 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -668,26 +668,25 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-static unsigned long long vtime_delta(struct task_struct *tsk)
+static cputime_t vtime_delta(struct task_struct *tsk)
 {
-       unsigned long long clock;
+       unsigned long now = READ_ONCE(jiffies);
 
-       clock = local_clock();
-       if (clock < tsk->vtime_snap)
+       if (time_before(now, (unsigned long)tsk->vtime_snap))
                return 0;
 
-       return clock - tsk->vtime_snap;
+       return jiffies_to_cputime(now - tsk->vtime_snap);
 }
 
 static cputime_t get_vtime_delta(struct task_struct *tsk)
 {
-       unsigned long long delta = vtime_delta(tsk);
+       unsigned long now = READ_ONCE(jiffies);
+       unsigned long delta = now - tsk->vtime_snap;
 
        WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
-       tsk->vtime_snap += delta;
+       tsk->vtime_snap = now;
 
-       /* CHECKME: always safe to convert nsecs to cputime? */
-       return nsecs_to_cputime(delta);
+       return jiffies_to_cputime(delta);
 }
 
 static void __vtime_account_system(struct task_struct *tsk)
@@ -699,6 +698,9 @@ static void __vtime_account_system(struct task_struct *tsk)
 
 void vtime_account_system(struct task_struct *tsk)
 {
+       if (!vtime_delta(tsk))
+               return;
+
        write_seqcount_begin(&tsk->vtime_seqcount);
        __vtime_account_system(tsk);
        write_seqcount_end(&tsk->vtime_seqcount);
@@ -707,7 +709,8 @@ void vtime_account_system(struct task_struct *tsk)
 void vtime_gen_account_irq_exit(struct task_struct *tsk)
 {
        write_seqcount_begin(&tsk->vtime_seqcount);
-       __vtime_account_system(tsk);
+       if (vtime_delta(tsk))
+               __vtime_account_system(tsk);
        if (context_tracking_in_user())
                tsk->vtime_snap_whence = VTIME_USER;
        write_seqcount_end(&tsk->vtime_seqcount);
@@ -718,16 +721,19 @@ void vtime_account_user(struct task_struct *tsk)
        cputime_t delta_cpu;
 
        write_seqcount_begin(&tsk->vtime_seqcount);
-       delta_cpu = get_vtime_delta(tsk);
        tsk->vtime_snap_whence = VTIME_SYS;
-       account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+       if (vtime_delta(tsk)) {
+               delta_cpu = get_vtime_delta(tsk);
+               account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+       }
        write_seqcount_end(&tsk->vtime_seqcount);
 }
 
 void vtime_user_enter(struct task_struct *tsk)
 {
        write_seqcount_begin(&tsk->vtime_seqcount);
-       __vtime_account_system(tsk);
+       if (vtime_delta(tsk))
+               __vtime_account_system(tsk);
        tsk->vtime_snap_whence = VTIME_USER;
        write_seqcount_end(&tsk->vtime_seqcount);
 }
@@ -742,7 +748,8 @@ void vtime_guest_enter(struct task_struct *tsk)
         * that can thus safely catch up with a tickless delta.
         */
        write_seqcount_begin(&tsk->vtime_seqcount);
-       __vtime_account_system(tsk);
+       if (vtime_delta(tsk))
+               __vtime_account_system(tsk);
        current->flags |= PF_VCPU;
        write_seqcount_end(&tsk->vtime_seqcount);
 }
@@ -772,7 +779,7 @@ void arch_vtime_task_switch(struct task_struct *prev)
 
        write_seqcount_begin(&current->vtime_seqcount);
        current->vtime_snap_whence = VTIME_SYS;
-       current->vtime_snap = sched_clock_cpu(smp_processor_id());
+       current->vtime_snap = jiffies;
        write_seqcount_end(&current->vtime_seqcount);
 }
 
@@ -783,7 +790,7 @@ void vtime_init_idle(struct task_struct *t, int cpu)
        local_irq_save(flags);
        write_seqcount_begin(&t->vtime_seqcount);
        t->vtime_snap_whence = VTIME_SYS;
-       t->vtime_snap = sched_clock_cpu(cpu);
+       t->vtime_snap = jiffies;
        write_seqcount_end(&t->vtime_seqcount);
        local_irq_restore(flags);
 }

Reply via email to