Userland threads are preempt()'d when hogging a CPU or when processing
an AST.  Currently, when such a thread is preempted, the scheduler looks
for an idle CPU and puts the thread on that CPU's run queue.  That means
involuntary context switches often result in a migration.
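
For reference, the relevant part of preempt() currently looks roughly
like this (paraphrased from the sched_bsd.c hunk below):

        SCHED_LOCK(s);
        p->p_priority = p->p_usrpri;
        p->p_stat = SRUN;
        p->p_cpu = sched_choosecpu(p);  /* may pick a different, idle CPU */
        setrunqueue(p);                 /* queued on p->p_cpu's run queue */
        p->p_ru.ru_nivcsw++;
        mi_switch();
        SCHED_UNLOCK(s);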

This is not a problem per se, and one could argue that if another CPU
is idle it makes sense to move.  However, with the KERNEL_LOCK(), moving
to another CPU won't necessarily allow the preempt()'d thread to run.
Worse, it increases contention.

If you add to this behavior the fact that sched_choosecpu() prefers idle
CPUs in a linear order, meaning CPU0 > CPU1 > ... > CPUN, you'll see
that the set of idle CPUs changes every time preempt() is called.
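
The effect of that linear preference can be pictured with a simplified
sketch.  This is not the real sched_choosecpu(), which weighs other
costs too; it only shows the "lowest-numbered idle CPU wins" part:

        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;

        /* Scan CPUs in numerical order and take the first idle one. */
        CPU_INFO_FOREACH(cii, ci) {
                if (cpuset_isset(&sched_idle_cpus, ci))
                        return (ci);    /* CPU0 beats CPU1 beats ... CPUN */
        }
        return (p->p_cpu);              /* nothing idle: stay where we are */

Because the chosen CPU immediately stops being idle, the next preempt()
tends to pick a different one, and the preempted threads keep spreading
around.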

I believe this behavior affects kernel threads as a side effect, since
the set of idle CPUs changes every time a thread is preempted.  With this
diff the 'softnet' thread didn't move on a 2-CPU machine during simple
benchmarks.  Without it, it plays ping-pong between CPUs.

The goal of this diff is to reduce the number of migrations.  You
can compare the values of 'sched_nomigrations' and 'sched_nmigrations'
with and without it.
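
As far as I know nothing exports those counters to userland, so here is
a small libkvm sketch (untested, symbol names assumed, needs read access
to /dev/kmem, compile with -lkvm) to dump them before and after a
benchmark run:

    #include <sys/types.h>

    #include <err.h>
    #include <fcntl.h>
    #include <kvm.h>
    #include <limits.h>
    #include <nlist.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            char errbuf[_POSIX2_LINE_MAX];
            struct nlist nl[] = {
                    { "_sched_nmigrations" },       /* times we migrated */
                    { "_sched_nomigrations" },      /* times we did not */
                    { NULL }
            };
            uint64_t nmig, nomig;
            kvm_t *kd;

            kd = kvm_openfiles(NULL, NULL, NULL, O_RDONLY, errbuf);
            if (kd == NULL)
                    errx(1, "kvm_openfiles: %s", errbuf);
            if (kvm_nlist(kd, nl) != 0)
                    errx(1, "kvm_nlist failed");
            if (kvm_read(kd, nl[0].n_value, &nmig, sizeof(nmig)) == -1 ||
                kvm_read(kd, nl[1].n_value, &nomig, sizeof(nomig)) == -1)
                    errx(1, "kvm_read: %s", kvm_geterr(kd));
            printf("sched_nmigrations=%llu sched_nomigrations=%llu\n",
                (unsigned long long)nmig, (unsigned long long)nomig);
            kvm_close(kd);
            return (0);
    }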

As usual, I'd like to know what the impact of this diff is on your
favorite benchmark.  Please test and report back.

Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.44
diff -u -p -r1.44 kern_sched.c
--- kern/kern_sched.c   21 Jan 2017 05:42:03 -0000      1.44
+++ kern/kern_sched.c   24 Jan 2017 03:08:23 -0000
@@ -51,6 +51,8 @@ uint64_t sched_noidle;                /* Times we didn
 uint64_t sched_stolen;         /* Times we stole proc from other cpus */
 uint64_t sched_choose;         /* Times we chose a cpu */
 uint64_t sched_wasidle;                /* Times we came out of idle */
+uint64_t sched_nvcsw;          /* voluntary context switches */
+uint64_t sched_nivcsw;         /* involuntary context switches */
 
 #ifdef MULTIPROCESSOR
 struct taskq *sbartq;
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.136
diff -u -p -r1.136 kern_synch.c
--- kern/kern_synch.c   21 Jan 2017 05:42:03 -0000      1.136
+++ kern/kern_synch.c   24 Jan 2017 03:08:23 -0000
@@ -296,6 +296,7 @@ sleep_finish(struct sleep_state *sls, in
        if (sls->sls_do_sleep && do_sleep) {
                p->p_stat = SSLEEP;
                p->p_ru.ru_nvcsw++;
+               sched_nvcsw++;
                SCHED_ASSERT_LOCKED();
                mi_switch();
        } else if (!do_sleep) {
@@ -481,6 +482,7 @@ sys_sched_yield(struct proc *p, void *v,
        p->p_stat = SRUN;
        setrunqueue(p);
        p->p_ru.ru_nvcsw++;
+       sched_nvcsw++;
        mi_switch();
        SCHED_UNLOCK(s);
 
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.43
diff -u -p -r1.43 sched_bsd.c
--- kern/sched_bsd.c    9 Mar 2016 13:38:50 -0000       1.43
+++ kern/sched_bsd.c    24 Jan 2017 03:18:24 -0000
@@ -302,6 +302,7 @@ yield(void)
        p->p_stat = SRUN;
        setrunqueue(p);
        p->p_ru.ru_nvcsw++;
+       sched_nvcsw++;
        mi_switch();
        SCHED_UNLOCK(s);
 }
@@ -327,9 +328,12 @@ preempt(struct proc *newp)
        SCHED_LOCK(s);
        p->p_priority = p->p_usrpri;
        p->p_stat = SRUN;
+#if 0
        p->p_cpu = sched_choosecpu(p);
+#endif
        setrunqueue(p);
        p->p_ru.ru_nivcsw++;
+       sched_nivcsw++;
        mi_switch();
        SCHED_UNLOCK(s);
 }
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.41
diff -u -p -r1.41 sched.h
--- sys/sched.h 17 Mar 2016 13:18:47 -0000      1.41
+++ sys/sched.h 24 Jan 2017 02:10:41 -0000
@@ -134,6 +134,9 @@ struct schedstate_percpu {
 extern int schedhz;                    /* ideally: 16 */
 extern int rrticks_init;               /* ticks per roundrobin() */
 
+extern uint64_t sched_nvcsw;           /* voluntary context switches */
+extern uint64_t sched_nivcsw;          /* involuntary context switches */
+
 struct proc;
 void schedclock(struct proc *);
 struct cpu_info;
