Please don't try this diff blindly; it won't make your machine faster.

In the past months I've been looking more closely at our scheduler.
At p2k16 I showed a handful of developers that when running a
browser on my x220 with HT enabled, a typical desktop usage, the
per-CPU runqueues were never balanced.  You often end up with no job
on one CPU and multiple jobs on the others.

Currently when a CPU doesn't have any job on its runqueue it tries
to "steal" a job from another CPU's runqueue.  If you look at the
stats on my machine running a lot of threaded apps (GNOME3,
Thunderbird, Firefox, Chrome), here's what I get:

# pstat -d ld sched_stolen sched_choose sched_wasidle
        sched_stolen: 1665846
        sched_choose: 3195615
        sched_wasidle: 1309253

Out of roughly 3.2M jobs dispatched, 1.6M got stolen.  That's 50% of
the jobs on my machine, and this ratio is stable for my usage.

On my test machine, an Atom with HT, I got the following numbers:

- after boot:
        sched_stolen: 570
        sched_choose: 10450
        sched_wasidle: 8936

- after playing a video on youtube w/ firefox:
        sched_stolen: 2153754
        sched_choose: 10261682
        sched_wasidle: 1525801

- after playing a video on youtube w/ chromium (after reboot):
        sched_stolen: 310000
        sched_choose: 6470258
        sched_wasidle: 934772

What's interesting here is that threaded apps (like firefox) seem to
trigger more "stealing".  It would be interesting to see if/how this
is related to the yield-busy-wait triggered by librthread's thrsleep()
usage explained some months ago.
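
Doing the math on the counters above, roughly 21% of the dispatched
jobs got stolen with firefox (2153754 / 10261682) versus roughly 5%
with chromium (310000 / 6470258).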

What's also interesting is that the number of stolen jobs seems to
be higher the more CPUs you have.  Elementary, my dear Watson?  I
observed that for the same workload, playing an HD video in firefox
while compiling a kernel with make -j4, I see 50% of stolen jobs
with 4 CPUs and 20% with 2 CPUs.  Sadly I don't have a bigger machine
to test on.  How bad can it be?

So I looked at how this situation could be improved.  My goal was to
be able to compile a kernel while watching a video in my browser without
having my audio stutter.  I started by removing the "stealing" logic but
the situation didn't improve.  Then I tried to play with the cost
calculation and failed.  Then I decided to completely remove the per-CPU
runqueues and came up with the diff below...

There are too many things that I still don't understand, so I'm not
asking for an ok, but I'd appreciate it if people could test this diff
and report back.  My goal is currently to get a better understanding of
our scheduler in order to hopefully improve it.

By using a single runqueue I prioritise latency over throughput.  That
means your performance might degrade, but at least I can watch my HD
video while doing a "make -j4".
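
If you don't want to start with the diff, here is a rough userland
sketch (simplified names, not the actual kernel code) of what the
single-runqueue selection below boils down to: one global array of
queues indexed by priority, a bitmask of non-empty queues, and a
selection loop that skips threads pegged to another CPU.

/* Userland illustration only; names are simplified from the kernel diff. */
#include <sys/queue.h>
#include <stdio.h>

#define NQS	32			/* one queue per priority level */

struct thread {
	TAILQ_ENTRY(thread) runq;
	int pri;			/* 0 = highest priority */
	int pegged_cpu;			/* -1 if not pegged to a CPU */
};

static TAILQ_HEAD(, thread) qs[NQS];	/* single, global set of queues */
static unsigned int whichqs;		/* bit i set => qs[i] not empty */

static void
runq_init(void)
{
	int i;

	for (i = 0; i < NQS; i++)
		TAILQ_INIT(&qs[i]);
}

static void
runq_insert(struct thread *t)
{
	TAILQ_INSERT_TAIL(&qs[t->pri], t, runq);
	whichqs |= 1U << t->pri;
}

/* Pick the best thread the given CPU is allowed to run, or NULL. */
static struct thread *
runq_select(int cpu)
{
	struct thread *t;
	int q;

	for (q = 0; q < NQS; q++) {
		if ((whichqs & (1U << q)) == 0)
			continue;
		TAILQ_FOREACH(t, &qs[q], runq) {
			/* Never pick a thread pegged to another CPU. */
			if (t->pegged_cpu != -1 && t->pegged_cpu != cpu)
				continue;
			TAILQ_REMOVE(&qs[q], t, runq);
			if (TAILQ_EMPTY(&qs[q]))
				whichqs &= ~(1U << q);
			return (t);
		}
	}
	return (NULL);
}

int
main(void)
{
	struct thread a = { .pri = 10, .pegged_cpu = -1 };
	struct thread b = { .pri = 4, .pegged_cpu = 1 };

	runq_init();
	runq_insert(&a);
	runq_insert(&b);

	/* CPU 0 skips the thread pegged to CPU 1 and picks the other. */
	printf("cpu0 picked pri %d\n", runq_select(0)->pri);
	return (0);
}

Any idle CPU picks directly from this shared structure instead of
stealing from another CPU's queue.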

As a bonus, the diff below also greatly reduces the number of IPIs on my
systems.

Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.41
diff -u -p -r1.41 sched.h
--- sys/sched.h 17 Mar 2016 13:18:47 -0000      1.41
+++ sys/sched.h 6 Jul 2016 17:31:11 -0000
@@ -89,9 +89,10 @@
 
 #define        SCHED_NQS       32                      /* 32 run queues. */
 
+#ifdef _KERNEL
+
 /*
  * Per-CPU scheduler state.
- * XXX - expose to userland for now.
  */
 struct schedstate_percpu {
        struct timespec spc_runtime;    /* time curproc started running */
@@ -102,23 +103,16 @@ struct schedstate_percpu {
        int spc_rrticks;                /* ticks until roundrobin() */
        int spc_pscnt;                  /* prof/stat counter */
        int spc_psdiv;                  /* prof/stat divisor */ 
+       unsigned int spc_npeg;          /* nb. of pegged threads on runqueue */
        struct proc *spc_idleproc;      /* idle proc for this cpu */
 
-       u_int spc_nrun;                 /* procs on the run queues */
        fixpt_t spc_ldavg;              /* shortest load avg. for this cpu */
 
-       TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
-       volatile uint32_t spc_whichqs;
-
-#ifdef notyet
-       struct proc *spc_reaper;        /* dead proc reaper */
-#endif
        LIST_HEAD(,proc) spc_deadproc;
 
        volatile int spc_barrier;       /* for sched_barrier() */
 };
 
-#ifdef _KERNEL
 
 /* spc_flags */
 #define SPCF_SEENRR             0x0001  /* process has seen roundrobin() */
@@ -141,14 +135,13 @@ void roundrobin(struct cpu_info *);
 void scheduler_start(void);
 void userret(struct proc *p);
 
+void sched_init(void);
 void sched_init_cpu(struct cpu_info *);
 void sched_idle(void *);
 void sched_exit(struct proc *);
 void mi_switch(void);
 void cpu_switchto(struct proc *, struct proc *);
 struct proc *sched_chooseproc(void);
-struct cpu_info *sched_choosecpu(struct proc *);
-struct cpu_info *sched_choosecpu_fork(struct proc *parent, int);
 void cpu_idle_enter(void);
 void cpu_idle_cycle(void);
 void cpu_idle_leave(void);
@@ -163,11 +156,11 @@ void sched_start_secondary_cpus(void);
 void sched_stop_secondary_cpus(void);
 #endif
 
-#define cpu_is_idle(ci)        ((ci)->ci_schedstate.spc_whichqs == 0)
-
-void sched_init_runqueues(void);
 void setrunqueue(struct proc *);
 void remrunqueue(struct proc *);
+
+extern volatile uint32_t sched_whichqs;
+#define sched_qs_empty(ci)     (sched_whichqs == 0)
 
 /* Inherit the parent's scheduler history */
 #define scheduler_fork_hook(parent, child) do {				\
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.43
diff -u -p -r1.43 sched_bsd.c
--- kern/sched_bsd.c    9 Mar 2016 13:38:50 -0000       1.43
+++ kern/sched_bsd.c    6 Jul 2016 17:31:11 -0000
@@ -105,7 +105,7 @@ roundrobin(struct cpu_info *ci)
                }
        }
 
-       if (spc->spc_nrun)
+       if (!sched_qs_empty(ci))
                need_resched(ci);
 }
 
@@ -300,6 +300,7 @@ yield(void)
        SCHED_LOCK(s);
        p->p_priority = p->p_usrpri;
        p->p_stat = SRUN;
+       KASSERT(p->p_cpu != NULL);
        setrunqueue(p);
        p->p_ru.ru_nvcsw++;
        mi_switch();
@@ -327,7 +328,7 @@ preempt(struct proc *newp)
        SCHED_LOCK(s);
        p->p_priority = p->p_usrpri;
        p->p_stat = SRUN;
-       p->p_cpu = sched_choosecpu(p);
+       KASSERT(p->p_cpu != NULL);
        setrunqueue(p);
        p->p_ru.ru_nivcsw++;
        mi_switch();
@@ -418,6 +419,7 @@ mi_switch(void)
        }
 
        clear_resched(curcpu());
+       spc->spc_curpriority = p->p_usrpri;
 
        SCHED_ASSERT_LOCKED();
 
@@ -454,25 +456,15 @@ mi_switch(void)
 #endif
 }
 
-static __inline void
+/*
+ * If the last CPU of thread ``p'' is currently running a lower
+ * priority thread, force a reschedule.
+ */
+static inline void
 resched_proc(struct proc *p, u_char pri)
 {
-       struct cpu_info *ci;
+       struct cpu_info *ci = p->p_cpu;
 
-       /*
-        * XXXSMP
-        * This does not handle the case where its last
-        * CPU is running a higher-priority process, but every
-        * other CPU is running a lower-priority process.  There
-        * are ways to handle this situation, but they're not
-        * currently very pretty, and we also need to weigh the
-        * cost of moving a process from one CPU to another.
-        *
-        * XXXSMP
-        * There is also the issue of locking the other CPU's
-        * sched state, which we currently do not do.
-        */
-       ci = (p->p_cpu != NULL) ? p->p_cpu : curcpu();
        if (pri < ci->ci_schedstate.spc_curpriority)
                need_resched(ci);
 }
@@ -507,7 +499,7 @@ setrunnable(struct proc *p)
                break;
        }
        p->p_stat = SRUN;
-       p->p_cpu = sched_choosecpu(p);
+       KASSERT(p->p_cpu != NULL);
        setrunqueue(p);
        if (p->p_slptime > 1)
                updatepri(p);
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.132
diff -u -p -r1.132 kern_synch.c
--- kern/kern_synch.c   4 Jul 2016 16:12:52 -0000       1.132
+++ kern/kern_synch.c   6 Jul 2016 17:31:11 -0000
@@ -266,6 +266,7 @@ sleep_finish(struct sleep_state *sls, in
                mi_switch();
        } else if (!do_sleep) {
                unsleep(p);
+               p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
        }
 
 #ifdef DIAGNOSTIC
@@ -273,7 +274,6 @@ sleep_finish(struct sleep_state *sls, in
                panic("sleep_finish !SONPROC");
 #endif
 
-       p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
        SCHED_UNLOCK(sls->sls_s);
 
        /*
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.43
diff -u -p -r1.43 kern_sched.c
--- kern/kern_sched.c   3 Jun 2016 15:21:23 -0000       1.43
+++ kern/kern_sched.c   6 Jul 2016 17:31:11 -0000
@@ -26,36 +26,37 @@
 #include <sys/mutex.h>
 #include <sys/task.h>
 
-#include <uvm/uvm_extern.h>
+TAILQ_HEAD(, proc)             sched_qs[SCHED_NQS];
+volatile uint32_t              sched_whichqs;
 
-void sched_kthreads_create(void *);
+#ifdef MULTIPROCESSOR
+struct taskq *sbartq;
+#endif
 
-int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
-struct proc *sched_steal_proc(struct cpu_info *);
+struct proc *sched_select(struct cpu_info *);
+void sched_kthreads_create(void *);
 
-/*
- * To help choosing which cpu should run which process we keep track
- * of cpus which are currently idle and which cpus have processes
- * queued.
- */
-struct cpuset sched_idle_cpus;
-struct cpuset sched_queued_cpus;
-struct cpuset sched_all_cpus;
+void
+sched_init(void)
+{
+       struct cpu_info *ci = curcpu();
+       int i;
 
-/*
- * Some general scheduler counters.
- */
-uint64_t sched_nmigrations;    /* Cpu migration counter */
-uint64_t sched_nomigrations;   /* Cpu no migration counter */
-uint64_t sched_noidle;         /* Times we didn't pick the idle task */
-uint64_t sched_stolen;         /* Times we stole proc from other cpus */
-uint64_t sched_choose;         /* Times we chose a cpu */
-uint64_t sched_wasidle;                /* Times we came out of idle */
+       for (i = 0; i < SCHED_NQS; i++)
+               TAILQ_INIT(&sched_qs[i]);
+       sched_whichqs = 0;
 
 #ifdef MULTIPROCESSOR
-struct taskq *sbartq;
+       sbartq = taskq_create("sbar", 1, IPL_NONE,
+           TASKQ_MPSAFE | TASKQ_CANTSLEEP);
+       if (sbartq == NULL)
+               panic("unable to create sbar taskq");
 #endif
 
+       ci->ci_randseed = (arc4random() & 0x7fffffff) + 1;
+       sched_init_cpu(ci);
+}
+
 /*
  * A few notes about cpu_switchto that is implemented in MD code.
  *
@@ -74,30 +75,18 @@ struct taskq *sbartq;
  */
 
 /*
- * sched_init_cpu is called from main() for the boot cpu, then it's the
- * responsibility of the MD code to call it for all other cpus.
+ * sched_init_cpu is called from sched_init() for the boot cpu, then
+ * it's the responsibility of the MD code to call it for all other cpus.
  */
 void
 sched_init_cpu(struct cpu_info *ci)
 {
        struct schedstate_percpu *spc = &ci->ci_schedstate;
-       int i;
-
-       for (i = 0; i < SCHED_NQS; i++)
-               TAILQ_INIT(&spc->spc_qs[i]);
 
        spc->spc_idleproc = NULL;
-
-       kthread_create_deferred(sched_kthreads_create, ci);
-
        LIST_INIT(&spc->spc_deadproc);
 
-       /*
-        * Slight hack here until the cpuset code handles cpu_info
-        * structures.
-        */
-       cpuset_init_cpu(ci);
-       cpuset_add(&sched_all_cpus, ci);
+       kthread_create_deferred(sched_kthreads_create, ci);
 }
 
 void
@@ -115,10 +104,46 @@ sched_kthreads_create(void *v)
        /* Name it as specified. */
        snprintf(spc->spc_idleproc->p_comm, sizeof(spc->spc_idleproc->p_comm),
            "idle%d", num);
+       /* Always triggers a reschedule when an idle thread is running. */
+       spc->spc_idleproc->p_usrpri = MAXPRI;
 
        num++;
 }
 
+/*
+ * Returns 1 if a CPU can idle, 0 otherwise.
+ */
+static inline int
+can_idle(struct cpu_info *ci)
+{
+#ifdef MULTIPROCESSOR
+       struct schedstate_percpu *spc = &ci->ci_schedstate;
+#endif /* MULTIPROCESSOR */
+
+       /*
+        * As soon as a wakeup() or roundrobin() called need_resched()
+        * for this CPU, it has to go through mi_switch() to clear the
+        * resched flag.
+        *
+        * Yes, it is racy as the thread that triggered the reschedule
+        * might already be executing on another CPU.  In this case,
+        * if there's nothing else on the runqueue, this CPU will come
+        * back in its idle loop.
+        */
+       if (want_resched(ci))
+               return (0);
+
+       if (sched_qs_empty(ci))
+               return (1);
+
+#ifdef MULTIPROCESSOR
+       if ((spc->spc_schedflags & SPCF_SHOULDHALT) && (spc->spc_npeg == 0))
+               return (1);
+#endif /* MULTIPROCESSOR */
+
+       return (0);
+}
+
 void
 sched_idle(void *v)
 {
@@ -136,19 +161,17 @@ sched_idle(void *v)
         * just go away for a while.
         */
        SCHED_LOCK(s);
-       cpuset_add(&sched_idle_cpus, ci);
        p->p_stat = SSLEEP;
        p->p_cpu = ci;
        atomic_setbits_int(&p->p_flag, P_CPUPEG);
        mi_switch();
-       cpuset_del(&sched_idle_cpus, ci);
        SCHED_UNLOCK(s);
 
        KASSERT(ci == curcpu());
        KASSERT(curproc == spc->spc_idleproc);
 
        while (1) {
-               while (!cpu_is_idle(curcpu())) {
+               while (!can_idle(ci)) {
                        struct proc *dead;
 
                        SCHED_LOCK(s);
@@ -164,24 +187,20 @@ sched_idle(void *v)
 
                splassert(IPL_NONE);
 
-               cpuset_add(&sched_idle_cpus, ci);
                cpu_idle_enter();
-               while (spc->spc_whichqs == 0) {
+               while (!want_resched(ci)) {
 #ifdef MULTIPROCESSOR
                        if (spc->spc_schedflags & SPCF_SHOULDHALT &&
                            (spc->spc_schedflags & SPCF_HALTED) == 0) {
-                               cpuset_del(&sched_idle_cpus, ci);
-                               SCHED_LOCK(s);
+                               KASSERT(spc->spc_npeg == 0);
                                atomic_setbits_int(&spc->spc_schedflags,
-                                   spc->spc_whichqs ? 0 : SPCF_HALTED);
-                               SCHED_UNLOCK(s);
+                                   SPCF_HALTED);
                                wakeup(spc);
                        }
-#endif
+#endif /* MULTIPROCESSOR */
                        cpu_idle_cycle();
                }
                cpu_idle_leave();
-               cpuset_del(&sched_idle_cpus, ci);
        }
 }
 
@@ -216,100 +235,94 @@ sched_exit(struct proc *p)
        SCHED_LOCK(s);
        idle = spc->spc_idleproc;
        idle->p_stat = SRUN;
+       idle->p_cpu = curcpu();
        cpu_switchto(NULL, idle);
        panic("cpu_switchto returned");
 }
 
-/*
- * Run queue management.
- */
-void
-sched_init_runqueues(void)
-{
-#ifdef MULTIPROCESSOR
-       sbartq = taskq_create("sbar", 1, IPL_NONE,
-           TASKQ_MPSAFE | TASKQ_CANTSLEEP);
-       if (sbartq == NULL)
-               panic("unable to create sbar taskq");
-#endif
-}
-
 void
 setrunqueue(struct proc *p)
 {
-       struct schedstate_percpu *spc;
        int queue = p->p_priority >> 2;
 
        SCHED_ASSERT_LOCKED();
-       spc = &p->p_cpu->ci_schedstate;
-       spc->spc_nrun++;
 
-       TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
-       spc->spc_whichqs |= (1 << queue);
-       cpuset_add(&sched_queued_cpus, p->p_cpu);
+       TAILQ_INSERT_TAIL(&sched_qs[queue], p, p_runq);
+       sched_whichqs |= (1 << queue);
 
-       if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
-               cpu_unidle(p->p_cpu);
+       if (p->p_flag & P_CPUPEG)
+               p->p_cpu->ci_schedstate.spc_npeg++;
 }
 
 void
 remrunqueue(struct proc *p)
 {
-       struct schedstate_percpu *spc;
        int queue = p->p_priority >> 2;
 
        SCHED_ASSERT_LOCKED();
-       spc = &p->p_cpu->ci_schedstate;
-       spc->spc_nrun--;
 
-       TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
-       if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
-               spc->spc_whichqs &= ~(1 << queue);
-               if (spc->spc_whichqs == 0)
-                       cpuset_del(&sched_queued_cpus, p->p_cpu);
-       }
+       TAILQ_REMOVE(&sched_qs[queue], p, p_runq);
+       if (TAILQ_EMPTY(&sched_qs[queue]))
+               sched_whichqs &= ~(1 << queue);
+
+       if (p->p_flag & P_CPUPEG)
+               p->p_cpu->ci_schedstate.spc_npeg--;
 }
 
+/*
+ * Select the first thread that can run on cpu ``ci'' from the runqueue.
+ *
+ * This is O(1) when there's no pegged thread in the runqueue.
+ */
 struct proc *
-sched_chooseproc(void)
+sched_select(struct cpu_info *ci)
 {
-       struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
+#ifdef MULTIPROCESSOR
+       struct schedstate_percpu *spc = &ci->ci_schedstate;
+#endif /* MULTIPROCESSOR */
        struct proc *p;
        int queue;
 
-       SCHED_ASSERT_LOCKED();
+       if (sched_qs_empty(ci))
+               return (NULL);
 
+       for (queue = 0; queue < SCHED_NQS; queue++) {
+               TAILQ_FOREACH(p, &sched_qs[queue], p_runq) {
 #ifdef MULTIPROCESSOR
-       if (spc->spc_schedflags & SPCF_SHOULDHALT) {
-               if (spc->spc_whichqs) {
-                       for (queue = 0; queue < SCHED_NQS; queue++) {
-                               while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
-                                       remrunqueue(p);
-                                       p->p_cpu = sched_choosecpu(p);
-                                       setrunqueue(p);
-                                       if (p->p_cpu == curcpu()) {
-                                               KASSERT(p->p_flag & P_CPUPEG);
-                                               goto again;
-                                       }
-                               }
-                       }
+                       /* Never run a thread pegged to another CPU. */
+                       if ((p->p_flag & P_CPUPEG) && p->p_cpu != ci)
+                               continue;
+
+                       /* If it should halt, only run pegged threads. */
+                       if ((spc->spc_schedflags & SPCF_SHOULDHALT) &&
+                           (p->p_flag & P_CPUPEG) == 0)
+                               continue;
+#endif /* MULTIPROCESSOR */
+
+                       return (p);
                }
-               p = spc->spc_idleproc;
-               KASSERT(p);
-               KASSERT(p->p_wchan == NULL);
-               p->p_stat = SRUN;
-               return (p);
        }
-#endif
+
+       return (NULL);
+}
+
+struct proc *
+sched_chooseproc(void)
+{
+       struct cpu_info *ci = curcpu();
+       struct proc *p = NULL;
+
+       SCHED_ASSERT_LOCKED();
 
 again:
-       if (spc->spc_whichqs) {
-               queue = ffs(spc->spc_whichqs) - 1;
-               p = TAILQ_FIRST(&spc->spc_qs[queue]);
+       p = sched_select(ci);
+
+       if (p != NULL) {
                remrunqueue(p);
-               sched_noidle++;
                KASSERT(p->p_stat == SRUN);
-       } else if ((p = sched_steal_proc(curcpu())) == NULL) {
+       } else {
+               struct schedstate_percpu *spc = &ci->ci_schedstate;
+
                p = spc->spc_idleproc;
                if (p == NULL) {
                         int s;
@@ -328,263 +341,11 @@ again:
                 }
                KASSERT(p);
                p->p_stat = SRUN;
-       } 
-
-       KASSERT(p->p_wchan == NULL);
-       return (p);     
-}
-
-struct cpu_info *
-sched_choosecpu_fork(struct proc *parent, int flags)
-{
-#ifdef MULTIPROCESSOR
-       struct cpu_info *choice = NULL;
-       fixpt_t load, best_load = ~0;
-       int run, best_run = INT_MAX;
-       struct cpu_info *ci;
-       struct cpuset set;
-
-#if 0
-       /*
-        * XXX
-        * Don't do this until we have a painless way to move the cpu in exec.
-        * Preferably when nuking the old pmap and getting a new one on a
-        * new cpu.
-        */
-       /*
-        * PPWAIT forks are simple. We know that the parent will not
-        * run until we exec and choose another cpu, so we just steal its
-        * cpu.
-        */
-       if (flags & FORK_PPWAIT)
-               return (parent->p_cpu);
-#endif
-
-       /*
-        * Look at all cpus that are currently idle and have nothing queued.
-        * If there are none, pick the one with least queued procs first,
-        * then the one with lowest load average.
-        */
-       cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
-       cpuset_intersection(&set, &set, &sched_all_cpus);
-       if (cpuset_first(&set) == NULL)
-               cpuset_copy(&set, &sched_all_cpus);
-
-       while ((ci = cpuset_first(&set)) != NULL) {
-               cpuset_del(&set, ci);
-
-               load = ci->ci_schedstate.spc_ldavg;
-               run = ci->ci_schedstate.spc_nrun;
-
-               if (choice == NULL || run < best_run ||
-                   (run == best_run &&load < best_load)) {
-                       choice = ci;
-                       best_load = load;
-                       best_run = run;
-               }
-       }
-
-       return (choice);
-#else
-       return (curcpu());
-#endif
-}
-
-struct cpu_info *
-sched_choosecpu(struct proc *p)
-{
-#ifdef MULTIPROCESSOR
-       struct cpu_info *choice = NULL;
-       int last_cost = INT_MAX;
-       struct cpu_info *ci;
-       struct cpuset set;
-
-       /*
-        * If pegged to a cpu, don't allow it to move.
-        */
-       if (p->p_flag & P_CPUPEG)
-               return (p->p_cpu);
-
-       sched_choose++;
-
-       /*
-        * Look at all cpus that are currently idle and have nothing queued.
-        * If there are none, pick the cheapest of those.
-        * (idle + queued could mean that the cpu is handling an interrupt
-        * at this moment and haven't had time to leave idle yet).
-        */
-       cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
-       cpuset_intersection(&set, &set, &sched_all_cpus);
-
-       /*
-        * First, just check if our current cpu is in that set, if it is,
-        * this is simple.
-        * Also, our cpu might not be idle, but if it's the current cpu
-        * and it has nothing else queued and we're curproc, take it.
-        */
-       if (cpuset_isset(&set, p->p_cpu) ||
-           (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
-           (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
-           curproc == p)) {
-               sched_wasidle++;
-               return (p->p_cpu);
        }
 
-       if (cpuset_first(&set) == NULL)
-               cpuset_copy(&set, &sched_all_cpus);
-
-       while ((ci = cpuset_first(&set)) != NULL) {
-               int cost = sched_proc_to_cpu_cost(ci, p);
-
-               if (choice == NULL || cost < last_cost) {
-                       choice = ci;
-                       last_cost = cost;
-               }
-               cpuset_del(&set, ci);
-       }
-
-       if (p->p_cpu != choice)
-               sched_nmigrations++;
-       else
-               sched_nomigrations++;
-
-       return (choice);
-#else
-       return (curcpu());
-#endif
-}
-
-/*
- * Attempt to steal a proc from some cpu.
- */
-struct proc *
-sched_steal_proc(struct cpu_info *self)
-{
-       struct proc *best = NULL;
-#ifdef MULTIPROCESSOR
-       struct schedstate_percpu *spc;
-       int bestcost = INT_MAX;
-       struct cpu_info *ci;
-       struct cpuset set;
-
-       KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);
-
-       cpuset_copy(&set, &sched_queued_cpus);
-
-       while ((ci = cpuset_first(&set)) != NULL) {
-               struct proc *p;
-               int queue;
-               int cost;
-
-               cpuset_del(&set, ci);
-
-               spc = &ci->ci_schedstate;
-
-               queue = ffs(spc->spc_whichqs) - 1;
-               TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
-                       if (p->p_flag & P_CPUPEG)
-                               continue;
-
-                       cost = sched_proc_to_cpu_cost(self, p);
-
-                       if (best == NULL || cost < bestcost) {
-                               best = p;
-                               bestcost = cost;
-                       }
-               }
-       }
-       if (best == NULL)
-               return (NULL);
-
-       spc = &best->p_cpu->ci_schedstate;
-       remrunqueue(best);
-       best->p_cpu = self;
-
-       sched_stolen++;
-#endif
-       return (best);
-}
-
-#ifdef MULTIPROCESSOR
-/*
- * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).
- */
-static int
-log2(unsigned int i)
-{
-       int ret = 0;
-
-       while (i >>= 1)
-               ret++;
-
-       return (ret);
-}
-
-/*
- * Calculate the cost of moving the proc to this cpu.
- * 
- * What we want is some guesstimate of how much "performance" it will
- * cost us to move the proc here. Not just for caches and TLBs and NUMA
- * memory, but also for the proc itself. A highly loaded cpu might not
- * be the best candidate for this proc since it won't get run.
- *
- * Just total guesstimates for now.
- */
-
-int sched_cost_load = 1;
-int sched_cost_priority = 1;
-int sched_cost_runnable = 3;
-int sched_cost_resident = 1;
-#endif
-
-int
-sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
-{
-       int cost = 0;
-#ifdef MULTIPROCESSOR
-       struct schedstate_percpu *spc;
-       int l2resident = 0;
-
-       spc = &ci->ci_schedstate;
-
-       /*
-        * First, account for the priority of the proc we want to move.
-        * More willing to move, the lower the priority of the destination
-        * and the higher the priority of the proc.
-        */
-       if (!cpuset_isset(&sched_idle_cpus, ci)) {
-               cost += (p->p_priority - spc->spc_curpriority) *
-                   sched_cost_priority;
-               cost += sched_cost_runnable;
-       }
-       if (cpuset_isset(&sched_queued_cpus, ci))
-               cost += spc->spc_nrun * sched_cost_runnable;
-
-       /*
-        * Try to avoid the primary cpu as it handles hardware interrupts.
-        *
-        * XXX Needs to be revisited when we distribute interrupts
-        * over cpus.
-        */
-       if (CPU_IS_PRIMARY(ci))
-               cost += sched_cost_runnable;
-
-       /*
-        * Higher load on the destination means we don't want to go there.
-        */
-       cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);
-
-       /*
-        * If the proc is on this cpu already, lower the cost by how much
-        * it has been running and an estimate of its footprint.
-        */
-       if (p->p_cpu == ci && p->p_slptime == 0) {
-               l2resident =
-                   log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
-               cost -= l2resident * sched_cost_resident;
-       }
-#endif
-       return (cost);
+       KASSERT(p->p_wchan == NULL);
+       p->p_cpu = ci;
+       return (p);
 }
 
 /*
@@ -620,7 +381,6 @@ sched_start_secondary_cpus(void)
 
                if (CPU_IS_PRIMARY(ci))
                        continue;
-               cpuset_add(&sched_all_cpus, ci);
                atomic_clearbits_int(&spc->spc_schedflags,
                    SPCF_SHOULDHALT | SPCF_HALTED);
        }
@@ -640,7 +400,6 @@ sched_stop_secondary_cpus(void)
 
                if (CPU_IS_PRIMARY(ci))
                        continue;
-               cpuset_del(&sched_all_cpus, ci);
                atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
        }
        CPU_INFO_FOREACH(cii, ci) {
@@ -697,14 +456,14 @@ sched_barrier(struct cpu_info *ci)
        }
 }
 
-#else
+#else /* MULTIPROCESSOR */
 
 void
 sched_barrier(struct cpu_info *ci)
 {
 }
 
-#endif
+#endif /* MULTIPROCESSOR */
 
 /*
  * Functions to manipulate cpu sets.
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.187
diff -u -p -r1.187 kern_fork.c
--- kern/kern_fork.c    25 Apr 2016 20:18:31 -0000      1.187
+++ kern/kern_fork.c    6 Jul 2016 17:31:11 -0000
@@ -486,7 +486,7 @@ fork1(struct proc *curp, int flags, void
        if ((flags & FORK_IDLE) == 0) {
                SCHED_LOCK(s);
                p->p_stat = SRUN;
-               p->p_cpu = sched_choosecpu_fork(curp, flags);
+               p->p_cpu = curcpu();
                setrunqueue(p);
                SCHED_UNLOCK(s);
        } else
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.90
diff -u -p -r1.90 kern_clock.c
--- kern/kern_clock.c   24 Mar 2016 05:40:56 -0000      1.90
+++ kern/kern_clock.c   6 Jul 2016 17:31:11 -0000
@@ -400,7 +400,8 @@ statclock(struct clockframe *frame)
        spc->spc_pscnt = psdiv;
 
        if (p != NULL) {
-               p->p_cpticks++;
+               if (p != spc->spc_idleproc)
+                       p->p_cpticks++;
                /*
                 * If no schedclock is provided, call it here at ~~12-25 Hz;
                 * ~~16 Hz is best
Index: kern/init_main.c
===================================================================
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.253
diff -u -p -r1.253 init_main.c
--- kern/init_main.c    17 May 2016 23:28:03 -0000      1.253
+++ kern/init_main.c    6 Jul 2016 17:31:11 -0000
@@ -328,17 +328,16 @@ main(void *framep)
         */
        (void)chgproccnt(0, 1);
 
-       /* Initialize run queues */
-       sched_init_runqueues();
        sleep_queue_init();
-       sched_init_cpu(curcpu());
-       p->p_cpu->ci_randseed = (arc4random() & 0x7fffffff) + 1;
 
        /* Initialize task queues */
        taskq_init();
 
        /* Initialize the interface/address trees */
        ifinit();
+
+       /* Initialize the scheduler */
+       sched_init();
 
        /* Lock the kernel on behalf of proc0. */
        KERNEL_LOCK();
Index: dev/acpi/acpicpu.c
===================================================================
RCS file: /cvs/src/sys/dev/acpi/acpicpu.c,v
retrieving revision 1.74
diff -u -p -r1.74 acpicpu.c
--- dev/acpi/acpicpu.c  17 Mar 2016 13:18:47 -0000      1.74
+++ dev/acpi/acpicpu.c  6 Jul 2016 17:31:11 -0000
@@ -1188,7 +1188,7 @@ acpicpu_idle(void)
 #endif
 
                /* something already queued? */
-               if (!cpu_is_idle(ci))
+               if (want_resched(ci))
                        return;
 
                /*
@@ -1204,7 +1204,7 @@ acpicpu_idle(void)
                hints = (unsigned)best->address;
                microuptime(&start);
                atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING);
-               if (cpu_is_idle(ci)) {
+               if (!want_resched(ci)) {
                        /* intel errata AAI65: cflush before monitor */
                        if (ci->ci_cflushsz != 0) {
                                membar_sync();
Index: arch/sparc64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/include/cpu.h,v
retrieving revision 1.88
diff -u -p -r1.88 cpu.h
--- arch/sparc64/include/cpu.h  28 Aug 2015 23:28:39 -0000      1.88
+++ arch/sparc64/include/cpu.h  6 Jul 2016 17:31:11 -0000
@@ -240,8 +240,9 @@ extern void (*cpu_start_clock)(void);
  * Preempt the current process if in interrupt from user mode,
  * or after the current trap/syscall if in system mode.
  */
-extern void need_resched(struct cpu_info *);
-#define clear_resched(ci) (ci)->ci_want_resched = 0
+void need_resched(struct cpu_info *);
+#define        clear_resched(ci) (ci)->ci_want_resched = 0
+#define        want_resched(ci)  ((ci)->ci_want_resched)
 
 /*
  * This is used during profiling to integrate system time.
Index: arch/sparc/sparc/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc/sparc/trap.c,v
retrieving revision 1.73
diff -u -p -r1.73 trap.c
--- arch/sparc/sparc/trap.c     27 Feb 2016 13:08:07 -0000      1.73
+++ arch/sparc/sparc/trap.c     6 Jul 2016 17:31:11 -0000
@@ -199,7 +199,7 @@ void syscall(register_t, struct trapfram
 
 int ignore_bogus_traps = 0;
 
-int want_ast = 0;
+int cpu_want_ast = 0;
 
 /*
  * If someone stole the FPU while we were away, do not enable it
@@ -300,9 +300,9 @@ trap(type, psr, pc, tf)
                break;
 
        case T_AST:
-               want_ast = 0;
+               cpu_want_ast = 0;
                uvmexp.softs++;
-               mi_ast(p, want_resched);
+               mi_ast(p, want_resched(curcpu()));
                break;
 
        case T_ILLINST:
Index: arch/sparc/sparc/locore.s
===================================================================
RCS file: /cvs/src/sys/arch/sparc/sparc/locore.s,v
retrieving revision 1.101
diff -u -p -r1.101 locore.s
--- arch/sparc/sparc/locore.s   23 May 2016 20:11:49 -0000      1.101
+++ arch/sparc/sparc/locore.s   6 Jul 2016 17:31:11 -0000
@@ -3093,8 +3093,8 @@ rft_kernel:
  * If returning to a valid window, just set psr and return.
  */
 rft_user:
-!      sethi   %hi(_C_LABEL(want_ast)), %l7    ! (done below)
-       ld      [%l7 + %lo(_C_LABEL(want_ast))], %l7
+!      sethi   %hi(_C_LABEL(cpu_want_ast)), %l7        ! (done below)
+       ld      [%l7 + %lo(_C_LABEL(cpu_want_ast))], %l7
        tst     %l7                     ! want AST trap?
        bne,a   softtrap                ! yes, re-enter trap with type T_AST
         mov    T_AST, %o0
@@ -3221,7 +3221,7 @@ rft_user_or_recover_pcb_windows:
        ld      [%l6 + PCB_NSAVED], %l7
        tst     %l7
        bz,a    rft_user
-        sethi  %hi(_C_LABEL(want_ast)), %l7    ! first instr of rft_user
+       sethi   %hi(_C_LABEL(cpu_want_ast)), %l7 ! first instr of rft_user
 
        bg,a    softtrap                ! if (pcb_nsaved > 0)
         mov    T_WINOF, %o0            !       trap(T_WINOF);
@@ -4317,7 +4317,7 @@ ENTRY(write_user_windows)
         nop
 
 
-       .comm   _C_LABEL(want_resched),4
+       .comm   _C_LABEL(cpu_want_resched),4
 /*
  * Masterpaddr is the p->p_addr of the last process on the processor.
  * XXX masterpaddr is almost the same as cpcb
Index: arch/sparc/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/sparc/include/cpu.h,v
retrieving revision 1.35
diff -u -p -r1.35 cpu.h
--- arch/sparc/include/cpu.h    2 Dec 2012 07:03:31 -0000       1.35
+++ arch/sparc/include/cpu.h    6 Jul 2016 17:31:11 -0000
@@ -93,14 +93,16 @@ extern int eintstack[];
 #define        CLKF_PC(framep)         ((framep)->pc)
 #define        CLKF_INTR(framep)       ((framep)->fp < (u_int)eintstack)
 
+extern int     cpu_want_resched;               /* need_resched() was called */
+extern int     cpu_want_ast;
+
 /*
  * Preempt the current process if in interrupt from user mode,
  * or after the current trap/syscall if in system mode.
  */
-extern int     want_resched;           /* resched() was called */
-#define        need_resched(ci)                (want_resched = 1, want_ast = 1)
-#define clear_resched(ci)      want_resched = 0
-extern int     want_ast;
+#define        need_resched(ci)        (cpu_want_resched = 1, cpu_want_ast = 1)
+#define        clear_resched(ci)       cpu_want_resched = 0
+#define        want_resched(ci)        (cpu_want_resched)
 
 /*
  * This is used during profiling to integrate system time.
@@ -113,13 +115,13 @@ extern int        want_ast;
  * buffer pages are invalid.  On the sparc, request an ast to send us
  * through trap(), marking the proc as needing a profiling tick.
  */
-#define        need_proftick(p)        do { want_ast = 1; } while (0)
+#define        need_proftick(p)        do { cpu_want_ast = 1; } while (0)
 
 /*
  * Notify the current process (p) that it has a signal pending,
  * process as soon as possible.
  */
-#define        signotify(p)            (want_ast = 1)
+#define        signotify(p)            (cpu_want_ast = 1)
 
 extern int     foundfpu;               /* true => we have an FPU */
 
Index: arch/sh/sh/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/sh/sh/trap.c,v
retrieving revision 1.35
diff -u -p -r1.35 trap.c
--- arch/sh/sh/trap.c   27 Feb 2016 13:08:07 -0000      1.35
+++ arch/sh/sh/trap.c   6 Jul 2016 17:31:11 -0000
@@ -483,7 +483,7 @@ ast(struct proc *p, struct trapframe *tf
                p->p_md.md_astpending = 0;
                refreshcreds(p);
                uvmexp.softs++;
-               mi_ast(p, want_resched);
+               mi_ast(p, want_resched(curcpu()));
                userret(p);
        }
 }
Index: arch/sh/sh/locore_c.c
===================================================================
RCS file: /cvs/src/sys/arch/sh/sh/locore_c.c,v
retrieving revision 1.12
diff -u -p -r1.12 locore_c.c
--- arch/sh/sh/locore_c.c       18 Nov 2014 20:51:01 -0000      1.12
+++ arch/sh/sh/locore_c.c       6 Jul 2016 17:31:11 -0000
@@ -121,7 +121,7 @@
 
 void (*__sh_switch_resume)(struct proc *);
 void cpu_switch_prepare(struct proc *, struct proc *);
-int want_resched;
+int cpu_want_resched;
 
 /*
  * Prepare context switch from oproc to nproc.
Index: arch/sh/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/sh/include/cpu.h,v
retrieving revision 1.27
diff -u -p -r1.27 cpu.h
--- arch/sh/include/cpu.h       11 Jul 2014 10:53:07 -0000      1.27
+++ arch/sh/include/cpu.h       6 Jul 2016 17:31:11 -0000
@@ -105,17 +105,19 @@ struct clockframe {
 #define        PROC_PC(p)      ((p)->p_md.md_regs->tf_spc)
 #define        PROC_STACK(p)   ((p)->p_md.md_regs->tf_r15)
 
+extern int cpu_want_resched;           /* need_resched() was called */
 /*
  * Preempt the current process if in interrupt from user mode,
  * or after the current trap/syscall if in system mode.
  */
 #define	need_resched(ci)						\
 do {                                                                   \
-       want_resched = 1;                                               \
+       cpu_want_resched = 1;                                           \
        if (curproc != NULL)                                            \
-               aston(curproc);                                 \
+               aston(curproc);                                         \
 } while (/*CONSTCOND*/0)
-#define clear_resched(ci)      want_resched = 0
+#define        clear_resched(ci)       cpu_want_resched = 0
+#define        want_resched(ci)        (cpu_want_resched)
 
 /*
  * Give a profiling tick to the current process when the user profiling
@@ -131,8 +133,6 @@ do {								\
 #define        signotify(p)    aston(p)
 
 #define        aston(p)        ((p)->p_md.md_astpending = 1)
-
-extern int want_resched;               /* need_resched() was called */
 
 /*
  * We need a machine-independent name for this.
Index: arch/powerpc/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/powerpc/include/cpu.h,v
retrieving revision 1.63
diff -u -p -r1.63 cpu.h
--- arch/powerpc/include/cpu.h  7 May 2016 22:46:54 -0000       1.63
+++ arch/powerpc/include/cpu.h  6 Jul 2016 17:31:11 -0000
@@ -181,7 +181,8 @@ do {								\
        if (ci->ci_curproc != NULL)                                     \
                aston(ci->ci_curproc);                                  \
 } while (0)
-#define clear_resched(ci) (ci)->ci_want_resched = 0
+#define        clear_resched(ci) (ci)->ci_want_resched = 0
+#define        want_resched(ci)  ((ci)->ci_want_resched)
 
 #define        need_proftick(p)        aston(p)
 
Index: arch/mips64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/mips64/include/cpu.h,v
retrieving revision 1.110
diff -u -p -r1.110 cpu.h
--- arch/mips64/include/cpu.h   6 Mar 2016 19:42:27 -0000       1.110
+++ arch/mips64/include/cpu.h   6 Jul 2016 17:31:11 -0000
@@ -305,6 +305,7 @@ void        cp0_calibrate(struct cpu_info *);
                        aston((ci)->ci_curproc); \
        } while(0)
 #define        clear_resched(ci)       (ci)->ci_want_resched = 0
+#define        want_resched(ci)        ((ci)->ci_want_resched)
 
 /*
  * Give a profiling tick to the current process when the user profiling
Index: arch/m88k/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/m88k/include/cpu.h,v
retrieving revision 1.64
diff -u -p -r1.64 cpu.h
--- arch/m88k/include/cpu.h     2 Jul 2015 01:33:59 -0000       1.64
+++ arch/m88k/include/cpu.h     6 Jul 2016 17:31:11 -0000
@@ -274,7 +274,9 @@ struct clockframe {
 #define        PROC_PC(p)      PC_REGS((struct reg *)((p)->p_md.md_tf))
 #define        PROC_STACK(p)   ((p)->p_md.md_tf->tf_sp)
 
+void   need_resched(struct cpu_info *);
 #define clear_resched(ci)      (ci)->ci_want_resched = 0
+#define        want_resched(ci)        ((ci)->ci_want_resched)
 
 /*
  * Give a profiling tick to the current process when the user profiling
@@ -283,7 +285,6 @@ struct clockframe {
  */
 #define        need_proftick(p)        aston(p)
 
-void   need_resched(struct cpu_info *);
 void   signotify(struct proc *);
 void   softipi(void);
 
Index: arch/i386/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/i386/include/cpu.h,v
retrieving revision 1.147
diff -u -p -r1.147 cpu.h
--- arch/i386/include/cpu.h     15 Mar 2016 03:17:51 -0000      1.147
+++ arch/i386/include/cpu.h     6 Jul 2016 17:31:11 -0000
@@ -244,14 +244,14 @@ void cpu_unidle(struct cpu_info *);
 
 #define curpcb                 curcpu()->ci_curpcb
 
-#define want_resched (curcpu()->ci_want_resched)
-
 /*
  * Preempt the current process if in interrupt from user mode,
  * or after the current trap/syscall if in system mode.
  */
-extern void need_resched(struct cpu_info *);
-#define clear_resched(ci) (ci)->ci_want_resched = 0
+void need_resched(struct cpu_info *);
+#define        clear_resched(ci)       (ci)->ci_want_resched = 0
+#define        want_resched(ci)        ((ci)->ci_want_resched)
+
 
 #define	CLKF_USERMODE(frame)	USERMODE((frame)->if_cs, (frame)->if_eflags)
 #define        CLKF_PC(frame)          ((frame)->if_eip)
Index: arch/i386/i386/trap.c
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/trap.c,v
retrieving revision 1.125
diff -u -p -r1.125 trap.c
--- arch/i386/i386/trap.c       28 Feb 2016 15:46:18 -0000      1.125
+++ arch/i386/i386/trap.c       6 Jul 2016 17:31:11 -0000
@@ -528,7 +528,7 @@ ast(struct trapframe *frame)
        p->p_md.md_regs = frame;
        refreshcreds(p);
        uvmexp.softs++;
-       mi_ast(p, want_resched);
+       mi_ast(p, want_resched(curcpu()));
        userret(p);
 }
 
Index: arch/i386/i386/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/i386/i386/cpu.c,v
retrieving revision 1.78
diff -u -p -r1.78 cpu.c
--- arch/i386/i386/cpu.c        28 Jun 2016 05:37:50 -0000      1.78
+++ arch/i386/i386/cpu.c        6 Jul 2016 17:31:11 -0000
@@ -761,7 +761,7 @@ cpu_idle_mwait_cycle(void)
                panic("idle with interrupts blocked!");
 
        /* something already queued? */
-       if (!cpu_is_idle(ci))
+       if (want_resched(ci))
                return;
 
        /*
@@ -775,7 +775,7 @@ cpu_idle_mwait_cycle(void)
         * the check in sched_idle() and here.
         */
        atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING | MWAIT_ONLY);
-       if (cpu_is_idle(ci)) {
+       if (!want_resched(ci)) {
                monitor(&ci->ci_mwait, 0, 0);
                if ((ci->ci_mwait & MWAIT_IDLING) == MWAIT_IDLING)
                        mwait(0, 0);
Index: arch/hppa/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/hppa/include/cpu.h,v
retrieving revision 1.89
diff -u -p -r1.89 cpu.h
--- arch/hppa/include/cpu.h     10 May 2016 14:52:03 -0000      1.89
+++ arch/hppa/include/cpu.h     6 Jul 2016 17:31:11 -0000
@@ -244,8 +244,13 @@ void       cpu_unidle(struct cpu_info *);
 #define        cpu_unidle(ci)
 #endif
 
-extern void need_resched(struct cpu_info *);
-#define clear_resched(ci)      (ci)->ci_want_resched = 0
+/*
+ * Preempt the current process if in interrupt from user mode,
+ * or after the current trap/syscall if in system mode.
+ */
+void need_resched(struct cpu_info *);
+#define        clear_resched(ci)       (ci)->ci_want_resched = 0
+#define        want_resched(ci)        ((ci)->ci_want_resched)
 
 #endif
 
Index: arch/arm/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/arm/include/cpu.h,v
retrieving revision 1.41
diff -u -p -r1.41 cpu.h
--- arch/arm/include/cpu.h      4 Apr 2016 09:13:44 -0000       1.41
+++ arch/arm/include/cpu.h      6 Jul 2016 17:31:11 -0000
@@ -267,9 +267,10 @@ extern int astpending;
  * Preempt the current process if in interrupt from user mode,
  * or after the current trap/syscall if in system mode.
  */
-extern int want_resched;       /* resched() was called */
-#define        need_resched(ci)        (want_resched = 1, setsoftast())
-#define clear_resched(ci)      want_resched = 0
+extern int cpu_want_resched;   /* need_resched() was called */
+#define        need_resched(ci)        (cpu_want_resched = 1, setsoftast())
+#define        clear_resched(ci)       cpu_want_resched = 0
+#define        want_resched(ci)        (cpu_want_resched)
 
 /*
  * Give a profiling tick to the current process when the user profiling
Index: arch/arm/arm/ast.c
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/ast.c,v
retrieving revision 1.14
diff -u -p -r1.14 ast.c
--- arch/arm/arm/ast.c  18 Nov 2014 20:51:01 -0000      1.14
+++ arch/arm/arm/ast.c  6 Jul 2016 17:31:11 -0000
@@ -65,7 +65,7 @@
  */
 void ast(struct trapframe *);
  
-int want_resched;
+int cpu_want_resched;
 extern int astpending;
 
 /*
@@ -91,7 +91,7 @@ ast(struct trapframe *tf)
 #endif 
 
        uvmexp.softs++;
-       mi_ast(p, want_resched);
+       mi_ast(p, want_resched(curcpu()));
        userret(p);
 }
 
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.101
diff -u -p -r1.101 cpu.h
--- arch/amd64/include/cpu.h    9 May 2016 22:45:07 -0000       1.101
+++ arch/amd64/include/cpu.h    6 Jul 2016 17:31:11 -0000
@@ -216,12 +216,13 @@ extern struct cpu_info *cpu_info_list;
 
 #define CPU_INFO_UNIT(ci)      ((ci)->ci_dev ? (ci)->ci_dev->dv_unit : 0)
 
-/*      
+/*
  * Preempt the current process if in interrupt from user mode,
  * or after the current trap/syscall if in system mode.
  */
-extern void need_resched(struct cpu_info *);
-#define clear_resched(ci) (ci)->ci_want_resched = 0
+void need_resched(struct cpu_info *);
+#define        clear_resched(ci)       (ci)->ci_want_resched = 0
+#define        want_resched(ci)        ((ci)->ci_want_resched)
 
 #if defined(MULTIPROCESSOR)
 
Index: arch/amd64/amd64/cpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/cpu.c,v
retrieving revision 1.101
diff -u -p -r1.101 cpu.c
--- arch/amd64/amd64/cpu.c      28 Jun 2016 05:37:50 -0000      1.101
+++ arch/amd64/amd64/cpu.c      6 Jul 2016 17:31:11 -0000
@@ -253,7 +253,7 @@ cpu_idle_mwait_cycle(void)
                panic("idle with interrupts blocked!");
 
        /* something already queued? */
-       if (!cpu_is_idle(ci))
+       if (want_resched(ci))
                return;
 
        /*
@@ -267,7 +267,7 @@ cpu_idle_mwait_cycle(void)
         * the check in sched_idle() and here.
         */
        atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING | MWAIT_ONLY);
-       if (cpu_is_idle(ci)) {
+       if (!want_resched(ci)) {
                monitor(&ci->ci_mwait, 0, 0);
                if ((ci->ci_mwait & MWAIT_IDLING) == MWAIT_IDLING)
                        mwait(0, 0);
Index: arch/alpha/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/alpha/include/cpu.h,v
retrieving revision 1.57
diff -u -p -r1.57 cpu.h
--- arch/alpha/include/cpu.h    30 Mar 2016 15:39:46 -0000      1.57
+++ arch/alpha/include/cpu.h    6 Jul 2016 17:31:11 -0000
@@ -301,7 +301,8 @@ do {								\
        if ((ci)->ci_curproc != NULL)                                   \
                aston((ci)->ci_curproc);                                \
 } while (/*CONSTCOND*/0)
-#define clear_resched(ci) (ci)->ci_want_resched = 0
+#define        clear_resched(ci)       (ci)->ci_want_resched = 0
+#define        want_resched(ci)        ((ci)->ci_want_resched)
 
 /*
  * Give a profiling tick to the current process when the user profiling
