Aaaaand it didn't take long for me to find a small bug... attached is a
fixed version of the patch. Such things happen if one decides to
regenerate a patch "just in case" and forgets to revert to a working
version before doing that :D
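
Since the idea is easy to lose in the diff noise, a quick summary: every
runnable proc gets a virtual deadline (now + offset), the run queue is a
global RB-tree sorted by deadline, and sched_chooseproc() just picks the
proc with the earliest one. Procs whose deadline has already passed move
to a second tree and are served first, ordered by when their deadline
was set. Here is the offset arithmetic from generate_deadline(), pulled
out into a small userland sketch (hz=100 is only an assumed example,
which makes rrticks_init = hz / 50 = 2):

    #include <stdio.h>

    int
    main(void)
    {
        int rrticks_init = 100 / 50;    /* hz / 50, assuming hz = 100 */
        int nice, voluntary;

        /* kernel nice is 0..39; a voluntary yield halves the offset */
        for (nice = 0; nice <= 39; nice += 39)
            for (voluntary = 0; voluntary <= 1; voluntary++)
                printf("nice %2d, %s: %d ms\n", nice,
                    voluntary ? "yielded" : "preempted",
                    ((nice + 1) * rrticks_init) << (voluntary ? 2 : 3));
        return (0);
    }

With those assumptions it prints offsets from 8 ms (nice 0, yielded) up
to 640 ms (nice 39, preempted), so interactive procs that keep yielding
sort ahead of CPU hogs at the same nice level.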
--
Gregor Best
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.149
diff -u -r1.149 proc.h
--- sys/proc.h 7 Jan 2012 05:38:12 -0000 1.149
+++ sys/proc.h 17 Jan 2012 16:10:09 -0000
@@ -43,6 +43,7 @@
#include <machine/proc.h> /* Machine-dependent proc substruct. */
#include <sys/selinfo.h> /* For struct selinfo */
#include <sys/queue.h>
+#include <sys/tree.h>
#include <sys/timeout.h> /* For struct timeout */
#include <sys/event.h> /* For struct klist */
#include <sys/mutex.h> /* For struct mutex */
@@ -210,7 +211,9 @@
#define PS_SINGLEUNWIND _P_SINGLEUNWIND
struct proc {
- TAILQ_ENTRY(proc) p_runq;
+ RB_ENTRY(proc) p_runq;
+ RB_ENTRY(proc) p_expq;
+ TAILQ_ENTRY(proc) p_slpq;
LIST_ENTRY(proc) p_list; /* List of all processes. */
struct process *p_p; /* The process of this thread. */
@@ -251,6 +254,9 @@
#endif
/* scheduling */
+ struct timeval p_deadline; /* virtual deadline used for scheduling */
+ struct timeval p_deadline_set; /* time at which the deadline was set */
+ int p_rrticks; /* number of ticks this process is allowed to stay on a processor */
u_int p_estcpu; /* Time averaged value of p_cpticks. */
int p_cpticks; /* Ticks of cpu time. */
fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.30
diff -u -r1.30 sched.h
--- sys/sched.h 16 Nov 2011 20:50:19 -0000 1.30
+++ sys/sched.h 17 Jan 2012 16:10:09 -0000
@@ -87,8 +87,6 @@
#define CP_IDLE 4
#define CPUSTATES 5
-#define SCHED_NQS 32 /* 32 run queues. */
-
/*
* Per-CPU scheduler state.
* XXX - expose to userland for now.
@@ -99,7 +97,6 @@
u_int spc_schedticks; /* ticks for schedclock() */
u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
u_char spc_curpriority; /* usrpri of curproc */
- int spc_rrticks; /* ticks until roundrobin() */
int spc_pscnt; /* prof/stat counter */
int spc_psdiv; /* prof/stat divisor */
struct proc *spc_idleproc; /* idle proc for this cpu */
@@ -107,9 +104,6 @@
u_int spc_nrun; /* procs on the run queues */
fixpt_t spc_ldavg; /* shortest load avg. for this cpu */
- TAILQ_HEAD(prochead, proc) spc_qs[SCHED_NQS];
- volatile uint32_t spc_whichqs;
-
#ifdef notyet
struct proc *spc_reaper; /* dead proc reaper */
#endif
@@ -119,18 +113,16 @@
#ifdef _KERNEL
/* spc_flags */
-#define SPCF_SEENRR 0x0001 /* process has seen roundrobin() */
-#define SPCF_SHOULDYIELD 0x0002 /* process should yield the CPU */
-#define SPCF_SWITCHCLEAR (SPCF_SEENRR|SPCF_SHOULDYIELD)
-#define SPCF_SHOULDHALT 0x0004 /* CPU should be vacated */
-#define SPCF_HALTED 0x0008 /* CPU has been halted */
+#define SPCF_SHOULDYIELD 0x0001 /* process should yield the CPU */
+#define SPCF_SHOULDHALT 0x0002 /* CPU should be vacated */
+#define SPCF_HALTED 0x0004 /* CPU has been halted */
-#define SCHED_PPQ (128 / SCHED_NQS) /* priorities per queue */
#define NICE_WEIGHT 2 /* priorities per nice level */
-#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
+#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX)
extern int schedhz; /* ideally: 16 */
extern int rrticks_init; /* ticks per roundrobin() */
+extern struct cpuset sched_idle_cpus;
struct proc;
void schedclock(struct proc *);
@@ -147,18 +139,20 @@
void cpu_switchto(struct proc *, struct proc *);
struct proc *sched_chooseproc(void);
struct cpu_info *sched_choosecpu(struct proc *);
-struct cpu_info *sched_choosecpu_fork(struct proc *parent, int);
+struct cpu_info *sched_choosecpu_fork(struct proc *parent);
void cpu_idle_enter(void);
void cpu_idle_cycle(void);
void cpu_idle_leave(void);
void sched_peg_curproc(struct cpu_info *ci);
+void generate_deadline(struct proc *, char);
+
#ifdef MULTIPROCESSOR
void sched_start_secondary_cpus(void);
void sched_stop_secondary_cpus(void);
#endif
-#define curcpu_is_idle() (curcpu()->ci_schedstate.spc_whichqs == 0)
+#define curcpu_is_idle() (cpuset_isset(&sched_idle_cpus, curcpu()))
void sched_init_runqueues(void);
void setrunqueue(struct proc *);
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.72
diff -u -r1.72 kern_clock.c
--- kern/kern_clock.c 7 Mar 2011 07:07:13 -0000 1.72
+++ kern/kern_clock.c 17 Jan 2012 16:10:10 -0000
@@ -241,7 +241,11 @@
if (stathz == 0)
statclock(frame);
- if (--ci->ci_schedstate.spc_rrticks <= 0)
+ /*
+ * If the currently running process went over its round robin tick
+ * quota, preempt it.
+ */
+ if (p && (--(p->p_rrticks) <= 0))
roundrobin(ci);
/*
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.133
diff -u -r1.133 kern_fork.c
--- kern/kern_fork.c 14 Dec 2011 07:32:16 -0000 1.133
+++ kern/kern_fork.c 17 Jan 2012 16:10:10 -0000
@@ -225,6 +225,9 @@
atomic_setbits_int(&pr->ps_flags, PS_CONTROLT);
p->p_p = pr;
+
+ /* Pretend we preempted this new process */
+ generate_deadline(p, 0);
}
/* print the 'table full' message once per 10 seconds */
@@ -485,7 +488,7 @@
getmicrotime(&p->p_stats->p_start);
p->p_acflag = AFORK;
p->p_stat = SRUN;
- p->p_cpu = sched_choosecpu_fork(curp, flags);
+ p->p_cpu = sched_choosecpu_fork(curp);
setrunqueue(p);
SCHED_UNLOCK(s);
Index: kern/kern_proc.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.47
diff -u -r1.47 kern_proc.c
--- kern/kern_proc.c 18 Sep 2011 23:20:54 -0000 1.47
+++ kern/kern_proc.c 17 Jan 2012 16:10:10 -0000
@@ -398,8 +398,6 @@
p->p_comm, p->p_pid, pst, p->p_flag, P_BITS);
(*pr)(" pri=%u, usrpri=%u, nice=%d\n",
p->p_priority, p->p_usrpri, p->p_p->ps_nice);
- (*pr)(" forw=%p, list=%p,%p\n",
- TAILQ_NEXT(p, p_runq), p->p_list.le_next, p->p_list.le_prev);
(*pr)(" process=%p user=%p, vmspace=%p\n",
p->p_p, p->p_addr, p->p_vmspace);
(*pr)(" estcpu=%u, cpticks=%d, pctcpu=%u.%u, swtime=%u\n",
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.24
diff -u -r1.24 kern_sched.c
--- kern/kern_sched.c 12 Oct 2011 18:30:09 -0000 1.24
+++ kern/kern_sched.c 17 Jan 2012 16:10:10 -0000
@@ -19,6 +19,7 @@
#include <sys/sched.h>
#include <sys/proc.h>
+#include <sys/tree.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
@@ -32,9 +33,11 @@
void sched_kthreads_create(void *);
-int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);
+RB_HEAD(runqueue, proc) sched_runqueue;
+RB_HEAD(expqueue, proc) sched_expqueue;
+
/*
* To help choosing which cpu should run which process we keep track
* of cpus which are currently idle and which cpus have processes
@@ -61,6 +64,27 @@
* interrupts during the context switch.
*/
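+/* Runqueue order: earliest virtual deadline first. */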
+static int
+sched_cmp_proc(struct proc *a, struct proc *b)
+{
+ if (a == b)
+ return 0;
+ if (timercmp(&(a->p_deadline), &(b->p_deadline), <))
+ return -1;
+ return 1;
+}
+
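+/* Expired-queue order: oldest deadline-set time first. */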
+static int
+sched_cmp_proc_exp(struct proc *a, struct proc *b)
+{
+ if (a == b)
+ return 0;
+ if (timercmp(&(a->p_deadline_set), &(b->p_deadline_set), <))
+ return -1;
+ return 1;
+}
+
+RB_GENERATE_STATIC(runqueue, proc, p_runq, sched_cmp_proc);
+RB_GENERATE_STATIC(expqueue, proc, p_expq, sched_cmp_proc_exp);
+
/*
* sched_init_cpu is called from main() for the boot cpu, then it's the
* responsibility of the MD code to call it for all other cpus.
@@ -69,10 +93,6 @@
sched_init_cpu(struct cpu_info *ci)
{
struct schedstate_percpu *spc = &ci->ci_schedstate;
- int i;
-
- for (i = 0; i < SCHED_NQS; i++)
- TAILQ_INIT(&spc->spc_qs[i]);
spc->spc_idleproc = NULL;
@@ -106,6 +126,7 @@
{
struct schedstate_percpu *spc;
struct proc *p = curproc;
+ struct proc *dead;
struct cpu_info *ci = v;
int s;
@@ -120,7 +141,6 @@
SCHED_LOCK(s);
cpuset_add(&sched_idle_cpus, ci);
p->p_stat = SSLEEP;
- p->p_cpu = ci;
atomic_setbits_int(&p->p_flag, P_CPUPEG);
mi_switch();
cpuset_del(&sched_idle_cpus, ci);
@@ -130,38 +150,27 @@
KASSERT(curproc == spc->spc_idleproc);
while (1) {
- while (!curcpu_is_idle()) {
- struct proc *dead;
-
- SCHED_LOCK(s);
- p->p_stat = SSLEEP;
- mi_switch();
- SCHED_UNLOCK(s);
-
- while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
+ while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
LIST_REMOVE(dead, p_hash);
exit2(dead);
- }
}
+ if ((spc->spc_schedflags & SPCF_SHOULDHALT) &&
+     !(spc->spc_schedflags & SPCF_HALTED))
+ atomic_setbits_int(&spc->spc_schedflags, SPCF_HALTED);
splassert(IPL_NONE);
cpuset_add(&sched_idle_cpus, ci);
+
cpu_idle_enter();
- while (spc->spc_whichqs == 0) {
- if (spc->spc_schedflags & SPCF_SHOULDHALT &&
- (spc->spc_schedflags & SPCF_HALTED) == 0) {
- cpuset_del(&sched_idle_cpus, ci);
- SCHED_LOCK(s);
- atomic_setbits_int(&spc->spc_schedflags,
- spc->spc_whichqs ? 0 : SPCF_HALTED);
- SCHED_UNLOCK(s);
- wakeup(spc);
- }
- cpu_idle_cycle();
- }
+ cpu_idle_cycle();
cpu_idle_leave();
+
cpuset_del(&sched_idle_cpus, ci);
+
+ SCHED_LOCK(s);
+ p->p_stat = SSLEEP;
+ mi_switch();
+ SCHED_UNLOCK(s);
}
}
@@ -206,63 +215,35 @@
void
sched_init_runqueues(void)
{
+ RB_INIT(&sched_runqueue);
+ RB_INIT(&sched_expqueue);
}
void
setrunqueue(struct proc *p)
{
- struct schedstate_percpu *spc;
- int queue = p->p_priority >> 2;
-
SCHED_ASSERT_LOCKED();
- spc = &p->p_cpu->ci_schedstate;
- spc->spc_nrun++;
-
- TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
- spc->spc_whichqs |= (1 << queue);
- cpuset_add(&sched_queued_cpus, p->p_cpu);
-
- if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
- cpu_unidle(p->p_cpu);
+ RB_INSERT(runqueue, &sched_runqueue, p);
}
void
remrunqueue(struct proc *p)
{
- struct schedstate_percpu *spc;
- int queue = p->p_priority >> 2;
-
SCHED_ASSERT_LOCKED();
- spc = &p->p_cpu->ci_schedstate;
- spc->spc_nrun--;
-
- TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
- if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
- spc->spc_whichqs &= ~(1 << queue);
- if (spc->spc_whichqs == 0)
- cpuset_del(&sched_queued_cpus, p->p_cpu);
- }
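+ /* A proc sits on at most one of the two trees; try the runqueue first. */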
+ if (!(RB_REMOVE(runqueue, &sched_runqueue, p)))
+ RB_REMOVE(expqueue, &sched_expqueue, p);
}
struct proc *
sched_chooseproc(void)
{
struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
- struct proc *p;
- int queue;
+ struct proc *p = NULL;
+ struct timeval now;
SCHED_ASSERT_LOCKED();
if (spc->spc_schedflags & SPCF_SHOULDHALT) {
- if (spc->spc_whichqs) {
- for (queue = 0; queue < SCHED_NQS; queue++) {
- TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
- remrunqueue(p);
- p->p_cpu = sched_choosecpu(p);
- setrunqueue(p);
- }
- }
- }
p = spc->spc_idleproc;
KASSERT(p);
KASSERT(p->p_wchan == NULL);
@@ -270,16 +251,30 @@
return (p);
}
-again:
- if (spc->spc_whichqs) {
- queue = ffs(spc->spc_whichqs) - 1;
- p = TAILQ_FIRST(&spc->spc_qs[queue]);
- remrunqueue(p);
- KASSERT(p->p_stat == SRUN);
- } else if ((p = sched_steal_proc(curcpu())) == NULL) {
- p = spc->spc_idleproc;
- if (p == NULL) {
- int s;
+ if ((!RB_EMPTY(&sched_runqueue)) || (!RB_EMPTY(&sched_expqueue))) {
+ /*
+ * Move expired processes to the expired runqueue, where they are
+ * sorted by the time their deadline was set instead of the
+ * deadline itself.
+ */
+ microuptime(&now);
+ while (!RB_EMPTY(&sched_runqueue)) {
+ p = RB_MIN(runqueue, &sched_runqueue);
+ if (!p)
+ break;
+ if (!timercmp(&(p->p_deadline), &now, <))
+ break;
+ RB_INSERT(expqueue, &sched_expqueue, p);
+ RB_REMOVE(runqueue, &sched_runqueue, p);
+ }
+
+ if (!RB_EMPTY(&sched_expqueue)) {
+ p = RB_MIN(expqueue, &sched_expqueue);
+ RB_REMOVE(expqueue, &sched_expqueue, p);
+ } else {
+ p = RB_MIN(runqueue, &sched_runqueue);
+ remrunqueue(p);
+ }
+ } else {
+ while ((p = spc->spc_idleproc) == NULL) {
+ int s;
/*
* We get here if someone decides to switch during
* boot before forking kthreads, bleh.
@@ -291,13 +286,15 @@
spl0();
delay(10);
SCHED_LOCK(s);
- goto again;
- }
+ }
KASSERT(p);
+ }
+
+ if (p) {
p->p_stat = SRUN;
+ p->p_cpu = curcpu();
}
- KASSERT(p->p_wchan == NULL);
return (p);
}
@@ -310,30 +307,13 @@
uint64_t sched_nomigrations;
struct cpu_info *
-sched_choosecpu_fork(struct proc *parent, int flags)
+sched_choosecpu_fork(struct proc *parent)
{
struct cpu_info *choice = NULL;
fixpt_t load, best_load = ~0;
- int run, best_run = INT_MAX;
struct cpu_info *ci;
struct cpuset set;
-#if 0
- /*
- * XXX
- * Don't do this until we have a painless way to move the cpu in exec.
- * Preferably when nuking the old pmap and getting a new one on a
- * new cpu.
- */
- /*
- * PPWAIT forks are simple. We know that the parent will not
- * run until we exec and choose another cpu, so we just steal its
- * cpu.
- */
- if (flags & FORK_PPWAIT)
- return (parent->p_cpu);
-#endif
-
/*
* Look at all cpus that are currently idle and have nothing queued.
* If there are none, pick the one with least queued procs first,
@@ -347,13 +327,10 @@
cpuset_del(&set, ci);
load = ci->ci_schedstate.spc_ldavg;
- run = ci->ci_schedstate.spc_nrun;
- if (choice == NULL || run < best_run ||
- (run == best_run && load < best_load)) {
+ if (choice == NULL || load < best_load) {
choice = ci;
best_load = load;
- best_run = run;
}
}
@@ -364,9 +341,6 @@
sched_choosecpu(struct proc *p)
{
struct cpu_info *choice = NULL;
- int last_cost = INT_MAX;
- struct cpu_info *ci;
- struct cpuset set;
/*
* If pegged to a cpu, don't allow it to move.
@@ -374,169 +348,19 @@
if (p->p_flag & P_CPUPEG)
return (p->p_cpu);
- sched_choose++;
-
- /*
- * Look at all cpus that are currently idle and have nothing queued.
- * If there are none, pick the cheapest of those.
- * (idle + queued could mean that the cpu is handling an interrupt
- * at this moment and haven't had time to leave idle yet).
- */
- cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
-
/*
- * First, just check if our current cpu is in that set, if it is,
- * this is simple.
- * Also, our cpu might not be idle, but if it's the current cpu
- * and it has nothing else queued and we're curproc, take it.
+ * Else, just pretend we forked a new process.
*/
- if (cpuset_isset(&set, p->p_cpu) ||
- (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
- curproc == p)) {
- sched_wasidle++;
- return (p->p_cpu);
- }
-
- if (cpuset_first(&set) == NULL)
- cpuset_copy(&set, &sched_all_cpus);
-
- while ((ci = cpuset_first(&set)) != NULL) {
- int cost = sched_proc_to_cpu_cost(ci, p);
+ sched_choose++;
- if (choice == NULL || cost < last_cost) {
- choice = ci;
- last_cost = cost;
- }
- cpuset_del(&set, ci);
- }
+ choice = sched_choosecpu_fork(p);
if (p->p_cpu != choice)
sched_nmigrations++;
else
sched_nomigrations++;
- return (choice);
-}
-
-/*
- * Attempt to steal a proc from some cpu.
- */
-struct proc *
-sched_steal_proc(struct cpu_info *self)
-{
- struct schedstate_percpu *spc;
- struct proc *best = NULL;
- int bestcost = INT_MAX;
- struct cpu_info *ci;
- struct cpuset set;
-
- cpuset_copy(&set, &sched_queued_cpus);
-
- while ((ci = cpuset_first(&set)) != NULL) {
- struct proc *p;
- int queue;
- int cost;
-
- cpuset_del(&set, ci);
-
- spc = &ci->ci_schedstate;
-
- queue = ffs(spc->spc_whichqs) - 1;
- TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
- if (p->p_flag & P_CPUPEG)
- continue;
-
- cost = sched_proc_to_cpu_cost(self, p);
-
- if (best == NULL || cost < bestcost) {
- best = p;
- bestcost = cost;
- }
- }
- }
- if (best == NULL)
- return (NULL);
-
- spc = &best->p_cpu->ci_schedstate;
- remrunqueue(best);
- best->p_cpu = self;
-
- sched_stolen++;
-
- return (best);
-}
-
-/*
- * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know).
- */
-static int
-log2(unsigned int i)
-{
- int ret = 0;
-
- while (i >>= 1)
- ret++;
-
- return (ret);
-}
-
-/*
- * Calculate the cost of moving the proc to this cpu.
- *
- * What we want is some guesstimate of how much "performance" it will
- * cost us to move the proc here. Not just for caches and TLBs and NUMA
- * memory, but also for the proc itself. A highly loaded cpu might not
- * be the best candidate for this proc since it won't get run.
- *
- * Just total guesstimates for now.
- */
-
-int sched_cost_load = 1;
-int sched_cost_priority = 1;
-int sched_cost_runnable = 3;
-int sched_cost_resident = 1;
-
-int
-sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
-{
- struct schedstate_percpu *spc;
- int l2resident = 0;
- int cost;
-
- spc = &ci->ci_schedstate;
-
- cost = 0;
-
- /*
- * First, account for the priority of the proc we want to move.
- * More willing to move, the lower the priority of the destination
- * and the higher the priority of the proc.
- */
- if (!cpuset_isset(&sched_idle_cpus, ci)) {
- cost += (p->p_priority - spc->spc_curpriority) *
- sched_cost_priority;
- cost += sched_cost_runnable;
- }
- if (cpuset_isset(&sched_queued_cpus, ci)) {
- cost += spc->spc_nrun * sched_cost_runnable;
- }
-
- /*
- * Higher load on the destination means we don't want to go there.
- */
- cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);
-
- /*
- * If the proc is on this cpu already, lower the cost by how much
- * it has been running and an estimate of its footprint.
- */
- if (p->p_cpu == ci && p->p_slptime == 0) {
- l2resident =
- log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
- cost -= l2resident * sched_cost_resident;
- }
-
- return (cost);
+ return (choice);
}
/*
@@ -560,7 +384,6 @@
}
#ifdef MULTIPROCESSOR
-
void
sched_start_secondary_cpus(void)
{
@@ -583,6 +406,9 @@
{
CPU_INFO_ITERATOR cii;
struct cpu_info *ci;
+ int s;
+
+ SCHED_LOCK(s);
/*
* Make sure we stop the secondary CPUs.
@@ -601,14 +427,17 @@
if (CPU_IS_PRIMARY(ci))
continue;
- while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
+
+ SCHED_UNLOCK(s);
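+ /* The new idle loop no longer does wakeup(spc), so poll with a timeout. */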
+ while (!(spc->spc_schedflags & SPCF_HALTED)) {
sleep_setup(&sls, spc, PZERO, "schedstate");
- sleep_finish(&sls,
- (spc->spc_schedflags & SPCF_HALTED) == 0);
+ sleep_setup_timeout(&sls, 10);
+ sleep_finish(&sls, 1);
}
+ SCHED_LOCK(s);
}
+ SCHED_UNLOCK(s);
}
-
#endif
/*
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.99
diff -u -r1.99 kern_synch.c
--- kern/kern_synch.c 17 Jan 2012 02:34:18 -0000 1.99
+++ kern/kern_synch.c 17 Jan 2012 16:10:10 -0000
@@ -205,7 +205,7 @@
p->p_wmesg = wmesg;
p->p_slptime = 0;
p->p_priority = prio & PRIMASK;
- TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
+ TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_slpq);
}
void
@@ -342,7 +342,7 @@
unsleep(struct proc *p)
{
if (p->p_wchan) {
- TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_runq);
+ TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_slpq);
p->p_wchan = NULL;
}
}
@@ -361,7 +361,7 @@
SCHED_LOCK(s);
qp = &slpque[LOOKUP(ident)];
for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
- pnext = TAILQ_NEXT(p, p_runq);
+ pnext = TAILQ_NEXT(p, p_slpq);
#ifdef DIAGNOSTIC
if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
panic("wakeup: p_stat is %d", (int)p->p_stat);
@@ -369,7 +369,7 @@
if (p->p_wchan == ident) {
--n;
p->p_wchan = 0;
- TAILQ_REMOVE(qp, p, p_runq);
+ TAILQ_REMOVE(qp, p, p_slpq);
if (p->p_stat == SSLEEP) {
/* OPTIMIZED EXPANSION OF setrunnable(p); */
if (p->p_slptime > 1)
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.27
diff -u -r1.27 sched_bsd.c
--- kern/sched_bsd.c 7 Jul 2011 18:00:33 -0000 1.27
+++ kern/sched_bsd.c 17 Jan 2012 16:10:10 -0000
@@ -77,37 +77,23 @@
timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
- rrticks_init = hz / 10;
+ rrticks_init = hz / 50;
schedcpu(&schedcpu_to);
}
/*
- * Force switch among equal priority processes every 100ms.
+ * Force switch among equal priority processes every 20ms.
*/
void
roundrobin(struct cpu_info *ci)
{
struct schedstate_percpu *spc = &ci->ci_schedstate;
- spc->spc_rrticks = rrticks_init;
-
if (ci->ci_curproc != NULL) {
- if (spc->spc_schedflags & SPCF_SEENRR) {
- /*
- * The process has already been through a roundrobin
- * without switching and may be hogging the CPU.
- * Indicate that the process should yield.
- */
- atomic_setbits_int(&spc->spc_schedflags,
- SPCF_SHOULDYIELD);
- } else {
- atomic_setbits_int(&spc->spc_schedflags,
- SPCF_SEENRR);
- }
+ atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDYIELD);
}
- if (spc->spc_nrun)
- need_resched(ci);
+ need_resched(ci);
}
/*
@@ -204,7 +190,6 @@
struct timeout *to = (struct timeout *)arg;
fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
struct proc *p;
- int s;
unsigned int newcpu;
int phz;
@@ -233,7 +218,6 @@
*/
if (p->p_slptime > 1)
continue;
- SCHED_LOCK(s);
/*
* p_pctcpu is only for ps.
*/
@@ -250,17 +234,6 @@
newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
p->p_estcpu = newcpu;
resetpriority(p);
- if (p->p_priority >= PUSER) {
- if (p->p_stat == SRUN &&
- (p->p_priority / SCHED_PPQ) !=
- (p->p_usrpri / SCHED_PPQ)) {
- remrunqueue(p);
- p->p_priority = p->p_usrpri;
- setrunqueue(p);
- } else
- p->p_priority = p->p_usrpri;
- }
- SCHED_UNLOCK(s);
}
uvm_meter();
wakeup(&lbolt);
@@ -278,8 +251,6 @@
unsigned int newcpu = p->p_estcpu;
fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
- SCHED_ASSERT_LOCKED();
-
if (p->p_slptime > 5 * loadfac)
p->p_estcpu = 0;
else {
@@ -304,6 +275,7 @@
SCHED_LOCK(s);
p->p_priority = p->p_usrpri;
p->p_stat = SRUN;
+ generate_deadline(p, 1);
setrunqueue(p);
p->p_stats->p_ru.ru_nvcsw++;
mi_switch();
@@ -332,6 +304,7 @@
p->p_priority = p->p_usrpri;
p->p_stat = SRUN;
p->p_cpu = sched_choosecpu(p);
+ generate_deadline(p, 0);
setrunqueue(p);
p->p_stats->p_ru.ru_nivcsw++;
mi_switch();
@@ -372,14 +345,7 @@
* process was running, and add that to its total so far.
*/
microuptime(&tv);
- if (timercmp(&tv, &spc->spc_runtime, <)) {
-#if 0
- printf("uptime is not monotonic! "
- "tv=%lu.%06lu, runtime=%lu.%06lu\n",
- tv.tv_sec, tv.tv_usec, spc->spc_runtime.tv_sec,
- spc->spc_runtime.tv_usec);
-#endif
- } else {
+ if (timercmp(&tv, &spc->spc_runtime, >=)) {
timersub(&tv, &spc->spc_runtime, &tv);
timeradd(&p->p_rtime, &tv, &p->p_rtime);
}
@@ -403,7 +369,7 @@
* Process is about to yield the CPU; clear the appropriate
* scheduling flags.
*/
- atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);
+ atomic_clearbits_int(&spc->spc_schedflags, SPCF_SHOULDYIELD);
nextproc = sched_chooseproc();
@@ -435,9 +401,8 @@
* be running on a new CPU now, so don't use the cache'd
* schedstate_percpu pointer.
*/
- KASSERT(p->p_cpu == curcpu());
-
- microuptime(&p->p_cpu->ci_schedstate.spc_runtime);
+ if (p->p_cpu == curcpu())
+ microuptime(&p->p_cpu->ci_schedstate.spc_runtime);
#ifdef MULTIPROCESSOR
/*
@@ -515,30 +480,56 @@
}
p->p_stat = SRUN;
p->p_cpu = sched_choosecpu(p);
- setrunqueue(p);
if (p->p_slptime > 1)
updatepri(p);
+ setrunqueue(p);
p->p_slptime = 0;
resched_proc(p, p->p_priority);
}
/*
* Compute the priority of a process when running in user mode.
- * Arrange to reschedule if the resulting priority is better
- * than that of the current process.
*/
void
resetpriority(struct proc *p)
{
- unsigned int newpriority;
-
- SCHED_ASSERT_LOCKED();
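+ /* e.g. ps_nice 0 (nice -20) -> usrpri 40; ps_nice 39 -> 118 (assuming NZERO == 20) */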
+ p->p_usrpri = min((NZERO + p->p_p->ps_nice) << 1, MAXPRI);
+}
- newpriority = PUSER + p->p_estcpu +
- NICE_WEIGHT * (p->p_p->ps_nice - NZERO);
- newpriority = min(newpriority, MAXPRI);
- p->p_usrpri = newpriority;
- resched_proc(p, p->p_usrpri);
+/*
+ * Generate a new virtual deadline for a process. The deadline is a soft
+ * one and has no purpose besides being used for choosing the next process
+ * to run. Also resets the number of round robin ticks available to the
+ * process if the previous timeslice expired and the process had to
+ * be preempted.
+ */
+void
+generate_deadline(struct proc *p, char voluntary)
+{
+ /*
+ * For nice values between 0 and 39 inclusive, the offset lies between
+ * 32 and 1280 milliseconds for a machine with hz=100. That means that
+ * processes with nice value 0 (i.e. -20 in userland) will be executed
+ * 32 milliseconds in the future at the latest. Processes with very
+ * little priority will be executed 1.28 seconds in the future at the
+ * very latest. The shift is done to ensure that the lowest possible
+ * offset is larger than the timeslice, in order to make sure that the
+ * scheduler does not degenerate to round robin behaviour when more
+ * than just a few processes with high priority are started.
+ *
+ * If the process voluntarily yielded its CPU, we reward it by halving
+ * its deadline offset.
+ */
+ unsigned int offset_msec = ((p->p_p->ps_nice + 1) * rrticks_init) << (voluntary ? 2 : 3);
+ struct timeval offset = {
+ .tv_sec = offset_msec / 1000,
+ .tv_usec = (offset_msec % 1000) * 1000 /* leftover ms, scaled to usec */
+ };
+ struct timeval now;
+ microuptime(&now);
+ bcopy(&now, &(p->p_deadline_set), sizeof(struct timeval));
+
+ timeradd(&now, &offset, &(p->p_deadline));
+ if (!voluntary)
+ p->p_rrticks = rrticks_init;
+}
/*
@@ -559,12 +550,8 @@
void
schedclock(struct proc *p)
{
- int s;
-
- SCHED_LOCK(s);
p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
resetpriority(p);
if (p->p_priority >= PUSER)
p->p_priority = p->p_usrpri;
- SCHED_UNLOCK(s);
}