On Sun, 4 Nov 2007, Josh Carroll wrote:

>> Josh, I included one too many changes in the diff and it made the results
>> ambiguous.  I've scaled it back slightly by removing the changes to
>> sched_pickcpu() and included the patch in this email again.  Can you run
>> through your tests once more?  I'd like to commit this part soon as it
>> helps in other cases.  I'm most interested in the buildworld numbers for
>> now.
>
> Sure, I can run through them. Should I change the slice_min value at
> all during testing to compare different values? Or is that part of the
> other patch that is not included in this (haven't fetched and grep'd
> it yet). :)
>
> I'll get the benchmark numbers as soon as I can.

Turns out the last patch I posted had a small compile error because I
edited it by hand to remove one section.  Here's an updated patch that
fixes that and changes the min/max slice values to something more
reasonable.  Slice min should be around 4 with a max of 12.
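
To put rough numbers on that, here is a quick userland sketch of how the
bounds and the per-queue slice fall out of sched_initticks() and
tdq_slice() in the patch below; the stathz value of 128 is only an
assumed example, not something the patch sets:

	/*
	 * Sketch of the slice calculation from sched_initticks() and
	 * tdq_slice() in the patch below.  stathz = 128 is an assumed
	 * example; the kernel uses whatever stathz actually is.
	 */
	#include <stdio.h>

	int
	main(void)
	{
		int realstathz = 128;
		int slice_max = realstathz / 10;	/* ~100ms -> 12 ticks */
		int slice_min = realstathz / 25;	/* ~40ms  ->  5 ticks */
		int interload, load, slice;

		for (interload = 0; interload <= 500; interload += 50) {
			if (interload < 100)
				slice = slice_max;
			else {
				/* How many times over 100% cpu is requested? */
				load = (interload + 99) / 100;
				slice = slice_max / load;
				if (slice < slice_min)
					slice = slice_min;
			}
			printf("interload %3d -> slice %2d ticks\n",
			    interload, slice);
		}
		return (0);
	}

Both bounds are exported read/write as the kern.sched.slice and
kern.sched.slice_min sysctls by the patch, so they can be adjusted at
runtime while benchmarking.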

It also looks like 4BSD's slice handling got more precise recently:
jhb changed it from using a callout to an actual counter based on
runtime.
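
The ULE change in this patch heads the same direction: ts_slice now
counts up the stat clock ticks a thread has actually used and is checked
against tdq_slice() on every tick, rather than counting down a budget
handed out at enqueue time (see the sched_clock() hunk below).  A toy,
self-contained version of that accounting, with the limit pinned at 12
ticks where the kernel would call tdq_slice():

	/*
	 * Toy model of the count-up slice accounting in the sched_clock()
	 * hunk below.  The 12-tick limit is fixed here for illustration;
	 * in the kernel it comes from tdq_slice() and moves with load.
	 */
	#include <stdio.h>

	#define NTHREADS	2
	#define SLICE_LIMIT	12

	int
	main(void)
	{
		int slice[NTHREADS] = { 0, 0 };
		int running = 0;
		int tick;

		for (tick = 1; tick <= 40; tick++) {
			/* Charge this stat clock tick to the running thread. */
			if (++slice[running] < SLICE_LIMIT)
				continue;
			/* Out of time: reset the count and round-robin. */
			slice[running] = 0;
			printf("tick %2d: thread %d requeued\n", tick, running);
			running = (running + 1) % NTHREADS;
		}
		return (0);
	}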

Thanks,
Jeff


Index: sched_ule.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sched_ule.c,v
retrieving revision 1.216
diff -u -r1.216 sched_ule.c
--- sched_ule.c	23 Oct 2007 00:52:24 -0000	1.216
+++ sched_ule.c	4 Nov 2007 23:16:30 -0000
@@ -88,7 +88,8 @@
 	short		ts_flags;	/* TSF_* flags. */
 	u_char		ts_rqindex;	/* Run queue index. */
 	u_char		ts_cpu;		/* CPU that we have affinity for. */
-	int		ts_slice;	/* Ticks of slice remaining. */
+	int		ts_slice;	/* Ticks of slice used. */
+	int		ts_score;	/* Interactivity score. */
 	u_int		ts_slptime;	/* Number of ticks we vol. slept */
 	u_int		ts_runtime;	/* Number of ticks we were running */
 	/* The following variables are only used for pctcpu calculation */
@@ -102,6 +103,7 @@
 /* flags kept in ts_flags */
 #define	TSF_BOUND	0x0001		/* Thread can not migrate. */
 #define	TSF_XFERABLE	0x0002		/* Thread was added as transferable. */
+#define	TSF_INTERLOAD	0x0004		/* Interactive load on runq. */
 
 static struct td_sched td_sched0;
 
@@ -167,13 +169,15 @@
  *			the shift factor.  Without the shift the error rate
  *			due to rounding would be unacceptably high.
  * realstathz:		stathz is sometimes 0 and run off of hz.
- * sched_slice:		Runtime of each thread before rescheduling.
+ * sched_slice_max:	Maximum runtime of each thread before rescheduling.
+ * sched_slice_min:	Minimum runtime of each thread before rescheduling.
  * preempt_thresh:	Priority threshold for preemption and remote IPIs.
  */
 static int sched_interact = SCHED_INTERACT_THRESH;
 static int realstathz;
 static int tickincr;
-static int sched_slice;
+static int sched_slice_max = 1;
+static int sched_slice_min = 1;
 #ifdef PREEMPTION
 #ifdef FULL_PREEMPTION
 static int preempt_thresh = PRI_MAX_IDLE;
@@ -194,6 +198,7 @@
 	struct runq	tdq_realtime;		/* real-time run queue. */
 	struct runq	tdq_timeshare;		/* timeshare run queue. */
 	struct runq	tdq_idle;		/* Queue of IDLE threads. */
+	unsigned int	tdq_interload;		/* Interactive load. */
 	int		tdq_load;		/* Aggregate load. */
 	u_char		tdq_idx;		/* Current insert index. */
 	u_char		tdq_ridx;		/* Current removal index. */
@@ -239,7 +244,6 @@
 static int balance_interval = 128;	/* Default set in sched_initticks(). */
 static int pick_pri = 1;
 static int affinity;
-static int tryself = 1;
 static int steal_htt = 1;
 static int steal_idle = 1;
 static int steal_thresh = 2;
@@ -288,10 +292,12 @@
 static void tdq_setup(struct tdq *);
 static void tdq_load_add(struct tdq *, struct td_sched *);
 static void tdq_load_rem(struct tdq *, struct td_sched *);
+static int tdq_slice(struct tdq *);
 static __inline void tdq_runq_add(struct tdq *, struct td_sched *, int);
 static __inline void tdq_runq_rem(struct tdq *, struct td_sched *);
-void tdq_print(int cpu);
-static void runq_print(struct runq *rq);
+void tdq_print(int);
+void sched_print(struct thread *);
+static void runq_print(struct runq *);
 static void tdq_add(struct tdq *, struct thread *, int);
 #ifdef SMP
 static void tdq_move(struct tdq *, struct tdq *);
@@ -345,6 +351,26 @@
 	}
 }
 
+void
+sched_print(struct thread *td)
+{
+	struct td_sched *ts;
+
+	if (td == NULL)
+		td = curthread;
+	ts = td->td_sched;
+	printf("flags:    0x%X\n", ts->ts_flags);
+	printf("rqindex:  %d\n", ts->ts_rqindex);
+	printf("cpu:      %d\n", ts->ts_cpu);
+	printf("slice:    %d\n", ts->ts_slice);
+	printf("score:    %d\n", ts->ts_score);
+	printf("slptime:  %d\n", ts->ts_slptime);
+	printf("runtime:  %d\n", ts->ts_runtime);
+	printf("ltick:    %d\n", ts->ts_ltick);
+	printf("ftick:    %d\n", ts->ts_ftick);
+	printf("ticks:    %d\n", ts->ts_ticks);
+}
+
 /*
  * Print the status of a per-cpu thread queue.  Should be a ddb show cmd.
  */
@@ -357,7 +383,9 @@
 
 	printf("tdq %d:\n", TDQ_ID(tdq));
 	printf("\tlockptr         %p\n", TDQ_LOCKPTR(tdq));
+	printf("\tinterload:	  %d\n", tdq->tdq_interload);
 	printf("\tload:           %d\n", tdq->tdq_load);
+	printf("\tslice:          %d\n", tdq_slice(tdq));
 	printf("\ttimeshare idx:  %d\n", tdq->tdq_idx);
 	printf("\ttimeshare ridx: %d\n", tdq->tdq_ridx);
 	printf("\trealtime runq:\n");
@@ -383,8 +411,12 @@
 static __inline void
 tdq_runq_add(struct tdq *tdq, struct td_sched *ts, int flags)
 {
+	u_char pri;
+
+	pri = ts->ts_thread->td_priority;
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	THREAD_LOCK_ASSERT(ts->ts_thread, MA_OWNED);
+	TD_SET_RUNQ(ts->ts_thread);
 #ifdef SMP
 	if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
 		tdq->tdq_transferable++;
@@ -392,15 +424,15 @@
 		ts->ts_flags |= TSF_XFERABLE;
 	}
 #endif
-	if (ts->ts_runq == &tdq->tdq_timeshare) {
-		u_char pri;
-
-		pri = ts->ts_thread->td_priority;
+	if (pri <= PRI_MAX_REALTIME) {
+		ts->ts_runq = &tdq->tdq_realtime;
+	} else if (pri <= PRI_MAX_TIMESHARE) {
+		ts->ts_runq = &tdq->tdq_timeshare;
 		KASSERT(pri <= PRI_MAX_TIMESHARE && pri >= PRI_MIN_TIMESHARE,
 			("Invalid priority %d on timeshare runq", pri));
 		/*
 		 * This queue contains only priorities between MIN and MAX
-		 * realtime.  Use the whole queue to represent these values.
+		 * timeshare.  Use the whole queue to represent these values.
 		 */
 		if ((flags & (SRQ_BORROWING|SRQ_PREEMPTED)) == 0) {
 			pri = (pri - PRI_MIN_TIMESHARE) / TS_RQ_PPQ;
@@ -416,8 +448,10 @@
 		} else
 			pri = tdq->tdq_ridx;
 		runq_add_pri(ts->ts_runq, ts, pri, flags);
+		return;
 	} else
-		runq_add(ts->ts_runq, ts, flags);
+		ts->ts_runq = &tdq->tdq_idle;
+	runq_add(ts->ts_runq, ts, flags);
 }
 
 /* 
@@ -443,13 +477,6 @@
 			runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
 		else
 			runq_remove_idx(ts->ts_runq, ts, NULL);
-		/*
-		 * For timeshare threads we update the priority here so
-		 * the priority reflects the time we've been sleeping.
-		 */
-		ts->ts_ltick = ticks;
-		sched_pctcpu_update(ts);
-		sched_priority(ts->ts_thread);
 	} else
 		runq_remove(ts->ts_runq, ts);
 }
@@ -466,6 +493,8 @@
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	THREAD_LOCK_ASSERT(ts->ts_thread, MA_OWNED);
 	class = PRI_BASE(ts->ts_thread->td_pri_class);
+	tdq->tdq_interload += ts->ts_score;
+	ts->ts_flags |= TSF_INTERLOAD;
 	tdq->tdq_load++;
 	CTR2(KTR_SCHED, "cpu %d load: %d", TDQ_ID(tdq), tdq->tdq_load);
 	if (class != PRI_ITHD &&
@@ -498,9 +527,37 @@
 #endif
 	KASSERT(tdq->tdq_load != 0,
 	    ("tdq_load_rem: Removing with 0 load on queue %d", TDQ_ID(tdq)));
+	ts->ts_flags &= ~TSF_INTERLOAD;
+	ts->ts_runq = NULL;
+	tdq->tdq_interload -= ts->ts_score;
 	tdq->tdq_load--;
 	CTR1(KTR_SCHED, "load: %d", tdq->tdq_load);
-	ts->ts_runq = NULL;
+}
+
+/*
+ * Compute the maximum slice when the interload changes.  This gives a soft
+ * upper bound on latency as the load increases.
+ */
+static int
+tdq_slice(struct tdq *tdq)
+{
+	int slice;
+	int load;
+
+	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+	if (tdq->tdq_interload < 100)
+		return (sched_slice_max);
+	/*
+	 * An interload of 100 is roughly equivalent to 100% cpu utilization
+	 * requested.  Calculate how many times overloaded we are and then
+	 * divide the maximum slice by this number.  None of this is precise
+	 * but it does yield decreasing slice values within the [min, max]
+	 * range as load increases.
+	 */
+	load = (tdq->tdq_interload + 99) / 100;
+	slice = sched_slice_max / load;
+	slice = max(slice, sched_slice_min);
+	return (slice);
 }
 
 #ifdef SMP
@@ -1070,14 +1127,6 @@
 	cpu = self = PCPU_GET(cpuid);
 	if (smp_started == 0)
 		return (self);
-	/*
-	 * Don't migrate a running thread from sched_switch().
-	 */
-	if (flags & SRQ_OURSELF) {
-		CTR1(KTR_ULE, "YIELDING %d",
-		    curthread->td_priority);
-		return (self);
-	}
 	pri = ts->ts_thread->td_priority;
 	cpu = ts->ts_cpu;
 	/*
@@ -1113,9 +1162,8 @@
 	 * This may improve locality among sleepers and wakers when there
 	 * is shared data.
 	 */
-	if (tryself && pri < curthread->td_priority) {
-		CTR1(KTR_ULE, "tryself %d",
-		    curthread->td_priority);
+	if (pri < curthread->td_priority) {
+		CTR1(KTR_ULE, "tryself %d", curthread->td_priority);
 		return (self);
 	}
 	/*
@@ -1175,6 +1223,7 @@
 	runq_init(&tdq->tdq_timeshare);
 	runq_init(&tdq->tdq_idle);
 	tdq->tdq_load = 0;
+	tdq->tdq_interload = 0;
 }
 
 #ifdef SMP
@@ -1324,12 +1373,12 @@
 	 * in case which sched_clock() called before sched_initticks().
 	 */
 	realstathz = hz;
-	sched_slice = (realstathz/10);	/* ~100ms */
 	tickincr = 1 << SCHED_TICK_SHIFT;
 
 	/* Add thread0's load since it's running. */
 	TDQ_LOCK(tdq);
 	thread0.td_lock = TDQ_LOCKPTR(TDQ_SELF());
+	td_sched0.ts_score = 0;
 	tdq_load_add(tdq, &td_sched0);
 	TDQ_UNLOCK(tdq);
 }
@@ -1344,7 +1393,8 @@
 	int incr;
 
 	realstathz = stathz ? stathz : hz;
-	sched_slice = (realstathz/10);	/* ~100ms */
+	sched_slice_max = realstathz / 10;	/* ~100ms */
+	sched_slice_min = realstathz / 25;	/* ~40ms */
 
 	/*
 	 * tickincr is shifted out by 10 to avoid rounding errors due to
@@ -1374,7 +1424,6 @@
 #endif
 }
 
-
 /*
  * This is the core of the interactivity algorithm.  Determines a score based
  * on past behavior.  It is the ratio of sleep time to run time scaled to
@@ -1389,15 +1438,6 @@
 	int div;
 
 	ts = td->td_sched;
-	/*
-	 * The score is only needed if this is likely to be an interactive
-	 * task.  Don't go through the expense of computing it if there's
-	 * no chance.
-	 */
-	if (sched_interact <= SCHED_INTERACT_HALF &&
-		ts->ts_runtime >= ts->ts_slptime)
-			return (SCHED_INTERACT_HALF);
-
 	if (ts->ts_runtime > ts->ts_slptime) {
 		div = max(1, ts->ts_runtime / SCHED_INTERACT_HALF);
 		return (SCHED_INTERACT_HALF +
@@ -1443,7 +1483,7 @@
 	 * score.  Negative nice values make it easier for a thread to be
 	 * considered interactive.
 	 */
-	score = imax(0, sched_interact_score(td) - td->td_proc->p_nice);
+	score = imax(0, td->td_sched->ts_score - td->td_proc->p_nice);
 	if (score < sched_interact) {
 		pri = PRI_MIN_REALTIME;
 		pri += ((PRI_MAX_REALTIME - PRI_MIN_REALTIME) / sched_interact)
@@ -1477,12 +1517,15 @@
 sched_interact_update(struct thread *td)
 {
 	struct td_sched *ts;
+	struct tdq *tdq;
 	u_int sum;
+	int score;
 
+	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	ts = td->td_sched;
 	sum = ts->ts_runtime + ts->ts_slptime;
 	if (sum < SCHED_SLP_RUN_MAX)
-		return;
+		goto score;
 	/*
 	 * This only happens from two places:
 	 * 1) We have added an unusual amount of run time from fork_exit.
@@ -1490,13 +1533,13 @@
 	 */
 	if (sum > SCHED_SLP_RUN_MAX * 2) {
 		if (ts->ts_runtime > ts->ts_slptime) {
-			ts->ts_runtime = SCHED_SLP_RUN_MAX;
+			ts->ts_runtime = SCHED_SLP_RUN_MAX / 2;
 			ts->ts_slptime = 1;
 		} else {
-			ts->ts_slptime = SCHED_SLP_RUN_MAX;
+			ts->ts_slptime = SCHED_SLP_RUN_MAX / 2;
 			ts->ts_runtime = 1;
 		}
-		return;
+		goto score;
 	}
 	/*
 	 * If we have exceeded by more than 1/5th then the algorithm below
@@ -1506,10 +1549,19 @@
 	if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
 		ts->ts_runtime /= 2;
 		ts->ts_slptime /= 2;
-		return;
+		goto score;
 	}
 	ts->ts_runtime = (ts->ts_runtime / 5) * 4;
 	ts->ts_slptime = (ts->ts_slptime / 5) * 4;
+score:
+	score = sched_interact_score(td);
+	if (ts->ts_flags & TSF_INTERLOAD) {
+		tdq = TDQ_CPU(ts->ts_cpu);
+		TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+		tdq->tdq_interload -= ts->ts_score;
+		tdq->tdq_interload += score;
+	}
+	ts->ts_score = score;
 }
 
 /*
@@ -1559,7 +1611,7 @@
 {
 
 	/* Convert sched_slice to hz */
-	return (hz/(realstathz/sched_slice));
+	return (hz/(realstathz/sched_slice_max));
 }
 
 /*
@@ -1598,16 +1650,19 @@
 sched_thread_priority(struct thread *td, u_char prio)
 {
 	struct td_sched *ts;
+	struct tdq *tdq;
 
 	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
 	    td, td->td_proc->p_comm, td->td_priority, prio, curthread,
 	    curthread->td_proc->p_comm);
 	ts = td->td_sched;
+	tdq = TDQ_CPU(ts->ts_cpu);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
-	if (td->td_priority == prio)
+	if (td->td_priority <= prio) {
+		td->td_priority = prio;
 		return;
-
-	if (TD_ON_RUNQ(td) && prio < td->td_priority) {
+	}
+	if (TD_ON_RUNQ(td)) {
 		/*
 		 * If the priority has been elevated due to priority
 		 * propagation, we may have to move ourselves to a new
@@ -1617,16 +1672,14 @@
 		sched_rem(td);
 		td->td_priority = prio;
 		sched_add(td, SRQ_BORROWING);
-	} else {
 #ifdef SMP
-		struct tdq *tdq;
-
-		tdq = TDQ_CPU(ts->ts_cpu);
+	} else if (TD_IS_RUNNING(td)) {
 		if (prio < tdq->tdq_lowpri)
 			tdq->tdq_lowpri = prio;
+		td->td_priority = prio;
 #endif
+	} else
 		td->td_priority = prio;
-	}
 }
 
 /*
@@ -1772,6 +1825,8 @@
 
 	tdn = TDQ_CPU(td->td_sched->ts_cpu);
 #ifdef SMP
+	/* The load is being removed from the current cpu. */
+	tdq_load_rem(tdq, td->td_sched);
 	/*
 	 * Do the lock dance required to avoid LOR.  We grab an extra
 	 * spinlock nesting to prevent preemption while we're
@@ -1863,12 +1918,11 @@
 		TD_SET_CAN_RUN(td);
 	} else if (TD_IS_RUNNING(td)) {
 		MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
-		tdq_load_rem(tdq, ts);
 		srqflag = (flags & SW_PREEMPT) ?
 		    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
 		    SRQ_OURSELF|SRQ_YIELDING;
 		if (ts->ts_cpu == cpuid)
-			tdq_add(tdq, td, srqflag);
+			tdq_runq_add(tdq, ts, srqflag);
 		else
 			mtx = sched_switch_migrate(tdq, td, srqflag);
 	} else {
@@ -1970,22 +2024,18 @@
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	ts = td->td_sched;
 	/*
-	 * If we slept for more than a tick update our interactivity and
-	 * priority.
+	 * Update interactivity and priority after a sleep.
 	 */
 	slptick = td->td_slptick;
 	td->td_slptick = 0;
-	if (slptick && slptick != ticks) {
-		u_int hzticks;
-
-		hzticks = (ticks - slptick) << SCHED_TICK_SHIFT;
-		ts->ts_slptime += hzticks;
+	if (slptick && slptick != ticks)  {
+		ts->ts_slptime += (ticks - slptick) << SCHED_TICK_SHIFT;
 		sched_interact_update(td);
 		sched_pctcpu_update(ts);
 		sched_priority(td);
+		/* Reset the slice value after we sleep. */
+		ts->ts_slice = 0;
 	}
-	/* Reset the slice value after we sleep. */
-	ts->ts_slice = sched_slice;
 	sched_add(td, SRQ_BORING);
 }
 
@@ -2040,7 +2090,6 @@
 	 */
 	ts2->ts_slptime = ts->ts_slptime;
 	ts2->ts_runtime = ts->ts_runtime;
-	ts2->ts_slice = 1;	/* Attempt to quickly learn interactivity. */
 }
 
 /*
@@ -2188,25 +2237,26 @@
 	}
 	ts = td->td_sched;
 	/*
-	 * We only do slicing code for TIMESHARE threads.
-	 */
-	if (td->td_pri_class != PRI_TIMESHARE)
-		return;
-	/*
 	 * We used a tick; charge it to the thread so that we can compute our
 	 * interactivity.
 	 */
 	td->td_sched->ts_runtime += tickincr;
 	sched_interact_update(td);
 	/*
+	 * We only do slicing code for TIMESHARE threads.
+	 */
+	if (td->td_pri_class != PRI_TIMESHARE)
+		return;
+	sched_priority(td);
+	/*
 	 * We used up one time slice.
 	 */
-	if (--ts->ts_slice > 0)
+	if (++ts->ts_slice < tdq_slice(tdq))
 		return;
 	/*
-	 * We're out of time, recompute priorities and requeue.
+	 * We're out of time, force a requeue later.
 	 */
-	sched_priority(td);
+	ts->ts_slice = 0;
 	td->td_flags |= TDF_NEEDRESCHED;
 }
 
@@ -2290,35 +2340,6 @@
 #endif
 	return (PCPU_GET(idlethread));
 }
-
-/*
- * Set owepreempt if necessary.  Preemption never happens directly in ULE,
- * we always request it once we exit a critical section.
- */
-static inline void
-sched_setpreempt(struct thread *td)
-{
-	struct thread *ctd;
-	int cpri;
-	int pri;
-
-	ctd = curthread;
-	pri = td->td_priority;
-	cpri = ctd->td_priority;
-	if (td->td_priority < ctd->td_priority)
-		curthread->td_flags |= TDF_NEEDRESCHED;
-	if (panicstr != NULL || pri >= cpri || cold || TD_IS_INHIBITED(ctd))
-		return;
-	/*
-	 * Always preempt IDLE threads.  Otherwise only if the preempting
-	 * thread is an ithread.
-	 */
-	if (pri > preempt_thresh && cpri < PRI_MIN_IDLE)
-		return;
-	ctd->td_owepreempt = 1;
-	return;
-}
-
 /*
  * Add a thread to a thread queue.  Initializes priority, slice, runq, and
  * add it to the appropriate queue.  This is the internal function called
@@ -2328,11 +2349,10 @@
 tdq_add(struct tdq *tdq, struct thread *td, int flags)
 {
 	struct td_sched *ts;
-	int class;
 #ifdef SMP
+	int class;
 	int cpumask;
 #endif
-
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	KASSERT((td->td_inhibitors == 0),
 	    ("sched_add: trying to run inhibited thread"));
@@ -2342,20 +2362,10 @@
 	    ("sched_add: thread swapped out"));
 
 	ts = td->td_sched;
-	class = PRI_BASE(td->td_pri_class);
-        TD_SET_RUNQ(td);
-	if (ts->ts_slice == 0)
-		ts->ts_slice = sched_slice;
-	/*
-	 * Pick the run queue based on priority.
-	 */
-	if (td->td_priority <= PRI_MAX_REALTIME)
-		ts->ts_runq = &tdq->tdq_realtime;
-	else if (td->td_priority <= PRI_MAX_TIMESHARE)
-		ts->ts_runq = &tdq->tdq_timeshare;
-	else
-		ts->ts_runq = &tdq->tdq_idle;
+	tdq_runq_add(tdq, ts, flags);
+	tdq_load_add(tdq, ts);
 #ifdef SMP
+	class = PRI_BASE(td->td_pri_class);
 	cpumask = 1 << ts->ts_cpu;
 	/*
 	 * If we had been idle, clear our bit in the group and potentially
@@ -2378,8 +2388,6 @@
 	if (td->td_priority < tdq->tdq_lowpri)
 		tdq->tdq_lowpri = td->td_priority;
 #endif
-	tdq_runq_add(tdq, ts, flags);
-	tdq_load_add(tdq, ts);
 }
 
 /*
@@ -2390,6 +2398,7 @@
 sched_add(struct thread *td, int flags)
 {
 	struct td_sched *ts;
+	struct thread *ctd;
 	struct tdq *tdq;
 #ifdef SMP
 	int cpuid;
@@ -2434,8 +2443,19 @@
 	thread_lock_set(td, TDQ_LOCKPTR(tdq));
 	tdq_add(tdq, td, flags);
 #endif
-	if (!(flags & SRQ_YIELDING))
-		sched_setpreempt(td);
+	if (cold || panicstr)
+		return;
+	/*
+	 * Always preempt IDLE threads.  Otherwise only if the preempting
+	 * thread is an ithread.
+	 */
+	ctd = curthread;
+	if (td->td_priority < ctd->td_priority) {
+		ctd->td_flags |= TDF_NEEDRESCHED;
+		if (td->td_priority <= preempt_thresh ||
+		    ctd->td_priority > PRI_MIN_IDLE)
+			ctd->td_owepreempt = 1;
+	}
 }
 
 /*
@@ -2660,8 +2680,10 @@
     "Scheduler");
 SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ULE", 0,
     "Scheduler name");
-SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
-    "Slice size for timeshare threads");
+SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice_max, 0,
+    "Maximum slice size for timeshare threads");
+SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &sched_slice_min, 0,
+    "Minimum slice size for timeshare threads");
 SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0,
      "Interactivity score threshold");
 SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RW, &preempt_thresh,
@@ -2671,7 +2693,6 @@
     "Pick the target cpu based on priority rather than load.");
 SYSCTL_INT(_kern_sched, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0,
     "Number of hz ticks to keep thread affinity for");
-SYSCTL_INT(_kern_sched, OID_AUTO, tryself, CTLFLAG_RW, &tryself, 0, "");
 SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RW, &rebalance, 0,
     "Enables the long-term load balancer");
 SYSCTL_INT(_kern_sched, OID_AUTO, balance_interval, CTLFLAG_RW,
_______________________________________________
freebsd-performance@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-performance
To unsubscribe, send any mail to "[EMAIL PROTECTED]"
