Module Name:    src
Committed By:   ad
Date:           Sat Apr  4 20:17:58 UTC 2020

Modified Files:
        src/sys/kern: kern_runq.c

Log Message:
- sched_idle(): if a migration took place, rather than going idle briefly
  to avoid stealing back the LWP, remember the target CPU and avoid it.

- sched_preempted(): only pick a totally idle CPU to migrate the LWP to;
  in the interests of overall throughput it's better to keep making use of
  SMT siblings and slow CPUs than to crowd an already busy fast CPU.
  (Simplified sketches of both changes follow below and after the diff.)
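
A simplified, userland-style sketch of the first change, for readers who want
the shape of it without reading the diff: the CPU the migrated LWP was pushed
to ("mci") is remembered and skipped when the idle CPU scans its siblings for
work to steal.  The struct cpu type and the idle_migrate()/idle_steal()
helpers below are hypothetical stand-ins, not the kernel's API; only the
remember-and-avoid logic mirrors the commit.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct cpu {
	int		id;
	int		runqueue_len;	/* jobs queued on this CPU */
	struct cpu	*sibling;	/* next CPU in the same core */
};

/*
 * Toy migration: push one job to the least loaded sibling.  Returns the
 * target CPU, or NULL if nothing moved (as sched_idle_migrate() now does).
 */
static struct cpu *
idle_migrate(struct cpu *self)
{
	struct cpu *best = NULL;

	for (struct cpu *c = self->sibling; c != self; c = c->sibling) {
		if (best == NULL || c->runqueue_len < best->runqueue_len)
			best = c;
	}
	if (best != NULL && self->runqueue_len > 0) {
		self->runqueue_len--;
		best->runqueue_len++;
		return best;
	}
	return NULL;
}

/* Toy steal: take one job from a busy sibling, skipping the migration target. */
static bool
idle_steal(struct cpu *self, const struct cpu *avoid)
{
	for (struct cpu *c = self->sibling; c != self; c = c->sibling) {
		if (c == avoid)		/* the new check: don't steal it back */
			continue;
		if (c->runqueue_len > 1) {
			c->runqueue_len--;
			self->runqueue_len++;
			return true;
		}
	}
	return false;
}

int
main(void)
{
	struct cpu c0 = { .id = 0, .runqueue_len = 2 };
	struct cpu c1 = { .id = 1, .runqueue_len = 0 };
	struct cpu c2 = { .id = 2, .runqueue_len = 3 };

	c0.sibling = &c1; c1.sibling = &c2; c2.sibling = &c0;

	struct cpu *mci = idle_migrate(&c0);	/* remember the target CPU */
	bool stole = idle_steal(&c0, mci);	/* ...and avoid it when stealing */

	printf("migrated to cpu%d, stole=%d\n", mci != NULL ? mci->id : -1, stole);
	return 0;
}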


To generate a diff of this commit:
cvs rdiff -u -r1.64 -r1.65 src/sys/kern/kern_runq.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_runq.c
diff -u src/sys/kern/kern_runq.c:1.64 src/sys/kern/kern_runq.c:1.65
--- src/sys/kern/kern_runq.c:1.64	Thu Mar 26 19:25:07 2020
+++ src/sys/kern/kern_runq.c	Sat Apr  4 20:17:58 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_runq.c,v 1.64 2020/03/26 19:25:07 ad Exp $	*/
+/*	$NetBSD: kern_runq.c,v 1.65 2020/04/04 20:17:58 ad Exp $	*/
 
 /*-
  * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.64 2020/03/26 19:25:07 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.65 2020/04/04 20:17:58 ad Exp $");
 
 #include "opt_dtrace.h"
 
@@ -340,17 +340,17 @@ sched_resched_cpu(struct cpu_info *ci, p
 	}
 
 	/*
-	 * Things start as soon as we touch ci_want_resched: x86 for example
-	 * has an instruction that monitors the memory cell it's in.  We
-	 * want to drop the schedstate lock in advance, otherwise the remote
-	 * CPU can awaken and immediately block on the lock.
+	 * Things can start as soon as ci_want_resched is touched: x86 has
+	 * an instruction that monitors the memory cell it's in.  Drop the
+	 * schedstate lock in advance, otherwise the remote CPU can awaken
+	 * and immediately block on the lock.
 	 */
 	if (__predict_true(unlock)) {
 		spc_unlock(ci);
 	}
 
 	/*
-	 * The caller will always have a second scheduler lock held: either
+	 * The caller almost always has a second scheduler lock held: either
 	 * the running LWP lock (spc_lwplock), or a sleep queue lock.  That
 	 * keeps preemption disabled, which among other things ensures all
 	 * LWPs involved won't be freed while we're here (see lwp_dtor()).
@@ -361,8 +361,10 @@ sched_resched_cpu(struct cpu_info *ci, p
 		n = atomic_cas_uint(&ci->ci_want_resched, o, o | f);
 		if (__predict_true(o == n)) {
 			/*
-			 * We're the first.  If we're in process context on
-			 * the same CPU, we can avoid the visit to trap().
+			 * We're the first to set a resched on the CPU.  Try
+			 * to avoid causing a needless trip through trap()
+			 * to handle an AST fault, if it's known the LWP
+			 * will either block or go through userret() soon.
 			 */
 			if (l != curlwp || cpu_intr_p()) {
 				cpu_need_resched(ci, l, f);
@@ -680,9 +682,10 @@ sched_catchlwp(struct cpu_info *ci)
 }
 
 /*
- * Called from sched_idle() to handle migration.
+ * Called from sched_idle() to handle migration.  Return the CPU that we
+ * pushed the LWP to (may be NULL).
  */
-static void
+static struct cpu_info *
 sched_idle_migrate(void)
 {
 	struct cpu_info *ci = curcpu(), *tci = NULL;
@@ -748,13 +751,14 @@ sched_idle_migrate(void)
 		sched_resched_lwp(l, true);
 		/* tci now unlocked */
 		spc_unlock(ci);
-		return;
+		return tci;
 	}
 	if (dlock == true) {
 		KASSERT(tci != NULL);
 		spc_unlock(tci);
 	}
 	spc_unlock(ci);
+	return NULL;
 }
 
 /*
@@ -785,21 +789,22 @@ sched_steal(struct cpu_info *ci, struct 
 void
 sched_idle(void)
 {
-	struct cpu_info *ci = curcpu(), *inner, *outer, *first, *tci = NULL;
+	struct cpu_info *ci, *inner, *outer, *first, *tci, *mci;
 	struct schedstate_percpu *spc, *tspc;
 	struct lwp *l;
 
+	ci = curcpu();
 	spc = &ci->ci_schedstate;
+	tci = NULL;
+	mci = NULL;
 
 	/*
 	 * Handle LWP migrations off this CPU to another.  If there is a
-	 * migration to do then go idle afterwards (we'll wake again soon),
-	 * as we don't want to instantly steal back the LWP we just moved
-	 * out.
+	 * migration to do then remember the CPU the LWP was sent to, and
+	 * don't steal the LWP back from that CPU below.
 	 */
 	if (spc->spc_migrating != NULL) {
-		sched_idle_migrate();
-		return;
+		mci = sched_idle_migrate();
 	}
 
 	/* If this CPU is offline, or we have an LWP to run, we're done. */
@@ -812,7 +817,7 @@ sched_idle(void)
 		/* Try to help our siblings out. */
 		tci = ci->ci_sibling[CPUREL_CORE];
 		while (tci != ci) {
-			if (sched_steal(ci, tci)) {
+			if (tci != mci && sched_steal(ci, tci)) {
 				return;
 			}
 			tci = tci->ci_sibling[CPUREL_CORE];
@@ -849,7 +854,8 @@ sched_idle(void)
 		do {
 			/* Don't hit the locks unless needed. */
 			tspc = &inner->ci_schedstate;
-			if (ci == inner || spc->spc_psid != tspc->spc_psid ||
+			if (ci == inner || ci == mci ||
+			    spc->spc_psid != tspc->spc_psid ||
 			    tspc->spc_mcount < min_catch) {
 				continue;
 			}
@@ -874,6 +880,7 @@ sched_idle(void)
 void
 sched_preempted(struct lwp *l)
 {
+	const int flags = SPCF_IDLE | SPCF_1STCLASS;
 	struct schedstate_percpu *tspc;
 	struct cpu_info *ci, *tci;
 
@@ -903,7 +910,6 @@ sched_preempted(struct lwp *l)
 	 */
 	tci = ci->ci_sibling[CPUREL_CORE];
 	while (tci != ci) {
-		const int flags = SPCF_IDLE | SPCF_1STCLASS;
 		tspc = &tci->ci_schedstate;
 		if ((tspc->spc_flags & flags) == flags &&
 		    sched_migratable(l, tci)) {
@@ -928,7 +934,9 @@ sched_preempted(struct lwp *l)
 	} else {
 		/*
 		 * Try to find a better CPU to take it, but don't move to
-		 * another 2nd class CPU; there's not much point.
+		 * another 2nd class CPU, and don't move to a non-idle CPU,
+		 * because that would prevent SMT being used to maximise
+		 * throughput.
 		 *
 		 * Search in the current CPU package in order to try and
 		 * keep L2/L3 cache locality, but expand to include the
@@ -936,7 +944,7 @@ sched_preempted(struct lwp *l)
 		 */
 		tci = sched_bestcpu(l, l->l_cpu);
 		if (tci != ci &&
-		    (tci->ci_schedstate.spc_flags & SPCF_1STCLASS) != 0) {
+		    (tci->ci_schedstate.spc_flags & flags) == flags) {
 			l->l_target_cpu = tci;
 		}
 	}
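
The last hunk above tightens the target test in sched_preempted() from
"first class" to "first class and idle".  A minimal standalone sketch of
that flag check follows; the SPCF_* values here are made up purely for
illustration and are not the real kernel definitions.

#include <stdbool.h>
#include <stdio.h>

#define SPCF_IDLE	0x01	/* hypothetical: CPU is running its idle LWP */
#define SPCF_1STCLASS	0x02	/* hypothetical: fast, first-class CPU */

static bool
is_migration_target(unsigned spc_flags)
{
	const unsigned flags = SPCF_IDLE | SPCF_1STCLASS;

	/* Both bits must be set, mirroring (spc_flags & flags) == flags. */
	return (spc_flags & flags) == flags;
}

int
main(void)
{
	printf("idle + 1st class: %d\n",
	    is_migration_target(SPCF_IDLE | SPCF_1STCLASS));
	printf("1st class, busy:  %d\n", is_migration_target(SPCF_1STCLASS));
	printf("idle, 2nd class:  %d\n", is_migration_target(SPCF_IDLE));
	return 0;
}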
