Module Name:	src
Committed By:	ad
Date:		Sat Apr  4 20:17:58 UTC 2020

Modified Files:
	src/sys/kern: kern_runq.c

Log Message:
- sched_idle(): if a migration took place, rather than going idle briefly
  to avoid stealing back the LWP, remember the target CPU and avoid it.
- sched_preempted(): only choose a totally idle CPU; in the interests of
  overall throughput it's better to make use of SMT / slow CPUs.


To generate a diff of this commit:
cvs rdiff -u -r1.64 -r1.65 src/sys/kern/kern_runq.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
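For readers following the log message, here is a minimal, self-contained
sketch of the first change, using stand-in types and helpers (struct cpu,
migrate_one_lwp() and try_steal() are illustrative placeholders, not the
kernel's interfaces): the CPU that received the migrated LWP is remembered
and skipped by the steal loop, instead of the idle loop bailing out early
as before.

/*
 * Illustrative sketch only: a simplified, userland approximation of the
 * new sched_idle() flow.  None of these names are the kernel's.
 */
#include <stdbool.h>
#include <stddef.h>

struct cpu {
	struct cpu	*sibling;		/* next CPU in the same core */
	bool		 has_migrating_lwp;	/* stand-in for spc_migrating */
};

/* Placeholder: push one pending LWP away, return the CPU it went to. */
static struct cpu *
migrate_one_lwp(struct cpu *ci)
{

	ci->has_migrating_lwp = false;
	return NULL;
}

/* Placeholder: try to pull runnable work from tci onto ci. */
static bool
try_steal(struct cpu *ci, struct cpu *tci)
{

	(void)ci;
	(void)tci;
	return false;
}

static void
idle_sketch(struct cpu *ci)
{
	struct cpu *mci = NULL, *tci;

	/* The old code went idle here; now remember the target instead. */
	if (ci->has_migrating_lwp)
		mci = migrate_one_lwp(ci);

	/* Steal from core siblings, but never from the CPU we just fed. */
	for (tci = ci->sibling; tci != ci; tci = tci->sibling) {
		if (tci != mci && try_steal(ci, tci))
			return;
	}
}

int
main(void)
{
	struct cpu c0 = { .sibling = &c0, .has_migrating_lwp = true };

	idle_sketch(&c0);	/* single-CPU ring: nothing to steal */
	return 0;
}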
Modified files:

Index: src/sys/kern/kern_runq.c
diff -u src/sys/kern/kern_runq.c:1.64 src/sys/kern/kern_runq.c:1.65
--- src/sys/kern/kern_runq.c:1.64	Thu Mar 26 19:25:07 2020
+++ src/sys/kern/kern_runq.c	Sat Apr  4 20:17:58 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_runq.c,v 1.64 2020/03/26 19:25:07 ad Exp $	*/
+/*	$NetBSD: kern_runq.c,v 1.65 2020/04/04 20:17:58 ad Exp $	*/
 
 /*-
  * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
@@ -56,7 +56,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.64 2020/03/26 19:25:07 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.65 2020/04/04 20:17:58 ad Exp $");
 
 #include "opt_dtrace.h"
 
@@ -340,17 +340,17 @@ sched_resched_cpu(struct cpu_info *ci, p
 	}
 
 	/*
-	 * Things start as soon as we touch ci_want_resched: x86 for example
-	 * has an instruction that monitors the memory cell it's in.  We
-	 * want to drop the schedstate lock in advance, otherwise the remote
-	 * CPU can awaken and immediately block on the lock.
+	 * Things can start as soon as ci_want_resched is touched: x86 has
+	 * an instruction that monitors the memory cell it's in.  Drop the
+	 * schedstate lock in advance, otherwise the remote CPU can awaken
+	 * and immediately block on the lock.
 	 */
 	if (__predict_true(unlock)) {
 		spc_unlock(ci);
 	}
 
 	/*
-	 * The caller will always have a second scheduler lock held: either
+	 * The caller almost always has a second scheduler lock held: either
 	 * the running LWP lock (spc_lwplock), or a sleep queue lock.  That
 	 * keeps preemption disabled, which among other things ensures all
 	 * LWPs involved won't be freed while we're here (see lwp_dtor()).
@@ -361,8 +361,10 @@ sched_resched_cpu(struct cpu_info *ci, p
 		n = atomic_cas_uint(&ci->ci_want_resched, o, o | f);
 		if (__predict_true(o == n)) {
 			/*
-			 * We're the first.  If we're in process context on
-			 * the same CPU, we can avoid the visit to trap().
+			 * We're the first to set a resched on the CPU.  Try
+			 * to avoid causing a needless trip through trap()
+			 * to handle an AST fault, if it's known the LWP
+			 * will either block or go through userret() soon.
 			 */
 			if (l != curlwp || cpu_intr_p()) {
 				cpu_need_resched(ci, l, f);
@@ -680,9 +682,10 @@ sched_catchlwp(struct cpu_info *ci)
 }
 
 /*
- * Called from sched_idle() to handle migration.
+ * Called from sched_idle() to handle migration.  Return the CPU that we
+ * pushed the LWP to (may be NULL).
  */
-static void
+static struct cpu_info *
 sched_idle_migrate(void)
 {
 	struct cpu_info *ci = curcpu(), *tci = NULL;
@@ -748,13 +751,14 @@ sched_idle_migrate(void)
 		sched_resched_lwp(l, true);
 		/* tci now unlocked */
 		spc_unlock(ci);
-		return;
+		return tci;
 	}
 	if (dlock == true) {
 		KASSERT(tci != NULL);
 		spc_unlock(tci);
 	}
 	spc_unlock(ci);
+	return NULL;
 }
 
 /*
@@ -785,21 +789,22 @@ sched_steal(struct cpu_info *ci, struct
 void
 sched_idle(void)
 {
-	struct cpu_info *ci = curcpu(), *inner, *outer, *first, *tci = NULL;
+	struct cpu_info *ci, *inner, *outer, *first, *tci, *mci;
 	struct schedstate_percpu *spc, *tspc;
 	struct lwp *l;
 
+	ci = curcpu();
 	spc = &ci->ci_schedstate;
+	tci = NULL;
+	mci = NULL;
 
 	/*
 	 * Handle LWP migrations off this CPU to another.  If there a is
-	 * migration to do then go idle afterwards (we'll wake again soon),
-	 * as we don't want to instantly steal back the LWP we just moved
-	 * out.
+	 * migration to do then remember the CPU the LWP was sent to, and
+	 * don't steal the LWP back from that CPU below.
 	 */
 	if (spc->spc_migrating != NULL) {
-		sched_idle_migrate();
-		return;
+		mci = sched_idle_migrate();
 	}
 
 	/* If this CPU is offline, or we have an LWP to run, we're done. */
@@ -812,7 +817,7 @@ sched_idle(void)
 	/* Try to help our siblings out. */
 	tci = ci->ci_sibling[CPUREL_CORE];
 	while (tci != ci) {
-		if (sched_steal(ci, tci)) {
+		if (tci != mci && sched_steal(ci, tci)) {
 			return;
 		}
 		tci = tci->ci_sibling[CPUREL_CORE];
@@ -849,7 +854,8 @@ sched_idle(void)
 		do {
 			/* Don't hit the locks unless needed. */
 			tspc = &inner->ci_schedstate;
-			if (ci == inner || spc->spc_psid != tspc->spc_psid ||
+			if (ci == inner || ci == mci ||
+			    spc->spc_psid != tspc->spc_psid ||
 			    tspc->spc_mcount < min_catch) {
 				continue;
 			}
@@ -874,6 +880,7 @@ sched_idle(void)
 void
 sched_preempted(struct lwp *l)
 {
+	const int flags = SPCF_IDLE | SPCF_1STCLASS;
 	struct schedstate_percpu *tspc;
 	struct cpu_info *ci, *tci;
 
@@ -903,7 +910,6 @@ sched_preempted(struct lwp *l)
 		 */
 		tci = ci->ci_sibling[CPUREL_CORE];
 		while (tci != ci) {
-			const int flags = SPCF_IDLE | SPCF_1STCLASS;
 			tspc = &tci->ci_schedstate;
 			if ((tspc->spc_flags & flags) == flags &&
 			    sched_migratable(l, tci)) {
@@ -928,7 +934,9 @@ sched_preempted(struct lwp *l)
 	} else {
 		/*
 		 * Try to find a better CPU to take it, but don't move to
-		 * another 2nd class CPU; there's not much point.
+		 * another 2nd class CPU, and don't move to a non-idle CPU,
+		 * because that would prevent SMT being used to maximise
+		 * throughput.
 		 *
 		 * Search in the current CPU package in order to try and
 		 * keep L2/L3 cache locality, but expand to include the
@@ -936,7 +944,7 @@ sched_preempted(struct lwp *l)
 		 */
 		tci = sched_bestcpu(l, l->l_cpu);
 		if (tci != ci &&
-		    (tci->ci_schedstate.spc_flags & SPCF_1STCLASS) != 0) {
+		    (tci->ci_schedstate.spc_flags & flags) == flags) {
 			l->l_target_cpu = tci;
 		}
 	}
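And a similarly hedged sketch of the second change: sched_preempted() now
only pushes a preempted LWP to a CPU that is both idle and first-class,
rather than to any first-class CPU.  The SPCF_* values below are made up
for illustration; only the shape of the test matters.

/* Illustrative sketch only; these flag values are not the kernel's. */
#include <assert.h>
#include <stdbool.h>

#define SPCF_IDLE	0x01	/* CPU is running its idle LWP */
#define SPCF_1STCLASS	0x02	/* CPU is a first-class (fast) CPU */

/* A target is acceptable only if it is both idle and first-class. */
static bool
acceptable_target(unsigned int spc_flags)
{
	const unsigned int flags = SPCF_IDLE | SPCF_1STCLASS;

	return (spc_flags & flags) == flags;
}

int
main(void)
{

	assert(acceptable_target(SPCF_IDLE | SPCF_1STCLASS));
	assert(!acceptable_target(SPCF_1STCLASS));	/* busy fast CPU */
	assert(!acceptable_target(SPCF_IDLE));		/* idle 2nd-class CPU */
	return 0;
}

Leaving a busy first-class CPU alone keeps its SMT siblings and the slower
CPUs doing useful work, which is what the new comment in the diff means by
maximising throughput.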