On Thu, Aug 03, 2023 at 10:53:24AM +0200, Claudio Jeker wrote:
> On Thu, Aug 03, 2023 at 10:13:57AM +0200, Martin Pieuchot wrote:
> > On 02/08/23(Wed) 14:22, Claudio Jeker wrote:
> > > On Mon, Jul 31, 2023 at 10:21:11AM -0500, Scott Cheloha wrote:
> > > > On Fri, Jul 28, 2023 at 07:36:41PM -0500, Scott Cheloha wrote:
> > > > > claudio@ notes that uvm_loadav() pointlessly walks the
> > > > > allproc list to recompute schedstate_percpu.spc_nrun for
> > > > > each CPU.
> > > > >
> > > > > We can just use the value instead of recomputing it.
> > > >
> > > > Whoops, off-by-one.  The current load averaging code includes
> > > > the running thread in the nrun count if it is *not* the idle
> > > > thread.
> > >
> > > Yes, with this the loadavg seems to be consistent and follows the
> > > number of running processes.  The code seems to behave like before
> > > (with all its quirks).
> > >
> > > OK claudio@, this is a good first step.  Now I think this code
> > > should later be moved into kern_sched.c or sched_bsd.c and removed
> > > from uvm.  Not sure why the load calculation is part of memory
> > > management...
> > >
> > > On top of this I wonder about the per-CPU load calculation.  In my
> > > opinion it is wrong to skip the calculation if the CPU is idle.
> > > Because of this there is no decay for idle CPUs, and that feels
> > > wrong to me.  Do we have a userland utility that reports
> > > spc_ldavg?
> >
> > I don't understand why the SCHED_LOCK() is needed.  Since I'm really
> > against adding new uses for it, could you comment on that?
>
> The question is how sloppy we want to be.  This code looks at
> ci_schedstate (spc_idleproc and spc_nrun) and ci_curproc, so to be
> correct it needs to lock the scheduler.  Do we really want that?
> Hell no.

How about this: kill the spc_ldavg calculation.  Its use is more than
questionable.  The CPU selection code using it is not working well, and
process stealing will do the rest.  Also use the sched_idle_cpus cpuset
to know whether a CPU is idle.
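For reference, here is a minimal userland sketch of the fixed-point
exponential moving average that uvm_loadav() applies, using the
1-minute decay constant from the diff below.  FSHIFT/FSCALE mirror the
kernel's fixed-point setup, but loadav_step() and cexp_1min are
made-up names, so treat this as an illustration rather than the kernel
code:

/*
 * Sketch of the fixed-point exponential moving average behind
 * uvm_loadav().  FSHIFT/FSCALE match the kernel's fixed-point
 * convention; loadav_step() is a hypothetical helper.
 */
#include <stdio.h>
#include <stdint.h>

#define FSHIFT	11		/* bits to the right of the binary point */
#define FSCALE	(1 << FSHIFT)

typedef uint32_t fixpt_t;

/* exp(-1/12) * FSCALE: decay for the 1-minute average, sampled
 * every 5 seconds (12 samples per minute). */
static const fixpt_t cexp_1min = 0.9200444146293232 * FSCALE;

/* One sampling step: decay ldavg toward the current run count. */
static fixpt_t
loadav_step(fixpt_t ldavg, unsigned int nrun)
{
	return (cexp_1min * ldavg +
	    nrun * FSCALE * (FSCALE - cexp_1min)) >> FSHIFT;
}

int
main(void)
{
	fixpt_t ldavg = 0;
	int i;

	/* Hold nrun at 2 for a minute's worth of samples... */
	for (i = 0; i < 12; i++)
		ldavg = loadav_step(ldavg, 2);
	printf("after load: %.2f\n", (double)ldavg / FSCALE);

	/* ...then keep stepping with nrun == 0: the same formula
	 * decays toward zero, no special-casing of idle needed. */
	for (i = 0; i < 12; i++)
		ldavg = loadav_step(ldavg, 0);
	printf("after idle: %.2f\n", (double)ldavg / FSCALE);
	return 0;
}

Running it shows the average climbing toward nrun while the CPU is
busy and decaying once it goes idle, which is the kind of decay mpi@
notes is currently skipped for idle CPUs.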
-- 
:wq Claudio

Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.81
diff -u -p -r1.81 kern_sched.c
--- kern/kern_sched.c	27 Jul 2023 17:52:53 -0000	1.81
+++ kern/kern_sched.c	3 Aug 2023 08:41:38 -0000
@@ -373,7 +373,6 @@ sched_choosecpu_fork(struct proc *parent
 {
 #ifdef MULTIPROCESSOR
 	struct cpu_info *choice = NULL;
-	fixpt_t load, best_load = ~0;
 	int run, best_run = INT_MAX;
 	struct cpu_info *ci;
 	struct cpuset set;
@@ -407,13 +406,10 @@ sched_choosecpu_fork(struct proc *parent
 	while ((ci = cpuset_first(&set)) != NULL) {
 		cpuset_del(&set, ci);
 
-		load = ci->ci_schedstate.spc_ldavg;
 		run = ci->ci_schedstate.spc_nrun;
 
-		if (choice == NULL || run < best_run ||
-		    (run == best_run && load < best_load)) {
+		if (choice == NULL || run < best_run) {
 			choice = ci;
-			best_load = load;
 			best_run = run;
 		}
 	}
@@ -605,11 +601,6 @@ sched_proc_to_cpu_cost(struct cpu_info *
 	 */
 	if (CPU_IS_PRIMARY(ci))
 		cost += sched_cost_runnable;
-
-	/*
-	 * Higher load on the destination means we don't want to go there.
-	 */
-	cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT);
 
 	/*
 	 * If the proc is on this cpu already, lower the cost by how much
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.58
diff -u -p -r1.58 sched.h
--- sys/sched.h	25 Jul 2023 18:16:19 -0000	1.58
+++ sys/sched.h	3 Aug 2023 08:42:39 -0000
@@ -110,7 +110,6 @@ struct schedstate_percpu {
 	struct clockintr *spc_profclock; /* [o] profclock handle */
 
 	u_int spc_nrun;			/* procs on the run queues */
-	fixpt_t spc_ldavg;		/* shortest load avg. for this cpu */
 
 	volatile uint32_t spc_whichqs;
 	volatile u_int spc_spinning;	/* this cpu is currently spinning */
Index: uvm/uvm_meter.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_meter.c,v
retrieving revision 1.46
diff -u -p -r1.46 uvm_meter.c
--- uvm/uvm_meter.c	2 Aug 2023 13:54:45 -0000	1.46
+++ uvm/uvm_meter.c	3 Aug 2023 10:12:02 -0000
@@ -70,7 +70,7 @@ struct loadavg averunnable;
  * 5 second intervals.
  */
 
-static fixpt_t cexp[3] = {
+static const fixpt_t cexp[3] = {
 	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
 	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
 	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
@@ -98,27 +98,16 @@ uvm_meter(void)
 static void
 uvm_loadav(struct loadavg *avg)
 {
+	extern struct cpuset sched_idle_cpus;
 	CPU_INFO_ITERATOR cii;
 	struct cpu_info *ci;
-	struct schedstate_percpu *spc;
-	u_int i, nrun = 0, nrun_cpu;
-	int s;
+	u_int i, nrun = 0;
 
-
-	SCHED_LOCK(s);
 	CPU_INFO_FOREACH(cii, ci) {
-		spc = &ci->ci_schedstate;
-		nrun_cpu = spc->spc_nrun;
-		if (ci->ci_curproc != spc->spc_idleproc)
-			nrun_cpu++;
-		if (nrun_cpu == 0)
-			continue;
-		spc->spc_ldavg = (cexp[0] * spc->spc_ldavg +
-		    nrun_cpu * FSCALE *
-		    (FSCALE - cexp[0])) >> FSHIFT;
-		nrun += nrun_cpu;
+		if (!cpuset_isset(&sched_idle_cpus, ci))
+			nrun++;
+		nrun += ci->ci_schedstate.spc_nrun;
 	}
-	SCHED_UNLOCK(s);
 
 	for (i = 0; i < 3; i++) {
 		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
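
For anyone wanting to model the patched counting loop outside the
kernel, here is a rough standalone equivalent.  The bitmask cpuset,
struct cpu, and count_nrun() below are simplified stand-ins invented
for illustration, assuming that membership in sched_idle_cpus means
the CPU is currently running its idle thread:

/*
 * Userland model of the patched uvm_loadav() counting loop.
 * A CPU not in idle_cpus contributes its running thread plus
 * whatever sits on its run queue; an idle CPU contributes only
 * its queue (normally empty).
 */
#include <stdio.h>

#define NCPUS	4

struct cpu {
	unsigned int nrun;	/* like spc_nrun: procs queued on this CPU */
};

/* Stand-in for the kernel's cpuset_isset(): test bit `cpu'. */
static int
cpuset_isset(unsigned int *set, int cpu)
{
	return (*set >> cpu) & 1;
}

static unsigned int
count_nrun(struct cpu cpus[NCPUS], unsigned int idle_cpus)
{
	unsigned int nrun = 0;
	int i;

	for (i = 0; i < NCPUS; i++) {
		/* A non-idle CPU's current thread counts as runnable... */
		if (!cpuset_isset(&idle_cpus, i))
			nrun++;
		/* ...plus whatever is sitting on its run queue. */
		nrun += cpus[i].nrun;
	}
	return nrun;
}

int
main(void)
{
	struct cpu cpus[NCPUS] = { {1}, {0}, {2}, {0} };
	unsigned int idle_cpus = (1 << 1) | (1 << 3);	/* CPUs 1 and 3 idle */

	/* Two busy CPUs (2) plus their queued procs (1 + 2) => 5. */
	printf("nrun = %u\n", count_nrun(cpus, idle_cpus));
	return 0;
}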