On 14/01/20(Tue) 17:16, Martin Pieuchot wrote: > On 13/01/20(Mon) 21:31, Martin Pieuchot wrote: > > I'd like hardclock() to be free of SCHED_LOCK(), the diff below brings > > us one step away from that goal. Please test with your favorite > > benchmark and report any regression :o) > > > > The reason for moving the SCHED_LOCK() away from hardclock() is because > > it will allow us to re-commit the diff moving accounting out of the > > SCHED_LOCK(). In the end we shrink the SCHED_LOCK() which is generally > > a good thing(tm). > > > > `p_priority' is not very well named. It's currently a placeholder for > > three values: > > > > - the sleeping priority, corresponding to the value passed to tsleep(9) > > which will be later used by setrunnable() to place the thread on the > > appropriate runqueue. > > > > - the per-CPU runqueue priority which is used to add/remove a thread > > to/from a runqueue. > > > > - the scheduling priority, also named `p_usrpri', calculated from the > > estimated amount of CPU time used. > > > > > > The diff below splits the current `p_priority' into two fields `p_runpri' > > and `p_slppri'. The important part is the schedclock() chunk: > > > > @@ -519,8 +515,6 @@ schedclock(struct proc *p) > > SCHED_LOCK(s); > > newcpu = ESTCPULIM(p->p_estcpu + 1); > > setpriority(p, newcpu, p->p_p->ps_nice); > > - if (p->p_priority >= PUSER) > > - p->p_priority = p->p_usrpri; > > SCHED_UNLOCK(s); > > > > `p_priority' had to be updated because it was overwritten during each > > tsleep()/setrunnable() cycle. The same happened in schedcpu(): > > > > schedcpu: reseting priority for zerothread(44701) 127 -> 90 > > schedclock: reseting priority for zerothread(44701) 127 -> 91 > > > > Getting rid of this assignment means we can then protect `p_estcpu' and > > `p_usrpri' with a custom per-thread lock. > > > > With this division, `p_runpri' becomes part of the scheduler fields while > > `p_slppri' is obviously part of the global sleepqueue. 
> > > > The rest of the diff is quite straightforward, including the userland > > compatibility bits. > > > > Tests, comments and oks welcome :o) > > Updated diff that changes getpriority() into a macro allowing libkvm to > build as reported by anton@.
New diff that initializes `p_runpri' and fix a parenthesis incoherency in macro, pointed by visa@. This has been tested by various people without regression so far, so I'd like to move forward, ok? Index: arch/sparc64/sparc64/db_interface.c =================================================================== RCS file: /cvs/src/sys/arch/sparc64/sparc64/db_interface.c,v retrieving revision 1.54 diff -u -p -r1.54 db_interface.c --- arch/sparc64/sparc64/db_interface.c 7 Nov 2019 14:44:53 -0000 1.54 +++ arch/sparc64/sparc64/db_interface.c 19 Jan 2020 13:45:36 -0000 @@ -958,10 +958,10 @@ db_proc_cmd(addr, have_addr, count, modi return; } db_printf("process %p:", p); - db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p pri:%d upri:%d\n", + db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p rpri:%d upri:%d\n", p->p_p->ps_pid, p->p_vmspace, p->p_vmspace->vm_map.pmap, p->p_vmspace->vm_map.pmap->pm_ctx, - p->p_wchan, p->p_priority, p->p_usrpri); + p->p_wchan, p->p_runpri, p->p_usrpri); db_printf("maxsaddr:%p ssiz:%dpg or %llxB\n", p->p_vmspace->vm_maxsaddr, p->p_vmspace->vm_ssize, (unsigned long long)ptoa(p->p_vmspace->vm_ssize)); Index: dev/pci/drm/i915/intel_breadcrumbs.c =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_breadcrumbs.c,v retrieving revision 1.4 diff -u -p -r1.4 intel_breadcrumbs.c --- dev/pci/drm/i915/intel_breadcrumbs.c 10 Jul 2019 07:56:30 -0000 1.4 +++ dev/pci/drm/i915/intel_breadcrumbs.c 19 Jan 2020 13:45:36 -0000 @@ -451,7 +451,7 @@ static bool __intel_engine_add_wait(stru #ifdef __linux__ if (wait->tsk->prio > to_wait(parent)->tsk->prio) { #else - if (wait->tsk->p_priority > to_wait(parent)->tsk->p_priority) { + if (wait->tsk->p_usrpri > to_wait(parent)->tsk->p_usrpri) { #endif p = &parent->rb_right; first = false; @@ -538,7 +538,7 @@ static inline bool chain_wakeup(struct r #else static inline bool chain_wakeup(struct rb_node *rb, int priority) { - return rb && 
to_wait(rb)->tsk->p_priority <= priority; + return rb && to_wait(rb)->tsk->p_usrpri <= priority; } #endif @@ -558,7 +558,7 @@ static inline int wakeup_priority(struct if (p == b->signaler) return INT_MIN; else - return p->p_priority; + return p->p_usrpri; } #endif Index: kern/kern_fork.c =================================================================== RCS file: /cvs/src/sys/kern/kern_fork.c,v retrieving revision 1.220 diff -u -p -r1.220 kern_fork.c --- kern/kern_fork.c 6 Jan 2020 10:25:10 -0000 1.220 +++ kern/kern_fork.c 19 Jan 2020 13:47:36 -0000 @@ -148,6 +148,7 @@ thread_new(struct proc *parent, vaddr_t p = pool_get(&proc_pool, PR_WAITOK); p->p_stat = SIDL; /* protect against others */ + p->p_runpri = 0; p->p_flag = 0; /* @@ -312,7 +313,7 @@ fork_thread_start(struct proc *p, struct SCHED_LOCK(s); ci = sched_choosecpu_fork(parent, flags); - setrunqueue(ci, p, p->p_priority); + setrunqueue(ci, p, p->p_usrpri); SCHED_UNLOCK(s); } Index: kern/kern_proc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_proc.c,v retrieving revision 1.85 diff -u -p -r1.85 kern_proc.c --- kern/kern_proc.c 12 Nov 2018 15:09:17 -0000 1.85 +++ kern/kern_proc.c 19 Jan 2020 13:45:36 -0000 @@ -476,7 +476,7 @@ proc_printit(struct proc *p, const char (*pr)(" flags process=%b proc=%b\n", p->p_p->ps_flags, PS_BITS, p->p_flag, P_BITS); (*pr)(" pri=%u, usrpri=%u, nice=%d\n", - p->p_priority, p->p_usrpri, p->p_p->ps_nice); + p->p_runpri, p->p_usrpri, p->p_p->ps_nice); (*pr)(" forw=%p, list=%p,%p\n", TAILQ_NEXT(p, p_runq), p->p_list.le_next, p->p_list.le_prev); (*pr)(" process=%p user=%p, vmspace=%p\n", Index: kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.62 diff -u -p -r1.62 kern_sched.c --- kern/kern_sched.c 4 Nov 2019 18:06:03 -0000 1.62 +++ kern/kern_sched.c 19 Jan 2020 13:45:36 -0000 @@ -257,7 +257,7 @@ setrunqueue(struct cpu_info *ci, 
struct p->p_cpu = ci; p->p_stat = SRUN; - p->p_priority = prio; + p->p_runpri = prio; spc = &p->p_cpu->ci_schedstate; spc->spc_nrun++; @@ -277,7 +277,7 @@ void remrunqueue(struct proc *p) { struct schedstate_percpu *spc; - int queue = p->p_priority >> 2; + int queue = p->p_runpri >> 2; SCHED_ASSERT_LOCKED(); spc = &p->p_cpu->ci_schedstate; @@ -306,7 +306,7 @@ sched_chooseproc(void) for (queue = 0; queue < SCHED_NQS; queue++) { while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) { remrunqueue(p); - setrunqueue(NULL, p, p->p_priority); + setrunqueue(NULL, p, p->p_runpri); if (p->p_cpu == curcpu()) { KASSERT(p->p_flag & P_CPUPEG); goto again; @@ -578,7 +578,7 @@ sched_proc_to_cpu_cost(struct cpu_info * * and the higher the priority of the proc. */ if (!cpuset_isset(&sched_idle_cpus, ci)) { - cost += (p->p_priority - spc->spc_curpriority) * + cost += (p->p_usrpri - spc->spc_curpriority) * sched_cost_priority; cost += sched_cost_runnable; } Index: kern/kern_sig.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sig.c,v retrieving revision 1.242 diff -u -p -r1.242 kern_sig.c --- kern/kern_sig.c 16 Jan 2020 16:35:03 -0000 1.242 +++ kern/kern_sig.c 19 Jan 2020 13:45:36 -0000 @@ -1130,8 +1130,8 @@ runfast: /* * Raise priority to at least PUSER. 
*/ - if (p->p_priority > PUSER) - p->p_priority = PUSER; + if (p->p_usrpri > PUSER) + p->p_usrpri = PUSER; run: setrunnable(p); out: @@ -1886,7 +1886,7 @@ userret(struct proc *p) WITNESS_WARN(WARN_PANIC, NULL, "userret: returning"); - p->p_cpu->ci_schedstate.spc_curpriority = p->p_priority = p->p_usrpri; + p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri; } int Index: kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.158 diff -u -p -r1.158 kern_synch.c --- kern/kern_synch.c 16 Jan 2020 16:35:04 -0000 1.158 +++ kern/kern_synch.c 19 Jan 2020 13:45:36 -0000 @@ -383,7 +383,7 @@ sleep_setup(struct sleep_state *sls, con p->p_wchan = ident; p->p_wmesg = wmesg; p->p_slptime = 0; - p->p_priority = prio & PRIMASK; + p->p_slppri = prio & PRIMASK; TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq); } @@ -613,7 +613,7 @@ sys_sched_yield(struct proc *p, void *v, */ newprio = p->p_usrpri; TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) - newprio = max(newprio, q->p_priority); + newprio = max(newprio, q->p_runpri); setrunqueue(p->p_cpu, p, newprio); p->p_ru.ru_nvcsw++; mi_switch(); Index: kern/sched_bsd.c =================================================================== RCS file: /cvs/src/sys/kern/sched_bsd.c,v retrieving revision 1.60 diff -u -p -r1.60 sched_bsd.c --- kern/sched_bsd.c 11 Dec 2019 07:30:09 -0000 1.60 +++ kern/sched_bsd.c 19 Jan 2020 13:45:36 -0000 @@ -254,14 +254,10 @@ schedcpu(void *arg) newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu); setpriority(p, newcpu, p->p_p->ps_nice); - if (p->p_priority >= PUSER) { - if (p->p_stat == SRUN && - (p->p_priority / SCHED_PPQ) != - (p->p_usrpri / SCHED_PPQ)) { - remrunqueue(p); - setrunqueue(p->p_cpu, p, p->p_usrpri); - } else - p->p_priority = p->p_usrpri; + if (p->p_stat == SRUN && + (p->p_runpri / SCHED_PPQ) != (p->p_usrpri / SCHED_PPQ)) { + remrunqueue(p); + setrunqueue(p->p_cpu, p, p->p_usrpri); } 
SCHED_UNLOCK(s); } @@ -444,6 +440,7 @@ void setrunnable(struct proc *p) { struct process *pr = p->p_p; + u_char prio; SCHED_ASSERT_LOCKED(); @@ -462,11 +459,15 @@ setrunnable(struct proc *p) */ if ((pr->ps_flags & PS_TRACED) != 0 && pr->ps_xsig != 0) atomic_setbits_int(&p->p_siglist, sigmask(pr->ps_xsig)); + prio = p->p_usrpri; + unsleep(p); + break; case SSLEEP: + prio = p->p_slppri; unsleep(p); /* e.g. when sending signals */ break; } - setrunqueue(NULL, p, p->p_priority); + setrunqueue(NULL, p, prio); if (p->p_slptime > 1) { uint32_t newcpu; @@ -519,8 +520,6 @@ schedclock(struct proc *p) SCHED_LOCK(s); newcpu = ESTCPULIM(p->p_estcpu + 1); setpriority(p, newcpu, p->p_p->ps_nice); - if (p->p_priority >= PUSER) - p->p_priority = p->p_usrpri; SCHED_UNLOCK(s); } Index: sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.285 diff -u -p -r1.285 proc.h --- sys/proc.h 16 Jan 2020 16:35:04 -0000 1.285 +++ sys/proc.h 19 Jan 2020 13:45:36 -0000 @@ -341,7 +341,7 @@ struct proc { int p_flag; /* P_* flags. */ u_char p_spare; /* unused */ char p_stat; /* [s] S* process status. */ - char p_pad1[1]; + u_char p_runpri; /* [s] Runqueue priority */ u_char p_descfd; /* if not 255, fdesc permits this fd */ pid_t p_tid; /* Thread identifier. */ @@ -382,8 +382,8 @@ struct proc { #define p_startcopy p_sigmask sigset_t p_sigmask; /* Current signal mask. */ - u_char p_priority; /* [s] Process priority. */ - u_char p_usrpri; /* [s] User-prio based on p_estcpu & ps_nice. 
*/ + u_char p_slppri; /* [s] Sleeping priority */ + u_char p_usrpri; /* [s] Priority based on p_estcpu & ps_nice */ u_int p_estcpu; /* [s] Time averaged val of p_cpticks */ int p_pledge_syscall; /* Cache of current syscall */ Index: sys/sysctl.h =================================================================== RCS file: /cvs/src/sys/sys/sysctl.h,v retrieving revision 1.199 diff -u -p -r1.199 sysctl.h --- sys/sysctl.h 24 Dec 2019 13:13:54 -0000 1.199 +++ sys/sysctl.h 19 Jan 2020 13:49:39 -0000 @@ -561,6 +561,10 @@ struct kinfo_vmentry { #define PR_UNLOCK(pr) /* nothing */ #endif +#define _getcompatprio(_p) \ + ((_p)->p_stat == SRUN ? (_p)->p_runpri : \ + ((_p)->p_stat == SSLEEP) ? (_p)->p_slppri : (_p)->p_usrpri) + #define PTRTOINT64(_x) ((u_int64_t)(u_long)(_x)) #define FILL_KPROC(kp, copy_str, p, pr, uc, pg, paddr, \ @@ -659,7 +663,7 @@ do { \ (kp)->p_stat = (p)->p_stat; \ (kp)->p_slptime = (p)->p_slptime; \ (kp)->p_holdcnt = 1; \ - (kp)->p_priority = (p)->p_priority; \ + (kp)->p_priority = _getcompatprio(p); \ (kp)->p_usrpri = (p)->p_usrpri; \ if ((p)->p_wchan && (p)->p_wmesg) \ copy_str((kp)->p_wmesg, (p)->p_wmesg, \