On 05/12/20(Sat) 22:34, Jonathan Matthew wrote: > On Fri, Dec 04, 2020 at 10:03:46AM -0300, Martin Pieuchot wrote: > > On 04/12/20(Fri) 12:01, Jonathan Matthew wrote: > > > On Wed, Dec 02, 2020 at 11:41:04AM -0300, Martin Pieuchot wrote: > > > > [...] > > > > Could you try the diff below that only call smr_barrier() for multi- > > > > threaded processes with threads still in the list. I guess this also > > > > answers guenther@'s question. The same could be done with smr_flush(). > > > > > > This removes the overhead, more or less. Are we only looking at > > > unlocking access > > > to ps_threads from within a process (not the sysctl or ptrace stuff)? > > > Otherwise > > > this doesn't seem safe. > > > > I'd argue that if `ps_thread' is being iterated the CPU doing the > > iteration must already have a reference to the "struct process" so > > the serialization should be done on this reference. > > Sounds reasonable to me. > > > > > Now I doubt we'll be able to answer all the questions right now. If we > > can find a path forward that doesn't decrease performances too much and > > allow us to move signal delivery and sleep out of the KERNEL_LOCK() > > that's a huge win. > > I think we're at an acceptable performance hit now, so if this lets you > progress with unlocking signal delivery, I'm happy.
Ok, so here's the conversion to SMR_TAILQ again. Any ok for this one before we choose if we go with smr_barrier(), smr_flush() or a callback? Index: lib/libkvm/kvm_proc2.c =================================================================== RCS file: /cvs/src/lib/libkvm/kvm_proc2.c,v retrieving revision 1.31 diff -u -p -r1.31 kvm_proc2.c --- lib/libkvm/kvm_proc2.c 11 Dec 2019 12:36:28 -0000 1.31 +++ lib/libkvm/kvm_proc2.c 7 Dec 2020 12:46:48 -0000 @@ -341,8 +341,9 @@ kvm_proclist(kvm_t *kd, int op, int arg, kp.p_pctcpu = 0; kp.p_stat = (process.ps_flags & PS_ZOMBIE) ? SDEAD : SIDL; - for (p = TAILQ_FIRST(&process.ps_threads); p != NULL; - p = TAILQ_NEXT(&proc, p_thr_link)) { + for (p = SMR_TAILQ_FIRST_LOCKED(&process.ps_threads); + p != NULL; + p = SMR_TAILQ_NEXT_LOCKED(&proc, p_thr_link)) { if (KREAD(kd, (u_long)p, &proc)) { _kvm_err(kd, kd->program, "can't read proc at %lx", @@ -376,8 +377,8 @@ kvm_proclist(kvm_t *kd, int op, int arg, if (!dothreads) continue; - for (p = TAILQ_FIRST(&process.ps_threads); p != NULL; - p = TAILQ_NEXT(&proc, p_thr_link)) { + for (p = SMR_TAILQ_FIRST_LOCKED(&process.ps_threads); p != NULL; + p = SMR_TAILQ_NEXT_LOCKED(&proc, p_thr_link)) { if (KREAD(kd, (u_long)p, &proc)) { _kvm_err(kd, kd->program, "can't read proc at %lx", Index: sys/kern/exec_elf.c =================================================================== RCS file: /cvs/src/sys/kern/exec_elf.c,v retrieving revision 1.155 diff -u -p -r1.155 exec_elf.c --- sys/kern/exec_elf.c 6 Jul 2020 13:33:09 -0000 1.155 +++ sys/kern/exec_elf.c 7 Dec 2020 12:46:48 -0000 @@ -85,6 +85,7 @@ #include <sys/signalvar.h> #include <sys/stat.h> #include <sys/pledge.h> +#include <sys/smr.h> #include <sys/mman.h> @@ -1360,7 +1361,7 @@ coredump_notes_elf(struct proc *p, void * threads in the process have been stopped and the list can't * change. */ - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) { if (q == p) /* we've taken care of this thread */ continue; error = coredump_note_elf(q, iocookie, ¬esize); Index: sys/kern/init_main.c =================================================================== RCS file: /cvs/src/sys/kern/init_main.c,v retrieving revision 1.301 diff -u -p -r1.301 init_main.c --- sys/kern/init_main.c 13 Sep 2020 09:42:31 -0000 1.301 +++ sys/kern/init_main.c 7 Dec 2020 12:46:48 -0000 @@ -519,7 +519,7 @@ main(void *framep) */ LIST_FOREACH(pr, &allprocess, ps_list) { nanouptime(&pr->ps_start); - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) { nanouptime(&p->p_cpu->ci_schedstate.spc_runtime); timespecclear(&p->p_rtime); } Index: sys/kern/kern_exit.c =================================================================== RCS file: /cvs/src/sys/kern/kern_exit.c,v retrieving revision 1.191 diff -u -p -r1.191 kern_exit.c --- sys/kern/kern_exit.c 16 Nov 2020 18:37:06 -0000 1.191 +++ sys/kern/kern_exit.c 7 Dec 2020 12:46:48 -0000 @@ -63,6 +63,7 @@ #ifdef SYSVSEM #include <sys/sem.h> #endif +#include <sys/smr.h> #include <sys/witness.h> #include <sys/mount.h> @@ -161,7 +162,7 @@ exit1(struct proc *p, int xexit, int xsi } /* unlink ourselves from the active threads */ - TAILQ_REMOVE(&pr->ps_threads, p, p_thr_link); + SMR_TAILQ_REMOVE_LOCKED(&pr->ps_threads, p, p_thr_link); if ((p->p_flag & P_THREAD) == 0) { /* main thread gotta wait because it has the pid, et al */ while (pr->ps_refcnt > 1) @@ -724,7 +725,7 @@ process_zap(struct process *pr) if (pr->ps_ptstat != NULL) free(pr->ps_ptstat, M_SUBPROC, sizeof(*pr->ps_ptstat)); pool_put(&rusage_pool, pr->ps_ru); - KASSERT(TAILQ_EMPTY(&pr->ps_threads)); + KASSERT(SMR_TAILQ_EMPTY_LOCKED(&pr->ps_threads)); lim_free(pr->ps_limit); crfree(pr->ps_ucred); pool_put(&process_pool, pr); Index: sys/kern/kern_fork.c =================================================================== RCS file: /cvs/src/sys/kern/kern_fork.c,v retrieving revision 1.229 diff -u -p -r1.229 kern_fork.c --- sys/kern/kern_fork.c 4 Dec 2020 15:16:45 -0000 1.229 +++ sys/kern/kern_fork.c 7 Dec 2020 12:50:20 -0000 @@ -52,6 +52,7 @@ #include <sys/acct.h> #include <sys/ktrace.h> #include <sys/sched.h> +#include <sys/smr.h> #include <sys/sysctl.h> #include <sys/pool.h> #include <sys/mman.h> @@ -179,8 +180,8 @@ process_initialize(struct process *pr, s { /* initialize the thread links */ pr->ps_mainproc = p; - TAILQ_INIT(&pr->ps_threads); - TAILQ_INSERT_TAIL(&pr->ps_threads, p, p_thr_link); + SMR_TAILQ_INIT(&pr->ps_threads); + SMR_TAILQ_INSERT_TAIL_LOCKED(&pr->ps_threads, p, p_thr_link); pr->ps_refcnt = 1; p->p_p = pr; @@ -557,7 +558,7 @@ thread_fork(struct proc *curp, void *sta LIST_INSERT_HEAD(&allproc, p, p_list); LIST_INSERT_HEAD(TIDHASH(p->p_tid), p, p_hash); - TAILQ_INSERT_TAIL(&pr->ps_threads, p, p_thr_link); + SMR_TAILQ_INSERT_TAIL_LOCKED(&pr->ps_threads, p, p_thr_link); /* * if somebody else wants to take us to single threaded mode, Index: sys/kern/kern_proc.c =================================================================== RCS file: /cvs/src/sys/kern/kern_proc.c,v retrieving revision 1.88 diff -u -p -r1.88 kern_proc.c --- sys/kern/kern_proc.c 26 Sep 2020 15:15:22 -0000 1.88 +++ sys/kern/kern_proc.c 7 Dec 2020 12:46:48 -0000 @@ -502,7 +502,7 @@ db_kill_cmd(db_expr_t addr, int have_add return; } - p = TAILQ_FIRST(&pr->ps_threads); + p = SMR_TAILQ_FIRST_LOCKED(&pr->ps_threads); /* Send uncatchable SIGABRT for coredump */ sigabort(p); @@ -558,7 +558,7 @@ db_show_all_procs(db_expr_t addr, int ha while (pr != NULL) { ppr = pr->ps_pptr; - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) { #ifdef MULTIPROCESSOR if (__mp_lock_held(&kernel_lock, p->p_cpu)) has_kernel_lock = 1; Index: sys/kern/kern_resource.c =================================================================== RCS file: /cvs/src/sys/kern/kern_resource.c,v retrieving revision 1.69 diff -u -p -r1.69 kern_resource.c --- sys/kern/kern_resource.c 25 Sep 2020 20:24:32 -0000 1.69 +++ sys/kern/kern_resource.c 7 Dec 2020 12:46:48 -0000 @@ -212,7 +212,7 @@ donice(struct proc *curp, struct process return (EACCES); chgpr->ps_nice = n; SCHED_LOCK(s); - TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(p, &chgpr->ps_threads, p_thr_link) { setpriority(p, p->p_estcpu, n); } SCHED_UNLOCK(s); @@ -488,7 +488,7 @@ dogetrusage(struct proc *p, int who, str memset(rup, 0, sizeof(*rup)); /* add on all living threads */ - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) { ruadd(rup, &q->p_ru); tuagg(pr, q); } Index: sys/kern/kern_sig.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sig.c,v retrieving revision 1.267 diff -u -p -r1.267 kern_sig.c --- sys/kern/kern_sig.c 4 Dec 2020 15:16:45 -0000 1.267 +++ sys/kern/kern_sig.c 7 Dec 2020 12:50:20 -0000 @@ -925,7 +925,7 @@ ptsignal(struct proc *p, int signum, enu * delayed. Otherwise, mark it pending on the * main thread. */ - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) { /* ignore exiting threads */ if (q->p_flag & P_WEXIT) continue; @@ -1009,7 +1009,7 @@ ptsignal(struct proc *p, int signum, enu * XXX delay processing of SA_STOP signals unless action == SIG_DFL? */ if (prop & (SA_CONT | SA_STOP) && type != SPROPAGATED) - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) if (q != p) ptsignal(q, signum, SPROPAGATED); @@ -2038,7 +2038,7 @@ single_thread_set(struct proc *p, enum s pr->ps_singlecount = 0; membar_producer(); pr->ps_single = p; - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) { if (q == p) continue; if (q->p_flag & P_WEXIT) { @@ -2130,7 +2130,7 @@ single_thread_clear(struct proc *p, int SCHED_LOCK(s); pr->ps_single = NULL; atomic_clearbits_int(&pr->ps_flags, PS_SINGLEUNWIND | PS_SINGLEEXIT); - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) { if (q == p || (q->p_flag & P_SUSPSINGLE) == 0) continue; atomic_clearbits_int(&q->p_flag, P_SUSPSINGLE); Index: sys/kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.171 diff -u -p -r1.171 kern_synch.c --- sys/kern/kern_synch.c 23 Oct 2020 20:28:09 -0000 1.171 +++ sys/kern/kern_synch.c 7 Dec 2020 12:46:48 -0000 @@ -50,6 +50,7 @@ #include <sys/pool.h> #include <sys/refcnt.h> #include <sys/atomic.h> +#include <sys/smr.h> #include <sys/witness.h> #include <sys/tracepoint.h> @@ -633,7 +634,7 @@ sys_sched_yield(struct proc *p, void *v, * can make some progress. */ newprio = p->p_usrpri; - TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link) + SMR_TAILQ_FOREACH_LOCKED(q, &p->p_p->ps_threads, p_thr_link) newprio = max(newprio, q->p_runpri); setrunqueue(p->p_cpu, p, newprio); p->p_ru.ru_nvcsw++; Index: sys/kern/kern_sysctl.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sysctl.c,v retrieving revision 1.383 diff -u -p -r1.383 kern_sysctl.c --- sys/kern/kern_sysctl.c 16 Nov 2020 06:42:12 -0000 1.383 +++ sys/kern/kern_sysctl.c 7 Dec 2020 12:46:48 -0000 @@ -1596,7 +1596,7 @@ again: if (!dothreads) continue; - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) { if (buflen >= elem_size && elem_count > 0) { fill_kproc(pr, kproc, p, show_pointers); error = copyout(kproc, dp, elem_size); @@ -1696,7 +1696,7 @@ fill_kproc(struct process *pr, struct ki } else { ki->p_pctcpu = 0; ki->p_stat = (pr->ps_flags & PS_ZOMBIE) ? SDEAD : SIDL; - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) { ki->p_pctcpu += p->p_pctcpu; /* find best state: ONPROC > RUN > STOP > SLEEP > .. */ if (p->p_stat == SONPROC || ki->p_stat == SONPROC) Index: sys/kern/subr_witness.c =================================================================== RCS file: /cvs/src/sys/kern/subr_witness.c,v retrieving revision 1.38 diff -u -p -r1.38 subr_witness.c --- sys/kern/subr_witness.c 12 Nov 2020 05:49:26 -0000 1.38 +++ sys/kern/subr_witness.c 7 Dec 2020 12:46:48 -0000 @@ -1888,7 +1888,7 @@ witness_process_has_locks(struct process { struct proc *p; - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) { if (witness_thread_has_locks(p)) return (1); } @@ -2108,7 +2108,7 @@ db_witness_list_all(db_expr_t addr, int LIST_FOREACH(pr, &allprocess, ps_list) { if (!witness_process_has_locks(pr)) continue; - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) { if (!witness_thread_has_locks(p)) continue; db_printf("Process %d (%s) thread %p (%d)\n", Index: sys/kern/sys_process.c =================================================================== RCS file: /cvs/src/sys/kern/sys_process.c,v retrieving revision 1.84 diff -u -p -r1.84 sys_process.c --- sys/kern/sys_process.c 19 Oct 2020 08:19:46 -0000 1.84 +++ sys/kern/sys_process.c 7 Dec 2020 12:46:48 -0000 @@ -301,7 +301,7 @@ ptrace_ctrl(struct proc *p, int req, pid error = ESRCH; goto fail; } - t = TAILQ_FIRST(&tr->ps_threads); + t = SMR_TAILQ_FIRST_LOCKED(&tr->ps_threads); break; /* calls that accept a PID or a thread ID */ @@ -561,9 +561,9 @@ ptrace_kstate(struct proc *p, int req, p return ESRCH; if (t->p_p != tr) return EINVAL; - t = TAILQ_NEXT(t, p_thr_link); + t = SMR_TAILQ_NEXT_LOCKED(t, p_thr_link); } else { - t = TAILQ_FIRST(&tr->ps_threads); + t = SMR_TAILQ_FIRST_LOCKED(&tr->ps_threads); } if (t == NULL) @@ -754,7 +754,7 @@ process_tprfind(pid_t tpid, struct proc if (tr == NULL) return NULL; - *tp = TAILQ_FIRST(&tr->ps_threads); + *tp = SMR_TAILQ_FIRST_LOCKED(&tr->ps_threads); return tr; } } Index: sys/kern/tty.c =================================================================== RCS file: /cvs/src/sys/kern/tty.c,v retrieving revision 1.164 diff -u -p -r1.164 tty.c --- sys/kern/tty.c 9 Sep 2020 16:29:14 -0000 1.164 +++ sys/kern/tty.c 7 Dec 2020 12:46:48 -0000 @@ -2118,7 +2118,7 @@ process_sum(struct process *pr, fixpt_t ret = 0; estcpu = 0; - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) { if (p->p_stat == SRUN || p->p_stat == SONPROC) ret = 1; estcpu += p->p_pctcpu; @@ -2220,12 +2220,12 @@ update_pickpr: * - prefer living * Otherwise take the newest thread */ - pick = p = TAILQ_FIRST(&pickpr->ps_threads); + pick = p = SMR_TAILQ_FIRST_LOCKED(&pickpr->ps_threads); if (p == NULL) goto empty; run = p->p_stat == SRUN || p->p_stat == SONPROC; pctcpu = p->p_pctcpu; - while ((p = TAILQ_NEXT(p, p_thr_link)) != NULL) { + while ((p = SMR_TAILQ_NEXT_LOCKED(p, p_thr_link)) != NULL) { run2 = p->p_stat == SRUN || p->p_stat == SONPROC; pctcpu2 = p->p_pctcpu; if (run) { Index: sys/sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.301 diff -u -p -r1.301 proc.h --- sys/sys/proc.h 10 Nov 2020 17:26:54 -0000 1.301 +++ sys/sys/proc.h 7 Dec 2020 12:46:48 -0000 @@ -50,6 +50,7 @@ #include <sys/resource.h> /* For struct rusage */ #include <sys/rwlock.h> /* For struct rwlock */ #include <sys/sigio.h> /* For struct sigio */ +#include <sys/smr.h> #ifdef _KERNEL #include <sys/atomic.h> @@ -156,6 +157,9 @@ struct unveil; * R rlimit_lock * S scheduler lock * T itimer_mtx + * + * For SMR related structures that allow lock-free reads, the write lock + * is indicated below. */ struct process { /* @@ -167,7 +171,7 @@ struct process { struct ucred *ps_ucred; /* Process owner's identity. */ LIST_ENTRY(process) ps_list; /* List of all processes. */ - TAILQ_HEAD(,proc) ps_threads; /* Threads in this process. */ + SMR_TAILQ_HEAD(,proc) ps_threads; /* [K] Threads in this process. */ LIST_ENTRY(process) ps_pglist; /* List of processes in pgrp. */ struct process *ps_pptr; /* Pointer to parent process. */ @@ -330,16 +334,20 @@ struct p_inentry { /* * Locks used to protect struct members in this file: * I immutable after creation - * S scheduler lock + * K kernel lock * l read only reference, see lim_read_enter() * o owned (read/modified only) by this thread + * S scheduler lock + * + * For SMR related structures that allow lock-free reads, the write lock + * is indicated below. */ struct proc { TAILQ_ENTRY(proc) p_runq; /* [S] current run/sleep queue */ LIST_ENTRY(proc) p_list; /* List of all threads. */ struct process *p_p; /* [I] The process of this thread. */ - TAILQ_ENTRY(proc) p_thr_link; /* Threads in a process linkage. */ + SMR_TAILQ_ENTRY(proc) p_thr_link; /* [K] Threads in a process linkage */ TAILQ_ENTRY(proc) p_fut_link; /* Threads in a futex linkage. */ struct futex *p_futex; /* Current sleeping futex. */ @@ -425,8 +433,9 @@ struct proc { #define SONPROC 7 /* Thread is currently on a CPU. */ #define P_ZOMBIE(p) ((p)->p_stat == SDEAD) -#define P_HASSIBLING(p) (TAILQ_FIRST(&(p)->p_p->ps_threads) != (p) || \ - TAILQ_NEXT((p), p_thr_link) != NULL) +#define P_HASSIBLING(p) \ + (SMR_TAILQ_FIRST_LOCKED(&(p)->p_p->ps_threads) != (p) || \ + SMR_TAILQ_NEXT_LOCKED((p), p_thr_link) != NULL) /* * These flags are per-thread and kept in p_flag Index: sys/uvm/uvm_glue.c =================================================================== RCS file: /cvs/src/sys/uvm/uvm_glue.c,v retrieving revision 1.76 diff -u -p -r1.76 uvm_glue.c --- sys/uvm/uvm_glue.c 19 Oct 2020 17:57:43 -0000 1.76 +++ sys/uvm/uvm_glue.c 7 Dec 2020 12:46:48 -0000 @@ -369,7 +369,7 @@ uvm_swapout_threads(void) * the smallest p_slptime */ slpp = NULL; - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) { + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) { switch (p->p_stat) { case SRUN: case SONPROC: