On Mon, Nov 30, 2020 at 07:10:47PM -0300, Martin Pieuchot wrote:
> Every multi-threaded process keeps a list of threads in `ps_threads'.
> This list is iterated in interrupt and process context which makes it
> complicated to protect it with a rwlock.
>
> One of the places where such iteration is done is inside the tsleep(9)
> routines, directly in single_thread_check() or via CURSIG(). In order
> to take this code path out of the KERNEL_LOCK(), claudio@ proposed to
> use SMR_TAILQ. This has the advantage of not introducing lock
> dependencies and allows us to address every iteration one by one.
>
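For context, the read side this is heading towards would look roughly like the
sketch below (not part of this diff; thread_count_smr() is a made-up example,
and it also requires that struct proc is only freed once SMR readers are done,
see the smr_call() question at the end):

	/* writers keep using the *_LOCKED macros under the kernel lock */
	int
	thread_count_smr(struct process *pr)
	{
		struct proc *q;
		int n = 0;

		/* lock-free iteration; no sleeping inside the SMR section */
		smr_read_enter();
		SMR_TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
			n++;
		smr_read_leave();

		return (n);
	}
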
> The diff below is a first step in this direction: it replaces the existing
> TAILQ_* macros with the locked versions of the SMR_TAILQ_* ones. It is mostly
> lifted from claudio@'s diff and should not introduce any side effects.
>
> ok?
>
> diff --git lib/libkvm/kvm_proc2.c lib/libkvm/kvm_proc2.c
> index 96f7dc91b92..1f4f9b914bb 100644
> --- lib/libkvm/kvm_proc2.c
> +++ lib/libkvm/kvm_proc2.c
> @@ -341,8 +341,9 @@ kvm_proclist(kvm_t *kd, int op, int arg, struct process *pr,
> kp.p_pctcpu = 0;
> kp.p_stat = (process.ps_flags & PS_ZOMBIE) ? SDEAD :
> SIDL;
> - for (p = TAILQ_FIRST(&process.ps_threads); p != NULL;
> - p = TAILQ_NEXT(&proc, p_thr_link)) {
> + for (p = SMR_TAILQ_FIRST_LOCKED(&process.ps_threads);
> + p != NULL;
> + p = SMR_TAILQ_NEXT_LOCKED(&proc, p_thr_link)) {
> if (KREAD(kd, (u_long)p, &proc)) {
> _kvm_err(kd, kd->program,
> "can't read proc at %lx",
> @@ -376,8 +377,8 @@ kvm_proclist(kvm_t *kd, int op, int arg, struct process *pr,
> if (!dothreads)
> continue;
>
> - for (p = TAILQ_FIRST(&process.ps_threads); p != NULL;
> - p = TAILQ_NEXT(&proc, p_thr_link)) {
> + for (p = SMR_TAILQ_FIRST_LOCKED(&process.ps_threads); p != NULL;
> + p = SMR_TAILQ_NEXT_LOCKED(&proc, p_thr_link)) {
> if (KREAD(kd, (u_long)p, &proc)) {
> _kvm_err(kd, kd->program,
> "can't read proc at %lx",
> diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
> index 5e455208663..575273b306c 100644
> --- sys/kern/exec_elf.c
> +++ sys/kern/exec_elf.c
> @@ -85,6 +85,7 @@
> #include <sys/signalvar.h>
> #include <sys/stat.h>
> #include <sys/pledge.h>
> +#include <sys/smr.h>
>
> #include <sys/mman.h>
>
> @@ -1360,7 +1361,7 @@ coredump_notes_elf(struct proc *p, void *iocookie, size_t *sizep)
> * threads in the process have been stopped and the list can't
> * change.
> */
> - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
> if (q == p) /* we've taken care of this thread */
> continue;
> error = coredump_note_elf(q, iocookie, &notesize);
> diff --git sys/kern/init_main.c sys/kern/init_main.c
> index fed6be19435..2b657ffe328 100644
> --- sys/kern/init_main.c
> +++ sys/kern/init_main.c
> @@ -519,7 +519,7 @@ main(void *framep)
> */
> LIST_FOREACH(pr, &allprocess, ps_list) {
> nanouptime(&pr->ps_start);
> - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
> nanouptime(&p->p_cpu->ci_schedstate.spc_runtime);
> timespecclear(&p->p_rtime);
> }
> diff --git sys/kern/kern_exit.c sys/kern/kern_exit.c
> index a20775419e3..ffc0158954c 100644
> --- sys/kern/kern_exit.c
> +++ sys/kern/kern_exit.c
> @@ -63,6 +63,7 @@
> #ifdef SYSVSEM
> #include <sys/sem.h>
> #endif
> +#include <sys/smr.h>
> #include <sys/witness.h>
>
> #include <sys/mount.h>
> @@ -161,7 +162,7 @@ exit1(struct proc *p, int xexit, int xsig, int flags)
> }
>
> /* unlink ourselves from the active threads */
> - TAILQ_REMOVE(&pr->ps_threads, p, p_thr_link);
> + SMR_TAILQ_REMOVE_LOCKED(&pr->ps_threads, p, p_thr_link);
> if ((p->p_flag & P_THREAD) == 0) {
> /* main thread gotta wait because it has the pid, et al */
> while (pr->ps_refcnt > 1)
> @@ -724,7 +725,7 @@ process_zap(struct process *pr)
> if (pr->ps_ptstat != NULL)
> free(pr->ps_ptstat, M_SUBPROC, sizeof(*pr->ps_ptstat));
> pool_put(&rusage_pool, pr->ps_ru);
> - KASSERT(TAILQ_EMPTY(&pr->ps_threads));
> + KASSERT(SMR_TAILQ_EMPTY_LOCKED(&pr->ps_threads));
> lim_free(pr->ps_limit);
> crfree(pr->ps_ucred);
> pool_put(&process_pool, pr);
> diff --git sys/kern/kern_fork.c sys/kern/kern_fork.c
> index 9fb239bc8b4..e1cb587b2b8 100644
> --- sys/kern/kern_fork.c
> +++ sys/kern/kern_fork.c
> @@ -52,6 +52,7 @@
> #include <sys/acct.h>
> #include <sys/ktrace.h>
> #include <sys/sched.h>
> +#include <sys/smr.h>
> #include <sys/sysctl.h>
> #include <sys/pool.h>
> #include <sys/mman.h>
> @@ -179,8 +180,8 @@ process_initialize(struct process *pr, struct proc *p)
> {
> /* initialize the thread links */
> pr->ps_mainproc = p;
> - TAILQ_INIT(&pr->ps_threads);
> - TAILQ_INSERT_TAIL(&pr->ps_threads, p, p_thr_link);
> + SMR_TAILQ_INIT(&pr->ps_threads);
> + SMR_TAILQ_INSERT_TAIL_LOCKED(&pr->ps_threads, p, p_thr_link);
> pr->ps_refcnt = 1;
> p->p_p = pr;
>
> @@ -557,7 +558,7 @@ thread_fork(struct proc *curp, void *stack, void *tcb, pid_t *tidptr,
>
> LIST_INSERT_HEAD(&allproc, p, p_list);
> LIST_INSERT_HEAD(TIDHASH(p->p_tid), p, p_hash);
> - TAILQ_INSERT_TAIL(&pr->ps_threads, p, p_thr_link);
> + SMR_TAILQ_INSERT_TAIL_LOCKED(&pr->ps_threads, p, p_thr_link);
>
> /*
> * if somebody else wants to take us to single threaded mode,
> diff --git sys/kern/kern_proc.c sys/kern/kern_proc.c
> index 70b781e53cb..a2acc741834 100644
> --- sys/kern/kern_proc.c
> +++ sys/kern/kern_proc.c
> @@ -502,7 +502,7 @@ db_kill_cmd(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
> return;
> }
>
> - p = TAILQ_FIRST(&pr->ps_threads);
> + p = SMR_TAILQ_FIRST_LOCKED(&pr->ps_threads);
>
> /* Send uncatchable SIGABRT for coredump */
> sigabort(p);
> @@ -558,7 +558,7 @@ db_show_all_procs(db_expr_t addr, int haddr, db_expr_t count, char *modif)
> while (pr != NULL) {
> ppr = pr->ps_pptr;
>
> - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
> #ifdef MULTIPROCESSOR
> if (__mp_lock_held(&kernel_lock, p->p_cpu))
> has_kernel_lock = 1;
> diff --git sys/kern/kern_resource.c sys/kern/kern_resource.c
> index 0111bfa733a..86e855eda43 100644
> --- sys/kern/kern_resource.c
> +++ sys/kern/kern_resource.c
> @@ -211,11 +211,11 @@ donice(struct proc *curp, struct process *chgpr, int n)
> if (n < chgpr->ps_nice && suser(curp))
> return (EACCES);
> chgpr->ps_nice = n;
> - SCHED_LOCK(s);
> - TAILQ_FOREACH(p, &chgpr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(p, &chgpr->ps_threads, p_thr_link) {
> + SCHED_LOCK(s);
> setpriority(p, p->p_estcpu, n);
> + SCHED_UNLOCK(s);
I would not move the SCHED_LOCK() in here; leave it around the loop like
before. Grabbing and releasing the scheduler lock in a tight loop is not
very helpful.
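Roughly, keep the previous shape, e.g.:

	SCHED_LOCK(s);
	SMR_TAILQ_FOREACH_LOCKED(p, &chgpr->ps_threads, p_thr_link)
		setpriority(p, p->p_estcpu, n);
	SCHED_UNLOCK(s);
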
> }
> - SCHED_UNLOCK(s);
> return (0);
> }
>
> @@ -488,7 +488,7 @@ dogetrusage(struct proc *p, int who, struct rusage *rup)
> memset(rup, 0, sizeof(*rup));
>
> /* add on all living threads */
> - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
> ruadd(rup, &q->p_ru);
> tuagg(pr, q);
> }
> diff --git sys/kern/kern_sig.c sys/kern/kern_sig.c
> index 0cd78b46de5..269494bd142 100644
> --- sys/kern/kern_sig.c
> +++ sys/kern/kern_sig.c
> @@ -925,7 +925,7 @@ ptsignal(struct proc *p, int signum, enum signal_type type)
> * delayed. Otherwise, mark it pending on the
> * main thread.
> */
> - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
> /* ignore exiting threads */
> if (q->p_flag & P_WEXIT)
> continue;
> @@ -1009,7 +1009,7 @@ ptsignal(struct proc *p, int signum, enum signal_type type)
> * XXX delay processing of SA_STOP signals unless action == SIG_DFL?
> */
> if (prop & (SA_CONT | SA_STOP) && type != SPROPAGATED)
> - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link)
> + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link)
> if (q != p)
> ptsignal(q, signum, SPROPAGATED);
>
> @@ -2026,7 +2026,7 @@ single_thread_set(struct proc *p, enum single_thread_mode mode, int deep)
> pr->ps_singlecount = 0;
> membar_producer();
> pr->ps_single = p;
> - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
> if (q == p)
> continue;
> if (q->p_flag & P_WEXIT) {
> @@ -2118,7 +2118,7 @@ single_thread_clear(struct proc *p, int flag)
> SCHED_LOCK(s);
> pr->ps_single = NULL;
> atomic_clearbits_int(&pr->ps_flags, PS_SINGLEUNWIND | PS_SINGLEEXIT);
> - TAILQ_FOREACH(q, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(q, &pr->ps_threads, p_thr_link) {
> if (q == p || (q->p_flag & P_SUSPSINGLE) == 0)
> continue;
> atomic_clearbits_int(&q->p_flag, P_SUSPSINGLE);
> diff --git sys/kern/kern_synch.c sys/kern/kern_synch.c
> index f0338e8f90f..9ef710acf4d 100644
> --- sys/kern/kern_synch.c
> +++ sys/kern/kern_synch.c
> @@ -50,6 +50,7 @@
> #include <sys/pool.h>
> #include <sys/refcnt.h>
> #include <sys/atomic.h>
> +#include <sys/smr.h>
> #include <sys/witness.h>
> #include <sys/tracepoint.h>
>
> @@ -633,7 +634,7 @@ sys_sched_yield(struct proc *p, void *v, register_t *retval)
> * can make some progress.
> */
> newprio = p->p_usrpri;
> - TAILQ_FOREACH(q, &p->p_p->ps_threads, p_thr_link)
> + SMR_TAILQ_FOREACH_LOCKED(q, &p->p_p->ps_threads, p_thr_link)
> newprio = max(newprio, q->p_runpri);
> setrunqueue(p->p_cpu, p, newprio);
> p->p_ru.ru_nvcsw++;
> diff --git sys/kern/kern_sysctl.c sys/kern/kern_sysctl.c
> index a275fa7b6a9..da4fe6a22cf 100644
> --- sys/kern/kern_sysctl.c
> +++ sys/kern/kern_sysctl.c
> @@ -1596,7 +1596,7 @@ again:
> if (!dothreads)
> continue;
>
> - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
> if (buflen >= elem_size && elem_count > 0) {
> fill_kproc(pr, kproc, p, show_pointers);
> error = copyout(kproc, dp, elem_size);
> @@ -1696,7 +1696,7 @@ fill_kproc(struct process *pr, struct kinfo_proc *ki, struct proc *p,
> } else {
> ki->p_pctcpu = 0;
> ki->p_stat = (pr->ps_flags & PS_ZOMBIE) ? SDEAD : SIDL;
> - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
> ki->p_pctcpu += p->p_pctcpu;
> /* find best state: ONPROC > RUN > STOP > SLEEP > .. */
> if (p->p_stat == SONPROC || ki->p_stat == SONPROC)
> diff --git sys/kern/subr_witness.c sys/kern/subr_witness.c
> index 6ee1619da2b..8e44bd930bc 100644
> --- sys/kern/subr_witness.c
> +++ sys/kern/subr_witness.c
> @@ -1888,7 +1888,7 @@ witness_process_has_locks(struct process *pr)
> {
> struct proc *p;
>
> - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
> if (witness_thread_has_locks(p))
> return (1);
> }
> @@ -2108,7 +2108,7 @@ db_witness_list_all(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
> LIST_FOREACH(pr, &allprocess, ps_list) {
> if (!witness_process_has_locks(pr))
> continue;
> - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
> if (!witness_thread_has_locks(p))
> continue;
> db_printf("Process %d (%s) thread %p (%d)\n",
> diff --git sys/kern/sys_process.c sys/kern/sys_process.c
> index 4f6bc30cd76..1ddf711b2a6 100644
> --- sys/kern/sys_process.c
> +++ sys/kern/sys_process.c
> @@ -301,7 +301,7 @@ ptrace_ctrl(struct proc *p, int req, pid_t pid, caddr_t addr, int data)
> error = ESRCH;
> goto fail;
> }
> - t = TAILQ_FIRST(&tr->ps_threads);
> + t = SMR_TAILQ_FIRST_LOCKED(&tr->ps_threads);
> break;
>
> /* calls that accept a PID or a thread ID */
> @@ -561,9 +561,9 @@ ptrace_kstate(struct proc *p, int req, pid_t pid, void *addr)
> return ESRCH;
> if (t->p_p != tr)
> return EINVAL;
> - t = TAILQ_NEXT(t, p_thr_link);
> + t = SMR_TAILQ_NEXT_LOCKED(t, p_thr_link);
> } else {
> - t = TAILQ_FIRST(&tr->ps_threads);
> + t = SMR_TAILQ_FIRST_LOCKED(&tr->ps_threads);
> }
>
> if (t == NULL)
> @@ -754,7 +754,7 @@ process_tprfind(pid_t tpid, struct proc **tp)
>
> if (tr == NULL)
> return NULL;
> - *tp = TAILQ_FIRST(&tr->ps_threads);
> + *tp = SMR_TAILQ_FIRST_LOCKED(&tr->ps_threads);
> return tr;
> }
> }
> diff --git sys/kern/tty.c sys/kern/tty.c
> index efd8bbf5cb9..d1bbfa08540 100644
> --- sys/kern/tty.c
> +++ sys/kern/tty.c
> @@ -2118,7 +2118,7 @@ process_sum(struct process *pr, fixpt_t *estcpup)
>
> ret = 0;
> estcpu = 0;
> - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
> if (p->p_stat == SRUN || p->p_stat == SONPROC)
> ret = 1;
> estcpu += p->p_pctcpu;
> @@ -2220,12 +2220,12 @@ update_pickpr:
> * - prefer living
> * Otherwise take the newest thread
> */
> - pick = p = TAILQ_FIRST(&pickpr->ps_threads);
> + pick = p = SMR_TAILQ_FIRST(&pickpr->ps_threads);
I guess you want to use the _LOCKED version here like in all other cases.
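That is, something like:

	pick = p = SMR_TAILQ_FIRST_LOCKED(&pickpr->ps_threads);
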
> if (p == NULL)
> goto empty;
> run = p->p_stat == SRUN || p->p_stat == SONPROC;
> pctcpu = p->p_pctcpu;
> - while ((p = TAILQ_NEXT(p, p_thr_link)) != NULL) {
> + while ((p = SMR_TAILQ_NEXT_LOCKED(p, p_thr_link)) != NULL) {
> run2 = p->p_stat == SRUN || p->p_stat == SONPROC;
> pctcpu2 = p->p_pctcpu;
> if (run) {
> diff --git sys/sys/proc.h sys/sys/proc.h
> index d4ec6450a7a..196e48ce9a6 100644
> --- sys/sys/proc.h
> +++ sys/sys/proc.h
> @@ -50,6 +50,7 @@
> #include <sys/resource.h> /* For struct rusage */
> #include <sys/rwlock.h> /* For struct rwlock */
> #include <sys/sigio.h> /* For struct sigio */
> +#include <sys/smr.h>
>
> #ifdef _KERNEL
> #include <sys/atomic.h>
> @@ -156,6 +157,9 @@ struct unveil;
> * R rlimit_lock
> * S scheduler lock
> * T itimer_mtx
> + *
> + * For SMR related structures that allow lock-free reads, the write lock
> + * is indicated below.
> */
> struct process {
> /*
> @@ -167,7 +171,7 @@ struct process {
> struct ucred *ps_ucred; /* Process owner's identity. */
>
> LIST_ENTRY(process) ps_list; /* List of all processes. */
> - TAILQ_HEAD(,proc) ps_threads; /* Threads in this process. */
> + SMR_TAILQ_HEAD(,proc) ps_threads; /* [K] Threads in this process. */
>
> LIST_ENTRY(process) ps_pglist; /* List of processes in pgrp. */
> struct process *ps_pptr; /* Pointer to parent process. */
> @@ -330,16 +334,20 @@ struct p_inentry {
> /*
> * Locks used to protect struct members in this file:
> * I immutable after creation
> - * S scheduler lock
> + * K kernel lock
> * l read only reference, see lim_read_enter()
> * o owned (read/modified only) by this thread
> + * S scheduler lock
> + *
> + * For SMR related structures that allow lock-free reads, the write lock
> + * is indicated below.
> */
> struct proc {
> TAILQ_ENTRY(proc) p_runq; /* [S] current run/sleep queue */
> LIST_ENTRY(proc) p_list; /* List of all threads. */
>
> struct process *p_p; /* [I] The process of this thread. */
> - TAILQ_ENTRY(proc) p_thr_link; /* Threads in a process linkage. */
> + SMR_TAILQ_ENTRY(proc) p_thr_link; /* [K] Threads in a process linkage */
>
> TAILQ_ENTRY(proc) p_fut_link; /* Threads in a futex linkage. */
> struct futex *p_futex; /* Current sleeping futex. */
> @@ -425,8 +433,9 @@ struct proc {
> #define SONPROC 7 /* Thread is currently on a CPU. */
>
> #define P_ZOMBIE(p) ((p)->p_stat == SDEAD)
> -#define P_HASSIBLING(p) (TAILQ_FIRST(&(p)->p_p->ps_threads) != (p) || \
> - TAILQ_NEXT((p), p_thr_link) != NULL)
> +#define P_HASSIBLING(p) \
> + (SMR_TAILQ_FIRST_LOCKED(&(p)->p_p->ps_threads) != (p) || \
> + SMR_TAILQ_NEXT_LOCKED((p), p_thr_link) != NULL)
>
> /*
> * These flags are per-thread and kept in p_flag
> diff --git sys/uvm/uvm_glue.c sys/uvm/uvm_glue.c
> index 390307c4c81..40a10e4c1c5 100644
> --- sys/uvm/uvm_glue.c
> +++ sys/uvm/uvm_glue.c
> @@ -369,7 +369,7 @@ uvm_swapout_threads(void)
> * the smallest p_slptime
> */
> slpp = NULL;
> - TAILQ_FOREACH(p, &pr->ps_threads, p_thr_link) {
> + SMR_TAILQ_FOREACH_LOCKED(p, &pr->ps_threads, p_thr_link) {
> switch (p->p_stat) {
> case SRUN:
> case SONPROC:
>
Why did you not include the smr_call() to safely free struct proc in this
diff?
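For illustration, the shape would presumably be something like the sketch
below (p_smr and proc_smr_free() are made-up names, and where exactly the
final free happens is up to the real diff):

	/* struct proc grows a struct smr_entry, e.g. p_smr */

	void
	proc_smr_free(void *arg)
	{
		struct proc *p = arg;

		pool_put(&proc_pool, p);
	}

	/* instead of freeing the struct proc right after it is unlinked
	 * from ps_threads, defer the free past all SMR readers: */
	smr_init(&p->p_smr);
	smr_call(&p->p_smr, proc_smr_free, p);
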
--
:wq Claudio