Diff below aims to simplify the API to put a thread on a sleep queue and reduce it to the following:
sleep_setup(); /* check condition or release lock */ sleep_finish(); It is motivated by my work to sleep the SCHED_LOCK() but might as well prevent/fix some bugs. The tricky part of the current implementation is that sleep_setup_signal() can already park/stop the current thread resulting in a context change. Should any custom accounting / lock check happen before that? At least two lock primitives do so currently: drm's schedule_timeout() and rwlock's rw_enter(). As a result of this diff various states can be removed and sleep_finish() contains the following magic: 1. check for signal/parking 2. context switch or remove from sleep queue 3. check for signal/parking Note that sleep_finish() could be simplified even further but I left that for later to ease the review. Comments? Oks? Index: dev/dt/dt_dev.c =================================================================== RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v retrieving revision 1.10 diff -u -p -r1.10 dt_dev.c --- dev/dt/dt_dev.c 28 Sep 2020 13:16:58 -0000 1.10 +++ dev/dt/dt_dev.c 7 Dec 2020 17:19:15 -0000 @@ -225,10 +225,8 @@ dtread(dev_t dev, struct uio *uio, int f return (EMSGSIZE); while (!sc->ds_evtcnt) { - sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread"); - sleep_setup_signal(&sls); - sleep_finish(&sls, !sc->ds_evtcnt); - error = sleep_finish_signal(&sls); + sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread", 0); + error = sleep_finish(&sls, !sc->ds_evtcnt); if (error == EINTR || error == ERESTART) break; } Index: dev/pci/drm/drm_linux.c =================================================================== RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v retrieving revision 1.70 diff -u -p -r1.70 drm_linux.c --- dev/pci/drm/drm_linux.c 14 Nov 2020 23:08:47 -0000 1.70 +++ dev/pci/drm/drm_linux.c 7 Dec 2020 17:19:15 -0000 @@ -110,26 +110,23 @@ schedule_timeout(long timeout) { struct sleep_state sls; long deadline; - int wait, spl; + int wait, spl, timo = 0; MUTEX_ASSERT_LOCKED(&sch_mtx); KASSERT(!cold); - sleep_setup(&sls, sch_ident, sch_priority, "schto"); if (timeout != MAX_SCHEDULE_TIMEOUT) - sleep_setup_timeout(&sls, timeout); + timo = timeout; + sleep_setup(&sls, sch_ident, sch_priority, "schto", timo); wait = (sch_proc == curproc && timeout > 0); spl = MUTEX_OLDIPL(&sch_mtx); MUTEX_OLDIPL(&sch_mtx) = splsched(); mtx_leave(&sch_mtx); - - sleep_setup_signal(&sls); - if (timeout != MAX_SCHEDULE_TIMEOUT) deadline = ticks + timeout; - sleep_finish_all(&sls, wait); + sleep_finish(&sls, wait); if (timeout != MAX_SCHEDULE_TIMEOUT) timeout = deadline - ticks; Index: dev/pci/if_myx.c =================================================================== RCS file: /cvs/src/sys/dev/pci/if_myx.c,v retrieving revision 1.112 diff -u -p -r1.112 if_myx.c --- dev/pci/if_myx.c 27 Nov 2020 00:13:15 -0000 1.112 +++ dev/pci/if_myx.c 7 Dec 2020 17:19:15 -0000 @@ -1396,7 +1396,7 @@ myx_down(struct myx_softc *sc) (void)myx_cmd(sc, MYXCMD_SET_IFDOWN, &mc, NULL); while (sc->sc_state != MYX_S_OFF) { - sleep_setup(&sls, sts, PWAIT, "myxdown"); + sleep_setup(&sls, sts, PWAIT, "myxdown", 0); membar_consumer(); sleep_finish(&sls, sc->sc_state != MYX_S_OFF); } Index: kern/kern_rwlock.c =================================================================== RCS file: /cvs/src/sys/kern/kern_rwlock.c,v retrieving revision 1.45 diff -u -p -r1.45 kern_rwlock.c --- kern/kern_rwlock.c 2 Mar 2020 17:07:49 -0000 1.45 +++ kern/kern_rwlock.c 7 Dec 2020 17:19:15 -0000 @@ -278,15 +278,13 @@ retry: prio = op->wait_prio; if (flags & RW_INTR) prio |= PCATCH; - sleep_setup(&sls, rwl, prio, rwl->rwl_name); - if (flags & RW_INTR) - sleep_setup_signal(&sls); + sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0); do_sleep = !rw_cas(&rwl->rwl_owner, o, set); - sleep_finish(&sls, do_sleep); + error = sleep_finish(&sls, do_sleep); if ((flags & RW_INTR) && - (error = sleep_finish_signal(&sls)) != 0) + (error != 0)) return (error); if (flags & RW_SLEEPFAIL) return (EAGAIN); Index: kern/kern_sched.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sched.c,v retrieving revision 1.67 diff -u -p -r1.67 kern_sched.c --- kern/kern_sched.c 11 Jun 2020 00:00:01 -0000 1.67 +++ kern/kern_sched.c 7 Dec 2020 17:19:15 -0000 @@ -674,7 +674,7 @@ sched_stop_secondary_cpus(void) if (CPU_IS_PRIMARY(ci)) continue; while ((spc->spc_schedflags & SPCF_HALTED) == 0) { - sleep_setup(&sls, spc, PZERO, "schedstate"); + sleep_setup(&sls, spc, PZERO, "schedstate", 0); sleep_finish(&sls, (spc->spc_schedflags & SPCF_HALTED) == 0); } Index: kern/kern_sig.c =================================================================== RCS file: /cvs/src/sys/kern/kern_sig.c,v retrieving revision 1.267 diff -u -p -r1.267 kern_sig.c --- kern/kern_sig.c 4 Dec 2020 15:16:45 -0000 1.267 +++ kern/kern_sig.c 7 Dec 2020 17:19:15 -0000 @@ -1467,6 +1467,31 @@ postsig(struct proc *p, int signum) } /* + * Check and handle signals and suspensions around a sleep cycle. + */ +int +sigsleep(void) +{ + struct proc *p = curproc; + struct sigacts *ps = p->p_p->ps_sigacts; + int sig, error = 0; + + error = single_thread_check(p, 1); + if (error) + return error; + + sig = CURSIG(p); + if (sig != 0) { + if (ps->ps_sigintr & sigmask(sig)) + error = EINTR; + else + error = ERESTART; + } + + return error; +} + +/* * Force the current process to exit with the specified signal, dumping core * if appropriate. We bypass the normal tests for masked and caught signals, * allowing unrecoverable failures to terminate the process without changing @@ -2106,7 +2131,7 @@ single_thread_wait(struct process *pr, i /* wait until they're all suspended */ wait = pr->ps_singlecount > 0; while (wait) { - sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend"); + sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend", 0); wait = pr->ps_singlecount > 0; sleep_finish(&sls, wait); if (!recheck) Index: kern/kern_synch.c =================================================================== RCS file: /cvs/src/sys/kern/kern_synch.c,v retrieving revision 1.171 diff -u -p -r1.171 kern_synch.c --- kern/kern_synch.c 23 Oct 2020 20:28:09 -0000 1.171 +++ kern/kern_synch.c 7 Dec 2020 17:32:32 -0000 @@ -147,11 +147,8 @@ tsleep(const volatile void *ident, int p return (0); } - sleep_setup(&sls, ident, priority, wmesg); - sleep_setup_timeout(&sls, timo); - sleep_setup_signal(&sls); - - return sleep_finish_all(&sls, 1); + sleep_setup(&sls, ident, priority, wmesg, timo); + return sleep_finish(&sls, 1); } int @@ -241,8 +238,7 @@ msleep(const volatile void *ident, struc return (0); } - sleep_setup(&sls, ident, priority, wmesg); - sleep_setup_timeout(&sls, timo); + sleep_setup(&sls, ident, priority, wmesg, timo); /* XXX - We need to make sure that the mutex doesn't * unblock splsched. This can be made a bit more @@ -252,9 +248,7 @@ msleep(const volatile void *ident, struc MUTEX_OLDIPL(mtx) = splsched(); mtx_leave(mtx); /* signal may stop the process, release mutex before that */ - sleep_setup_signal(&sls); - - error = sleep_finish_all(&sls, 1); + error = sleep_finish(&sls, 1); if ((priority & PNORELOCK) == 0) { mtx_enter(mtx); @@ -303,14 +297,11 @@ rwsleep(const volatile void *ident, stru rw_assert_anylock(rwl); status = rw_status(rwl); - sleep_setup(&sls, ident, priority, wmesg); - sleep_setup_timeout(&sls, timo); + sleep_setup(&sls, ident, priority, wmesg, 0); rw_exit(rwl); /* signal may stop the process, release rwlock before that */ - sleep_setup_signal(&sls); - - error = sleep_finish_all(&sls, 1); + error = sleep_finish(&sls, 1); if ((priority & PNORELOCK) == 0) rw_enter(rwl, status); @@ -343,7 +334,7 @@ rwsleep_nsec(const volatile void *ident, void sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio, - const char *wmesg) + const char *wmesg, int timo) { struct proc *p = curproc; @@ -357,15 +348,12 @@ sleep_setup(struct sleep_state *sls, con #endif sls->sls_catch = prio & PCATCH; - sls->sls_do_sleep = 1; sls->sls_locked = 0; - sls->sls_sig = 0; - sls->sls_unwind = 0; sls->sls_timeout = 0; /* * The kernel has to be locked for signal processing. - * This is done here and not in sleep_setup_signal() because + * This is done here and not in sleep_finish() because * KERNEL_LOCK() has to be taken before SCHED_LOCK(). */ if (sls->sls_catch != 0) { @@ -382,35 +370,52 @@ sleep_setup(struct sleep_state *sls, con p->p_slptime = 0; p->p_slppri = prio & PRIMASK; TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq); -} - -int -sleep_finish_all(struct sleep_state *sls, int do_sleep) -{ - int error, error1; - - sleep_finish(sls, do_sleep); - error1 = sleep_finish_timeout(sls); - error = sleep_finish_signal(sls); - - /* Signal errors are higher priority than timeouts. */ - if (error == 0 && error1 != 0) - error = error1; - return error; + KASSERT((p->p_flag & P_TIMEOUT) == 0); + if (timo) { + sls->sls_timeout = 1; + timeout_add(&p->p_sleep_to, timo); + } } -void +int sleep_finish(struct sleep_state *sls, int do_sleep) { struct proc *p = curproc; + int error = 0, error1 = 0; + + if (sls->sls_catch != 0) { + /* sleep_setup() has locked the kernel. */ + KERNEL_ASSERT_LOCKED(); + + /* + * We put ourselves on the sleep queue and start our + * timeout before calling sigsleep(), as we could stop + * there, and a wakeup or a SIGCONT (or both) could + * occur while we were stopped. A SIGCONT would cause + * us to be marked as SSLEEP without resuming us, thus + * we must be ready for sleep when sigsleep() is called. + * If the wakeup happens while we're stopped, p->p_wchan + * will be NULL upon return from sigsleep(). In that case + * we need to unwind immediately. + */ + atomic_setbits_int(&p->p_flag, P_SINTR); + if ((error = sigsleep()) != 0) { + p->p_stat = SONPROC; + sls->sls_catch = 0; + do_sleep = 0; + } else if (p->p_wchan == NULL) { + sls->sls_catch = 0; + do_sleep = 0; + } + } - if (sls->sls_do_sleep && do_sleep) { + if (do_sleep) { p->p_stat = SSLEEP; p->p_ru.ru_nvcsw++; SCHED_ASSERT_LOCKED(); mi_switch(); - } else if (!do_sleep) { + } else { unsleep(p); } @@ -427,30 +432,11 @@ sleep_finish(struct sleep_state *sls, in * we need to clear it before the ktrace. */ atomic_clearbits_int(&p->p_flag, P_SINTR); -} - -void -sleep_setup_timeout(struct sleep_state *sls, int timo) -{ - struct proc *p = curproc; - - KASSERT((p->p_flag & P_TIMEOUT) == 0); - - if (timo) { - sls->sls_timeout = 1; - timeout_add(&p->p_sleep_to, timo); - } -} - -int -sleep_finish_timeout(struct sleep_state *sls) -{ - struct proc *p = curproc; if (sls->sls_timeout) { if (p->p_flag & P_TIMEOUT) { atomic_clearbits_int(&p->p_flag, P_TIMEOUT); - return (EWOULDBLOCK); + error1 = EWOULDBLOCK; } else { /* This must not sleep. */ timeout_del_barrier(&p->p_sleep_to); @@ -458,70 +444,18 @@ sleep_finish_timeout(struct sleep_state } } - return (0); -} - -void -sleep_setup_signal(struct sleep_state *sls) -{ - struct proc *p = curproc; - - if (sls->sls_catch == 0) - return; - - /* sleep_setup() has locked the kernel. */ - KERNEL_ASSERT_LOCKED(); - - /* - * We put ourselves on the sleep queue and start our timeout before - * calling single_thread_check or CURSIG, as we could stop there, and - * a wakeup or a SIGCONT (or both) could occur while we were stopped. - * A SIGCONT would cause us to be marked as SSLEEP without resuming us, - * thus we must be ready for sleep when CURSIG is called. If the - * wakeup happens while we're stopped, p->p_wchan will be 0 upon - * return from single_thread_check or CURSIG. In that case we should - * not go to sleep. If single_thread_check returns an error we need - * to unwind immediately. That's achieved by saving the return value - * in sls->sl_unwind and checking it later in sleep_finish_signal. - */ - atomic_setbits_int(&p->p_flag, P_SINTR); - if ((sls->sls_unwind = single_thread_check(p, 1)) != 0 || - (sls->sls_sig = CURSIG(p)) != 0) { - unsleep(p); - p->p_stat = SONPROC; - sls->sls_do_sleep = 0; - } else if (p->p_wchan == 0) { - sls->sls_catch = 0; - sls->sls_do_sleep = 0; - } -} - -int -sleep_finish_signal(struct sleep_state *sls) -{ - struct proc *p = curproc; - int error = 0; - - if (sls->sls_catch != 0) { - KERNEL_ASSERT_LOCKED(); - - if (sls->sls_unwind != 0 || - (sls->sls_unwind = single_thread_check(p, 1)) != 0) - error = sls->sls_unwind; - else if (sls->sls_sig != 0 || - (sls->sls_sig = CURSIG(p)) != 0) { - if (p->p_p->ps_sigacts->ps_sigintr & - sigmask(sls->sls_sig)) - error = EINTR; - else - error = ERESTART; - } - } + /* Check if thread was woken up because of a unwind or signal */ + if (sls->sls_catch != 0) + error = sigsleep(); if (sls->sls_locked) KERNEL_UNLOCK(); - return (error); + /* Signal errors are higher priority than timeouts. */ + if (error == 0 && error1 != 0) + error = error1; + + return error; } int @@ -878,7 +812,7 @@ refcnt_finalize(struct refcnt *r, const refcnt = atomic_dec_int_nv(&r->refs); while (refcnt) { - sleep_setup(&sls, r, PWAIT, wmesg); + sleep_setup(&sls, r, PWAIT, wmesg, 0); refcnt = r->refs; sleep_finish(&sls, refcnt); } @@ -906,7 +840,7 @@ cond_wait(struct cond *c, const char *wm wait = c->c_wait; while (wait) { - sleep_setup(&sls, c, PWAIT, wmesg); + sleep_setup(&sls, c, PWAIT, wmesg, 0); wait = c->c_wait; sleep_finish(&sls, wait); } Index: kern/kern_timeout.c =================================================================== RCS file: /cvs/src/sys/kern/kern_timeout.c,v retrieving revision 1.82 diff -u -p -r1.82 kern_timeout.c --- kern/kern_timeout.c 20 Oct 2020 22:37:12 -0000 1.82 +++ kern/kern_timeout.c 7 Dec 2020 17:19:15 -0000 @@ -787,7 +787,7 @@ softclock_thread(void *arg) s = splsoftclock(); for (;;) { - sleep_setup(&sls, &timeout_proc, PSWP, "bored"); + sleep_setup(&sls, &timeout_proc, PSWP, "bored", 0); sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc)); mtx_enter(&timeout_mutex); Index: kern/subr_log.c =================================================================== RCS file: /cvs/src/sys/kern/subr_log.c,v retrieving revision 1.69 diff -u -p -r1.69 subr_log.c --- kern/subr_log.c 25 Oct 2020 10:55:42 -0000 1.69 +++ kern/subr_log.c 7 Dec 2020 17:19:15 -0000 @@ -235,10 +235,8 @@ logread(dev_t dev, struct uio *uio, int * Set up and enter sleep manually instead of using msleep() * to keep log_mtx as a leaf lock. */ - sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog"); - sleep_setup_signal(&sls); - sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT); - error = sleep_finish_signal(&sls); + sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog", 0); + error = sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT); mtx_enter(&log_mtx); if (error) goto out; Index: sys/proc.h =================================================================== RCS file: /cvs/src/sys/sys/proc.h,v retrieving revision 1.301 diff -u -p -r1.301 proc.h --- sys/proc.h 10 Nov 2020 17:26:54 -0000 1.301 +++ sys/proc.h 7 Dec 2020 17:19:15 -0000 @@ -612,10 +615,7 @@ int proc_cansugid(struct proc *); struct sleep_state { int sls_s; int sls_catch; - int sls_do_sleep; int sls_locked; - int sls_sig; - int sls_unwind; int sls_timeout; }; Index: sys/signalvar.h =================================================================== RCS file: /cvs/src/sys/sys/signalvar.h,v retrieving revision 1.45 diff -u -p -r1.45 signalvar.h --- sys/signalvar.h 8 Nov 2020 20:37:24 -0000 1.45 +++ sys/signalvar.h 7 Dec 2020 17:19:15 -0000 @@ -128,6 +128,7 @@ void trapsignal(struct proc *p, int sig, void sigexit(struct proc *, int); void sigabort(struct proc *); int sigismasked(struct proc *, int); +int sigsleep(void); int sigonstack(size_t); int killpg1(struct proc *, int, int, int); Index: sys/systm.h =================================================================== RCS file: /cvs/src/sys/sys/systm.h,v retrieving revision 1.148 diff -u -p -r1.148 systm.h --- sys/systm.h 26 Aug 2020 03:29:07 -0000 1.148 +++ sys/systm.h 7 Dec 2020 17:19:15 -0000 @@ -255,13 +255,8 @@ void stop_periodic_resettodr(void); struct sleep_state; void sleep_setup(struct sleep_state *, const volatile void *, int, - const char *); -void sleep_setup_timeout(struct sleep_state *, int); -void sleep_setup_signal(struct sleep_state *); -void sleep_finish(struct sleep_state *, int); -int sleep_finish_timeout(struct sleep_state *); -int sleep_finish_signal(struct sleep_state *); -int sleep_finish_all(struct sleep_state *, int); + const char *, int); +int sleep_finish(struct sleep_state *, int); void sleep_queue_init(void); struct cond;