Diff below aims to simplify the API for putting a thread on a sleep queue,
reducing it to the following:

        sleep_setup();
        /* check condition or release lock */
        sleep_finish();

It is motivated by my work to sleep the SCHED_LOCK() but might as well
prevent/fix some bugs.

The tricky part of the current implementation is that sleep_setup_signal()
can already park/stop the current thread resulting in a context change.
Should any custom accounting / lock check happen before that?  At least
two lock primitives do so currently:  drm's schedule_timeout() and
rwlock's rw_enter().

As a result of this diff various states can be removed and sleep_finish()
contains the following magic:

        1. check for signal/parking
        2. context switch or remove from sleep queue
        3. check for signal/parking

Note that sleep_finish() could be simplified even further but I left
that for later to ease the review.

Comments?  Oks?

Index: dev/dt/dt_dev.c
===================================================================
RCS file: /cvs/src/sys/dev/dt/dt_dev.c,v
retrieving revision 1.10
diff -u -p -r1.10 dt_dev.c
--- dev/dt/dt_dev.c     28 Sep 2020 13:16:58 -0000      1.10
+++ dev/dt/dt_dev.c     7 Dec 2020 17:19:15 -0000
@@ -225,10 +225,8 @@ dtread(dev_t dev, struct uio *uio, int f
                return (EMSGSIZE);
 
        while (!sc->ds_evtcnt) {
-               sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread");
-               sleep_setup_signal(&sls);
-               sleep_finish(&sls, !sc->ds_evtcnt);
-               error = sleep_finish_signal(&sls);
+               sleep_setup(&sls, sc, PWAIT | PCATCH, "dtread", 0);
+               error = sleep_finish(&sls, !sc->ds_evtcnt);
                if (error == EINTR || error == ERESTART)
                        break;
        }
Index: dev/pci/drm/drm_linux.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/drm_linux.c,v
retrieving revision 1.70
diff -u -p -r1.70 drm_linux.c
--- dev/pci/drm/drm_linux.c     14 Nov 2020 23:08:47 -0000      1.70
+++ dev/pci/drm/drm_linux.c     7 Dec 2020 17:19:15 -0000
@@ -110,26 +110,23 @@ schedule_timeout(long timeout)
 {
        struct sleep_state sls;
        long deadline;
-       int wait, spl;
+       int wait, spl, timo = 0;
 
        MUTEX_ASSERT_LOCKED(&sch_mtx);
        KASSERT(!cold);
 
-       sleep_setup(&sls, sch_ident, sch_priority, "schto");
        if (timeout != MAX_SCHEDULE_TIMEOUT)
-               sleep_setup_timeout(&sls, timeout);
+               timo = timeout;
+       sleep_setup(&sls, sch_ident, sch_priority, "schto", timo);
 
        wait = (sch_proc == curproc && timeout > 0);
 
        spl = MUTEX_OLDIPL(&sch_mtx);
        MUTEX_OLDIPL(&sch_mtx) = splsched();
        mtx_leave(&sch_mtx);
-
-       sleep_setup_signal(&sls);
-
        if (timeout != MAX_SCHEDULE_TIMEOUT)
                deadline = ticks + timeout;
-       sleep_finish_all(&sls, wait);
+       sleep_finish(&sls, wait);
        if (timeout != MAX_SCHEDULE_TIMEOUT)
                timeout = deadline - ticks;
 
Index: dev/pci/if_myx.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_myx.c,v
retrieving revision 1.112
diff -u -p -r1.112 if_myx.c
--- dev/pci/if_myx.c    27 Nov 2020 00:13:15 -0000      1.112
+++ dev/pci/if_myx.c    7 Dec 2020 17:19:15 -0000
@@ -1396,7 +1396,7 @@ myx_down(struct myx_softc *sc)
        (void)myx_cmd(sc, MYXCMD_SET_IFDOWN, &mc, NULL);
 
        while (sc->sc_state != MYX_S_OFF) {
-               sleep_setup(&sls, sts, PWAIT, "myxdown");
+               sleep_setup(&sls, sts, PWAIT, "myxdown", 0);
                membar_consumer();
                sleep_finish(&sls, sc->sc_state != MYX_S_OFF);
        }
Index: kern/kern_rwlock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_rwlock.c,v
retrieving revision 1.45
diff -u -p -r1.45 kern_rwlock.c
--- kern/kern_rwlock.c  2 Mar 2020 17:07:49 -0000       1.45
+++ kern/kern_rwlock.c  7 Dec 2020 17:19:15 -0000
@@ -278,15 +278,13 @@ retry:
                prio = op->wait_prio;
                if (flags & RW_INTR)
                        prio |= PCATCH;
-               sleep_setup(&sls, rwl, prio, rwl->rwl_name);
-               if (flags & RW_INTR)
-                       sleep_setup_signal(&sls);
+               sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0);
 
                do_sleep = !rw_cas(&rwl->rwl_owner, o, set);
 
-               sleep_finish(&sls, do_sleep);
+               error = sleep_finish(&sls, do_sleep);
                if ((flags & RW_INTR) &&
-                   (error = sleep_finish_signal(&sls)) != 0)
+                   (error != 0))
                        return (error);
                if (flags & RW_SLEEPFAIL)
                        return (EAGAIN);
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.67
diff -u -p -r1.67 kern_sched.c
--- kern/kern_sched.c   11 Jun 2020 00:00:01 -0000      1.67
+++ kern/kern_sched.c   7 Dec 2020 17:19:15 -0000
@@ -674,7 +674,7 @@ sched_stop_secondary_cpus(void)
                if (CPU_IS_PRIMARY(ci))
                        continue;
                while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
-                       sleep_setup(&sls, spc, PZERO, "schedstate");
+                       sleep_setup(&sls, spc, PZERO, "schedstate", 0);
                        sleep_finish(&sls,
                            (spc->spc_schedflags & SPCF_HALTED) == 0);
                }
Index: kern/kern_sig.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.267
diff -u -p -r1.267 kern_sig.c
--- kern/kern_sig.c     4 Dec 2020 15:16:45 -0000       1.267
+++ kern/kern_sig.c     7 Dec 2020 17:19:15 -0000
@@ -1467,6 +1467,31 @@ postsig(struct proc *p, int signum)
 }
 
 /*
+ * Check and handle signals and suspensions around a sleep cycle.
+ */
+int
+sigsleep(void)
+{
+       struct proc *p = curproc;
+       struct sigacts *ps = p->p_p->ps_sigacts;
+       int sig, error = 0;
+
+       error = single_thread_check(p, 1);
+       if (error)
+               return error;
+
+       sig = CURSIG(p);
+       if (sig != 0) {
+               if (ps->ps_sigintr & sigmask(sig))
+                       error = EINTR;
+               else
+                       error = ERESTART;
+       }
+
+       return error;
+}
+
+/*
  * Force the current process to exit with the specified signal, dumping core
  * if appropriate.  We bypass the normal tests for masked and caught signals,
  * allowing unrecoverable failures to terminate the process without changing
@@ -2106,7 +2131,7 @@ single_thread_wait(struct process *pr, i
        /* wait until they're all suspended */
        wait = pr->ps_singlecount > 0;
        while (wait) {
-               sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend");
+               sleep_setup(&sls, &pr->ps_singlecount, PWAIT, "suspend", 0);
                wait = pr->ps_singlecount > 0;
                sleep_finish(&sls, wait);
                if (!recheck)
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.171
diff -u -p -r1.171 kern_synch.c
--- kern/kern_synch.c   23 Oct 2020 20:28:09 -0000      1.171
+++ kern/kern_synch.c   7 Dec 2020 17:32:32 -0000
@@ -147,11 +147,8 @@ tsleep(const volatile void *ident, int p
                return (0);
        }
 
-       sleep_setup(&sls, ident, priority, wmesg);
-       sleep_setup_timeout(&sls, timo);
-       sleep_setup_signal(&sls);
-
-       return sleep_finish_all(&sls, 1);
+       sleep_setup(&sls, ident, priority, wmesg, timo);
+       return sleep_finish(&sls, 1);
 }
 
 int
@@ -241,8 +238,7 @@ msleep(const volatile void *ident, struc
                return (0);
        }
 
-       sleep_setup(&sls, ident, priority, wmesg);
-       sleep_setup_timeout(&sls, timo);
+       sleep_setup(&sls, ident, priority, wmesg, timo);
 
        /* XXX - We need to make sure that the mutex doesn't
         * unblock splsched. This can be made a bit more
@@ -252,9 +248,7 @@ msleep(const volatile void *ident, struc
        MUTEX_OLDIPL(mtx) = splsched();
        mtx_leave(mtx);
        /* signal may stop the process, release mutex before that */
-       sleep_setup_signal(&sls);
-
-       error = sleep_finish_all(&sls, 1);
+       error = sleep_finish(&sls, 1);
 
        if ((priority & PNORELOCK) == 0) {
                mtx_enter(mtx);
@@ -303,14 +297,11 @@ rwsleep(const volatile void *ident, stru
        rw_assert_anylock(rwl);
        status = rw_status(rwl);
 
-       sleep_setup(&sls, ident, priority, wmesg);
-       sleep_setup_timeout(&sls, timo);
+       sleep_setup(&sls, ident, priority, wmesg, 0);
 
        rw_exit(rwl);
        /* signal may stop the process, release rwlock before that */
-       sleep_setup_signal(&sls);
-
-       error = sleep_finish_all(&sls, 1);
+       error = sleep_finish(&sls, 1);
 
        if ((priority & PNORELOCK) == 0)
                rw_enter(rwl, status);
@@ -343,7 +334,7 @@ rwsleep_nsec(const volatile void *ident,
 
 void
 sleep_setup(struct sleep_state *sls, const volatile void *ident, int prio,
-    const char *wmesg)
+    const char *wmesg, int timo)
 {
        struct proc *p = curproc;
 
@@ -357,15 +348,12 @@ sleep_setup(struct sleep_state *sls, con
 #endif
 
        sls->sls_catch = prio & PCATCH;
-       sls->sls_do_sleep = 1;
        sls->sls_locked = 0;
-       sls->sls_sig = 0;
-       sls->sls_unwind = 0;
        sls->sls_timeout = 0;
 
        /*
         * The kernel has to be locked for signal processing.
-        * This is done here and not in sleep_setup_signal() because
+        * This is done here and not in sleep_finish() because
         * KERNEL_LOCK() has to be taken before SCHED_LOCK().
         */
        if (sls->sls_catch != 0) {
@@ -382,35 +370,52 @@ sleep_setup(struct sleep_state *sls, con
        p->p_slptime = 0;
        p->p_slppri = prio & PRIMASK;
        TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_runq);
-}
-
-int
-sleep_finish_all(struct sleep_state *sls, int do_sleep)
-{
-       int error, error1;
-
-       sleep_finish(sls, do_sleep);
-       error1 = sleep_finish_timeout(sls);
-       error = sleep_finish_signal(sls);
-
-       /* Signal errors are higher priority than timeouts. */
-       if (error == 0 && error1 != 0)
-               error = error1;
 
-       return error;
+       KASSERT((p->p_flag & P_TIMEOUT) == 0);
+       if (timo) {
+               sls->sls_timeout = 1;
+               timeout_add(&p->p_sleep_to, timo);
+       }
 }
 
-void
+int
 sleep_finish(struct sleep_state *sls, int do_sleep)
 {
        struct proc *p = curproc;
+       int error = 0, error1 = 0;
+
+       if (sls->sls_catch != 0) {
+               /* sleep_setup() has locked the kernel. */
+               KERNEL_ASSERT_LOCKED();
+
+               /*
+                * We put ourselves on the sleep queue and start our
+                * timeout before calling sigsleep(), as we could stop
+                * there, and a wakeup or a SIGCONT (or both) could
+                * occur while we were stopped.  A SIGCONT would cause
+                * us to be marked as SSLEEP without resuming us, thus
+                * we must be ready for sleep when sigsleep() is called.
+                * If the wakeup happens while we're stopped, p->p_wchan
+                * will be NULL upon return from sigsleep().  In that case
+                * we need to unwind immediately.
+                */
+               atomic_setbits_int(&p->p_flag, P_SINTR);
+               if ((error = sigsleep()) != 0) {
+                       p->p_stat = SONPROC;
+                       sls->sls_catch = 0;
+                       do_sleep = 0;
+               } else if (p->p_wchan == NULL) {
+                       sls->sls_catch = 0;
+                       do_sleep = 0;
+               }
+       }
 
-       if (sls->sls_do_sleep && do_sleep) {
+       if (do_sleep) {
                p->p_stat = SSLEEP;
                p->p_ru.ru_nvcsw++;
                SCHED_ASSERT_LOCKED();
                mi_switch();
-       } else if (!do_sleep) {
+       } else {
                unsleep(p);
        }
 
@@ -427,30 +432,11 @@ sleep_finish(struct sleep_state *sls, in
         * we need to clear it before the ktrace.
         */
        atomic_clearbits_int(&p->p_flag, P_SINTR);
-}
-
-void
-sleep_setup_timeout(struct sleep_state *sls, int timo)
-{
-       struct proc *p = curproc;
-
-       KASSERT((p->p_flag & P_TIMEOUT) == 0);
-
-       if (timo) {
-               sls->sls_timeout = 1;
-               timeout_add(&p->p_sleep_to, timo);
-       }
-}
-
-int
-sleep_finish_timeout(struct sleep_state *sls)
-{
-       struct proc *p = curproc;
 
        if (sls->sls_timeout) {
                if (p->p_flag & P_TIMEOUT) {
                        atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
-                       return (EWOULDBLOCK);
+                       error1 = EWOULDBLOCK;
                } else {
                        /* This must not sleep. */
                        timeout_del_barrier(&p->p_sleep_to);
@@ -458,70 +444,18 @@ sleep_finish_timeout(struct sleep_state 
                }
        }
 
-       return (0);
-}
-
-void
-sleep_setup_signal(struct sleep_state *sls)
-{
-       struct proc *p = curproc;
-
-       if (sls->sls_catch == 0)
-               return;
-
-       /* sleep_setup() has locked the kernel. */
-       KERNEL_ASSERT_LOCKED();
-
-       /*
-        * We put ourselves on the sleep queue and start our timeout before
-        * calling single_thread_check or CURSIG, as we could stop there, and
-        * a wakeup or a SIGCONT (or both) could occur while we were stopped.
-        * A SIGCONT would cause us to be marked as SSLEEP without resuming us,
-        * thus we must be ready for sleep when CURSIG is called.  If the
-        * wakeup happens while we're stopped, p->p_wchan will be 0 upon
-        * return from single_thread_check or CURSIG.  In that case we should
-        * not go to sleep.  If single_thread_check returns an error we need
-        * to unwind immediately.  That's achieved by saving the return value
-        * in sls->sl_unwind and checking it later in sleep_finish_signal.
-        */
-       atomic_setbits_int(&p->p_flag, P_SINTR);
-       if ((sls->sls_unwind = single_thread_check(p, 1)) != 0 ||
-           (sls->sls_sig = CURSIG(p)) != 0) {
-               unsleep(p);
-               p->p_stat = SONPROC;
-               sls->sls_do_sleep = 0;
-       } else if (p->p_wchan == 0) {
-               sls->sls_catch = 0;
-               sls->sls_do_sleep = 0;
-       }
-}
-
-int
-sleep_finish_signal(struct sleep_state *sls)
-{
-       struct proc *p = curproc;
-       int error = 0;
-
-       if (sls->sls_catch != 0) {
-               KERNEL_ASSERT_LOCKED();
-
-               if (sls->sls_unwind != 0 ||
-                   (sls->sls_unwind = single_thread_check(p, 1)) != 0)
-                       error = sls->sls_unwind;
-               else if (sls->sls_sig != 0 ||
-                   (sls->sls_sig = CURSIG(p)) != 0) {
-                       if (p->p_p->ps_sigacts->ps_sigintr &
-                           sigmask(sls->sls_sig))
-                               error = EINTR;
-                       else
-                               error = ERESTART;
-               }
-       }
+       /* Check if the thread was woken up because of an unwind or signal */
+       if (sls->sls_catch != 0)
+               error = sigsleep();
 
        if (sls->sls_locked)
                KERNEL_UNLOCK();
 
-       return (error);
+       /* Signal errors are higher priority than timeouts. */
+       if (error == 0 && error1 != 0)
+               error = error1;
+
+       return error;
 }
 
 int
@@ -878,7 +812,7 @@ refcnt_finalize(struct refcnt *r, const 
 
        refcnt = atomic_dec_int_nv(&r->refs);
        while (refcnt) {
-               sleep_setup(&sls, r, PWAIT, wmesg);
+               sleep_setup(&sls, r, PWAIT, wmesg, 0);
                refcnt = r->refs;
                sleep_finish(&sls, refcnt);
        }
@@ -906,7 +840,7 @@ cond_wait(struct cond *c, const char *wm
 
        wait = c->c_wait;
        while (wait) {
-               sleep_setup(&sls, c, PWAIT, wmesg);
+               sleep_setup(&sls, c, PWAIT, wmesg, 0);
                wait = c->c_wait;
                sleep_finish(&sls, wait);
        }
Index: kern/kern_timeout.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_timeout.c,v
retrieving revision 1.82
diff -u -p -r1.82 kern_timeout.c
--- kern/kern_timeout.c 20 Oct 2020 22:37:12 -0000      1.82
+++ kern/kern_timeout.c 7 Dec 2020 17:19:15 -0000
@@ -787,7 +787,7 @@ softclock_thread(void *arg)
 
        s = splsoftclock();
        for (;;) {
-               sleep_setup(&sls, &timeout_proc, PSWP, "bored");
+               sleep_setup(&sls, &timeout_proc, PSWP, "bored", 0);
                sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));
 
                mtx_enter(&timeout_mutex);
Index: kern/subr_log.c
===================================================================
RCS file: /cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.69
diff -u -p -r1.69 subr_log.c
--- kern/subr_log.c     25 Oct 2020 10:55:42 -0000      1.69
+++ kern/subr_log.c     7 Dec 2020 17:19:15 -0000
@@ -235,10 +235,8 @@ logread(dev_t dev, struct uio *uio, int 
                 * Set up and enter sleep manually instead of using msleep()
                 * to keep log_mtx as a leaf lock.
                 */
-               sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog");
-               sleep_setup_signal(&sls);
-               sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT);
-               error = sleep_finish_signal(&sls);
+               sleep_setup(&sls, mbp, LOG_RDPRI | PCATCH, "klog", 0);
+               error = sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT);
                mtx_enter(&log_mtx);
                if (error)
                        goto out;
Index: sys/proc.h
===================================================================
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.301
diff -u -p -r1.301 proc.h
--- sys/proc.h  10 Nov 2020 17:26:54 -0000      1.301
+++ sys/proc.h  7 Dec 2020 17:19:15 -0000
@@ -612,10 +615,7 @@ int        proc_cansugid(struct proc *);
 struct sleep_state {
        int sls_s;
        int sls_catch;
-       int sls_do_sleep;
        int sls_locked;
-       int sls_sig;
-       int sls_unwind;
        int sls_timeout;
 };
 
Index: sys/signalvar.h
===================================================================
RCS file: /cvs/src/sys/sys/signalvar.h,v
retrieving revision 1.45
diff -u -p -r1.45 signalvar.h
--- sys/signalvar.h     8 Nov 2020 20:37:24 -0000       1.45
+++ sys/signalvar.h     7 Dec 2020 17:19:15 -0000
@@ -128,6 +128,7 @@ void        trapsignal(struct proc *p, int sig,
 void   sigexit(struct proc *, int);
 void   sigabort(struct proc *);
 int    sigismasked(struct proc *, int);
+int    sigsleep(void);
 int    sigonstack(size_t);
 int    killpg1(struct proc *, int, int, int);
 
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.148
diff -u -p -r1.148 systm.h
--- sys/systm.h 26 Aug 2020 03:29:07 -0000      1.148
+++ sys/systm.h 7 Dec 2020 17:19:15 -0000
@@ -255,13 +255,8 @@ void       stop_periodic_resettodr(void);
 
 struct sleep_state;
 void   sleep_setup(struct sleep_state *, const volatile void *, int,
-           const char *);
-void   sleep_setup_timeout(struct sleep_state *, int);
-void   sleep_setup_signal(struct sleep_state *);
-void   sleep_finish(struct sleep_state *, int);
-int    sleep_finish_timeout(struct sleep_state *);
-int    sleep_finish_signal(struct sleep_state *);
-int    sleep_finish_all(struct sleep_state *, int);
+           const char *, int);
+int    sleep_finish(struct sleep_state *, int);
 void   sleep_queue_init(void);
 
 struct cond;

Reply via email to