On Fri, 23 Sep 2016, Christiano F. Haesbaert wrote:
...
> The diff as it is will deadlock against SCHED_LOCK.
> tsleep() calls sleep_setup(), which grabs SCHED_LOCK; then
> sleep_setup_timeout() will grab timeout_mutex in timeout_add().
> 
> In softclock() you have the opposite: it grabs timeout_mutex,
> then does a wakeup, which grabs SCHED_LOCK.
> 

Hmm, yes.  And softclock_thread() has the same problem: msleep() needs 
to grab SCHED_LOCK while the passed-in mutex is held, so it'll hold 
timeout_mutex while grabbing SCHED_LOCK too.
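
To make the inversion concrete, here's a rough sketch of the two call 
chains (paraphrased, not verbatim from the tree):

	/* tsleep()/msleep() path: SCHED_LOCK first, timeout_mutex second */
	tsleep()
	    sleep_setup()		/* grabs SCHED_LOCK */
	    sleep_setup_timeout()
	        timeout_add()		/* grabs timeout_mutex */

	/* softclock() path: timeout_mutex first, SCHED_LOCK second */
	softclock()			/* grabs timeout_mutex */
	    wakeup(&timeout_proc)	/* grabs SCHED_LOCK */

One side takes SCHED_LOCK -> timeout_mutex, the other takes 
timeout_mutex -> SCHED_LOCK, so two CPUs can each hold one lock while 
waiting on the other.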

I just played around with using a separate mutex to protect 
timeout_proc, a mutex which would be 'outside' SCHED_LOCK, unlike 
timeout_mutex, which is 'inside' SCHED_LOCK.  The catch is that 
supporting all the functionality of timeout_add() and timeout_del() 
becomes ugly and fragile: they would need to check whether the 
*timeout* has TIMEOUT_NEEDPROCCTX set and, if so, grab the new mutex 
before grabbing timeout_mutex.  That's "safe" from being a lock loop 
from tsleep() because the thread's p_sleep_to *can't* have that flag 
set, so the 'outside' mutex wouldn't be needed....but geez is this 
ugly.  I'm also unsure what defined semantics, if any, 
timeout_triggered() should have for NEEDPROCCTX timeouts: should it 
return true once the timeout has been dequeued from the timeout wheel, 
or only once softclock_thread() is actually going to run it?
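
For reference, the shape of that conditional dance (this is what the 
diff below does in timeout_add() and timeout_del()):

	if (to->to_flags & TIMEOUT_NEEDPROCCTX)
		mtx_enter(&timeout_proc_mutex);	/* 'outside' mutex first */
	mtx_enter(&timeout_mutex);
	/* ... queue or dequeue the timeout ... */
	mtx_leave(&timeout_mutex);
	if (to->to_flags & TIMEOUT_NEEDPROCCTX)
		mtx_leave(&timeout_proc_mutex);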


...or maybe this makes people think we should toss this out and go 
directly to dlg@'s proposal...


Philip Guenther

Index: kern_timeout.c
===================================================================
RCS file: /data/src/openbsd/src/sys/kern/kern_timeout.c,v
retrieving revision 1.49
diff -u -p -r1.49 kern_timeout.c
--- kern_timeout.c      22 Sep 2016 12:55:24 -0000      1.49
+++ kern_timeout.c      24 Sep 2016 05:10:00 -0000
@@ -87,9 +87,15 @@ timeout_from_circq(struct circq *p)
  * All wheels are locked with the same mutex.
  *
  * We need locking since the timeouts are manipulated from hardclock that's
- * not behind the big lock.
+ * not behind the big lock.  timeout_mutex protects the actual timeout
+ * wheel and queue of new timeouts.  timeout_proc_mutex protects the
+ * queue of "need proc context" timeouts that have triggered.  Since those
+ * timeouts can reside in either location and are moved between them by
+ * softclock(), calls that might run when it's uncertain where the timeout
+ * is queued must hold *both* mutexes when queueing or dequeueing them.
  */
 struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);
+struct mutex timeout_proc_mutex = MUTEX_INITIALIZER(IPL_NONE);
 
 /*
  * Circular queue definitions.
@@ -190,6 +196,8 @@ timeout_add(struct timeout *new, int to_
                panic("timeout_add: to_ticks (%d) < 0", to_ticks);
 #endif
 
+       if (new->to_flags & TIMEOUT_NEEDPROCCTX)
+               mtx_enter(&timeout_proc_mutex);
        mtx_enter(&timeout_mutex);
        /* Initialize the time here, it won't change. */
        old_time = new->to_time;
@@ -212,6 +220,8 @@ timeout_add(struct timeout *new, int to_
                CIRCQ_INSERT(&new->to_list, &timeout_todo);
        }
        mtx_leave(&timeout_mutex);
+       if (new->to_flags & TIMEOUT_NEEDPROCCTX)
+               mtx_leave(&timeout_proc_mutex);
 
        return (ret);
 }
@@ -312,6 +322,8 @@ timeout_del(struct timeout *to)
 {
        int ret = 0;
 
+       if (to->to_flags & TIMEOUT_NEEDPROCCTX)
+               mtx_enter(&timeout_proc_mutex);
        mtx_enter(&timeout_mutex);
        if (to->to_flags & TIMEOUT_ONQUEUE) {
                CIRCQ_REMOVE(&to->to_list);
@@ -320,6 +332,8 @@ timeout_del(struct timeout *to)
        }
        to->to_flags &= ~TIMEOUT_TRIGGERED;
        mtx_leave(&timeout_mutex);
+       if (to->to_flags & TIMEOUT_NEEDPROCCTX)
+               mtx_leave(&timeout_proc_mutex);
 
        return (ret);
 }
@@ -351,56 +365,66 @@ timeout_hardclock_update(void)
 }
 
 void
-timeout_run(struct timeout *to)
+timeout_run(struct timeout *to, struct mutex *mtx)
 {
        void (*fn)(void *);
        void *arg;
 
-       MUTEX_ASSERT_LOCKED(&timeout_mutex);
+       MUTEX_ASSERT_LOCKED(mtx);
 
        to->to_flags &= ~TIMEOUT_ONQUEUE;
-       to->to_flags |= TIMEOUT_TRIGGERED;
 
        fn = to->to_func;
        arg = to->to_arg;
 
-       mtx_leave(&timeout_mutex);
+       mtx_leave(mtx);
        fn(arg);
-       mtx_enter(&timeout_mutex);
+       mtx_enter(mtx);
 }
 
 void
 softclock(void *arg)
 {
+       struct circq need_proc;
        int delta;
        struct circq *bucket;
        struct timeout *to;
+       int need_wakeup;
 
+       CIRCQ_INIT(&need_proc);
        mtx_enter(&timeout_mutex);
        while (!CIRCQ_EMPTY(&timeout_todo)) {
                to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
                CIRCQ_REMOVE(&to->to_list);
 
-               /*
-                * If due run it or defer execution to the thread,
-                * otherwise insert it into the right bucket.
-                */
+               /* if not yet due, insert it into the right bucket */
                delta = to->to_time - ticks;
                if (delta > 0) {
                        bucket = &BUCKET(delta, to->to_time);
                        CIRCQ_INSERT(&to->to_list, bucket);
-               } else if (to->to_flags & TIMEOUT_NEEDPROCCTX) {
-                       CIRCQ_INSERT(&to->to_list, &timeout_proc);
-                       wakeup(&timeout_proc);
-               } else {
+                       continue;
+               }
+
 #ifdef DEBUG
-                       if (delta < 0)
-                               printf("timeout delayed %d\n", delta);
+               if (delta < 0)
+                       printf("timeout delayed %d\n", delta);
 #endif
-                       timeout_run(to);
-               }
+               to->to_flags |= TIMEOUT_TRIGGERED;
+
+               /* otherwise, run it or defer execution to the thread */
+               if (to->to_flags & TIMEOUT_NEEDPROCCTX)
+                       CIRCQ_INSERT(&to->to_list, &need_proc);
+               else
+                       timeout_run(to, &timeout_mutex);
        }
        mtx_leave(&timeout_mutex);
+
+       mtx_enter(&timeout_proc_mutex);
+       need_wakeup = CIRCQ_EMPTY(&timeout_proc);
+       CIRCQ_APPEND(&timeout_proc, &need_proc);
+       mtx_leave(&timeout_proc_mutex);
+       if (need_wakeup)
+               wakeup(&timeout_proc);
 }
 
 void
@@ -427,16 +451,16 @@ softclock_thread(void *arg)
        KASSERT(ci != NULL);
        sched_peg_curproc(ci);
 
-       mtx_enter(&timeout_mutex);
+       mtx_enter(&timeout_proc_mutex);
        for (;;) {
                while (CIRCQ_EMPTY(&timeout_proc))
-                       msleep(&timeout_proc, &timeout_mutex, PSWP, "bored", 0);
+                       msleep(&timeout_proc, &timeout_proc_mutex, PSWP,
+                           "bored", 0);
 
                to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
                CIRCQ_REMOVE(&to->to_list);
-               timeout_run(to);
+               timeout_run(to, &timeout_proc_mutex);
        }
-       mtx_leave(&timeout_mutex);
 }
 
 #ifndef SMALL_KERNEL
