On Fri, 23 Sep 2016, Christiano F. Haesbaert wrote: ... > The diff as it is will deadlock against SCHED_LOCK. > tsleep() calls sleep_setup(), which grabs SCHED_LOCK, > Then sleep_setup_timeout() will grab timeout_mutex in timeout_add() > > On softclock() you have the opposite: > Grabs timeout_mutex then does a wakeup, which grabs SCHED_LOCK. >
Hmm, yes. And softclock_thread() has the same problem: msleep() needs to grab SCHED_LOCK while the passed in mutex is held, so it'll hold timeout_mutex while grabbing SCHED_LOCK too. I just played around with using a separate mutex for protecting timeout_proc, a mutex which would be 'outside' SCHED_LOCK, unlike timeout_mutex which is 'inside' SCHED_LOCK. The catch is that supporting all the functionality of timeout_add() and timeout_del() becomes ugly and fragile: they would need to check whether the timeout has TIMEOUT_NEEDPROCCTX set and, if so, grab the new mutex before grabbing timeout_mutex. That's "safe" from being a lock loop from tsleep() because the thread's p_sleep_to *can't* have that flag set, so the 'outside' mutex wouldn't be needed....but geez is this ugly. I'm also unsure what defined semantics, if any, timeout_triggered() should have for NEEDPROCCTX timeouts. Should it return true once the timeout has been dequeued from the timeout wheel, or should it only be set once softclock_thread is actually going to run it? ...or maybe this makes people think we should toss this out and go directly to dlg@'s proposal... Philip Guenther Index: kern_timeout.c =================================================================== RCS file: /data/src/openbsd/src/sys/kern/kern_timeout.c,v retrieving revision 1.49 diff -u -p -r1.49 kern_timeout.c --- kern_timeout.c 22 Sep 2016 12:55:24 -0000 1.49 +++ kern_timeout.c 24 Sep 2016 05:10:00 -0000 @@ -87,9 +87,15 @@ timeout_from_circq(struct circq *p) * All wheels are locked with the same mutex. * * We need locking since the timeouts are manipulated from hardclock that's - * not behind the big lock. + * not behind the big lock. timeout_mutex protects the actual timeout + * wheel and queue of new timeouts. timeout_proc_mutex protects the + * queue of "need proc context" timeouts that have triggered. 
Since those + * timeouts can reside in either location and are moved between them by + * softclock(), calls that might run when it's uncertain where the timeout + * is queued must hold *both* mutexes when queueing or dequeueing them. */ struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH); +struct mutex timeout_proc_mutex = MUTEX_INITIALIZER(IPL_NONE); /* * Circular queue definitions. @@ -190,6 +196,8 @@ timeout_add(struct timeout *new, int to_ panic("timeout_add: to_ticks (%d) < 0", to_ticks); #endif + if (new->to_flags & TIMEOUT_NEEDPROCCTX) + mtx_enter(&timeout_proc_mutex); mtx_enter(&timeout_mutex); /* Initialize the time here, it won't change. */ old_time = new->to_time; @@ -212,6 +220,8 @@ timeout_add(struct timeout *new, int to_ CIRCQ_INSERT(&new->to_list, &timeout_todo); } mtx_leave(&timeout_mutex); + if (new->to_flags & TIMEOUT_NEEDPROCCTX) + mtx_leave(&timeout_proc_mutex); return (ret); } @@ -312,6 +322,8 @@ timeout_del(struct timeout *to) { int ret = 0; + if (to->to_flags & TIMEOUT_NEEDPROCCTX) + mtx_enter(&timeout_proc_mutex); mtx_enter(&timeout_mutex); if (to->to_flags & TIMEOUT_ONQUEUE) { CIRCQ_REMOVE(&to->to_list); @@ -320,6 +332,8 @@ timeout_del(struct timeout *to) } to->to_flags &= ~TIMEOUT_TRIGGERED; mtx_leave(&timeout_mutex); + if (to->to_flags & TIMEOUT_NEEDPROCCTX) + mtx_leave(&timeout_proc_mutex); return (ret); } @@ -351,56 +365,66 @@ timeout_hardclock_update(void) } void -timeout_run(struct timeout *to) +timeout_run(struct timeout *to, struct mutex *mtx) { void (*fn)(void *); void *arg; - MUTEX_ASSERT_LOCKED(&timeout_mutex); + MUTEX_ASSERT_LOCKED(mtx); to->to_flags &= ~TIMEOUT_ONQUEUE; - to->to_flags |= TIMEOUT_TRIGGERED; fn = to->to_func; arg = to->to_arg; - mtx_leave(&timeout_mutex); + mtx_leave(mtx); fn(arg); - mtx_enter(&timeout_mutex); + mtx_enter(mtx); } void softclock(void *arg) { + struct circq need_proc; int delta; struct circq *bucket; struct timeout *to; + int need_wakeup; + CIRCQ_INIT(&need_proc); mtx_enter(&timeout_mutex); 
while (!CIRCQ_EMPTY(&timeout_todo)) { to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo)); CIRCQ_REMOVE(&to->to_list); - /* - * If due run it or defer execution to the thread, - * otherwise insert it into the right bucket. - */ + /* if not yet due, insert it into the right bucket */ delta = to->to_time - ticks; if (delta > 0) { bucket = &BUCKET(delta, to->to_time); CIRCQ_INSERT(&to->to_list, bucket); - } else if (to->to_flags & TIMEOUT_NEEDPROCCTX) { - CIRCQ_INSERT(&to->to_list, &timeout_proc); - wakeup(&timeout_proc); - } else { + continue; + } + #ifdef DEBUG - if (delta < 0) - printf("timeout delayed %d\n", delta); + if (delta < 0) + printf("timeout delayed %d\n", delta); #endif - timeout_run(to); - } + to->to_flags |= TIMEOUT_TRIGGERED; + + /* otherwise, run it or defer execution to the thread */ + if (to->to_flags & TIMEOUT_NEEDPROCCTX) + CIRCQ_INSERT(&to->to_list, &need_proc); + else + timeout_run(to, &timeout_mutex); } mtx_leave(&timeout_mutex); + + mtx_enter(&timeout_proc_mutex); + need_wakeup = CIRCQ_EMPTY(&timeout_proc); + CIRCQ_APPEND(&timeout_proc, &need_proc); + mtx_leave(&timeout_proc_mutex); + if (need_wakeup) + wakeup(&timeout_proc); } void @@ -427,16 +451,16 @@ softclock_thread(void *arg) KASSERT(ci != NULL); sched_peg_curproc(ci); - mtx_enter(&timeout_mutex); + mtx_enter(&timeout_proc_mutex); for (;;) { while (CIRCQ_EMPTY(&timeout_proc)) - msleep(&timeout_proc, &timeout_mutex, PSWP, "bored", 0); + msleep(&timeout_proc, &timeout_proc_mutex, PSWP, + "bored", 0); to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc)); CIRCQ_REMOVE(&to->to_list); - timeout_run(to); + timeout_run(to, &timeout_proc_mutex); } - mtx_leave(&timeout_mutex); } #ifndef SMALL_KERNEL