Module Name: src Committed By: martin Date: Tue Jun 27 18:51:47 UTC 2023
Modified Files: src/sys/kern [netbsd-10]: kern_timeout.c Log Message: Pull up following revision(s) (requested by pho in ticket #219): sys/kern/kern_timeout.c: revision 1.74 sys/kern/kern_timeout.c: revision 1.75 sys/kern/kern_timeout.c: revision 1.76 callout(9): Fix panic() in callout_destroy() (kern/57226) The culprit was callout_halt(). "(c->c_flags & CALLOUT_FIRED) != 0" wasn't the correct way to check if a callout is running. It failed to wait for a running callout to finish in the following scenario: 1. cpu0 initializes a callout and schedules it. 2. cpu0 invokes callout_softclock() and fires the callout, setting the flag CALLOUT_FIRED. 3. The callout invokes callout_schedule() to re-schedule itself. 4. callout_schedule_locked() clears the flag CALLOUT_FIRED, and releases the lock. 5. Before the lock is re-acquired by callout_softclock(), cpu1 decides to destroy the callout. It first invokes callout_halt() to make sure the callout finishes running. 6. But since CALLOUT_FIRED has been cleared, callout_halt() thinks it's not running and therefore returns without invoking callout_wait(). 7. cpu1 proceeds to invoke callout_destroy() while it's still running on cpu0. callout_destroy() detects that and panics. callout(9): Tidy up the condition for "callout is running on another LWP" No functional changes. callout(9): Delete the unused member cc_cancel from struct callout_cpu I see no reason why it should be there, and believe it's a leftover from some old code. To generate a diff of this commit: cvs rdiff -u -r1.73 -r1.73.2.1 src/sys/kern/kern_timeout.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/kern/kern_timeout.c diff -u src/sys/kern/kern_timeout.c:1.73 src/sys/kern/kern_timeout.c:1.73.2.1 --- src/sys/kern/kern_timeout.c:1.73 Sat Oct 29 00:19:21 2022 +++ src/sys/kern/kern_timeout.c Tue Jun 27 18:51:47 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: kern_timeout.c,v 1.73 2022/10/29 00:19:21 riastradh Exp $ */ +/* $NetBSD: kern_timeout.c,v 1.73.2.1 2023/06/27 18:51:47 martin Exp $ */ /*- * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc. @@ -59,7 +59,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.73 2022/10/29 00:19:21 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.73.2.1 2023/06/27 18:51:47 martin Exp $"); /* * Timeouts are kept in a hierarchical timing wheel. The c_time is the @@ -174,7 +174,6 @@ struct callout_cpu { u_int cc_ticks; lwp_t *cc_lwp; callout_impl_t *cc_active; - callout_impl_t *cc_cancel; struct evcnt cc_ev_late; struct evcnt cc_ev_block; struct callout_circq cc_todo; /* Worklist */ @@ -263,6 +262,17 @@ callout_lock(callout_impl_t *c) } /* + * Check if the callout is currently running on an LWP that isn't curlwp. + */ +static inline bool +callout_running_somewhere_else(callout_impl_t *c, struct callout_cpu *cc) +{ + KASSERT(c->c_cpu == cc); + + return cc->cc_active == c && cc->cc_lwp != curlwp; +} + +/* * callout_startup: * * Initialize the callout facility, called at system startup time. 
@@ -378,7 +388,7 @@ callout_destroy(callout_t *cs) KASSERTMSG((c->c_flags & CALLOUT_PENDING) == 0, "pending callout %p: c_func (%p) c_flags (%#x) destroyed from %p", c, c->c_func, c->c_flags, __builtin_return_address(0)); - KASSERTMSG(c->c_cpu->cc_lwp == curlwp || c->c_cpu->cc_active != c, + KASSERTMSG(!callout_running_somewhere_else(c, c->c_cpu), "running callout %p: c_func (%p) c_flags (%#x) destroyed from %p", c, c->c_func, c->c_flags, __builtin_return_address(0)); c->c_magic = 0; @@ -496,7 +506,6 @@ bool callout_stop(callout_t *cs) { callout_impl_t *c = (callout_impl_t *)cs; - struct callout_cpu *cc; kmutex_t *lock; bool expired; @@ -509,16 +518,6 @@ callout_stop(callout_t *cs) expired = ((c->c_flags & CALLOUT_FIRED) != 0); c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED); - cc = c->c_cpu; - if (cc->cc_active == c) { - /* - * This is for non-MPSAFE callouts only. To synchronize - * effectively we must be called with kernel_lock held. - * It's also taken in callout_softclock. - */ - cc->cc_cancel = c; - } - SDT_PROBE5(sdt, kernel, callout, stop, c, c->c_func, c->c_arg, c->c_flags, expired); @@ -542,7 +541,6 @@ callout_halt(callout_t *cs, void *interl { callout_impl_t *c = (callout_impl_t *)cs; kmutex_t *lock; - int flags; KASSERT(c->c_magic == CALLOUT_MAGIC); KASSERT(!cpu_intr_p()); @@ -552,11 +550,10 @@ callout_halt(callout_t *cs, void *interl lock = callout_lock(c); SDT_PROBE4(sdt, kernel, callout, halt, c, c->c_func, c->c_arg, c->c_flags); - flags = c->c_flags; - if ((flags & CALLOUT_PENDING) != 0) + if ((c->c_flags & CALLOUT_PENDING) != 0) CIRCQ_REMOVE(&c->c_list); - c->c_flags = flags & ~(CALLOUT_PENDING|CALLOUT_FIRED); - if (__predict_false(flags & CALLOUT_FIRED)) { + c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED); + if (__predict_false(callout_running_somewhere_else(c, c->c_cpu))) { callout_wait(c, interlock, lock); return true; } @@ -592,7 +589,7 @@ callout_wait(callout_impl_t *c, void *in * - the callout itself has called callout_halt() (nice!) 
*/ cc = c->c_cpu; - if (__predict_true(cc->cc_active != c || cc->cc_lwp == l)) + if (__predict_true(!callout_running_somewhere_else(c, cc))) break; /* It's running - need to wait for it to complete. */