Module Name:    src
Committed By:   martin
Date:           Tue Jun 27 18:51:47 UTC 2023

Modified Files:
        src/sys/kern [netbsd-10]: kern_timeout.c

Log Message:
Pull up following revision(s) (requested by pho in ticket #219):

        sys/kern/kern_timeout.c: revision 1.74
        sys/kern/kern_timeout.c: revision 1.75
        sys/kern/kern_timeout.c: revision 1.76

callout(9): Fix panic() in callout_destroy() (kern/57226)

The culprit was callout_halt(). "(c->c_flags & CALLOUT_FIRED) != 0" wasn't
the correct way to check if a callout is running. It failed to wait for a
running callout to finish in the following scenario:
1. cpu0 initializes a callout and schedules it.
2. cpu0 invokes callout_softclock() and fires the callout, setting the flag
    CALLOUT_FIRED.
3. The callout invokes callout_schedule() to re-schedule itself.
4. callout_schedule_locked() clears the flag CALLOUT_FIRED, and releases
    the lock.
5. Before the lock is re-acquired by callout_softclock(), cpu1 decides to
    destroy the callout. It first invokes callout_halt() to make sure the
    callout finishes running.
6. But since CALLOUT_FIRED has been cleared, callout_halt() thinks it's not
    running and therefore returns without invoking callout_wait().
7. cpu1 proceeds to invoke callout_destroy() while it's still running on
    cpu0. callout_destroy() detects that and panics.

callout(9): Tidy up the condition for "callout is running on another LWP"
No functional changes.

callout(9): Delete the unused member cc_cancel from struct callout_cpu
I see no reason why it should be there, and believe it's a leftover from
some old code.


To generate a diff of this commit:
cvs rdiff -u -r1.73 -r1.73.2.1 src/sys/kern/kern_timeout.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_timeout.c
diff -u src/sys/kern/kern_timeout.c:1.73 src/sys/kern/kern_timeout.c:1.73.2.1
--- src/sys/kern/kern_timeout.c:1.73	Sat Oct 29 00:19:21 2022
+++ src/sys/kern/kern_timeout.c	Tue Jun 27 18:51:47 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_timeout.c,v 1.73 2022/10/29 00:19:21 riastradh Exp $	*/
+/*	$NetBSD: kern_timeout.c,v 1.73.2.1 2023/06/27 18:51:47 martin Exp $	*/
 
 /*-
  * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
@@ -59,7 +59,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.73 2022/10/29 00:19:21 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.73.2.1 2023/06/27 18:51:47 martin Exp $");
 
 /*
  * Timeouts are kept in a hierarchical timing wheel.  The c_time is the
@@ -174,7 +174,6 @@ struct callout_cpu {
 	u_int		cc_ticks;
 	lwp_t		*cc_lwp;
 	callout_impl_t	*cc_active;
-	callout_impl_t	*cc_cancel;
 	struct evcnt	cc_ev_late;
 	struct evcnt	cc_ev_block;
 	struct callout_circq cc_todo;		/* Worklist */
@@ -263,6 +262,17 @@ callout_lock(callout_impl_t *c)
 }
 
 /*
+ * Check if the callout is currently running on an LWP that isn't curlwp.
+ */
+static inline bool
+callout_running_somewhere_else(callout_impl_t *c, struct callout_cpu *cc)
+{
+	KASSERT(c->c_cpu == cc);
+
+	return cc->cc_active == c && cc->cc_lwp != curlwp;
+}
+
+/*
  * callout_startup:
  *
  *	Initialize the callout facility, called at system startup time.
@@ -378,7 +388,7 @@ callout_destroy(callout_t *cs)
 	KASSERTMSG((c->c_flags & CALLOUT_PENDING) == 0,
 	    "pending callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
 	    c, c->c_func, c->c_flags, __builtin_return_address(0));
-	KASSERTMSG(c->c_cpu->cc_lwp == curlwp || c->c_cpu->cc_active != c,
+	KASSERTMSG(!callout_running_somewhere_else(c, c->c_cpu),
 	    "running callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
 	    c, c->c_func, c->c_flags, __builtin_return_address(0));
 	c->c_magic = 0;
@@ -496,7 +506,6 @@ bool
 callout_stop(callout_t *cs)
 {
 	callout_impl_t *c = (callout_impl_t *)cs;
-	struct callout_cpu *cc;
 	kmutex_t *lock;
 	bool expired;
 
@@ -509,16 +518,6 @@ callout_stop(callout_t *cs)
 	expired = ((c->c_flags & CALLOUT_FIRED) != 0);
 	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
 
-	cc = c->c_cpu;
-	if (cc->cc_active == c) {
-		/*
-		 * This is for non-MPSAFE callouts only.  To synchronize
-		 * effectively we must be called with kernel_lock held.
-		 * It's also taken in callout_softclock.
-		 */
-		cc->cc_cancel = c;
-	}
-
 	SDT_PROBE5(sdt, kernel, callout, stop,
 	    c, c->c_func, c->c_arg, c->c_flags, expired);
 
@@ -542,7 +541,6 @@ callout_halt(callout_t *cs, void *interl
 {
 	callout_impl_t *c = (callout_impl_t *)cs;
 	kmutex_t *lock;
-	int flags;
 
 	KASSERT(c->c_magic == CALLOUT_MAGIC);
 	KASSERT(!cpu_intr_p());
@@ -552,11 +550,10 @@ callout_halt(callout_t *cs, void *interl
 	lock = callout_lock(c);
 	SDT_PROBE4(sdt, kernel, callout, halt,
 	    c, c->c_func, c->c_arg, c->c_flags);
-	flags = c->c_flags;
-	if ((flags & CALLOUT_PENDING) != 0)
+	if ((c->c_flags & CALLOUT_PENDING) != 0)
 		CIRCQ_REMOVE(&c->c_list);
-	c->c_flags = flags & ~(CALLOUT_PENDING|CALLOUT_FIRED);
-	if (__predict_false(flags & CALLOUT_FIRED)) {
+	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
+	if (__predict_false(callout_running_somewhere_else(c, c->c_cpu))) {
 		callout_wait(c, interlock, lock);
 		return true;
 	}
@@ -592,7 +589,7 @@ callout_wait(callout_impl_t *c, void *in
 		 * - the callout itself has called callout_halt() (nice!)
 		 */
 		cc = c->c_cpu;
-		if (__predict_true(cc->cc_active != c || cc->cc_lwp == l))
+		if (__predict_true(!callout_running_somewhere_else(c, cc)))
 			break;
 
 		/* It's running - need to wait for it to complete. */

Reply via email to