Module Name: src
Committed By: riastradh
Date: Sat Sep 2 17:44:41 UTC 2023
Modified Files:
src/sys/kern: kern_heartbeat.c
Log Message:
heartbeat(9): Move panicstr check into the IPI itself.
We can't return early from defibrillate just because a panic is in
progress: the IPI may not have run yet, and we must not return until
the other CPU is definitely done using the ipi_msg_t we created on
the stack.
We should avoid calling panic again on the patient CPU in case it was
already in the middle of a panic, so that we don't re-enter panic
while, e.g., trying to print a stack trace.
Sprinkle some comments.
To generate a diff of this commit:
cvs rdiff -u -r1.8 -r1.9 src/sys/kern/kern_heartbeat.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/kern/kern_heartbeat.c
diff -u src/sys/kern/kern_heartbeat.c:1.8 src/sys/kern/kern_heartbeat.c:1.9
--- src/sys/kern/kern_heartbeat.c:1.8 Sat Sep 2 17:44:32 2023
+++ src/sys/kern/kern_heartbeat.c Sat Sep 2 17:44:41 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: kern_heartbeat.c,v 1.8 2023/09/02 17:44:32 riastradh Exp $ */
+/* $NetBSD: kern_heartbeat.c,v 1.9 2023/09/02 17:44:41 riastradh Exp $ */
/*-
* Copyright (c) 2023 The NetBSD Foundation, Inc.
@@ -82,7 +82,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.8 2023/09/02 17:44:32 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.9 2023/09/02 17:44:41 riastradh Exp $");
#ifdef _KERNEL_OPT
#include "opt_ddb.h"
@@ -457,7 +457,21 @@ defibrillator(void *cookie)
{
bool *ack = cookie;
+ /*
+ * Acknowledge the interrupt so the doctor CPU won't trigger a
+ * new panic for defibrillation timeout.
+ */
atomic_store_relaxed(ack, true);
+
+ /*
+ * If a panic is already in progress, we may have interrupted
+ * the logic that prints a stack trace on this CPU -- so let's
+ * not make it worse by giving the false impression of a
+ * recursive panic.
+ */
+ if (atomic_load_relaxed(&panicstr) != NULL)
+ return;
+
panic("%s[%d %s]: heart stopped beating", cpu_name(curcpu()),
curlwp->l_lid,
curlwp->l_name ? curlwp->l_name : curproc->p_comm);
@@ -498,13 +512,11 @@ defibrillate(struct cpu_info *ci, unsign
/*
* Busy-wait up to 1sec for the patient CPU to print a stack
* trace and panic. If the patient CPU acknowledges the IPI,
- * or if we're panicking anyway, just give up and stop here --
- * the system is coming down soon and we should avoid getting
- * in the way.
+ * just give up and stop here -- the system is coming down soon
+ * and we should avoid getting in the way.
*/
while (countdown --> 0) {
- if (atomic_load_relaxed(&ack) ||
- atomic_load_relaxed(&panicstr) != NULL)
+ if (atomic_load_relaxed(&ack))
return;
DELAY(1000); /* 1ms */
}