On Tue, Nov 05, 2024 at 05:41:44PM +0100, Alexander Bluhm wrote:
> Hi,
>
> my i386 regress machine crashed in regress/sys/kern/signal/signal-stress
>
> pWaAnRiNcI:NG: t sSPlLe ep(:0 xnobt0 )SO NPNOROCT
> LStopped at db_enter+0x4: popl %ebp
> TID PID UID PRFLAGS PFLAGS CPU COMMAND
> 32150 5151 0 0x20000000 0 6 signal-stress
> 196523 76435 0 0x20000000 0x8 1 signal-stress
> 367678 60513 0 0x20000000 0x8 7 signal-stress
> 235047 99681 0 0x2 0 2 signal-stress
> db_enter() at db_enter+0x4
> panic(d0cc511f) at panic+0x7a
> sleep_setup(d0fec344,128,d0c4e85d) at sleep_setup+0x116
> tsleep(d0fec344,128,d0c4e85d,0) at tsleep+0x79
> tsleep_nsec(d0fec344,128,d0c4e85d,ffffffff) at tsleep_nsec+0xd0
> sys_sigsuspend(f8bddb10,f6f78e90,f6f78e88) at sys_sigsuspend+0x58
> syscall(f6f78ed0) at syscall+0x41b
> Xsyscall_untramp() at Xsyscall_untramp+0xa9
> end of kernel
> https://www.openbsd.org/ddb.html describes the minimum info required in bug
> reports. Insufficient info makes it difficult to find and fix bugs.
>
> ddb{5}> show panic
> *cpu5: tsleep: not SONPROC
>
> ddb{5}> trace
> db_enter() at db_enter+0x4
> panic(d0cc511f) at panic+0x7a
> sleep_setup(d0fec344,128,d0c4e85d) at sleep_setup+0x116
> tsleep(d0fec344,128,d0c4e85d,0) at tsleep+0x79
> tsleep_nsec(d0fec344,128,d0c4e85d,ffffffff) at tsleep_nsec+0xd0
> sys_sigsuspend(f8bddb10,f6f78e90,f6f78e88) at sys_sigsuspend+0x58
> syscall(f6f78ed0) at syscall+0x41b
> Xsyscall_untramp() at Xsyscall_untramp+0xa9
> end of kernel
The diff below should fix this. We need to skip the proc_stop in the 2nd
sleep_signal_check call in sleep_finish, since at that point we no longer
want to stop. Instead, the next call of cursig will take care of that.
--
:wq Claudio
Index: kern/kern_synch.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_synch.c,v
diff -u -p -r1.210 kern_synch.c
--- kern/kern_synch.c 4 Nov 2024 22:41:50 -0000 1.210
+++ kern/kern_synch.c 5 Nov 2024 17:21:09 -0000
@@ -62,7 +62,7 @@
#include <sys/ktrace.h>
#endif
-int sleep_signal_check(struct proc *);
+int sleep_signal_check(struct proc *, int);
int thrsleep(struct proc *, struct sys___thrsleep_args *);
int thrsleep_unlock(void *);
@@ -339,9 +339,9 @@ sleep_setup(const volatile void *ident,
if (p->p_flag & P_CANTSLEEP)
panic("sleep: %s failed insomnia", p->p_p->ps_comm);
if (ident == NULL)
- panic("tsleep: no ident");
+ panic("sleep: no ident");
if (p->p_stat != SONPROC)
- panic("tsleep: not SONPROC");
+ panic("sleep: not SONPROC but %d", p->p_stat);
#endif
/* exiting processes are not allowed to catch signals */
if (p->p_flag & P_WEXIT)
@@ -387,7 +387,7 @@ sleep_finish(int timo, int do_sleep)
* we must be ready for sleep when sleep_signal_check() is
* called.
*/
- if ((error = sleep_signal_check(p)) != 0) {
+ if ((error = sleep_signal_check(p, 0)) != 0) {
catch = 0;
do_sleep = 0;
}
@@ -445,9 +445,12 @@ sleep_finish(int timo, int do_sleep)
atomic_clearbits_int(&p->p_flag, P_TIMEOUT);
}
- /* Check if thread was woken up because of a unwind or signal */
+ /*
+ * Check if thread was woken up because of an unwind or signal
+ * but ignore any pending stop condition.
+ */
if (catch != 0)
- error = sleep_signal_check(p);
+ error = sleep_signal_check(p, 1);
/* Signal errors are higher priority than timeouts. */
if (error == 0 && error1 != 0)
@@ -460,7 +463,7 @@ sleep_finish(int timo, int do_sleep)
* Check and handle signals and suspensions around a sleep cycle.
*/
int
-sleep_signal_check(struct proc *p)
+sleep_signal_check(struct proc *p, int nostop)
{
struct sigctx ctx;
int err, sig;
@@ -468,7 +471,7 @@ sleep_signal_check(struct proc *p)
if ((err = single_thread_check(p, 1)) != 0)
return err;
if ((sig = cursig(p, &ctx, 1)) != 0) {
- if (ctx.sig_stop) {
+ if (!nostop && ctx.sig_stop) {
SCHED_LOCK();
proc_stop(p, 0);
SCHED_UNLOCK();