On Wed, Mar 08, 2017 at 09:00:17PM -0800, Bryan Drewery wrote:
> I'm on r314708.  I hit ^C while running 'kyua test' in /usr/tests/bin/pwait.
> 
> > panic: tdsendsignal: ksi on queue
> > cpuid = 10
> 
> > #10 kdb_enter (why=0xffffffff814488f5 "panic", msg=<optimized out>) at 
> > /usr/src/sys/kern/subr_kdb.c:444
> > #11 0xffffffff80a577f3 in vpanic (fmt=<optimized out>, 
> > ap=0xfffffe35601a3620) at /usr/src/sys/kern/kern_shutdown.c:772
> > #12 0xffffffff80a5764f in _kassert_panic (fatal=1, fmt=0xffffffff81448fd7 
> > "%s: ksi on queue") at /usr/src/sys/kern/kern_shutdown.c:669
> > #13 0xffffffff80a5c843 in tdsendsignal (p=0xfffff80c39389a80, td=0x0, 
> > sig=20, ksi=0xfffff803888a2bd0) at /usr/src/sys/kern/kern_sig.c:2095
> > #14 0xffffffff80a13828 in exit1 (td=<optimized out>, rval=<optimized out>, 
> > signo=<optimized out>) at /usr/src/sys/kern/kern_exit.c:459
> > #15 0xffffffff80a5b28c in sigexit (td=0xfffff802f0bee000, sig=9) at 
> > /usr/src/sys/kern/kern_sig.c:3081
> > #16 0xffffffff80a5b88e in postsig (sig=9) at 
> > /usr/src/sys/kern/kern_sig.c:2992
> > #17 0xffffffff80a5b56b in kern_sigsuspend (td=0xfffff802f0bee000, mask=...) 
> > at /usr/src/sys/kern/kern_sig.c:1515
> > #18 0xffffffff80a5b441 in sys_sigsuspend (td=0xfffff802f0bee000, 
> > uap=<optimized out>) at /usr/src/sys/kern/kern_sig.c:1479
> > #19 0xffffffff80ee04da in syscallenter (td=0xfffff802f0bee000, 
> > sa=<optimized out>) at 
> > /usr/src/sys/amd64/amd64/../../kern/subr_syscall.c:135
> > #20 amd64_syscall (td=0xfffff802f0bee000, traced=0) at 
> > /usr/src/sys/amd64/amd64/trap.c:902
> 
> 
> > (kgdb) frame 18
> > #18 0xffffffff80a5b441 in sys_sigsuspend (td=0xfffff802f0bee000, 
> > uap=<optimized out>) at /usr/src/sys/kern/kern_sig.c:1479
> > 1479            return (kern_sigsuspend(td, mask));
> 
> > (kgdb) p td->td_proc->p_comm
> > $3 = "timeout", '\000' <repeats 12 times>
> 
> > (kgdb) frame 13
> > #13 0xffffffff80a5c843 in tdsendsignal (p=0xfffff80c39389a80, td=0x0, 
> > sig=20, ksi=0xfffff803888a2bd0) at /usr/src/sys/kern/kern_sig.c:2095
> > 2095            KASSERT(ksi == NULL || !KSI_ONQ(ksi), ("%s: ksi on queue", 
> > __func__));
> > (kgdb) p *ksi
> > $4 = {ksi_link = {tqe_next = 0x0, tqe_prev = 0xfffff80c39389c58}, ksi_info 
> > = {si_signo = 20, si_errno = 0, si_code = 2, si_pid = 90903, si_uid = 0, 
> > si_status = 9, si_addr = 0x0, si_value = {
> >       sival_int = 0, sival_ptr = 0x0, sigval_int = 0, sigval_ptr = 0x0}, 
> > _reason = {_fault = {_trapno = 0}, _timer = {_timerid = 0, _overrun = 0}, 
> > _mesgq = {_mqd = 0}, _poll = {_band = 0}, __spare__ = {
> >         __spare1__ = 0, __spare2__ = {0, 0, 0, 0, 0, 0, 0}}}}, ksi_flags = 
> > 6, ksi_sigq = 0xfffff80c39389c28}
> 
> > (kgdb) p *ksi->ksi_sigq
> > $6 = {sq_signals = {__bits = {524288, 0, 0, 0}}, sq_kill = {__bits = {0, 0, 
> > 0, 0}}, sq_ptrace = {__bits = {0, 0, 0, 0}}, sq_list = {tqh_first = 
> > 0xfffff803888a2bd0, tqh_last = 0xfffff803888a2bd0},

Yes, there is apparently a race between a zombie child that has not yet
finished sending SIGCHLD to its parent and the parent exiting.  The following
should fix the issue, but I do not think the problem is easy to reproduce.

diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index c524fe5df37..ba5ff84e9de 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -189,6 +189,7 @@ exit1(struct thread *td, int rval, int signo)
 {
        struct proc *p, *nq, *q, *t;
        struct thread *tdt;
+       ksiginfo_t ksi;
 
        mtx_assert(&Giant, MA_NOTOWNED);
        KASSERT(rval == 0 || signo == 0, ("exit1 rv %d sig %d", rval, signo));
@@ -456,7 +457,12 @@ exit1(struct thread *td, int rval, int signo)
                        proc_reparent(q, q->p_reaper);
                        if (q->p_state == PRS_ZOMBIE) {
                                PROC_LOCK(q->p_reaper);
-                               pksignal(q->p_reaper, SIGCHLD, q->p_ksi);
+                               if (q->p_ksi != NULL) {
+                                       ksiginfo_init(&ksi);
+                                       ksiginfo_copy(q->p_ksi, &ksi);
+                               }
+                               pksignal(q->p_reaper, SIGCHLD, q->p_ksi !=
+                                   NULL ? &ksi : NULL);
                                PROC_UNLOCK(q->p_reaper);
                        }
                } else {
_______________________________________________
freebsd-current@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/freebsd-current
To unsubscribe, send any mail to "freebsd-current-unsubscr...@freebsd.org"

Reply via email to