Module Name:    src
Committed By:   riz
Date:           Mon Oct  1 23:07:08 UTC 2012

Modified Files:
        src/sys/kern [netbsd-6]: kern_exit.c kern_lwp.c sys_lwp.c
        src/sys/sys [netbsd-6]: lwp.h

Log Message:
Pull up following revision(s) (requested by rmind in ticket #583):
        sys/kern/sys_lwp.c: revision 1.55
        sys/sys/lwp.h: revision 1.164
        sys/kern/kern_exit.c: revision 1.242
        sys/kern/kern_lwp.c: revision 1.173
exit_lwps, lwp_wait: fix a race condition by retrying if p_lock was dropped
in the case of process exit.  It is necessary to re-flag all LWPs for exit,
as their state might have changed or new LWPs might have been spawned.
Should fix PR/46168 and PR/46402.


To generate a diff of this commit:
cvs rdiff -u -r1.236.2.1 -r1.236.2.2 src/sys/kern/kern_exit.c
cvs rdiff -u -r1.166 -r1.166.2.1 src/sys/kern/kern_lwp.c
cvs rdiff -u -r1.52.14.1 -r1.52.14.2 src/sys/kern/sys_lwp.c
cvs rdiff -u -r1.159.2.1 -r1.159.2.2 src/sys/sys/lwp.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/kern_exit.c
diff -u src/sys/kern/kern_exit.c:1.236.2.1 src/sys/kern/kern_exit.c:1.236.2.2
--- src/sys/kern/kern_exit.c:1.236.2.1	Thu Apr 12 17:05:36 2012
+++ src/sys/kern/kern_exit.c	Mon Oct  1 23:07:07 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_exit.c,v 1.236.2.1 2012/04/12 17:05:36 riz Exp $	*/
+/*	$NetBSD: kern_exit.c,v 1.236.2.2 2012/10/01 23:07:07 riz Exp $	*/
 
 /*-
  * Copyright (c) 1998, 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.236.2.1 2012/04/12 17:05:36 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.236.2.2 2012/10/01 23:07:07 riz Exp $");
 
 #include "opt_ktrace.h"
 #include "opt_perfctrs.h"
@@ -598,17 +598,12 @@ exit1(struct lwp *l, int rv)
 void
 exit_lwps(struct lwp *l)
 {
-	struct proc *p;
-	struct lwp *l2;
-	int error;
-	lwpid_t waited;
+	proc_t *p = l->l_proc;
+	lwp_t *l2;
 	int nlocks;
 
 	KERNEL_UNLOCK_ALL(l, &nlocks);
 
-	p = l->l_proc;
-	KASSERT(mutex_owned(p->p_lock));
-
 #ifdef KERN_SA
 	if (p->p_sa != NULL) {
 		struct sadata_vp *vp;
@@ -642,6 +637,8 @@ exit_lwps(struct lwp *l)
 #endif
 
  retry:
+	KASSERT(mutex_owned(p->p_lock));
+
 	/*
 	 * Interrupt LWPs in interruptable sleep, unsuspend suspended
 	 * LWPs and then wait for everyone else to finish.
@@ -656,30 +653,20 @@ exit_lwps(struct lwp *l)
 		    l2->l_stat == LSSUSPENDED || l2->l_stat == LSSTOP) {
 		    	/* setrunnable() will release the lock. */
 			setrunnable(l2);
-			DPRINTF(("exit_lwps: Made %d.%d runnable\n",
-			    p->p_pid, l2->l_lid));
 			continue;
 		}
 		lwp_unlock(l2);
 	}
+
+	/*
+	 * Wait for every LWP to exit.  Note: LWPs can get suspended/slept
+	 * behind us or there may even be new LWPs created.  Therefore, a
+	 * full retry is required on error.
+	 */
 	while (p->p_nlwps > 1) {
-		DPRINTF(("exit_lwps: waiting for %d LWPs (%d zombies)\n",
-		    p->p_nlwps, p->p_nzlwps));
-		error = lwp_wait1(l, 0, &waited, LWPWAIT_EXITCONTROL);
-		if (p->p_nlwps == 1)
-			break;
-		if (error == EDEADLK) {
-			/*
-			 * LWPs can get suspended/slept behind us.
-			 * (eg. sa_setwoken)
-			 * kick them again and retry.
-			 */
+		if (lwp_wait(l, 0, NULL, true)) {
 			goto retry;
 		}
-		if (error)
-			panic("exit_lwps: lwp_wait1 failed with error %d",
-			    error);
-		DPRINTF(("exit_lwps: Got LWP %d from lwp_wait1()\n", waited));
 	}
 
 	KERNEL_LOCK(nlocks, l);

Index: src/sys/kern/kern_lwp.c
diff -u src/sys/kern/kern_lwp.c:1.166 src/sys/kern/kern_lwp.c:1.166.2.1
--- src/sys/kern/kern_lwp.c:1.166	Sat Feb 11 23:16:17 2012
+++ src/sys/kern/kern_lwp.c	Mon Oct  1 23:07:08 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_lwp.c,v 1.166 2012/02/11 23:16:17 martin Exp $	*/
+/*	$NetBSD: kern_lwp.c,v 1.166.2.1 2012/10/01 23:07:08 riz Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -211,7 +211,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.166 2012/02/11 23:16:17 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.166.2.1 2012/10/01 23:07:08 riz Exp $");
 
 #include "opt_ddb.h"
 #include "opt_lockdebug.h"
@@ -489,22 +489,21 @@ lwp_unstop(struct lwp *l)
  * Must be called with p->p_lock held.
  */
 int
-lwp_wait1(struct lwp *l, lwpid_t lid, lwpid_t *departed, int flags)
+lwp_wait(struct lwp *l, lwpid_t lid, lwpid_t *departed, bool exiting)
 {
-	struct proc *p = l->l_proc;
-	struct lwp *l2;
-	int nfound, error;
-	lwpid_t curlid;
-	bool exiting;
+	const lwpid_t curlid = l->l_lid;
+	proc_t *p = l->l_proc;
+	lwp_t *l2;
+	int error;
 
 	KASSERT(mutex_owned(p->p_lock));
 
 	p->p_nlwpwait++;
 	l->l_waitingfor = lid;
-	curlid = l->l_lid;
-	exiting = ((flags & LWPWAIT_EXITCONTROL) != 0);
 
 	for (;;) {
+		int nfound;
+
 		/*
 		 * Avoid a race between exit1() and sigexit(): if the
 		 * process is dumping core, then we need to bail out: call
@@ -514,10 +513,7 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lw
 		if ((p->p_sflag & PS_WCORE) != 0) {
 			mutex_exit(p->p_lock);
 			lwp_userret(l);
-#ifdef DIAGNOSTIC
-			panic("lwp_wait1");
-#endif
-			/* NOTREACHED */
+			KASSERT(false);
 		}
 
 		/*
@@ -607,13 +603,14 @@ lwp_wait1(struct lwp *l, lwpid_t lid, lw
 		}
 
 		/*
-		 * The kernel is careful to ensure that it can not deadlock
-		 * when exiting - just keep waiting.
+		 * Note: since the lock will be dropped, need to restart on
+		 * wakeup to run all LWPs again, e.g. there may be new LWPs.
 		 */
 		if (exiting) {
 			KASSERT(p->p_nlwps > 1);
 			cv_wait(&p->p_lwpcv, p->p_lock);
-			continue;
+			error = EAGAIN;
+			break;
 		}
 
 		/*

Index: src/sys/kern/sys_lwp.c
diff -u src/sys/kern/sys_lwp.c:1.52.14.1 src/sys/kern/sys_lwp.c:1.52.14.2
--- src/sys/kern/sys_lwp.c:1.52.14.1	Mon May 21 15:25:56 2012
+++ src/sys/kern/sys_lwp.c	Mon Oct  1 23:07:07 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: sys_lwp.c,v 1.52.14.1 2012/05/21 15:25:56 riz Exp $	*/
+/*	$NetBSD: sys_lwp.c,v 1.52.14.2 2012/10/01 23:07:07 riz Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.52.14.1 2012/05/21 15:25:56 riz Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.52.14.2 2012/10/01 23:07:07 riz Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -373,19 +373,14 @@ sys__lwp_wait(struct lwp *l, const struc
 	lwpid_t dep;
 
 	mutex_enter(p->p_lock);
-	error = lwp_wait1(l, SCARG(uap, wait_for), &dep, 0);
+	error = lwp_wait(l, SCARG(uap, wait_for), &dep, false);
 	mutex_exit(p->p_lock);
 
-	if (error)
-		return error;
-
-	if (SCARG(uap, departed)) {
+	if (!error && SCARG(uap, departed)) {
 		error = copyout(&dep, SCARG(uap, departed), sizeof(dep));
-		if (error)
-			return error;
 	}
 
-	return 0;
+	return error;
 }
 
 int

Index: src/sys/sys/lwp.h
diff -u src/sys/sys/lwp.h:1.159.2.1 src/sys/sys/lwp.h:1.159.2.2
--- src/sys/sys/lwp.h:1.159.2.1	Mon May 21 15:25:56 2012
+++ src/sys/sys/lwp.h	Mon Oct  1 23:07:08 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: lwp.h,v 1.159.2.1 2012/05/21 15:25:56 riz Exp $	*/
+/*	$NetBSD: lwp.h,v 1.159.2.2 2012/10/01 23:07:08 riz Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010
@@ -320,9 +320,7 @@ void	lwp_drainrefs(lwp_t *);
 bool	lwp_alive(lwp_t *);
 lwp_t	*lwp_find_first(proc_t *);
 
-/* Flags for _lwp_wait1 */
-#define LWPWAIT_EXITCONTROL	0x00000001
-int	lwp_wait1(lwp_t *, lwpid_t, lwpid_t *, int);
+int	lwp_wait(lwp_t *, lwpid_t, lwpid_t *, bool);
 void	lwp_continue(lwp_t *);
 void	lwp_unsleep(lwp_t *, bool);
 void	lwp_unstop(lwp_t *);

Reply via email to