Module Name:    src
Committed By:   thorpej
Date:           Fri Apr 24 03:22:06 UTC 2020

Modified Files:
        src/sys/compat/linux/common: linux_exec.c linux_sched.c
        src/sys/kern: kern_exec.c kern_exit.c kern_fork.c kern_lwp.c
            kern_proc.c sys_lwp.c
        src/sys/sys: lwp.h proc.h

Log Message:
Overhaul the way LWP IDs are allocated.  Instead of each LWP having its
own LWP ID space, LWP IDs now come from the same number space as PIDs.  The
lead LWP of a process gets the PID as its LID.  If a multi-LWP process's
lead LWP exits, the PID persists for the process.

In addition to providing system-wide unique thread IDs, this also lets us
eliminate the per-process LWP radix tree, and some associated locks.

Remove the separate "global thread ID" map added previously; it is no longer
needed to provide this functionality.

Nudged in this direction by ad@ and chs@.
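
For readers skimming the diffs below, the core idea can be shown with a tiny
standalone sketch.  This is not the kernel code: the struct layouts, function
names, and the printf harness are made up for illustration, and only the
tag-bit scheme is taken from the PT_SET_PROC/PT_SET_LWP macros added to
kern_proc.c.  A pid_table slot holds a tagged pointer naming either a proc or
an LWP, so the lead LWP of a process can occupy (and later give back) the
process's own PID slot; the alignment requirement exists so the low tag bits
of every pointer are guaranteed to be clear.

/* slotdemo.c - illustrative only.  Build with: cc -std=c11 slotdemo.c */
#include <stdio.h>
#include <stdint.h>
#include <stdalign.h>

#define SLOT_FREE	0x1u			/* slot is on the free list */
#define SLOT_PROC	0x2u			/* slot holds a struct proc * */
#define SLOT_ALLBITS	(SLOT_FREE | SLOT_PROC)

/* Alignment keeps the tag bits of any pointer stored in a slot clear. */
struct proc { alignas(8) int p_pid; };
struct lwp  { alignas(8) int l_lid; struct proc *l_proc; };

typedef uintptr_t slot_t;

static slot_t set_proc(struct proc *p) { return (uintptr_t)p | SLOT_PROC; }
static slot_t set_lwp(struct lwp *l)   { return (uintptr_t)l; }	/* LWP tag is 0, as in the real macros */
static int is_proc(slot_t s) { return (s & SLOT_ALLBITS) == SLOT_PROC; }
static int is_lwp(slot_t s)  { return (s & SLOT_ALLBITS) == 0 && s != 0; }
static struct proc *get_proc(slot_t s) { return (struct proc *)(s & ~(slot_t)SLOT_ALLBITS); }
static struct lwp  *get_lwp(slot_t s)  { return (struct lwp *)(s & ~(slot_t)SLOT_ALLBITS); }

int
main(void)
{
	static struct proc p = { .p_pid = 123 };
	static struct lwp  l = { .l_lid = 123, .l_proc = &p };
	slot_t slot;

	/* proc_alloc(): the new process initially owns its PID slot. */
	slot = set_proc(&p);
	printf("proc in slot: %d (pid %d)\n", is_proc(slot), get_proc(slot)->p_pid);

	/* First lwp_create(): the lead LWP usurps the slot, so lid == pid. */
	slot = set_lwp(&l);
	printf("lwp in slot:  %d (lid %d, owning pid %d)\n",
	    is_lwp(slot), get_lwp(slot)->l_lid, get_lwp(slot)->l_proc->p_pid);

	/* Lead LWP exits: the slot reverts to the proc and the PID persists. */
	slot = set_proc(&p);
	printf("proc back in slot: %d\n", is_proc(slot));
	return 0;
}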


To generate a diff of this commit:
cvs rdiff -u -r1.121 -r1.122 src/sys/compat/linux/common/linux_exec.c
cvs rdiff -u -r1.74 -r1.75 src/sys/compat/linux/common/linux_sched.c
cvs rdiff -u -r1.498 -r1.499 src/sys/kern/kern_exec.c
cvs rdiff -u -r1.288 -r1.289 src/sys/kern/kern_exit.c
cvs rdiff -u -r1.222 -r1.223 src/sys/kern/kern_fork.c
cvs rdiff -u -r1.234 -r1.235 src/sys/kern/kern_lwp.c
cvs rdiff -u -r1.246 -r1.247 src/sys/kern/kern_proc.c
cvs rdiff -u -r1.78 -r1.79 src/sys/kern/sys_lwp.c
cvs rdiff -u -r1.206 -r1.207 src/sys/sys/lwp.h
cvs rdiff -u -r1.362 -r1.363 src/sys/sys/proc.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/compat/linux/common/linux_exec.c
diff -u src/sys/compat/linux/common/linux_exec.c:1.121 src/sys/compat/linux/common/linux_exec.c:1.122
--- src/sys/compat/linux/common/linux_exec.c:1.121	Sat Feb 15 17:13:55 2020
+++ src/sys/compat/linux/common/linux_exec.c	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_exec.c,v 1.121 2020/02/15 17:13:55 ad Exp $	*/
+/*	$NetBSD: linux_exec.c,v 1.122 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1994, 1995, 1998, 2000, 2007, 2008, 2020
@@ -32,7 +32,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.121 2020/02/15 17:13:55 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.122 2020/04/24 03:22:06 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -130,8 +130,6 @@ linux_e_proc_exec(struct proc *p, struct
 	}
 
 	KASSERT(p->p_nlwps == 1);
-	l = LIST_FIRST(&p->p_lwps);
-	lwp_renumber(l, p->p_pid);
 }
 
 void
@@ -152,7 +150,6 @@ linux_e_proc_fork(struct proc *p2, struc
 
 	KASSERT(p2->p_nlwps == 1);
 	l2 = LIST_FIRST(&p2->p_lwps);
-	lwp_renumber(l2, p2->p_pid);
 	led1 = l1->l_emuldata;
 	led2 = l2->l_emuldata;
 	led2->led_child_tidptr = led1->led_child_tidptr;

Index: src/sys/compat/linux/common/linux_sched.c
diff -u src/sys/compat/linux/common/linux_sched.c:1.74 src/sys/compat/linux/common/linux_sched.c:1.75
--- src/sys/compat/linux/common/linux_sched.c:1.74	Sun Apr 19 20:31:59 2020
+++ src/sys/compat/linux/common/linux_sched.c	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: linux_sched.c,v 1.74 2020/04/19 20:31:59 thorpej Exp $	*/
+/*	$NetBSD: linux_sched.c,v 1.75 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2019 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.74 2020/04/19 20:31:59 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.75 2020/04/24 03:22:06 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/mount.h>
@@ -195,7 +195,7 @@ linux_clone_nptl(struct lwp *l, const st
 		return ENOMEM;
 	}
 
-	error = lwp_create(l, p, uaddr, LWP_DETACHED | LWP_PIDLID,
+	error = lwp_create(l, p, uaddr, LWP_DETACHED,
 	    SCARG(uap, stack), 0, child_return, NULL, &l2, l->l_class,
 	    &l->l_sigmask, &l->l_sigstk);
 	if (__predict_false(error)) {

Index: src/sys/kern/kern_exec.c
diff -u src/sys/kern/kern_exec.c:1.498 src/sys/kern/kern_exec.c:1.499
--- src/sys/kern/kern_exec.c:1.498	Tue Apr 21 21:42:47 2020
+++ src/sys/kern/kern_exec.c	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_exec.c,v 1.498 2020/04/21 21:42:47 ad Exp $	*/
+/*	$NetBSD: kern_exec.c,v 1.499 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.498 2020/04/21 21:42:47 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.499 2020/04/24 03:22:06 thorpej Exp $");
 
 #include "opt_exec.h"
 #include "opt_execfmt.h"
@@ -1148,10 +1148,6 @@ emulexec(struct lwp *l, struct exec_pack
 	    && p->p_emul != epp->ep_esch->es_emul)
 		(*p->p_emul->e_proc_exit)(p);
 
-	/* This is now LWP 1.  Re-number the LWP if needed. */
-	if (l->l_lid != 1)
-		lwp_renumber(l, 1);
-
 	/*
 	 * Call exec hook. Emulation code may NOT store reference to anything
 	 * from &pack.
@@ -2495,10 +2491,18 @@ do_posix_spawn(struct lwp *l1, pid_t *pi
 	 * Allocate new proc. Borrow proc0 vmspace for it, we will
 	 * replace it with its own before returning to userland
 	 * in the child.
+	 */
+	p2 = proc_alloc();
+	if (p2 == NULL) {
+		/* We were unable to allocate a process ID. */
+		error = EAGAIN;
+		goto error_exit;
+	}
+
+	/*
 	 * This is a point of no return, we will have to go through
 	 * the child proc to properly clean it up past this point.
 	 */
-	p2 = proc_alloc();
 	pid = p2->p_pid;
 
 	/*
@@ -2533,7 +2537,6 @@ do_posix_spawn(struct lwp *l1, pid_t *pi
 	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
 	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
 	rw_init(&p2->p_reflock);
-	rw_init(&p2->p_treelock);
 	cv_init(&p2->p_waitcv, "wait");
 	cv_init(&p2->p_lwpcv, "lwpwait");
 

Index: src/sys/kern/kern_exit.c
diff -u src/sys/kern/kern_exit.c:1.288 src/sys/kern/kern_exit.c:1.289
--- src/sys/kern/kern_exit.c:1.288	Sun Apr 19 20:31:59 2020
+++ src/sys/kern/kern_exit.c	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_exit.c,v 1.288 2020/04/19 20:31:59 thorpej Exp $	*/
+/*	$NetBSD: kern_exit.c,v 1.289 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1998, 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -67,7 +67,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.288 2020/04/19 20:31:59 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exit.c,v 1.289 2020/04/24 03:22:06 thorpej Exp $");
 
 #include "opt_ktrace.h"
 #include "opt_dtrace.h"
@@ -202,7 +202,6 @@ exit1(struct lwp *l, int exitcode, int s
 	ksiginfo_t	ksi;
 	ksiginfoq_t	kq;
 	int		wakeinit;
-	struct lwp	*l2 __diagused;
 
 	p = l->l_proc;
 
@@ -560,14 +559,8 @@ exit1(struct lwp *l, int exitcode, int s
 	pcu_discard_all(l);
 
 	mutex_enter(p->p_lock);
-	/* Don't bother with p_treelock as no other LWPs remain. */
-	l2 = radix_tree_remove_node(&p->p_lwptree, (uint64_t)(l->l_lid - 1));
-	KASSERT(l2 == l);
-	KASSERT(radix_tree_empty_tree_p(&p->p_lwptree));
-	radix_tree_fini_tree(&p->p_lwptree);
-	/* Free the linux lwp id */
-	if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid)
-		proc_free_pid(l->l_lid);
+	/* Free the LWP ID */
+	proc_free_lwpid(p, l->l_lid);
 	lwp_drainrefs(l);
 	lwp_lock(l);
 	l->l_prflag &= ~LPR_DETACHED;
@@ -1269,7 +1262,6 @@ proc_free(struct proc *p, struct wrusage
 	cv_destroy(&p->p_waitcv);
 	cv_destroy(&p->p_lwpcv);
 	rw_destroy(&p->p_reflock);
-	rw_destroy(&p->p_treelock);
 
 	proc_free_mem(p);
 }

Index: src/sys/kern/kern_fork.c
diff -u src/sys/kern/kern_fork.c:1.222 src/sys/kern/kern_fork.c:1.223
--- src/sys/kern/kern_fork.c:1.222	Tue Apr 14 22:42:18 2020
+++ src/sys/kern/kern_fork.c	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_fork.c,v 1.222 2020/04/14 22:42:18 kamil Exp $	*/
+/*	$NetBSD: kern_fork.c,v 1.223 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2001, 2004, 2006, 2007, 2008, 2019
@@ -68,7 +68,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.222 2020/04/14 22:42:18 kamil Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.223 2020/04/24 03:22:06 thorpej Exp $");
 
 #include "opt_ktrace.h"
 #include "opt_dtrace.h"
@@ -305,14 +305,18 @@ fork1(struct lwp *l1, int flags, int exi
 		return ENOMEM;
 	}
 
+	/* Allocate new proc. */
+	p2 = proc_alloc();
+	if (p2 == NULL) {
+		/* We were unable to allocate a process ID. */
+		return EAGAIN;
+	}
+
 	/*
 	 * We are now committed to the fork.  From here on, we may
 	 * block on resources, but resource allocation may NOT fail.
 	 */
 
-	/* Allocate new proc. */
-	p2 = proc_alloc();
-
 	/*
 	 * Make a proc table entry for the new process.
 	 * Start by zeroing the section of proc that is zero-initialized,
@@ -327,7 +331,6 @@ fork1(struct lwp *l1, int flags, int exi
 
 	LIST_INIT(&p2->p_lwps);
 	LIST_INIT(&p2->p_sigwaiters);
-	radix_tree_init_tree(&p2->p_lwptree);
 
 	/*
 	 * Duplicate sub-structures as needed.
@@ -354,7 +357,6 @@ fork1(struct lwp *l1, int flags, int exi
 	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
 	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
 	rw_init(&p2->p_reflock);
-	rw_init(&p2->p_treelock);
 	cv_init(&p2->p_waitcv, "wait");
 	cv_init(&p2->p_lwpcv, "lwpwait");
 

Index: src/sys/kern/kern_lwp.c
diff -u src/sys/kern/kern_lwp.c:1.234 src/sys/kern/kern_lwp.c:1.235
--- src/sys/kern/kern_lwp.c:1.234	Sun Apr 19 23:05:04 2020
+++ src/sys/kern/kern_lwp.c	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_lwp.c,v 1.234 2020/04/19 23:05:04 ad Exp $	*/
+/*	$NetBSD: kern_lwp.c,v 1.235 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020
@@ -83,6 +83,16 @@
  *	The LP_RUNNING flag in lwp::l_pflag indicates that an LWP is running.
  *	Importantly, it indicates that its state is tied to a CPU.
  *
+ *	LSLARVAL:
+ *
+ *		Born, but not fully mature: the LWP is in the process
+ *		of being constructed.  This state exists so that the
+ *		LWP can occupy a slot in the PID table, but without
+ *		having to worry about being touched; lookups of the
+ *		LWP will fail while in this state.  The LWP will become
+ *		visible in the PID table once its state transitions
+ *		to LSIDL.
+ *
  *	LSZOMB:
  *
  *		Dead or dying: the LWP has released most of its resources
@@ -120,6 +130,8 @@
  *
  *	LWPs may transition states in the following ways:
  *
+ *	 LARVAL ----> IDL
+ *
  *	 RUN -------> ONPROC		ONPROC -----> RUN
  *		    				    > SLEEP
  *		    				    > STOPPED
@@ -211,7 +223,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.234 2020/04/19 23:05:04 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.235 2020/04/24 03:22:06 thorpej Exp $");
 
 #include "opt_ddb.h"
 #include "opt_lockdebug.h"
@@ -245,7 +257,6 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v
 #include <sys/psref.h>
 #include <sys/msan.h>
 #include <sys/kcov.h>
-#include <sys/thmap.h>
 #include <sys/cprng.h>
 
 #include <uvm/uvm_extern.h>
@@ -258,59 +269,27 @@ struct lwplist		alllwp		__cacheline_alig
  * Lookups by global thread ID operate outside of the normal LWP
  * locking protocol.
  *
- * We are using a thmap, which internally can perform lookups lock-free.
- * However, we still need to serialize lookups against LWP exit.  We
- * achieve this as follows:
- *
- * => Assignment of TID is performed lazily by the LWP itself, when it
- *    is first requested.  Insertion into the thmap is done completely
- *    lock-free (other than the internal locking performed by thmap itself).
- *    Once the TID is published in the map, the l___tid field in the LWP
- *    is protected by p_lock.
- *
- * => When we look up an LWP in the thmap, we take lwp_threadid_lock as
+ * => When we look up an LWP in the table, we take lwp_threadid_lock as
  *    a READER.  While still holding the lock, we add a reference to
  *    the LWP (using atomics).  After adding the reference, we drop the
  *    lwp_threadid_lock.  We now take p_lock and check the state of the
- *    LWP.  If the LWP is draining its references or if the l___tid field
- *    has been invalidated, we drop the reference we took and return NULL.
- *    Otherwise, the lookup has succeeded and the LWP is returned with a
- *    reference count that the caller is responsible for dropping.
- *
- * => When a LWP is exiting it releases its TID.  While holding the
- *    p_lock, the entry is deleted from the thmap and the l___tid field
- *    invalidated.  Once the field is invalidated, p_lock is released.
- *    It is done in this sequence because the l___tid field is used as
- *    the lookup key storage in the thmap in order to conserve memory.
- *    Even if a lookup races with this process and succeeds only to have
- *    the TID invalidated, it's OK because it also results in a reference
- *    that will be drained later.
- *
- * => Deleting a node also requires GC of now-unused thmap nodes.  The
- *    serialization point between stage_gc and gc is performed by simply
- *    taking the lwp_threadid_lock as a WRITER and immediately releasing
- *    it.  By doing this, we know that any busy readers will have drained.
+ *    LWP.  If the LWP is draining its references, we drop the reference
+ *    we took and return NULL.  Otherwise, the lookup has succeeded and
+ *    the LWP is returned with a reference count that the caller is
+ *    responsible for dropping.
  *
  * => When a LWP is exiting, it also drains off any references being
  *    held by others.  However, the reference in the lookup path is taken
  *    outside the normal locking protocol.  There needs to be additional
  *    serialization so that EITHER lwp_drainrefs() sees the incremented
- *    reference count so that it knows to wait, OR lwp_getref_tid() sees
+ *    reference count so that it knows to wait, OR lwp_getref_lwpid() sees
  *    that the LWP is waiting to drain and thus drops the reference
  *    immediately.  This is achieved by taking lwp_threadid_lock as a
  *    WRITER when setting LPR_DRAINING.  Note the locking order:
  *
  *		p_lock -> lwp_threadid_lock
- *
- * Note that this scheme could easily use pserialize(9) in place of the
- * lwp_threadid_lock rwlock lock.  However, this would require placing a
- * pserialize_perform() call in the LWP exit path, which is arguably more
- * expensive than briefly taking a global lock that should be relatively
- * uncontended.  This issue can be revisited if the rwlock proves to be
- * a performance problem.
  */
 static krwlock_t	lwp_threadid_lock	__cacheline_aligned;
-static thmap_t *	lwp_threadid_map	__read_mostly;
 
 static void		lwp_dtor(void *, void *);
 
@@ -330,7 +309,7 @@ struct lwp lwp0 __aligned(MIN_LWP_ALIGNM
 	.l_md = LWP0_MD_INITIALIZER,
 #endif
 	.l_proc = &proc0,
-	.l_lid = 1,
+	.l_lid = 0,		/* we own proc0's slot in the pid table */
 	.l_flag = LW_SYSTEM,
 	.l_stat = LSONPROC,
 	.l_ts = &turnstile0,
@@ -407,7 +386,6 @@ lwp0_init(void)
 	struct lwp *l = &lwp0;
 
 	KASSERT((void *)uvm_lwp_getuarea(l) != NULL);
-	KASSERT(l->l_lid == proc0.p_nlwpid);
 
 	LIST_INSERT_HEAD(&alllwp, l, l_list);
 
@@ -646,8 +624,7 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwp
 		 * it's not detached.
 		 */
 		if (lid != 0) {
-			l2 = radix_tree_lookup_node(&p->p_lwptree,
-			    (uint64_t)(lid - 1));
+			l2 = proc_find_lwp(p, lid);
 			if (l2 == NULL) {
 				error = ESRCH;
 				break;
@@ -767,8 +744,7 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwp
 	 * so that they can re-check for zombies and for deadlock.
 	 */
 	if (lid != 0) {
-		l2 = radix_tree_lookup_node(&p->p_lwptree,
-		    (uint64_t)(lid - 1));
+		l2 = proc_find_lwp(p, lid);
 		KASSERT(l2 == NULL || l2->l_lid == lid);
 
 		if (l2 != NULL && l2->l_waiter == curlid)
@@ -782,43 +758,6 @@ lwp_wait(struct lwp *l, lwpid_t lid, lwp
 }
 
 /*
- * Find an unused LID for a new LWP.
- */
-static lwpid_t
-lwp_find_free_lid(struct proc *p)
-{
-	struct lwp *gang[32];
-	lwpid_t lid;
-	unsigned n;
-
-	KASSERT(mutex_owned(p->p_lock));
-	KASSERT(p->p_nlwpid > 0);
-
-	/*
-	 * Scoot forward through the tree in blocks of LIDs doing gang
-	 * lookup with dense=true, meaning the lookup will terminate the
-	 * instant a hole is encountered.  Most of the time the first entry
-	 * (p->p_nlwpid) is free and the lookup fails fast.
-	 */
-	for (lid = p->p_nlwpid;;) {
-		n = radix_tree_gang_lookup_node(&p->p_lwptree, lid - 1,
-		    (void **)gang, __arraycount(gang), true);
-		if (n == 0) {
-			/* Start point was empty. */
-			break;
-		}
-		KASSERT(gang[0]->l_lid == lid);
-		lid = gang[n - 1]->l_lid + 1;
-		if (n < __arraycount(gang)) {
-			/* Scan encountered a hole. */
-			break;
-		}
-	}
-
-	return (lwpid_t)lid;
-}
-
-/*
  * Create a new LWP within process 'p2', using LWP 'l1' as a template.
  * The new LWP is created in state LSIDL and must be set running,
  * suspended, or stopped by the caller.
@@ -831,7 +770,6 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_
 {
 	struct lwp *l2;
 	turnstile_t *ts;
-	lwpid_t lid;
 
 	KASSERT(l1 == curlwp || l1->l_proc == &proc0);
 
@@ -876,16 +814,34 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_
 		mutex_exit(p2->p_lock);
 		l2 = pool_cache_get(lwp_cache, PR_WAITOK);
 		memset(l2, 0, sizeof(*l2));
-		l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK);
+		ts = l2->l_ts = pool_cache_get(turnstile_cache, PR_WAITOK);
 		SLIST_INIT(&l2->l_pi_lenders);
 	}
 
-	l2->l_stat = LSIDL;
+	l2->l_stat = LSLARVAL;
 	l2->l_proc = p2;
 	l2->l_refcnt = 0;
 	l2->l_class = sclass;
 
 	/*
+	 * Allocate a process ID for this LWP.  We need to do this now
+	 * while we can still unwind if it fails.  Beacuse we're marked
+	 * as LARVAL, no lookups by the ID will succeed.
+	 *
+	 * N.B. this will always succeed for the first LWP in a process,
+	 * because proc_alloc_lwpid() will usurp the slot.  Also note
+	 * that l2->l_proc MUST be valid so that lookups of the proc
+	 * will succeed, even if the LWP itself is not visible.
+	 */
+	if (__predict_false(proc_alloc_lwpid(p2, l2) == -1)) {
+		if (ts != &turnstile0)
+			pool_cache_put(turnstile_cache, ts);
+		l2->l_ts = NULL;
+		pool_cache_put(lwp_cache, l2);
+		return EAGAIN;
+	}
+
+	/*
 	 * If vfork(), we want the LWP to run fast and on the same CPU
 	 * as its parent, so that it can reuse the VM context and cache
 	 * footprint on the local CPU.
@@ -959,55 +915,13 @@ lwp_create(lwp_t *l1, proc_t *p2, vaddr_
 	uvm_lwp_setuarea(l2, uaddr);
 	uvm_lwp_fork(l1, l2, stack, stacksize, func, (arg != NULL) ? arg : l2);
 
-	if ((flags & LWP_PIDLID) != 0) {
-		/* Linux threads: use a PID. */
-		lid = proc_alloc_pid(p2);
-		l2->l_pflag |= LP_PIDLID;
-	} else if (p2->p_nlwps == 0) {
-		/*
-		 * First LWP in process.  Copy the parent's LID to avoid
-		 * causing problems for fork() + threads.  Don't give
-		 * subsequent threads the distinction of using LID 1.
-		 */
-		lid = l1->l_lid;
-		p2->p_nlwpid = 2;
-	} else {
-		/* Scan the radix tree for a free LID. */
-		lid = 0;
-	}
+	mutex_enter(p2->p_lock);
 
 	/*
-	 * Allocate LID if needed, and insert into the radix tree.  The
-	 * first LWP in most processes has a LID of 1.  It turns out that if
-	 * you insert an item with a key of zero to a radixtree, it's stored
-	 * directly in the root (p_lwptree) and no extra memory is
-	 * allocated.  We therefore always subtract 1 from the LID, which
-	 * means no memory is allocated for the tree unless the program is
-	 * using threads.  NB: the allocation and insert must take place
-	 * under the same hold of p_lock.
+	 * This renders l2 visible in the pid table once p2->p_lock is
+	 * released.
 	 */
-	mutex_enter(p2->p_lock);
-	for (;;) {
-		int error;
-
-		l2->l_lid = (lid == 0 ? lwp_find_free_lid(p2) : lid);
-
-		rw_enter(&p2->p_treelock, RW_WRITER);
-		error = radix_tree_insert_node(&p2->p_lwptree,
-		    (uint64_t)(l2->l_lid - 1), l2);
-		rw_exit(&p2->p_treelock);
-
-		if (__predict_true(error == 0)) {
-			if (lid == 0)
-				p2->p_nlwpid = l2->l_lid + 1;
-			break;
-		}
-
-		KASSERT(error == ENOMEM);
-		mutex_exit(p2->p_lock);
-		radix_tree_await_memory();
-		mutex_enter(p2->p_lock);
-	}
+	l2->l_stat = LSIDL;
 
 	if ((flags & LWP_DETACHED) != 0) {
 		l2->l_prflag = LPR_DETACHED;
@@ -1189,8 +1103,8 @@ lwp_exit(struct lwp *l)
 
 	/*
 	 * Perform any required thread cleanup.  Do this early so
-	 * anyone wanting to look us up by our global thread ID
-	 * will fail to find us.
+	 * anyone wanting to look us up with lwp_getref_lwpid() will
+	 * fail to find us before we become a zombie.
 	 *
 	 * N.B. this will unlock p->p_lock on our behalf.
 	 */
@@ -1238,9 +1152,6 @@ lwp_exit(struct lwp *l)
 	}
 
 	LIST_REMOVE(l, l_list);
-	if ((l->l_pflag & LP_PIDLID) != 0 && l->l_lid != p->p_pid) {
-		proc_free_pid(l->l_lid);
-	}
 	mutex_exit(proc_lock);
 
 	/*
@@ -1328,7 +1239,6 @@ lwp_free(struct lwp *l, bool recycle, bo
 {
 	struct proc *p = l->l_proc;
 	struct rusage *ru;
-	struct lwp *l2 __diagused;
 	ksiginfoq_t kq;
 
 	KASSERT(l != curlwp);
@@ -1364,14 +1274,8 @@ lwp_free(struct lwp *l, bool recycle, bo
 		if ((l->l_prflag & LPR_DETACHED) != 0)
 			p->p_ndlwps--;
 
-		/* Make note of the LID being free, and remove from tree. */
-		if (l->l_lid < p->p_nlwpid)
-			p->p_nlwpid = l->l_lid;
-		rw_enter(&p->p_treelock, RW_WRITER);
-		l2 = radix_tree_remove_node(&p->p_lwptree,
-		    (uint64_t)(l->l_lid - 1));
-		KASSERT(l2 == l);
-		rw_exit(&p->p_treelock);
+		/* Free the LWP ID. */
+		proc_free_lwpid(p, l->l_lid);
 
 		/*
 		 * Have any LWPs sleeping in lwp_wait() recheck for
@@ -1550,7 +1454,7 @@ lwp_find(struct proc *p, lwpid_t id)
 
 	KASSERT(mutex_owned(p->p_lock));
 
-	l = radix_tree_lookup_node(&p->p_lwptree, (uint64_t)(id - 1));
+	l = proc_find_lwp(p, id);
 	KASSERT(l == NULL || l->l_lid == id);
 
 	/*
@@ -1761,13 +1665,15 @@ lwp_need_userret(struct lwp *l)
 /*
  * Add one reference to an LWP.  Interlocked against lwp_drainrefs()
  * either by holding the proc's lock or by holding lwp_threadid_lock.
+ * If callers don't hold the proc's lock, then they must check for a
+ * larva after acquiring the reference.  References can't be added to
+ * zombies because references have already been drained off before the
+ * state changes to LSZOMB.
  */
 static void
 lwp_addref2(struct lwp *l)
 {
-
 	KASSERT(l->l_stat != LSZOMB);
-
 	atomic_inc_uint(&l->l_refcnt);
 }
 
@@ -1778,7 +1684,6 @@ lwp_addref2(struct lwp *l)
 void
 lwp_addref(struct lwp *l)
 {
-
 	KASSERT(mutex_owned(l->l_proc->p_lock));
 	lwp_addref2(l);
 }
@@ -1828,11 +1733,11 @@ lwp_drainrefs(struct lwp *l)
 	KASSERT(mutex_owned(p->p_lock));
 
 	/*
-	 * Lookups in the lwp_threadid_map hold lwp_threadid_lock
-	 * as a reader, increase l_refcnt, release it, and then
-	 * acquire p_lock to check for LPR_DRAINING.  By taking
-	 * lwp_threadid_lock as a writer here we ensure that either
-	 * we see the increase in l_refcnt or that they see LPR_DRAINING.
+	 * Lookups by thread ID hold lwp_threadid_lock as a reader,
+	 * increase l_refcnt, release it, and then acquire p_lock to
+	 * check for LPR_DRAINING.  By taking lwp_threadid_lock as a
+	 * writer here we ensure that either we see the increase in
+	 * l_refcnt or that they see LPR_DRAINING.
 	 */
 	rw_enter(&lwp_threadid_lock, RW_WRITER);
 	l->l_prflag |= LPR_DRAINING;
@@ -2125,131 +2030,10 @@ lwp_setprivate(struct lwp *l, void *ptr)
 	return error;
 }
 
-/*
- * Renumber the first and only LWP in a process on exec() or fork().
- * Don't bother with p_treelock here as this is the only live LWP in
- * the proc right now.
- */
-void
-lwp_renumber(lwp_t *l, lwpid_t lid)
-{
-	lwp_t *l2 __diagused;
-	proc_t *p = l->l_proc;
-	int error;
-
-	KASSERT(p->p_nlwps == 1);
-
-	while (l->l_lid != lid) {
-		mutex_enter(p->p_lock);
-		error = radix_tree_insert_node(&p->p_lwptree, lid - 1, l);
-		if (error == 0) {
-			l2 = radix_tree_remove_node(&p->p_lwptree,
-			    (uint64_t)(l->l_lid - 1));
-			KASSERT(l2 == l);
-			p->p_nlwpid = lid + 1;
-			l->l_lid = lid;
-		}
-		mutex_exit(p->p_lock);
-
-		if (error == 0)
-			break;
-
-		KASSERT(error == ENOMEM);
-		radix_tree_await_memory();
-	}
-}
-
-#define	LWP_TID_MASK	0x3fffffff		/* placeholder */
-
 static void
 lwp_threadid_init(void)
 {
 	rw_init(&lwp_threadid_lock);
-	lwp_threadid_map = thmap_create(0, NULL, THMAP_NOCOPY);
-}
-
-static void
-lwp_threadid_alloc(struct lwp * const l)
-{
-
-	KASSERT(l == curlwp);
-	KASSERT(l->l___tid == 0);
-
-	for (;;) {
-		l->l___tid = cprng_fast32() & LWP_TID_MASK;
-		if (l->l___tid != 0 &&
-		    /*
-		     * There is no need to take the lwp_threadid_lock
-		     * while inserting into the map: internally, the
-		     * map is already concurrency-safe, and the lock
-		     * is only needed to serialize removal with respect
-		     * to lookup.
-		     */
-		    thmap_put(lwp_threadid_map,
-			      &l->l___tid, sizeof(l->l___tid), l) == l) {
-			/* claimed! */
-			return;
-		}
-		preempt_point();
-	}
-}
-
-static inline void
-lwp_threadid_gc_serialize(void)
-{
-
-	/*
-	 * By acquiring the lock as a writer, we will know that
-	 * all of the existing readers have drained away and thus
-	 * the GC is safe.
-	 */
-	rw_enter(&lwp_threadid_lock, RW_WRITER);
-	rw_exit(&lwp_threadid_lock);
-}
-
-static void
-lwp_threadid_free(struct lwp * const l)
-{
-
-	KASSERT(l == curlwp);
-	KASSERT(l->l___tid != 0);
-
-	/*
-	 * Ensure that anyone who finds this entry in the lock-free lookup
-	 * path sees that the key has been deleted by serialzing with the
-	 * examination of l___tid.
-	 *
-	 * N.B. l___tid field must be zapped *after* deleting from the map
-	 * because that field is being used as the key storage by thmap.
-	 */
-	KASSERT(mutex_owned(l->l_proc->p_lock));
-	struct lwp * const ldiag __diagused = thmap_del(lwp_threadid_map,
-	    &l->l___tid, sizeof(l->l___tid));
-	l->l___tid = 0;
-	mutex_exit(l->l_proc->p_lock);
-
-	KASSERT(l == ldiag);
-
-	void * const gc_ref = thmap_stage_gc(lwp_threadid_map);
-	lwp_threadid_gc_serialize();
-	thmap_gc(lwp_threadid_map, gc_ref);
-}
-
-/*
- * Return the current LWP's global thread ID.  Only the current LWP
- * should ever use this value, unless it is guaranteed that the LWP
- * is paused (and then it should be accessed directly, rather than
- * by this accessor).
- */
-lwpid_t
-lwp_gettid(void)
-{
-	struct lwp * const l = curlwp;
-
-	if (l->l___tid == 0)
-		lwp_threadid_alloc(l);
-
-	return l->l___tid;
 }
 
 /*
@@ -2259,12 +2043,17 @@ lwp_gettid(void)
  * with lwp_delref().
  */
 struct lwp *
-lwp_getref_tid(lwpid_t tid)
+lwp_getref_lwpid(lwpid_t tid)
 {
-	struct lwp *l, *rv;
+	struct lwp *l;
 
+	/*
+	 * We rely on lwp_thread_cleanup() to hide LWP IDs from us
+	 * to ensure that we cannot add a reference do an exiting
+	 * LWP.
+	 */
 	rw_enter(&lwp_threadid_lock, RW_READER);
-	l = thmap_get(lwp_threadid_map, &tid, sizeof(&tid));
+	l = proc_seek_lwpid(tid);
 	if (__predict_false(l == NULL)) {
 		rw_exit(&lwp_threadid_lock);
 		return NULL;
@@ -2281,17 +2070,16 @@ lwp_getref_tid(lwpid_t tid)
 	/*
 	 * Now verify that our reference is valid.
 	 */
-	mutex_enter(l->l_proc->p_lock);
-	if (__predict_false((l->l_prflag & LPR_DRAINING) != 0 ||
-			    l->l___tid == 0)) {
+	struct proc *p = l->l_proc;
+	mutex_enter(p->p_lock);
+	if (__predict_false(l->l_stat == LSLARVAL ||
+			    (l->l_prflag & LPR_DRAINING) != 0)) {
 		lwp_delref2(l);
-		rv = NULL;
-	} else {
-		rv = l;
+		l = NULL;
 	}
-	mutex_exit(l->l_proc->p_lock);
+	mutex_exit(p->p_lock);
 
-	return rv;
+	return l;
 }
 
 /*
@@ -2303,23 +2091,18 @@ void
 lwp_thread_cleanup(struct lwp *l)
 {
 	KASSERT(l == curlwp);
-	const lwpid_t tid = l->l___tid;
+	const lwpid_t tid = l->l_lid;
 
 	KASSERT(mutex_owned(l->l_proc->p_lock));
 
-	if (__predict_false(tid != 0)) {
-		/*
-		 * Drop our thread ID.  This will also unlock
-		 * our proc.
-		 */
-		lwp_threadid_free(l);
-	} else {
-		/*
-		 * No thread cleanup was required; just unlock
-		 * the proc.
-		 */
-		mutex_exit(l->l_proc->p_lock);
-	}
+	/*
+	 * Hide this LWP from seekers (namely lwp_getref_lwpid())
+	 * to prevent them from attempting to acquire a reference
+	 * on a zombie.
+	 */
+	proc_hide_lwpid(tid);
+
+	mutex_exit(l->l_proc->p_lock);
 }
 
 #if defined(DDB)
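
The rewritten comment in this file describes how lookups by LWP ID, which run
outside the normal LWP locking protocol, are serialized against LWP exit:
either the exiting LWP's drain sees the newly added reference, or the lookup
sees that draining has begun and backs off.  Below is a minimal userland
sketch of that handshake using pthreads and C11 atomics in place of the
kernel primitives.  All names (fake_lwp, getref, drainrefs) are invented; the
kernel counterparts are lwp_getref_lwpid() and lwp_drainrefs() above, and the
real lookup additionally rejects LSLARVAL LWPs.

/* refdemo.c - illustrative only.  Build with: cc -std=c11 -pthread refdemo.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_lwp {
	pthread_mutex_t	p_lock;		/* stands in for proc::p_lock */
	atomic_uint	refcnt;		/* stands in for lwp::l_refcnt */
	bool		draining;	/* stands in for LPR_DRAINING */
};

static pthread_rwlock_t threadid_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Lookup path, shaped like lwp_getref_lwpid(). */
static struct fake_lwp *
getref(struct fake_lwp *l)
{
	struct fake_lwp *rv = l;

	pthread_rwlock_rdlock(&threadid_lock);
	atomic_fetch_add(&l->refcnt, 1);	/* reference taken under the read lock */
	pthread_rwlock_unlock(&threadid_lock);

	pthread_mutex_lock(&l->p_lock);
	if (l->draining) {			/* exit already in progress: back off */
		atomic_fetch_sub(&l->refcnt, 1);
		rv = NULL;
	}
	pthread_mutex_unlock(&l->p_lock);
	return rv;
}

/* Exit path, shaped like lwp_drainrefs(); note the p_lock -> threadid_lock order. */
static void
drainrefs(struct fake_lwp *l)
{
	pthread_mutex_lock(&l->p_lock);
	/*
	 * Taking the rwlock as WRITER waits out any lookup that has already
	 * bumped refcnt; any later lookup will observe 'draining'.
	 */
	pthread_rwlock_wrlock(&threadid_lock);
	l->draining = true;
	pthread_rwlock_unlock(&threadid_lock);
	/* A real kernel would now sleep until refcnt drains to zero. */
	pthread_mutex_unlock(&l->p_lock);
}

int
main(void)
{
	struct fake_lwp l = { .refcnt = 0, .draining = false };
	pthread_mutex_init(&l.p_lock, NULL);

	printf("before drain: %s\n", getref(&l) ? "reference granted" : "NULL");
	drainrefs(&l);
	printf("after drain:  %s\n", getref(&l) ? "reference granted" : "NULL");
	return 0;
}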

Index: src/sys/kern/kern_proc.c
diff -u src/sys/kern/kern_proc.c:1.246 src/sys/kern/kern_proc.c:1.247
--- src/sys/kern/kern_proc.c:1.246	Tue Apr 21 21:42:47 2020
+++ src/sys/kern/kern_proc.c	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: kern_proc.c,v 1.246 2020/04/21 21:42:47 ad Exp $	*/
+/*	$NetBSD: kern_proc.c,v 1.247 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -62,7 +62,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.246 2020/04/21 21:42:47 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.247 2020/04/24 03:22:06 thorpej Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_kstack.h"
@@ -117,37 +117,66 @@ __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,
 struct proclist		allproc		__cacheline_aligned;
 struct proclist		zombproc	__cacheline_aligned;
 
-kmutex_t *		proc_lock	__cacheline_aligned;
+static kmutex_t		proc_lock_s	__cacheline_aligned;
+kmutex_t *		proc_lock	__read_mostly;
 
 /*
- * pid to proc lookup is done by indexing the pid_table array.
+ * pid to lwp/proc lookup is done by indexing the pid_table array.
  * Since pid numbers are only allocated when an empty slot
  * has been found, there is no need to search any lists ever.
  * (an orphaned pgrp will lock the slot, a session will lock
  * the pgrp with the same number.)
  * If the table is too small it is reallocated with twice the
  * previous size and the entries 'unzipped' into the two halves.
- * A linked list of free entries is passed through the pt_proc
- * field of 'free' items - set odd to be an invalid ptr.
+ * A linked list of free entries is passed through the pt_lwp
+ * field of 'free' items - set odd to be an invalid ptr.  Two
+ * additional bits are also used to indicate if the slot is
+ * currently occupied by a proc or lwp, and if the PID is
+ * hidden from certain kinds of lookups.  We thus require a
+ * minimum alignment for proc and lwp structures (LWPs are
+ * at least 32-byte aligned).
  */
 
 struct pid_table {
-	struct proc	*pt_proc;
+	uintptr_t	pt_slot;
 	struct pgrp	*pt_pgrp;
 	pid_t		pt_pid;
 };
-#if 1	/* strongly typed cast - should be a noop */
-static inline uint p2u(struct proc *p) { return (uint)(uintptr_t)p; }
-#else
-#define p2u(p) ((uint)p)
-#endif
-#define P_VALID(p) (!(p2u(p) & 1))
-#define P_NEXT(p) (p2u(p) >> 1)
-#define P_FREE(pid) ((struct proc *)(uintptr_t)((pid) << 1 | 1))
+
+#define	PT_F_FREE		__BIT(0)
+#define	PT_F_LWP		0	/* pseudo-flag */
+#define	PT_F_PROC		__BIT(1)
+#define	PT_F_HIDDEN		__BIT(2)
+
+#define	PT_F_TYPEBITS		(PT_F_FREE|PT_F_PROC)
+#define	PT_F_ALLBITS		(PT_F_FREE|PT_F_PROC|PT_F_HIDDEN)
+
+#define	PT_VALID(s)		(((s) & PT_F_FREE) == 0)
+#define	PT_RESERVED(s)		((s) == 0)
+#define	PT_HIDDEN(s)		((s) & PT_F_HIDDEN)
+#define	PT_NEXT(s)		((u_int)(s) >> 1)
+#define	PT_SET_FREE(pid)	(((pid) << 1) | PT_F_FREE)
+#define	PT_SET_HIDDEN(s)	((s) | PT_F_HIDDEN)
+#define	PT_SET_LWP(l)		((uintptr_t)(l))
+#define	PT_SET_PROC(p)		(((uintptr_t)(p)) | PT_F_PROC)
+#define	PT_SET_RESERVED		0
+#define	PT_GET_LWP(s)		((struct lwp *)((s) & ~PT_F_ALLBITS))
+#define	PT_GET_PROC(s)		((struct proc *)((s) & ~PT_F_ALLBITS))
+#define	PT_GET_TYPE(s)		((s) & PT_F_TYPEBITS)
+#define	PT_IS_LWP(s)		(PT_GET_TYPE(s) == PT_F_LWP && (s) != 0)
+#define	PT_IS_PROC(s)		(PT_GET_TYPE(s) == PT_F_PROC)
+
+#define	MIN_PROC_ALIGNMENT	(PT_F_ALLBITS + 1)
 
 /*
  * Table of process IDs (PIDs).
+ *
+ * Locking order:
+ *	proc_lock -> pid_table_lock
+ *  or
+ *	proc::p_lock -> pid_table_lock
  */
+static krwlock_t pid_table_lock		__cacheline_aligned;
 static struct pid_table *pid_table	__read_mostly;
 
 #define	INITIAL_PID_TABLE_SIZE		(1 << 5)
@@ -188,7 +217,6 @@ struct proc proc0 = {
 	.p_sigwaiters = LIST_HEAD_INITIALIZER(&proc0.p_sigwaiters),
 	.p_nlwps = 1,
 	.p_nrlwps = 1,
-	.p_nlwpid = 1,		/* must match lwp0.l_lid */
 	.p_pgrp = &pgrp0,
 	.p_comm = "system",
 	/*
@@ -338,6 +366,8 @@ proc_ctor(void *arg __unused, void *obj,
 	return 0;
 }
 
+static pid_t proc_alloc_pid_slot(struct proc *, uintptr_t);
+
 /*
  * Initialize global process hashing structures.
  */
@@ -351,7 +381,11 @@ procinit(void)
 	for (pd = proclists; pd->pd_list != NULL; pd++)
 		LIST_INIT(pd->pd_list);
 
-	proc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
+	mutex_init(&proc_lock_s, MUTEX_DEFAULT, IPL_NONE);
+	proc_lock = &proc_lock_s;
+
+	rw_init(&pid_table_lock);
+
 	pid_table = kmem_alloc(INITIAL_PID_TABLE_SIZE
 	    * sizeof(struct pid_table), KM_SLEEP);
 	pid_tbl_mask = INITIAL_PID_TABLE_SIZE - 1;
@@ -360,7 +394,7 @@ procinit(void)
 	/* Set free list running through table...
 	   Preset 'use count' above PID_MAX so we allocate pid 1 next. */
 	for (i = 0; i <= pid_tbl_mask; i++) {
-		pid_table[i].pt_proc = P_FREE(LINK_EMPTY + i + 1);
+		pid_table[i].pt_slot = PT_SET_FREE(LINK_EMPTY + i + 1);
 		pid_table[i].pt_pgrp = 0;
 		pid_table[i].pt_pid = 0;
 	}
@@ -368,15 +402,25 @@ procinit(void)
 	next_free_pt = 1;
 	/* Need to fix last entry. */
 	last_free_pt = pid_tbl_mask;
-	pid_table[last_free_pt].pt_proc = P_FREE(LINK_EMPTY);
+	pid_table[last_free_pt].pt_slot = PT_SET_FREE(LINK_EMPTY);
 	/* point at which we grow table - to avoid reusing pids too often */
 	pid_alloc_lim = pid_tbl_mask - 1;
 #undef LINK_EMPTY
 
+	/* Reserve PID 1 for init(8). */	/* XXX slightly gross */
+	rw_enter(&pid_table_lock, RW_WRITER);
+	if (proc_alloc_pid_slot(&proc0, PT_SET_RESERVED) != 1)
+		panic("failed to reserve PID 1 for init(8)");
+	rw_exit(&pid_table_lock);
+
 	proc_specificdata_domain = specificdata_domain_create();
 	KASSERT(proc_specificdata_domain != NULL);
 
-	proc_cache = pool_cache_init(sizeof(struct proc), coherency_unit, 0, 0,
+	size_t proc_alignment = coherency_unit;
+	if (proc_alignment < MIN_PROC_ALIGNMENT)
+		proc_alignment = MIN_PROC_ALIGNMENT;
+
+	proc_cache = pool_cache_init(sizeof(struct proc), proc_alignment, 0, 0,
 	    "procpl", NULL, IPL_NONE, proc_ctor, NULL, NULL);
 
 	proc_listener = kauth_listen_scope(KAUTH_SCOPE_PROCESS,
@@ -440,7 +484,6 @@ proc0_init(void)
 	struct pgrp *pg;
 	struct rlimit *rlim;
 	rlim_t lim;
-	int error __diagused;
 	int i;
 
 	p = &proc0;
@@ -451,20 +494,16 @@ proc0_init(void)
 	p->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 
 	rw_init(&p->p_reflock);
-	rw_init(&p->p_treelock);
 	cv_init(&p->p_waitcv, "wait");
 	cv_init(&p->p_lwpcv, "lwpwait");
 
 	LIST_INSERT_HEAD(&p->p_lwps, &lwp0, l_sibling);
-	radix_tree_init_tree(&p->p_lwptree);
-	error = radix_tree_insert_node(&p->p_lwptree,
-	    (uint64_t)(lwp0.l_lid - 1), &lwp0);
-	KASSERT(error == 0);
 
-	pid_table[0].pt_proc = p;
+	KASSERT(lwp0.l_lid == 0);
+	pid_table[lwp0.l_lid].pt_slot = PT_SET_LWP(&lwp0);
 	LIST_INSERT_HEAD(&allproc, p, p_list);
 
-	pid_table[0].pt_pgrp = pg;
+	pid_table[lwp0.l_lid].pt_pgrp = pg;
 	LIST_INSERT_HEAD(&pg->pg_members, p, p_pglist);
 
 #ifdef __HAVE_SYSCALL_INTERN
@@ -548,28 +587,41 @@ proc_sesshold(struct session *ss)
 	ss->s_count++;
 }
 
-void
-proc_sessrele(struct session *ss)
+static void
+proc_sessrele_pid_table_write_locked(struct session *ss)
 {
+	struct pgrp *pg;
 
 	KASSERT(mutex_owned(proc_lock));
+	KASSERT(rw_write_held(&pid_table_lock));
 	KASSERT(ss->s_count > 0);
+
 	/*
 	 * We keep the pgrp with the same id as the session in order to
 	 * stop a process being given the same pid.  Since the pgrp holds
 	 * a reference to the session, it must be a 'zombie' pgrp by now.
 	 */
 	if (--ss->s_count == 0) {
-		struct pgrp *pg;
-
 		pg = pg_remove(ss->s_sid);
-		mutex_exit(proc_lock);
+	} else {
+		pg = NULL;
+		ss = NULL;
+	}
+
+	rw_exit(&pid_table_lock);
+	mutex_exit(proc_lock);
 
+	if (pg)
 		kmem_free(pg, sizeof(struct pgrp));
+	if (ss)
 		kmem_free(ss, sizeof(struct session));
-	} else {
-		mutex_exit(proc_lock);
-	}
+}
+
+void
+proc_sessrele(struct session *ss)
+{
+	rw_enter(&pid_table_lock, RW_WRITER);
+	proc_sessrele_pid_table_write_locked(ss);
 }
 
 /*
@@ -623,38 +675,147 @@ p_inferior(struct proc *p, struct proc *
 }
 
 /*
- * proc_find: locate a process by the ID.
+ * proc_find_lwp: locate an lwp in said proc by the ID.
  *
- * => Must be called with proc_lock held.
+ * => Must be called with p::p_lock held.
+ * => LARVAL lwps are not returned because they are only partially
+ *    constructed while occupying the slot.
+ * => Callers need to be careful about lwp::l_stat of the returned
+ *    lwp.
  */
-proc_t *
-proc_find_raw(pid_t pid)
+struct lwp *
+proc_find_lwp(proc_t *p, pid_t pid)
 {
 	struct pid_table *pt;
-	proc_t *p;
+	struct lwp *l = NULL;
+	uintptr_t slot;
+
+	KASSERT(mutex_owned(p->p_lock));
+	rw_enter(&pid_table_lock, RW_READER);
+	pt = &pid_table[pid & pid_tbl_mask];
+
+	slot = pt->pt_slot;
+	if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
+		l = PT_GET_LWP(slot);
+		if (__predict_false(l->l_proc != p || l->l_stat == LSLARVAL)) {
+			l = NULL;
+		}
+	}
+	rw_exit(&pid_table_lock);
+
+	return l;
+}
+
+/*
+ * proc_seek_lwpid: locate an lwp by only the ID.
+ *
+ * => This is a specialized interface used for looking up an LWP
+ *    without holding a lock on its owner process.
+ * => Callers of this interface MUST provide a separate synchronization
+ *    mechanism to ensure the validity of the returned LWP.  LARVAL LWPs
+ *    are found there, so callers must check for them!
+ * => Only returns LWPs whose ID has not been hidden from us.
+ */
+struct lwp *
+proc_seek_lwpid(pid_t pid)
+{
+	struct pid_table *pt;
+	struct lwp *l = NULL;
+	uintptr_t slot;
+
+	rw_enter(&pid_table_lock, RW_READER);
+	pt = &pid_table[pid & pid_tbl_mask];
+
+	slot = pt->pt_slot;
+	if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid &&
+			   !PT_HIDDEN(slot))) {
+		l = PT_GET_LWP(slot);
+	}
+	rw_exit(&pid_table_lock);
+
+	return l;
+}
+
+/*
+ * proc_hide_lwpid: hide an lwp ID from seekers.
+ */
+void
+proc_hide_lwpid(pid_t pid)
+{
+	struct pid_table *pt;
+	uintptr_t slot;
+
+	rw_enter(&pid_table_lock, RW_WRITER);
+	pt = &pid_table[pid & pid_tbl_mask];
+
+	slot = pt->pt_slot;
+	KASSERT(PT_IS_LWP(slot));
+	KASSERT(pt->pt_pid == pid);
+	pt->pt_slot = PT_SET_HIDDEN(slot);
+
+	rw_exit(&pid_table_lock);
+}
+
+/*
+ * proc_find_raw_pid_table_locked: locate a process by the ID.
+ *
+ * => Must be called with proc_lock held and the pid_table_lock
+ *    at least held for reading.
+ */
+static proc_t *
+proc_find_raw_pid_table_locked(pid_t pid)
+{
+	struct pid_table *pt;
+	proc_t *p = NULL;
+	uintptr_t slot;
 
 	KASSERT(mutex_owned(proc_lock));
 	pt = &pid_table[pid & pid_tbl_mask];
-	p = pt->pt_proc;
-	if (__predict_false(!P_VALID(p) || pt->pt_pid != pid)) {
-		return NULL;
+
+	slot = pt->pt_slot;
+	if (__predict_true(PT_IS_LWP(slot) && pt->pt_pid == pid)) {
+		/*
+		 * When looking up processes, require a direct match
+		 * on the PID assigned to the proc, not just one of
+		 * its LWPs.
+		 *
+		 * N.B. We require lwp::l_proc of LARVAL LWPs to be
+		 * valid here.
+		 */
+		p = PT_GET_LWP(slot)->l_proc;
+		if (__predict_false(p->p_pid != pid))
+			p = NULL;
+	} else if (PT_IS_PROC(slot) && pt->pt_pid == pid) {
+		p = PT_GET_PROC(slot);
 	}
 	return p;
 }
 
 proc_t *
-proc_find(pid_t pid)
+proc_find_raw(pid_t pid)
+{
+	KASSERT(mutex_owned(proc_lock));
+	rw_enter(&pid_table_lock, RW_READER);
+	proc_t *p = proc_find_raw_pid_table_locked(pid);
+	rw_exit(&pid_table_lock);
+	return p;
+}
+
+static proc_t *
+proc_find_pid_table_locked(pid_t pid)
 {
 	proc_t *p;
 
-	p = proc_find_raw(pid);
+	KASSERT(mutex_owned(proc_lock));
+
+	p = proc_find_raw_pid_table_locked(pid);
 	if (__predict_false(p == NULL)) {
 		return NULL;
 	}
 
 	/*
 	 * Only allow live processes to be found by PID.
-	 * XXX: p_stat might change, since unlocked.
+	 * XXX: p_stat might change, since proc unlocked.
 	 */
 	if (__predict_true(p->p_stat == SACTIVE || p->p_stat == SSTOP)) {
 		return p;
@@ -662,13 +823,24 @@ proc_find(pid_t pid)
 	return NULL;
 }
 
+proc_t *
+proc_find(pid_t pid)
+{
+	KASSERT(mutex_owned(proc_lock));
+	rw_enter(&pid_table_lock, RW_READER);
+	proc_t *p = proc_find_pid_table_locked(pid);
+	rw_exit(&pid_table_lock);
+	return p;
+}
+
 /*
- * pgrp_find: locate a process group by the ID.
+ * pgrp_find_pid_table_locked: locate a process group by the ID.
  *
- * => Must be called with proc_lock held.
+ * => Must be called with proc_lock held and the pid_table_lock
+ *    held at least for reading.
  */
-struct pgrp *
-pgrp_find(pid_t pgid)
+static struct pgrp *
+pgrp_find_pid_table_locked(pid_t pgid)
 {
 	struct pgrp *pg;
 
@@ -686,28 +858,43 @@ pgrp_find(pid_t pgid)
 	return pg;
 }
 
+struct pgrp *
+pgrp_find(pid_t pgid)
+{
+	KASSERT(mutex_owned(proc_lock));
+	rw_enter(&pid_table_lock, RW_READER);
+	struct pgrp *pg = pgrp_find_pid_table_locked(pgid);
+	rw_exit(&pid_table_lock);
+	return pg;
+}
+
 static void
 expand_pid_table(void)
 {
 	size_t pt_size, tsz;
 	struct pid_table *n_pt, *new_pt;
-	struct proc *proc;
+	uintptr_t slot;
 	struct pgrp *pgrp;
 	pid_t pid, rpid;
 	u_int i;
 	uint new_pt_mask;
 
+	KASSERT(rw_write_held(&pid_table_lock));
+
+	/* Unlock the pid_table briefly to allocate memory. */
+	rw_exit(&pid_table_lock);
+
 	pt_size = pid_tbl_mask + 1;
 	tsz = pt_size * 2 * sizeof(struct pid_table);
 	new_pt = kmem_alloc(tsz, KM_SLEEP);
 	new_pt_mask = pt_size * 2 - 1;
 
-	mutex_enter(proc_lock);
+	rw_enter(&pid_table_lock, RW_WRITER);
 	if (pt_size != pid_tbl_mask + 1) {
 		/* Another process beat us to it... */
-		mutex_exit(proc_lock);
+		rw_exit(&pid_table_lock);
 		kmem_free(new_pt, tsz);
-		return;
+		goto out;
 	}
 
 	/*
@@ -724,13 +911,13 @@ expand_pid_table(void)
 	i = pt_size - 1;
 	n_pt = new_pt + i;
 	for (; ; i--, n_pt--) {
-		proc = pid_table[i].pt_proc;
+		slot = pid_table[i].pt_slot;
 		pgrp = pid_table[i].pt_pgrp;
-		if (!P_VALID(proc)) {
+		if (!PT_VALID(slot)) {
 			/* Up 'use count' so that link is valid */
-			pid = (P_NEXT(proc) + pt_size) & ~pt_size;
+			pid = (PT_NEXT(slot) + pt_size) & ~pt_size;
 			rpid = 0;
-			proc = P_FREE(pid);
+			slot = PT_SET_FREE(pid);
 			if (pgrp)
 				pid = pgrp->pg_id;
 		} else {
@@ -739,14 +926,14 @@ expand_pid_table(void)
 		}
 
 		/* Save entry in appropriate half of table */
-		n_pt[pid & pt_size].pt_proc = proc;
+		n_pt[pid & pt_size].pt_slot = slot;
 		n_pt[pid & pt_size].pt_pgrp = pgrp;
 		n_pt[pid & pt_size].pt_pid = rpid;
 
 		/* Put other piece on start of free list */
 		pid = (pid ^ pt_size) & ~pid_tbl_mask;
-		n_pt[pid & pt_size].pt_proc =
-			P_FREE((pid & ~pt_size) | next_free_pt);
+		n_pt[pid & pt_size].pt_slot =
+			PT_SET_FREE((pid & ~pt_size) | next_free_pt);
 		n_pt[pid & pt_size].pt_pgrp = 0;
 		n_pt[pid & pt_size].pt_pid = 0;
 
@@ -771,8 +958,11 @@ expand_pid_table(void)
 	} else
 		pid_alloc_lim <<= 1;	/* doubles number of free slots... */
 
-	mutex_exit(proc_lock);
+	rw_exit(&pid_table_lock);
 	kmem_free(n_pt, tsz);
+
+ out:	/* Return with the pid_table_lock held again. */
+	rw_enter(&pid_table_lock, RW_WRITER);
 }
 
 struct proc *
@@ -784,38 +974,63 @@ proc_alloc(void)
 	p->p_stat = SIDL;			/* protect against others */
 	proc_initspecific(p);
 	kdtrace_proc_ctor(NULL, p);
-	p->p_pid = -1;
-	proc_alloc_pid(p);
+
+	/*
+	 * Allocate a placeholder in the pid_table.  When we create the
+	 * first LWP for this process, it will take ownership of the
+	 * slot.
+	 */
+	if (__predict_false(proc_alloc_pid(p) == -1)) {
+		/* Allocating the PID failed; unwind. */
+		proc_finispecific(p);
+		proc_free_mem(p);
+		p = NULL;
+	}
 	return p;
 }
 
 /*
- * proc_alloc_pid: allocate PID and record the given proc 'p' so that
+ * proc_alloc_pid_slot: allocate PID and record the occcupant so that
  * proc_find_raw() can find it by the PID.
  */
-
-pid_t
-proc_alloc_pid(struct proc *p)
+static pid_t __noinline
+proc_alloc_pid_slot(struct proc *p, uintptr_t slot)
 {
 	struct pid_table *pt;
 	pid_t pid;
 	int nxt;
 
+	KASSERT(rw_write_held(&pid_table_lock));
+
 	for (;;expand_pid_table()) {
-		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim))
+		if (__predict_false(pid_alloc_cnt >= pid_alloc_lim)) {
 			/* ensure pids cycle through 2000+ values */
 			continue;
-		mutex_enter(proc_lock);
+		}
+		/*
+		 * The first user process *must* be given PID 1.
+		 * it has already been reserved for us.  This
+		 * will be coming in from the proc_alloc() call
+		 * above, and the entry will be usurped later when
+		 * the first user LWP is created.
+		 * XXX this is slightly gross.
+		 */
+		if (__predict_false(PT_RESERVED(pid_table[1].pt_slot) &&
+				    p != &proc0)) {
+			KASSERT(PT_IS_PROC(slot));
+			pt = &pid_table[1];
+			pt->pt_slot = slot;
+			return 1;
+		}
 		pt = &pid_table[next_free_pt];
 #ifdef DIAGNOSTIC
-		if (__predict_false(P_VALID(pt->pt_proc) || pt->pt_pgrp))
+		if (__predict_false(PT_VALID(pt->pt_slot) || pt->pt_pgrp))
 			panic("proc_alloc: slot busy");
 #endif
-		nxt = P_NEXT(pt->pt_proc);
+		nxt = PT_NEXT(pt->pt_slot);
 		if (nxt & pid_tbl_mask)
 			break;
 		/* Table full - expand (NB last entry not used....) */
-		mutex_exit(proc_lock);
 	}
 
 	/* pid is 'saved use count' + 'size' + entry */
@@ -825,47 +1040,133 @@ proc_alloc_pid(struct proc *p)
 	next_free_pt = nxt & pid_tbl_mask;
 
 	/* Grab table slot */
-	pt->pt_proc = p;
+	pt->pt_slot = slot;
 
 	KASSERT(pt->pt_pid == 0);
 	pt->pt_pid = pid;
-	if (p->p_pid == -1) {
-		p->p_pid = pid;
-	}
 	pid_alloc_cnt++;
-	mutex_exit(proc_lock);
 
 	return pid;
 }
 
-/*
- * Free a process id - called from proc_free (in kern_exit.c)
- *
- * Called with the proc_lock held.
- */
-void
-proc_free_pid(pid_t pid)
+pid_t
+proc_alloc_pid(struct proc *p)
+{
+	pid_t pid;
+
+	KASSERT((((uintptr_t)p) & PT_F_ALLBITS) == 0);
+
+	rw_enter(&pid_table_lock, RW_WRITER);
+	pid = proc_alloc_pid_slot(p, PT_SET_PROC(p));
+	if (pid != -1)
+		p->p_pid = pid;
+	rw_exit(&pid_table_lock);
+
+	return pid;
+}
+
+pid_t
+proc_alloc_lwpid(struct proc *p, struct lwp *l)
 {
 	struct pid_table *pt;
+	pid_t pid;
 
-	KASSERT(mutex_owned(proc_lock));
+	KASSERT((((uintptr_t)l) & PT_F_ALLBITS) == 0);
 
+	/*
+	 * If the slot for p->p_pid currently points to the proc,
+	 * then we should usurp this ID for the LWP.  This happens
+	 * at least once per process (for the first LWP), and can
+	 * happen again if the first LWP for a process exits and
+	 * before the process creates another.
+	 */
+	rw_enter(&pid_table_lock, RW_WRITER);
+	pid = p->p_pid;
 	pt = &pid_table[pid & pid_tbl_mask];
+	KASSERT(pt->pt_pid == pid);
+	if (PT_IS_PROC(pt->pt_slot)) {
+		KASSERT(PT_GET_PROC(pt->pt_slot) == p);
+		l->l_lid = pid;
+		pt->pt_slot = PT_SET_LWP(l);
+	} else {
+		/* Need to allocate a new slot. */
+		pid = proc_alloc_pid_slot(p, PT_SET_LWP(l));
+		if (pid != -1)
+			l->l_lid = pid;
+	}
+	rw_exit(&pid_table_lock);
 
-	/* save pid use count in slot */
-	pt->pt_proc = P_FREE(pid & ~pid_tbl_mask);
+	return pid;
+}
+
+static void __noinline
+proc_free_pid_internal(pid_t pid, uintptr_t type __diagused)
+{
+	struct pid_table *pt;
+
+	rw_enter(&pid_table_lock, RW_WRITER);
+	pt = &pid_table[pid & pid_tbl_mask];
+
+	KASSERT(PT_GET_TYPE(pt->pt_slot) == type);
 	KASSERT(pt->pt_pid == pid);
+
+	/* save pid use count in slot */
+	pt->pt_slot = PT_SET_FREE(pid & ~pid_tbl_mask);
 	pt->pt_pid = 0;
 
 	if (pt->pt_pgrp == NULL) {
 		/* link last freed entry onto ours */
 		pid &= pid_tbl_mask;
 		pt = &pid_table[last_free_pt];
-		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pid);
+		pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pid);
 		pt->pt_pid = 0;
 		last_free_pt = pid;
 		pid_alloc_cnt--;
 	}
+	rw_exit(&pid_table_lock);
+}
+
+/*
+ * Free a process id - called from proc_free (in kern_exit.c)
+ *
+ * Called with the proc_lock held.
+ */
+void
+proc_free_pid(pid_t pid)
+{
+	KASSERT(mutex_owned(proc_lock));
+	proc_free_pid_internal(pid, PT_F_PROC);
+}
+
+/*
+ * Free a process id used by an LWP.  If this was the process's
+ * first LWP, we convert the slot to point to the process; the
+ * entry will get cleaned up later when the process finishes exiting.
+ *
+ * If not, then it's the same as proc_free_pid().
+ */
+void
+proc_free_lwpid(struct proc *p, pid_t pid)
+{
+
+	KASSERT(mutex_owned(p->p_lock));
+
+	if (__predict_true(p->p_pid == pid)) {
+		struct pid_table *pt;
+
+		rw_enter(&pid_table_lock, RW_WRITER);
+		pt = &pid_table[pid & pid_tbl_mask];
+
+		KASSERT(pt->pt_pid == pid);
+		KASSERT(PT_IS_LWP(pt->pt_slot));
+		KASSERT(PT_GET_LWP(pt->pt_slot)->l_proc == p);
+
+		pt->pt_slot = PT_SET_PROC(p);
+
+		rw_exit(&pid_table_lock);
+		return;
+	}
+	proc_free_pid_internal(pid, PT_F_LWP);
 }
 
 void
@@ -899,13 +1200,16 @@ proc_enterpgrp(struct proc *curp, pid_t 
 	sess = mksess ? kmem_alloc(sizeof(*sess), KM_SLEEP) : NULL;
 
 	/* Allocate data areas we might need before doing any validity checks */
-	mutex_enter(proc_lock);		/* Because pid_table might change */
+	rw_enter(&pid_table_lock, RW_READER);/* Because pid_table might change */
 	if (pid_table[pgid & pid_tbl_mask].pt_pgrp == 0) {
-		mutex_exit(proc_lock);
+		rw_exit(&pid_table_lock);
 		new_pgrp = kmem_alloc(sizeof(*new_pgrp), KM_SLEEP);
-		mutex_enter(proc_lock);
-	} else
+	} else {
+		rw_exit(&pid_table_lock);
 		new_pgrp = NULL;
+	}
+	mutex_enter(proc_lock);
+	rw_enter(&pid_table_lock, RW_WRITER);
 	rval = EPERM;	/* most common error (to save typing) */
 
 	/* Check pgrp exists or can be created */
@@ -916,7 +1220,7 @@ proc_enterpgrp(struct proc *curp, pid_t 
 	/* Can only set another process under restricted circumstances. */
 	if (pid != curp->p_pid) {
 		/* Must exist and be one of our children... */
-		p = proc_find(pid);
+		p = proc_find_pid_table_locked(pid);
 		if (p == NULL || !p_inferior(p, curp)) {
 			rval = ESRCH;
 			goto done;
@@ -935,7 +1239,7 @@ proc_enterpgrp(struct proc *curp, pid_t 
 	} else {
 		/* ... setsid() cannot re-enter a pgrp */
 		if (mksess && (curp->p_pgid == curp->p_pid ||
-		    pgrp_find(curp->p_pid)))
+		    pgrp_find_pid_table_locked(curp->p_pid)))
 			goto done;
 		p = curp;
 	}
@@ -1029,6 +1333,7 @@ proc_enterpgrp(struct proc *curp, pid_t 
 	mutex_spin_exit(&tty_lock);
 
     done:
+	rw_exit(&pid_table_lock);
 	if (pg_id != NO_PGID) {
 		/* Releases proc_lock. */
 		pg_delete(pg_id);
@@ -1085,6 +1390,7 @@ pg_remove(pid_t pg_id)
 	struct pid_table *pt;
 
 	KASSERT(mutex_owned(proc_lock));
+	KASSERT(rw_write_held(&pid_table_lock));
 
 	pt = &pid_table[pg_id & pid_tbl_mask];
 	pgrp = pt->pt_pgrp;
@@ -1095,12 +1401,12 @@ pg_remove(pid_t pg_id)
 
 	pt->pt_pgrp = NULL;
 
-	if (!P_VALID(pt->pt_proc)) {
+	if (!PT_VALID(pt->pt_slot)) {
 		/* Orphaned pgrp, put slot onto free list. */
-		KASSERT((P_NEXT(pt->pt_proc) & pid_tbl_mask) == 0);
+		KASSERT((PT_NEXT(pt->pt_slot) & pid_tbl_mask) == 0);
 		pg_id &= pid_tbl_mask;
 		pt = &pid_table[last_free_pt];
-		pt->pt_proc = P_FREE(P_NEXT(pt->pt_proc) | pg_id);
+		pt->pt_slot = PT_SET_FREE(PT_NEXT(pt->pt_slot) | pg_id);
 		KASSERT(pt->pt_pid == 0);
 		last_free_pt = pg_id;
 		pid_alloc_cnt--;
@@ -1121,8 +1427,10 @@ pg_delete(pid_t pg_id)
 
 	KASSERT(mutex_owned(proc_lock));
 
+	rw_enter(&pid_table_lock, RW_WRITER);
 	pg = pid_table[pg_id & pid_tbl_mask].pt_pgrp;
 	if (pg == NULL || pg->pg_id != pg_id || !LIST_EMPTY(&pg->pg_members)) {
+		rw_exit(&pid_table_lock);
 		mutex_exit(proc_lock);
 		return;
 	}
@@ -1139,14 +1447,15 @@ pg_delete(pid_t pg_id)
 	mutex_spin_exit(&tty_lock);
 
 	/*
-	 * The leading process group in a session is freed by proc_sessrele(),
-	 * if last reference.  Note: proc_sessrele() releases proc_lock.
+	 * The leading process group in a session is freed by
+	 * proc_sessrele_pid_table_write_locked(), if last
+	 * reference.  It will also release the locks.
 	 */
 	pg = (ss->s_sid != pg->pg_id) ? pg_remove(pg_id) : NULL;
-	proc_sessrele(ss);
+	proc_sessrele_pid_table_write_locked(ss);
 
 	if (pg != NULL) {
-		/* Free it, if was not done by proc_sessrele(). */
+		/* Free it, if was not done above. */
 		kmem_free(pg, sizeof(struct pgrp));
 	}
 }
@@ -1241,23 +1550,31 @@ pidtbl_dump(void)
 	struct pid_table *pt;
 	struct proc *p;
 	struct pgrp *pgrp;
+	uintptr_t slot;
 	int id;
 
 	db_printf("pid table %p size %x, next %x, last %x\n",
 		pid_table, pid_tbl_mask+1,
 		next_free_pt, last_free_pt);
 	for (pt = pid_table, id = 0; id <= pid_tbl_mask; id++, pt++) {
-		p = pt->pt_proc;
-		if (!P_VALID(p) && !pt->pt_pgrp)
+		slot = pt->pt_slot;
+		if (!PT_VALID(slot) && !pt->pt_pgrp)
 			continue;
+		if (PT_IS_LWP(slot)) {
+			p = PT_GET_LWP(slot)->l_proc;
+		} else if (PT_IS_PROC(slot)) {
+			p = PT_GET_PROC(slot);
+		} else {
+			p = NULL;
+		}
 		db_printf("  id %x: ", id);
-		if (P_VALID(p))
+		if (p != NULL)
 			db_printf("slotpid %d proc %p id %d (0x%x) %s\n",
 				pt->pt_pid, p, p->p_pid, p->p_pid, p->p_comm);
 		else
 			db_printf("next %x use %x\n",
-				P_NEXT(p) & pid_tbl_mask,
-				P_NEXT(p) & ~pid_tbl_mask);
+				PT_NEXT(slot) & pid_tbl_mask,
+				PT_NEXT(slot) & ~pid_tbl_mask);
 		if ((pgrp = pt->pt_pgrp)) {
 			db_printf("\tsession %p, sid %d, count %d, login %s\n",
 			    pgrp->pg_session, pgrp->pg_session->s_sid,
@@ -2245,7 +2562,6 @@ fill_proc(const struct proc *psrc, struc
 	p->p_nrlwps = psrc->p_nrlwps;
 	p->p_nlwpwait = psrc->p_nlwpwait;
 	p->p_ndlwps = psrc->p_ndlwps;
-	p->p_nlwpid = psrc->p_nlwpid;
 	p->p_nstopchild = psrc->p_nstopchild;
 	p->p_waited = psrc->p_waited;
 	COND_SET_VALUE(p->p_zomblwp, psrc->p_zomblwp, allowaddr);
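
One detail of the free-list handling above: a free slot reuses the same
pt_slot word, storing the next-free index (with the recycled "use count" bits
above the table mask) shifted up one bit so the FREE tag stays in bit 0, just
as the old P_FREE/P_NEXT scheme did for pt_proc.  A short standalone
illustration of that encoding follows; the macro names here are made up, the
kernel's are PT_SET_FREE/PT_NEXT.

/* freedemo.c - illustrative only.  Build with: cc -std=c11 freedemo.c */
#include <stdio.h>
#include <stdint.h>

#define F_FREE		0x1u
#define SET_FREE(v)	(((uintptr_t)(v) << 1) | F_FREE)
#define IS_FREE(s)	(((s) & F_FREE) != 0)
#define NEXT(s)		((unsigned)((s) >> 1))

int
main(void)
{
	unsigned pid_tbl_mask = (1u << 5) - 1;	/* 32-entry table, as in procinit() */
	uintptr_t slot = SET_FREE(992 + 7);	/* use count 992, next free index 7 */

	printf("free=%d next=%u usecount=%u\n",
	    IS_FREE(slot),
	    NEXT(slot) & pid_tbl_mask,
	    NEXT(slot) & ~pid_tbl_mask);
	return 0;
}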

Index: src/sys/kern/sys_lwp.c
diff -u src/sys/kern/sys_lwp.c:1.78 src/sys/kern/sys_lwp.c:1.79
--- src/sys/kern/sys_lwp.c:1.78	Wed Apr 22 21:22:21 2020
+++ src/sys/kern/sys_lwp.c	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: sys_lwp.c,v 1.78 2020/04/22 21:22:21 thorpej Exp $	*/
+/*	$NetBSD: sys_lwp.c,v 1.79 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2006, 2007, 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -35,7 +35,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.78 2020/04/22 21:22:21 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_lwp.c,v 1.79 2020/04/24 03:22:06 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -415,8 +415,7 @@ sys__lwp_detach(struct lwp *l, const str
 		 * We can't use lwp_find() here because the target might
 		 * be a zombie.
 		 */
-		t = radix_tree_lookup_node(&p->p_lwptree,
-		    (uint64_t)(target - 1));
+		t = proc_find_lwp(p, target);
 		KASSERT(t == NULL || t->l_lid == target);
 	}
 
@@ -458,7 +457,6 @@ sys__lwp_detach(struct lwp *l, const str
 int
 lwp_unpark(const lwpid_t *tp, const u_int ntargets)
 {
-	uint64_t id;
 	u_int target;
 	int error;
 	proc_t *p;
@@ -467,21 +465,40 @@ lwp_unpark(const lwpid_t *tp, const u_in
 	p = curproc;
 	error = 0;
 
-	rw_enter(&p->p_treelock, RW_READER);
+	mutex_enter(p->p_lock);
 	for (target = 0; target < ntargets; target++) {
 		/*
-		 * We don't bother excluding zombies or idle LWPs here, as
+		 * We don't bother excluding idle LWPs here, as
 		 * setting LW_UNPARKED on them won't do any harm.
 		 */
-		id = (uint64_t)(tp[target] - 1);
-		t = radix_tree_lookup_node(&p->p_lwptree, id);
-		if (t == NULL) {
+		t = proc_find_lwp(p, tp[target]);
+		if (__predict_false(t == NULL)) {
 			error = ESRCH;
 			continue;
 		}
 
+		/*
+		 * The locking order is p::p_lock -> l::l_mutex,
+		 * but it may not be unsafe to release p::p_lock
+		 * while l::l_mutex is held because l::l_mutex is
+		 * a scheduler lock and we don't want to get tied
+		 * in knots while unwinding priority inheritance.
+		 * So, get a reference count on the LWP and then
+		 * unlock p::p_lock before acquiring l::l_mutex.
+		 */
+		if (__predict_false(t->l_stat == LSZOMB)) {
+			continue;
+		}
+ 		lwp_addref(t);
+ 		mutex_exit(p->p_lock);
+
+		/*
+		 * Note the LWP cannot become a zombie while we
+		 * hold a reference.
+		 */
+
 		lwp_lock(t);
-		if (t->l_syncobj == &lwp_park_syncobj) {
+		if (__predict_true(t->l_syncobj == &lwp_park_syncobj)) {
 			/*
 			 * As expected it's parked, so wake it up. 
 			 * lwp_unsleep() will release the LWP lock.
@@ -499,8 +516,10 @@ lwp_unpark(const lwpid_t *tp, const u_in
 			t->l_flag |= LW_UNPARKED;
 			lwp_unlock(t);
 		}
+		mutex_enter(p->p_lock);
+		lwp_delref2(t);
 	}
-	rw_exit(&p->p_treelock);
+	mutex_exit(p->p_lock);
 
 	return error;
 }

Index: src/sys/sys/lwp.h
diff -u src/sys/sys/lwp.h:1.206 src/sys/sys/lwp.h:1.207
--- src/sys/sys/lwp.h:1.206	Fri Apr 10 17:16:21 2020
+++ src/sys/sys/lwp.h	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: lwp.h,v 1.206 2020/04/10 17:16:21 ad Exp $	*/
+/*	$NetBSD: lwp.h,v 1.207 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*
  * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019, 2020
@@ -136,22 +136,8 @@ struct lwp {
 	bool		l_vforkwaiting;	/* a: vfork() waiting */
 
 	/* User-space synchronization. */
-	uintptr_t	l___reserved;	/* reserved for future use */
-	/*
-	 * The global thread ID has special locking and access
-	 * considerations.  Because many LWPs may never need one,
-	 * global thread IDs are allocated lazily in lwp_gettid().
-	 * l___tid is not bean to be accessed directly unless
-	 * the accessor has specific knowledge that doing so
-	 * is safe.  l___tid is only assigned by the LWP itself.
-	 * Once assigned, it is stable until the LWP exits.
-	 * An LWP assigns its own thread ID unlocked before it
-	 * reaches visibility to the rest of the system, and
-	 * can access its own thread ID unlocked.  But once
-	 * published, it must hold the proc's lock to change
-	 * the value.
-	 */
-	lwpid_t		l___tid;	/* p: global thread id */
+	uintptr_t	l___rsvd0;	/* reserved for future use */
+	uint32_t	l___rsvd1;	/* reserved for future use */
 
 #if PCU_UNIT_COUNT > 0
 	struct cpu_info	* volatile l_pcu_cpu[PCU_UNIT_COUNT];
@@ -287,7 +273,7 @@ extern int		maxlwp __read_mostly;	/* max
 #define	LP_KTRACTIVE	0x00000001 /* Executing ktrace operation */
 #define	LP_KTRCSW	0x00000002 /* ktrace context switch marker */
 #define	LP_KTRCSWUSER	0x00000004 /* ktrace context switch marker */
-#define	LP_PIDLID	0x00000008 /* free LID from PID space on exit */
+	/* 		0x00000008    was LP_PIDLID */
 #define	LP_OWEUPC	0x00000010 /* Owe user profiling tick */
 #define	LP_MPSAFE	0x00000020 /* Starts life without kernel_lock */
 #define	LP_INTR		0x00000040 /* Soft interrupt handler */
@@ -325,6 +311,7 @@ extern int		maxlwp __read_mostly;	/* max
  *
  * These values are set in stone and must not be reused with future changes.
  */
+#define	LSLARVAL	0	/* in pid table, but partially constructed */
 #define	LSIDL		1	/* Process being created by fork. */
 #define	LSRUN		2	/* Currently runnable. */
 #define	LSSLEEP		3	/* Sleeping on an address. */
@@ -362,13 +349,13 @@ kmutex_t *lwp_setlock(lwp_t *, kmutex_t 
 void	lwp_unlock_to(lwp_t *, kmutex_t *);
 int	lwp_trylock(lwp_t *);
 void	lwp_addref(lwp_t *);
+lwp_t *	lwp_getref_lwpid(lwpid_t);
 void	lwp_delref(lwp_t *);
 void	lwp_delref2(lwp_t *);
 bool	lwp_drainrefs(lwp_t *);
 bool	lwp_alive(lwp_t *);
 lwp_t	*lwp_find_first(proc_t *);
 
-void	lwp_renumber(lwp_t *, lwpid_t);
 int	lwp_wait(lwp_t *, lwpid_t, lwpid_t *, bool);
 void	lwp_continue(lwp_t *);
 void	lwp_unsleep(lwp_t *, bool);
@@ -389,8 +376,6 @@ int	lwp_setprivate(lwp_t *, void *);
 int	do_lwp_create(lwp_t *, void *, u_long, lwp_t **, const sigset_t *,
     const stack_t *);
 
-lwpid_t	lwp_gettid(void);
-lwp_t *	lwp_getref_tid(lwpid_t);
 void	lwp_thread_cleanup(lwp_t *);
 
 void	lwpinit_specificdata(void);
@@ -606,7 +591,7 @@ curlwp_bindx(int bound)
 #define	LWP_SUSPENDED	0x00000080
 
 /* Kernel-internal flags for LWP creation. */
-#define	LWP_PIDLID	0x40000000
+	/*		0x40000000	was LWP_PIDLID */
 #define	LWP_VFORK	0x80000000
 
 #endif	/* !_SYS_LWP_H_ */

Index: src/sys/sys/proc.h
diff -u src/sys/sys/proc.h:1.362 src/sys/sys/proc.h:1.363
--- src/sys/sys/proc.h:1.362	Mon Apr  6 08:20:05 2020
+++ src/sys/sys/proc.h	Fri Apr 24 03:22:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: proc.h,v 1.362 2020/04/06 08:20:05 kamil Exp $	*/
+/*	$NetBSD: proc.h,v 1.363 2020/04/24 03:22:06 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
@@ -223,8 +223,6 @@ struct emul {
  * l:	proc_lock
  * t:	p_stmutex
  * p:	p_lock
- * r:	p_treelock (only for use by LWPs in the same proc)
- * p,r:	p_lock + p_treelock to modify, either to inspect
  * (:	updated atomically
  * ::	unlocked, stable
  */
@@ -265,7 +263,6 @@ struct proc {
 	LIST_ENTRY(proc) p_sibling;	/* l: List of sibling processes. */
 	LIST_HEAD(, proc) p_children;	/* l: List of children. */
 	LIST_HEAD(, lwp) p_lwps;	/* p: List of LWPs. */
-	struct radix_tree p_lwptree;	/* p,r: Tree of LWPs. */
 	struct ras	*p_raslist;	/* a: List of RAS entries */
 
 /* The following fields are all zeroed upon creation in fork. */
@@ -276,7 +273,6 @@ struct proc {
 	int		p_nrlwps;	/* p: Number running/sleeping LWPs */
 	int		p_nlwpwait;	/* p: Number of LWPs in lwp_wait1() */
 	int		p_ndlwps;	/* p: Number of detached LWPs */
-	int 		p_nlwpid;	/* p: Next LWP ID */
 	u_int		p_nstopchild;	/* l: Count of stopped/dead children */
 	u_int		p_waited;	/* l: parent has waited on child */
 	struct lwp	*p_zomblwp;	/* p: detached LWP to be reaped */
@@ -350,7 +346,6 @@ struct proc {
 	    __aligned(COHERENCY_UNIT);
 	kmutex_t	p_stmutex;	/* :: mutex on profiling state */
 	krwlock_t	p_reflock;	/* :: lock for debugger, procfs */
-	krwlock_t	p_treelock;	/* :: lock on p_lwptree */
 };
 
 #define	p_rlimit	p_limit->pl_rlimit
@@ -502,8 +497,12 @@ extern struct pool	ptimer_pool;	/* Memor
 int		proc_find_locked(struct lwp *, struct proc **, pid_t);
 proc_t *	proc_find_raw(pid_t);
 proc_t *	proc_find(pid_t);		/* Find process by ID */
+struct lwp *	proc_find_lwp(proc_t *, pid_t);	/* Find LWP in proc by ID */
 struct pgrp *	pgrp_find(pid_t);		/* Find process group by ID */
 
+struct lwp *	proc_seek_lwpid(pid_t);		/* Find LWP by ID only */
+void		proc_hide_lwpid(pid_t);		/* Hide LWP ID from seekers */
+
 void	procinit(void);
 void	procinit_sysctl(void);
 int	proc_enterpgrp(struct proc *, pid_t, pid_t, bool);
@@ -526,6 +525,8 @@ struct proc *proc_alloc(void);
 void	proc0_init(void);
 pid_t	proc_alloc_pid(struct proc *);
 void	proc_free_pid(pid_t);
+pid_t	proc_alloc_lwpid(struct proc *, struct lwp *);
+void	proc_free_lwpid(struct proc *, pid_t);
 void	proc_free_mem(struct proc *);
 void	exit_lwps(struct lwp *l);
 int	fork1(struct lwp *, int, int, void *, size_t,
