Module Name:    src
Committed By:   thorpej
Date:           Mon Dec 24 16:58:54 UTC 2018

Modified Files:
        src/distrib/sets/lists/comp: mi
        src/distrib/sets/lists/tests: mi module.mi
        src/share/man/man9: Makefile
        src/sys/kern: files.kern
        src/sys/rump/librump/rumpkern: Makefile.rumpkern
        src/sys/sys: param.h
        src/tests/kernel: Makefile
Added Files:
        src/share/man/man9: threadpool.9
        src/sys/kern: kern_threadpool.c
        src/sys/sys: threadpool.h
        src/tests/kernel: t_threadpool.sh
        src/tests/kernel/threadpool_tester: Makefile threadpool_tester.c

Log Message:
Add threadpool(9), an abstraction that provides shared pools of kernel
threads running at specific priorities, with support for unbound pools
and per-cpu pools.

Written by riastradh@, and based on the May 2014 draft, with a few changes
by me:
- Working on the assumption that relatively few priorities will actually
  be used, reduce the memory footprint by using linked lists, rather than
  2 large (and mostly empty) tables.  The performance impact is essentially
  nil, since these lists are consulted only when pools are created (and
  destroyed, for DIAGNOSTIC checks), and the lists will have at most 225
  entries.
- Make the threadpool job object, which the caller must allocate storage for,
  really opaque.
- Use typedefs for the threadpool types, to reduce the verbosity of the
  API somewhat.
- Fix a bunch of pool / worker thread / job object lifecycle bugs.

Also include an ATF unit test, written by me, that exercises the basics
of the API by loading a kernel module that exposes several sysctls that
allow the ATF test script to create and destroy threadpools, schedule a
basic job, and verify that it ran.

And thus NetBSD 8.99.29 has arrived.


To generate a diff of this commit:
cvs rdiff -u -r1.2244 -r1.2245 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.798 -r1.799 src/distrib/sets/lists/tests/mi
cvs rdiff -u -r1.13 -r1.14 src/distrib/sets/lists/tests/module.mi
cvs rdiff -u -r1.433 -r1.434 src/share/man/man9/Makefile
cvs rdiff -u -r0 -r1.1 src/share/man/man9/threadpool.9
cvs rdiff -u -r1.28 -r1.29 src/sys/kern/files.kern
cvs rdiff -u -r0 -r1.1 src/sys/kern/kern_threadpool.c
cvs rdiff -u -r1.172 -r1.173 src/sys/rump/librump/rumpkern/Makefile.rumpkern
cvs rdiff -u -r1.573 -r1.574 src/sys/sys/param.h
cvs rdiff -u -r0 -r1.1 src/sys/sys/threadpool.h
cvs rdiff -u -r1.54 -r1.55 src/tests/kernel/Makefile
cvs rdiff -u -r0 -r1.1 src/tests/kernel/t_threadpool.sh
cvs rdiff -u -r0 -r1.1 src/tests/kernel/threadpool_tester/Makefile \
    src/tests/kernel/threadpool_tester/threadpool_tester.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/distrib/sets/lists/comp/mi
diff -u src/distrib/sets/lists/comp/mi:1.2244 src/distrib/sets/lists/comp/mi:1.2245
--- src/distrib/sets/lists/comp/mi:1.2244	Sat Dec 22 14:39:47 2018
+++ src/distrib/sets/lists/comp/mi	Mon Dec 24 16:58:53 2018
@@ -1,4 +1,4 @@
-#	$NetBSD: mi,v 1.2244 2018/12/22 14:39:47 maxv Exp $
+#	$NetBSD: mi,v 1.2245 2018/12/24 16:58:53 thorpej Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 ./etc/mtree/set.comp				comp-sys-root
@@ -11825,6 +11825,19 @@
 ./usr/share/man/cat9/tc_syncbus.0		comp-sys-catman		.cat
 ./usr/share/man/cat9/tc_wmb.0			comp-sys-catman		.cat
 ./usr/share/man/cat9/tcp_congctl.0		comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool.0		comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_get.0		comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_put.0		comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_percpu_get.0	comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_percpu_put.0	comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_percpu_ref.0	comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_percpu_ref_remote.0 comp-sys-catman	.cat
+./usr/share/man/cat9/threadpool_job_init.0	comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_job_destroy.0	comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_job_done.0	comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_schedule_job.0	comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_cancel_job.0	comp-sys-catman		.cat
+./usr/share/man/cat9/threadpool_cancel_job_async.0 comp-sys-catman	.cat
 ./usr/share/man/cat9/tick.0			comp-sys-catman		.cat
 ./usr/share/man/cat9/tickadj.0			comp-sys-catman		.cat
 ./usr/share/man/cat9/time.0			comp-obsolete		obsolete
@@ -19645,6 +19658,19 @@
 ./usr/share/man/html9/tc_syncbus.html		comp-sys-htmlman	html
 ./usr/share/man/html9/tc_wmb.html		comp-sys-htmlman	html
 ./usr/share/man/html9/tcp_congctl.html		comp-sys-htmlman	html
+./usr/share/man/html9/threadpool.html		comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_get.html	comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_put.html	comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_percpu_get.html comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_percpu_put.html comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_percpu_ref.html comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_percpu_ref_remote.html comp-sys-htmlman html
+./usr/share/man/html9/threadpool_job_init.html	comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_job_destroy.html comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_job_done.html	comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_schedule_job.html comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_cancel_job.html comp-sys-htmlman	html
+./usr/share/man/html9/threadpool_cancel_job_async.html comp-sys-htmlman	html
 ./usr/share/man/html9/tick.html			comp-sys-htmlman	html
 ./usr/share/man/html9/tickadj.html		comp-sys-htmlman	html
 ./usr/share/man/html9/time_second.html		comp-sys-htmlman	html
@@ -27665,6 +27691,19 @@
 ./usr/share/man/man9/tc_syncbus.9		comp-sys-man		.man
 ./usr/share/man/man9/tc_wmb.9			comp-sys-man		.man
 ./usr/share/man/man9/tcp_congctl.9		comp-sys-man		.man
+./usr/share/man/man9/threadpool.9		comp-sys-man		.man
+./usr/share/man/man9/threadpool_get.9		comp-sys-man		.man
+./usr/share/man/man9/threadpool_put.9		comp-sys-man		.man
+./usr/share/man/man9/threadpool_percpu_get.9	comp-sys-man		.man
+./usr/share/man/man9/threadpool_percpu_put.9	comp-sys-man		.man
+./usr/share/man/man9/threadpool_percpu_ref.9	comp-sys-man		.man
+./usr/share/man/man9/threadpool_percpu_ref_remote.9 comp-sys-man	.man
+./usr/share/man/man9/threadpool_job_init.9	comp-sys-man		.man
+./usr/share/man/man9/threadpool_job_destroy.9	comp-sys-man		.man
+./usr/share/man/man9/threadpool_job_done.9	comp-sys-man		.man
+./usr/share/man/man9/threadpool_schedule_job.9	comp-sys-man		.man
+./usr/share/man/man9/threadpool_cancel_job.9	comp-sys-man		.man
+./usr/share/man/man9/threadpool_cancel_job_async.9 comp-sys-man		.man
 ./usr/share/man/man9/tick.9			comp-sys-man		.man
 ./usr/share/man/man9/tickadj.9			comp-sys-man		.man
 ./usr/share/man/man9/time.9			comp-obsolete		obsolete

Index: src/distrib/sets/lists/tests/mi
diff -u src/distrib/sets/lists/tests/mi:1.798 src/distrib/sets/lists/tests/mi:1.799
--- src/distrib/sets/lists/tests/mi:1.798	Sun Dec 23 21:27:45 2018
+++ src/distrib/sets/lists/tests/mi	Mon Dec 24 16:58:54 2018
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.798 2018/12/23 21:27:45 jakllsch Exp $
+# $NetBSD: mi,v 1.799 2018/12/24 16:58:54 thorpej Exp $
 #
 # Note: don't delete entries from here - mark them as "obsolete" instead.
 #
@@ -2205,6 +2205,7 @@
 ./usr/tests/kernel/t_subr_prf			tests-kernel-tests	compattestfile,atf
 ./usr/tests/kernel/t_sysctl			tests-kernel-tests	compattestfile,atf
 ./usr/tests/kernel/t_sysv			tests-kernel-tests	compattestfile,atf
+./usr/tests/kernel/t_threadpool			tests-kernel-tests	compattestfile,atf
 ./usr/tests/kernel/t_time			tests-obsolete		obsolete
 ./usr/tests/kernel/t_timeleft			tests-kernel-tests	compattestfile,atf
 ./usr/tests/kernel/t_trapsignal			tests-kernel-tests	compattestfile,atf

Index: src/distrib/sets/lists/tests/module.mi
diff -u src/distrib/sets/lists/tests/module.mi:1.13 src/distrib/sets/lists/tests/module.mi:1.14
--- src/distrib/sets/lists/tests/module.mi:1.13	Mon Jan  8 14:17:15 2018
+++ src/distrib/sets/lists/tests/module.mi	Mon Dec 24 16:58:54 2018
@@ -1,7 +1,9 @@
-# $NetBSD: module.mi,v 1.13 2018/01/08 14:17:15 martin Exp $
+# $NetBSD: module.mi,v 1.14 2018/12/24 16:58:54 thorpej Exp $
 #
 # These are only made for ports doing modules.
 #
+./usr/tests/kernel/threadpool_tester		tests-kernel-tests	compattestfile,atf
+./usr/tests/kernel/threadpool_tester/threadpool_tester.kmod tests-kernel-tests compattestfile,atf
 ./usr/tests/modules/Atffile			tests-sys-tests		atf,rump
 ./usr/tests/modules/Kyuafile			tests-sys-tests		atf,rump,kyua
 ./usr/tests/modules/k_helper			tests-sys-tests		atf,rump

Index: src/share/man/man9/Makefile
diff -u src/share/man/man9/Makefile:1.433 src/share/man/man9/Makefile:1.434
--- src/share/man/man9/Makefile:1.433	Sat Dec 22 14:39:46 2018
+++ src/share/man/man9/Makefile	Mon Dec 24 16:58:54 2018
@@ -1,4 +1,4 @@
-#       $NetBSD: Makefile,v 1.433 2018/12/22 14:39:46 maxv Exp $
+#       $NetBSD: Makefile,v 1.434 2018/12/24 16:58:54 thorpej Exp $
 
 #	Makefile for section 9 (kernel function and variable) manual pages.
 
@@ -55,7 +55,8 @@ MAN=	accept_filter.9 accf_data.9 accf_ht
 	splraiseipl.9 \
 	store.9 suspendsched.9 \
 	sysctl.9 sysmon_envsys.9 sysmon_pswitch.9 sysmon_taskq.9 tc.9 \
-	tcp_congctl.9 timecounter.9 time_second.9 todr.9 ts2timo.9 tvtohz.9 \
+	tcp_congctl.9 threadpool.9 timecounter.9 time_second.9 todr.9 \
+	ts2timo.9 tvtohz.9 \
 	ucas.9 uiomove.9 ucom.9 userret.9 \
 	vattr.9 veriexec.9 vcons.9 vfs.9 vfs_hooks.9 vfsops.9 vfssubr.9 \
 	video.9 vme.9 \
@@ -915,6 +916,18 @@ MLINKS+=tc.9 tc_intr_establish.9 \
 	tc.9 tc_badaddr.9 \
 	tc.9 TC_DENSE_TO_SPARSE.9 \
 	tc.9 TC_PHYS_TO_UNCACHED.9
+MLINKS+=threadpool.9 threadpool_get.9 \
+	threadpool.9 threadpool_put.9 \
+	threadpool.9 threadpool_percpu_get.9 \
+	threadpool.9 threadpool_percpu_put.9 \
+	threadpool.9 threadpool_percpu_ref.9 \
+	threadpool.9 threadpool_percpu_ref_remote.9 \
+	threadpool.9 threadpool_job_init.9 \
+	threadpool.9 threadpool_job_destroy.9 \
+	threadpool.9 threadpool_job_done.9 \
+	threadpool.9 threadpool_schedule_job.9 \
+	threadpool.9 threadpool_cancel_job.9 \
+	threadpool.9 threadpool_cancel_job_async.9
 MLINKS+=todr.9 todr_gettime.9 todr.9 todr_settime.9 \
 	todr.9 clock_ymdhms_to_secs.9 todr.9 clock_secs_to_ymdhms.9
 

Index: src/sys/kern/files.kern
diff -u src/sys/kern/files.kern:1.28 src/sys/kern/files.kern:1.29
--- src/sys/kern/files.kern:1.28	Sun Dec 16 14:06:56 2018
+++ src/sys/kern/files.kern	Mon Dec 24 16:58:54 2018
@@ -1,4 +1,4 @@
-#	$NetBSD: files.kern,v 1.28 2018/12/16 14:06:56 rmind Exp $
+#	$NetBSD: files.kern,v 1.29 2018/12/24 16:58:54 thorpej Exp $
 
 #
 # kernel sources
@@ -87,6 +87,7 @@ file	kern/kern_synch.c		kern
 file	kern/kern_syscall.c		kern
 file	kern/kern_sysctl.c		kern
 file	kern/kern_tc.c			kern
+file	kern/kern_threadpool.c		kern
 file	kern/kern_time.c		kern
 file	kern/kern_timeout.c		kern
 file	kern/kern_turnstile.c		kern

Index: src/sys/rump/librump/rumpkern/Makefile.rumpkern
diff -u src/sys/rump/librump/rumpkern/Makefile.rumpkern:1.172 src/sys/rump/librump/rumpkern/Makefile.rumpkern:1.173
--- src/sys/rump/librump/rumpkern/Makefile.rumpkern:1.172	Sun Dec 16 14:06:56 2018
+++ src/sys/rump/librump/rumpkern/Makefile.rumpkern	Mon Dec 24 16:58:54 2018
@@ -1,4 +1,4 @@
-#	$NetBSD: Makefile.rumpkern,v 1.172 2018/12/16 14:06:56 rmind Exp $
+#	$NetBSD: Makefile.rumpkern,v 1.173 2018/12/24 16:58:54 thorpej Exp $
 #
 
 .include "${RUMPTOP}/Makefile.rump"
@@ -93,6 +93,7 @@ SRCS+=	init_sysctl_base.c	\
 	kern_syscall.c		\
 	kern_sysctl.c		\
 	kern_tc.c		\
+	kern_threadpool.c	\
 	kern_time.c		\
 	kern_timeout.c		\
 	kern_uidinfo.c		\

Index: src/sys/sys/param.h
diff -u src/sys/sys/param.h:1.573 src/sys/sys/param.h:1.574
--- src/sys/sys/param.h:1.573	Fri Dec 21 09:00:32 2018
+++ src/sys/sys/param.h	Mon Dec 24 16:58:54 2018
@@ -1,4 +1,4 @@
-/*	$NetBSD: param.h,v 1.573 2018/12/21 09:00:32 msaitoh Exp $	*/
+/*	$NetBSD: param.h,v 1.574 2018/12/24 16:58:54 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -67,7 +67,7 @@
  *	2.99.9		(299000900)
  */
 
-#define	__NetBSD_Version__	899002800	/* NetBSD 8.99.28 */
+#define	__NetBSD_Version__	899002900	/* NetBSD 8.99.29 */
 
 #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \
     (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)

Index: src/tests/kernel/Makefile
diff -u src/tests/kernel/Makefile:1.54 src/tests/kernel/Makefile:1.55
--- src/tests/kernel/Makefile:1.54	Sun Nov 11 01:26:08 2018
+++ src/tests/kernel/Makefile	Mon Dec 24 16:58:54 2018
@@ -1,4 +1,4 @@
-# $NetBSD: Makefile,v 1.54 2018/11/11 01:26:08 riastradh Exp $
+# $NetBSD: Makefile,v 1.55 2018/12/24 16:58:54 thorpej Exp $
 
 NOMAN=		# defined
 
@@ -25,6 +25,9 @@ TESTS_SH+=	t_trapsignal
 TESTS_SH+=	t_interp
 TESTS_SH+=	t_procpath
 
+SUBDIR+=	threadpool_tester
+TESTS_SH+=	t_threadpool
+
 BINDIR=		${TESTSDIR}
 PROGS=		h_ps_strings1
 PROGS+=		h_ps_strings2

Added files:

Index: src/share/man/man9/threadpool.9
diff -u /dev/null src/share/man/man9/threadpool.9:1.1
--- /dev/null	Mon Dec 24 16:58:54 2018
+++ src/share/man/man9/threadpool.9	Mon Dec 24 16:58:54 2018
@@ -0,0 +1,336 @@
+.\" $NetBSD: threadpool.9,v 1.1 2018/12/24 16:58:54 thorpej Exp $
+.\"
+.\" Copyright (c) 2014 The NetBSD Foundation, Inc.
+.\" All rights reserved.
+.\"
+.\" This code is derived from software contributed to The NetBSD Foundation
+.\" by Taylor R. Campbell.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd December 24, 2018
+.Dt THREADPOOL 9
+.Os
+.\"
+.Sh NAME
+.Nm threadpool
+.Nd shared pools of kthreads
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SYNOPSIS
+.In sys/threadpool.h
+.\""""""""""""""""""""""""""""""""""""
+.Ft int
+.Fn threadpool_get "threadpool_t **poolp" "pri_t pri"
+.\"
+.Ft void
+.Fn threadpool_put "threadpool_t *pool" "pri_t pri"
+.\""""""""""""""""""""""""""""""""""""
+.Ft int
+.Fn threadpool_percpu_get "threadpool_percpu_t **pool_percpup" "pri_t pri"
+.\"
+.Ft void
+.Fn threadpool_percpu_put "threadpool_percpu_t *pool_percpu" "pri_t pri"
+.\"
+.Ft threadpool_t *
+.Fn threadpool_percpu_ref "threadpool_percpu_t *pool"
+.\"
+.Ft threadpool_t *
+.Fn threadpool_percpu_ref_remote "threadpool_percpu_t *pool" "struct cpu_info *ci"
+.\""""""""""""""""""""""""""""""""""""
+.Ft void
+.Fn threadpool_job_init "threadpool_job_t *job" "void (*fn)(threadpool_job_t *)" "kmutex_t *interlock"
+.\"
+.Ft void
+.Fn threadpool_job_destroy "threadpool_job_t *job"
+.\"
+.Ft void
+.Fn threadpool_job_done "threadpool_job_t *job"
+.\""""""""""""""""""""""""""""""""""""
+.Ft void
+.Fn threadpool_schedule_job "threadpool_t *pool" "threadpool_job_t *job"
+.\"
+.Ft void
+.Fn threadpool_cancel_job "threadpool_t *pool" "threadpool_job_t *job"
+.\"
+.Ft bool
+.Fn threadpool_cancel_job_async "threadpool_t *pool" "threadpool_job_t *job"
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh DESCRIPTION
+The
+.Nm
+abstraction is provided to share a pool of
+.Xr kthread 9
+kernel threads for medium- to long-term actions, called jobs, which can
+be scheduled from contexts that do not allow sleeping.
+.Pp
+For each priority level, there is one unbound thread pool, and one
+collection of per-CPU thread pools.
+Access to the unbound thread pools is provided by
+.Fn threadpool_get
+and
+.Fn threadpool_put .
+Access to the per-CPU thread pools is provided by
+.Fn threadpool_percpu_get
+and
+.Fn threadpool_percpu_put .
+.Pp
+Job state is stored in the
+.Vt threadpool_job_t
+object.
+Callers of the
+.Nm
+abstraction
+must allocate memory for
+.Vt threadpool_job_t
+objects, but should consider them opaque, and should not inspect or
+copy them.
+Each job represented by a
+.Vt threadpool_job_t
+object will be run only once at a time, until the action associated
+with it calls
+.Fn threadpool_job_done .
+.Pp
+Jobs are run in thread context and may take arbitrarily long to run or
+sleep arbitrarily long.
+.\" The
+.\" .Nm
+.\" abstraction is intended as a building block for cheaper abstractions,
+.\" namely
+.\" .Xr task 9
+.\" and
+.\" .Xr workqueue 9 .
+.\" It should generally not be used directly.
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh FUNCTIONS
+.\"
+.Bl -tag -width abcd
+.\""""""""""""""""""""""""""""""""""""
+.It Fn threadpool_get "poolp" "pri"
+Obtain a reference to the unbound thread pool at priority
+.Fa pri
+and store it in
+.Fa poolp .
+.Pp
+May sleep.
+.\"
+.It Fn threadpool_put "pool" "pri"
+Release the reference to the unbound thread pool
+.Fa pool
+at priority
+.Fa pri ,
+which must be the same as the priority that was passed to
+.Fn threadpool_get
+to obtain
+.Fa pool .
+.Pp
+May sleep.
+.Pp
+Do not use
+.Fn threadpool_put
+with thread pools obtained from
+.Fn threadpool_percpu_ref
+or
+.Fn threadpool_percpu_ref_remote .
+.\""""""""""""""""""""""""""""""""""""
+.It Fn threadpool_percpu_get "pool_percpup" "pri"
+Obtain a reference to the per-CPU thread pool at priority
+.Fa pri
+and store it in
+.Fa pool_percpup .
+.Pp
+Use
+.Fn threadpool_percpu_ref
+or
+.Fn threadpool_percpu_ref_remote
+with it to get at the thread pool for a particular CPU.
+.Pp
+May sleep.
+.\"
+.It Fn threadpool_percpu_put "pool_percpu" "pri"
+Release a reference to the per-CPU thread pool
+.Fa pool_percpu
+at priority
+.Fa pri .
+.Pp
+May sleep.
+.\"
+.It Fn threadpool_percpu_ref "pool_percpu"
+Return the thread pool in
+.Fa pool_percpu
+for the current CPU.
+.Pp
+The resulting thread pool pointer is stable until
+.Fa pool_percpu
+is released with
+.Fn threadpool_percpu_put .
+Using it to schedule or cancel a job does not require being on the same
+CPU.
+.Pp
+Do not use
+.Fn threadpool_put
+with thread pools obtained from
+.Fn threadpool_percpu_ref .
+.\"
+.It Fn threadpool_percpu_ref_remote "pool_percpu" "ci"
+Return the thread pool in
+.Fa pool_percpu
+for the CPU whose
+.Vt struct cpu_info
+is given by
+.Fa ci .
+.Pp
+The resulting thread pool pointer is stable until
+.Fa pool_percpu
+is released with
+.Fn threadpool_percpu_put .
+Using it to schedule or cancel a job does not require being on the same
+CPU, but it is faster and friendlier to the cache to use
+.Fn threadpool_percpu_ref
+and use the resulting thread pool only on the same CPU.
+.Pp
+Do not use
+.Fn threadpool_put
+with thread pools obtained from
+.Fn threadpool_percpu_ref_remote .
+.\""""""""""""""""""""""""""""""""""""
+.It Fn threadpool_job_init "job" "fn" "interlock"
+Initialize the threadpool job
+.Fa job
+to run
+.Fa fn
+when scheduled and to interlock with
+.Fa interlock .
+.Pp
+The mutex
+.Fa interlock
+is used to synchronize job scheduling and completion.
+The action
+.Fa fn
+is required to eventually call
+.Fn threadpool_job_done ,
+with
+.Fa interlock
+held.
+This is so that while the job is running and may be waiting for work
+to do, scheduling the job has no effect, but as soon as the job is
+done, scheduling the job will cause it to run again.
+.Pp
+To change the action of a job, you must use
+.Fn threadpool_job_destroy
+first and then call
+.Fn threadpool_job_init
+again.
+.\"
+.It Fn threadpool_job_destroy "job"
+Destroy the threadpool job
+.Fa job .
+.Fa job
+must not currently be scheduled to run.
+If it may still be scheduled, you can use
+.Fn threadpool_cancel_job
+to cancel it.
+However,
+.Fn threadpool_cancel_job_async
+is not enough.
+.\"
+.It Fn threadpool_job_done "job"
+Notify that
+.Fa job
+is done, so that subsequent calls to
+.Fn threadpool_schedule_job
+will cause it to re-run its action.
+.Pp
+.Fn threadpool_job_done
+must be called exactly once by a job's action, and must not be called in
+any other context.
+.\""""""""""""""""""""""""""""""""""""
+.It Fn threadpool_schedule_job "pool" "job"
+Schedule
+.Fa job
+to run in a thread in
+.Fa pool
+as soon as possible, creating a new thread if necessary.
+.Pp
+Caller must hold the interlock of
+.Fa job .
+.Pp
+.Fn threadpool_schedule_job
+may be called in any context, including hard interrupt context, except
+at interrupt priority levels above
+.Vt IPL_VM .
+.\"
+.It Fn threadpool_cancel_job "pool" "job"
+Cancel
+.Fa job
+if it has been scheduled but has not yet been assigned a thread, or
+wait for it to complete if it has.
+.Pp
+Caller must hold the interlock of
+.Fa job ,
+which may be released in order to wait for completion.
+.Pp
+If
+.Fa job
+has not been scheduled,
+.Fn threadpool_cancel_job
+returns immediately.
+If
+.Fa job
+has been scheduled, it must have been scheduled in
+.Fa pool ,
+not in any other thread pool.
+.Pp
+May sleep.
+.\"
+.It Fn threadpool_cancel_job_async "pool" "job"
+Try to cancel
+.Fa job
+like
+.Fn threadpool_cancel_job ,
+but if it is already running, return
+.Vt false
+instead of waiting;
+otherwise, if it was not scheduled, or if it was scheduled and has not
+yet begun to run, return
+.Vt true .
+.Pp
+Caller must hold the interlock of
+.Fa job .
+.Pp
+.Fn threadpool_cancel_job_async
+may be called in any context, including hard interrupt context, except
+at interrupt priority levels above
+.Vt IPL_VM .
+.\""""""""""""""""""""""""""""""""""""
+.El
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh CODE REFERENCES
+The
+.Nm
+abstraction is implemented in
+.Pa sys/kern/kern_threadpool.c .
+.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+.Sh SEE ALSO
+.Xr kthread 9 ,
+.\" .Xr softint 9 ,
+.\" .Xr task 9 ,
+.Xr workqueue 9

Index: src/sys/kern/kern_threadpool.c
diff -u /dev/null src/sys/kern/kern_threadpool.c:1.1
--- /dev/null	Mon Dec 24 16:58:54 2018
+++ src/sys/kern/kern_threadpool.c	Mon Dec 24 16:58:54 2018
@@ -0,0 +1,1085 @@
+/*	$NetBSD: kern_threadpool.c,v 1.1 2018/12/24 16:58:54 thorpej Exp $	*/
+
+/*-
+ * Copyright (c) 2014, 2018 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Taylor R. Campbell and Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Thread pools.
+ *
+ * A thread pool is a collection of worker threads, idle or running
+ * jobs, together with an overseer thread that does not run jobs but
+ * can be given jobs to assign to a worker thread.  Scheduling a job in
+ * a thread pool does not allocate or even sleep at all, except perhaps
+ * on an adaptive lock, unlike kthread_create.  Jobs reuse threads, so
+ * they do not incur the expense of creating and destroying kthreads
+ * unless there is not much work to be done.
+ *
+ * A per-CPU thread pool (threadpool_percpu) is a collection of thread
+ * pools, one per CPU, each bound to its CPU.  For each priority level in
+ * use, there is one shared unbound thread pool (i.e., pool of threads
+ * not bound to any CPU) and one shared per-CPU thread pool.
+ *
+ * To use the unbound thread pool at priority pri, call
+ * threadpool_get(&pool, pri).  When you're done, call
+ * threadpool_put(pool, pri).
+ *
+ * To use the per-CPU thread pools at priority pri, call
+ * threadpool_percpu_get(&pool_percpu, pri), and then use the thread
+ * pool returned by threadpool_percpu_ref(pool_percpu) for the current
+ * CPU, or by threadpool_percpu_ref_remote(pool_percpu, ci) for another
+ * CPU.  When you're done, call threadpool_percpu_put(pool_percpu,
+ * pri).
+ *
+ * +--MACHINE-----------------------------------------------+
+ * | +--CPU 0-------+ +--CPU 1-------+     +--CPU n-------+ |
+ * | | <overseer 0> | | <overseer 1> | ... | <overseer n> | |
+ * | | <idle 0a>    | | <running 1a> | ... | <idle na>    | |
+ * | | <running 0b> | | <running 1b> | ... | <idle nb>    | |
+ * | | .            | | .            | ... | .            | |
+ * | | .            | | .            | ... | .            | |
+ * | | .            | | .            | ... | .            | |
+ * | +--------------+ +--------------+     +--------------+ |
+ * |            +--unbound---------+                        |
+ * |            | <overseer n+1>   |                        |
+ * |            | <idle (n+1)a>    |                        |
+ * |            | <running (n+1)b> |                        |
+ * |            +------------------+                        |
+ * +--------------------------------------------------------+
+ *
+ * XXX Why one overseer per CPU?  I did that originally to avoid
+ * touching remote CPUs' memory when scheduling a job, but that still
+ * requires interprocessor synchronization.  Perhaps we could get by
+ * with a single overseer thread, at the expense of another pointer in
+ * struct threadpool_job_impl to identify the CPU on which it must run
+ * in order for the overseer to schedule it correctly.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: kern_threadpool.c,v 1.1 2018/12/24 16:58:54 thorpej Exp $");
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/atomic.h>
+#include <sys/condvar.h>
+#include <sys/cpu.h>
+#include <sys/kernel.h>
+#include <sys/kmem.h>
+#include <sys/kthread.h>
+#include <sys/mutex.h>
+#include <sys/once.h>
+#include <sys/percpu.h>
+#include <sys/pool.h>
+#include <sys/proc.h>
+#include <sys/queue.h>
+#include <sys/systm.h>
+#include <sys/threadpool.h>
+
+static ONCE_DECL(threadpool_init_once)
+
+#define	THREADPOOL_INIT()					\
+do {								\
+	int threadpool_init_error =				\
+	    RUN_ONCE(&threadpool_init_once, threadpools_init);	\
+	KASSERT(threadpool_init_error == 0);			\
+} while (/*CONSTCOND*/0)
+	
+
+/* Data structures */
+
+TAILQ_HEAD(job_head, threadpool_job_impl);
+TAILQ_HEAD(thread_head, threadpool_thread);
+
+typedef struct threadpool_job_impl {
+	kmutex_t			*job_lock;		/* 1 */
+	struct threadpool_thread	*job_thread;		/* 1 */
+	TAILQ_ENTRY(threadpool_job_impl) job_entry;		/* 2 */
+	volatile unsigned int		job_refcnt;		/* 1 */
+				/* implicit pad on _LP64 */
+	kcondvar_t			job_cv;			/* 3 */
+	threadpool_job_fn_t		job_fn;			/* 1 */
+							    /* ILP32 / LP64 */
+	char				job_name[MAXCOMLEN];	/* 4 / 2 */
+} threadpool_job_impl_t;
+
+CTASSERT(sizeof(threadpool_job_impl_t) <= sizeof(threadpool_job_t));
+#define	THREADPOOL_JOB_TO_IMPL(j)	((threadpool_job_impl_t *)(j))
+#define	THREADPOOL_IMPL_TO_JOB(j)	((threadpool_job_t *)(j))
+
+struct threadpool_thread {
+	struct lwp			*tpt_lwp;
+	threadpool_t			*tpt_pool;
+	threadpool_job_impl_t		*tpt_job;
+	kcondvar_t			tpt_cv;
+	TAILQ_ENTRY(threadpool_thread)	tpt_entry;
+};
+
+struct threadpool {
+	kmutex_t			tp_lock;
+	struct threadpool_thread	tp_overseer;
+	struct job_head			tp_jobs;
+	struct thread_head		tp_idle_threads;
+	unsigned int			tp_refcnt;
+	int				tp_flags;
+#define	THREADPOOL_DYING	0x01
+	struct cpu_info			*tp_cpu;
+	pri_t				tp_pri;
+};
+
+static int	threadpool_hold(threadpool_t *);
+static void	threadpool_rele(threadpool_t *);
+
+static int	threadpool_percpu_create(threadpool_percpu_t **, pri_t);
+static void	threadpool_percpu_destroy(threadpool_percpu_t *);
+
+static void	threadpool_job_dead(threadpool_job_t *);
+
+static int	threadpool_job_hold(threadpool_job_impl_t *);
+static void	threadpool_job_rele(threadpool_job_impl_t *);
+
+static void	threadpool_overseer_thread(void *) __dead;
+static void	threadpool_thread(void *) __dead;
+
+static pool_cache_t	threadpool_thread_pc __read_mostly;
+
+static kmutex_t		threadpools_lock __cacheline_aligned;
+
+	/* Idle out threads after 30 seconds */
+#define	THREADPOOL_IDLE_TICKS	mstohz(30 * 1000)
+
+struct threadpool_unbound {
+	/* must be first; see threadpool_create() */
+	struct threadpool		tpu_pool;
+
+	/* protected by threadpools_lock */
+	LIST_ENTRY(threadpool_unbound)	tpu_link;
+	unsigned int			tpu_refcnt;
+};
+
+static LIST_HEAD(, threadpool_unbound) unbound_threadpools;
+
+static struct threadpool_unbound *
+threadpool_lookup_unbound(pri_t pri)
+{
+	struct threadpool_unbound *tpu;
+
+	LIST_FOREACH(tpu, &unbound_threadpools, tpu_link) {
+		if (tpu->tpu_pool.tp_pri == pri)
+			return tpu;
+	}
+	return NULL;
+}
+
+static void
+threadpool_insert_unbound(struct threadpool_unbound *tpu)
+{
+	KASSERT(threadpool_lookup_unbound(tpu->tpu_pool.tp_pri) == NULL);
+	LIST_INSERT_HEAD(&unbound_threadpools, tpu, tpu_link);
+}
+
+static void
+threadpool_remove_unbound(struct threadpool_unbound *tpu)
+{
+	KASSERT(threadpool_lookup_unbound(tpu->tpu_pool.tp_pri) == tpu);
+	LIST_REMOVE(tpu, tpu_link);
+}
+
+struct threadpool_percpu {
+	percpu_t *			tpp_percpu;
+	pri_t				tpp_pri;
+
+	/* protected by threadpools_lock */
+	LIST_ENTRY(threadpool_percpu)	tpp_link;
+	unsigned int			tpp_refcnt;
+};
+
+static LIST_HEAD(, threadpool_percpu) percpu_threadpools;
+
+static threadpool_percpu_t *
+threadpool_lookup_percpu(pri_t pri)
+{
+	threadpool_percpu_t *tpp;
+
+	LIST_FOREACH(tpp, &percpu_threadpools, tpp_link) {
+		if (tpp->tpp_pri == pri)
+			return tpp;
+	}
+	return NULL;
+}
+
+static void
+threadpool_insert_percpu(threadpool_percpu_t *tpp)
+{
+	KASSERT(threadpool_lookup_percpu(tpp->tpp_pri) == NULL);
+	LIST_INSERT_HEAD(&percpu_threadpools, tpp, tpp_link);
+}
+
+static void
+threadpool_remove_percpu(threadpool_percpu_t *tpp)
+{
+	KASSERT(threadpool_lookup_percpu(tpp->tpp_pri) == tpp);
+	LIST_REMOVE(tpp, tpp_link);
+}
+
+#ifdef THREADPOOL_VERBOSE
+#define	TP_LOG(x)		printf x
+#else
+#define	TP_LOG(x)		/* nothing */
+#endif /* THREADPOOL_VERBOSE */
+/* Set up the worker-thread cache, both pool registries, and their lock. */
+static int
+threadpools_init(void)
+{
+
+	threadpool_thread_pc =
+	    pool_cache_init(sizeof(struct threadpool_thread), 0, 0, 0,
+		"thplthrd", NULL, IPL_NONE, NULL, NULL, NULL);
+	/* Both registries start empty; threadpools_lock guards them. */
+	LIST_INIT(&unbound_threadpools);
+	LIST_INIT(&percpu_threadpools);
+	mutex_init(&threadpools_lock, MUTEX_DEFAULT, IPL_NONE);
+	/* Printed only when THREADPOOL_VERBOSE is defined. */
+	TP_LOG(("%s: sizeof(threadpool_job) = %zu\n",
+	    __func__, sizeof(threadpool_job_t)));
+
+	return 0;	/* this function has no failure path */
+}
+
+/* Thread pool creation */
+/* A pool priority is either PRI_NONE or lies in [PRI_USER, PRI_COUNT). */
+static bool
+threadpool_pri_is_valid(pri_t pri)
+{
+	return (pri == PRI_NONE) ? true : (PRI_USER <= pri && pri < PRI_COUNT);
+}
+/* Allocate a zeroed pool object of `size' bytes and start its overseer. */
+static int
+threadpool_create(threadpool_t **poolp, struct cpu_info *ci, pri_t pri,
+    size_t size)
+{
+	threadpool_t *const pool = kmem_zalloc(size, KM_SLEEP);
+	struct lwp *lwp;
+	int ktflags;
+	int error;
+
+	KASSERT(threadpool_pri_is_valid(pri));
+	/* size exceeds sizeof(*pool) when a wrapper struct embeds the pool. */
+	mutex_init(&pool->tp_lock, MUTEX_DEFAULT, IPL_VM);
+	/* XXX overseer */
+	TAILQ_INIT(&pool->tp_jobs);
+	TAILQ_INIT(&pool->tp_idle_threads);
+	pool->tp_refcnt = 0;
+	pool->tp_flags = 0;
+	pool->tp_cpu = ci;
+	pool->tp_pri = pri;
+	/* This reference belongs to the overseer, which drops it on exit. */
+	error = threadpool_hold(pool);
+	KASSERT(error == 0);
+	pool->tp_overseer.tpt_lwp = NULL;
+	pool->tp_overseer.tpt_pool = pool;
+	pool->tp_overseer.tpt_job = NULL;
+	cv_init(&pool->tp_overseer.tpt_cv, "poolover");
+
+	ktflags = KTHREAD_MPSAFE;
+	if (pri < PRI_KERNEL)
+		ktflags |= KTHREAD_TS;
+	error = kthread_create(pri, ktflags, ci, &threadpool_overseer_thread,
+	    &pool->tp_overseer, &lwp,
+	    "pooloverseer/%d@%d", (ci ? cpu_index(ci) : -1), (int)pri);
+	if (error)
+		goto fail0;
+	/* Publish the lwp; the overseer waits for tpt_lwp before running. */
+	mutex_spin_enter(&pool->tp_lock);
+	pool->tp_overseer.tpt_lwp = lwp;
+	cv_broadcast(&pool->tp_overseer.tpt_cv);
+	mutex_spin_exit(&pool->tp_lock);
+
+	*poolp = pool;
+	return 0;
+	/* kthread_create() failed: undo the setup and return its error. */
+fail0:	KASSERT(error);
+	atomic_dec_uint(&pool->tp_refcnt);	/* overseer's ref; none exists */
+	KASSERT(pool->tp_overseer.tpt_job == NULL);
+	KASSERT(pool->tp_overseer.tpt_pool == pool);
+	KASSERT(pool->tp_flags == 0);
+	KASSERT(pool->tp_refcnt == 0);
+	KASSERT(TAILQ_EMPTY(&pool->tp_idle_threads));
+	KASSERT(TAILQ_EMPTY(&pool->tp_jobs));
+	KASSERT(!cv_has_waiters(&pool->tp_overseer.tpt_cv));
+	cv_destroy(&pool->tp_overseer.tpt_cv);
+	mutex_destroy(&pool->tp_lock);
+	kmem_free(pool, size);
+	return error;
+}
+
+/* Thread pool destruction */
+/* Tell every thread of a job-free pool to exit, wait, then free the pool. */
+static void
+threadpool_destroy(threadpool_t *pool, size_t size)
+{
+	struct threadpool_thread *thread;
+
+	/* Mark the pool dying and wait for threads to commit suicide.  */
+	mutex_spin_enter(&pool->tp_lock);
+	KASSERT(TAILQ_EMPTY(&pool->tp_jobs));
+	pool->tp_flags |= THREADPOOL_DYING;
+	cv_broadcast(&pool->tp_overseer.tpt_cv);
+	TAILQ_FOREACH(thread, &pool->tp_idle_threads, tpt_entry)
+		cv_broadcast(&thread->tpt_cv);
+	while (0 < pool->tp_refcnt) {
+		TP_LOG(("%s: draining %u references...\n", __func__,
+		    pool->tp_refcnt));
+		cv_wait(&pool->tp_overseer.tpt_cv, &pool->tp_lock);
+	}
+	mutex_spin_exit(&pool->tp_lock);
+	/* Zero references: the overseer and all workers dropped theirs. */
+	KASSERT(pool->tp_overseer.tpt_job == NULL);
+	KASSERT(pool->tp_overseer.tpt_pool == pool);
+	KASSERT(pool->tp_flags == THREADPOOL_DYING);
+	KASSERT(pool->tp_refcnt == 0);
+	KASSERT(TAILQ_EMPTY(&pool->tp_idle_threads));
+	KASSERT(TAILQ_EMPTY(&pool->tp_jobs));
+	KASSERT(!cv_has_waiters(&pool->tp_overseer.tpt_cv));
+	cv_destroy(&pool->tp_overseer.tpt_cv);
+	mutex_destroy(&pool->tp_lock);
+	kmem_free(pool, size);	/* size must match threadpool_create()'s */
+}
+/* Take a reference on pool; fails with EBUSY if the count is saturated. */
+static int
+threadpool_hold(threadpool_t *pool)
+{
+	unsigned int refcnt;
+
+	do {
+		refcnt = pool->tp_refcnt;
+		if (refcnt == UINT_MAX)
+			return EBUSY;
+	} while (atomic_cas_uint(&pool->tp_refcnt, refcnt, (refcnt + 1))
+	    != refcnt);	/* retry if the count changed under us */
+
+	return 0;
+}
+/* Drop a reference; reaching zero wakes the waiter in threadpool_destroy(). */
+static void
+threadpool_rele(threadpool_t *pool)
+{
+	unsigned int refcnt;
+
+	do {
+		refcnt = pool->tp_refcnt;
+		KASSERT(0 < refcnt);
+		if (refcnt == 1) {	/* maybe last: go via tp_lock */
+			mutex_spin_enter(&pool->tp_lock);
+			refcnt = atomic_dec_uint_nv(&pool->tp_refcnt);
+			KASSERT(refcnt != UINT_MAX);
+			if (refcnt == 0)
+				cv_broadcast(&pool->tp_overseer.tpt_cv);
+			mutex_spin_exit(&pool->tp_lock);
+			return;
+		}
+	} while (atomic_cas_uint(&pool->tp_refcnt, refcnt, (refcnt - 1))
+	    != refcnt);	/* lock-free path while the count stays above 1 */
+}
+
+/* Unbound thread pools */
+/* Get (creating on first use) a reference to the unbound pool for pri. */
+int
+threadpool_get(threadpool_t **poolp, pri_t pri)
+{
+	struct threadpool_unbound *tpu, *tmp = NULL;
+	int error;
+
+	THREADPOOL_INIT();
+
+	ASSERT_SLEEPABLE();
+
+	if (! threadpool_pri_is_valid(pri))
+		return EINVAL;
+	/* Creation runs unlocked, so the registry is re-checked afterwards. */
+	mutex_enter(&threadpools_lock);
+	tpu = threadpool_lookup_unbound(pri);
+	if (tpu == NULL) {
+		threadpool_t *new_pool;
+		mutex_exit(&threadpools_lock);
+		TP_LOG(("%s: No pool for pri=%d, creating one.\n",
+			__func__, (int)pri));
+		error = threadpool_create(&new_pool, NULL, pri, sizeof(*tpu));
+		if (error)
+			return error;	/* threadpool_create() failed */
+		KASSERT(new_pool != NULL);
+		tmp = container_of(new_pool, struct threadpool_unbound,
+		    tpu_pool);
+		mutex_enter(&threadpools_lock);
+		tpu = threadpool_lookup_unbound(pri);
+		if (tpu == NULL) {
+			TP_LOG(("%s: Won the creation race for pri=%d.\n",
+				__func__, (int)pri));
+			tpu = tmp;
+			tmp = NULL;
+			threadpool_insert_unbound(tpu);
+		}
+	}
+	KASSERT(tpu != NULL);
+	if (tpu->tpu_refcnt == UINT_MAX) {
+		mutex_exit(&threadpools_lock);
+		if (tmp != NULL)
+			threadpool_destroy(&tmp->tpu_pool, sizeof(*tpu));
+		return EBUSY;	/* reference count saturated */
+	}
+	tpu->tpu_refcnt++;
+	mutex_exit(&threadpools_lock);
+	/* A spare pool left in tmp means we lost the race; discard it. */
+	if (tmp != NULL)
+		threadpool_destroy(&tmp->tpu_pool, sizeof(*tpu));
+	KASSERT(tpu != NULL);
+	*poolp = &tpu->tpu_pool;
+	return 0;
+}
+/* Drop a threadpool_get() reference; the last one destroys the pool. */
+void
+threadpool_put(threadpool_t *pool, pri_t pri)
+{
+	struct threadpool_unbound *tpu =
+	    container_of(pool, struct threadpool_unbound, tpu_pool);
+
+	THREADPOOL_INIT();
+
+	ASSERT_SLEEPABLE();
+
+	KASSERT(threadpool_pri_is_valid(pri));
+
+	mutex_enter(&threadpools_lock);
+	KASSERT(tpu == threadpool_lookup_unbound(pri));
+	KASSERT(0 < tpu->tpu_refcnt);
+	if (--tpu->tpu_refcnt == 0) {
+		TP_LOG(("%s: Last reference for pri=%d, destroying pool.\n",
+			__func__, (int)pri));
+		threadpool_remove_unbound(tpu);
+	} else
+		tpu = NULL;
+	mutex_exit(&threadpools_lock);
+	/* tpu is non-NULL only if we dropped the last reference above. */
+	if (tpu)
+		threadpool_destroy(pool, sizeof(*tpu));
+}
+
+/* Per-CPU thread pools */
+/* Get (creating on first use) a reference to the per-CPU pools for pri. */
+int
+threadpool_percpu_get(threadpool_percpu_t **pool_percpup, pri_t pri)
+{
+	threadpool_percpu_t *pool_percpu, *tmp = NULL;
+	int error;
+
+	THREADPOOL_INIT();
+
+	ASSERT_SLEEPABLE();
+
+	if (! threadpool_pri_is_valid(pri))
+		return EINVAL;
+	/* Creation runs unlocked, so the registry is re-checked afterwards. */
+	mutex_enter(&threadpools_lock);
+	pool_percpu = threadpool_lookup_percpu(pri);
+	if (pool_percpu == NULL) {
+		mutex_exit(&threadpools_lock);
+		TP_LOG(("%s: No pool for pri=%d, creating one.\n",
+			__func__, (int)pri));
+		error = threadpool_percpu_create(&tmp, pri);
+		if (error)
+			return error;
+		KASSERT(tmp != NULL);
+		mutex_enter(&threadpools_lock);
+		pool_percpu = threadpool_lookup_percpu(pri);
+		if (pool_percpu == NULL) {
+			TP_LOG(("%s: Won the creation race for pri=%d.\n",
+				__func__, (int)pri));
+			pool_percpu = tmp;
+			tmp = NULL;
+			threadpool_insert_percpu(pool_percpu);
+		}
+	}
+	KASSERT(pool_percpu != NULL);
+	if (pool_percpu->tpp_refcnt == UINT_MAX) {
+		mutex_exit(&threadpools_lock);
+		if (tmp != NULL)
+			threadpool_percpu_destroy(tmp);
+		return EBUSY;	/* reference count saturated */
+	}
+	pool_percpu->tpp_refcnt++;
+	mutex_exit(&threadpools_lock);
+	/* A spare set left in tmp means we lost the race; discard it. */
+	if (tmp != NULL)
+		threadpool_percpu_destroy(tmp);
+	KASSERT(pool_percpu != NULL);
+	*pool_percpup = pool_percpu;
+	return 0;
+}
+/* Drop a threadpool_percpu_get() reference; the last one destroys the set. */
+void
+threadpool_percpu_put(threadpool_percpu_t *pool_percpu, pri_t pri)
+{
+
+	THREADPOOL_INIT();
+
+	ASSERT_SLEEPABLE();
+
+	KASSERT(threadpool_pri_is_valid(pri));
+
+	mutex_enter(&threadpools_lock);
+	KASSERT(pool_percpu == threadpool_lookup_percpu(pri));
+	KASSERT(0 < pool_percpu->tpp_refcnt);
+	if (--pool_percpu->tpp_refcnt == 0) {
+		TP_LOG(("%s: Last reference for pri=%d, destroying pool.\n",
+			__func__, (int)pri));
+		threadpool_remove_percpu(pool_percpu);
+	} else
+		pool_percpu = NULL;
+	mutex_exit(&threadpools_lock);
+	/* Non-NULL only if we dropped the last reference above. */
+	if (pool_percpu)
+		threadpool_percpu_destroy(pool_percpu);
+}
+/* Return the pool stored in the slot that percpu_getref() hands back. */
+threadpool_t *
+threadpool_percpu_ref(threadpool_percpu_t *pool_percpu)
+{
+	percpu_t *const pc = pool_percpu->tpp_percpu;
+	threadpool_t *mine;
+
+	/* The slot is only dereferenced between getref and putref. */
+	mine = *(threadpool_t **)percpu_getref(pc);
+	percpu_putref(pc);
+	return mine;
+}
+/* Return the pool stored in ci's per-CPU slot of pool_percpu. */
+threadpool_t *
+threadpool_percpu_ref_remote(threadpool_percpu_t *pool_percpu,
+    struct cpu_info *ci)
+{
+	percpu_t *const pc = pool_percpu->tpp_percpu;
+	threadpool_t *theirs;
+
+	/* The slot is only dereferenced inside the traverse section. */
+	percpu_traverse_enter();
+	theirs = *(threadpool_t **)percpu_getptr_remote(pc, ci);
+	percpu_traverse_exit();
+	return theirs;
+}
+/* Build a per-CPU pool set: one threadpool for every CPU, at priority pri. */
+static int
+threadpool_percpu_create(threadpool_percpu_t **pool_percpup, pri_t pri)
+{
+	threadpool_percpu_t *pool_percpu;
+	struct cpu_info *ci;
+	CPU_INFO_ITERATOR cii;
+	unsigned int i, j;
+	int error;
+
+	pool_percpu = kmem_zalloc(sizeof(*pool_percpu), KM_SLEEP);
+	if (pool_percpu == NULL) {
+		error = ENOMEM;
+		goto fail0;
+	}
+	pool_percpu->tpp_pri = pri;
+
+	pool_percpu->tpp_percpu = percpu_alloc(sizeof(threadpool_t *));
+	if (pool_percpu->tpp_percpu == NULL) {
+		error = ENOMEM;
+		goto fail1;
+	}
+	/* i counts the pools created so far, for unwinding at fail2. */
+	for (i = 0, CPU_INFO_FOREACH(cii, ci), i++) {
+		threadpool_t *pool;
+
+		error = threadpool_create(&pool, ci, pri, sizeof(*pool));
+		if (error)
+			goto fail2;
+		percpu_traverse_enter();
+		threadpool_t **const poolp =
+		    percpu_getptr_remote(pool_percpu->tpp_percpu, ci);
+		*poolp = pool;
+		percpu_traverse_exit();
+	}
+
+	/* Success!  */
+	*pool_percpup = (threadpool_percpu_t *)pool_percpu;
+	return 0;
+	/* Destroy only the first i pools, i.e. those actually created. */
+fail2:	for (j = 0, CPU_INFO_FOREACH(cii, ci), j++) {
+		if (i <= j)
+			break;
+		percpu_traverse_enter();
+		threadpool_t **const poolp =
+		    percpu_getptr_remote(pool_percpu->tpp_percpu, ci);
+		threadpool_t *const pool = *poolp;
+		percpu_traverse_exit();
+		threadpool_destroy(pool, sizeof(*pool));
+	}
+	percpu_free(pool_percpu->tpp_percpu, sizeof(threadpool_t *));
+fail1:	kmem_free(pool_percpu, sizeof(*pool_percpu));
+fail0:	return error;
+}
+/* Destroy each CPU's pool, then free the per-CPU storage and the set. */
+static void
+threadpool_percpu_destroy(threadpool_percpu_t *pool_percpu)
+{
+	struct cpu_info *ci;
+	CPU_INFO_ITERATOR cii;
+
+	for (CPU_INFO_FOREACH(cii, ci)) {
+		percpu_traverse_enter();
+		threadpool_t **const poolp =
+		    percpu_getptr_remote(pool_percpu->tpp_percpu, ci);
+		threadpool_t *const pool = *poolp;
+		percpu_traverse_exit();
+		threadpool_destroy(pool, sizeof(*pool)); /* outside traverse */
+	}
+
+	percpu_free(pool_percpu->tpp_percpu, sizeof(threadpool_t *));
+	kmem_free(pool_percpu, sizeof(*pool_percpu));
+}
+
+/* Thread pool jobs */
+/* Initialize caller-provided job storage with fn, its lock, and a name. */
+void __printflike(4,5)
+threadpool_job_init(threadpool_job_t *ext_job, threadpool_job_fn_t fn,
+    kmutex_t *lock, const char *fmt, ...)
+{
+	threadpool_job_impl_t *job = THREADPOOL_JOB_TO_IMPL(ext_job);
+	va_list ap;
+
+	va_start(ap, fmt);	/* fmt and its arguments form the job name */
+	(void)vsnprintf(job->job_name, sizeof(job->job_name), fmt, ap);
+	va_end(ap);
+
+	job->job_lock = lock;
+	job->job_thread = NULL;	/* NULL means "not scheduled" */
+	job->job_refcnt = 0;
+	cv_init(&job->job_cv, job->job_name);
+	job->job_fn = fn;
+}
+/* Installed as job_fn by threadpool_job_destroy(); panics if ever run. */
+static void
+threadpool_job_dead(threadpool_job_t *ext_job)
+{
+
+	panic("threadpool job %p ran after destruction", ext_job);
+}
+/* Wait for outstanding references to an idle job, then poison it. */
+void
+threadpool_job_destroy(threadpool_job_t *ext_job)
+{
+	threadpool_job_impl_t *job = THREADPOOL_JOB_TO_IMPL(ext_job);
+
+	ASSERT_SLEEPABLE();
+
+	KASSERTMSG((job->job_thread == NULL), "job %p still running", job);
+
+	mutex_enter(job->job_lock);
+	while (0 < job->job_refcnt)
+		cv_wait(&job->job_cv, job->job_lock);
+	mutex_exit(job->job_lock);
+	/* Poison the fields so that later use of the job is caught. */
+	job->job_lock = NULL;
+	KASSERT(job->job_thread == NULL);
+	KASSERT(job->job_refcnt == 0);
+	KASSERT(!cv_has_waiters(&job->job_cv));
+	cv_destroy(&job->job_cv);
+	job->job_fn = threadpool_job_dead;
+	(void)strlcpy(job->job_name, "deadjob", sizeof(job->job_name));
+}
+/* Take a reference on job; fails with EBUSY if the count is saturated. */
+static int
+threadpool_job_hold(threadpool_job_impl_t *job)
+{
+	unsigned int refcnt;
+	do {
+		refcnt = job->job_refcnt;
+		if (refcnt == UINT_MAX)
+			return EBUSY;
+	} while (atomic_cas_uint(&job->job_refcnt, refcnt, (refcnt + 1))
+	    != refcnt);	/* retry if the count changed under us */
+	
+	return 0;
+}
+/* Drop a job reference; reaching zero wakes threadpool_job_destroy(). */
+static void
+threadpool_job_rele(threadpool_job_impl_t *job)
+{
+	unsigned int refcnt;
+
+	do {
+		refcnt = job->job_refcnt;
+		KASSERT(0 < refcnt);
+		if (refcnt == 1) {	/* maybe last: go via job_lock */
+			mutex_enter(job->job_lock);
+			refcnt = atomic_dec_uint_nv(&job->job_refcnt);
+			KASSERT(refcnt != UINT_MAX);
+			if (refcnt == 0)
+				cv_broadcast(&job->job_cv);
+			mutex_exit(job->job_lock);
+			return;
+		}
+	} while (atomic_cas_uint(&job->job_refcnt, refcnt, (refcnt - 1))
+	    != refcnt);	/* lock-free path while the count stays above 1 */
+}
+/* Called by the running job itself, lock held, to mark itself finished. */
+void
+threadpool_job_done(threadpool_job_t *ext_job)
+{
+	threadpool_job_impl_t *job = THREADPOOL_JOB_TO_IMPL(ext_job);
+
+	KASSERT(mutex_owned(job->job_lock));
+	KASSERT(job->job_thread != NULL);
+	KASSERT(job->job_thread->tpt_lwp == curlwp);
+	/* Wake cancel waiters; job_thread == NULL permits rescheduling. */
+	cv_broadcast(&job->job_cv);
+	job->job_thread = NULL;
+}
+/* Schedule job on pool unless already scheduled; job lock must be held. */
+void
+threadpool_schedule_job(threadpool_t *pool, threadpool_job_t *ext_job)
+{
+	threadpool_job_impl_t *job = THREADPOOL_JOB_TO_IMPL(ext_job);
+
+	KASSERT(mutex_owned(job->job_lock));
+
+	/*
+	 * If the job's already running, let it keep running.  The job
+	 * is guaranteed by the interlock not to end early -- if it had
+	 * ended early, threadpool_job_done would have set job_thread
+	 * to NULL under the interlock.
+	 */
+	if (__predict_true(job->job_thread != NULL)) {
+		TP_LOG(("%s: job '%s' already runnining.\n",
+			__func__, job->job_name));
+		return;
+	}
+
+	/* Otherwise, try to assign a thread to the job.  */
+	mutex_spin_enter(&pool->tp_lock);
+	if (__predict_false(TAILQ_EMPTY(&pool->tp_idle_threads))) {
+		/* Nobody's idle.  Give it to the overseer.  */
+		TP_LOG(("%s: giving job '%s' to overseer.\n",
+			__func__, job->job_name));
+		job->job_thread = &pool->tp_overseer;
+		TAILQ_INSERT_TAIL(&pool->tp_jobs, job, job_entry);
+	} else {
+		/* Assign it to the first idle thread.  */
+		job->job_thread = TAILQ_FIRST(&pool->tp_idle_threads);
+		TP_LOG(("%s: giving job '%s' to idle thread %p.\n",
+			__func__, job->job_name, job->job_thread));
+		TAILQ_REMOVE(&pool->tp_idle_threads, job->job_thread,
+		    tpt_entry);
+		threadpool_job_hold(job);	/* worker's ref; result ignored */
+		job->job_thread->tpt_job = job;
+	}
+
+	/* Notify whomever we gave it to, overseer or idle thread.  */
+	KASSERT(job->job_thread != NULL);
+	cv_broadcast(&job->job_thread->tpt_cv);
+	mutex_spin_exit(&pool->tp_lock);
+}
+/* Try to cancel without sleeping; true means the job will not be running. */
+bool
+threadpool_cancel_job_async(threadpool_t *pool, threadpool_job_t *ext_job)
+{
+	threadpool_job_impl_t *job = THREADPOOL_JOB_TO_IMPL(ext_job);
+
+	KASSERT(mutex_owned(job->job_lock));
+
+	/*
+	 * XXXJRT This fails (albeit safely) when all of the following
+	 * are true:
+	 *
+	 *	=> "pool" is something other than what the job was
+	 *	   scheduled on.  This can legitimately occur if,
+	 *	   for example, a job is percpu-scheduled on CPU0
+	 *	   and then CPU1 attempts to cancel it without taking
+	 *	   a remote pool reference.  (this might happen by
+	 *	   "luck of the draw").
+	 *
+	 *	=> "job" is not yet running, but is assigned to the
+	 *	   overseer.
+	 *
+	 * When this happens, this code makes the determination that
+	 * the job is already running.  The failure mode is that the
+	 * caller is told the job is running, and thus has to wait.
+	 * The overseer will eventually get to it and the job will
+	 * proceed as if it had been already running.
+	 */
+
+	if (job->job_thread == NULL) {
+		/* Nothing to do.  Guaranteed not running.  */
+		return true;
+	} else if (job->job_thread == &pool->tp_overseer) {
+		/* Unlink it so it won't run; assumes it is still on tp_jobs.  */
+		job->job_thread = NULL;
+		mutex_spin_enter(&pool->tp_lock);
+		TAILQ_REMOVE(&pool->tp_jobs, job, job_entry);
+		mutex_spin_exit(&pool->tp_lock);
+		return true;
+	} else {
+		/* Too late -- already running.  */
+		return false;
+	}
+}
+/* Cancel job, sleeping on job_cv until it finishes if it already started. */
+void
+threadpool_cancel_job(threadpool_t *pool, threadpool_job_t *ext_job)
+{
+	threadpool_job_impl_t *job = THREADPOOL_JOB_TO_IMPL(ext_job);
+
+	ASSERT_SLEEPABLE();
+
+	KASSERT(mutex_owned(job->job_lock));
+
+	if (threadpool_cancel_job_async(pool, ext_job))
+		return;
+
+	/* Already running.  Wait for it to complete.  */
+	while (job->job_thread != NULL)	/* cleared by threadpool_job_done() */
+		cv_wait(&job->job_cv, job->job_lock);
+}
+
+/* Thread pool overseer thread */
+/* Per-pool overseer: spawns workers and hands queued jobs to idle ones. */
+static void __dead
+threadpool_overseer_thread(void *arg)
+{
+	struct threadpool_thread *const overseer = arg;
+	threadpool_t *const pool = overseer->tpt_pool;
+	struct lwp *lwp = NULL;
+	int ktflags;
+	int error;
+
+	KASSERT((pool->tp_cpu == NULL) || (pool->tp_cpu == curcpu()));
+
+	/* Wait until we're initialized.  */
+	mutex_spin_enter(&pool->tp_lock);
+	while (overseer->tpt_lwp == NULL)
+		cv_wait(&overseer->tpt_cv, &pool->tp_lock);
+
+	TP_LOG(("%s: starting.\n", __func__));
+
+	for (;;) {
+		/* Wait until there's a job.  */
+		while (TAILQ_EMPTY(&pool->tp_jobs)) {
+			if (ISSET(pool->tp_flags, THREADPOOL_DYING)) {
+				TP_LOG(("%s: THREADPOOL_DYING\n",
+					__func__));
+				break;
+			}
+			cv_wait(&overseer->tpt_cv, &pool->tp_lock);
+		}
+		if (__predict_false(TAILQ_EMPTY(&pool->tp_jobs)))
+			break;
+
+		/* If there are no threads, we'll have to try to start one.  */
+		if (TAILQ_EMPTY(&pool->tp_idle_threads)) {
+			TP_LOG(("%s: Got a job, need to create a thread.\n",
+				__func__));
+			error = threadpool_hold(pool);
+			if (error) {
+				(void)kpause("thrdplrf", false, hz,
+				    &pool->tp_lock);
+				continue;
+			}
+			mutex_spin_exit(&pool->tp_lock);
+			/* Unlocked: the PR_WAITOK allocation below may wait. */
+			struct threadpool_thread *const thread =
+			    pool_cache_get(threadpool_thread_pc, PR_WAITOK);
+			thread->tpt_lwp = NULL;
+			thread->tpt_pool = pool;
+			thread->tpt_job = NULL;
+			cv_init(&thread->tpt_cv, "poolthrd");
+
+			ktflags = 0;
+			ktflags |= KTHREAD_MPSAFE;
+			if (pool->tp_pri < PRI_KERNEL)
+				ktflags |= KTHREAD_TS;
+			error = kthread_create(pool->tp_pri, ktflags,
+			    pool->tp_cpu, &threadpool_thread, thread, &lwp,
+			    "poolthread/%d@%d",
+			    (pool->tp_cpu ? cpu_index(pool->tp_cpu) : -1),
+			    (int)pool->tp_pri);
+
+			mutex_spin_enter(&pool->tp_lock);
+			if (error) {
+				pool_cache_put(threadpool_thread_pc, thread);
+				threadpool_rele(pool);
+				/* XXX What to do to wait for memory?  */
+				(void)kpause("thrdplcr", false, hz,
+				    &pool->tp_lock);
+				continue;
+			}
+			KASSERT(lwp != NULL);
+			TAILQ_INSERT_TAIL(&pool->tp_idle_threads, thread,
+			    tpt_entry);
+			thread->tpt_lwp = lwp;
+			lwp = NULL;
+			cv_broadcast(&thread->tpt_cv);
+			continue;
+		}
+
+		/* There are idle threads, so try giving one a job.  */
+		bool rele_job = true;
+		threadpool_job_impl_t *const job = TAILQ_FIRST(&pool->tp_jobs);
+		/* Leave it queued so a concurrent cancel can still unlink it. */
+		error = threadpool_job_hold(job);
+		if (error) {
+			/* Job is still on tp_jobs; retry after a pause. */
+			(void)kpause("pooljob", false, hz, &pool->tp_lock);
+			continue;
+		}
+		mutex_spin_exit(&pool->tp_lock);
+		/* Lock order: the job's lock is taken before tp_lock. */
+		mutex_enter(job->job_lock);
+		/* If the job was cancelled, we'll no longer be its thread.  */
+		if (__predict_true(job->job_thread == overseer)) {
+			mutex_spin_enter(&pool->tp_lock);
+			if (__predict_false(
+				    TAILQ_EMPTY(&pool->tp_idle_threads))) {
+				/*
+				 * Someone else snagged the thread
+				 * first.  We'll have to try again;
+				 * the job is still on tp_jobs.
+				 */
+				TP_LOG(("%s: '%s' lost race to use idle thread.\n",
+					__func__, job->job_name));
+			} else {
+				/*
+				 * Assign the job to the thread and
+				 * wake the thread so it starts work.
+				 */
+				struct threadpool_thread *const thread =
+				    TAILQ_FIRST(&pool->tp_idle_threads);
+				/* Only now does the job leave tp_jobs. */
+				TP_LOG(("%s: '%s' gets thread %p\n",
+					__func__, job->job_name, thread));
+				KASSERT(thread->tpt_job == NULL);
+				TAILQ_REMOVE(&pool->tp_jobs, job, job_entry);
+				TAILQ_REMOVE(&pool->tp_idle_threads, thread,
+				    tpt_entry);
+				thread->tpt_job = job;
+				job->job_thread = thread;
+				cv_broadcast(&thread->tpt_cv);
+				/* Gave the thread our job reference.  */
+				rele_job = false;
+			}
+			mutex_spin_exit(&pool->tp_lock);
+		}
+		mutex_exit(job->job_lock);
+		if (__predict_false(rele_job))
+			threadpool_job_rele(job);
+
+		mutex_spin_enter(&pool->tp_lock);
+	}
+	mutex_spin_exit(&pool->tp_lock);
+
+	TP_LOG(("%s: exiting.\n", __func__));
+
+	threadpool_rele(pool);
+	kthread_exit(0);
+}
+
+/* Thread pool thread */
+/* Worker thread: runs assigned jobs; exits on idle timeout or pool death. */
+static void __dead
+threadpool_thread(void *arg)
+{
+	struct threadpool_thread *const thread = arg;
+	threadpool_t *const pool = thread->tpt_pool;
+
+	KASSERT((pool->tp_cpu == NULL) || (pool->tp_cpu == curcpu()));
+
+	/* Wait until we're initialized and on the queue.  */
+	mutex_spin_enter(&pool->tp_lock);
+	while (thread->tpt_lwp == NULL)
+		cv_wait(&thread->tpt_cv, &pool->tp_lock);
+
+	TP_LOG(("%s: starting.\n", __func__));
+
+	KASSERT(thread->tpt_lwp == curlwp);
+	for (;;) {
+		/* Wait until we are assigned a job.  */
+		while (thread->tpt_job == NULL) {
+			if (ISSET(pool->tp_flags, THREADPOOL_DYING)) {
+				TP_LOG(("%s: THREADPOOL_DYING\n",
+					__func__));
+				break;
+			}
+			if (cv_timedwait(&thread->tpt_cv, &pool->tp_lock,
+					 THREADPOOL_IDLE_TICKS))
+				break;
+		}
+		if (__predict_false(thread->tpt_job == NULL)) {
+			TAILQ_REMOVE(&pool->tp_idle_threads, thread,
+			    tpt_entry);
+			break;
+		}
+		/* tpt_job was set by whoever took us off the idle list. */
+		threadpool_job_impl_t *const job = thread->tpt_job;
+		KASSERT(job != NULL);
+		mutex_spin_exit(&pool->tp_lock);
+
+		TP_LOG(("%s: running job '%s' on thread %p.\n",
+			__func__, job->job_name, thread));
+
+		/* Set our lwp name to reflect what job we're doing.  */
+		lwp_lock(curlwp);
+		char *const lwp_name = curlwp->l_name;
+		curlwp->l_name = job->job_name;
+		lwp_unlock(curlwp);
+
+		/* Run the job.  */
+		(*job->job_fn)(THREADPOOL_IMPL_TO_JOB(job));
+
+		/* Restore our lwp name.  */
+		lwp_lock(curlwp);
+		curlwp->l_name = lwp_name;
+		lwp_unlock(curlwp);
+
+		/* Job is done and its name is unreferenced.  Release it.  */
+		threadpool_job_rele(job);
+		/* Go back on the idle list, ready for the next assignment. */
+		mutex_spin_enter(&pool->tp_lock);
+		KASSERT(thread->tpt_job == job);
+		thread->tpt_job = NULL;
+		TAILQ_INSERT_TAIL(&pool->tp_idle_threads, thread, tpt_entry);
+	}
+	mutex_spin_exit(&pool->tp_lock);
+
+	TP_LOG(("%s: thread %p exiting.\n", __func__, thread));
+
+	KASSERT(!cv_has_waiters(&thread->tpt_cv));
+	cv_destroy(&thread->tpt_cv);
+	pool_cache_put(threadpool_thread_pc, thread);
+	threadpool_rele(pool);	/* the ref the overseer took for us */
+	kthread_exit(0);
+}

Index: src/sys/sys/threadpool.h
diff -u /dev/null src/sys/sys/threadpool.h:1.1
--- /dev/null	Mon Dec 24 16:58:54 2018
+++ src/sys/sys/threadpool.h	Mon Dec 24 16:58:54 2018
@@ -0,0 +1,77 @@
+/*	$NetBSD: threadpool.h,v 1.1 2018/12/24 16:58:54 thorpej Exp $	*/
+
+/*-
+ * Copyright (c) 2014 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Taylor R. Campbell.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef	_SYS_THREADPOOL_H_
+#define	_SYS_THREADPOOL_H_
+
+#if !defined(_KERNEL)
+#error "not supposed to be exposed to userland"
+#endif
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/mutex.h>
+#include <sys/queue.h>
+
+typedef struct threadpool threadpool_t;
+typedef struct threadpool_percpu threadpool_percpu_t;
+/* Caller-allocated job storage; its contents are private to threadpool. */
+typedef struct threadpool_job {
+#ifdef _LP64
+	void	*opaque[11];
+#else
+	void	*opaque[13];
+#endif /* _LP64 */
+} threadpool_job_t;
+/* Function run on a pool thread when a scheduled job is executed. */
+typedef void (*threadpool_job_fn_t)(threadpool_job_t *);
+/* Unbound pools: one shared pool per priority, reference counted. */
+int	threadpool_get(threadpool_t **, pri_t);
+void	threadpool_put(threadpool_t *, pri_t);
+/* Per-CPU pools: one pool per CPU for a priority, reference counted. */
+int	threadpool_percpu_get(threadpool_percpu_t **, pri_t);
+void	threadpool_percpu_put(threadpool_percpu_t *, pri_t);
+threadpool_t *
+	threadpool_percpu_ref(threadpool_percpu_t *);
+threadpool_t *
+	threadpool_percpu_ref_remote(threadpool_percpu_t *,
+	    struct cpu_info *);
+/* Job lifecycle; the kmutex_t passed to init is the job's interlock. */
+void	threadpool_job_init(threadpool_job_t *, threadpool_job_fn_t,
+	    kmutex_t *, const char *, ...) __printflike(4,5);
+void	threadpool_job_destroy(threadpool_job_t *);
+void	threadpool_job_done(threadpool_job_t *);
+/* The three calls below must be made with the job's lock held. */
+void	threadpool_schedule_job(threadpool_t *, threadpool_job_t *);
+void	threadpool_cancel_job(threadpool_t *, threadpool_job_t *);
+bool	threadpool_cancel_job_async(threadpool_t *, threadpool_job_t *);
+
+#endif	/* _SYS_THREADPOOL_H_ */

Index: src/tests/kernel/t_threadpool.sh
diff -u /dev/null src/tests/kernel/t_threadpool.sh:1.1
--- /dev/null	Mon Dec 24 16:58:54 2018
+++ src/tests/kernel/t_threadpool.sh	Mon Dec 24 16:58:54 2018
@@ -0,0 +1,169 @@
+# $NetBSD: t_threadpool.sh,v 1.1 2018/12/24 16:58:54 thorpej Exp $
+#
+# Copyright (c) 2018 The NetBSD Foundation, Inc.
+# All rights reserved.
+#
+# This code is derived from software contributed to The NetBSD Foundation
+# by Jason R. Thorpe.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+
+# Pick an arbitrary priority that is not likely to be used.
+tp_pri=5	# written to the get_/run_/put_ sysctls below
+
+# The kernel test jig includes a 1 second delay in the job.  We need to
+# wait longer for it to complete.
+job_delay=2	# seconds; passed to sleep
+# read_sysctl NAME VALUE: check that sysctl prints exactly "NAME = VALUE".
+read_sysctl() {
+	echo "${1} = ${2}" >expout
+	atf_check -s eq:0 -o file:expout -e empty sysctl ${1}
+}
+# write_sysctl NAME VALUE: set NAME=VALUE; must exit 0 with empty stderr.
+write_sysctl() {
+	atf_check -s eq:0 -o ignore -e empty sysctl -w "${1}=${2}"
+}
+# write_sysctl_fail NAME VALUE MSG: the write must exit 1 with stderr MSG.
+write_sysctl_fail() {
+	echo "${3}" >experr
+	atf_check -s eq:1 -o ignore -e file:experr sysctl -w "${1}=${2}"
+}
+# Test case: get an unbound pool, run the test job on it twice, put it.
+atf_test_case unbound cleanup
+unbound_head() {
+	atf_set "descr" "Test unbound thread pools"
+	atf_set "require.user" "root"
+}
+unbound_body() {
+	modload $(atf_get_srcdir)/threadpool_tester/threadpool_tester.kmod
+	if [ $? -ne 0 ]; then
+		atf_skip "cannot load threadpool_tester.kmod"
+	fi
+
+	# Ensure that the state is clean.
+	read_sysctl kern.threadpool_tester.test_value 0
+
+	# Create an unbound pool.
+	write_sysctl kern.threadpool_tester.get_unbound $tp_pri
+
+	# Do it again.  We expect this to fail, but the test jig will
+	# do some additional threadpool object lifecycle validation.
+	# (It will not hold the additional reference.)
+	write_sysctl_fail kern.threadpool_tester.get_unbound $tp_pri \
+	    "sysctl: kern.threadpool_tester.get_unbound: File exists"
+
+	# Schedule the test jig job on the pool.
+	# Wait for a short period of time and then check that the job
+	# successfully ran.
+	write_sysctl kern.threadpool_tester.run_unbound $tp_pri
+	sleep $job_delay
+	read_sysctl kern.threadpool_tester.test_value 1
+
+	# ...and again.
+	write_sysctl kern.threadpool_tester.run_unbound $tp_pri
+	sleep $job_delay
+	read_sysctl kern.threadpool_tester.test_value 2
+
+	# Now destroy the threadpool.
+	write_sysctl kern.threadpool_tester.put_unbound $tp_pri
+}
+unbound_cleanup() {
+	modunload threadpool_tester >/dev/null 2>&1
+}
+# Test case: get a percpu pool, run the test job on it twice, put it.
+atf_test_case percpu cleanup
+percpu_head() {
+	atf_set "descr" "Test percpu thread pools"
+	atf_set "require.user" "root"
+}
+percpu_body() {
+	modload $(atf_get_srcdir)/threadpool_tester/threadpool_tester.kmod
+	if [ $? -ne 0 ]; then
+		atf_skip "cannot load threadpool_tester.kmod"
+	fi
+
+	# Ensure that the state is clean.
+	read_sysctl kern.threadpool_tester.test_value 0
+
+	# Create a percpu pool.
+	write_sysctl kern.threadpool_tester.get_percpu $tp_pri
+
+	# Do it again.  We expect this to fail, but the test jig will
+	# do some additional threadpool object lifecycle validation.
+	# (It will not hold the additional reference.)
+	write_sysctl_fail kern.threadpool_tester.get_percpu $tp_pri \
+	    "sysctl: kern.threadpool_tester.get_percpu: File exists"
+
+	# Schedule the test jig job on the pool.
+	# Wait for a short period of time and then check that the job
+	# successfully ran.
+	write_sysctl kern.threadpool_tester.run_percpu $tp_pri
+	sleep $job_delay
+	read_sysctl kern.threadpool_tester.test_value 1
+
+	# ...and again.
+	write_sysctl kern.threadpool_tester.run_percpu $tp_pri
+	sleep $job_delay
+	read_sysctl kern.threadpool_tester.test_value 2
+
+	# Now destroy the threadpool.
+	write_sysctl kern.threadpool_tester.put_percpu $tp_pri
+}
+percpu_cleanup() {
+	modunload threadpool_tester >/dev/null 2>&1
+}
+# Test case: get, run and put back-to-back; the job must still have run.
+atf_test_case rapid cleanup
+rapid_head() {
+	atf_set "descr" "Test rapid get/schedule/put sequence"
+	atf_set "require.user" "root"
+}
+rapid_body() {
+	modload $(atf_get_srcdir)/threadpool_tester/threadpool_tester.kmod
+	if [ $? -ne 0 ]; then
+		atf_skip "cannot load threadpool_tester.kmod"
+	fi
+
+	# Ensure that the state is clean.
+	read_sysctl kern.threadpool_tester.test_value 0
+
+	# Create an unbound pool.  Immediately schedule a job on it
+	# and destroy it.
+	write_sysctl kern.threadpool_tester.get_unbound $tp_pri
+	write_sysctl kern.threadpool_tester.run_unbound $tp_pri
+	write_sysctl kern.threadpool_tester.put_unbound $tp_pri
+
+	# Now ensure the job successfully ran.
+	sleep $job_delay
+	read_sysctl kern.threadpool_tester.test_value 1
+}
+rapid_cleanup() {
+	modunload threadpool_tester >/dev/null 2>&1
+}
+# Register the three test cases defined above with ATF.
+atf_init_test_cases()
+{
+	atf_add_test_case unbound
+	atf_add_test_case percpu
+	atf_add_test_case rapid
+}

Index: src/tests/kernel/threadpool_tester/Makefile
diff -u /dev/null src/tests/kernel/threadpool_tester/Makefile:1.1
--- /dev/null	Mon Dec 24 16:58:54 2018
+++ src/tests/kernel/threadpool_tester/Makefile	Mon Dec 24 16:58:54 2018
@@ -0,0 +1,14 @@
+#	$NetBSD: Makefile,v 1.1 2018/12/24 16:58:54 thorpej Exp $
+# Kernel module that t_threadpool.sh loads from its threadpool_tester/ dir.
+.include <bsd.own.mk>
+
+KMOD=		threadpool_tester
+KMODULEDIR=	${DESTDIR}/${TESTSBASE}/kernel/${KMOD}
+
+SRCS=		threadpool_tester.c
+
+ATFFILE=	no
+NOMAN=		# defined
+
+.include <bsd.test.mk>
+.include <bsd.kmodule.mk>
Index: src/tests/kernel/threadpool_tester/threadpool_tester.c
diff -u /dev/null src/tests/kernel/threadpool_tester/threadpool_tester.c:1.1
--- /dev/null	Mon Dec 24 16:58:54 2018
+++ src/tests/kernel/threadpool_tester/threadpool_tester.c	Mon Dec 24 16:58:54 2018
@@ -0,0 +1,502 @@
+/*	$NetBSD: threadpool_tester.c,v 1.1 2018/12/24 16:58:54 thorpej Exp $	*/
+
+/*-
+ * Copyright (c) 2018 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: threadpool_tester.c,v 1.1 2018/12/24 16:58:54 thorpej Exp $");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/threadpool.h>
+
+/* Declare the module: miscellaneous class, NULL dependency list. */
+MODULE(MODULE_CLASS_MISC, threadpool_tester, NULL);
+
+/*
+ * TP_LOG((fmt, ...)) -- diagnostic output that is compiled in only when
+ * THREADPOOL_VERBOSE is defined.  The single macro argument is a complete
+ * parenthesized printf() argument list, which is why call sites use
+ * double parentheses.  Without THREADPOOL_VERBOSE it expands to nothing.
+ */
+#ifndef THREADPOOL_VERBOSE
+#define	TP_LOG(x)		/* nothing */
+#else
+#define	TP_LOG(x)		printf x
+#endif /* !THREADPOOL_VERBOSE */
+
+/*
+ * All state of the tester module.  There is exactly one instance,
+ * tester_ctx, and every sysctl node created by this module carries a
+ * pointer to it as its sysctl_data.
+ */
+static struct tester_context {
+	/*
+	 * Serializes access to ctx_value and to the two pool tables; it is
+	 * also the lock handed to threadpool_job_init() for ctx_job.
+	 */
+	kmutex_t ctx_mutex;
+
+	/* The one job object this module ever schedules. */
+	threadpool_job_t ctx_job;
+
+	/* Counter that each run of the job increments by one. */
+	unsigned int ctx_value;
+
+	/* Log of the sysctl nodes created at init, for sysctl_teardown(). */
+	struct sysctllog *ctx_sysctllog;
+
+	/*
+	 * Pool references currently held, indexed by pri_to_idx().  The
+	 * extra slot at index PRI_COUNT belongs to PRI_NONE.
+	 */
+	threadpool_t *ctx_unbound[PRI_COUNT + 1];
+	threadpool_percpu_t *ctx_percpu[PRI_COUNT + 1];
+} tester_ctx;
+
+/*
+ * pri_to_idx(pri)
+ *
+ *	Map a priority to its slot in the ctx_unbound[] / ctx_percpu[]
+ *	tables.  PRI_NONE gets the extra slot at index PRI_COUNT; every
+ *	other priority is used as the index directly.  The caller is
+ *	expected to have checked the priority with pri_is_valid() (or to
+ *	iterate over the valid range itself).
+ *
+ *	This is a function rather than a macro so that the argument is
+ *	type-checked and evaluated exactly once.
+ */
+static inline int
+pri_to_idx(pri_t pri)
+{
+	return pri == PRI_NONE ? PRI_COUNT : pri;
+}
+
+/*
+ * pri_is_valid(pri)
+ *
+ *	Return true if pri is acceptable to this module: either PRI_NONE,
+ *	or a value in the half-open range [PRI_USER, PRI_COUNT).
+ */
+static bool
+pri_is_valid(pri_t pri)
+{
+	if (pri == PRI_NONE)
+		return true;
+	if (pri < PRI_USER)
+		return false;
+	return pri < PRI_COUNT;
+}
+
+/*
+ * threadpool_tester_get_unbound:
+ *
+ *	sysctl handler.  Writing a priority acquires a reference to the
+ *	unbound pool of that priority with threadpool_get() and records it
+ *	in the context.  A request without new data performs no action.
+ *
+ *	Returns EINVAL for a priority rejected by pri_is_valid(), the
+ *	error from threadpool_get() if that fails, and EEXIST if a pool is
+ *	already recorded for the priority (the freshly acquired reference
+ *	is dropped again in that case); 0 otherwise.
+ */
+static int
+threadpool_tester_get_unbound(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct tester_context * const ctx = node.sysctl_data;
+	threadpool_t *pool, *existing;
+	int pri = -1;
+	int error;
+
+	/* Let sysctl_lookup() transfer the value through a local copy. */
+	node.sysctl_data = &pri;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return error;
+
+	if (!pri_is_valid(pri))
+		return EINVAL;
+
+	error = threadpool_get(&pool, pri);
+	if (error != 0) {
+		TP_LOG(("%s: threadpool_get(..., %d) failed -> %d\n",
+		    __func__, pri, error));
+		return error;
+	}
+
+	/* Record the pool unless one is already held for this priority. */
+	mutex_enter(&ctx->ctx_mutex);
+	existing = ctx->ctx_unbound[pri_to_idx(pri)];
+	if (existing == NULL)
+		ctx->ctx_unbound[pri_to_idx(pri)] = pool;
+	mutex_exit(&ctx->ctx_mutex);
+
+	if (existing == NULL) {
+		TP_LOG(("%s: created unbound pool for pri %d\n",
+		    __func__, pri));
+		return 0;
+	}
+
+	/*
+	 * We already hold a reference, so threadpool_get() should have
+	 * handed back the very same pool.  Only one reference is kept;
+	 * give the new one back.
+	 */
+	TP_LOG(("%s: found existing unbound pool for pri %d (%s)\n",
+	    __func__, pri, existing == pool ? "match" : "NO MATCH"));
+	KASSERT(existing == pool);
+	threadpool_put(pool, pri);
+
+	return EEXIST;
+}
+
+/*
+ * threadpool_tester_put_unbound:
+ *
+ *	sysctl handler.  Writing a priority releases the unbound pool
+ *	reference recorded for that priority, if any.  A request without
+ *	new data performs no action.
+ *
+ *	Returns EINVAL for a priority rejected by pri_is_valid(), ENODEV
+ *	if no pool is recorded for the priority, and 0 on success.
+ */
+static int
+threadpool_tester_put_unbound(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct tester_context * const ctx = node.sysctl_data;
+	threadpool_t *pool;
+	int pri = -1;
+	int error;
+
+	node.sysctl_data = &pri;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return error;
+
+	if (!pri_is_valid(pri))
+		return EINVAL;
+
+	/*
+	 * Detach the pool from the table under the lock.  Only a single
+	 * reference is ever kept per priority, so clearing the slot and
+	 * dropping one reference below is all that is needed.
+	 */
+	mutex_enter(&ctx->ctx_mutex);
+	pool = ctx->ctx_unbound[pri_to_idx(pri)];
+	ctx->ctx_unbound[pri_to_idx(pri)] = NULL;
+	mutex_exit(&ctx->ctx_mutex);
+
+	if (pool != NULL) {
+		threadpool_put(pool, pri);
+		TP_LOG(("%s: released unbound pool for pri %d\n",
+		    __func__, pri));
+		return 0;
+	}
+
+	TP_LOG(("%s: no unbound pool for pri %d\n",
+	    __func__, pri));
+	return ENODEV;
+}
+
+/*
+ * threadpool_tester_run_unbound:
+ *
+ *	sysctl handler.  Writing a priority schedules the module's job on
+ *	the unbound pool recorded for that priority.  A request without
+ *	new data performs no action.
+ *
+ *	Returns EINVAL for a priority rejected by pri_is_valid(), ENODEV
+ *	if no pool is recorded for the priority, and 0 once the job has
+ *	been scheduled.
+ */
+static int
+threadpool_tester_run_unbound(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct tester_context * const ctx = node.sysctl_data;
+	threadpool_t *pool;
+	int pri = -1;
+	int error;
+
+	node.sysctl_data = &pri;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return error;
+
+	if (!pri_is_valid(pri))
+		return EINVAL;
+
+	/*
+	 * ctx_mutex is the lock the job was initialized with, and it also
+	 * protects the pool table, so lookup and scheduling both happen
+	 * with it held.
+	 */
+	mutex_enter(&ctx->ctx_mutex);
+	pool = ctx->ctx_unbound[pri_to_idx(pri)];
+	if (pool != NULL) {
+		threadpool_schedule_job(pool, &ctx->ctx_job);
+		TP_LOG(("%s: scheduled job on unbound pool for pri %d\n",
+		    __func__, pri));
+		error = 0;
+	} else {
+		TP_LOG(("%s: no unbound pool for pri %d\n",
+		    __func__, pri));
+		error = ENODEV;
+	}
+	mutex_exit(&ctx->ctx_mutex);
+
+	return error;
+}
+
+/*
+ * threadpool_tester_get_percpu:
+ *
+ *	sysctl handler.  Writing a priority acquires a reference to the
+ *	per-CPU pool set of that priority with threadpool_percpu_get() and
+ *	records it in the context.  A request without new data performs no
+ *	action.
+ *
+ *	Returns EINVAL for a priority rejected by pri_is_valid(), the
+ *	error from threadpool_percpu_get() if that fails, and EEXIST if a
+ *	per-CPU pool is already recorded for the priority (the freshly
+ *	acquired reference is dropped again in that case); 0 otherwise.
+ */
+static int
+threadpool_tester_get_percpu(SYSCTLFN_ARGS)
+{
+	struct tester_context *ctx;
+	threadpool_percpu_t *pcpu, *opcpu = NULL;
+	struct sysctlnode node;
+	int error, val;
+
+	node = *rnode;
+	ctx = node.sysctl_data;
+
+	/* Let sysctl_lookup() transfer the value through a local copy. */
+	val = -1;
+	node.sysctl_data = &val;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error || newp == NULL)
+		return error;
+
+	if (! pri_is_valid(val))
+		return EINVAL;
+
+	error = threadpool_percpu_get(&pcpu, val);
+	if (error) {
+		TP_LOG(("%s: threadpool_percpu_get(..., %d) failed -> %d\n",
+		    __func__, val, error));
+		return error;
+	}
+
+	/* Record the pool unless one is already held for this priority. */
+	mutex_enter(&ctx->ctx_mutex);
+	if (ctx->ctx_percpu[pri_to_idx(val)] == NULL)
+		ctx->ctx_percpu[pri_to_idx(val)] = pcpu;
+	else
+		opcpu = ctx->ctx_percpu[pri_to_idx(val)];
+	mutex_exit(&ctx->ctx_mutex);
+
+	if (opcpu != NULL) {
+		/*
+		 * Should have gotten reference to existing pool.  Only one
+		 * reference is kept, so give the new one back.
+		 */
+		TP_LOG(("%s: found existing percpu pool for pri %d (%s)\n",
+		    __func__, val, opcpu == pcpu ? "match" : "NO MATCH"));
+		KASSERT(opcpu == pcpu);
+		threadpool_percpu_put(pcpu, val);
+		error = EEXIST;
+	} else {
+		TP_LOG(("%s: created percpu pool for pri %d\n",
+		    __func__, val));
+	}
+
+	return error;
+}
+
+/*
+ * threadpool_tester_put_percpu:
+ *
+ *	sysctl handler.  Writing a priority releases the per-CPU pool
+ *	reference recorded for that priority, if any.  A request without
+ *	new data performs no action.
+ *
+ *	Returns EINVAL for a priority rejected by pri_is_valid(), ENODEV
+ *	if no per-CPU pool is recorded for the priority, and 0 on success.
+ */
+static int
+threadpool_tester_put_percpu(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct tester_context * const ctx = node.sysctl_data;
+	threadpool_percpu_t *pcpu;
+	int pri = -1;
+	int error;
+
+	node.sysctl_data = &pri;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return error;
+
+	if (!pri_is_valid(pri))
+		return EINVAL;
+
+	/*
+	 * Detach the pool from the table under the lock.  Only a single
+	 * reference is ever kept per priority, so clearing the slot and
+	 * dropping one reference below is all that is needed.
+	 */
+	mutex_enter(&ctx->ctx_mutex);
+	pcpu = ctx->ctx_percpu[pri_to_idx(pri)];
+	ctx->ctx_percpu[pri_to_idx(pri)] = NULL;
+	mutex_exit(&ctx->ctx_mutex);
+
+	if (pcpu != NULL) {
+		threadpool_percpu_put(pcpu, pri);
+		TP_LOG(("%s: released percpu pool for pri %d\n",
+		    __func__, pri));
+		return 0;
+	}
+
+	TP_LOG(("%s: no percpu pool for pri %d\n",
+	    __func__, pri));
+	return ENODEV;
+}
+
+/*
+ * threadpool_tester_run_percpu:
+ *
+ *	sysctl handler.  Writing a priority schedules the module's job on
+ *	the pool that threadpool_percpu_ref() returns for the per-CPU pool
+ *	set recorded for that priority.  A request without new data
+ *	performs no action.
+ *
+ *	Returns EINVAL for a priority rejected by pri_is_valid(), ENODEV
+ *	if no per-CPU pool is recorded for the priority, and 0 once the
+ *	job has been scheduled.
+ */
+static int
+threadpool_tester_run_percpu(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct tester_context * const ctx = node.sysctl_data;
+	threadpool_percpu_t *pcpu;
+	threadpool_t *pool;
+	int pri = -1;
+	int error;
+
+	node.sysctl_data = &pri;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error != 0 || newp == NULL)
+		return error;
+
+	if (!pri_is_valid(pri))
+		return EINVAL;
+
+	/*
+	 * ctx_mutex is the lock the job was initialized with, and it also
+	 * protects the pool table, so lookup and scheduling both happen
+	 * with it held.
+	 */
+	mutex_enter(&ctx->ctx_mutex);
+	pcpu = ctx->ctx_percpu[pri_to_idx(pri)];
+	if (pcpu != NULL) {
+		pool = threadpool_percpu_ref(pcpu);
+		KASSERT(pool != NULL);
+
+		threadpool_schedule_job(pool, &ctx->ctx_job);
+		TP_LOG(("%s: scheduled job on percpu pool for pri %d\n",
+		    __func__, pri));
+		error = 0;
+	} else {
+		TP_LOG(("%s: no percpu pool for pri %d\n",
+		    __func__, pri));
+		error = ENODEV;
+	}
+	mutex_exit(&ctx->ctx_mutex);
+
+	return error;
+}
+
+/*
+ * threadpool_tester_test_value:
+ *
+ *	sysctl handler for the job counter.  The current ctx_value is
+ *	passed to sysctl_lookup() through a local copy; if the request
+ *	carries new data and the lookup succeeds, the counter is replaced
+ *	by the value written.  Returns the result of sysctl_lookup().
+ */
+static int
+threadpool_tester_test_value(SYSCTLFN_ARGS)
+{
+	struct sysctlnode node = *rnode;
+	struct tester_context * const ctx = node.sysctl_data;
+	unsigned int snapshot;
+	int error;
+
+	/* The lock is held across the lookup, exactly bracketing it. */
+	mutex_enter(&ctx->ctx_mutex);
+	snapshot = ctx->ctx_value;
+	node.sysctl_data = &snapshot;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+	if (error == 0 && newp != NULL)
+		ctx->ctx_value = snapshot;
+	mutex_exit(&ctx->ctx_mutex);
+
+	return error;
+}
+
+/*
+ * threadpool_tester_job:
+ *
+ *	The job function.  Increments ctx_value by one under ctx_mutex,
+ *	pauses for hz ticks, and then reports completion with
+ *	threadpool_job_done() while holding ctx_mutex again.
+ */
+static void
+threadpool_tester_job(threadpool_job_t *job)
+{
+	/* The job object is embedded in the context; recover the context. */
+	struct tester_context *ctx =
+	    container_of(job, struct tester_context, ctx_job);
+	unsigned int before, after;
+
+	TP_LOG(("%s: job = %p, ctx = %p\n", __func__, job, ctx));
+
+	/*
+	 * Both locals are used in the arithmetic itself so that they stay
+	 * referenced even when TP_LOG() expands to nothing.
+	 */
+	mutex_enter(&ctx->ctx_mutex);
+	before = ctx->ctx_value;
+	after = before + 1;
+	ctx->ctx_value = after;
+	mutex_exit(&ctx->ctx_mutex);
+
+	TP_LOG(("%s: %u -> %u\n", __func__, before, after));
+
+	/* Keep the job busy for a while before declaring it done. */
+	(void) kpause("tptestjob", false, hz, NULL);
+
+	mutex_enter(&ctx->ctx_mutex);
+	threadpool_job_done(job);
+	mutex_exit(&ctx->ctx_mutex);
+}
+
+#define	RETURN_ERROR	if (error) goto return_error
+
+static int
+threadpool_tester_init(void)
+{
+	struct sysctllog **log = &tester_ctx.ctx_sysctllog;
+	const struct sysctlnode *rnode, *cnode;
+	int error;
+
+	mutex_init(&tester_ctx.ctx_mutex, MUTEX_DEFAULT, IPL_NONE);
+	threadpool_job_init(&tester_ctx.ctx_job, threadpool_tester_job,
+	    &tester_ctx.ctx_mutex, "tptest");
+
+	error = sysctl_createv(log, 0, NULL, &rnode, CTLFLAG_PERMANENT,
+	    CTLTYPE_NODE, "threadpool_tester",
+	    SYSCTL_DESCR("threadpool testing interface"),
+	    NULL, 0, NULL, 0, CTL_KERN, CTL_CREATE, CTL_EOL);
+	RETURN_ERROR;
+
+	error = sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "get_unbound",
+	    SYSCTL_DESCR("get unbound pool of specified priority"),
+	    threadpool_tester_get_unbound, 0,
+	    (void *)&tester_ctx, 0, CTL_CREATE, CTL_EOL);
+	RETURN_ERROR;
+
+	error = sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "put_unbound",
+	    SYSCTL_DESCR("put unbound pool of specified priority"),
+	    threadpool_tester_put_unbound, 0,
+	    (void *)&tester_ctx, 0, CTL_CREATE, CTL_EOL);
+	RETURN_ERROR;
+
+	error = sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "run_unbound",
+	    SYSCTL_DESCR("run on unbound pool of specified priority"),
+	    threadpool_tester_run_unbound, 0,
+	    (void *)&tester_ctx, 0, CTL_CREATE, CTL_EOL);
+	RETURN_ERROR;
+
+	error = sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "get_percpu",
+	    SYSCTL_DESCR("get percpu pool of specified priority"),
+	    threadpool_tester_get_percpu, 0,
+	    (void *)&tester_ctx, 0, CTL_CREATE, CTL_EOL);
+	RETURN_ERROR;
+
+	error = sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "put_percpu",
+	    SYSCTL_DESCR("put percpu pool of specified priority"),
+	    threadpool_tester_put_percpu, 0,
+	    (void *)&tester_ctx, 0, CTL_CREATE, CTL_EOL);
+	RETURN_ERROR;
+
+	error = sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "run_percpu",
+	    SYSCTL_DESCR("run on percpu pool of specified priority"),
+	    threadpool_tester_run_percpu, 0,
+	    (void *)&tester_ctx, 0, CTL_CREATE, CTL_EOL);
+	RETURN_ERROR;
+
+	error = sysctl_createv(log, 0, &rnode, &cnode,
+	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "test_value",
+	    SYSCTL_DESCR("test value that jobs increment"),
+	    threadpool_tester_test_value, 0,
+	    (void *)&tester_ctx, 0, CTL_CREATE, CTL_EOL);
+	RETURN_ERROR;
+
+	return 0;
+
+ return_error:
+ 	sysctl_teardown(log);
+	return error;
+}
+
+/*
+ * threadpool_tester_fini:
+ *
+ *	Module finalization.  Removes the sysctl nodes, cancels the job
+ *	and drops every pool reference still recorded in the context,
+ *	then destroys the job and the mutex.  Always returns 0.
+ */
+static int
+threadpool_tester_fini(void)
+{
+	pri_t pri;
+
+	/*
+	 * Take the sysctl nodes away before anything else is dismantled:
+	 * every handler dereferences tester_ctx and takes ctx_mutex, so
+	 * none of them may remain reachable once the mutex is destroyed
+	 * below.
+	 */
+	sysctl_teardown(&tester_ctx.ctx_sysctllog);
+
+	mutex_enter(&tester_ctx.ctx_mutex);
+	for (pri = PRI_NONE/*-1*/; pri < PRI_COUNT; pri++) {
+		threadpool_t *pool =
+		    tester_ctx.ctx_unbound[pri_to_idx(pri)];
+		threadpool_percpu_t *pcpu =
+		    tester_ctx.ctx_percpu[pri_to_idx(pri)];
+
+		/*
+		 * threadpool_cancel_job() may be called on a pool
+		 * other than what the job is scheduled on. This is
+		 * safe; see comment in threadpool_cancel_job_async().
+		 */
+
+		if (pool != NULL) {
+			threadpool_cancel_job(pool, &tester_ctx.ctx_job);
+			threadpool_put(pool, pri);
+			tester_ctx.ctx_unbound[pri_to_idx(pri)] = NULL;
+		}
+		if (pcpu != NULL) {
+			pool = threadpool_percpu_ref(pcpu);
+			threadpool_cancel_job(pool, &tester_ctx.ctx_job);
+			threadpool_percpu_put(pcpu, pri);
+			tester_ctx.ctx_percpu[pri_to_idx(pri)] = NULL;
+		}
+	}
+	mutex_exit(&tester_ctx.ctx_mutex);
+
+	/* The job uses ctx_mutex as its lock, so it must go first. */
+	threadpool_job_destroy(&tester_ctx.ctx_job);
+	mutex_destroy(&tester_ctx.ctx_mutex);
+
+	return 0;
+}
+
+/*
+ * threadpool_tester_modcmd:
+ *
+ *	Module command entry point.  MODULE_CMD_INIT and MODULE_CMD_FINI
+ *	are forwarded to threadpool_tester_init() and
+ *	threadpool_tester_fini(); every other command, including
+ *	MODULE_CMD_STAT, yields ENOTTY.  The second argument is unused.
+ */
+static int
+threadpool_tester_modcmd(modcmd_t cmd, void *arg __unused)
+{
+	if (cmd == MODULE_CMD_INIT)
+		return threadpool_tester_init();
+
+	if (cmd == MODULE_CMD_FINI)
+		return threadpool_tester_fini();
+
+	return ENOTTY;
+}

Reply via email to