> Date: Sat, 15 Jan 2022 21:23:56 +0000
> From: Taylor R Campbell <riastr...@netbsd.org>
> 
> The attached patch series enables us to resolve a large class of races
> between devsw open/close, autoconf attach/detach, and module unload.

For convenience of testing if anyone wants to give it a whirl, here's
a giant diff you can apply on top of a CVS tree, instead of a `git
format-patch' series to use with `git am' in a git tree.
diff --git a/external/cddl/osnet/dev/dtrace/dtrace_modevent.c 
b/external/cddl/osnet/dev/dtrace/dtrace_modevent.c
index cc0f8103fb98..f1338840125a 100644
--- a/external/cddl/osnet/dev/dtrace/dtrace_modevent.c
+++ b/external/cddl/osnet/dev/dtrace/dtrace_modevent.c
@@ -42,9 +42,7 @@ dtrace_modcmd(modcmd_t cmd, void *data)
                return error;
 
        case MODULE_CMD_FINI:
-               error = devsw_detach(NULL, &dtrace_cdevsw);
-               if (error != 0)
-                       return error;
+               devsw_detach(NULL, &dtrace_cdevsw);
 
                error = dtrace_unload();
                if (error != 0) {
diff --git a/external/cddl/osnet/dev/fbt/fbt.c 
b/external/cddl/osnet/dev/fbt/fbt.c
index b367c2155292..46dd7c1f7f06 100644
--- a/external/cddl/osnet/dev/fbt/fbt.c
+++ b/external/cddl/osnet/dev/fbt/fbt.c
@@ -1329,7 +1329,8 @@ dtrace_fbt_modcmd(modcmd_t cmd, void *data)
                error = fbt_unload();
                if (error != 0)
                        return error;
-               return devsw_detach(NULL, &fbt_cdevsw);
+               devsw_detach(NULL, &fbt_cdevsw);
+               return 0;
        case MODULE_CMD_AUTOUNLOAD:
                return EBUSY;
        default:
diff --git a/external/cddl/osnet/dev/sdt/sdt.c 
b/external/cddl/osnet/dev/sdt/sdt.c
index c3ad129f8284..5a41270a2917 100644
--- a/external/cddl/osnet/dev/sdt/sdt.c
+++ b/external/cddl/osnet/dev/sdt/sdt.c
@@ -562,7 +562,8 @@ dtrace_sdt_modcmd(modcmd_t cmd, void *data)
                error = sdt_unload();
                if (error != 0)
                        return error;
-               return devsw_detach(NULL, &sdt_cdevsw);
+               devsw_detach(NULL, &sdt_cdevsw);
+               return 0;
        case MODULE_CMD_AUTOUNLOAD:
                return EBUSY;
        default:
diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c 
b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c
index 9e19cd1dc0c3..d74d8c71e54d 100644
--- a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c
+++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c
@@ -7231,7 +7231,7 @@ zfs_modcmd(modcmd_t cmd, void *arg)
                if (error)
                        return error;
 
-               (void) devsw_detach(&zfs_bdevsw, &zfs_cdevsw);
+               devsw_detach(&zfs_bdevsw, &zfs_cdevsw);
 
 attacherr:
                zfs_sysctl_fini();
diff --git a/share/man/man9/devsw_attach.9 b/share/man/man9/devsw_attach.9
index cf862be5846a..6ffc51957a3f 100644
--- a/share/man/man9/devsw_attach.9
+++ b/share/man/man9/devsw_attach.9
@@ -27,7 +27,7 @@
 .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 .\" POSSIBILITY OF SUCH DAMAGE.
 .\"
-.Dd April 30, 2017
+.Dd January 11, 2022
 .Dt DEVSW 9
 .Os
 .Sh NAME
@@ -49,7 +49,7 @@
 .Fa "const struct cdevsw *cdev"
 .Fa "devmajor_t *cmajor"
 .Fc
-.Ft int
+.Ft void
 .Fo devsw_detach
 .Fa "const struct bdevsw *bdev"
 .Fa "const struct cdevsw *cdev"
@@ -130,6 +130,11 @@ and
 structures.
 .Fn devsw_detach
 should be called before a loaded device driver is unloaded.
+The caller must ensure that there are no open instances of the device,
+and that the device's
+.Fn d_open
+function will fail, before calling
+.Fn devsw_detach .
 .Pp
 The
 .Fn bdevsw_lookup
@@ -155,10 +160,8 @@ or
 .Sh RETURN VALUES
 Upon successful completion,
 .Fn devsw_attach
-and
-.Fn devsw_detach
-return 0.
-Otherwise they return an error value.
+returns 0.
+Otherwise it returns an error value.
 .Pp
 In case of failure,
 .Fn bdevsw_lookup
diff --git a/sys/coda/coda_psdev.c b/sys/coda/coda_psdev.c
index cede16da3f53..7f531f03fe56 100644
--- a/sys/coda/coda_psdev.c
+++ b/sys/coda/coda_psdev.c
@@ -758,7 +758,7 @@ vcoda_modcmd(modcmd_t cmd, void *arg)
                                if (VC_OPEN(vcp))
                                        return EBUSY;
                        }
-                       return devsw_detach(NULL, &vcoda_cdevsw);
+                       devsw_detach(NULL, &vcoda_cdevsw);
                }
 #endif
                break;
diff --git a/sys/dev/ata/wd.c b/sys/dev/ata/wd.c
index b5d7476d1e6c..672684a3b4aa 100644
--- a/sys/dev/ata/wd.c
+++ b/sys/dev/ata/wd.c
@@ -152,6 +152,8 @@ const struct bdevsw wd_bdevsw = {
        .d_dump = wddump,
        .d_psize = wdsize,
        .d_discard = wddiscard,
+       .d_cfdriver = &wd_cd,
+       .d_devtounit = disklabel_dev_unit,
        .d_flag = D_DISK
 };
 
@@ -167,6 +169,8 @@ const struct cdevsw wd_cdevsw = {
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_discard = wddiscard,
+       .d_cfdriver = &wd_cd,
+       .d_devtounit = disklabel_dev_unit,
        .d_flag = D_DISK
 };
 
diff --git a/sys/dev/audio/audio.c b/sys/dev/audio/audio.c
index d22ea989a180..176749ac449f 100644
--- a/sys/dev/audio/audio.c
+++ b/sys/dev/audio/audio.c
@@ -521,7 +521,6 @@ static int audio_exlock_mutex_enter(struct audio_softc *);
 static void audio_exlock_mutex_exit(struct audio_softc *);
 static int audio_exlock_enter(struct audio_softc *);
 static void audio_exlock_exit(struct audio_softc *);
-static void audio_sc_acquire_foropen(struct audio_softc *, struct psref *);
 static struct audio_softc *audio_sc_acquire_fromfile(audio_file_t *,
        struct psref *);
 static void audio_sc_release(struct audio_softc *, struct psref *);
@@ -732,6 +731,8 @@ const struct cdevsw audio_cdevsw = {
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_discard = nodiscard,
+       .d_cfdriver = &audio_cd,
+       .d_devtounit = dev_minor_unit,
        .d_flag = D_OTHER | D_MPSAFE
 };
 
@@ -1543,31 +1544,6 @@ audio_exlock_exit(struct audio_softc *sc)
        audio_exlock_mutex_exit(sc);
 }
 
-/*
- * Increment reference counter for this sc.
- * This is intended to be used for open.
- */
-void
-audio_sc_acquire_foropen(struct audio_softc *sc, struct psref *refp)
-{
-       int s;
-
-       /* Block audiodetach while we acquire a reference */
-       s = pserialize_read_enter();
-
-       /*
-        * We don't examine sc_dying here.  However, all open methods
-        * call audio_exlock_enter() right after this, so we can examine
-        * sc_dying in it.
-        */
-
-       /* Acquire a reference */
-       psref_acquire(refp, &sc->sc_psref, audio_psref_class);
-
-       /* Now sc won't go away until we drop the reference count */
-       pserialize_read_exit(s);
-}
-
 /*
  * Get sc from file, and increment reference counter for this sc.
  * This is intended to be used for methods other than open.
@@ -1680,21 +1656,20 @@ static int
 audioopen(dev_t dev, int flags, int ifmt, struct lwp *l)
 {
        struct audio_softc *sc;
-       struct psref sc_ref;
-       int bound;
        int error;
 
-       /* Find the device */
+       /*
+        * Find the device.  Because we wired the cdevsw to the audio
+        * autoconf instance, the system ensures it will not go away
+        * until after we return.
+        */
        sc = device_lookup_private(&audio_cd, AUDIOUNIT(dev));
        if (sc == NULL || sc->hw_if == NULL)
                return ENXIO;
 
-       bound = curlwp_bind();
-       audio_sc_acquire_foropen(sc, &sc_ref);
-
        error = audio_exlock_enter(sc);
        if (error)
-               goto done;
+               return error;
 
        device_active(sc->sc_dev, DVA_SYSTEM);
        switch (AUDIODEV(dev)) {
@@ -1714,9 +1689,6 @@ audioopen(dev_t dev, int flags, int ifmt, struct lwp *l)
        }
        audio_exlock_exit(sc);
 
-done:
-       audio_sc_release(sc, &sc_ref);
-       curlwp_bindx(bound);
        return error;
 }
 
@@ -2098,30 +2070,42 @@ done:
 int
 audiobellopen(dev_t dev, audio_file_t **filep)
 {
+       device_t audiodev = NULL;
        struct audio_softc *sc;
-       struct psref sc_ref;
-       int bound;
+       bool exlock = false;
        int error;
 
-       /* Find the device */
-       sc = device_lookup_private(&audio_cd, AUDIOUNIT(dev));
-       if (sc == NULL || sc->hw_if == NULL)
-               return ENXIO;
+       /*
+        * Find the autoconf instance and make sure it doesn't go away
+        * while we are opening it.
+        */
+       audiodev = device_lookup_acquire(&audio_cd, AUDIOUNIT(dev));
+       if (audiodev == NULL) {
+               error = ENXIO;
+               goto out;
+       }
 
-       bound = curlwp_bind();
-       audio_sc_acquire_foropen(sc, &sc_ref);
+       /* If attach failed, it's hopeless -- give up.  */
+       sc = device_private(audiodev);
+       if (sc->hw_if == NULL) {
+               error = ENXIO;
+               goto out;
+       }
 
+       /* Take the exclusive configuration lock.  */
        error = audio_exlock_enter(sc);
        if (error)
-               goto done;
+               goto out;
+       exlock = true;
 
+       /* Open the audio device.  */
        device_active(sc->sc_dev, DVA_SYSTEM);
        error = audio_open(dev, sc, FWRITE, 0, curlwp, filep);
 
-       audio_exlock_exit(sc);
-done:
-       audio_sc_release(sc, &sc_ref);
-       curlwp_bindx(bound);
+out:   if (exlock)
+               audio_exlock_exit(sc);
+       if (audiodev)
+               device_release(audiodev);
        return error;
 }
 
diff --git a/sys/dev/ccd.c b/sys/dev/ccd.c
index 05945f9a67ba..2283bc0346da 100644
--- a/sys/dev/ccd.c
+++ b/sys/dev/ccd.c
@@ -1710,7 +1710,7 @@ ccd_modcmd(modcmd_t cmd, void *arg)
                        error = EBUSY;
                } else {
                        mutex_exit(&ccd_lock);
-                       error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw);
+                       devsw_detach(&ccd_bdevsw, &ccd_cdevsw);
                        ccddetach();
                }
 #endif
diff --git a/sys/dev/clockctl.c b/sys/dev/clockctl.c
index 0da5e7765fe8..9685c0f129f6 100644
--- a/sys/dev/clockctl.c
+++ b/sys/dev/clockctl.c
@@ -182,14 +182,12 @@ clockctl_modcmd(modcmd_t cmd, void *data)
                        return EBUSY;
                }
 #ifdef _MODULE
-               error = devsw_detach(NULL, &clockctl_cdevsw);
+               devsw_detach(NULL, &clockctl_cdevsw);
 #endif
                mutex_exit(&clockctl_mtx);
 
-               if (error == 0) {
-                       kauth_unlisten_scope(clockctl_listener);
-                       mutex_destroy(&clockctl_mtx);
-               }
+               kauth_unlisten_scope(clockctl_listener);
+               mutex_destroy(&clockctl_mtx);
                break;
 
        default:
diff --git a/sys/dev/hdaudio/hdaudio.c b/sys/dev/hdaudio/hdaudio.c
index d39ff2db6cde..5c7874778e22 100644
--- a/sys/dev/hdaudio/hdaudio.c
+++ b/sys/dev/hdaudio/hdaudio.c
@@ -1636,11 +1636,7 @@ hdaudio_modcmd(modcmd_t cmd, void *opaque)
                error = config_cfdriver_detach(&hdaudio_cd);
                if (error)
                        break;
-               error = devsw_detach(NULL, &hdaudio_cdevsw);
-               if (error) {
-                       config_cfdriver_attach(&hdaudio_cd);
-                       break;
-               }
+               devsw_detach(NULL, &hdaudio_cdevsw);
 #endif
                break;
        default:
diff --git a/sys/dev/i2c/i2c.c b/sys/dev/i2c/i2c.c
index 322f6ad3f199..b56b3e235112 100644
--- a/sys/dev/i2c/i2c.c
+++ b/sys/dev/i2c/i2c.c
@@ -904,7 +904,7 @@ iic_modcmd(modcmd_t cmd, void *opaque)
                if (error) {
                        aprint_error("%s: unable to init component\n",
                            iic_cd.cd_name);
-                       (void)devsw_detach(NULL, &iic_cdevsw);
+                       devsw_detach(NULL, &iic_cdevsw);
                }
                mutex_exit(&iic_mtx);
 #endif
@@ -922,10 +922,7 @@ iic_modcmd(modcmd_t cmd, void *opaque)
                        mutex_exit(&iic_mtx);
                        break;
                }
-               error = devsw_detach(NULL, &iic_cdevsw);
-               if (error != 0)
-                       config_init_component(cfdriver_ioconf_iic,
-                           cfattach_ioconf_iic, cfdata_ioconf_iic);
+               devsw_detach(NULL, &iic_cdevsw);
 #endif
                mutex_exit(&iic_mtx);
                break;
diff --git a/sys/dev/pad/pad.c b/sys/dev/pad/pad.c
index fe0b429cf386..a779f1f71b8d 100644
--- a/sys/dev/pad/pad.c
+++ b/sys/dev/pad/pad.c
@@ -777,9 +777,7 @@ pad_modcmd(modcmd_t cmd, void *arg)
 
        case MODULE_CMD_FINI:
 #ifdef _MODULE
-               error = devsw_detach(NULL, &pad_cdevsw);
-               if (error)
-                       break;
+               devsw_detach(NULL, &pad_cdevsw);
 
                error = config_fini_component(cfdriver_ioconf_pad,
                    pad_cfattach, cfdata_ioconf_pad);
diff --git a/sys/dev/raidframe/rf_netbsdkintf.c 
b/sys/dev/raidframe/rf_netbsdkintf.c
index d1bda3553e03..87439aa70bfb 100644
--- a/sys/dev/raidframe/rf_netbsdkintf.c
+++ b/sys/dev/raidframe/rf_netbsdkintf.c
@@ -4088,16 +4088,7 @@ raid_modcmd_fini(void)
                return error;
        }
 #endif
-       error = devsw_detach(&raid_bdevsw, &raid_cdevsw);
-       if (error != 0) {
-               aprint_error("%s: cannot detach devsw\n",__func__);
-#ifdef _MODULE
-               config_cfdriver_attach(&raid_cd);
-#endif
-               config_cfattach_attach(raid_cd.cd_name, &raid_ca);
-               mutex_exit(&raid_lock);
-               return error;
-       }
+       devsw_detach(&raid_bdevsw, &raid_cdevsw);
        rf_BootRaidframe(false);
 #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0)
        rf_destroy_mutex2(rf_sparet_wait_mutex);
diff --git a/sys/dev/scsipi/sd.c b/sys/dev/scsipi/sd.c
index 7bd0d12c070b..24b3b6c1aa9c 100644
--- a/sys/dev/scsipi/sd.c
+++ b/sys/dev/scsipi/sd.c
@@ -167,6 +167,8 @@ const struct bdevsw sd_bdevsw = {
        .d_dump = sddump,
        .d_psize = sdsize,
        .d_discard = nodiscard,
+       .d_cfdriver = &sd_cd,
+       .d_devtounit = disklabel_dev_unit,
        .d_flag = D_DISK | D_MPSAFE
 };
 
@@ -182,6 +184,8 @@ const struct cdevsw sd_cdevsw = {
        .d_mmap = nommap,
        .d_kqfilter = nokqfilter,
        .d_discard = nodiscard,
+       .d_cfdriver = &sd_cd,
+       .d_devtounit = disklabel_dev_unit,
        .d_flag = D_DISK | D_MPSAFE
 };
 
diff --git a/sys/dev/sysmon/sysmon.c b/sys/dev/sysmon/sysmon.c
index 46aedaad7337..fd2993f0c180 100644
--- a/sys/dev/sysmon/sysmon.c
+++ b/sys/dev/sysmon/sysmon.c
@@ -356,7 +356,7 @@ sysmon_fini(void)
        if (error == 0) {
                mutex_enter(&sysmon_minor_mtx);
                sm_is_attached = false;
-               error = devsw_detach(NULL, &sysmon_cdevsw);
+               devsw_detach(NULL, &sysmon_cdevsw);
                mutex_exit(&sysmon_minor_mtx);
        }
 #endif
diff --git a/sys/dev/tprof/tprof.c b/sys/dev/tprof/tprof.c
index b069a5b7df5d..136fd190ad14 100644
--- a/sys/dev/tprof/tprof.c
+++ b/sys/dev/tprof/tprof.c
@@ -768,13 +768,7 @@ tprof_modcmd(modcmd_t cmd, void *arg)
 
        case MODULE_CMD_FINI:
 #if defined(_MODULE)
-               {
-                       int error;
-                       error = devsw_detach(NULL, &tprof_cdevsw);
-                       if (error) {
-                               return error;
-                       }
-               }
+               devsw_detach(NULL, &tprof_cdevsw);
 #endif /* defined(_MODULE) */
                tprof_driver_fini();
                return 0;
diff --git a/sys/dev/usb/uhid.c b/sys/dev/usb/uhid.c
index 81292fd5130e..1e6cd705e1a9 100644
--- a/sys/dev/usb/uhid.c
+++ b/sys/dev/usb/uhid.c
@@ -104,7 +104,6 @@ struct uhid_softc {
        volatile uint32_t sc_state;     /* driver state */
 #define UHID_IMMED     0x02    /* return read data immediately */
 
-       int sc_refcnt;
        int sc_raw;
        u_char sc_open;
        u_char sc_dying;
@@ -134,6 +133,8 @@ const struct cdevsw uhid_cdevsw = {
        .d_mmap = nommap,
        .d_kqfilter = uhidkqfilter,
        .d_discard = nodiscard,
+       .d_cfdriver = &uhid_cd,
+       .d_devtounit = dev_minor_unit,
        .d_flag = D_OTHER
 };
 
@@ -194,7 +195,6 @@ uhid_attach(device_t parent, device_t self, void *aux)
 
        mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_SOFTUSB);
        cv_init(&sc->sc_cv, "uhidrea");
-       cv_init(&sc->sc_detach_cv, "uhiddet");
 
        if (!pmf_device_register(self, NULL, NULL))
                aprint_error_dev(self, "couldn't establish power handler\n");
@@ -233,15 +233,6 @@ uhid_detach(device_t self, int flags)
        /* Interrupt any pending uhidev_write.  */
        uhidev_stop(&sc->sc_hdev);
 
-       /* Wait for I/O operations to complete.  */
-       mutex_enter(&sc->sc_lock);
-       while (sc->sc_refcnt) {
-               DPRINTF(("%s: open=%d refcnt=%d\n", __func__,
-                       sc->sc_open, sc->sc_refcnt));
-               cv_wait(&sc->sc_detach_cv, &sc->sc_lock);
-       }
-       mutex_exit(&sc->sc_lock);
-
        pmf_device_deregister(self);
 
        /* locate the major number */
@@ -251,28 +242,9 @@ uhid_detach(device_t self, int flags)
        mn = device_unit(self);
        vdevgone(maj, mn, mn, VCHR);
 
-       /*
-        * Wait for close to finish.
-        *
-        * XXX I assumed that vdevgone would synchronously call close,
-        * and not return before it has completed, but empirically the
-        * assertion of sc->sc_open == 0 below fires if we don't wait
-        * here.  Someone^TM should carefully examine vdevgone to
-        * ascertain what it guarantees, and audit all other users of
-        * it accordingly.
-        */
-       mutex_enter(&sc->sc_lock);
-       while (sc->sc_open) {
-               DPRINTF(("%s: open=%d\n", __func__, sc->sc_open));
-               cv_wait(&sc->sc_detach_cv, &sc->sc_lock);
-       }
-       mutex_exit(&sc->sc_lock);
-
        KASSERT(sc->sc_open == 0);
-       KASSERT(sc->sc_refcnt == 0);
 
        cv_destroy(&sc->sc_cv);
-       cv_destroy(&sc->sc_detach_cv);
        mutex_destroy(&sc->sc_lock);
        seldestroy(&sc->sc_rsel);
 
@@ -389,7 +361,6 @@ fail1:      selnotify(&sc->sc_rsel, POLLHUP, 0);
 fail0: mutex_enter(&sc->sc_lock);
        KASSERT(sc->sc_open == 1);
        sc->sc_open = 0;
-       cv_broadcast(&sc->sc_detach_cv);
        atomic_store_relaxed(&sc->sc_state, 0);
        mutex_exit(&sc->sc_lock);
        return error;
@@ -432,7 +403,6 @@ uhidclose(dev_t dev, int flag, int mode, struct lwp *l)
        mutex_enter(&sc->sc_lock);
        KASSERT(sc->sc_open == 1);
        sc->sc_open = 0;
-       cv_broadcast(&sc->sc_detach_cv);
        atomic_store_relaxed(&sc->sc_state, 0);
        mutex_exit(&sc->sc_lock);
 
@@ -454,11 +424,8 @@ uhid_enter(dev_t dev, struct uhid_softc **scp)
        KASSERT(sc->sc_open == 2);
        if (sc->sc_dying) {
                error = ENXIO;
-       } else if (sc->sc_refcnt == INT_MAX) {
-               error = EBUSY;
        } else {
                *scp = sc;
-               sc->sc_refcnt++;
                error = 0;
        }
        mutex_exit(&sc->sc_lock);
@@ -472,9 +439,6 @@ uhid_exit(struct uhid_softc *sc)
 
        mutex_enter(&sc->sc_lock);
        KASSERT(sc->sc_open == 2);
-       KASSERT(sc->sc_refcnt > 0);
-       if (--sc->sc_refcnt == 0)
-               cv_broadcast(&sc->sc_detach_cv);
        mutex_exit(&sc->sc_lock);
 }
 
diff --git a/sys/dist/pf/net/pf_ioctl.c b/sys/dist/pf/net/pf_ioctl.c
index 94bfb70a411d..e4c13be698f8 100644
--- a/sys/dist/pf/net/pf_ioctl.c
+++ b/sys/dist/pf/net/pf_ioctl.c
@@ -3459,7 +3459,8 @@ pf_modcmd(modcmd_t cmd, void *opaque)
                } else {
                        pfdetach();
                        pflogdetach();
-                       return devsw_detach(NULL, &pf_cdevsw);
+                       devsw_detach(NULL, &pf_cdevsw);
+                       return 0;
                }
        default:
                return ENOTTY;
diff --git a/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c 
b/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c
index d0c4ca95097c..bb4e0706cc9b 100644
--- a/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c
+++ b/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c
@@ -2256,7 +2256,7 @@ ipl_fini(void *opaque)
 {
 
 #ifdef _MODULE
-       (void)devsw_detach(NULL, &ipl_cdevsw);
+       devsw_detach(NULL, &ipl_cdevsw);
 #endif
 
        /*
diff --git a/sys/fs/autofs/autofs_vfsops.c b/sys/fs/autofs/autofs_vfsops.c
index fbd6eafe6532..1204d1f9b6d3 100644
--- a/sys/fs/autofs/autofs_vfsops.c
+++ b/sys/fs/autofs/autofs_vfsops.c
@@ -496,9 +496,7 @@ autofs_modcmd(modcmd_t cmd, void *arg)
                }
                mutex_exit(&autofs_softc->sc_lock);
 
-               error = devsw_detach(NULL, &autofs_cdevsw);
-               if (error)
-                       break;
+               devsw_detach(NULL, &autofs_cdevsw);
 #endif
                error = vfs_detach(&autofs_vfsops);
                if (error)
diff --git a/sys/kern/kern_drvctl.c b/sys/kern/kern_drvctl.c
index 37f4730b2512..8a4156f8a0aa 100644
--- a/sys/kern/kern_drvctl.c
+++ b/sys/kern/kern_drvctl.c
@@ -665,15 +665,10 @@ drvctl_modcmd(modcmd_t cmd, void *arg)
                devmon_insert_vec = saved_insert_vec;
                saved_insert_vec = NULL;
 #ifdef _MODULE
-               error = devsw_detach(NULL, &drvctl_cdevsw);
-               if (error != 0) {
-                       saved_insert_vec = devmon_insert_vec;
-                       devmon_insert_vec = devmon_insert;
-               }
+               devsw_detach(NULL, &drvctl_cdevsw);
 #endif
                mutex_exit(&drvctl_lock);
-               if (error == 0)
-                       drvctl_fini();
+               drvctl_fini();
 
                break;
        default:
diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c
index 54391c6baea6..d98050ba7afb 100644
--- a/sys/kern/subr_autoconf.c
+++ b/sys/kern/subr_autoconf.c
@@ -108,6 +108,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_autoconf.c,v 1.291 
2021/12/31 14:19:57 riastrad
 #include <sys/cpu.h>
 #include <sys/sysctl.h>
 #include <sys/stdarg.h>
+#include <sys/localcount.h>
 
 #include <sys/disk.h>
 
@@ -1453,6 +1454,9 @@ config_devdelete(device_t dev)
        if (dg->dg_devs != NULL)
                kmem_free(dg->dg_devs, sizeof(device_t) * dg->dg_ndevs);
 
+       localcount_fini(dev->dv_localcount);
+       kmem_free(dev->dv_localcount, sizeof(*dev->dv_localcount));
+
        cv_destroy(&dvl->dvl_cv);
        mutex_destroy(&dvl->dvl_mtx);
 
@@ -1556,6 +1560,7 @@ config_devalloc(const device_t parent, const cfdata_t cf,
        dev->dv_activity_handlers = NULL;
        dev->dv_private = dev_private;
        dev->dv_flags = ca->ca_flags;   /* inherit flags from class */
+       dev->dv_attaching = curlwp;
 
        myunit = config_unit_alloc(dev, cd, cf);
        if (myunit == -1) {
@@ -1604,6 +1609,10 @@ config_devalloc(const device_t parent, const cfdata_t cf,
                    "device-parent", device_xname(parent));
        }
 
+       dev->dv_localcount = kmem_zalloc(sizeof(*dev->dv_localcount),
+           KM_SLEEP);
+       localcount_init(dev->dv_localcount);
+
        if (dev->dv_cfdriver->cd_attrs != NULL)
                config_add_attrib_dict(dev);
 
@@ -1755,8 +1764,29 @@ config_attach_internal(device_t parent, cfdata_t cf, 
void *aux, cfprint_t print,
        /* Let userland know */
        devmon_report_device(dev, true);
 
+       /*
+        * Prevent detach until the driver's attach function, and all
+        * deferred actions, have finished.
+        */
        config_pending_incr(dev);
+
+       /* Call the driver's attach function.  */
        (*dev->dv_cfattach->ca_attach)(parent, dev, aux);
+
+       /*
+        * Allow other threads to acquire references to the device now
+        * that the driver's attach function is done.
+        */
+       mutex_enter(&config_misc_lock);
+       KASSERT(dev->dv_attaching == curlwp);
+       dev->dv_attaching = NULL;
+       cv_broadcast(&config_misc_cv);
+       mutex_exit(&config_misc_lock);
+
+       /*
+        * Synchronous parts of attach are done.  Allow detach, unless
+        * the driver's attach function scheduled deferred actions.
+        */
        config_pending_decr(dev);
 
        mutex_enter(&config_misc_lock);
@@ -1822,8 +1852,29 @@ config_attach_pseudo(cfdata_t cf)
        /* Let userland know */
        devmon_report_device(dev, true);
 
+       /*
+        * Prevent detach until the driver's attach function, and all
+        * deferred actions, have finished.
+        */
        config_pending_incr(dev);
+
+       /* Call the driver's attach function.  */
        (*dev->dv_cfattach->ca_attach)(ROOT, dev, NULL);
+
+       /*
+        * Allow other threads to acquire references to the device now
+        * that the driver's attach function is done.
+        */
+       mutex_enter(&config_misc_lock);
+       KASSERT(dev->dv_attaching == curlwp);
+       dev->dv_attaching = NULL;
+       cv_broadcast(&config_misc_cv);
+       mutex_exit(&config_misc_lock);
+
+       /*
+        * Synchronous parts of attach are done.  Allow detach, unless
+        * the driver's attach function scheduled deferred actions.
+        */
        config_pending_decr(dev);
 
        config_process_deferred(&deferred_config_queue, dev);
@@ -1872,24 +1923,49 @@ config_dump_garbage(struct devicelist *garbage)
 static int
 config_detach_enter(device_t dev)
 {
-       int error;
+       int error = 0;
 
        mutex_enter(&config_misc_lock);
-       for (;;) {
-               if (dev->dv_pending == 0 && dev->dv_detaching == NULL) {
-                       dev->dv_detaching = curlwp;
-                       error = 0;
-                       break;
-               }
+
+       /*
+        * Wait until attach has fully completed, and until any
+        * concurrent detach (e.g., drvctl racing with USB event
+        * thread) has completed.
+        *
+        * Caller must hold alldevs_nread or alldevs_nwrite (e.g., via
+        * deviter) to ensure the winner of the race doesn't free the
+        * device leading the loser of the race into use-after-free.
+        *
+        * XXX Not all callers do this!
+        */
+       while (dev->dv_pending || dev->dv_detaching) {
                KASSERTMSG(dev->dv_detaching != curlwp,
                    "recursively detaching %s", device_xname(dev));
                error = cv_wait_sig(&config_misc_cv, &config_misc_lock);
                if (error)
-                       break;
+                       goto out;
        }
-       KASSERT(error || dev->dv_detaching == curlwp);
-       mutex_exit(&config_misc_lock);
 
+       /*
+        * Attach has completed, and no other concurrent detach is
+        * running.  Claim the device for detaching.  This will cause
+        * all new attempts to acquire references to block.
+        */
+       KASSERT(dev->dv_attaching == NULL);
+       KASSERT(dev->dv_detaching == NULL);
+       dev->dv_detaching = curlwp;
+
+       /*
+        * Wait for all device_lookup_acquire references -- mostly, for
+        * all attempts to open the device -- to drain.  There may
+        * still be open instances of the device after this point, but
+        * all new attempts to acquire references will block until
+        * dv_detaching clears.
+        */
+       localcount_drain(dev->dv_localcount,
+           &config_misc_cv, &config_misc_lock);
+
+out:   mutex_exit(&config_misc_lock);
        return error;
 }
 
@@ -1980,9 +2056,18 @@ config_detach(device_t dev, int flags)
         */
        if (rv == 0)
                dev->dv_flags &= ~DVF_ACTIVE;
-       else if ((flags & DETACH_FORCE) == 0)
+       else if ((flags & DETACH_FORCE) == 0) {
+               /*
+                * Detach failed -- likely EBUSY.  Reset the
+                * localcount.  At this point there can be no
+                * references held, and no new references acquired --
+                * calls to device_lookup_acquire are held up on
+                * dv_detaching until config_detach_exit.
+                */
+               localcount_fini(dev->dv_localcount);
+               localcount_init(dev->dv_localcount);
                goto out;
-       else {
+       } else {
                panic("config_detach: forced detach of %s failed (%d)",
                    device_xname(dev), rv);
        }
@@ -2498,6 +2583,14 @@ config_alldevs_exit(struct alldevs_foray *af)
  * device_lookup:
  *
  *     Look up a device instance for a given driver.
+ *
+ *     Caller is responsible for ensuring the device's state is
+ *     stable, either by holding a reference already obtained with
+ *     device_lookup_acquire or by otherwise ensuring the device is
+ *     attached and can't be detached (e.g., holding an open device
+ *     node and ensuring *_detach calls vdevgone).
+ *
+ *     XXX Find a way to assert this.
  */
 device_t
 device_lookup(cfdriver_t cd, int unit)
@@ -2526,6 +2619,69 @@ device_lookup_private(cfdriver_t cd, int unit)
        return device_private(device_lookup(cd, unit));
 }
 
+/*
+ * device_lookup_acquire:
+ *
+ *     Look up a device instance for a given driver, and return a
+ *     reference to it that must be released by device_release.
+ *
+ *     => If the device is still attaching, blocks until *_attach has
+ *        returned.
+ *
+ *     => If the device is detaching, blocks until *_detach has
+ *        returned.  May succeed or fail in that case, depending on
+ *        whether *_detach has backed out (EBUSY) or committed to
+ *        detaching.
+ */
+device_t
+device_lookup_acquire(cfdriver_t cd, int unit)
+{
+       device_t dv;
+
+       /* XXX This should have a pserialized fast path -- TBD.  */
+       mutex_enter(&config_misc_lock);
+       mutex_enter(&alldevs_lock);
+retry: if (unit < 0 || unit >= cd->cd_ndevs ||
+           (dv = cd->cd_devs[unit]) == NULL ||
+           dv->dv_del_gen != 0) {
+               dv = NULL;
+       } else {
+               /*
+                * Wait for the device to stabilize, if attaching or
+                * detaching.  Either way we must wait for *_attach or
+                * *_detach to complete, and either way we must retry:
+                * even if detaching, *_detach might fail (EBUSY) so
+                * the device may still be there.
+                */
+               if ((dv->dv_attaching != NULL && dv->dv_attaching != curlwp) ||
+                   dv->dv_detaching != NULL) {
+                       mutex_exit(&alldevs_lock);
+                       cv_wait(&config_misc_cv, &config_misc_lock);
+                       mutex_enter(&alldevs_lock);
+                       goto retry;
+               }
+               localcount_acquire(dv->dv_localcount);
+       }
+       mutex_exit(&alldevs_lock);
+       mutex_exit(&config_misc_lock);
+
+       return dv;
+}
+
+/*
+ * device_release:
+ *
+ *     Release a reference to a device acquired with
+ *     device_lookup_acquire.
+ */
+void
+device_release(device_t dv)
+{
+
+       localcount_release(dv->dv_localcount,
+           &config_misc_cv, &config_misc_lock);
+}
+
 /*
  * device_find_by_xname:
  *
diff --git a/sys/kern/subr_devsw.c b/sys/kern/subr_devsw.c
index 1a0f721fdd65..598436e19d18 100644
--- a/sys/kern/subr_devsw.c
+++ b/sys/kern/subr_devsw.c
@@ -85,6 +85,11 @@ __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.38 2017/11/07 
18:35:57 christos Exp
 #include <sys/buf.h>
 #include <sys/reboot.h>
 #include <sys/sdt.h>
+#include <sys/atomic.h>
+#include <sys/localcount.h>
+#include <sys/pserialize.h>
+#include <sys/xcall.h>
+#include <sys/device.h>
 
 #ifdef DEVSW_DEBUG
 #define        DPRINTF(x)      printf x
@@ -97,12 +102,21 @@ __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.38 2017/11/07 
18:35:57 christos Exp
 #define        CDEVSW_SIZE     (sizeof(struct cdevsw *))
 #define        DEVSWCONV_SIZE  (sizeof(struct devsw_conv))
 
+struct devswref {
+       struct localcount       dr_lc;
+};
+
+/* XXX bdevsw, cdevsw, max_bdevsws, and max_cdevsws should be volatile */
 extern const struct bdevsw **bdevsw, *bdevsw0[];
 extern const struct cdevsw **cdevsw, *cdevsw0[];
 extern struct devsw_conv *devsw_conv, devsw_conv0[];
 extern const int sys_bdevsws, sys_cdevsws;
 extern int max_bdevsws, max_cdevsws, max_devsw_convs;
 
+static struct devswref *cdevswref;
+static struct devswref *bdevswref;
+static kcondvar_t devsw_cv;
+
 static int bdevsw_attach(const struct bdevsw *, devmajor_t *);
 static int cdevsw_attach(const struct cdevsw *, devmajor_t *);
 static void devsw_detach_locked(const struct bdevsw *, const struct cdevsw *);
@@ -118,6 +132,8 @@ devsw_init(void)
        KASSERT(sys_bdevsws < MAXDEVSW - 1);
        KASSERT(sys_cdevsws < MAXDEVSW - 1);
        mutex_init(&device_lock, MUTEX_DEFAULT, IPL_NONE);
+
+       cv_init(&devsw_cv, "devsw");
 }
 
 int
@@ -160,13 +176,17 @@ devsw_attach(const char *devname,
                }
 
                if (bdev != NULL)
-                       bdevsw[*bmajor] = bdev;
-               cdevsw[*cmajor] = cdev;
+                       atomic_store_release(&bdevsw[*bmajor], bdev);
+               atomic_store_release(&cdevsw[*cmajor], cdev);
 
                mutex_exit(&device_lock);
                return (0);
        }
 
+       /*
+        * XXX This should allocate what it needs up front so we never
+        * need to flail around trying to unwind.
+        */
        error = bdevsw_attach(bdev, bmajor);
        if (error != 0) 
                goto fail;
@@ -224,7 +244,8 @@ devsw_attach(const char *devname,
 static int
 bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor)
 {
-       const struct bdevsw **newptr;
+       const struct bdevsw **newbdevsw = NULL;
+       struct devswref *newbdevswref = NULL;
        devmajor_t bmajor;
        int i;
 
@@ -255,18 +276,34 @@ bdevsw_attach(const struct bdevsw *devsw, devmajor_t 
*devmajor)
 
        if (*devmajor >= max_bdevsws) {
                KASSERT(bdevsw == bdevsw0);
-               newptr = kmem_zalloc(MAXDEVSW * BDEVSW_SIZE, KM_NOSLEEP);
-               if (newptr == NULL)
+               newbdevsw = kmem_zalloc(MAXDEVSW * sizeof(newbdevsw[0]),
+                   KM_NOSLEEP);
+               newbdevswref = kmem_zalloc(((MAXDEVSW - sys_bdevsws) *
+                       sizeof(newbdevsw[0])),
+                   KM_NOSLEEP);
+               if (newbdevsw == NULL || newbdevswref == NULL) {
+                       if (newbdevsw) {
+                               kmem_free(newbdevsw,
+                                   MAXDEVSW * sizeof(newbdevsw[0]));
+                       }
+                       if (newbdevswref) {
+                               kmem_free(newbdevswref,
+                                   ((MAXDEVSW - sys_bdevsws) *
+                                       sizeof(newbdevswref[0])));
+                       }
                        return (ENOMEM);
-               memcpy(newptr, bdevsw, max_bdevsws * BDEVSW_SIZE);
-               bdevsw = newptr;
-               max_bdevsws = MAXDEVSW;
+               }
+               memcpy(newbdevsw, bdevsw, max_bdevsws * BDEVSW_SIZE);
+               atomic_store_release(&bdevsw, newbdevsw);
+               atomic_store_release(&bdevswref, newbdevswref);
+               atomic_store_release(&max_bdevsws, MAXDEVSW);
        }
 
        if (bdevsw[*devmajor] != NULL)
                return (EEXIST);
 
-       bdevsw[*devmajor] = devsw;
+       atomic_store_release(&bdevsw[*devmajor], devsw);
+       localcount_init(&bdevswref[*devmajor - sys_bdevsws].dr_lc);
 
        return (0);
 }
@@ -274,7 +311,8 @@ bdevsw_attach(const struct bdevsw *devsw, devmajor_t 
*devmajor)
 static int
 cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor)
 {
-       const struct cdevsw **newptr;
+       const struct cdevsw **newcdevsw = NULL;
+       struct devswref *newcdevswref = NULL;
        devmajor_t cmajor;
        int i;
 
@@ -302,18 +340,34 @@ cdevsw_attach(const struct cdevsw *devsw, devmajor_t 
*devmajor)
 
        if (*devmajor >= max_cdevsws) {
                KASSERT(cdevsw == cdevsw0);
-               newptr = kmem_zalloc(MAXDEVSW * CDEVSW_SIZE, KM_NOSLEEP);
-               if (newptr == NULL)
+               newcdevsw = kmem_zalloc(MAXDEVSW * sizeof(newcdevsw[0]),
+                   KM_NOSLEEP);
+               newcdevswref = kmem_zalloc(((MAXDEVSW - sys_cdevsws) *
+                       sizeof(newcdevswref[0])),
+                   KM_NOSLEEP);
+               if (newcdevsw == NULL || newcdevswref == NULL) {
+                       if (newcdevsw) {
+                               kmem_free(newcdevsw,
+                                   MAXDEVSW * sizeof(newcdevsw[0]));
+                       }
+                       if (newcdevswref) {
+                               kmem_free(newcdevswref,
+                                   ((MAXDEVSW - sys_cdevsws) *
+                                       sizeof(newcdevswref[0])));
+                       }
                        return (ENOMEM);
-               memcpy(newptr, cdevsw, max_cdevsws * CDEVSW_SIZE);
-               cdevsw = newptr;
-               max_cdevsws = MAXDEVSW;
+               }
+               memcpy(newcdevsw, cdevsw, max_cdevsws * sizeof(cdevsw[0]));
+               atomic_store_release(&cdevsw, newcdevsw);
+               atomic_store_release(&cdevswref, newcdevswref);
+               atomic_store_release(&max_cdevsws, MAXDEVSW);
        }
 
        if (cdevsw[*devmajor] != NULL)
                return (EEXIST);
 
-       cdevsw[*devmajor] = devsw;
+       atomic_store_release(&cdevsw[*devmajor], devsw);
+       localcount_init(&cdevswref[*devmajor - sys_cdevsws].dr_lc);
 
        return (0);
 }
@@ -321,36 +375,69 @@ cdevsw_attach(const struct cdevsw *devsw, devmajor_t 
*devmajor)
 static void
 devsw_detach_locked(const struct bdevsw *bdev, const struct cdevsw *cdev)
 {
-       int i;
+       int bi, ci;
 
        KASSERT(mutex_owned(&device_lock));
 
+       /* Prevent new references.  */
        if (bdev != NULL) {
-               for (i = 0 ; i < max_bdevsws ; i++) {
-                       if (bdevsw[i] != bdev)
+               for (bi = 0; bi < max_bdevsws; bi++) {
+                       if (bdevsw[bi] != bdev)
                                continue;
-                       bdevsw[i] = NULL;
+                       atomic_store_relaxed(&bdevsw[bi], NULL);
                        break;
                }
+               KASSERT(bi < max_bdevsws);
        }
        if (cdev != NULL) {
-               for (i = 0 ; i < max_cdevsws ; i++) {
-                       if (cdevsw[i] != cdev)
+               for (ci = 0; ci < max_cdevsws; ci++) {
+                       if (cdevsw[ci] != cdev)
                                continue;
-                       cdevsw[i] = NULL;
+                       atomic_store_relaxed(&cdevsw[ci], NULL);
                        break;
                }
+               KASSERT(ci < max_cdevsws);
+       }
+
+       if (bdev == NULL && cdev == NULL) /* XXX possible? */
+               return;
+
+       /*
+        * Wait for all bdevsw_lookup_acquire, cdevsw_lookup_acquire
+        * calls to notice that the devsw is gone.
+        *
+        * XXX Can't use pserialize_perform here because devsw_init is
+        * too early for pserialize_create().
+        */
+       xc_barrier(0);
+
+       /*
+        * Wait for all references to drain.  It is the caller's
+        * responsibility to ensure that at this point, there are no
+        * extant open instances and all new d_open calls will fail.
+        *
+        * Note that localcount_drain may release and reacquire
+        * device_lock.
+        */
+       if (bdev != NULL) {
+               localcount_drain(&bdevswref[bi - sys_bdevsws].dr_lc,
+                   &devsw_cv, &device_lock);
+               localcount_fini(&bdevswref[bi - sys_bdevsws].dr_lc);
+       }
+       if (cdev != NULL) {
+               localcount_drain(&cdevswref[ci - sys_cdevsws].dr_lc,
+                   &devsw_cv, &device_lock);
+               localcount_fini(&cdevswref[ci - sys_cdevsws].dr_lc);
        }
 }
 
-int
+void
 devsw_detach(const struct bdevsw *bdev, const struct cdevsw *cdev)
 {
 
        mutex_enter(&device_lock);
        devsw_detach_locked(bdev, cdev);
        mutex_exit(&device_lock);
-       return 0;
 }
 
 /*
@@ -366,10 +453,63 @@ bdevsw_lookup(dev_t dev)
        if (dev == NODEV)
                return (NULL);
        bmajor = major(dev);
-       if (bmajor < 0 || bmajor >= max_bdevsws)
+       if (bmajor < 0 || bmajor >= atomic_load_relaxed(&max_bdevsws))
                return (NULL);
 
-       return (bdevsw[bmajor]);
+       return atomic_load_consume(&bdevsw)[bmajor];
+}
+
+static const struct bdevsw *
+bdevsw_lookup_acquire(dev_t dev, struct localcount **lcp)
+{
+       devmajor_t bmajor;
+       const struct bdevsw *bdev = NULL, *const *curbdevsw;
+       struct devswref *curbdevswref;
+       int s;
+
+       if (dev == NODEV)
+               return NULL;
+       bmajor = major(dev);
+       if (bmajor < 0)
+               return NULL;
+
+       /*
+        * If it's statically linked into the kernel, no need for
+        * localcount reference -- it will never go away until the
+        * machine reboots.
+        */
+       if (bmajor < sys_bdevsws) {
+               *lcp = NULL;
+               return bdevsw0[bmajor];
+       }
+
+       s = pserialize_read_enter();
+
+       /*
+        * max_bdevsws never goes down, so it is safe to rely on this
+        * condition without any locking for the array access below.
+        */
+       if (bmajor >= atomic_load_acquire(&max_bdevsws))
+               goto out;
+       curbdevsw = atomic_load_consume(&bdevsw);
+       if ((bdev = atomic_load_consume(&curbdevsw[bmajor])) == NULL)
+               goto out;
+
+       curbdevswref = atomic_load_consume(&bdevswref);
+       *lcp = &curbdevswref[bmajor - sys_bdevsws].dr_lc;
+       localcount_acquire(*lcp);
+
+out:   pserialize_read_exit(s);
+       return bdev;
+}
+
+static void
+bdevsw_release(const struct bdevsw *bdev, struct localcount *lc)
+{
+
+       if (lc == NULL)
+               return;
+       localcount_release(lc, &devsw_cv, &device_lock);
 }
 
 /*
@@ -385,10 +525,63 @@ cdevsw_lookup(dev_t dev)
        if (dev == NODEV)
                return (NULL);
        cmajor = major(dev);
-       if (cmajor < 0 || cmajor >= max_cdevsws)
+       if (cmajor < 0 || cmajor >= atomic_load_relaxed(&max_cdevsws))
                return (NULL);
 
-       return (cdevsw[cmajor]);
+       return atomic_load_consume(&cdevsw)[cmajor];
+}
+
+static const struct cdevsw *
+cdevsw_lookup_acquire(dev_t dev, struct localcount **lcp)
+{
+       devmajor_t cmajor;
+       const struct cdevsw *cdev = NULL, *const *curcdevsw;
+       struct devswref *curcdevswref;
+       int s;
+
+       if (dev == NODEV)
+               return NULL;
+       cmajor = major(dev);
+       if (cmajor < 0)
+               return NULL;
+
+       /*
+        * If it's statically linked into the kernel, no need for
+        * localcount reference -- it will never go away until the
+        * machine reboots.
+        */
+       if (cmajor < sys_cdevsws) {
+               *lcp = NULL;
+               return cdevsw0[cmajor];
+       }
+
+       s = pserialize_read_enter();
+
+       /*
+        * max_cdevsws never goes down, so it is safe to rely on this
+        * condition without any locking for the array access below.
+        */
+       if (cmajor >= atomic_load_acquire(&max_cdevsws))
+               goto out;
+       curcdevsw = atomic_load_consume(&cdevsw);
+       if ((cdev = atomic_load_consume(&curcdevsw[cmajor])) == NULL)
+               goto out;
+
+       curcdevswref = atomic_load_consume(&cdevswref);
+       *lcp = &curcdevswref[cmajor - sys_cdevsws].dr_lc;
+       localcount_acquire(*lcp);
+
+out:   pserialize_read_exit(s);
+       return cdev;
+}
+
+static void
+cdevsw_release(const struct cdevsw *cdev, struct localcount *lc)
+{
+
+       if (lc == NULL)
+               return;
+       localcount_release(lc, &devsw_cv, &device_lock);
 }
 
 /*
@@ -400,10 +593,13 @@ cdevsw_lookup(dev_t dev)
 devmajor_t
 bdevsw_lookup_major(const struct bdevsw *bdev)
 {
-       devmajor_t bmajor;
+       const struct bdevsw *const *curbdevsw;
+       devmajor_t bmajor, bmax;
 
-       for (bmajor = 0 ; bmajor < max_bdevsws ; bmajor++) {
-               if (bdevsw[bmajor] == bdev)
+       bmax = atomic_load_acquire(&max_bdevsws);
+       curbdevsw = atomic_load_consume(&bdevsw);
+       for (bmajor = 0; bmajor < bmax; bmajor++) {
+               if (atomic_load_relaxed(&curbdevsw[bmajor]) == bdev)
                        return (bmajor);
        }
 
@@ -419,10 +615,13 @@ bdevsw_lookup_major(const struct bdevsw *bdev)
 devmajor_t
 cdevsw_lookup_major(const struct cdevsw *cdev)
 {
-       devmajor_t cmajor;
+       const struct cdevsw *const *curcdevsw;
+       devmajor_t cmajor, cmax;
 
-       for (cmajor = 0 ; cmajor < max_cdevsws ; cmajor++) {
-               if (cdevsw[cmajor] == cdev)
+       cmax = atomic_load_acquire(&max_cdevsws);
+       curcdevsw = atomic_load_consume(&cdevsw);
+       for (cmajor = 0; cmajor < cmax; cmajor++) {
+               if (atomic_load_relaxed(&curcdevsw[cmajor]) == cdev)
                        return (cmajor);
        }
 
@@ -697,22 +896,41 @@ int
 bdev_open(dev_t dev, int flag, int devtype, lwp_t *l)
 {
        const struct bdevsw *d;
-       int rv, mpflag;
+       struct localcount *lc;
+       device_t dv = NULL/*XXXGCC*/;
+       int unit, rv, mpflag;
 
-       /*
-        * For open we need to lock, in order to synchronize
-        * with attach/detach.
-        */
-       mutex_enter(&device_lock);
-       d = bdevsw_lookup(dev);
-       mutex_exit(&device_lock);
+       d = bdevsw_lookup_acquire(dev, &lc);
        if (d == NULL)
                return ENXIO;
 
+       if (d->d_devtounit) {
+               /*
+                * If the device node corresponds to an autoconf device
+                * instance, acquire a reference to it so that during
+                * d_open, device_lookup is stable.
+                *
+                * XXX This should also arrange to instantiate cloning
+                * pseudo-devices if appropriate, but that requires
+                * reviewing them all to find and verify a common
+                * pattern.
+                */
+               if ((unit = (*d->d_devtounit)(dev)) == -1)
+                       return ENXIO;
+               if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) == NULL)
+                       return ENXIO;
+       }
+
        DEV_LOCK(d);
        rv = (*d->d_open)(dev, flag, devtype, l);
        DEV_UNLOCK(d);
 
+       if (d->d_devtounit) {
+               device_release(dv);
+       }
+
+       bdevsw_release(d, lc);
+
        return rv;
 }
 
@@ -855,22 +1073,41 @@ int
 cdev_open(dev_t dev, int flag, int devtype, lwp_t *l)
 {
        const struct cdevsw *d;
-       int rv, mpflag;
+       struct localcount *lc;
+       device_t dv = NULL/*XXXGCC*/;
+       int unit, rv, mpflag;
 
-       /*
-        * For open we need to lock, in order to synchronize
-        * with attach/detach.
-        */
-       mutex_enter(&device_lock);
-       d = cdevsw_lookup(dev);
-       mutex_exit(&device_lock);
+       d = cdevsw_lookup_acquire(dev, &lc);
        if (d == NULL)
                return ENXIO;
 
+       if (d->d_devtounit) {
+               /*
+                * If the device node corresponds to an autoconf device
+                * instance, acquire a reference to it so that during
+                * d_open, device_lookup is stable.
+                *
+                * XXX This should also arrange to instantiate cloning
+                * pseudo-devices if appropriate, but that requires
+                * reviewing them all to find and verify a common
+                * pattern.
+                */
+               if ((unit = (*d->d_devtounit)(dev)) == -1)
+                       return ENXIO;
+               if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) == NULL)
+                       return ENXIO;
+       }
+
        DEV_LOCK(d);
        rv = (*d->d_open)(dev, flag, devtype, l);
        DEV_UNLOCK(d);
 
+       if (d->d_devtounit) {
+               device_release(dv);
+       }
+
+       cdevsw_release(d, lc);
+
        return rv;
 }
 
@@ -1063,3 +1300,18 @@ nommap(dev_t dev, off_t off, int prot)
 
        return (paddr_t)-1;
 }
+
+/*
+ * dev_minor_unit(dev)
+ *
+ *     Returns minor(dev) as an int.  Intended for use with struct
+ *     bdevsw, cdevsw::d_devtounit for drivers whose /dev nodes are
+ *     implemented by reference to an autoconf instance with the minor
+ *     number.
+ */
+int
+dev_minor_unit(dev_t dev)
+{
+
+       return minor(dev);
+}
diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c
index da664f920382..41218421db57 100644
--- a/sys/kern/subr_disk.c
+++ b/sys/kern/subr_disk.c
@@ -728,3 +728,10 @@ disk_set_info(device_t dev, struct disk *dk, const char 
*type)
        if (odisk_info)
                prop_object_release(odisk_info);
 }
+
+int
+disklabel_dev_unit(dev_t dev)
+{
+
+       return DISKUNIT(dev);
+}
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
index b4bc4c34ab03..b8fef2c7014f 100644
--- a/sys/miscfs/specfs/spec_vnops.c
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -81,6 +81,7 @@ __KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.183 2021/07/18 
23:57:14 dholland Ex
 #include <sys/kauth.h>
 #include <sys/fstrans.h>
 #include <sys/module.h>
+#include <sys/atomic.h>
 
 #include <miscfs/genfs/genfs.h>
 #include <miscfs/specfs/specdev.h>
@@ -103,6 +104,7 @@ const char  devcls[] = "devcls";
 
 static vnode_t *specfs_hash[SPECHSZ];
 extern struct mount *dead_rootmount;
+static struct kcondvar specfs_iocv;
 
 /*
  * This vnode operations vector is used for special device nodes
@@ -210,6 +212,100 @@ spec_init(void)
 
        rawio_listener = kauth_listen_scope(KAUTH_SCOPE_DEVICE,
            rawio_listener_cb, NULL);
+       cv_init(&specfs_iocv, "specio");
+}
+
+/*
+ * spec_io_enter(vp, &sn, &dev)
+ *
+ *     Enter an operation that may not hold vp's vnode lock or an
+ *     fstrans on vp's mount.  Until spec_io_exit, the vnode will not
+ *     be revoked.
+ *
+ *     On success, set sn to the specnode pointer and dev to the dev_t
+ *     number and return zero.  Caller must later call spec_io_exit
+ *     when done.
+ *
+ *     On failure, return ENXIO -- the device has been revoked and no
+ *     longer exists.
+ */
+static int
+spec_io_enter(struct vnode *vp, struct specnode **snp, dev_t *devp)
+{
+       dev_t dev;
+       struct specnode *sn;
+       unsigned iocnt;
+       int error = 0;
+
+       mutex_enter(vp->v_interlock);
+
+       /*
+        * Extract all the info we need from the vnode, unless the
+        * vnode has already been reclaimed.  This can happen if the
+        * underlying device has been removed and all the device nodes
+        * for it have been revoked.  The caller may not hold a vnode
+        * lock or fstrans to prevent this from happening before it has
+        * had an opportunity to notice the vnode is dead.
+        */
+       if (vdead_check(vp, VDEAD_NOWAIT) != 0 ||
+           (sn = vp->v_specnode) == NULL ||
+           (dev = vp->v_rdev) == NODEV) {
+               error = ENXIO;
+               goto out;
+       }
+
+       /*
+        * Notify spec_node_revoke that we are doing an I/O operation
+        * which may not be bracketed by fstrans(9) and thus is not
+        * blocked by vfs suspension.
+        *
+        * We could hold this reference with psref(9) instead, but we
+        * already have to take the interlock for vdead_check, so
+        * there's not much more cost here to another atomic operation.
+        */
+       iocnt = atomic_inc_uint_nv(&sn->sn_iocnt);
+       CTASSERT(MAXLWP < UINT_MAX);
+       KASSERT(iocnt < UINT_MAX);
+
+       /* Success!  */
+       *snp = sn;
+       *devp = dev;
+       error = 0;
+
+out:   mutex_exit(vp->v_interlock);
+       return error;
+}
+
+/*
+ * spec_io_exit(vp, sn)
+ *
+ *     Exit an operation entered with a successful spec_io_enter --
+ *     allow concurrent spec_node_revoke to proceed.  The argument sn
+ *     must match the struct specnode pointer returned by spec_io_enter
+ *     for vp.
+ */
+static void
+spec_io_exit(struct vnode *vp, struct specnode *sn)
+{
+       unsigned iocnt;
+
+       KASSERT(vp->v_specnode == sn);
+
+       /*
+        * We are done.  Notify spec_node_revoke if appropriate.  The
+        * transition of 1 -> 0 must happen under device_lock so
+        * spec_node_revoke doesn't miss a wakeup.
+        */
+       do {
+               iocnt = atomic_load_relaxed(&sn->sn_iocnt);
+               if (iocnt == 1) {
+                       mutex_enter(&device_lock);
+                       if (atomic_dec_uint_nv(&sn->sn_iocnt) == 0)
+                               cv_broadcast(&specfs_iocv);
+                       mutex_exit(&device_lock);
+                       break;
+               }
+       } while (atomic_cas_uint(&sn->sn_iocnt, iocnt, iocnt - 1) != iocnt);
 }
 
 /*
@@ -261,6 +357,7 @@ spec_node_init(vnode_t *vp, dev_t rdev)
        sn->sn_opencnt = 0;
        sn->sn_rdev = rdev;
        sn->sn_gone = false;
+       sn->sn_iocnt = 0;
        vp->v_specnode = sn;
        vp->v_specnext = *vpp;
        *vpp = vp;
@@ -406,7 +503,22 @@ spec_node_revoke(vnode_t *vp)
        if (sn->sn_opencnt != 0) {
                sd->sd_opencnt -= (sn->sn_opencnt - 1);
                sn->sn_opencnt = 1;
+
+               /* Prevent new opens.  */
                sn->sn_gone = true;
+
+               /*
+                * Wait for all I/O operations in spec_io_enter/exit to
+                * complete.  The caller holds the file system
+                * suspended, but file system suspension doesn't block
+                * all device special operations -- only those that run
+                * to completion holding the vnode lock and a handful
+                * of others.  It is the driver's responsibility to
+                * ensure that new opens fail gracefully at this point.
+                */
+               while (atomic_load_relaxed(&sn->sn_iocnt) != 0)
+                       cv_wait(&specfs_iocv, &device_lock);
+
                mutex_exit(&device_lock);
 
                VOP_CLOSE(vp, FNONBLOCK, NOCRED);
@@ -435,6 +547,7 @@ spec_node_destroy(vnode_t *vp)
        KASSERT(vp->v_type == VBLK || vp->v_type == VCHR);
        KASSERT(vp->v_specnode != NULL);
        KASSERT(sn->sn_opencnt == 0);
+       KASSERT(sn->sn_iocnt == 0);
 
        mutex_enter(&device_lock);
        /* Remove from the hash and destroy the node. */
@@ -690,6 +803,7 @@ spec_read(void *v)
        struct vnode *vp = ap->a_vp;
        struct uio *uio = ap->a_uio;
        struct lwp *l = curlwp;
+       dev_t dev;
        struct buf *bp;
        daddr_t bn;
        int bsize, bscale;
@@ -701,28 +815,32 @@ spec_read(void *v)
        int *rasizes;
        int nrablks, ratogo;
 
+       KASSERT(VOP_ISLOCKED(vp) || (vp->v_vflag & VV_LOCKSWORK) == 0);
        KASSERT(uio->uio_rw == UIO_READ);
        KASSERTMSG(VMSPACE_IS_KERNEL_P(uio->uio_vmspace) ||
                   uio->uio_vmspace == curproc->p_vmspace,
                "vmspace belongs to neither kernel nor curproc");
 
        if (uio->uio_resid == 0)
-               return (0);
+               return 0;
 
-       switch (vp->v_type) {
+       dev = vp->v_rdev;
 
+       switch (vp->v_type) {
        case VCHR:
+               fstrans_start(vp->v_mount);
                VOP_UNLOCK(vp);
-               error = cdev_read(vp->v_rdev, uio, ap->a_ioflag);
+               error = cdev_read(dev, uio, ap->a_ioflag);
                vn_lock(vp, LK_SHARED | LK_RETRY);
-               return (error);
+               fstrans_done(vp->v_mount);
+               return error;
 
        case VBLK:
                KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
                if (uio->uio_offset < 0)
-                       return (EINVAL);
+                       return EINVAL;
 
-               if (bdev_ioctl(vp->v_rdev, DIOCGPARTINFO, &pi, FREAD, l) == 0)
+               if (bdev_ioctl(dev, DIOCGPARTINFO, &pi, FREAD, l) == 0)
                        bsize = imin(imax(pi.pi_bsize, DEV_BSIZE), MAXBSIZE);
                else
                        bsize = BLKDEV_IOSIZE;
@@ -767,7 +885,7 @@ spec_read(void *v)
                kmem_free(rablks, nra * sizeof(*rablks));
                kmem_free(rasizes, nra * sizeof(*rasizes));
 
-               return (error);
+               return error;
 
        default:
                panic("spec_read type");
@@ -790,6 +908,7 @@ spec_write(void *v)
        } */ *ap = v;
        struct vnode *vp = ap->a_vp;
        struct uio *uio = ap->a_uio;
+       dev_t dev;
        struct lwp *l = curlwp;
        struct buf *bp;
        daddr_t bn;
@@ -798,27 +917,32 @@ spec_write(void *v)
        int n, on;
        int error = 0;
 
+       KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE ||
+           (vp->v_vflag & VV_LOCKSWORK) == 0);
        KASSERT(uio->uio_rw == UIO_WRITE);
        KASSERTMSG(VMSPACE_IS_KERNEL_P(uio->uio_vmspace) ||
                   uio->uio_vmspace == curproc->p_vmspace,
                "vmspace belongs to neither kernel nor curproc");
 
-       switch (vp->v_type) {
+       dev = vp->v_rdev;
 
+       switch (vp->v_type) {
        case VCHR:
+               fstrans_start(vp->v_mount);
                VOP_UNLOCK(vp);
-               error = cdev_write(vp->v_rdev, uio, ap->a_ioflag);
+               error = cdev_write(dev, uio, ap->a_ioflag);
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-               return (error);
+               fstrans_done(vp->v_mount);
+               return error;
 
        case VBLK:
                KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
                if (uio->uio_resid == 0)
-                       return (0);
+                       return 0;
                if (uio->uio_offset < 0)
-                       return (EINVAL);
+                       return EINVAL;
 
-               if (bdev_ioctl(vp->v_rdev, DIOCGPARTINFO, &pi, FREAD, l) == 0)
+               if (bdev_ioctl(dev, DIOCGPARTINFO, &pi, FREAD, l) == 0)
                        bsize = imin(imax(pi.pi_bsize, DEV_BSIZE), MAXBSIZE);
                else
                        bsize = BLKDEV_IOSIZE;
@@ -866,31 +990,23 @@ spec_fdiscard(void *v)
                off_t a_pos;
                off_t a_len;
        } */ *ap = v;
-       struct vnode *vp;
+       struct vnode *vp = ap->a_vp;
        dev_t dev;
 
-       vp = ap->a_vp;
-       dev = NODEV;
-
-       mutex_enter(vp->v_interlock);
-       if (vdead_check(vp, VDEAD_NOWAIT) == 0 && vp->v_specnode != NULL) {
-               dev = vp->v_rdev;
-       }
-       mutex_exit(vp->v_interlock);
+       KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE ||
+           (vp->v_vflag & VV_LOCKSWORK) == 0);
 
-       if (dev == NODEV) {
-               return ENXIO;
-       }
+       dev = vp->v_rdev;
 
        switch (vp->v_type) {
-           case VCHR:
+       case VCHR:
                // this is not stored for character devices
                //KASSERT(vp == vp->v_specnode->sn_dev->sd_cdevvp);
                return cdev_discard(dev, ap->a_pos, ap->a_len);
-           case VBLK:
+       case VBLK:
                KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
                return bdev_discard(dev, ap->a_pos, ap->a_len);
-           default:
+       default:
                panic("spec_fdiscard: not a device\n");
        }
 }
@@ -909,40 +1025,32 @@ spec_ioctl(void *v)
                int  a_fflag;
                kauth_cred_t a_cred;
        } */ *ap = v;
-       struct vnode *vp;
+       struct vnode *vp = ap->a_vp;
+       struct specnode *sn;
        dev_t dev;
+       int error;
 
-       /*
-        * Extract all the info we need from the vnode, taking care to
-        * avoid a race with VOP_REVOKE().
-        */
-
-       vp = ap->a_vp;
-       dev = NODEV;
-       mutex_enter(vp->v_interlock);
-       if (vdead_check(vp, VDEAD_NOWAIT) == 0 && vp->v_specnode) {
-               dev = vp->v_rdev;
-       }
-       mutex_exit(vp->v_interlock);
-       if (dev == NODEV) {
-               return ENXIO;
-       }
+       error = spec_io_enter(vp, &sn, &dev);
+       if (error)
+               return error;
 
        switch (vp->v_type) {
-
        case VCHR:
-               return cdev_ioctl(dev, ap->a_command, ap->a_data,
+               error = cdev_ioctl(dev, ap->a_command, ap->a_data,
                    ap->a_fflag, curlwp);
-
+               break;
        case VBLK:
                KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
-               return bdev_ioctl(dev, ap->a_command, ap->a_data,
+               error = bdev_ioctl(dev, ap->a_command, ap->a_data,
                   ap->a_fflag, curlwp);
-
+               break;
        default:
                panic("spec_ioctl");
                /* NOTREACHED */
        }
+
+       spec_io_exit(vp, sn);
+       return error;
 }
 
 /* ARGSUSED */
@@ -953,33 +1061,25 @@ spec_poll(void *v)
                struct vnode *a_vp;
                int a_events;
        } */ *ap = v;
-       struct vnode *vp;
+       struct vnode *vp = ap->a_vp;
+       struct specnode *sn;
        dev_t dev;
+       int revents;
 
-       /*
-        * Extract all the info we need from the vnode, taking care to
-        * avoid a race with VOP_REVOKE().
-        */
-
-       vp = ap->a_vp;
-       dev = NODEV;
-       mutex_enter(vp->v_interlock);
-       if (vdead_check(vp, VDEAD_NOWAIT) == 0 && vp->v_specnode) {
-               dev = vp->v_rdev;
-       }
-       mutex_exit(vp->v_interlock);
-       if (dev == NODEV) {
+       if (spec_io_enter(vp, &sn, &dev) != 0)
                return POLLERR;
-       }
 
        switch (vp->v_type) {
-
        case VCHR:
-               return cdev_poll(dev, ap->a_events, curlwp);
-
+               revents = cdev_poll(dev, ap->a_events, curlwp);
+               break;
        default:
-               return (genfs_poll(v));
+               revents = genfs_poll(v);
+               break;
        }
+
+       spec_io_exit(vp, sn);
+       return revents;
 }
 
 /* ARGSUSED */
@@ -990,20 +1090,30 @@ spec_kqfilter(void *v)
                struct vnode    *a_vp;
                struct proc     *a_kn;
        } */ *ap = v;
+       struct vnode *vp = ap->a_vp;
+       struct specnode *sn;
        dev_t dev;
+       int error;
 
-       switch (ap->a_vp->v_type) {
+       error = spec_io_enter(vp, &sn, &dev);
+       if (error)
+               return error;
 
+       switch (vp->v_type) {
        case VCHR:
-               dev = ap->a_vp->v_rdev;
-               return cdev_kqfilter(dev, ap->a_kn);
+               error = cdev_kqfilter(dev, ap->a_kn);
+               break;
        default:
                /*
                 * Block devices don't support kqfilter, and refuse it
                 * for any other files (like those vflush()ed) too.
                 */
-               return (EOPNOTSUPP);
+               error = EOPNOTSUPP;
+               break;
        }
+
+       spec_io_exit(vp, sn);
+       return error;
 }
 
 /*
@@ -1018,11 +1128,19 @@ spec_mmap(void *v)
                kauth_cred_t a_cred;
        } */ *ap = v;
        struct vnode *vp = ap->a_vp;
+       struct specnode *sn;
+       dev_t dev;
+       int error;
+
+       error = spec_io_enter(vp, &sn, &dev);
+       if (error)
+               return error;
 
        KASSERT(vp->v_type == VBLK);
-       if (bdev_type(vp->v_rdev) != D_DISK)
+       if (bdev_type(dev) != D_DISK)
                return EINVAL;
 
+       spec_io_exit(vp, sn);
        return 0;
 }
 
@@ -1067,27 +1185,15 @@ spec_strategy(void *v)
        } */ *ap = v;
        struct vnode *vp = ap->a_vp;
        struct buf *bp = ap->a_bp;
+       struct specnode *sn;
        dev_t dev;
+       bool ioentered = false;
        int error;
 
-       dev = NODEV;
-
-       /*
-        * Extract all the info we need from the vnode, taking care to
-        * avoid a race with VOP_REVOKE().
-        */
-
-       mutex_enter(vp->v_interlock);
-       if (vdead_check(vp, VDEAD_NOWAIT) == 0 && vp->v_specnode != NULL) {
-               KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp);
-               dev = vp->v_rdev;
-       }
-       mutex_exit(vp->v_interlock);
-
-       if (dev == NODEV) {
-               error = ENXIO;
+       error = spec_io_enter(vp, &sn, &dev);
+       if (error)
                goto out;
-       }
+       ioentered = true;
        bp->b_dev = dev;
 
        if (!(bp->b_flags & B_READ)) {
@@ -1106,14 +1212,15 @@ spec_strategy(void *v)
                        goto out;
        }
        bdev_strategy(bp);
-
-       return 0;
-
-out:
-       bp->b_error = error;
-       bp->b_resid = bp->b_bcount;
-       biodone(bp);
-
+       error = 0;
+
+out:   if (ioentered)
+               spec_io_exit(vp, sn);
+       if (error) {
+               bp->b_error = error;
+               bp->b_resid = bp->b_bcount;
+               biodone(bp);
+       }
        return error;
 }
 
@@ -1284,18 +1391,22 @@ spec_close(void *v)
         * might end up sleeping for someone else who wants our queues. They
         * won't get them if we hold the vnode locked.
         */
-       if (!(flags & FNONBLOCK))
+       if (!(flags & FNONBLOCK)) {
+               fstrans_start(vp->v_mount);
                VOP_UNLOCK(vp);
+       }
 
        if (vp->v_type == VBLK)
                error = bdev_close(dev, flags, mode, curlwp);
        else
                error = cdev_close(dev, flags, mode, curlwp);
 
-       if (!(flags & FNONBLOCK))
+       if (!(flags & FNONBLOCK)) {
                vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+               fstrans_done(vp->v_mount);
+       }
 
-       return (error);
+       return error;
 }
 
 /*
diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h
index 621c103e80b1..e8afc0606db9 100644
--- a/sys/miscfs/specfs/specdev.h
+++ b/sys/miscfs/specfs/specdev.h
@@ -69,6 +69,7 @@ typedef struct specnode {
        u_int           sn_opencnt;
        dev_t           sn_rdev;
        bool            sn_gone;
+       volatile u_int  sn_iocnt;       /* # active I/O operations */
 } specnode_t;
 
 typedef struct specdev {
diff --git a/sys/modules/examples/pollpal/pollpal.c 
b/sys/modules/examples/pollpal/pollpal.c
index b76e0733699b..d8ddc73450e0 100644
--- a/sys/modules/examples/pollpal/pollpal.c
+++ b/sys/modules/examples/pollpal/pollpal.c
@@ -311,7 +311,8 @@ pollpal_modcmd(modcmd_t cmd, void *arg __unused)
        case MODULE_CMD_FINI:
                if (pollpal_nopen != 0)
                        return EBUSY;
-               return devsw_detach(NULL, &pollpal_cdevsw);
+               devsw_detach(NULL, &pollpal_cdevsw);
+               return 0;
        default:
                return ENOTTY;
        } 
diff --git a/sys/net/if_tap.c b/sys/net/if_tap.c
index 0b57ad4a711c..314f4647707c 100644
--- a/sys/net/if_tap.c
+++ b/sys/net/if_tap.c
@@ -256,9 +256,7 @@ tapdetach(void)
 
        if_clone_detach(&tap_cloners);
 #ifdef _MODULE
-       error = devsw_detach(NULL, &tap_cdevsw);
-       if (error != 0)
-               goto out2;
+       devsw_detach(NULL, &tap_cdevsw);
 #endif
 
        if (tap_count != 0) {
@@ -277,7 +275,6 @@ tapdetach(void)
  out1:
 #ifdef _MODULE
        devsw_attach("tap", NULL, &tap_bmajor, &tap_cdevsw, &tap_cmajor);
- out2:
 #endif
        if_clone_attach(&tap_cloners);
 
diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c
index 4f533a8f08d1..f4e5b6d86d43 100644
--- a/sys/net/if_tun.c
+++ b/sys/net/if_tun.c
@@ -142,17 +142,10 @@ tuninit(void)
 static int
 tundetach(void)
 {
-#ifdef _MODULE
-       int error;
-#endif
 
        if_clone_detach(&tun_cloner);
 #ifdef _MODULE
-       error = devsw_detach(NULL, &tun_cdevsw);
-       if (error != 0) {
-               if_clone_attach(&tun_cloner);
-               return error;
-       }
+       devsw_detach(NULL, &tun_cdevsw);
 #endif
 
        if (!LIST_EMPTY(&tun_softc_list) || !LIST_EMPTY(&tunz_softc_list)) {
diff --git a/sys/rump/dev/lib/libbpf/bpf_component.c 
b/sys/rump/dev/lib/libbpf/bpf_component.c
index 05807d371d40..d41d1987afe8 100644
--- a/sys/rump/dev/lib/libbpf/bpf_component.c
+++ b/sys/rump/dev/lib/libbpf/bpf_component.c
@@ -50,6 +50,5 @@ RUMP_COMPONENT(RUMP_COMPONENT_NET)
                panic("bpf devsw attach failed: %d", error);
        if ((error = rump_vfs_makeonedevnode(S_IFCHR, "/dev/bpf", cmaj, 0)) !=0)
                panic("cannot create bpf device nodes: %d", error);
-       if ((error = devsw_detach(NULL, &bpf_cdevsw)) != 0)
-               panic("cannot detach bpf devsw: %d", error);
+       devsw_detach(NULL, &bpf_cdevsw);
 }
diff --git a/sys/rump/dev/lib/libdrvctl/drvctl_component.c 
b/sys/rump/dev/lib/libdrvctl/drvctl_component.c
index e2e79f45f9de..ac4e103fdb9c 100644
--- a/sys/rump/dev/lib/libdrvctl/drvctl_component.c
+++ b/sys/rump/dev/lib/libdrvctl/drvctl_component.c
@@ -51,7 +51,5 @@ RUMP_COMPONENT(RUMP_COMPONENT_DEV)
        if ( error !=0)
                panic("cannot create drvctl device node: %d", error);
 
-       error = devsw_detach(NULL, &drvctl_cdevsw);
-       if (error != 0)
-               panic("cannot detach drvctl devsw: %d", error);
+       devsw_detach(NULL, &drvctl_cdevsw);
 }
diff --git a/sys/sys/conf.h b/sys/sys/conf.h
index 081631d2111f..cb4c287d1982 100644
--- a/sys/sys/conf.h
+++ b/sys/sys/conf.h
@@ -76,6 +76,8 @@ struct bdevsw {
        int             (*d_dump)(dev_t, daddr_t, void *, size_t);
        int             (*d_psize)(dev_t);
        int             (*d_discard)(dev_t, off_t, off_t);
+       int             (*d_devtounit)(dev_t);
+       struct cfdriver *d_cfdriver;
        int             d_flag;
 };
 
@@ -94,6 +96,8 @@ struct cdevsw {
        paddr_t         (*d_mmap)(dev_t, off_t, int);
        int             (*d_kqfilter)(dev_t, struct knote *);
        int             (*d_discard)(dev_t, off_t, off_t);
+       int             (*d_devtounit)(dev_t);
+       struct cfdriver *d_cfdriver;
        int             d_flag;
 };
 
@@ -104,7 +108,7 @@ extern kmutex_t device_lock;
 
 int devsw_attach(const char *, const struct bdevsw *, devmajor_t *,
                 const struct cdevsw *, devmajor_t *);
-int devsw_detach(const struct bdevsw *, const struct cdevsw *);
+void devsw_detach(const struct bdevsw *, const struct cdevsw *);
 const struct bdevsw *bdevsw_lookup(dev_t);
 const struct cdevsw *cdevsw_lookup(dev_t);
 devmajor_t bdevsw_lookup_major(const struct bdevsw *);
@@ -276,6 +280,7 @@ devmajor_t devsw_name2blk(const char *, char *, size_t);
 devmajor_t devsw_name2chr(const char *, char *, size_t);
 dev_t devsw_chr2blk(dev_t);
 dev_t devsw_blk2chr(dev_t);
+int dev_minor_unit(dev_t);
 
 void mm_init(void);
 #endif /* _KERNEL */
diff --git a/sys/sys/device.h b/sys/sys/device.h
index 99940ab1563e..76d9f8be5ad6 100644
--- a/sys/sys/device.h
+++ b/sys/sys/device.h
@@ -274,10 +274,12 @@ struct device {
        void            *dv_private;    /* this device's private storage */
        int             *dv_locators;   /* our actual locators (optional) */
        prop_dictionary_t dv_properties;/* properties dictionary */
+       struct localcount *dv_localcount;/* reference count */
 
        int             dv_pending;     /* config_pending count */
        TAILQ_ENTRY(device) dv_pending_list;
 
+       struct lwp      *dv_attaching;  /* thread not yet finished in attach */
        struct lwp      *dv_detaching;  /* detach lock (config_misc_lock/cv) */
 
        size_t          dv_activity_count;
@@ -651,6 +653,10 @@ void       null_childdetached(device_t, device_t);
 
 device_t       device_lookup(cfdriver_t, int);
 void           *device_lookup_private(cfdriver_t, int);
+
+device_t       device_lookup_acquire(cfdriver_t, int);
+void           device_release(device_t);
+
 void           device_register(device_t, void *);
 void           device_register_post_config(device_t, void *);
 
diff --git a/sys/sys/disklabel.h b/sys/sys/disklabel.h
index 4e94b8671332..853cdbe668a3 100644
--- a/sys/sys/disklabel.h
+++ b/sys/sys/disklabel.h
@@ -509,6 +509,7 @@ const char *convertdisklabel(struct disklabel *, void 
(*)(struct buf *),
 int     bounds_check_with_label(struct disk *, struct buf *, int);
 int     bounds_check_with_mediasize(struct buf *, int, uint64_t);
 const char *getfstypename(int);
+int    disklabel_dev_unit(dev_t);
 #endif
 #endif /* _LOCORE */
 

Reply via email to