Module Name:    src
Committed By:   riastradh
Date:           Mon Mar 28 12:39:10 UTC 2022

Modified Files:
        src/sys/kern: subr_devsw.c
        src/sys/miscfs/specfs: spec_vnops.c
        src/sys/sys: conf.h

Log Message:
driver(9): New devsw d_cancel op to interrupt I/O before close.

If specified, when revoking a device node or closing its last open
node, specfs will:

1. Call d_cancel, which should return promptly without blocking.
2. Wait for all concurrent d_read/write/ioctl/&c. to drain.
3. Call d_close.

Otherwise, specfs will:

1. Call d_close.
2. Wait for all concurrent d_read/write/ioctl/&c. to drain.

This fallback is problematic because often parts of d_close rely on
concurrent devsw operations to have completed already, so it is up to
each driver to have its own mechanism for waiting, and the extra step
in (2) is almost redundant.  But it is still important to ensure that
devsw operations are not active by the time a module tries to invoke
devsw_detach, because only d_open is protected against that.

The signature of d_cancel matches d_close, mostly so we don't raise
questions about `why is this different?'; the lwp argument is not
useful but we should remove it from open/cancel/close all at the same
time.

The only way d_cancel should fail, if it does at all, is with ENODEV,
meaning the driver doesn't support cancelling outstanding I/O, and
will take responsibility for that in d_close.  I would make it return
void and only have bdev_cancel and cdev_cancel possibly return ENODEV
so specfs can detect whether a driver supports it, but this would
break the pattern around devsw operation types.

Drivers are allowed to omit it from struct bdevsw, struct cdevsw --
if so, it is as if they used a function that just returns ENODEV.

XXX kernel ABI change to struct bdevsw/cdevsw requires bump


To generate a diff of this commit:
cvs rdiff -u -r1.43 -r1.44 src/sys/kern/subr_devsw.c
cvs rdiff -u -r1.209 -r1.210 src/sys/miscfs/specfs/spec_vnops.c
cvs rdiff -u -r1.159 -r1.160 src/sys/sys/conf.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/subr_devsw.c
diff -u src/sys/kern/subr_devsw.c:1.43 src/sys/kern/subr_devsw.c:1.44
--- src/sys/kern/subr_devsw.c:1.43	Mon Mar 28 12:38:33 2022
+++ src/sys/kern/subr_devsw.c	Mon Mar 28 12:39:10 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: subr_devsw.c,v 1.43 2022/03/28 12:38:33 riastradh Exp $	*/
+/*	$NetBSD: subr_devsw.c,v 1.44 2022/03/28 12:39:10 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2001, 2002, 2007, 2008 The NetBSD Foundation, Inc.
@@ -69,7 +69,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.43 2022/03/28 12:38:33 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.44 2022/03/28 12:39:10 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_dtrace.h"
@@ -934,6 +934,24 @@ bdev_open(dev_t dev, int flag, int devty
 }
 
 int
+bdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l)
+{
+	const struct bdevsw *d;
+	int rv, mpflag;
+
+	if ((d = bdevsw_lookup(dev)) == NULL)
+		return ENXIO;
+	if (d->d_cancel == NULL)
+		return ENODEV;
+
+	DEV_LOCK(d);
+	rv = (*d->d_cancel)(dev, flag, devtype, l);
+	DEV_UNLOCK(d);
+
+	return rv;
+}
+
+int
 bdev_close(dev_t dev, int flag, int devtype, lwp_t *l)
 {
 	const struct bdevsw *d;
@@ -1129,6 +1147,24 @@ cdev_open(dev_t dev, int flag, int devty
 }
 
 int
+cdev_cancel(dev_t dev, int flag, int devtype, struct lwp *l)
+{
+	const struct cdevsw *d;
+	int rv, mpflag;
+
+	if ((d = cdevsw_lookup(dev)) == NULL)
+		return ENXIO;
+	if (d->d_cancel == NULL)
+		return ENODEV;
+
+	DEV_LOCK(d);
+	rv = (*d->d_cancel)(dev, flag, devtype, l);
+	DEV_UNLOCK(d);
+
+	return rv;
+}
+
+int
 cdev_close(dev_t dev, int flag, int devtype, lwp_t *l)
 {
 	const struct cdevsw *d;

Index: src/sys/miscfs/specfs/spec_vnops.c
diff -u src/sys/miscfs/specfs/spec_vnops.c:1.209 src/sys/miscfs/specfs/spec_vnops.c:1.210
--- src/sys/miscfs/specfs/spec_vnops.c:1.209	Mon Mar 28 12:37:56 2022
+++ src/sys/miscfs/specfs/spec_vnops.c	Mon Mar 28 12:39:10 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: spec_vnops.c,v 1.209 2022/03/28 12:37:56 riastradh Exp $	*/
+/*	$NetBSD: spec_vnops.c,v 1.210 2022/03/28 12:39:10 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -58,7 +58,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.209 2022/03/28 12:37:56 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.210 2022/03/28 12:39:10 riastradh Exp $");
 
 #include <sys/param.h>
 #include <sys/proc.h>
@@ -1688,6 +1688,22 @@ spec_close(void *v)
 	if (!(flags & FNONBLOCK))
 		VOP_UNLOCK(vp);
 
+	/*
+	 * If we can cancel all outstanding I/O, then wait for it to
+	 * drain before we call .d_close.  Drivers that split up
+	 * .d_cancel and .d_close this way need not have any internal
+	 * mechanism for waiting in .d_close for I/O to drain.
+	 */
+	if (vp->v_type == VBLK)
+		error = bdev_cancel(dev, flags, mode, curlwp);
+	else
+		error = cdev_cancel(dev, flags, mode, curlwp);
+	if (error == 0)
+		spec_io_drain(sd);
+	else
+		KASSERTMSG(error == ENODEV, "cancel dev=0x%lx failed with %d",
+		    (unsigned long)dev, error);
+
 	if (vp->v_type == VBLK)
 		error = bdev_close(dev, flags, mode, curlwp);
 	else

Index: src/sys/sys/conf.h
diff -u src/sys/sys/conf.h:1.159 src/sys/sys/conf.h:1.160
--- src/sys/sys/conf.h:1.159	Mon Mar 28 12:38:33 2022
+++ src/sys/sys/conf.h	Mon Mar 28 12:39:10 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: conf.h,v 1.159 2022/03/28 12:38:33 riastradh Exp $	*/
+/*	$NetBSD: conf.h,v 1.160 2022/03/28 12:39:10 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 1990, 1993
@@ -70,6 +70,7 @@ struct vnode;
  */
 struct bdevsw {
 	int		(*d_open)(dev_t, int, int, struct lwp *);
+	int		(*d_cancel)(dev_t, int, int, struct lwp *);
 	int		(*d_close)(dev_t, int, int, struct lwp *);
 	void		(*d_strategy)(struct buf *);
 	int		(*d_ioctl)(dev_t, u_long, void *, int, struct lwp *);
@@ -86,6 +87,7 @@ struct bdevsw {
  */
 struct cdevsw {
 	int		(*d_open)(dev_t, int, int, struct lwp *);
+	int		(*d_cancel)(dev_t, int, int, struct lwp *);
 	int		(*d_close)(dev_t, int, int, struct lwp *);
 	int		(*d_read)(dev_t, struct uio *, int);
 	int		(*d_write)(dev_t, struct uio *, int);
@@ -115,6 +117,7 @@ devmajor_t bdevsw_lookup_major(const str
 devmajor_t cdevsw_lookup_major(const struct cdevsw *);
 
 #define	dev_type_open(n)	int n (dev_t, int, int, struct lwp *)
+#define	dev_type_cancel(n)	int n (dev_t, int, int, struct lwp *)
 #define	dev_type_close(n)	int n (dev_t, int, int, struct lwp *)
 #define	dev_type_read(n)	int n (dev_t, struct uio *, int)
 #define	dev_type_write(n)	int n (dev_t, struct uio *, int)
@@ -165,6 +168,7 @@ paddr_t	nommap(dev_t, off_t, int);
 /* device access wrappers. */
 
 dev_type_open(bdev_open);
+dev_type_cancel(bdev_cancel);
 dev_type_close(bdev_close);
 dev_type_strategy(bdev_strategy);
 dev_type_ioctl(bdev_ioctl);
@@ -175,6 +179,7 @@ dev_type_discard(bdev_discard);
 void	bdev_detached(dev_t);
 
 dev_type_open(cdev_open);
+dev_type_cancel(cdev_cancel);
 dev_type_close(cdev_close);
 dev_type_read(cdev_read);
 dev_type_write(cdev_write);

Reply via email to