Module Name:    src
Committed By:   riastradh
Date:           Sat Sep 11 10:08:56 UTC 2021

Modified Files:
        src/sys/compat/netbsd32: netbsd32_fs.c
        src/sys/kern: sys_generic.c vfs_syscalls.c vfs_vnops.c
        src/sys/sys: file.h

Log Message:
sys/kern: Allow custom fileops to specify fo_seek method.

Previously only vnodes allowed lseek/pread[v]/pwrite[v], which meant
converting a regular device to a cloning device doesn't always work.

Semantics is:

(*fp->f_ops->fo_seek)(fp, delta, whence, newoffp, flags)

1. Compute a new offset according to whence + delta -- that is, if
   whence is SEEK_CUR, add delta to fp->f_offset; if whence is
   SEEK_END, add delta to end of file; if whence is SEEK_CUR, use delta
   as is.

2. If newoffp is nonnull, return the new offset in *newoffp.

3. If flags & FOF_UPDATE_OFFSET, set fp->f_offset to the new offset.

Access to fp->f_offset, and *newoffp if newoffp = &fp->f_offset, must
happen under the object lock (e.g., vnode lock), in order to
synchronize fp->f_offset reads and writes.

This change has the side effect that every call to VOP_SEEK happens
under the vnode lock now, when previously it didn't.  However, from a
review of all the VOP_SEEK implementations, it does not appear that
any file system even examines the vnode, let alone locks it.  So I
think this is safe -- and essentially the only reasonable way to do
things, given that it is used to validate a change from oldoff to
newoff, and oldoff becomes stale the moment we unlock the vnode.

No kernel bump because this reuses a spare entry in struct fileops,
and it is safe for the entry to be null, so all existing fileops will
continue to work as before (rejecting seek).


To generate a diff of this commit:
cvs rdiff -u -r1.93 -r1.94 src/sys/compat/netbsd32/netbsd32_fs.c
cvs rdiff -u -r1.132 -r1.133 src/sys/kern/sys_generic.c
cvs rdiff -u -r1.551 -r1.552 src/sys/kern/vfs_syscalls.c
cvs rdiff -u -r1.221 -r1.222 src/sys/kern/vfs_vnops.c
cvs rdiff -u -r1.86 -r1.87 src/sys/sys/file.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/compat/netbsd32/netbsd32_fs.c
diff -u src/sys/compat/netbsd32/netbsd32_fs.c:1.93 src/sys/compat/netbsd32/netbsd32_fs.c:1.94
--- src/sys/compat/netbsd32/netbsd32_fs.c:1.93	Tue Feb 16 14:47:20 2021
+++ src/sys/compat/netbsd32/netbsd32_fs.c	Sat Sep 11 10:08:55 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: netbsd32_fs.c,v 1.93 2021/02/16 14:47:20 simonb Exp $	*/
+/*	$NetBSD: netbsd32_fs.c,v 1.94 2021/09/11 10:08:55 riastradh Exp $	*/
 
 /*
  * Copyright (c) 1998, 2001 Matthew R. Green
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.93 2021/02/16 14:47:20 simonb Exp $");
+__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.94 2021/09/11 10:08:55 riastradh Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -648,7 +648,6 @@ netbsd32_preadv(struct lwp *l, const str
 		syscallarg(netbsd32_off_t) offset;
 	} */
 	file_t *fp;
-	struct vnode *vp;
 	off_t offset;
 	int error, fd = SCARG(uap, fd);
 
@@ -660,19 +659,14 @@ netbsd32_preadv(struct lwp *l, const str
 		return EBADF;
 	}
 
-	vp = fp->f_vnode;
-	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
+	if (fp->f_ops->fo_seek == NULL) {
 		error = ESPIPE;
 		goto out;
 	}
 
 	offset = SCARG(uap, offset);
-
-	/*
-	 * XXX This works because no file systems actually
-	 * XXX take any action on the seek operation.
-	 */
-	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
+	error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
+	if (error)
 		goto out;
 
 	return dofilereadv32(fd, fp, SCARG_P32(uap, iovp),
@@ -694,7 +688,6 @@ netbsd32_pwritev(struct lwp *l, const st
 		syscallarg(netbsd32_off_t) offset;
 	} */
 	file_t *fp;
-	struct vnode *vp;
 	off_t offset;
 	int error, fd = SCARG(uap, fd);
 
@@ -706,19 +699,14 @@ netbsd32_pwritev(struct lwp *l, const st
 		return EBADF;
 	}
 
-	vp = fp->f_vnode;
-	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
+	if (fp->f_ops->fo_seek == NULL) {
 		error = ESPIPE;
 		goto out;
 	}
 
 	offset = SCARG(uap, offset);
-
-	/*
-	 * XXX This works because no file systems actually
-	 * XXX take any action on the seek operation.
-	 */
-	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
+	error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
+	if (error)
 		goto out;
 
 	return dofilewritev32(fd, fp, SCARG_P32(uap, iovp),

Index: src/sys/kern/sys_generic.c
diff -u src/sys/kern/sys_generic.c:1.132 src/sys/kern/sys_generic.c:1.133
--- src/sys/kern/sys_generic.c:1.132	Sat May 23 23:42:43 2020
+++ src/sys/kern/sys_generic.c	Sat Sep 11 10:08:55 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: sys_generic.c,v 1.132 2020/05/23 23:42:43 ad Exp $	*/
+/*	$NetBSD: sys_generic.c,v 1.133 2021/09/11 10:08:55 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -70,7 +70,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.132 2020/05/23 23:42:43 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.133 2021/09/11 10:08:55 riastradh Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -208,17 +208,18 @@ do_filereadv(int fd, const struct iovec 
 	if (offset == NULL)
 		offset = &fp->f_offset;
 	else {
-		struct vnode *vp = fp->f_vnode;
-		if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
+		/*
+		 * Caller must not specify &fp->f_offset -- we can't
+		 * safely dereference it for the call to fo_seek
+		 * without holding some underlying object lock.
+		 */
+		KASSERT(offset != &fp->f_offset);
+		if (fp->f_ops->fo_seek == NULL) {
 			error = ESPIPE;
 			goto out;
 		}
-		/*
-		 * Test that the device is seekable ?
-		 * XXX This works because no file systems actually
-		 * XXX take any action on the seek operation.
-		 */
-		error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred);
+		error = (*fp->f_ops->fo_seek)(fp, *offset, SEEK_SET, NULL,
+		    0);
 		if (error != 0)
 			goto out;
 	}
@@ -408,17 +409,18 @@ do_filewritev(int fd, const struct iovec
 	if (offset == NULL)
 		offset = &fp->f_offset;
 	else {
-		struct vnode *vp = fp->f_vnode;
-		if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
+		/*
+		 * Caller must not specify &fp->f_offset -- we can't
+		 * safely dereference it for the call to fo_seek
+		 * without holding some underlying object lock.
+		 */
+		KASSERT(offset != &fp->f_offset);
+		if (fp->f_ops->fo_seek == NULL) {
 			error = ESPIPE;
 			goto out;
 		}
-		/*
-		 * Test that the device is seekable ?
-		 * XXX This works because no file systems actually
-		 * XXX take any action on the seek operation.
-		 */
-		error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred);
+		error = (*fp->f_ops->fo_seek)(fp, *offset, SEEK_SET, NULL,
+		    0);
 		if (error != 0)
 			goto out;
 	}

Index: src/sys/kern/vfs_syscalls.c
diff -u src/sys/kern/vfs_syscalls.c:1.551 src/sys/kern/vfs_syscalls.c:1.552
--- src/sys/kern/vfs_syscalls.c:1.551	Sat Jul  3 09:39:26 2021
+++ src/sys/kern/vfs_syscalls.c	Sat Sep 11 10:08:55 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_syscalls.c,v 1.551 2021/07/03 09:39:26 mlelstv Exp $	*/
+/*	$NetBSD: vfs_syscalls.c,v 1.552 2021/09/11 10:08:55 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc.
@@ -70,7 +70,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.551 2021/07/03 09:39:26 mlelstv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.552 2021/09/11 10:08:55 riastradh Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_fileassoc.h"
@@ -2856,50 +2856,30 @@ sys_lseek(struct lwp *l, const struct sy
 		syscallarg(off_t) offset;
 		syscallarg(int) whence;
 	} */
-	kauth_cred_t cred = l->l_cred;
 	file_t *fp;
-	struct vnode *vp;
-	struct vattr vattr;
-	off_t newoff;
 	int error, fd;
 
+	switch (SCARG(uap, whence)) {
+	case SEEK_CUR:
+	case SEEK_END:
+	case SEEK_SET:
+		break;
+	default:
+		return EINVAL;
+	}
+
 	fd = SCARG(uap, fd);
 
 	if ((fp = fd_getfile(fd)) == NULL)
 		return (EBADF);
 
-	vp = fp->f_vnode;
-	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
+	if (fp->f_ops->fo_seek == NULL) {
 		error = ESPIPE;
 		goto out;
 	}
 
-	vn_lock(vp, LK_SHARED | LK_RETRY);
-
-	switch (SCARG(uap, whence)) {
-	case SEEK_CUR:
-		newoff = fp->f_offset + SCARG(uap, offset);
-		break;
-	case SEEK_END:
-		error = VOP_GETATTR(vp, &vattr, cred);
-		if (error) {
-			VOP_UNLOCK(vp);
-			goto out;
-		}
-		newoff = SCARG(uap, offset) + vattr.va_size;
-		break;
-	case SEEK_SET:
-		newoff = SCARG(uap, offset);
-		break;
-	default:
-		error = EINVAL;
-		VOP_UNLOCK(vp);
-		goto out;
-	}
-	VOP_UNLOCK(vp);
-	if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) {
-		*(off_t *)retval = fp->f_offset = newoff;
-	}
+	error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset),
+	    SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET);
  out:
  	fd_putfile(fd);
 	return (error);
@@ -2918,7 +2898,6 @@ sys_pread(struct lwp *l, const struct sy
 		syscallarg(off_t) offset;
 	} */
 	file_t *fp;
-	struct vnode *vp;
 	off_t offset;
 	int error, fd = SCARG(uap, fd);
 
@@ -2930,19 +2909,14 @@ sys_pread(struct lwp *l, const struct sy
 		return (EBADF);
 	}
 
-	vp = fp->f_vnode;
-	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
+	if (fp->f_ops->fo_seek == NULL) {
 		error = ESPIPE;
 		goto out;
 	}
 
 	offset = SCARG(uap, offset);
-
-	/*
-	 * XXX This works because no file systems actually
-	 * XXX take any action on the seek operation.
-	 */
-	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
+	error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
+	if (error)
 		goto out;
 
 	/* dofileread() will unuse the descriptor for us */
@@ -2985,7 +2959,6 @@ sys_pwrite(struct lwp *l, const struct s
 		syscallarg(off_t) offset;
 	} */
 	file_t *fp;
-	struct vnode *vp;
 	off_t offset;
 	int error, fd = SCARG(uap, fd);
 
@@ -2997,19 +2970,14 @@ sys_pwrite(struct lwp *l, const struct s
 		return (EBADF);
 	}
 
-	vp = fp->f_vnode;
-	if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) {
+	if (fp->f_ops->fo_seek == NULL) {
 		error = ESPIPE;
 		goto out;
 	}
 
 	offset = SCARG(uap, offset);
-
-	/*
-	 * XXX This works because no file systems actually
-	 * XXX take any action on the seek operation.
-	 */
-	if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0)
+	error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0);
+	if (error)
 		goto out;
 
 	/* dofilewrite() will unuse the descriptor for us */

Index: src/sys/kern/vfs_vnops.c
diff -u src/sys/kern/vfs_vnops.c:1.221 src/sys/kern/vfs_vnops.c:1.222
--- src/sys/kern/vfs_vnops.c:1.221	Sun Jul 18 09:30:36 2021
+++ src/sys/kern/vfs_vnops.c	Sat Sep 11 10:08:55 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_vnops.c,v 1.221 2021/07/18 09:30:36 dholland Exp $	*/
+/*	$NetBSD: vfs_vnops.c,v 1.222 2021/09/11 10:08:55 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.221 2021/07/18 09:30:36 dholland Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.222 2021/09/11 10:08:55 riastradh Exp $");
 
 #include "veriexec.h"
 
@@ -121,6 +121,7 @@ static int vn_statfile(file_t *fp, struc
 static int vn_ioctl(file_t *fp, u_long com, void *data);
 static int vn_mmap(struct file *, off_t *, size_t, int, int *, int *,
 		   struct uvm_object **, int *);
+static int vn_seek(struct file *, off_t, int, off_t *, int);
 
 const struct fileops vnops = {
 	.fo_name = "vn",
@@ -134,6 +135,7 @@ const struct fileops vnops = {
 	.fo_kqfilter = vn_kqfilter,
 	.fo_restart = fnullop_restart,
 	.fo_mmap = vn_mmap,
+	.fo_seek = vn_seek,
 };
 
 /*
@@ -1110,7 +1112,56 @@ vn_mmap(struct file *fp, off_t *offp, si
 	return 0;
 }
 
+static int
+vn_seek(struct file *fp, off_t delta, int whence, off_t *newoffp,
+    int flags)
+{
+	kauth_cred_t cred = fp->f_cred;
+	off_t oldoff, newoff;
+	struct vnode *vp = fp->f_vnode;
+	struct vattr vattr;
+	int error;
+
+	if (vp->v_type == VFIFO)
+		return ESPIPE;
+
+	vn_lock(vp, LK_SHARED | LK_RETRY);
+
+	/* Compute the old and new offsets.  */
+	oldoff = fp->f_offset;
+	switch (whence) {
+	case SEEK_CUR:
+		newoff = oldoff + delta; /* XXX arithmetic overflow */
+		break;
+	case SEEK_END:
+		error = VOP_GETATTR(vp, &vattr, cred);
+		if (error)
+			goto out;
+		newoff = delta + vattr.va_size; /* XXX arithmetic overflow */
+		break;
+	case SEEK_SET:
+		newoff = delta;
+		break;
+	default:
+		error = EINVAL;
+		goto out;
+	}
+
+	/* Pass the proposed change to the file system to audit.  */
+	error = VOP_SEEK(vp, oldoff, newoff, cred);
+	if (error)
+		goto out;
+
+	/* Success!  */
+	if (newoffp)
+		*newoffp = newoff;
+	if (flags & FOF_UPDATE_OFFSET)
+		fp->f_offset = newoff;
+	error = 0;
 
+out:	VOP_UNLOCK(vp);
+	return error;
+}
 
 /*
  * Check that the vnode is still valid, and if so

Index: src/sys/sys/file.h
diff -u src/sys/sys/file.h:1.86 src/sys/sys/file.h:1.87
--- src/sys/sys/file.h:1.86	Sat May  2 18:43:02 2020
+++ src/sys/sys/file.h	Sat Sep 11 10:08:55 2021
@@ -1,4 +1,4 @@
-/*	$NetBSD: file.h,v 1.86 2020/05/02 18:43:02 christos Exp $	*/
+/*	$NetBSD: file.h,v 1.87 2021/09/11 10:08:55 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -94,7 +94,7 @@ struct fileops {
 	void	(*fo_restart)	(struct file *);
 	int	(*fo_mmap)	(struct file *, off_t *, size_t, int, int *,
 				 int *, struct uvm_object **, int *);
-	void	(*fo_spare2)	(void);
+	int	(*fo_seek)	(struct file *, off_t, int, off_t *, int);
 };
 
 union file_data {

Reply via email to