Module Name: src Committed By: riastradh Date: Sat Sep 11 10:08:56 UTC 2021
Modified Files: src/sys/compat/netbsd32: netbsd32_fs.c src/sys/kern: sys_generic.c vfs_syscalls.c vfs_vnops.c src/sys/sys: file.h Log Message: sys/kern: Allow custom fileops to specify fo_seek method. Previously only vnodes allowed lseek/pread[v]/pwrite[v], which meant converting a regular device to a cloning device doesn't always work. Semantics is: (*fp->f_ops->fo_seek)(fp, delta, whence, newoffp, flags) 1. Compute a new offset according to whence + delta -- that is, if whence is SEEK_CUR, add delta to fp->f_offset; if whence is SEEK_END, add delta to end of file; if whence is SEEK_CUR, use delta as is. 2. If newoffp is nonnull, return the new offset in *newoffp. 3. If flags & FOF_UPDATE_OFFSET, set fp->f_offset to the new offset. Access to fp->f_offset, and *newoffp if newoffp = &fp->f_offset, must happen under the object lock (e.g., vnode lock), in order to synchronize fp->f_offset reads and writes. This change has the side effect that every call to VOP_SEEK happens under the vnode lock now, when previously it didn't. However, from a review of all the VOP_SEEK implementations, it does not appear that any file system even examines the vnode, let alone locks it. So I think this is safe -- and essentially the only reasonable way to do things, given that it is used to validate a change from oldoff to newoff, and oldoff becomes stale the moment we unlock the vnode. No kernel bump because this reuses a spare entry in struct fileops, and it is safe for the entry to be null, so all existing fileops will continue to work as before (rejecting seek). To generate a diff of this commit: cvs rdiff -u -r1.93 -r1.94 src/sys/compat/netbsd32/netbsd32_fs.c cvs rdiff -u -r1.132 -r1.133 src/sys/kern/sys_generic.c cvs rdiff -u -r1.551 -r1.552 src/sys/kern/vfs_syscalls.c cvs rdiff -u -r1.221 -r1.222 src/sys/kern/vfs_vnops.c cvs rdiff -u -r1.86 -r1.87 src/sys/sys/file.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/compat/netbsd32/netbsd32_fs.c diff -u src/sys/compat/netbsd32/netbsd32_fs.c:1.93 src/sys/compat/netbsd32/netbsd32_fs.c:1.94 --- src/sys/compat/netbsd32/netbsd32_fs.c:1.93 Tue Feb 16 14:47:20 2021 +++ src/sys/compat/netbsd32/netbsd32_fs.c Sat Sep 11 10:08:55 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: netbsd32_fs.c,v 1.93 2021/02/16 14:47:20 simonb Exp $ */ +/* $NetBSD: netbsd32_fs.c,v 1.94 2021/09/11 10:08:55 riastradh Exp $ */ /* * Copyright (c) 1998, 2001 Matthew R. Green @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.93 2021/02/16 14:47:20 simonb Exp $"); +__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.94 2021/09/11 10:08:55 riastradh Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -648,7 +648,6 @@ netbsd32_preadv(struct lwp *l, const str syscallarg(netbsd32_off_t) offset; } */ file_t *fp; - struct vnode *vp; off_t offset; int error, fd = SCARG(uap, fd); @@ -660,19 +659,14 @@ netbsd32_preadv(struct lwp *l, const str return EBADF; } - vp = fp->f_vnode; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { + if (fp->f_ops->fo_seek == NULL) { error = ESPIPE; goto out; } offset = SCARG(uap, offset); - - /* - * XXX This works because no file systems actually - * XXX take any action on the seek operation. - */ - if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) + error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); + if (error) goto out; return dofilereadv32(fd, fp, SCARG_P32(uap, iovp), @@ -694,7 +688,6 @@ netbsd32_pwritev(struct lwp *l, const st syscallarg(netbsd32_off_t) offset; } */ file_t *fp; - struct vnode *vp; off_t offset; int error, fd = SCARG(uap, fd); @@ -706,19 +699,14 @@ netbsd32_pwritev(struct lwp *l, const st return EBADF; } - vp = fp->f_vnode; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { + if (fp->f_ops->fo_seek == NULL) { error = ESPIPE; goto out; } offset = SCARG(uap, offset); - - /* - * XXX This works because no file systems actually - * XXX take any action on the seek operation. - */ - if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) + error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); + if (error) goto out; return dofilewritev32(fd, fp, SCARG_P32(uap, iovp), Index: src/sys/kern/sys_generic.c diff -u src/sys/kern/sys_generic.c:1.132 src/sys/kern/sys_generic.c:1.133 --- src/sys/kern/sys_generic.c:1.132 Sat May 23 23:42:43 2020 +++ src/sys/kern/sys_generic.c Sat Sep 11 10:08:55 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: sys_generic.c,v 1.132 2020/05/23 23:42:43 ad Exp $ */ +/* $NetBSD: sys_generic.c,v 1.133 2021/09/11 10:08:55 riastradh Exp $ */ /*- * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -70,7 +70,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.132 2020/05/23 23:42:43 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.133 2021/09/11 10:08:55 riastradh Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -208,17 +208,18 @@ do_filereadv(int fd, const struct iovec if (offset == NULL) offset = &fp->f_offset; else { - struct vnode *vp = fp->f_vnode; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { + /* + * Caller must not specify &fp->f_offset -- we can't + * safely dereference it for the call to fo_seek + * without holding some underlying object lock. + */ + KASSERT(offset != &fp->f_offset); + if (fp->f_ops->fo_seek == NULL) { error = ESPIPE; goto out; } - /* - * Test that the device is seekable ? - * XXX This works because no file systems actually - * XXX take any action on the seek operation. - */ - error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); + error = (*fp->f_ops->fo_seek)(fp, *offset, SEEK_SET, NULL, + 0); if (error != 0) goto out; } @@ -408,17 +409,18 @@ do_filewritev(int fd, const struct iovec if (offset == NULL) offset = &fp->f_offset; else { - struct vnode *vp = fp->f_vnode; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { + /* + * Caller must not specify &fp->f_offset -- we can't + * safely dereference it for the call to fo_seek + * without holding some underlying object lock. + */ + KASSERT(offset != &fp->f_offset); + if (fp->f_ops->fo_seek == NULL) { error = ESPIPE; goto out; } - /* - * Test that the device is seekable ? - * XXX This works because no file systems actually - * XXX take any action on the seek operation. - */ - error = VOP_SEEK(vp, fp->f_offset, *offset, fp->f_cred); + error = (*fp->f_ops->fo_seek)(fp, *offset, SEEK_SET, NULL, + 0); if (error != 0) goto out; } Index: src/sys/kern/vfs_syscalls.c diff -u src/sys/kern/vfs_syscalls.c:1.551 src/sys/kern/vfs_syscalls.c:1.552 --- src/sys/kern/vfs_syscalls.c:1.551 Sat Jul 3 09:39:26 2021 +++ src/sys/kern/vfs_syscalls.c Sat Sep 11 10:08:55 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_syscalls.c,v 1.551 2021/07/03 09:39:26 mlelstv Exp $ */ +/* $NetBSD: vfs_syscalls.c,v 1.552 2021/09/11 10:08:55 riastradh Exp $ */ /*- * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. @@ -70,7 +70,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.551 2021/07/03 09:39:26 mlelstv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.552 2021/09/11 10:08:55 riastradh Exp $"); #ifdef _KERNEL_OPT #include "opt_fileassoc.h" @@ -2856,50 +2856,30 @@ sys_lseek(struct lwp *l, const struct sy syscallarg(off_t) offset; syscallarg(int) whence; } */ - kauth_cred_t cred = l->l_cred; file_t *fp; - struct vnode *vp; - struct vattr vattr; - off_t newoff; int error, fd; + switch (SCARG(uap, whence)) { + case SEEK_CUR: + case SEEK_END: + case SEEK_SET: + break; + default: + return EINVAL; + } + fd = SCARG(uap, fd); if ((fp = fd_getfile(fd)) == NULL) return (EBADF); - vp = fp->f_vnode; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { + if (fp->f_ops->fo_seek == NULL) { error = ESPIPE; goto out; } - vn_lock(vp, LK_SHARED | LK_RETRY); - - switch (SCARG(uap, whence)) { - case SEEK_CUR: - newoff = fp->f_offset + SCARG(uap, offset); - break; - case SEEK_END: - error = VOP_GETATTR(vp, &vattr, cred); - if (error) { - VOP_UNLOCK(vp); - goto out; - } - newoff = SCARG(uap, offset) + vattr.va_size; - break; - case SEEK_SET: - newoff = SCARG(uap, offset); - break; - default: - error = EINVAL; - VOP_UNLOCK(vp); - goto out; - } - VOP_UNLOCK(vp); - if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { - *(off_t *)retval = fp->f_offset = newoff; - } + error = (*fp->f_ops->fo_seek)(fp, SCARG(uap, offset), + SCARG(uap, whence), (off_t *)retval, FOF_UPDATE_OFFSET); out: fd_putfile(fd); return (error); @@ -2918,7 +2898,6 @@ sys_pread(struct lwp *l, const struct sy syscallarg(off_t) offset; } */ file_t *fp; - struct vnode *vp; off_t offset; int error, fd = SCARG(uap, fd); @@ -2930,19 +2909,14 @@ sys_pread(struct lwp *l, const struct sy return (EBADF); } - vp = fp->f_vnode; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { + if (fp->f_ops->fo_seek == NULL) { error = ESPIPE; goto out; } offset = SCARG(uap, offset); - - /* - * XXX This works because no file systems actually - * XXX take any action on the seek operation. - */ - if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) + error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); + if (error) goto out; /* dofileread() will unuse the descriptor for us */ @@ -2985,7 +2959,6 @@ sys_pwrite(struct lwp *l, const struct s syscallarg(off_t) offset; } */ file_t *fp; - struct vnode *vp; off_t offset; int error, fd = SCARG(uap, fd); @@ -2997,19 +2970,14 @@ sys_pwrite(struct lwp *l, const struct s return (EBADF); } - vp = fp->f_vnode; - if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { + if (fp->f_ops->fo_seek == NULL) { error = ESPIPE; goto out; } offset = SCARG(uap, offset); - - /* - * XXX This works because no file systems actually - * XXX take any action on the seek operation. - */ - if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) + error = (*fp->f_ops->fo_seek)(fp, offset, SEEK_SET, &offset, 0); + if (error) goto out; /* dofilewrite() will unuse the descriptor for us */ Index: src/sys/kern/vfs_vnops.c diff -u src/sys/kern/vfs_vnops.c:1.221 src/sys/kern/vfs_vnops.c:1.222 --- src/sys/kern/vfs_vnops.c:1.221 Sun Jul 18 09:30:36 2021 +++ src/sys/kern/vfs_vnops.c Sat Sep 11 10:08:55 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_vnops.c,v 1.221 2021/07/18 09:30:36 dholland Exp $ */ +/* $NetBSD: vfs_vnops.c,v 1.222 2021/09/11 10:08:55 riastradh Exp $ */ /*- * Copyright (c) 2009 The NetBSD Foundation, Inc. @@ -66,7 +66,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.221 2021/07/18 09:30:36 dholland Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.222 2021/09/11 10:08:55 riastradh Exp $"); #include "veriexec.h" @@ -121,6 +121,7 @@ static int vn_statfile(file_t *fp, struc static int vn_ioctl(file_t *fp, u_long com, void *data); static int vn_mmap(struct file *, off_t *, size_t, int, int *, int *, struct uvm_object **, int *); +static int vn_seek(struct file *, off_t, int, off_t *, int); const struct fileops vnops = { .fo_name = "vn", @@ -134,6 +135,7 @@ const struct fileops vnops = { .fo_kqfilter = vn_kqfilter, .fo_restart = fnullop_restart, .fo_mmap = vn_mmap, + .fo_seek = vn_seek, }; /* @@ -1110,7 +1112,56 @@ vn_mmap(struct file *fp, off_t *offp, si return 0; } +static int +vn_seek(struct file *fp, off_t delta, int whence, off_t *newoffp, + int flags) +{ + kauth_cred_t cred = fp->f_cred; + off_t oldoff, newoff; + struct vnode *vp = fp->f_vnode; + struct vattr vattr; + int error; + + if (vp->v_type == VFIFO) + return ESPIPE; + + vn_lock(vp, LK_SHARED | LK_RETRY); + + /* Compute the old and new offsets. */ + oldoff = fp->f_offset; + switch (whence) { + case SEEK_CUR: + newoff = oldoff + delta; /* XXX arithmetic overflow */ + break; + case SEEK_END: + error = VOP_GETATTR(vp, &vattr, cred); + if (error) + goto out; + newoff = delta + vattr.va_size; /* XXX arithmetic overflow */ + break; + case SEEK_SET: + newoff = delta; + break; + default: + error = EINVAL; + goto out; + } + + /* Pass the proposed change to the file system to audit. */ + error = VOP_SEEK(vp, oldoff, newoff, cred); + if (error) + goto out; + + /* Success! */ + if (newoffp) + *newoffp = newoff; + if (flags & FOF_UPDATE_OFFSET) + fp->f_offset = newoff; + error = 0; +out: VOP_UNLOCK(vp); + return error; +} /* * Check that the vnode is still valid, and if so Index: src/sys/sys/file.h diff -u src/sys/sys/file.h:1.86 src/sys/sys/file.h:1.87 --- src/sys/sys/file.h:1.86 Sat May 2 18:43:02 2020 +++ src/sys/sys/file.h Sat Sep 11 10:08:55 2021 @@ -1,4 +1,4 @@ -/* $NetBSD: file.h,v 1.86 2020/05/02 18:43:02 christos Exp $ */ +/* $NetBSD: file.h,v 1.87 2021/09/11 10:08:55 riastradh Exp $ */ /*- * Copyright (c) 2009 The NetBSD Foundation, Inc. @@ -94,7 +94,7 @@ struct fileops { void (*fo_restart) (struct file *); int (*fo_mmap) (struct file *, off_t *, size_t, int, int *, int *, struct uvm_object **, int *); - void (*fo_spare2) (void); + int (*fo_seek) (struct file *, off_t, int, off_t *, int); }; union file_data {