Module Name:    src
Committed By:   riastradh
Date:           Sat Apr 22 11:22:36 UTC 2023

Modified Files:
        src/sys/kern: vfs_vnops.c

Log Message:
readdir(2), lseek(2): Fix races in access to struct file::f_offset.

For non-directory vnodes:
- reading f_offset requires a shared or exclusive vnode lock
- writing f_offset requires an exclusive vnode lock

For directory vnodes, access (read or write) requires either:
- a shared vnode lock AND f_lock, or
- an exclusive vnode lock.

This way, two files for the same underlying directory vnode can still
do VOP_READDIR in parallel, but if two readdir(2) or lseek(2) calls
run in parallel on the same file, the load and store of f_offset is
atomic (otherwise, e.g., on 32-bit systems it might be torn and lead
to corrupt offsets).

There is still a potential problem: the _whole transaction_ of
readdir(2) may not be atomic.  For example, if thread A and thread B
read n bytes of directory content, thread A might get bytes [0,n) and
thread B might get bytes [n,2n) but f_offset might end up at n
instead of 2n once both operations complete.  (However, f_offset
wouldn't be some corrupt garbled number like n & 0xffffffff00000000.)
Fixing this would require either:
(a) using an exclusive vnode lock in vn_readdir,
(b) introducing a new lock that serializes vn_readdir on the same
    file (but ont necessarily the same vnode), or
(c) proving it is safe to hold f_lock across VOP_READDIR, VOP_SEEK,
    and VOP_GETATTR.


To generate a diff of this commit:
cvs rdiff -u -r1.237 -r1.238 src/sys/kern/vfs_vnops.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/vfs_vnops.c
diff -u src/sys/kern/vfs_vnops.c:1.237 src/sys/kern/vfs_vnops.c:1.238
--- src/sys/kern/vfs_vnops.c:1.237	Mon Mar 13 18:13:18 2023
+++ src/sys/kern/vfs_vnops.c	Sat Apr 22 11:22:36 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_vnops.c,v 1.237 2023/03/13 18:13:18 riastradh Exp $	*/
+/*	$NetBSD: vfs_vnops.c,v 1.238 2023/04/22 11:22:36 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.237 2023/03/13 18:13:18 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.238 2023/04/22 11:22:36 riastradh Exp $");
 
 #include "veriexec.h"
 
@@ -595,9 +595,11 @@ unionread:
 	}
 	auio.uio_resid = count;
 	vn_lock(vp, LK_SHARED | LK_RETRY);
+	mutex_enter(&fp->f_lock);
 	auio.uio_offset = fp->f_offset;
+	mutex_exit(&fp->f_lock);
 	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, cookies,
-		    ncookies);
+	    ncookies);
 	mutex_enter(&fp->f_lock);
 	fp->f_offset = auio.uio_offset;
 	mutex_exit(&fp->f_lock);
@@ -656,7 +658,13 @@ vn_read(file_t *fp, off_t *offset, struc
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	else
 		vn_lock(vp, LK_SHARED | LK_RETRY);
+	if (__predict_false(vp->v_type == VDIR) &&
+	    offset == &fp->f_offset && (flags & FOF_UPDATE_OFFSET) == 0)
+		mutex_enter(&fp->f_lock);
 	uio->uio_offset = *offset;
+	if (__predict_false(vp->v_type == VDIR) &&
+	    offset == &fp->f_offset && (flags & FOF_UPDATE_OFFSET) == 0)
+		mutex_enter(&fp->f_lock);
 	count = uio->uio_resid;
 	error = VOP_READ(vp, uio, ioflag, cred);
 	if (flags & FOF_UPDATE_OFFSET)
@@ -825,8 +833,13 @@ vn_ioctl(file_t *fp, u_long com, void *d
 		if (com == FIONREAD) {
 			vn_lock(vp, LK_SHARED | LK_RETRY);
 			error = VOP_GETATTR(vp, &vattr, kauth_cred_get());
-			if (error == 0)
+			if (error == 0) {
+				if (vp->v_type == VDIR)
+					mutex_enter(&fp->f_lock);
 				*(int *)data = vattr.va_size - fp->f_offset;
+				if (vp->v_type == VDIR)
+					mutex_exit(&fp->f_lock);
+			}
 			VOP_UNLOCK(vp);
 			if (error)
 				return error;
@@ -1149,7 +1162,11 @@ vn_seek(struct file *fp, off_t delta, in
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 
 	/* Compute the old and new offsets.  */
+	if (vp->v_type == VDIR && (flags & FOF_UPDATE_OFFSET) == 0)
+		mutex_enter(&fp->f_lock);
 	oldoff = fp->f_offset;
+	if (vp->v_type == VDIR && (flags & FOF_UPDATE_OFFSET) == 0)
+		mutex_exit(&fp->f_lock);
 	switch (whence) {
 	case SEEK_CUR:
 		if (delta > 0) {

Reply via email to