Module Name:    src
Committed By:   ad
Date:           Sun Jan 19 21:19:25 UTC 2020

Modified Files:
        src/sys/kern [ad-namecache]: vfs_cache.c vfs_getcwd.c vfs_lookup.c
        src/sys/sys [ad-namecache]: fstypes.h namei.src

Log Message:
- Add a LOCKSHARED flag to namei (matching FreeBSD) indicating that we want
  the leaf locked with LK_SHARED.

- Add an IMNT_SHRLOOKUP flag to struct mount indicating that the file
  system can do VOP_LOOKUP() with a shared lock.  If it encounters
  something tricky, VOP_LOOKUP() is free to return ENOLCK and namei() will
  retry the lookup with an exclusive lock.  If the file system has this flag
  set, namei() will try with shared locks for all of the "read only"
  lookups, i.e. nameiop=LOOKUP or !ISLASTCN.

- vfs_getcwd: only take vnode locks when really needed, take shared locks if
  possible, and where the namecache has identity info for the directories,
  do it all in the namecache.

- vfs_lookup: when crossing mountpoints take only a shared lock on the
  covered vnode; don't need anything else.


To generate a diff of this commit:
cvs rdiff -u -r1.126.2.8 -r1.126.2.9 src/sys/kern/vfs_cache.c
cvs rdiff -u -r1.53.2.2 -r1.53.2.3 src/sys/kern/vfs_getcwd.c
cvs rdiff -u -r1.212.4.3 -r1.212.4.4 src/sys/kern/vfs_lookup.c
cvs rdiff -u -r1.37 -r1.37.6.1 src/sys/sys/fstypes.h
cvs rdiff -u -r1.47.2.4 -r1.47.2.5 src/sys/sys/namei.src

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/vfs_cache.c
diff -u src/sys/kern/vfs_cache.c:1.126.2.8 src/sys/kern/vfs_cache.c:1.126.2.9
--- src/sys/kern/vfs_cache.c:1.126.2.8	Sat Jan 18 17:16:20 2020
+++ src/sys/kern/vfs_cache.c	Sun Jan 19 21:19:25 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_cache.c,v 1.126.2.8 2020/01/18 17:16:20 ad Exp $	*/
+/*	$NetBSD: vfs_cache.c,v 1.126.2.9 2020/01/19 21:19:25 ad Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
@@ -148,7 +148,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.8 2020/01/18 17:16:20 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.9 2020/01/19 21:19:25 ad Exp $");
 
 #define __NAMECACHE_PRIVATE
 #ifdef _KERNEL_OPT
@@ -326,11 +326,15 @@ cache_key(const char *name, size_t nlen)
 	KASSERT(nlen <= USHRT_MAX);
 
 	key = hash32_buf(name, nlen, HASH32_STR_INIT);
-	return (key << 16) | nlen;
+	return (key << 32) | nlen;
 }
 
 /*
- * Like memcmp() but tuned for small strings of equal length.
+ * Like bcmp() but tuned for the use case here which is:
+ *
+ * - always of equal length both sides
+ * - almost always the same string both sides
+ * - small strings
  */
 static inline int
 cache_namecmp(struct namecache *nc, const char *name, size_t namelen)
@@ -743,7 +747,8 @@ cache_lookup_linked(struct vnode *dvp, c
  * Returns 0 on success, -1 on cache miss, positive errno on failure.
  */
 int
-cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
+cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp,
+    bool checkaccess, int perms)
 {
 	struct nchnode *nn = VNODE_TO_VIMPL(vp)->vi_ncache;
 	struct namecache *nc;
@@ -757,6 +762,27 @@ cache_revlookup(struct vnode *vp, struct
 		goto out;
 
 	rw_enter(&nn->nn_listlock, RW_READER);
+	if (checkaccess) {
+		/*
+		 * Check if the user is allowed to see.  NOTE: this is
+		 * checking for access on the "wrong" directory.  getcwd()
+		 * wants to see that there is access on every component
+		 * along the way, not that there is access to any individual
+		 * component.
+		 */
+		KASSERT(nn->nn_mode != VNOVAL && nn->nn_uid != VNOVAL &&
+		    nn->nn_gid != VNOVAL);
+		error = kauth_authorize_vnode(curlwp->l_cred,
+		    KAUTH_ACCESS_ACTION(VEXEC, vp->v_type, nn->nn_mode &
+		    ALLPERMS), vp, NULL, genfs_can_access(vp->v_type,
+		    nn->nn_mode & ALLPERMS, nn->nn_uid, nn->nn_gid,
+		    perms, curlwp->l_cred));
+		    if (error != 0) {
+		    	rw_exit(&nn->nn_listlock);
+			COUNT(ncs_denied);
+			return EACCES;
+		}
+	}
 	TAILQ_FOREACH(nc, &nn->nn_list, nc_list) {
 		KASSERT(nc->nc_nn == nn);
 		KASSERT(nc->nc_dnn != NULL);
@@ -931,12 +957,14 @@ cache_set_id(struct vnode *dvp, mode_t m
 
 	if (dvp->v_type == VDIR) {
 		rw_enter(&nn->nn_lock, RW_WRITER);
+		rw_enter(&nn->nn_listlock, RW_WRITER);
 		KASSERT(nn->nn_mode == VNOVAL);
 		KASSERT(nn->nn_uid == VNOVAL);
 		KASSERT(nn->nn_gid == VNOVAL);
 		nn->nn_mode = mode;
 		nn->nn_uid = uid;
 		nn->nn_gid = gid;
+		rw_exit(&nn->nn_listlock);
 		rw_exit(&nn->nn_lock);
 	}
 }
@@ -953,16 +981,30 @@ cache_update_id(struct vnode *dvp, mode_
 
 	if (dvp->v_type == VDIR) {
 		rw_enter(&nn->nn_lock, RW_WRITER);
+		rw_enter(&nn->nn_listlock, RW_WRITER);
 		if (nn->nn_mode != VNOVAL) {
 			nn->nn_mode = mode;
 			nn->nn_uid = uid;
 			nn->nn_gid = gid;
 		}
+		rw_exit(&nn->nn_listlock);
 		rw_exit(&nn->nn_lock);
 	}
 }
 
 /*
+ * Return true if we have identity for the given vnode.
+ */
+bool
+cache_have_id(struct vnode *dvp)
+{
+	struct nchnode *nn = VNODE_TO_VIMPL(dvp)->vi_ncache;
+
+	/* Unlocked check.  Only goes VNOVAL -> valid, never back. */
+	return nn->nn_mode != VNOVAL;
+}
+
+/*
  * Name cache initialization, from vfs_init() when the system is booting.
  */
 void
@@ -1259,7 +1301,7 @@ static void
 cache_reclaim(void)
 {
 	struct namecache *nc;
-	struct nchnode *nn;
+	struct nchnode *dnn;
 	int toscan, total;
 
 	/* Scan up to a preset maxium number of entries. */
@@ -1276,9 +1318,9 @@ cache_reclaim(void)
 		if (nc == NULL) {
 			break;
 		}
-		nn = nc->nc_nn;
+		dnn = nc->nc_dnn;
 		KASSERT(nc->nc_lrulist == LRU_INACTIVE);
-		KASSERT(nn != NULL);
+		KASSERT(dnn != NULL);
 
 		/*
 		 * Locking in the wrong direction.  If we can't get the
@@ -1286,7 +1328,7 @@ cache_reclaim(void)
 		 * cause problems for the next guy in here, so send the
 		 * entry to the back of the list.
 		 */
-		if (!rw_tryenter(&nn->nn_lock, RW_WRITER)) {
+		if (!rw_tryenter(&dnn->nn_lock, RW_WRITER)) {
 			TAILQ_REMOVE(&cache_lru.list[LRU_INACTIVE],
 			    nc, nc_lru);
 			TAILQ_INSERT_TAIL(&cache_lru.list[LRU_INACTIVE],
@@ -1303,7 +1345,7 @@ cache_reclaim(void)
 		 */
 		mutex_exit(&cache_lru_lock);
 		cache_remove(nc, true);
-		rw_exit(&nn->nn_lock);
+		rw_exit(&dnn->nn_lock);
 		mutex_enter(&cache_lru_lock);
 	}
 	mutex_exit(&cache_lru_lock);
@@ -1388,8 +1430,8 @@ cache_stat_sysctl(SYSCTLFN_ARGS)
 void
 namecache_print(struct vnode *vp, void (*pr)(const char *, ...))
 {
+	struct nchnode *dnn = NULL;
 	struct namecache *nc;
-	struct nchnode *dnn;
 	enum cache_lru_id id;
 
 	for (id = 0; id < LRU_COUNT; id++) {

Index: src/sys/kern/vfs_getcwd.c
diff -u src/sys/kern/vfs_getcwd.c:1.53.2.2 src/sys/kern/vfs_getcwd.c:1.53.2.3
--- src/sys/kern/vfs_getcwd.c:1.53.2.2	Fri Jan 17 21:54:27 2020
+++ src/sys/kern/vfs_getcwd.c	Sun Jan 19 21:19:25 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: vfs_getcwd.c,v 1.53.2.2 2020/01/17 21:54:27 ad Exp $ */
+/* $NetBSD: vfs_getcwd.c,v 1.53.2.3 2020/01/19 21:19:25 ad Exp $ */
 
 /*-
- * Copyright (c) 1999 The NetBSD Foundation, Inc.
+ * Copyright (c) 1999, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c,v 1.53.2.2 2020/01/17 21:54:27 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c,v 1.53.2.3 2020/01/19 21:19:25 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -87,7 +87,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c
  * On exit, *uvpp is either NULL or is a locked vnode reference.
  */
 static int
-getcwd_scandir(struct vnode **lvpp, struct vnode **uvpp, char **bpp,
+getcwd_scandir(struct vnode *lvp, struct vnode **uvpp, char **bpp,
     char *bufp, struct lwp *l)
 {
 	int     error = 0;
@@ -101,14 +101,13 @@ getcwd_scandir(struct vnode **lvpp, stru
 	ino_t   fileno;
 	struct vattr va;
 	struct vnode *uvp = NULL;
-	struct vnode *lvp = *lvpp;
 	kauth_cred_t cred = l->l_cred;
 	struct componentname cn;
 	int len, reclen;
 	tries = 0;
 
-	/* Upgrade to exclusive for UFS VOP_GETATTR (itimes) & VOP_LOOKUP. */
-	vn_lock(lvp, LK_UPGRADE | LK_RETRY);
+	/* Need exclusive for UFS VOP_GETATTR (itimes) & VOP_LOOKUP. */
+	KASSERT(VOP_ISLOCKED(lvp) == LK_EXCLUSIVE);
 
 	/*
 	 * If we want the filename, get some info we need while the
@@ -117,8 +116,7 @@ getcwd_scandir(struct vnode **lvpp, stru
 	if (bufp != NULL) {
 		error = VOP_GETATTR(lvp, &va, cred);
 		if (error) {
-			vput(lvp);
-			*lvpp = NULL;
+			VOP_UNLOCK(lvp);
 			*uvpp = NULL;
 			return error;
 		}
@@ -137,24 +135,14 @@ getcwd_scandir(struct vnode **lvpp, stru
 
 	/* At this point, lvp is locked  */
 	error = VOP_LOOKUP(lvp, uvpp, &cn);
-	vput(lvp);
+	VOP_UNLOCK(lvp);
 	if (error) {
-		*lvpp = NULL;
 		*uvpp = NULL;
 		return error;
 	}
 	uvp = *uvpp;
-	/* Now lvp is unlocked, try to lock uvp */
-	error = vn_lock(uvp, LK_SHARED);
-	if (error) {
-		*lvpp = NULL;
-		*uvpp = NULL;
-		return error;
-	}
-
 	/* If we don't care about the pathname, we're done */
 	if (bufp == NULL) {
-		*lvpp = NULL;
 		return 0;
 	}
 
@@ -166,6 +154,14 @@ getcwd_scandir(struct vnode **lvpp, stru
 		dirbuflen = va.va_blocksize;
 	dirbuf = kmem_alloc(dirbuflen, KM_SLEEP);
 
+	/* Now lvp is unlocked, try to lock uvp */
+	error = vn_lock(uvp, LK_SHARED);
+	if (error) {
+		vrele(uvp);
+		*uvpp = NULL;
+		return error;
+	}
+
 #if 0
 unionread:
 #endif
@@ -264,66 +260,14 @@ unionread:
 	error = ENOENT;
 
 out:
-	*lvpp = NULL;
+	VOP_UNLOCK(uvp);
 	kmem_free(dirbuf, dirbuflen);
 	return error;
 }
 
 /*
- * Look in the vnode-to-name reverse cache to see if
- * we can find things the easy way.
- *
- * XXX vget failure path is untested.
- *
- * On entry, *lvpp is a locked vnode reference.
- * On exit, one of the following is the case:
- *	0) Both *lvpp and *uvpp are NULL and failure is returned.
- * 	1) *uvpp is NULL, *lvpp remains locked and -1 is returned (cache miss)
- *	2) *uvpp is a locked vnode reference, *lvpp is vput and NULL'ed
- *	   and 0 is returned (cache hit)
- */
-
-static int
-getcwd_getcache(struct vnode **lvpp, struct vnode **uvpp, char **bpp,
-    char *bufp)
-{
-	struct vnode *lvp, *uvp = NULL;
-	int error;
-
-	lvp = *lvpp;
-
-	/*
-	 * This returns 0 on a cache hit, -1 on a clean cache miss,
-	 * or an errno on other failure.
-	 */
-	error = cache_revlookup(lvp, uvpp, bpp, bufp);
-	if (error) {
-		if (error != -1) {
-			vput(lvp);
-			*lvpp = NULL;
-			*uvpp = NULL;
-		}
-		return error;
-	}
-	uvp = *uvpp;
-
-	/*
-	 * Since we're going up, we have to release the current lock
-	 * before we take the parent lock.
-	 */
-
-	VOP_UNLOCK(lvp);
-	vn_lock(uvp, LK_SHARED | LK_RETRY);
-	vrele(lvp);
-	*lvpp = NULL;
-
-	return error;
-}
-
-/*
  * common routine shared by sys___getcwd() and vn_isunder()
  */
-
 int
 getcwd_common(struct vnode *lvp, struct vnode *rvp, char **bpp, char *bufp,
     int limit, int flags, struct lwp *l)
@@ -348,11 +292,10 @@ getcwd_common(struct vnode *lvp, struct 
 	/*
 	 * Error handling invariant:
 	 * Before a `goto out':
-	 *	lvp is either NULL, or locked and held.
-	 *	uvp is either NULL, or locked and held.
+	 *	lvp is either NULL, or held.
+	 *	uvp is either NULL, or held.
 	 */
 
-	vn_lock(lvp, LK_SHARED | LK_RETRY);
 	if (bufp)
 		bp = *bpp;
 
@@ -372,63 +315,93 @@ getcwd_common(struct vnode *lvp, struct 
 		 * access check here is optional, depending on
 		 * whether or not caller cares.
 		 */
-		if (flags & GETCWD_CHECK_ACCESS) {
-			error = VOP_ACCESS(lvp, perms, cred);
-			if (error)
-				goto out;
-			perms = VEXEC|VREAD;
-		}
+		int chkaccess = (flags & GETCWD_CHECK_ACCESS);
+		bool locked = false;
 
 		/*
 		 * step up if we're a covered vnode..
+		 * check access on the first vnode only.
 		 */
-		while (lvp->v_vflag & VV_ROOT) {
-			struct vnode *tvp;
+		if (lvp->v_vflag & VV_ROOT) {
+			vn_lock(lvp, LK_SHARED | LK_RETRY);
+			if (chkaccess) {
+				error = VOP_ACCESS(lvp, perms, cred);
+				if (error) {
+					VOP_UNLOCK(lvp);
+					goto out;
+				}
+				chkaccess = 0;
+			}
+			while (lvp->v_vflag & VV_ROOT) {
+				struct vnode *tvp;
 
-			if (lvp == rvp)
-				goto out;
+				if (lvp == rvp) {
+					VOP_UNLOCK(lvp);
+					goto out;
+				}
 
-			tvp = lvp;
-			lvp = lvp->v_mount->mnt_vnodecovered;
-			vput(tvp);
-			/*
-			 * hodie natus est radici frater
-			 */
-			if (lvp == NULL) {
-				error = ENOENT;
-				goto out;
+				tvp = lvp->v_mount->mnt_vnodecovered;
+				/*
+				 * hodie natus est radici frater
+				 */
+				if (tvp == NULL) {
+					VOP_UNLOCK(lvp);
+					error = ENOENT;
+					goto out;
+				}
+				vref(tvp);
+				vput(lvp);
+				lvp = tvp;
+				if (lvp->v_vflag & VV_ROOT)
+					vn_lock(lvp, LK_SHARED | LK_RETRY);
 			}
-			vref(lvp);
-			error = vn_lock(lvp, LK_SHARED | LK_RETRY);
-			if (error != 0) {
-				vrele(lvp);
-				lvp = NULL;
+		}
+
+		/* Do we need to check access to the directory? */
+		if (chkaccess && !cache_have_id(lvp)) {
+			/* Need exclusive for UFS VOP_GETATTR (itimes) & VOP_LOOKUP. */
+			vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
+			error = VOP_ACCESS(lvp, perms, cred);
+			if (error) {
+				VOP_UNLOCK(lvp);
 				goto out;
 			}
+			chkaccess = 0;
+			locked = true;
 		}
+
 		/*
 		 * Look in the name cache; if that fails, look in the
 		 * directory..
 		 */
-		error = getcwd_getcache(&lvp, &uvp, &bp, bufp);
+		error = cache_revlookup(lvp, &uvp, &bp, bufp, chkaccess,
+		    perms);
 		if (error == -1) {
+			if (!locked) {
+				locked = true;
+				vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY);
+			}
 			if (lvp->v_type != VDIR) {
+				VOP_UNLOCK(lvp);
 				error = ENOTDIR;
 				goto out;
 			}
-			error = getcwd_scandir(&lvp, &uvp, &bp, bufp, l);
+			error = getcwd_scandir(lvp, &uvp, &bp, bufp, l);
+			/* lvp now unlocked */
+		} else if (locked) {
+			VOP_UNLOCK(lvp);
 		}
 		if (error)
 			goto out;
 #if DIAGNOSTIC
-		if (lvp != NULL)
-			panic("getcwd: oops, forgot to null lvp");
 		if (bufp && (bp <= bufp)) {
 			panic("getcwd: oops, went back too far");
 		}
 #endif
+		perms = VEXEC | VREAD;
 		if (bp)
 			*(--bp) = '/';
+		vrele(lvp);
 		lvp = uvp;
 		uvp = NULL;
 		limit--;
@@ -438,9 +411,9 @@ out:
 	if (bpp)
 		*bpp = bp;
 	if (uvp)
-		vput(uvp);
+		vrele(uvp);
 	if (lvp)
-		vput(lvp);
+		vrele(lvp);
 	vrele(rvp);
 	return error;
 }
@@ -560,7 +533,7 @@ vnode_to_path(char *path, size_t len, st
 	bp = bend = &path[len];
 	*(--bp) = '\0';
 
-	error = cache_revlookup(vp, &dvp, &bp, path);
+	error = cache_revlookup(vp, &dvp, &bp, path, false, 0);
 	if (error != 0)
 		return (error == -1 ? ENOENT : error);
 

Index: src/sys/kern/vfs_lookup.c
diff -u src/sys/kern/vfs_lookup.c:1.212.4.3 src/sys/kern/vfs_lookup.c:1.212.4.4
--- src/sys/kern/vfs_lookup.c:1.212.4.3	Fri Jan 17 22:26:25 2020
+++ src/sys/kern/vfs_lookup.c	Sun Jan 19 21:19:25 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_lookup.c,v 1.212.4.3 2020/01/17 22:26:25 ad Exp $	*/
+/*	$NetBSD: vfs_lookup.c,v 1.212.4.4 2020/01/19 21:19:25 ad Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -37,7 +37,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.212.4.3 2020/01/17 22:26:25 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.212.4.4 2020/01/19 21:19:25 ad Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_magiclinks.h"
@@ -922,54 +922,68 @@ static int
 lookup_crossmount(struct namei_state *state,
 		  struct vnode **searchdir_ret,
 		  struct vnode **foundobj_ret,
-		  bool searchdir_locked)
+		  bool *searchdir_locked)
 {
 	struct componentname *cnp = state->cnp;
 	struct vnode *foundobj;
 	struct vnode *searchdir;
 	struct mount *mp;
-	int error;
+	int error, lktype;
 
 	searchdir = *searchdir_ret;
 	foundobj = *foundobj_ret;
+	error = 0;
 
 	KASSERT((cnp->cn_flags & NOCROSSMOUNT) == 0);
 	KASSERT(searchdir != NULL);
 
+	/* First, unlock searchdir (oof). */
+	if (*searchdir_locked) {
+		lktype = VOP_ISLOCKED(searchdir);
+		VOP_UNLOCK(searchdir);
+		*searchdir_locked = false;
+	} else {
+		lktype = LK_NONE;
+	}
+
 	/*
-	 * Check to see if the vnode has been mounted on;
-	 * if so find the root of the mounted file system.
+	 * Do an unlocked check to see if the vnode has been mounted on; if
+	 * so find the root of the mounted file system.
 	 */
-	error = vn_lock(foundobj, LK_SHARED);
-	if (error != 0) {
-		vrele(foundobj);
-		*foundobj_ret = NULL;
-		return error;
-	}
 	while (foundobj->v_type == VDIR &&
 	    (mp = foundobj->v_mountedhere) != NULL &&
 	    (cnp->cn_flags & NOCROSSMOUNT) == 0) {
 		KASSERTMSG(searchdir != foundobj, "same vn %p", searchdir);
-
-		error = vfs_busy(mp);
+		/* First get the vnode stable. */
+		error = vn_lock(foundobj, LK_SHARED);
 		if (error != 0) {
-			vput(foundobj);
+			vrele(foundobj);
 			*foundobj_ret = NULL;
-			return error;
+			break;
 		}
-		if (searchdir_locked) {
-			VOP_UNLOCK(searchdir);
+
+		/* Then check to see if something is still mounted there. */
+		if ((mp = foundobj->v_mountedhere) == NULL) {
+			VOP_UNLOCK(foundobj);
+			break;
 		}
+
+		/* Get a reference to the mountpoint, and ditch foundobj. */
+		error = vfs_busy(mp);
 		vput(foundobj);
-		error = VFS_ROOT(mp, LK_SHARED, &foundobj);
+		if (error != 0) {
+			*foundobj_ret = NULL;
+			break;
+		}
+
+		/* Now get a reference on the root vnode, and drop mount. */
+		error = VFS_ROOT(mp, LK_NONE, &foundobj);
 		vfs_unbusy(mp);
 		if (error) {
-			if (searchdir_locked) {
-				vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
-			}
 			*foundobj_ret = NULL;
-			return error;
+			break;
 		}
+
 		/*
 		 * Avoid locking vnodes from two filesystems because
 		 * it's prone to deadlock, e.g. when using puffs.
@@ -989,14 +1003,16 @@ lookup_crossmount(struct namei_state *st
 			vrele(searchdir);
 			*searchdir_ret = searchdir = NULL;
 			*foundobj_ret = foundobj;
-		} else if (searchdir_locked) {
-			VOP_UNLOCK(foundobj);
-			vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
-			vn_lock(foundobj, LK_SHARED | LK_RETRY);
+			lktype = LK_NONE;
 		}
 	}
-	VOP_UNLOCK(foundobj);
-	return 0;
+
+	/* If searchdir is still around, re-lock it. */
+ 	if (error == 0 && lktype != LK_NONE) {
+		vn_lock(searchdir, lktype | LK_RETRY);
+		*searchdir_locked = true;
+	}
+	return error;
 }
 
 /*
@@ -1022,7 +1038,7 @@ lookup_once(struct namei_state *state,
 	struct vnode *foundobj;		/* result */
 	struct lwp *l = curlwp;
 	bool searchdir_locked = false;
-	int error;
+	int error, lktype;
 
 	struct componentname *cnp = state->cnp;
 	struct nameidata *ndp = state->ndp;
@@ -1091,13 +1107,27 @@ lookup_once(struct namei_state *state,
 	}
 
 	/*
+	 * If the file system supports VOP_LOOKUP() with a shared lock, and
+	 * we are not making any modifications (nameiop LOOKUP) or this is
+	 * not the last component then get a shared lock LK_SHARED.
+	 */
+	if ((searchdir->v_mount->mnt_iflag & IMNT_SHRLOOKUP) != 0 &&
+	    (cnp->cn_nameiop == LOOKUP || (cnp->cn_flags & ISLASTCN) == 0)) {
+	    	lktype = LK_SHARED;
+	} else {
+		lktype = LK_EXCLUSIVE;
+	}
+
+	/*
 	 * We now have a segment name to search for, and a directory to search.
 	 * Our vnode state here is that "searchdir" is held.
 	 */
 unionlookup:
 	foundobj = NULL;
-	vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY);
-	searchdir_locked = true;
+	if (!searchdir_locked) {
+		vn_lock(searchdir, lktype | LK_RETRY);
+		searchdir_locked = true;
+	}
 	error = VOP_LOOKUP(searchdir, &foundobj, cnp);
 
 	if (error != 0) {
@@ -1107,6 +1137,23 @@ unionlookup:
 #ifdef NAMEI_DIAGNOSTIC
 		printf("not found\n");
 #endif /* NAMEI_DIAGNOSTIC */
+
+		/*
+		 * If ENOLCK, the file system needs us to retry the lookup
+		 * with an exclusive lock.  It's likely nothing was found in
+		 * cache and/or modifications need to be made.
+		 */
+		if (error == ENOLCK) {
+			KASSERT(VOP_ISLOCKED(searchdir) == LK_SHARED);
+			KASSERT(searchdir_locked);
+			if (vn_lock(searchdir, LK_UPGRADE | LK_NOWAIT)) {
+				VOP_UNLOCK(searchdir);
+				searchdir_locked = false;
+			}
+			lktype = LK_EXCLUSIVE;
+			goto unionlookup;
+		}
+
 		if ((error == ENOENT) &&
 		    (searchdir->v_vflag & VV_ROOT) &&
 		    (searchdir->v_mount->mnt_flag & MNT_UNION)) {
@@ -1194,7 +1241,7 @@ done:
  * many "easy" to find components of the path as possible.
  *
  * We use the namecache's node locks to form a chain, and avoid as many
- * vnode references and locks as possible.  In the most ideal case, only the
+ * vnode references and locks as possible.  In the ideal case, only the
  * final vnode will have its reference count adjusted and lock taken.
  */
 static int
@@ -1257,7 +1304,7 @@ lookup_fastforward(struct namei_state *s
 
 		/*
 		 * Good, now look for it in cache.  cache_lookup_linked()
-		 * will fail if there's nothing there, or if there's not
+		 * will fail if there's nothing there, or if there's no
 		 * ownership info for the directory, or if the user doesn't
 		 * have permission to look up files in this directory.
 		 */
@@ -1425,7 +1472,7 @@ namei_oneroot(struct namei_state *state,
 		    foundobj->v_mountedhere != NULL &&
 		    (cnp->cn_flags & NOCROSSMOUNT) == 0) {
 		    	error = lookup_crossmount(state, &searchdir,
-		    	    &foundobj, searchdir_locked);
+		    	    &foundobj, &searchdir_locked);
 		}
 
 		if (error) {
@@ -1692,7 +1739,9 @@ namei_oneroot(struct namei_state *state,
 		} else if ((cnp->cn_flags & LOCKLEAF) != 0 &&
 		    (searchdir != foundobj ||
 		    (cnp->cn_flags & LOCKPARENT) == 0)) {
-			vn_lock(foundobj, LK_EXCLUSIVE | LK_RETRY);
+			const int lktype = (cnp->cn_flags & LOCKSHARED) != 0 ?
+			    LK_SHARED : LK_EXCLUSIVE;
+			vn_lock(foundobj, lktype | LK_RETRY);
 		}
 	}
 

Index: src/sys/sys/fstypes.h
diff -u src/sys/sys/fstypes.h:1.37 src/sys/sys/fstypes.h:1.37.6.1
--- src/sys/sys/fstypes.h:1.37	Wed Feb 20 10:07:27 2019
+++ src/sys/sys/fstypes.h	Sun Jan 19 21:19:25 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: fstypes.h,v 1.37 2019/02/20 10:07:27 hannken Exp $	*/
+/*	$NetBSD: fstypes.h,v 1.37.6.1 2020/01/19 21:19:25 ad Exp $	*/
 
 /*
  * Copyright (c) 1989, 1991, 1993
@@ -221,6 +221,7 @@ typedef struct fhandle	fhandle_t;
 #define	IMNT_WANTRDWR	0x00000004	/* upgrade to read/write requested */
 #define	IMNT_WANTRDONLY	0x00000008	/* upgrade to readonly requested */
 #define	IMNT_DTYPE	0x00000040	/* returns d_type fields */
+#define	IMNT_SHRLOOKUP	0x00000080	/* can do LK_SHARED lookups */
 #define	IMNT_MPSAFE	0x00000100	/* file system code MP safe */
 #define	IMNT_CAN_RWTORO	0x00000200	/* can downgrade fs to from rw to r/o */
 #define	IMNT_ONWORKLIST	0x00000400	/* on syncer worklist */
@@ -271,6 +272,7 @@ typedef struct fhandle	fhandle_t;
 	"\13IMNT_ONWORKLIST" \
 	"\12IMNT_CAN_RWTORO" \
 	"\11IMNT_MPSAFE" \
+	"\10IMNT_SHRLOOKUP" \
 	"\07IMNT_DTYPE" \
 	"\04IMNT_WANTRDONLY" \
 	"\03IMNT_WANTRDWR" \

Index: src/sys/sys/namei.src
diff -u src/sys/sys/namei.src:1.47.2.4 src/sys/sys/namei.src:1.47.2.5
--- src/sys/sys/namei.src:1.47.2.4	Fri Jan 17 22:26:26 2020
+++ src/sys/sys/namei.src	Sun Jan 19 21:19:25 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: namei.src,v 1.47.2.4 2020/01/17 22:26:26 ad Exp $	*/
+/*	$NetBSD: namei.src,v 1.47.2.5 2020/01/19 21:19:25 ad Exp $	*/
 
 /*
  * Copyright (c) 1985, 1989, 1991, 1993
@@ -152,13 +152,14 @@ NAMEIFL	NOFOLLOW	0x00000000	/* do not fo
 					   (pseudo) */
 NAMEIFL	EMULROOTSET	0x00000080	/* emulation root already
 					   in ni_erootdir */
+NAMEIFL	LOCKSHARED	0x00000100	/* want shared locks if possible */
 NAMEIFL	NOCHROOT	0x01000000	/* no chroot on abs path lookups */
-NAMEIFL	MODMASK		0x010000fc	/* mask of operational modifiers */
+NAMEIFL	MODMASK		0x010001fc	/* mask of operational modifiers */
 /*
  * Namei parameter descriptors.
  */
-NAMEIFL	NOCROSSMOUNT	0x0000100	/* do not cross mount points */
-NAMEIFL	RDONLY		0x0000200	/* lookup with read-only semantics */
+NAMEIFL	NOCROSSMOUNT	0x0000800	/* do not cross mount points */
+NAMEIFL	RDONLY		0x0001000	/* lookup with read-only semantics */
 NAMEIFL	ISDOTDOT	0x0002000	/* current component name is .. */
 NAMEIFL	MAKEENTRY	0x0004000	/* entry is to be added to name cache */
 NAMEIFL	ISLASTCN	0x0008000	/* this is last component of pathname */
@@ -166,7 +167,7 @@ NAMEIFL	ISWHITEOUT	0x0020000	/* found wh
 NAMEIFL	DOWHITEOUT	0x0040000	/* do whiteouts */
 NAMEIFL	REQUIREDIR	0x0080000	/* must be a directory */
 NAMEIFL	CREATEDIR	0x0200000	/* trailing slashes are ok */
-NAMEIFL	PARAMASK	0x02ee300	/* mask of parameter descriptors */
+NAMEIFL	PARAMASK	0x02ef800	/* mask of parameter descriptors */
 
 /*
  * Initialization of a nameidata structure.
@@ -291,11 +292,14 @@ bool	cache_lookup_raw(struct vnode *, co
 			int *, struct vnode **);
 bool	cache_lookup_linked(struct vnode *, const char *, size_t,
 			    struct vnode **, krwlock_t **, kauth_cred_t);
-int	cache_revlookup(struct vnode *, struct vnode **, char **, char *);
+int	cache_revlookup(struct vnode *, struct vnode **, char **, char *,
+			bool, int);
+int	cache_diraccess(struct vnode *, int);
 void	cache_enter(struct vnode *, struct vnode *,
 			const char *, size_t, uint32_t);
 void	cache_set_id(struct vnode *, mode_t, uid_t, gid_t);
 void	cache_update_id(struct vnode *, mode_t, uid_t, gid_t);
+bool	cache_have_id(struct vnode *);
 void	cache_vnode_init(struct vnode * );
 void	cache_vnode_fini(struct vnode * );
 void	cache_cpu_init(struct cpu_info *);

Reply via email to