Module Name:    src
Committed By:   ad
Date:           Mon Jan 13 08:51:07 UTC 2020

Modified Files:
        src/sys/kern [ad-namecache]: vfs_cache.c
        src/sys/sys [ad-namecache]: namei.src vnode_impl.h

Log Message:
Make the per-directory namecache lock an rwlock.

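As a quick orientation, a minimal sketch of the conversion pattern applied below (kernel context assumed; the struct and function names here are hypothetical, but mutex(9) and rwlock(9) are the real interfaces):

#include <sys/param.h>		/* KASSERT */
#include <sys/rwlock.h>

struct dir {
	krwlock_t *d_nclock;	/* was kmutex_t * from mutex_obj_alloc() */
};

static void
dir_lookup(struct dir *d)
{
	rw_enter(d->d_nclock, RW_READER);	/* was mutex_enter(); lookups
						 * may now run in parallel */
	/* ... search the per-directory namecache ... */
	rw_exit(d->d_nclock);			/* was mutex_exit() */
}

static void
dir_modify(struct dir *d)
{
	rw_enter(d->d_nclock, RW_WRITER);	/* modifications exclusive */
	KASSERT(rw_write_held(d->d_nclock));	/* was mutex_owned() */
	/* ... add or remove entries ... */
	rw_exit(d->d_nclock);
}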

To generate a diff of this commit:
cvs rdiff -u -r1.126.2.3 -r1.126.2.4 src/sys/kern/vfs_cache.c
cvs rdiff -u -r1.47.2.1 -r1.47.2.2 src/sys/sys/namei.src
cvs rdiff -u -r1.19.2.1 -r1.19.2.2 src/sys/sys/vnode_impl.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/vfs_cache.c
diff -u src/sys/kern/vfs_cache.c:1.126.2.3 src/sys/kern/vfs_cache.c:1.126.2.4
--- src/sys/kern/vfs_cache.c:1.126.2.3	Wed Jan  8 21:55:10 2020
+++ src/sys/kern/vfs_cache.c	Mon Jan 13 08:51:07 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_cache.c,v 1.126.2.3 2020/01/08 21:55:10 ad Exp $	*/
+/*	$NetBSD: vfs_cache.c,v 1.126.2.4 2020/01/13 08:51:07 ad Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.
@@ -100,9 +100,10 @@
  *	vp->v_interlock: per vnode interlock taken when acquiring a ref.
  *
  *	Most all modifications are made holding both cache_list_lock and the
- *	directory lock.  nc_hittime is modified with only the directory lock
- *	held.  See the definition of "struct namecache" in src/sys/namei.src
- *	for the particulars.
+ *	directory lock write locked.  nc_hittime does not have any kind of
+ *	serialization applied to it - updates are racy, but since it's only
+ *	used for pseudo-LRU replacement it doesn't matter.  See definition
+ *	of "struct namecache" in src/sys/namei.src for the particulars.
  *
  *	Per-CPU statistics, and "numcache" are read unlocked, since an
  *	approximate value is OK.  We maintain uintptr_t sized per-CPU
@@ -149,7 +150,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.3 2020/01/08 21:55:10 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.126.2.4 2020/01/13 08:51:07 ad Exp $");
 
 #define __NAMECACHE_PRIVATE
 #ifdef _KERNEL_OPT
@@ -280,7 +281,7 @@ static void
 cache_remove(struct namecache *ncp, bool inreverse)
 {
 
-	KASSERT(mutex_owned(VNODE_TO_VIMPL(ncp->nc_dvp)->vi_nclock));
+	KASSERT(rw_write_held(VNODE_TO_VIMPL(ncp->nc_dvp)->vi_nclock));
 
 	SDT_PROBE(vfs, namecache, invalidate, done, ncp->nc_dvp,
 	    0, 0, 0, 0);
@@ -323,7 +324,7 @@ cache_lookup_entry(struct vnode *dvp, co
 	struct namecache *ncp;
 	struct iovec iov;
 
-	KASSERT(mutex_owned(VNODE_TO_VIMPL(dvp)->vi_nclock));
+	KASSERT(rw_lock_held(VNODE_TO_VIMPL(dvp)->vi_nclock));
 
 	iov.iov_base = __UNCONST(name);
 	iov.iov_len = namelen;
@@ -333,7 +334,9 @@ cache_lookup_entry(struct vnode *dvp, co
 		KASSERT(ncp->nc_dvp == dvp);
 		/*
 		 * Avoid false sharing: don't write back to nc_hittime
-		 * unless changed significantly.
+		 * unless changed significantly.  This is an unlocked
+		 * update and is racy, but it doesn't matter since it's
+		 * only used for pseudo-LRU replacement.
 		 */
 		if (((ncp->nc_hittime ^ hardclock_ticks) & ~31) != 0) {
 			ncp->nc_hittime = hardclock_ticks;
@@ -404,9 +407,10 @@ cache_lookup(struct vnode *dvp, const ch
 {
 	struct namecache *ncp;
 	struct vnode *vp;
-	kmutex_t *dirlock;
+	krwlock_t *dirlock;
 	int error;
 	bool hit;
+	krw_t op;
 
 	/* Establish default result values */
 	if (iswht_ret != NULL) {
@@ -426,11 +430,19 @@ cache_lookup(struct vnode *dvp, const ch
 		return false;
 	}
 
+	/* Could the entry be purged below? */
+	if ((cnflags & ISLASTCN) != 0 &&
+	    ((cnflags & MAKEENTRY) == 0 || nameiop == CREATE)) {
+	    	op = RW_WRITER;
+	} else {
+		op = RW_READER;
+	}
+
 	dirlock = VNODE_TO_VIMPL(dvp)->vi_nclock;
-	mutex_enter(dirlock);
+	rw_enter(dirlock, op);
 	ncp = cache_lookup_entry(dvp, name, namelen);
 	if (__predict_false(ncp == NULL)) {
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 		/* found nothing */
 		COUNT(ncs_miss);
 		return false;
@@ -442,7 +454,7 @@ cache_lookup(struct vnode *dvp, const ch
 		 * want cache entry to exist.
 		 */
 		cache_remove(ncp, false);
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 		/* found nothing */
 		COUNT(ncs_badhits);
 		return false;
@@ -472,12 +484,12 @@ cache_lookup(struct vnode *dvp, const ch
 		} else {
 			KASSERT(!ncp->nc_whiteout);
 		}
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 		return hit;
 	}
 	vp = ncp->nc_vp;
 	mutex_enter(vp->v_interlock);
-	mutex_exit(dirlock);
+	rw_exit(dirlock);
 
 	/*
 	 * Unlocked except for the vnode interlock.  Call vcache_tryvget().
@@ -513,7 +525,7 @@ cache_lookup_raw(struct vnode *dvp, cons
 {
 	struct namecache *ncp;
 	struct vnode *vp;
-	kmutex_t *dirlock;
+	krwlock_t *dirlock;
 	int error;
 
 	/* Establish default results. */
@@ -528,16 +540,16 @@ cache_lookup_raw(struct vnode *dvp, cons
 	}
 
 	dirlock = VNODE_TO_VIMPL(dvp)->vi_nclock;
-	mutex_enter(dirlock);
+	rw_enter(dirlock, RW_READER);
 	if (__predict_false(namelen > USHRT_MAX)) {
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 		/* found nothing */
 		COUNT(ncs_long);
 		return false;
 	}
 	ncp = cache_lookup_entry(dvp, name, namelen);
 	if (__predict_false(ncp == NULL)) {
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 		/* found nothing */
 		COUNT(ncs_miss);
 		return false;
@@ -551,13 +563,13 @@ cache_lookup_raw(struct vnode *dvp, cons
 			/*cnp->cn_flags |= ncp->nc_flags;*/
 			*iswht_ret = ncp->nc_whiteout;
 		}
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 		/* found negative entry; vn is already null from above */
 		COUNT(ncs_neghits);
 		return true;
 	}
 	mutex_enter(vp->v_interlock);
-	mutex_exit(dirlock);
+	rw_exit(dirlock);
 
 	/*
 	 * Unlocked except for the vnode interlock.  Call vcache_tryvget().
@@ -672,6 +684,7 @@ cache_enter(struct vnode *dvp, struct vn
 	struct namecache *ncp;
 	struct namecache *oncp;
 	vnode_impl_t *vi, *dvi;
+	krwlock_t *dirlock;
 
 	/* First, check whether we can/should add a cache entry. */
 	if ((cnflags & MAKEENTRY) == 0 ||
@@ -701,7 +714,8 @@ cache_enter(struct vnode *dvp, struct vn
 	 * cache entry.  if there's a duplicated entry, free it.
 	 */
 	dvi = VNODE_TO_VIMPL(dvp);
-	mutex_enter(dvi->vi_nclock);
+	dirlock = dvi->vi_nclock;
+	rw_enter(dirlock, RW_WRITER);
 	oncp = cache_lookup_entry(dvp, name, namelen);
 	if (oncp) {
 		cache_remove(oncp, false);
@@ -738,7 +752,7 @@ cache_enter(struct vnode *dvp, struct vn
 		}
 	}
 	mutex_exit(&cache_list_lock);
-	mutex_exit(dvi->vi_nclock);
+	rw_exit(dirlock);
 }
 
 /*
@@ -809,7 +823,7 @@ cache_vnode_init(struct vnode *vp)
 {
 	vnode_impl_t *vi = VNODE_TO_VIMPL(vp);
 
-	vi->vi_nclock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
+	vi->vi_nclock = rw_obj_alloc();
 	rb_tree_init(&vi->vi_nctree, &cache_rbtree_ops);
 	TAILQ_INIT(&vi->vi_nclist);
 }
@@ -824,7 +838,7 @@ cache_vnode_fini(struct vnode *vp)
 
 	KASSERT(RB_TREE_MIN(&vi->vi_nctree) == NULL);
 	KASSERT(TAILQ_EMPTY(&vi->vi_nclist));
-	mutex_obj_free(vi->vi_nclock);
+	rw_obj_free(vi->vi_nclock);
 }
 
 /*
@@ -835,7 +849,7 @@ void
 cache_purge1(struct vnode *vp, const char *name, size_t namelen, int flags)
 {
 	struct namecache *ncp;
-	kmutex_t *dirlock, *blocked;
+	krwlock_t *dirlock, *blocked;
 
 	if (flags & PURGE_PARENTS) {
 		SDT_PROBE(vfs, namecache, purge, parents, vp, 0, 0, 0, 0);
@@ -854,13 +868,13 @@ cache_purge1(struct vnode *vp, const cha
 			 * in a row, we'll give the other side a breather to
 			 * prevent livelock.
 			 */
-			if (!mutex_tryenter(dirlock)) {
-				mutex_obj_hold(dirlock);
+			if (!rw_tryenter(dirlock, RW_WRITER)) {
+				rw_obj_hold(dirlock);
 				mutex_exit(&cache_list_lock);
-				mutex_enter(dirlock);
+				rw_enter(dirlock, RW_WRITER);
 				/* Do nothing. */
-				mutex_exit(dirlock);
-				mutex_obj_free(dirlock);
+				rw_exit(dirlock);
+				rw_obj_free(dirlock);
 				if (blocked == dirlock) {
 					kpause("livelock", false, 1, NULL);
 				}
@@ -868,7 +882,7 @@ cache_purge1(struct vnode *vp, const cha
 				blocked = dirlock;
 			} else {
 				cache_remove(ncp, true);
-				mutex_exit(dirlock);
+				rw_exit(dirlock);
 				blocked = NULL;
 			}
 		}
@@ -877,22 +891,22 @@ cache_purge1(struct vnode *vp, const cha
 	if (flags & PURGE_CHILDREN) {
 		SDT_PROBE(vfs, namecache, purge, children, vp, 0, 0, 0, 0);
 		dirlock = VNODE_TO_VIMPL(vp)->vi_nclock;
-		mutex_enter(dirlock);
+		rw_enter(dirlock, RW_WRITER);
 		while ((ncp = rb_tree_iterate(&VNODE_TO_VIMPL(vp)->vi_nctree,
 		    NULL, RB_DIR_RIGHT)) != NULL) {
 			cache_remove(ncp, false);
 		}
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 	}
 	if (name != NULL) {
 		SDT_PROBE(vfs, namecache, purge, name, name, namelen, 0, 0, 0);
 		dirlock = VNODE_TO_VIMPL(vp)->vi_nclock;
-		mutex_enter(dirlock);
+		rw_enter(dirlock, RW_WRITER);
 		ncp = cache_lookup_entry(vp, name, namelen);
 		if (ncp) {
 			cache_remove(ncp, false);
 		}
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 	}
 }
 
@@ -906,7 +920,7 @@ cache_purgevfs(struct mount *mp)
 	struct vnode_iterator *marker;
 	struct namecache *ncp;
 	vnode_impl_t *vi;
-	kmutex_t *dirlock;
+	krwlock_t *dirlock;
 	vnode_t *vp;
 
 	/*
@@ -923,12 +937,12 @@ cache_purgevfs(struct mount *mp)
 			continue;
 		}
 		dirlock = vi->vi_nclock;
-		mutex_enter(dirlock);
+		rw_enter(dirlock, RW_WRITER);
 		while ((ncp = rb_tree_iterate(&vi->vi_nctree, NULL,
 		    RB_DIR_RIGHT)) != NULL) {
 			cache_remove(ncp, false);
 		}
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 	}
 	vfs_vnode_iterator_destroy(marker);
 }
@@ -945,7 +959,7 @@ static void
 cache_reclaim(void)
 {
 	struct namecache *ncp, *next;
-	kmutex_t *dirlock;
+	krwlock_t *dirlock;
 	int toscan, delta;
 
 	KASSERT(mutex_owned(&cache_list_lock));
@@ -965,7 +979,7 @@ cache_reclaim(void)
 		 * cause problems for the next guy in here, so send the
 		 * entry to the back of the list.
 		 */
-		if (!mutex_tryenter(dirlock)) {
+		if (!rw_tryenter(dirlock, RW_WRITER)) {
 			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
 			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
 			continue;
@@ -978,13 +992,13 @@ cache_reclaim(void)
 		if (ncp->nc_hittime + delta > hardclock_ticks) {
 			TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
 			TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
-			mutex_exit(dirlock);
+			rw_exit(dirlock);
 			continue;
 		}
 
 		/* We have a victim: reclaim it. */
 		cache_remove(ncp, true);
-		mutex_exit(dirlock);
+		rw_exit(dirlock);
 	}
 }
 

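Condensed from the cache_lookup() hunk above, a sketch of the new lock-mode decision: a lookup only needs the lock write held when the entry it finds might be purged, that is, on the last component of an operation that will invalidate the entry. Variable names match the kernel code; everything around them is elided.

	krwlock_t *dirlock = VNODE_TO_VIMPL(dvp)->vi_nclock;
	krw_t op;

	/*
	 * Could the entry be purged below?  If so, we must be able to
	 * call cache_remove(), which asserts the lock write held.
	 */
	if ((cnflags & ISLASTCN) != 0 &&
	    ((cnflags & MAKEENTRY) == 0 || nameiop == CREATE)) {
		op = RW_WRITER;
	} else {
		op = RW_READER;	/* pure lookup: runs concurrently */
	}
	rw_enter(dirlock, op);
	/* ... cache_lookup_entry(), possibly cache_remove() ... */
	rw_exit(dirlock);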
Index: src/sys/sys/namei.src
diff -u src/sys/sys/namei.src:1.47.2.1 src/sys/sys/namei.src:1.47.2.2
--- src/sys/sys/namei.src:1.47.2.1	Wed Jan  8 11:02:16 2020
+++ src/sys/sys/namei.src	Mon Jan 13 08:51:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: namei.src,v 1.47.2.1 2020/01/08 11:02:16 ad Exp $	*/
+/*	$NetBSD: namei.src,v 1.47.2.2 2020/01/13 08:51:06 ad Exp $	*/
 
 /*
  * Copyright (c) 1985, 1989, 1991, 1993
@@ -208,6 +208,7 @@ NAMEIFL	PARAMASK	0x02ee300	/* mask of pa
 * -  stable throughout the lifetime of the namecache entry
  * d  protected by nc_dvp->vi_nclock
  * l  protected by cache_list_lock
+ * u  accesses are unlocked, no serialization applied
  */
 struct namecache {
 	struct rb_node nc_node;		/* d  red-black tree node */
@@ -215,7 +216,7 @@ struct namecache {
 	TAILQ_ENTRY(namecache) nc_vlist;/* l  vp's list of cache entries */
 	struct	vnode *nc_dvp;		/* -  vnode of parent of name */
 	struct	vnode *nc_vp;		/* -  vnode the name refers to */
-	int	nc_hittime;		/* d  approx time of last hit */
+	int	nc_hittime;		/* u  approx time of last hit */
 	u_short	nc_nlen;		/* -  length of name */
 	bool	nc_whiteout;		/* -  true if a whiteout */
 	char	nc_name[49];		/* -  segment name */

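The new "u" class exists for nc_hittime, whose update in cache_lookup_entry() is deliberately unlocked. A condensed sketch of that pattern from the vfs_cache.c change above: the field is only written back when the old and new values differ outside the low five bits, which limits cache-line ping-pong between CPUs reading the same entry; lost or torn updates merely perturb the pseudo-LRU ordering and are harmless.

	/*
	 * Unlocked, racy update: nc_hittime only feeds pseudo-LRU
	 * reclamation, so a stale value is harmless.  Skip the write
	 * unless the value changed outside the low five bits, to
	 * avoid false sharing on a hot cache line.
	 */
	if (((ncp->nc_hittime ^ hardclock_ticks) & ~31) != 0) {
		ncp->nc_hittime = hardclock_ticks;
	}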
Index: src/sys/sys/vnode_impl.h
diff -u src/sys/sys/vnode_impl.h:1.19.2.1 src/sys/sys/vnode_impl.h:1.19.2.2
--- src/sys/sys/vnode_impl.h:1.19.2.1	Wed Jan  8 11:02:16 2020
+++ src/sys/sys/vnode_impl.h	Mon Jan 13 08:51:06 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: vnode_impl.h,v 1.19.2.1 2020/01/08 11:02:16 ad Exp $	*/
+/*	$NetBSD: vnode_impl.h,v 1.19.2.2 2020/01/13 08:51:06 ad Exp $	*/
 
 /*-
  * Copyright (c) 2016, 2019 The NetBSD Foundation, Inc.
@@ -70,7 +70,7 @@ struct vnode_impl {
 	TAILQ_ENTRY(vnode_impl) vi_lrulist;	/* d: lru list */
 	TAILQ_HEAD(, namecache) vi_nclist;	/* n: namecaches (parent) */
 	rb_tree_t vi_nctree;			/* n: namecache tree */
-	kmutex_t *vi_nclock;			/* n: namecache lock */
+	krwlock_t *vi_nclock;			/* n: namecache lock */
 	int vi_synclist_slot;			/* s: synclist slot index */
 	int vi_lrulisttm;			/* i: time of lru enqueue */
 	TAILQ_ENTRY(vnode_impl) vi_synclist;	/* s: vnodes with dirty bufs */

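For completeness, a sketch of the lock object's lifecycle after the type change above, following cache_vnode_init() and cache_vnode_fini(): rw_obj_alloc(9) returns a reference-counted lock object, which lets cache_purge1() take an extra hold with rw_obj_hold() while it sleeps, with rw_obj_free() dropping a reference and freeing on the last one.

	/* At vnode creation (cache_vnode_init): */
	vi->vi_nclock = rw_obj_alloc();	/* was mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE) */

	/* At vnode destruction (cache_vnode_fini), namecache now empty: */
	KASSERT(RB_TREE_MIN(&vi->vi_nctree) == NULL);
	rw_obj_free(vi->vi_nclock);	/* was mutex_obj_free() */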