Module Name:    src
Committed By:   ad
Date:           Sun Dec  1 13:56:29 UTC 2019

Modified Files:
        src/sys/kern: vfs_subr.c vfs_vnode.c vfs_vnops.c vnode_if.sh
        src/sys/miscfs/genfs: genfs_vnops.c
        src/sys/sys: vnode.h vnode_impl.h

Log Message:
Minor vnode locking changes:

- Stop using atomics to manipulate v_usecount.  It was a mistake to begin
  with.  It doesn't work as intended unless the XLOCK bit is incorporated in
  v_usecount, and we don't have that any more.  When I introduced this 10+
  years ago it was to reduce pressure on v_interlock, but it doesn't do
  that; it just makes stuff disappear from lockstat output and introduces
  problems elsewhere.  We could do atomic usecounts on vnodes, but there
  has to be a well thought out scheme.
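
  For reference, this is the shape vref() takes after the change (see the
  vfs_vnode.c hunk below): every transition of v_usecount, including
  non-zero to non-zero, now happens with v_interlock held.

	void
	vref(vnode_t *vp)
	{

		KASSERT(vp->v_usecount != 0);

		mutex_enter(vp->v_interlock);
		vp->v_usecount++;
		mutex_exit(vp->v_interlock);
	}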

- Resurrect LK_UPGRADE/LK_DOWNGRADE, which will be needed to work
  effectively as shared locks on vnodes come into wider use.
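
  A sketch of intended use (a hypothetical caller, not part of this commit):
  take the lock shared for the common read path and promote it in place only
  when modification turns out to be needed.  Note that genfs_lock() below may
  drop and re-take the lock to perform the upgrade, so any state derived
  under the shared hold must be revalidated afterwards.

	int
	frob_vnode(vnode_t *vp, bool will_modify)	/* hypothetical */
	{
		int error;

		error = vn_lock(vp, LK_SHARED);
		if (error != 0)
			return error;
		if (will_modify) {
			/* May unlock/relock internally; revalidate after. */
			error = vn_lock(vp, LK_UPGRADE);
			if (error != 0)
				return error;
		}
		/* ... do the work ... */
		VOP_UNLOCK(vp);
		return 0;
	}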

- Allocate the vnode lock using rw_obj_alloc() to reduce false sharing of
  struct vnode.
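
  A minimal sketch of the resulting lifecycle (mirroring the vcache_alloc()
  and vcache_free() hunks below): rw_obj_alloc() returns a krwlock_t
  allocated separately from the vnode, so lock traffic no longer dirties the
  cache lines holding struct vnode, and vi_lock becomes a pointer.

	vip->vi_lock = rw_obj_alloc();		/* at vnode creation */
	rw_enter(vip->vi_lock, RW_WRITER);	/* callers now deref the pointer */
	rw_exit(vip->vi_lock);
	rw_obj_free(vip->vi_lock);		/* at vnode destruction */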

- Put all of the LRU lists into a single cache line, and do not requeue a
  vnode if it's already on the correct list and was requeued recently (less
  than a second ago).
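
  The throttle is the new check at the top of lru_requeue() in the
  vfs_vnode.c hunk below; distilled:

	/*
	 * Already on the right list and moved less than hz ticks (one
	 * second) ago: leave it alone, avoiding vdrain_lock and the
	 * cache-line traffic of a TAILQ remove/insert.
	 */
	if (listhd == vip->vi_lrulisthd &&
	    (hardclock_ticks - vip->vi_lrulisttm) < hz)
		return;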

Kernel build before and after:

119.63s real  1453.16s user  2742.57s system
115.29s real  1401.52s user  2690.94s system


To generate a diff of this commit:
cvs rdiff -u -r1.474 -r1.475 src/sys/kern/vfs_subr.c
cvs rdiff -u -r1.103 -r1.104 src/sys/kern/vfs_vnode.c
cvs rdiff -u -r1.202 -r1.203 src/sys/kern/vfs_vnops.c
cvs rdiff -u -r1.67 -r1.68 src/sys/kern/vnode_if.sh
cvs rdiff -u -r1.199 -r1.200 src/sys/miscfs/genfs/genfs_vnops.c
cvs rdiff -u -r1.283 -r1.284 src/sys/sys/vnode.h
cvs rdiff -u -r1.17 -r1.18 src/sys/sys/vnode_impl.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/vfs_subr.c
diff -u src/sys/kern/vfs_subr.c:1.474 src/sys/kern/vfs_subr.c:1.475
--- src/sys/kern/vfs_subr.c:1.474	Sat Nov 16 10:05:44 2019
+++ src/sys/kern/vfs_subr.c	Sun Dec  1 13:56:29 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_subr.c,v 1.474 2019/11/16 10:05:44 maxv Exp $	*/
+/*	$NetBSD: vfs_subr.c,v 1.475 2019/12/01 13:56:29 ad Exp $	*/
 
 /*-
  * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
@@ -68,7 +68,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.474 2019/11/16 10:05:44 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.475 2019/12/01 13:56:29 ad Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_ddb.h"
@@ -1110,7 +1110,7 @@ vprint_common(struct vnode *vp, const ch
 	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
 	(*pr)("%ssize %" PRIx64 " writesize %" PRIx64 " numoutput %d\n",
 	    prefix, vp->v_size, vp->v_writesize, vp->v_numoutput);
-	(*pr)("%sdata %p lock %p\n", prefix, vp->v_data, &vip->vi_lock);
+	(*pr)("%sdata %p lock %p\n", prefix, vp->v_data, vip->vi_lock);
 
 	(*pr)("%sstate %s key(%p %zd)", prefix, vstate_name(vip->vi_state),
 	    vip->vi_key.vk_mount, vip->vi_key.vk_key_len);
@@ -1543,7 +1543,7 @@ vfs_vnode_lock_print(void *vlock, int fu
 
 	for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) {
 		TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
-			if (&vip->vi_lock != vlock)
+			if (vip->vi_lock != vlock)
 				continue;
 			vfs_vnode_print(VIMPL_TO_VNODE(vip), full, pr);
 		}

Index: src/sys/kern/vfs_vnode.c
diff -u src/sys/kern/vfs_vnode.c:1.103 src/sys/kern/vfs_vnode.c:1.104
--- src/sys/kern/vfs_vnode.c:1.103	Wed Feb 20 10:07:27 2019
+++ src/sys/kern/vfs_vnode.c	Sun Dec  1 13:56:29 2019
@@ -1,7 +1,7 @@
-/*	$NetBSD: vfs_vnode.c,v 1.103 2019/02/20 10:07:27 hannken Exp $	*/
+/*	$NetBSD: vfs_vnode.c,v 1.104 2019/12/01 13:56:29 ad Exp $	*/
 
 /*-
- * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
+ * Copyright (c) 1997-2011, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
@@ -143,20 +143,10 @@
  *	as vput(9), routines.  Common points holding references are e.g.
  *	file openings, current working directory, mount points, etc.  
  *
- * Note on v_usecount and its locking
- *
- *	At nearly all points it is known that v_usecount could be zero,
- *	the vnode_t::v_interlock will be held.  To change v_usecount away
- *	from zero, the interlock must be held.  To change from a non-zero
- *	value to zero, again the interlock must be held.
- *
- *	Changing the usecount from a non-zero value to a non-zero value can
- *	safely be done using atomic operations, without the interlock held.
- *
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.103 2019/02/20 10:07:27 hannken Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.104 2019/12/01 13:56:29 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -181,33 +171,39 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,
 
 #include <uvm/uvm.h>
 #include <uvm/uvm_readahead.h>
+#include <uvm/uvm_stat.h>
 
 /* Flags to vrelel. */
-#define	VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */
-#define	VRELEL_FORCE_RELE	0x0002	/* Must always succeed. */
-
-u_int			numvnodes		__cacheline_aligned;
+#define	VRELEL_ASYNC	0x0001	/* Always defer to vrele thread. */
+#define	VRELEL_FORCE	0x0002	/* Must always succeed. */
+#define	VRELEL_NOINACT	0x0004	/* Don't bother calling VOP_INACTIVE(). */
+
+#define	LRU_VRELE	0
+#define	LRU_FREE	1
+#define	LRU_HOLD	2
+#define	LRU_COUNT	3
 
 /*
  * There are three lru lists: one holds vnodes waiting for async release,
- * one is for vnodes which have no buffer/page references and
- * one for those which do (i.e. v_holdcnt is non-zero).
+ * one is for vnodes which have no buffer/page references and one for those
+ * which do (i.e.  v_holdcnt is non-zero).  We put the lists into a single,
+ * private cache line as vnodes migrate between them while under the same
+ * lock (vdrain_lock).
  */
-static vnodelst_t	lru_vrele_list		__cacheline_aligned;
-static vnodelst_t	lru_free_list		__cacheline_aligned;
-static vnodelst_t	lru_hold_list		__cacheline_aligned;
+u_int			numvnodes		__cacheline_aligned;
+static vnodelst_t	lru_list[LRU_COUNT]	__cacheline_aligned;
 static kmutex_t		vdrain_lock		__cacheline_aligned;
-static kcondvar_t	vdrain_cv		__cacheline_aligned;
+static kcondvar_t	vdrain_cv;
 static int		vdrain_gen;
 static kcondvar_t	vdrain_gen_cv;
 static bool		vdrain_retry;
 static lwp_t *		vdrain_lwp;
 SLIST_HEAD(hashhead, vnode_impl);
 static kmutex_t		vcache_lock		__cacheline_aligned;
-static kcondvar_t	vcache_cv		__cacheline_aligned;
+static kcondvar_t	vcache_cv;
 static u_int		vcache_hashsize;
 static u_long		vcache_hashmask;
-static struct hashhead	*vcache_hashtab		__cacheline_aligned;
+static struct hashhead	*vcache_hashtab;
 static pool_cache_t	vcache_pool;
 static void		lru_requeue(vnode_t *, vnodelst_t *);
 static vnodelst_t *	lru_which(vnode_t *);
@@ -378,17 +374,16 @@ vstate_change(vnode_t *vp, enum vnode_st
 void
 vfs_vnode_sysinit(void)
 {
-	int error __diagused;
+	int error __diagused, i;
 
 	dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL);
 	KASSERT(dead_rootmount != NULL);
 	dead_rootmount->mnt_iflag |= IMNT_MPSAFE;
 
 	mutex_init(&vdrain_lock, MUTEX_DEFAULT, IPL_NONE);
-	TAILQ_INIT(&lru_free_list);
-	TAILQ_INIT(&lru_hold_list);
-	TAILQ_INIT(&lru_vrele_list);
-
+	for (i = 0; i < LRU_COUNT; i++) {
+		TAILQ_INIT(&lru_list[i]);
+	}
 	vcache_init();
 
 	cv_init(&vdrain_cv, "vdrain");
@@ -452,9 +447,9 @@ lru_which(vnode_t *vp)
 	KASSERT(mutex_owned(vp->v_interlock));
 
 	if (vp->v_holdcnt > 0)
-		return &lru_hold_list;
+		return &lru_list[LRU_HOLD];
 	else
-		return &lru_free_list;
+		return &lru_list[LRU_FREE];
 }
 
 /*
@@ -466,19 +461,39 @@ static void
 lru_requeue(vnode_t *vp, vnodelst_t *listhd)
 {
 	vnode_impl_t *vip;
+	int d;
 
-	mutex_enter(&vdrain_lock);
+	/*
+	 * If the vnode is on the correct list, and was put there recently,
+	 * then leave it be, thus avoiding huge cache and lock contention.
+	 */
 	vip = VNODE_TO_VIMPL(vp);
+	if (listhd == vip->vi_lrulisthd &&
+	    (hardclock_ticks - vip->vi_lrulisttm) < hz) {
+	    	return;
+	}
+
+	mutex_enter(&vdrain_lock);
+	d = 0;
 	if (vip->vi_lrulisthd != NULL)
 		TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist);
 	else
-		numvnodes++;
+		d++;
 	vip->vi_lrulisthd = listhd;
+	vip->vi_lrulisttm = hardclock_ticks;
 	if (vip->vi_lrulisthd != NULL)
 		TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist);
 	else
-		numvnodes--;
-	if (numvnodes > desiredvnodes || listhd == &lru_vrele_list)
+		d--;
+	if (d != 0) {
+		/*
+		 * Looks strange?  This is not a bug.  Don't store
+		 * numvnodes unless there is a change - avoid false
+		 * sharing on MP.
+		 */
+		numvnodes += d;
+	}
+	if (numvnodes > desiredvnodes || listhd == &lru_list[LRU_VRELE])
 		cv_broadcast(&vdrain_cv);
 	mutex_exit(&vdrain_lock);
 }
@@ -491,33 +506,37 @@ void
 vrele_flush(struct mount *mp)
 {
 	vnode_impl_t *vip, *marker;
+	vnode_t *vp;
 
 	KASSERT(fstrans_is_owner(mp));
 
 	marker = VNODE_TO_VIMPL(vnalloc_marker(NULL));
 
 	mutex_enter(&vdrain_lock);
-	TAILQ_INSERT_HEAD(&lru_vrele_list, marker, vi_lrulist);
+	TAILQ_INSERT_HEAD(&lru_list[LRU_VRELE], marker, vi_lrulist);
 
 	while ((vip = TAILQ_NEXT(marker, vi_lrulist))) {
-		TAILQ_REMOVE(&lru_vrele_list, marker, vi_lrulist);
-		TAILQ_INSERT_AFTER(&lru_vrele_list, vip, marker, vi_lrulist);
-		if (vnis_marker(VIMPL_TO_VNODE(vip)))
+		TAILQ_REMOVE(&lru_list[LRU_VRELE], marker, vi_lrulist);
+		TAILQ_INSERT_AFTER(&lru_list[LRU_VRELE], vip, marker,
+		    vi_lrulist);
+		vp = VIMPL_TO_VNODE(vip);
+		if (vnis_marker(vp))
 			continue;
 
-		KASSERT(vip->vi_lrulisthd == &lru_vrele_list);
+		KASSERT(vip->vi_lrulisthd == &lru_list[LRU_VRELE]);
 		TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist);
-		vip->vi_lrulisthd = &lru_hold_list;
+		vip->vi_lrulisthd = &lru_list[LRU_HOLD];
+		vip->vi_lrulisttm = hardclock_ticks;
 		TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist);
 		mutex_exit(&vdrain_lock);
 
-		mutex_enter(VIMPL_TO_VNODE(vip)->v_interlock);
-		vrelel(VIMPL_TO_VNODE(vip), VRELEL_FORCE_RELE);
+		mutex_enter(vp->v_interlock);
+		vrelel(vp, VRELEL_FORCE);
 
 		mutex_enter(&vdrain_lock);
 	}
 
-	TAILQ_REMOVE(&lru_vrele_list, marker, vi_lrulist);
+	TAILQ_REMOVE(&lru_list[LRU_VRELE], marker, vi_lrulist);
 	mutex_exit(&vdrain_lock);
 
 	vnfree_marker(VIMPL_TO_VNODE(marker));
@@ -555,7 +574,7 @@ vdrain_remove(vnode_t *vp)
 	if (vcache_vget(vp) == 0) {
 		if (!vrecycle(vp)) {
 			mutex_enter(vp->v_interlock);
-			vrelel(vp, VRELEL_FORCE_RELE);
+			vrelel(vp, VRELEL_FORCE);
 		}
 	}
 	fstrans_done(mp);
@@ -584,16 +603,17 @@ vdrain_vrele(vnode_t *vp)
 	 * will put it back onto the right list before
 	 * its v_usecount reaches zero.
 	 */
-	KASSERT(vip->vi_lrulisthd == &lru_vrele_list);
+	KASSERT(vip->vi_lrulisthd == &lru_list[LRU_VRELE]);
 	TAILQ_REMOVE(vip->vi_lrulisthd, vip, vi_lrulist);
-	vip->vi_lrulisthd = &lru_hold_list;
+	vip->vi_lrulisthd = &lru_list[LRU_HOLD];
+	vip->vi_lrulisttm = hardclock_ticks;
 	TAILQ_INSERT_TAIL(vip->vi_lrulisthd, vip, vi_lrulist);
 
 	vdrain_retry = true;
 	mutex_exit(&vdrain_lock);
 
 	mutex_enter(vp->v_interlock);
-	vrelel(vp, VRELEL_FORCE_RELE);
+	vrelel(vp, VRELEL_FORCE);
 	fstrans_done(mp);
 
 	mutex_enter(&vdrain_lock);
@@ -606,9 +626,6 @@ vdrain_vrele(vnode_t *vp)
 static void
 vdrain_thread(void *cookie)
 {
-	vnodelst_t *listhd[] = {
-	    &lru_vrele_list, &lru_free_list, &lru_hold_list
-	};
 	int i;
 	u_int target;
 	vnode_impl_t *vip, *marker;
@@ -621,22 +638,22 @@ vdrain_thread(void *cookie)
 		vdrain_retry = false;
 		target = desiredvnodes - desiredvnodes/10;
 
-		for (i = 0; i < __arraycount(listhd); i++) {
-			TAILQ_INSERT_HEAD(listhd[i], marker, vi_lrulist);
+		for (i = 0; i < LRU_COUNT; i++) {
+			TAILQ_INSERT_HEAD(&lru_list[i], marker, vi_lrulist);
 			while ((vip = TAILQ_NEXT(marker, vi_lrulist))) {
-				TAILQ_REMOVE(listhd[i], marker, vi_lrulist);
-				TAILQ_INSERT_AFTER(listhd[i], vip, marker,
+				TAILQ_REMOVE(&lru_list[i], marker, vi_lrulist);
+				TAILQ_INSERT_AFTER(&lru_list[i], vip, marker,
 				    vi_lrulist);
 				if (vnis_marker(VIMPL_TO_VNODE(vip)))
 					continue;
-				if (listhd[i] == &lru_vrele_list)
+				if (i == LRU_VRELE)
 					vdrain_vrele(VIMPL_TO_VNODE(vip));
 				else if (numvnodes < target)
 					break;
 				else
 					vdrain_remove(VIMPL_TO_VNODE(vip));
 			}
-			TAILQ_REMOVE(listhd[i], marker, vi_lrulist);
+			TAILQ_REMOVE(&lru_list[i], marker, vi_lrulist);
 		}
 
 		if (vdrain_retry) {
@@ -663,35 +680,14 @@ vput(vnode_t *vp)
 }
 
 /*
- * Try to drop reference on a vnode.  Abort if we are releasing the
- * last reference.  Note: this _must_ succeed if not the last reference.
- */
-static inline bool
-vtryrele(vnode_t *vp)
-{
-	u_int use, next;
-
-	for (use = vp->v_usecount;; use = next) {
-		if (use == 1) {
-			return false;
-		}
-		KASSERT(use > 1);
-		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
-		if (__predict_true(next == use)) {
-			return true;
-		}
-	}
-}
-
-/*
  * Vnode release.  If reference count drops to zero, call inactive
  * routine and either return to freelist or free to the pool.
  */
 static void
 vrelel(vnode_t *vp, int flags)
 {
-	const bool async = ((flags & VRELEL_ASYNC_RELE) != 0);
-	const bool force = ((flags & VRELEL_FORCE_RELE) != 0);
+	const bool async = ((flags & VRELEL_ASYNC) != 0);
+	const bool force = ((flags & VRELEL_FORCE) != 0);
 	bool recycle, defer;
 	int error;
 
@@ -706,7 +702,8 @@ vrelel(vnode_t *vp, int flags)
 	 * If not the last reference, just drop the reference count
 	 * and unlock.
 	 */
-	if (vtryrele(vp)) {
+	if (vp->v_usecount > 1) {
+		vp->v_usecount--;
 		mutex_exit(vp->v_interlock);
 		return;
 	}
@@ -728,12 +725,14 @@ vrelel(vnode_t *vp, int flags)
 	 */
 	if ((curlwp == uvm.pagedaemon_lwp) || async) {
 		defer = true;
-	} else {
+	} else if (force) {
 		mutex_exit(vp->v_interlock);
-		error = vn_lock(vp,
-		    LK_EXCLUSIVE | LK_RETRY | (force ? 0 : LK_NOWAIT));
+		error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		defer = (error != 0);
 		mutex_enter(vp->v_interlock);
+	} else {
+		error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
+		defer = (error != 0);
 	}
 	KASSERT(mutex_owned(vp->v_interlock));
 	KASSERT(! (force && defer));
@@ -742,7 +741,7 @@ vrelel(vnode_t *vp, int flags)
 		 * Defer reclaim to the kthread; it's not safe to
 		 * clean it here.  We donate it our last reference.
 		 */
-		lru_requeue(vp, &lru_vrele_list);
+		lru_requeue(vp, &lru_list[LRU_VRELE]);
 		mutex_exit(vp->v_interlock);
 		return;
 	}
@@ -751,7 +750,8 @@ vrelel(vnode_t *vp, int flags)
 	 * If the node got another reference while we
 	 * released the interlock, don't try to inactivate it yet.
 	 */
-	if (__predict_false(vtryrele(vp))) {
+	if (vp->v_usecount > 1) {
+		vp->v_usecount--;
 		VOP_UNLOCK(vp);
 		mutex_exit(vp->v_interlock);
 		return;
@@ -782,14 +782,16 @@ vrelel(vnode_t *vp, int flags)
 		mutex_enter(vp->v_interlock);
 		VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED);
 		if (!recycle) {
-			if (vtryrele(vp)) {
+			if (vp->v_usecount > 1) {
+				vp->v_usecount--;
 				mutex_exit(vp->v_interlock);
 				return;
 			}
 		}
 
 		/* Take care of space accounting. */
-		if (vp->v_iflag & VI_EXECMAP) {
+		if ((vp->v_iflag & VI_EXECMAP) != 0 &&
+		    vp->v_uobj.uo_npages != 0) {
 			atomic_add_int(&uvmexp.execpages,
 			    -vp->v_uobj.uo_npages);
 			atomic_add_int(&uvmexp.filepages,
@@ -810,7 +812,8 @@ vrelel(vnode_t *vp, int flags)
 		KASSERT(vp->v_usecount > 0);
 	}
 
-	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
+	vp->v_usecount--;
+	if (vp->v_usecount != 0) {
 		/* Gained another reference while being reclaimed. */
 		mutex_exit(vp->v_interlock);
 		return;
@@ -837,9 +840,6 @@ void
 vrele(vnode_t *vp)
 {
 
-	if (vtryrele(vp)) {
-		return;
-	}
 	mutex_enter(vp->v_interlock);
 	vrelel(vp, 0);
 }
@@ -851,11 +851,8 @@ void
 vrele_async(vnode_t *vp)
 {
 
-	if (vtryrele(vp)) {
-		return;
-	}
 	mutex_enter(vp->v_interlock);
-	vrelel(vp, VRELEL_ASYNC_RELE);
+	vrelel(vp, VRELEL_ASYNC);
 }
 
 /*
@@ -868,7 +865,9 @@ vref(vnode_t *vp)
 
 	KASSERT(vp->v_usecount != 0);
 
-	atomic_inc_uint(&vp->v_usecount);
+	mutex_enter(vp->v_interlock);
+	vp->v_usecount++;
+	mutex_exit(vp->v_interlock);
 }
 
 /*
@@ -1006,7 +1005,7 @@ vrevoke(vnode_t *vp)
 	if (VSTATE_GET(vp) == VS_RECLAIMED) {
 		mutex_exit(vp->v_interlock);
 	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
-		atomic_inc_uint(&vp->v_usecount);
+		vp->v_usecount++;
 		mutex_exit(vp->v_interlock);
 		vgone(vp);
 	} else {
@@ -1128,7 +1127,7 @@ vcache_alloc(void)
 	vip = pool_cache_get(vcache_pool, PR_WAITOK);
 	memset(vip, 0, sizeof(*vip));
 
-	rw_init(&vip->vi_lock);
+	vip->vi_lock = rw_obj_alloc();
 	/* SLIST_INIT(&vip->vi_hash); */
 	/* LIST_INIT(&vip->vi_nclist); */
 	/* LIST_INIT(&vip->vi_dnclist); */
@@ -1143,7 +1142,7 @@ vcache_alloc(void)
 
 	vip->vi_state = VS_LOADING;
 
-	lru_requeue(vp, &lru_free_list);
+	lru_requeue(vp, &lru_list[LRU_FREE]);
 
 	return vip;
 }
@@ -1192,7 +1191,7 @@ vcache_free(vnode_impl_t *vip)
 	if (vp->v_type == VBLK || vp->v_type == VCHR)
 		spec_node_destroy(vp);
 
-	rw_destroy(&vip->vi_lock);
+	rw_obj_free(vip->vi_lock);
 	uvm_obj_destroy(&vp->v_uobj, true);
 	cv_destroy(&vp->v_cv);
 	pool_cache_put(vcache_pool, vip);
@@ -1216,10 +1215,8 @@ vcache_tryvget(vnode_t *vp)
 		error = ENOENT;
 	else if (__predict_false(VSTATE_GET(vp) != VS_LOADED))
 		error = EBUSY;
-	else if (vp->v_usecount == 0)
-		vp->v_usecount = 1;
 	else
-		atomic_inc_uint(&vp->v_usecount);
+		vp->v_usecount++;
 
 	mutex_exit(vp->v_interlock);
 
@@ -1253,11 +1250,7 @@ vcache_vget(vnode_t *vp)
 		return ENOENT;
 	}
 	VSTATE_ASSERT(vp, VS_LOADED);
-	if (vp->v_usecount == 0)
-		vp->v_usecount = 1;
-	else
-		atomic_inc_uint(&vp->v_usecount);
-
+	vp->v_usecount++;
 	mutex_exit(vp->v_interlock);
 
 	return 0;
@@ -1571,7 +1564,7 @@ vcache_reclaim(vnode_t *vp)
 	 * while we clean it out.
 	 */
 	VSTATE_CHANGE(vp, VS_LOADED, VS_RECLAIMING);
-	if (vp->v_iflag & VI_EXECMAP) {
+	if ((vp->v_iflag & VI_EXECMAP) != 0 && vp->v_uobj.uo_npages != 0) {
 		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
 		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
 	}

Index: src/sys/kern/vfs_vnops.c
diff -u src/sys/kern/vfs_vnops.c:1.202 src/sys/kern/vfs_vnops.c:1.203
--- src/sys/kern/vfs_vnops.c:1.202	Sun Nov 10 06:47:30 2019
+++ src/sys/kern/vfs_vnops.c	Sun Dec  1 13:56:29 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_vnops.c,v 1.202 2019/11/10 06:47:30 mlelstv Exp $	*/
+/*	$NetBSD: vfs_vnops.c,v 1.203 2019/12/01 13:56:29 ad Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.202 2019/11/10 06:47:30 mlelstv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.203 2019/12/01 13:56:29 ad Exp $");
 
 #include "veriexec.h"
 
@@ -1035,8 +1035,9 @@ vn_lock(struct vnode *vp, int flags)
 #if 0
 	KASSERT(vp->v_usecount > 0 || (vp->v_iflag & VI_ONWORKLST) != 0);
 #endif
-	KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY)) == 0);
-	KASSERT(!mutex_owned(vp->v_interlock));
+	KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY|
+	    LK_UPGRADE|LK_DOWNGRADE)) == 0);
+	KASSERT((flags & LK_NOWAIT) != 0 || !mutex_owned(vp->v_interlock));
 
 #ifdef DIAGNOSTIC
 	if (wapbl_vphaswapbl(vp))

Index: src/sys/kern/vnode_if.sh
diff -u src/sys/kern/vnode_if.sh:1.67 src/sys/kern/vnode_if.sh:1.68
--- src/sys/kern/vnode_if.sh:1.67	Fri Oct 11 08:04:52 2019
+++ src/sys/kern/vnode_if.sh	Sun Dec  1 13:56:29 2019
@@ -29,7 +29,7 @@ copyright="\
  * SUCH DAMAGE.
  */
 "
-SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.67 2019/10/11 08:04:52 hannken Exp $'
+SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.68 2019/12/01 13:56:29 ad Exp $'
 
 # Script to produce VFS front-end sugar.
 #
@@ -481,7 +481,7 @@ function bodynorm() {
 	}
 	if (fstrans == "LOCK")
 		printf("\terror = vop_pre(%s, &mp, &mpsafe, %s);\n",
-			argname[0], "(flags & LK_NOWAIT ? FST_TRY : FST_YES)");
+			argname[0], "(flags & (LK_UPGRADE|LK_DOWNGRADE) ? FST_NO : (flags & LK_NOWAIT ? FST_TRY : FST_YES))");
 	else if (fstrans == "UNLOCK")
 		printf("\terror = vop_pre(%s, &mp, &mpsafe, FST_%s);\n",
 			argname[0], "NO");
@@ -493,7 +493,7 @@ function bodynorm() {
 		argname[0], name);
 	if (fstrans == "LOCK")
 		printf("\tvop_post(%s, mp, mpsafe, %s);\n",
-			argname[0], "(error ? FST_YES : FST_NO)");
+			argname[0], "(flags & (LK_UPGRADE|LK_DOWNGRADE) ? FST_NO : (error ? FST_YES : FST_NO))");
 	else if (fstrans == "UNLOCK")
 		printf("\tvop_post(%s, mp, mpsafe, FST_%s);\n",
 			argname[0], "YES");

Index: src/sys/miscfs/genfs/genfs_vnops.c
diff -u src/sys/miscfs/genfs/genfs_vnops.c:1.199 src/sys/miscfs/genfs/genfs_vnops.c:1.200
--- src/sys/miscfs/genfs/genfs_vnops.c:1.199	Wed Oct 25 08:12:39 2017
+++ src/sys/miscfs/genfs/genfs_vnops.c	Sun Dec  1 13:56:29 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: genfs_vnops.c,v 1.199 2017/10/25 08:12:39 maya Exp $	*/
+/*	$NetBSD: genfs_vnops.c,v 1.200 2019/12/01 13:56:29 ad Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -57,7 +57,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.199 2017/10/25 08:12:39 maya Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.200 2019/12/01 13:56:29 ad Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -292,12 +292,23 @@ genfs_deadlock(void *v)
 	if (! ISSET(flags, LK_RETRY))
 		return ENOENT;
 
-	op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
-	if (ISSET(flags, LK_NOWAIT)) {
-		if (! rw_tryenter(&vip->vi_lock, op))
-			return EBUSY;
+	if (ISSET(flags, LK_DOWNGRADE)) {
+		rw_downgrade(vip->vi_lock);
+	} else if (ISSET(flags, LK_UPGRADE)) {
+		if (!rw_tryupgrade(vip->vi_lock)) {
+			if (ISSET(flags, LK_NOWAIT))
+				return EBUSY;
+			rw_exit(vip->vi_lock);
+			rw_enter(vip->vi_lock, RW_WRITER);
+		}
 	} else {
-		rw_enter(&vip->vi_lock, op);
+		op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
+		if (ISSET(flags, LK_NOWAIT)) {
+			if (!rw_tryenter(vip->vi_lock, op))
+				return EBUSY;
+		} else {
+			rw_enter(vip->vi_lock, op);
+		}
 	}
 	VSTATE_ASSERT_UNLOCKED(vp, VS_RECLAIMED);
 	return 0;
@@ -315,7 +326,7 @@ genfs_deadunlock(void *v)
 	vnode_t *vp = ap->a_vp;
 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
 
-	rw_exit(&vip->vi_lock);
+	rw_exit(vip->vi_lock);
 
 	return 0;
 }
@@ -335,12 +346,23 @@ genfs_lock(void *v)
 	int flags = ap->a_flags;
 	krw_t op;
 
-	op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
-	if (ISSET(flags, LK_NOWAIT)) {
-		if (! rw_tryenter(&vip->vi_lock, op))
-			return EBUSY;
+	if (ISSET(flags, LK_DOWNGRADE)) {
+		rw_downgrade(vip->vi_lock);
+	} else if (ISSET(flags, LK_UPGRADE)) {
+		if (!rw_tryupgrade(vip->vi_lock)) {
+			if (ISSET(flags, LK_NOWAIT))
+				return EBUSY;
+			rw_exit(vip->vi_lock);
+			rw_enter(vip->vi_lock, RW_WRITER);
+		}
 	} else {
-		rw_enter(&vip->vi_lock, op);
+		op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
+		if (ISSET(flags, LK_NOWAIT)) {
+			if (!rw_tryenter(vip->vi_lock, op))
+				return EBUSY;
+		} else {
+			rw_enter(vip->vi_lock, op);
+		}
 	}
 	VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE);
 	return 0;
@@ -358,7 +380,7 @@ genfs_unlock(void *v)
 	vnode_t *vp = ap->a_vp;
 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
 
-	rw_exit(&vip->vi_lock);
+	rw_exit(vip->vi_lock);
 
 	return 0;
 }
@@ -375,10 +397,10 @@ genfs_islocked(void *v)
 	vnode_t *vp = ap->a_vp;
 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
 
-	if (rw_write_held(&vip->vi_lock))
+	if (rw_write_held(vip->vi_lock))
 		return LK_EXCLUSIVE;
 
-	if (rw_read_held(&vip->vi_lock))
+	if (rw_read_held(vip->vi_lock))
 		return LK_SHARED;
 
 	return 0;

Index: src/sys/sys/vnode.h
diff -u src/sys/sys/vnode.h:1.283 src/sys/sys/vnode.h:1.284
--- src/sys/sys/vnode.h:1.283	Sun Nov 10 06:47:30 2019
+++ src/sys/sys/vnode.h	Sun Dec  1 13:56:29 2019
@@ -1,4 +1,4 @@
-/*	$NetBSD: vnode.h,v 1.283 2019/11/10 06:47:30 mlelstv Exp $	*/
+/*	$NetBSD: vnode.h,v 1.284 2019/12/01 13:56:29 ad Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -206,10 +206,13 @@ typedef struct vnode vnode_t;
 /*
  * vnode lock flags
  */
+#define	LK_NONE		0x00000000	/* no lock - for VOP_ISLOCKED() */
 #define	LK_SHARED	0x00000001	/* shared lock */
 #define	LK_EXCLUSIVE	0x00000002	/* exclusive lock */
-#define	LK_NOWAIT	0x00000010	/* do not sleep to await lock */
-#define	LK_RETRY	0x00020000	/* vn_lock: retry until locked */
+#define	LK_UPGRADE	0x00000010	/* upgrade shared -> exclusive */
+#define	LK_DOWNGRADE	0x00000020	/* downgrade exclusive -> shared */
+#define	LK_NOWAIT	0x00000100	/* do not sleep to await lock */
+#define	LK_RETRY	0x00000200	/* vn_lock: retry until locked */
 
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value

Index: src/sys/sys/vnode_impl.h
diff -u src/sys/sys/vnode_impl.h:1.17 src/sys/sys/vnode_impl.h:1.18
--- src/sys/sys/vnode_impl.h:1.17	Thu Sep 21 18:19:44 2017
+++ src/sys/sys/vnode_impl.h	Sun Dec  1 13:56:29 2019
@@ -1,12 +1,9 @@
-/*	$NetBSD: vnode_impl.h,v 1.17 2017/09/21 18:19:44 joerg Exp $	*/
+/*	$NetBSD: vnode_impl.h,v 1.18 2019/12/01 13:56:29 ad Exp $	*/
 
 /*-
- * Copyright (c) 2016 The NetBSD Foundation, Inc.
+ * Copyright (c) 2016, 2019 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
- * This code is derived from software contributed to The NetBSD Foundation
- * by 
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -74,10 +71,11 @@ struct vnode_impl {
 	LIST_HEAD(, namecache) vi_dnclist;	/* n: namecaches (children) */
 	LIST_HEAD(, namecache) vi_nclist;	/* n: namecaches (parent) */
 	int vi_synclist_slot;			/* s: synclist slot index */
+	int vi_lrulisttm;			/* i: time of lru enqueue */
 	TAILQ_ENTRY(vnode_impl) vi_synclist;	/* s: vnodes with dirty bufs */
 	TAILQ_ENTRY(vnode_impl) vi_mntvnodes;	/* m: vnodes for mount point */
 	SLIST_ENTRY(vnode_impl) vi_hash;	/* c: vnode cache list */
-	krwlock_t vi_lock;			/* -: lock for this vnode */
+	krwlock_t *vi_lock;			/* -: lock for this vnode */
 	struct vcache_key vi_key;		/* c: vnode cache key */
 };
 typedef struct vnode_impl vnode_impl_t;
