Module Name:    src
Committed By:   hannken
Date:           Thu May 26 11:09:55 UTC 2016

Modified Files:
        src/sys/kern: vfs_vnode.c
        src/sys/sys: vnode.h

Log Message:
Use vnode state to replace VI_MARKER, VI_CHANGING, VI_XLOCK and VI_CLEAN.

Presented on tech-kern@


To generate a diff of this commit:
cvs rdiff -u -r1.51 -r1.52 src/sys/kern/vfs_vnode.c
cvs rdiff -u -r1.261 -r1.262 src/sys/sys/vnode.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/vfs_vnode.c
diff -u src/sys/kern/vfs_vnode.c:1.51 src/sys/kern/vfs_vnode.c:1.52
--- src/sys/kern/vfs_vnode.c:1.51	Thu May 26 11:08:44 2016
+++ src/sys/kern/vfs_vnode.c	Thu May 26 11:09:55 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_vnode.c,v 1.51 2016/05/26 11:08:44 hannken Exp $	*/
+/*	$NetBSD: vfs_vnode.c,v 1.52 2016/05/26 11:09:55 hannken Exp $	*/
 
 /*-
  * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
@@ -92,6 +92,49 @@
  *	or cleaned via vclean(9), which calls VOP_RECLAIM(9) to disassociate
  *	underlying file system from the vnode, and finally destroyed.
  *
+ * Vnode state
+ *
+ *	Vnode is always in one of six states:
+ *	- MARKER	This is a marker vnode to help list traversal.  It
+ *			will never change its state.
+ *	- LOADING	Vnode is associating with the underlying file
+ *			system and is not yet ready to use.
+ *	- ACTIVE	Vnode has an associated underlying file system
+ *			and is ready to use.
+ *	- BLOCKED	Vnode is active but cannot get new references.
+ *	- RECLAIMING	Vnode is disassociating from the underlying file
+ *			system.
+ *	- RECLAIMED	Vnode has disassociated from the underlying
+ *			file system and is dead.
+ *
+ *	Valid state changes are:
+ *	LOADING -> ACTIVE
+ *			Vnode has been initialised in vcache_get() or
+ *			vcache_new() and is ready to use.
+ *	ACTIVE -> RECLAIMING
+ *			Vnode starts disassociating from the underlying
+ *			file system in vclean().
+ *	RECLAIMING -> RECLAIMED
+ *			Vnode has finished disassociating from the
+ *			underlying file system in vclean().
+ *	ACTIVE -> BLOCKED
+ *			Either vcache_rekey*() is changing the vnode key or
+ *			vrelel() is about to call VOP_INACTIVE().
+ *	BLOCKED -> ACTIVE
+ *			The block condition is over.
+ *	LOADING -> RECLAIMED
+ *			Either vcache_get() or vcache_new() failed to
+ *			associate the underlying file system, or
+ *			vcache_rekey*() drops a vnode used as a placeholder.
+ *
+ *	Of these states, LOADING, BLOCKED and RECLAIMING are
+ *	intermediate, and it is possible to wait for a state change.
+ *
+ *	The state is protected by v_interlock, with one exception:
+ *	changing state away from LOADING requires both v_interlock and
+ *	vcache.lock to be held, so "state == LOADING" may be checked
+ *	while holding only vcache.lock.  See vcache_get() for details.
+ *
  * Reference counting
  *
  *	Vnode is considered active, if reference count (vnode_t::v_usecount)
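
[Editor's note, not part of the commit: the transition rules documented in
the hunk above are compact enough to encode directly.  A minimal standalone
C sketch, reusing the vcache_state enum that appears later in this diff:]

#include <stdbool.h>

enum vcache_state {
	VN_MARKER, VN_LOADING, VN_ACTIVE, VN_BLOCKED,
	VN_RECLAIMING, VN_RECLAIMED
};

/* True iff "from -> to" is one of the six transitions listed above. */
static bool
vstate_change_valid(enum vcache_state from, enum vcache_state to)
{

	switch (from) {
	case VN_LOADING:	/* -> ACTIVE on success, -> RECLAIMED on failure */
		return to == VN_ACTIVE || to == VN_RECLAIMED;
	case VN_ACTIVE:		/* -> RECLAIMING (vclean) or -> BLOCKED */
		return to == VN_RECLAIMING || to == VN_BLOCKED;
	case VN_BLOCKED:	/* -> ACTIVE once the block condition is over */
		return to == VN_ACTIVE;
	case VN_RECLAIMING:	/* -> RECLAIMED when vclean() finishes */
		return to == VN_RECLAIMED;
	default:		/* MARKER and RECLAIMED never change state */
		return false;
	}
}
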
@@ -109,14 +152,10 @@
  *	Changing the usecount from a non-zero value to a non-zero value can
  *	safely be done using atomic operations, without the interlock held.
  *
- *	Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while
- *	mntvnode_lock is still held.
- *
- *	See PR 41374.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.51 2016/05/26 11:08:44 hannken Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.52 2016/05/26 11:09:55 hannken Exp $");
 
 #define _VFS_VNODE_PRIVATE
 
@@ -146,7 +185,6 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,
 
 /* Flags to vrelel. */
 #define	VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */
-#define	VRELEL_CHANGING_SET	0x0002	/* VI_CHANGING set by caller. */
 
 enum vcache_state {
 	VN_MARKER,	/* Stable, used as marker. Will not change. */
@@ -162,10 +200,9 @@ struct vcache_key {
 	size_t vk_key_len;
 };
 struct vcache_node {
-	struct vnode vn_data;
+	struct vnode vn_vnode;
 	enum vcache_state vn_state;
 	SLIST_ENTRY(vcache_node) vn_hash;
-	struct vnode *vn_vnode;
 	struct vcache_key vn_key;
 };
 
@@ -211,7 +248,6 @@ static void		vdrain_thread(void *);
 static void		vrele_thread(void *);
 static void		vnpanic(vnode_t *, const char *, ...)
     __printflike(2, 3);
-static void		vwait(vnode_t *, int);
 
 /* Routines having to do with the management of the vnode table. */
 extern struct mount	*dead_rootmount;
@@ -253,7 +289,7 @@ vstate_name(enum vcache_state state)
 #define VSTATE_ASSERT(vp, state) \
 	vstate_assert((vp), (state), __func__, __LINE__)
 
-static void __unused
+static void
 vstate_assert(vnode_t *vp, enum vcache_state state, const char *func, int line)
 {
 	struct vcache_node *node = VP_TO_VN(vp);
@@ -266,7 +302,7 @@ vstate_assert(vnode_t *vp, enum vcache_s
 	    vstate_name(node->vn_state), vstate_name(state), func, line);
 }
 
-static enum vcache_state __unused
+static enum vcache_state
 vstate_assert_get(vnode_t *vp, const char *func, int line)
 {
 	struct vcache_node *node = VP_TO_VN(vp);
@@ -279,7 +315,7 @@ vstate_assert_get(vnode_t *vp, const cha
 	return node->vn_state;
 }
 
-static void __unused
+static void
 vstate_assert_wait_stable(vnode_t *vp, const char *func, int line)
 {
 	struct vcache_node *node = VP_TO_VN(vp);
@@ -297,7 +333,7 @@ vstate_assert_wait_stable(vnode_t *vp, c
 		    vstate_name(node->vn_state), func, line);
 }
 
-static void __unused
+static void
 vstate_assert_change(vnode_t *vp, enum vcache_state from, enum vcache_state to,
     const char *func, int line)
 {
@@ -334,7 +370,7 @@ vstate_assert_change(vnode_t *vp, enum v
 	vstate_wait_stable((vp))
 #define VSTATE_ASSERT(vp, state)
 
-static void __unused
+static void
 vstate_wait_stable(vnode_t *vp)
 {
 	struct vcache_node *node = VP_TO_VN(vp);
@@ -343,7 +379,7 @@ vstate_wait_stable(vnode_t *vp)
 		cv_wait(&vp->v_cv, vp->v_interlock);
 }
 
-static void __unused
+static void
 vstate_change(vnode_t *vp, enum vcache_state from, enum vcache_state to)
 {
 	struct vcache_node *node = VP_TO_VN(vp);
@@ -399,7 +435,7 @@ vnalloc_marker(struct mount *mp)
 	uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
 	vp->v_mount = mp;
 	vp->v_type = VBAD;
-	vp->v_iflag = VI_MARKER;
+	node->vn_state = VN_MARKER;
 
 	return vp;
 }
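
[Editor's note: for readers new to marker vnodes, this is the traversal
pattern they exist to support.  A hypothetical sketch only: the list head,
lock and entry member below are invented for illustration, while
vnalloc_marker(), vnis_marker() and vnfree_marker() are the real helpers
from this file.]

	vnode_t *vp, *mvp = vnalloc_marker(mp);

	mutex_enter(&list_lock);			/* illustrative */
	vp = TAILQ_FIRST(&vnode_list);			/* illustrative */
	while (vp != NULL) {
		if (vnis_marker(vp)) {	/* skip other walkers' markers */
			vp = TAILQ_NEXT(vp, v_entry);
			continue;
		}
		/* Park the marker behind vp; the position survives unlock. */
		TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_entry);
		mutex_exit(&list_lock);
		/* ... work on vp without the list lock held ... */
		mutex_enter(&list_lock);
		vp = TAILQ_NEXT(mvp, v_entry);	/* resume past the marker */
		TAILQ_REMOVE(&vnode_list, mvp, v_entry);
	}
	mutex_exit(&list_lock);
	vnfree_marker(mvp);
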
@@ -413,7 +449,7 @@ vnfree_marker(vnode_t *vp)
 	struct vcache_node *node;
 
 	node = VP_TO_VN(vp);
-	KASSERT(ISSET(vp->v_iflag, VI_MARKER));
+	KASSERT(node->vn_state == VN_MARKER);
 	uvm_obj_destroy(&vp->v_uobj, true);
 	pool_cache_put(vcache.pool, node);
 }
@@ -425,7 +461,7 @@ bool
 vnis_marker(vnode_t *vp)
 {
 
-	return (ISSET(vp->v_iflag, VI_MARKER));
+	return (VP_TO_VN(vp)->vn_state == VN_MARKER);
 }
 
 /*
@@ -452,7 +488,6 @@ try_nextlist:
 		 * lists.
 		 */
 		KASSERT(vp->v_usecount == 0);
-		KASSERT((vp->v_iflag & VI_CLEAN) == 0);
 		KASSERT(vp->v_freelisthd == listhd);
 
 		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
@@ -461,7 +496,6 @@ try_nextlist:
 			VOP_UNLOCK(vp);
 			continue;
 		}
-		KASSERT((vp->v_iflag & VI_XLOCK) == 0);
 		mp = vp->v_mount;
 		if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
 			mutex_exit(vp->v_interlock);
@@ -493,10 +527,8 @@ try_nextlist:
 	 * before doing this.
 	 */
 	vp->v_usecount = 1;
-	KASSERT((vp->v_iflag & VI_CHANGING) == 0);
-	vp->v_iflag |= VI_CHANGING;
 	vclean(vp);
-	vrelel(vp, VRELEL_CHANGING_SET);
+	vrelel(vp, 0);
 	fstrans_done(mp);
 
 	return 0;
@@ -552,21 +584,22 @@ vremfree(vnode_t *vp)
 
 /*
  * vget: get a particular vnode from the free list, increment its reference
- * count and lock it.
+ * count and return it.
  *
- * => Should be called with v_interlock held.
+ * => Must be called with v_interlock held.
  *
- * If VI_CHANGING is set, the vnode may be eliminated in vgone()/vclean().
+ * If state is VN_RECLAIMING, the vnode may be eliminated in vgone()/vclean().
  * In that case, we cannot grab the vnode, so the process is awakened when
  * the transition is completed, and an error returned to indicate that the
  * vnode is no longer usable.
+ *
+ * If state is VN_LOADING or VN_BLOCKED, wait until the vnode enters a
+ * stable state (VN_ACTIVE or VN_RECLAIMED).
  */
 int
 vget(vnode_t *vp, int flags, bool waitok)
 {
-	int error = 0;
 
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
 	KASSERT(mutex_owned(vp->v_interlock));
 	KASSERT((flags & ~LK_NOWAIT) == 0);
 	KASSERT(waitok == ((flags & LK_NOWAIT) == 0));
@@ -587,24 +620,24 @@ vget(vnode_t *vp, int flags, bool waitok
 	 * for the change to complete and take care not to return
 	 * a clean vnode.
 	 */
-	if ((vp->v_iflag & VI_CHANGING) != 0) {
-		if ((flags & LK_NOWAIT) != 0) {
-			vrelel(vp, 0);
-			return EBUSY;
-		}
-		vwait(vp, VI_CHANGING);
-		if ((vp->v_iflag & VI_CLEAN) != 0) {
-			vrelel(vp, 0);
-			return ENOENT;
-		}
+	if (! ISSET(flags, LK_NOWAIT))
+		VSTATE_WAIT_STABLE(vp);
+	if (VSTATE_GET(vp) == VN_RECLAIMED) {
+		vrelel(vp, 0);
+		return ENOENT;
+	} else if (VSTATE_GET(vp) != VN_ACTIVE) {
+		KASSERT(ISSET(flags, LK_NOWAIT));
+		vrelel(vp, 0);
+		return EBUSY;
 	}
 
 	/*
 	 * Ok, we got it in good shape.
 	 */
-	KASSERT((vp->v_iflag & VI_CLEAN) == 0);
+	VSTATE_ASSERT(vp, VN_ACTIVE);
 	mutex_exit(vp->v_interlock);
-	return error;
+
+	return 0;
 }
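
[Editor's note: a hedged caller-side sketch of the contract above; the
function and its name are hypothetical.]

static int
example_try_use(vnode_t *vp)
{
	int error;

	mutex_enter(vp->v_interlock);		/* vget() consumes this */
	error = vget(vp, 0, true /* wait */);
	if (error != 0) {
		/*
		 * ENOENT: the vnode went VN_RECLAIMED while we waited.
		 * EBUSY: possible only with LK_NOWAIT, the vnode was in
		 * an intermediate state.
		 */
		return error;
	}
	/* We hold a usecount reference on a VN_ACTIVE vnode. */
	vrele(vp);
	return 0;
}
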
 
 /*
@@ -614,8 +647,6 @@ void
 vput(vnode_t *vp)
 {
 
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
-
 	VOP_UNLOCK(vp);
 	vrele(vp);
 }
@@ -652,11 +683,10 @@ vrelel(vnode_t *vp, int flags)
 	int error;
 
 	KASSERT(mutex_owned(vp->v_interlock));
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
 	KASSERT(vp->v_freelisthd == NULL);
 
 	if (__predict_false(vp->v_op == dead_vnodeop_p &&
-	    (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
+	    VSTATE_GET(vp) != VN_RECLAIMED)) {
 		vnpanic(vp, "dead but not clean");
 	}
 
@@ -665,11 +695,6 @@ vrelel(vnode_t *vp, int flags)
 	 * and unlock.
 	 */
 	if (vtryrele(vp)) {
-		if ((flags & VRELEL_CHANGING_SET) != 0) {
-			KASSERT((vp->v_iflag & VI_CHANGING) != 0);
-			vp->v_iflag &= ~VI_CHANGING;
-			cv_broadcast(&vp->v_cv);
-		}
 		mutex_exit(vp->v_interlock);
 		return;
 	}
@@ -677,8 +702,6 @@ vrelel(vnode_t *vp, int flags)
 		vnpanic(vp, "%s: bad ref count", __func__);
 	}
 
-	KASSERT((vp->v_iflag & VI_XLOCK) == 0);
-
 #ifdef DIAGNOSTIC
 	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
 	    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
@@ -690,7 +713,7 @@ vrelel(vnode_t *vp, int flags)
 	 * If not clean, deactivate the vnode, but preserve
 	 * our reference across the call to VOP_INACTIVE().
 	 */
-	if ((vp->v_iflag & VI_CLEAN) == 0) {
+	if (VSTATE_GET(vp) != VN_RECLAIMED) {
 		recycle = false;
 
 		/*
@@ -729,11 +752,6 @@ vrelel(vnode_t *vp, int flags)
 			 * Defer reclaim to the kthread; it's not safe to
 			 * clean it here.  We donate it our last reference.
 			 */
-			if ((flags & VRELEL_CHANGING_SET) != 0) {
-				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
-				vp->v_iflag &= ~VI_CHANGING;
-				cv_broadcast(&vp->v_cv);
-			}
 			mutex_enter(&vrele_lock);
 			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
 			if (++vrele_pending > (desiredvnodes >> 8))
@@ -749,28 +767,17 @@ vrelel(vnode_t *vp, int flags)
 		 */
 		if (__predict_false(vtryrele(vp))) {
 			VOP_UNLOCK(vp);
-			if ((flags & VRELEL_CHANGING_SET) != 0) {
-				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
-				vp->v_iflag &= ~VI_CHANGING;
-				cv_broadcast(&vp->v_cv);
-			}
 			mutex_exit(vp->v_interlock);
 			return;
 		}
-
-		if ((flags & VRELEL_CHANGING_SET) == 0) {
-			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
-			vp->v_iflag |= VI_CHANGING;
-		}
+		VSTATE_CHANGE(vp, VN_ACTIVE, VN_BLOCKED);
 		mutex_exit(vp->v_interlock);
 
 		/*
-		 * The vnode can gain another reference while being
+		 * The vnode must not gain another reference while being
 		 * deactivated.  If VOP_INACTIVE() indicates that
 		 * the described file has been deleted, then recycle
-		 * the vnode irrespective of additional references.
-		 * Another thread may be waiting to re-use the on-disk
-		 * inode.
+		 * the vnode.
 		 *
 		 * Note that VOP_INACTIVE() will drop the vnode lock.
 		 */
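
[Editor's note: condensed from the vrelel() hunks above and below, the
deactivation window now reads:]

	VSTATE_CHANGE(vp, VN_ACTIVE, VN_BLOCKED);	/* vget() waits */
	mutex_exit(vp->v_interlock);
	VOP_INACTIVE(vp, &recycle);		/* drops the vnode lock */
	mutex_enter(vp->v_interlock);
	VSTATE_CHANGE(vp, VN_BLOCKED, VN_ACTIVE);	/* references resume */
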
@@ -781,11 +788,9 @@ vrelel(vnode_t *vp, int flags)
 				recycle = false;
 		}
 		mutex_enter(vp->v_interlock);
+		VSTATE_CHANGE(vp, VN_BLOCKED, VN_ACTIVE);
 		if (!recycle) {
 			if (vtryrele(vp)) {
-				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
-				vp->v_iflag &= ~VI_CHANGING;
-				cv_broadcast(&vp->v_cv);
 				mutex_exit(vp->v_interlock);
 				return;
 			}
@@ -806,26 +811,19 @@ vrelel(vnode_t *vp, int flags)
 		 * otherwise just free it.
 		 */
 		if (recycle) {
+			VSTATE_ASSERT(vp, VN_ACTIVE);
 			vclean(vp);
 		}
 		KASSERT(vp->v_usecount > 0);
-	} else { /* vnode was already clean */
-		if ((flags & VRELEL_CHANGING_SET) == 0) {
-			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
-			vp->v_iflag |= VI_CHANGING;
-		}
 	}
 
 	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
 		/* Gained another reference while being reclaimed. */
-		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
-		vp->v_iflag &= ~VI_CHANGING;
-		cv_broadcast(&vp->v_cv);
 		mutex_exit(vp->v_interlock);
 		return;
 	}
 
-	if ((vp->v_iflag & VI_CLEAN) != 0) {
+	if (VSTATE_GET(vp) == VN_RECLAIMED) {
 		/*
 		 * It's clean so destroy it.  It isn't referenced
 		 * anywhere since it has been reclaimed.
@@ -852,9 +850,6 @@ vrelel(vnode_t *vp, int flags)
 		}
 		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
 		mutex_exit(&vnode_free_list_lock);
-		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
-		vp->v_iflag &= ~VI_CHANGING;
-		cv_broadcast(&vp->v_cv);
 		mutex_exit(vp->v_interlock);
 	}
 }
@@ -863,8 +858,6 @@ void
 vrele(vnode_t *vp)
 {
 
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
-
 	if (vtryrele(vp)) {
 		return;
 	}
@@ -879,8 +872,6 @@ void
 vrele_async(vnode_t *vp)
 {
 
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
-
 	if (vtryrele(vp)) {
 		return;
 	}
@@ -948,7 +939,6 @@ void
 vref(vnode_t *vp)
 {
 
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
 	KASSERT(vp->v_usecount != 0);
 
 	atomic_inc_uint(&vp->v_usecount);
@@ -963,7 +953,6 @@ vholdl(vnode_t *vp)
 {
 
 	KASSERT(mutex_owned(vp->v_interlock));
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
 
 	if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
 		mutex_enter(&vnode_free_list_lock);
@@ -984,7 +973,6 @@ holdrelel(vnode_t *vp)
 {
 
 	KASSERT(mutex_owned(vp->v_interlock));
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
 
 	if (vp->v_holdcnt <= 0) {
 		vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
@@ -1017,8 +1005,6 @@ vclean(vnode_t *vp)
 	KASSERT((vp->v_vflag & VV_LOCKSWORK) == 0 ||
 	    VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
 	KASSERT(mutex_owned(vp->v_interlock));
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
-	KASSERT((vp->v_iflag & (VI_XLOCK | VI_CLEAN)) == 0);
 	KASSERT(vp->v_usecount != 0);
 
 	active = (vp->v_usecount > 1);
@@ -1026,7 +1012,7 @@ vclean(vnode_t *vp)
 	 * Prevent the vnode from being recycled or brought into use
 	 * while we clean it out.
 	 */
-	vp->v_iflag |= VI_XLOCK;
+	VSTATE_CHANGE(vp, VN_ACTIVE, VN_RECLAIMING);
 	if (vp->v_iflag & VI_EXECMAP) {
 		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
 		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
@@ -1056,7 +1042,7 @@ vclean(vnode_t *vp)
 	} else {
 		/*
 		 * Any other processes trying to obtain this lock must first
-		 * wait for VI_XLOCK to clear, then call the new lock operation.
+		 * wait for VN_RECLAIMED, then call the new lock operation.
 		 */
 		VOP_UNLOCK(vp);
 	}
@@ -1086,11 +1072,9 @@ vclean(vnode_t *vp)
 	mutex_enter(vp->v_interlock);
 	vp->v_op = dead_vnodeop_p;
 	vp->v_vflag |= VV_LOCKSWORK;
-	vp->v_iflag |= VI_CLEAN;
+	VSTATE_CHANGE(vp, VN_RECLAIMING, VN_RECLAIMED);
 	vp->v_tag = VT_NON;
 	KNOTE(&vp->v_klist, NOTE_REVOKE);
-	vp->v_iflag &= ~VI_XLOCK;
-	cv_broadcast(&vp->v_cv);
 
 	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
 }
@@ -1107,24 +1091,13 @@ vrecycle(vnode_t *vp)
 
 	mutex_enter(vp->v_interlock);
 
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
-
-	if (vp->v_usecount != 1) {
-		mutex_exit(vp->v_interlock);
-		VOP_UNLOCK(vp);
-		return false;
-	}
-	if ((vp->v_iflag & VI_CHANGING) != 0)
-		vwait(vp, VI_CHANGING);
 	if (vp->v_usecount != 1) {
 		mutex_exit(vp->v_interlock);
 		VOP_UNLOCK(vp);
 		return false;
 	}
-	KASSERT((vp->v_iflag & VI_CLEAN) == 0);
-	vp->v_iflag |= VI_CHANGING;
 	vclean(vp);
-	vrelel(vp, VRELEL_CHANGING_SET);
+	vrelel(vp, 0);
 	return true;
 }
 
@@ -1142,7 +1115,8 @@ vrevoke(vnode_t *vp)
 	KASSERT(vp->v_usecount > 0);
 
 	mutex_enter(vp->v_interlock);
-	if ((vp->v_iflag & VI_CLEAN) != 0) {
+	VSTATE_WAIT_STABLE(vp);
+	if (VSTATE_GET(vp) == VN_RECLAIMED) {
 		mutex_exit(vp->v_interlock);
 		return;
 	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
@@ -1170,16 +1144,13 @@ vgone(vnode_t *vp)
 {
 
 	if (vn_lock(vp, LK_EXCLUSIVE) != 0) {
-		KASSERT((vp->v_iflag & VI_CLEAN) != 0);
+		VSTATE_ASSERT(vp, VN_RECLAIMED);
 		vrele(vp);
 	}
 
 	mutex_enter(vp->v_interlock);
-	if ((vp->v_iflag & VI_CHANGING) != 0)
-		vwait(vp, VI_CHANGING);
-	vp->v_iflag |= VI_CHANGING;
 	vclean(vp);
-	vrelel(vp, VRELEL_CHANGING_SET);
+	vrelel(vp, 0);
 }
 
 static inline uint32_t
@@ -1300,7 +1271,6 @@ vcache_free(struct vcache_node *node)
 	vp = VN_TO_VP(node);
 
 	KASSERT(vp->v_usecount == 0);
-	KASSERT((vp->v_iflag & VI_MARKER) == 0);
 
 	rw_destroy(&vp->v_lock);
 	mutex_enter(&vnode_free_list_lock);
@@ -1339,8 +1309,21 @@ again:
 	node = vcache_hash_lookup(&vcache_key, hash);
 
 	/* If found, take a reference or retry. */
-	if (__predict_true(node != NULL && node->vn_vnode != NULL)) {
-		vp = node->vn_vnode;
+	if (__predict_true(node != NULL)) {
+		/*
+		 * If the vnode is loading, we cannot take the v_interlock
+		 * here, as it might change during load (see uvm_obj_setlock()).
+		 * As changing state from VN_LOADING requires both vcache.lock
+		 * and v_interlock, it is safe to test with vcache.lock held.
+		 *
+		 * Wait for vnodes changing state from VN_LOADING and retry.
+		 */
+		if (__predict_false(node->vn_state == VN_LOADING)) {
+			cv_wait(&vcache.cv, &vcache.lock);
+			mutex_exit(&vcache.lock);
+			goto again;
+		}
+		vp = VN_TO_VP(node);
 		mutex_enter(vp->v_interlock);
 		mutex_exit(&vcache.lock);
 		error = vget(vp, 0, true /* wait */);
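
[Editor's note: the invariant this hunk relies on, condensed.  Because
leaving VN_LOADING requires vcache.lock as well as v_interlock, a thread
holding only vcache.lock can safely test for the loading state:]

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	if (node != NULL && node->vn_state == VN_LOADING) {
		/* Safe without v_interlock: LOADING is pinned by our lock. */
		cv_wait(&vcache.cv, &vcache.lock);
		mutex_exit(&vcache.lock);
		goto again;			/* redo the hash lookup */
	}
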
@@ -1351,14 +1334,6 @@ again:
 		KASSERT((error != 0) == (*vpp == NULL));
 		return error;
 	}
-
-	/* If another thread loads this node, wait and retry. */
-	if (node != NULL) {
-		KASSERT(node->vn_vnode == NULL);
-		mutex_exit(&vcache.lock);
-		kpause("vcache", false, mstohz(20), NULL);
-		goto again;
-	}
 	mutex_exit(&vcache.lock);
 
 	/* Allocate and initialize a new vcache / vnode pair. */
@@ -1375,28 +1350,28 @@ again:
 		    new_node, vn_hash);
 		node = new_node;
 	}
-	mutex_exit(&vcache.lock);
 
 	/* If another thread beat us inserting this node, retry. */
 	if (node != new_node) {
-		KASSERT(vp->v_usecount == 1);
-		vp->v_usecount = 0;
-		vcache_free(new_node);
+		mutex_enter(vp->v_interlock);
+		VSTATE_CHANGE(vp, VN_LOADING, VN_RECLAIMED);
+		mutex_exit(&vcache.lock);
+		vrelel(vp, 0);
 		vfs_unbusy(mp, false, NULL);
 		goto again;
 	}
+	mutex_exit(&vcache.lock);
 
-	/* Load the fs node.  Exclusive as new_node->vn_vnode is NULL. */
-	vp->v_iflag |= VI_CHANGING;
+	/* Load the fs node.  Exclusive as new_node is VN_LOADING. */
 	error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
 	if (error) {
 		mutex_enter(&vcache.lock);
 		SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
 		    new_node, vcache_node, vn_hash);
+		mutex_enter(vp->v_interlock);
+		VSTATE_CHANGE(vp, VN_LOADING, VN_RECLAIMED);
 		mutex_exit(&vcache.lock);
-		KASSERT(vp->v_usecount == 1);
-		vp->v_usecount = 0;
-		vcache_free(new_node);
+		vrelel(vp, 0);
 		vfs_unbusy(mp, false, NULL);
 		KASSERT(*vpp == NULL);
 		return error;
@@ -1412,12 +1387,10 @@ again:
 	/* Finished loading, finalize node. */
 	mutex_enter(&vcache.lock);
 	new_node->vn_key.vk_key = new_key;
-	new_node->vn_vnode = vp;
-	mutex_exit(&vcache.lock);
 	mutex_enter(vp->v_interlock);
-	vp->v_iflag &= ~VI_CHANGING;
-	cv_broadcast(&vp->v_cv);
+	VSTATE_CHANGE(vp, VN_LOADING, VN_ACTIVE);
 	mutex_exit(vp->v_interlock);
+	mutex_exit(&vcache.lock);
 	*vpp = vp;
 	return 0;
 }
@@ -1431,7 +1404,7 @@ vcache_new(struct mount *mp, struct vnod
 {
 	int error;
 	uint32_t hash;
-	struct vnode *vp;
+	struct vnode *ovp, *vp;
 	struct vcache_node *new_node;
 	struct vcache_node *old_node __diagused;
 
@@ -1446,13 +1419,14 @@ vcache_new(struct mount *mp, struct vnod
 	vp = VN_TO_VP(new_node);
 
 	/* Create and load the fs node. */
-	vp->v_iflag |= VI_CHANGING;
 	error = VFS_NEWVNODE(mp, dvp, vp, vap, cred,
 	    &new_node->vn_key.vk_key_len, &new_node->vn_key.vk_key);
 	if (error) {
-		KASSERT(vp->v_usecount == 1);
-		vp->v_usecount = 0;
-		vcache_free(VP_TO_VN(vp));
+		mutex_enter(&vcache.lock);
+		mutex_enter(vp->v_interlock);
+		VSTATE_CHANGE(vp, VN_LOADING, VN_RECLAIMED);
+		mutex_exit(&vcache.lock);
+		vrelel(vp, 0);
 		vfs_unbusy(mp, false, NULL);
 		KASSERT(*vpp == NULL);
 		return error;
@@ -1464,16 +1438,11 @@ vcache_new(struct mount *mp, struct vnod
 	/* Wait for previous instance to be reclaimed, then insert new node. */
 	mutex_enter(&vcache.lock);
 	while ((old_node = vcache_hash_lookup(&new_node->vn_key, hash))) {
-#ifdef DIAGNOSTIC
-		if (old_node->vn_vnode != NULL)
-			mutex_enter(old_node->vn_vnode->v_interlock);
-		KASSERT(old_node->vn_vnode == NULL ||
-		    (old_node->vn_vnode->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0);
-		if (old_node->vn_vnode != NULL)
-			mutex_exit(old_node->vn_vnode->v_interlock);
-#endif
+		ovp = VN_TO_VP(old_node);
+		mutex_enter(ovp->v_interlock);
 		mutex_exit(&vcache.lock);
-		kpause("vcache", false, mstohz(20), NULL);
+		error = vget(ovp, 0, true /* wait */);
+		KASSERT(error == ENOENT);
 		mutex_enter(&vcache.lock);
 	}
 	SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
@@ -1486,11 +1455,9 @@ vcache_new(struct mount *mp, struct vnod
 
 	/* Finished loading, finalize node. */
 	mutex_enter(&vcache.lock);
-	new_node->vn_vnode = vp;
-	mutex_exit(&vcache.lock);
 	mutex_enter(vp->v_interlock);
-	vp->v_iflag &= ~VI_CHANGING;
-	cv_broadcast(&vp->v_cv);
+	VSTATE_CHANGE(vp, VN_LOADING, VN_ACTIVE);
+	mutex_exit(&vcache.lock);
 	mutex_exit(vp->v_interlock);
 	*vpp = vp;
 	return 0;
@@ -1508,6 +1475,7 @@ vcache_rekey_enter(struct mount *mp, str
 	uint32_t old_hash, new_hash;
 	struct vcache_key old_vcache_key, new_vcache_key;
 	struct vcache_node *node, *new_node;
+	struct vnode *tvp;
 
 	old_vcache_key.vk_mount = mp;
 	old_vcache_key.vk_key = old_key;
@@ -1521,16 +1489,16 @@ vcache_rekey_enter(struct mount *mp, str
 
 	new_node = vcache_alloc();
 	new_node->vn_key = new_vcache_key;
-
-	mutex_enter(&vcache.lock);
+	tvp = VN_TO_VP(new_node);
 
 	/* Insert locked new node used as placeholder. */
+	mutex_enter(&vcache.lock);
 	node = vcache_hash_lookup(&new_vcache_key, new_hash);
 	if (node != NULL) {
+		mutex_enter(tvp->v_interlock);
+		VSTATE_CHANGE(tvp, VN_LOADING, VN_RECLAIMED);
 		mutex_exit(&vcache.lock);
-		KASSERT(VN_TO_VP(new_node)->v_usecount == 1);
-		VN_TO_VP(new_node)->v_usecount = 0;
-		vcache_free(new_node);
+		vrelel(tvp, 0);
 		return EEXIST;
 	}
 	SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
@@ -1539,9 +1507,11 @@ vcache_rekey_enter(struct mount *mp, str
 	/* Lock old node. */
 	node = vcache_hash_lookup(&old_vcache_key, old_hash);
 	KASSERT(node != NULL);
-	KASSERT(node->vn_vnode == vp);
-	node->vn_vnode = NULL;
+	KASSERT(VN_TO_VP(node) == vp);
+	mutex_enter(vp->v_interlock);
+	VSTATE_CHANGE(vp, VN_ACTIVE, VN_BLOCKED);
 	node->vn_key = old_vcache_key;
+	mutex_exit(vp->v_interlock);
 	mutex_exit(&vcache.lock);
 	return 0;
 }
@@ -1557,6 +1527,7 @@ vcache_rekey_exit(struct mount *mp, stru
 	uint32_t old_hash, new_hash;
 	struct vcache_key old_vcache_key, new_vcache_key;
 	struct vcache_node *old_node, *new_node;
+	struct vnode *tvp;
 
 	old_vcache_key.vk_mount = mp;
 	old_vcache_key.vk_key = old_key;
@@ -1573,13 +1544,18 @@ vcache_rekey_exit(struct mount *mp, stru
 	/* Lookup old and new node. */
 	old_node = vcache_hash_lookup(&old_vcache_key, old_hash);
 	KASSERT(old_node != NULL);
-	KASSERT(old_node->vn_vnode == NULL);
+	KASSERT(VN_TO_VP(old_node) == vp);
+	mutex_enter(vp->v_interlock);
+	VSTATE_ASSERT(vp, VN_BLOCKED);
+
 	new_node = vcache_hash_lookup(&new_vcache_key, new_hash);
-	KASSERT(new_node != NULL && new_node->vn_vnode == NULL);
+	KASSERT(new_node != NULL);
 	KASSERT(new_node->vn_key.vk_key_len == new_key_len);
+	tvp = VN_TO_VP(new_node);
+	mutex_enter(tvp->v_interlock);
+	VSTATE_ASSERT(tvp, VN_LOADING);
 
 	/* Rekey old node and put it onto its new hashlist. */
-	old_node->vn_vnode = vp;
 	old_node->vn_key = new_vcache_key;
 	if (old_hash != new_hash) {
 		SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask],
@@ -1587,14 +1563,15 @@ vcache_rekey_exit(struct mount *mp, stru
 		SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
 		    old_node, vn_hash);
 	}
+	VSTATE_CHANGE(vp, VN_BLOCKED, VN_ACTIVE);
+	mutex_exit(vp->v_interlock);
 
 	/* Remove new node used as placeholder. */
 	SLIST_REMOVE(&vcache.hashtab[new_hash & vcache.hashmask],
 	    new_node, vcache_node, vn_hash);
+	VSTATE_CHANGE(tvp, VN_LOADING, VN_RECLAIMED);
 	mutex_exit(&vcache.lock);
-	KASSERT(VN_TO_VP(new_node)->v_usecount == 1);
-	VN_TO_VP(new_node)->v_usecount = 0;
-	vcache_free(new_node);
+	vrelel(tvp, 0);
 }
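
[Editor's note: the rekey protocol implemented by the two functions above,
condensed into one comment:]

	/*
	 * vcache_rekey_enter():
	 *	placeholder:	inserted under the new key, kept VN_LOADING
	 *			so concurrent vcache_get() callers wait
	 *	vp (old node):	VN_ACTIVE -> VN_BLOCKED, no new references
	 *
	 * vcache_rekey_exit():
	 *	vp (old node):	moved to the new key, VN_BLOCKED -> VN_ACTIVE
	 *	placeholder:	VN_LOADING -> VN_RECLAIMED, then vrelel()
	 */
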
 
 /*
@@ -1675,30 +1652,18 @@ vdead_check(struct vnode *vp, int flags)
 {
 
 	KASSERT(mutex_owned(vp->v_interlock));
-	if (ISSET(vp->v_iflag, VI_XLOCK)) {
-		if (ISSET(flags, VDEAD_NOWAIT))
-			return EBUSY;
-		vwait(vp, VI_XLOCK);
-		KASSERT(ISSET(vp->v_iflag, VI_CLEAN));
-	}
-	if (ISSET(vp->v_iflag, VI_CLEAN))
-		return ENOENT;
-	return 0;
-}
 
-/*
- * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
- * recycled.
- */
-static void
-vwait(vnode_t *vp, int flags)
-{
+	if (! ISSET(flags, VDEAD_NOWAIT))
+		VSTATE_WAIT_STABLE(vp);
 
-	KASSERT(mutex_owned(vp->v_interlock));
-	KASSERT(vp->v_usecount != 0);
+	if (VSTATE_GET(vp) == VN_RECLAIMING) {
+		KASSERT(ISSET(flags, VDEAD_NOWAIT));
+		return EBUSY;
+	} else if (VSTATE_GET(vp) == VN_RECLAIMED) {
+		return ENOENT;
+	}
 
-	while ((vp->v_iflag & flags) != 0)
-		cv_wait(&vp->v_cv, vp->v_interlock);
+	return 0;
 }
 
 int

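
[Editor's note: a hedged usage sketch for the reworked vdead_check(); the
caller function is hypothetical.]

static int
example_check_alive(vnode_t *vp)
{
	int error;

	mutex_enter(vp->v_interlock);
	/*
	 * Without VDEAD_NOWAIT this first waits for a stable state, so
	 * EBUSY (returned only for VN_RECLAIMING) cannot occur here.
	 */
	error = vdead_check(vp, 0);
	mutex_exit(vp->v_interlock);

	return error;	/* 0 if usable, ENOENT if VN_RECLAIMED */
}
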
Index: src/sys/sys/vnode.h
diff -u src/sys/sys/vnode.h:1.261 src/sys/sys/vnode.h:1.262
--- src/sys/sys/vnode.h:1.261	Thu May 26 11:07:33 2016
+++ src/sys/sys/vnode.h	Thu May 26 11:09:55 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: vnode.h,v 1.261 2016/05/26 11:07:33 hannken Exp $	*/
+/*	$NetBSD: vnode.h,v 1.262 2016/05/26 11:09:55 hannken Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -204,17 +204,7 @@ typedef struct vnode vnode_t;
 #define	VI_EXECMAP	0x00000200	/* might have PROT_EXEC mappings */
 #define	VI_WRMAP	0x00000400	/* might have PROT_WRITE u. mappings */
 #define	VI_WRMAPDIRTY	0x00000800	/* might have dirty pages */
-#ifdef _VFS_VNODE_PRIVATE
-#define	VI_XLOCK	0x00001000	/* vnode is locked to change type */
-#endif	/* _VFS_VNODE_PRIVATE */
 #define	VI_ONWORKLST	0x00004000	/* On syncer work-list */
-#ifdef _VFS_VNODE_PRIVATE
-#define	VI_MARKER	0x00008000	/* Dummy marker vnode */
-#endif	/* _VFS_VNODE_PRIVATE */
-#ifdef _VFS_VNODE_PRIVATE
-#define	VI_CLEAN	0x00080000	/* has been reclaimed */
-#define	VI_CHANGING	0x00100000	/* vnode changes state */
-#endif	/* _VFS_VNODE_PRIVATE */
 
 /*
  * The third set are locked by the underlying file system.
@@ -223,8 +213,7 @@ typedef struct vnode vnode_t;
 
 #define	VNODE_FLAGBITS \
     "\20\1ROOT\2SYSTEM\3ISTTY\4MAPPED\5MPSAFE\6LOCKSWORK\11TEXT\12EXECMAP" \
-    "\13WRMAP\14WRMAPDIRTY\15XLOCK\17ONWORKLST\20MARKER" \
-    "\24CLEAN\25CHANGING\31DIROP"
+    "\13WRMAP\14WRMAPDIRTY\17ONWORKLST\31DIROP"
 
 #define	VSIZENOTSET	((voff_t)-1)
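
[Editor's note: VNODE_FLAGBITS is an old-style snprintb(3) format string;
the leading "\20" selects hexadecimal output and each following "\nnn" is a
1-origin bit number in octal followed by that bit's name.  The removed
entries \15 (0x1000, XLOCK), \20 (0x8000, MARKER), \24 (0x80000, CLEAN) and
\25 (0x100000, CHANGING) match the four deleted VI_* flags.  A sketch:]

	char bf[64];

	/* Renders e.g. 0x4200 as "0x4200<EXECMAP,ONWORKLST>". */
	snprintb(bf, sizeof(bf), VNODE_FLAGBITS, vp->v_iflag);
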
 
