Module Name:    src
Committed By:   hannken
Date:           Sun Oct  2 13:00:07 UTC 2011

Modified Files:
        src/sys/kern: vfs_vnode.c

Log Message:
The path getnewvnode()->getcleanvnode()->vclean()->VOP_LOCK() will panic
if the vnode we want to clean is a layered vnode and the caller already
locked its lower vnode.

Change getnewvnode() to always allocate a fresh vnode and add a helper
thread (vdrain) to keep the number of allocated vnodes within desiredvnodes.

Rename getcleanvnode() to cleanvnode() and let it take a vnode from the
lists, clean it, and free it.

Reviewed by: David Holland <dholl...@netbsd.org>

Should fix:
PR #19110 (nullfs mounts over NFS cause lock manager problems)
PR #34102 (ffs panic in NetBSD 3.0_STABLE)
PR #45115 (lock error panic when build.sh*3 and daily script is running)
PR #45355 (Reader/writer lock error:  rw_vector_enter: locking against myself)


To generate a diff of this commit:
cvs rdiff -u -r1.11 -r1.12 src/sys/kern/vfs_vnode.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/kern/vfs_vnode.c
diff -u src/sys/kern/vfs_vnode.c:1.11 src/sys/kern/vfs_vnode.c:1.12
--- src/sys/kern/vfs_vnode.c:1.11	Thu Sep 29 20:51:38 2011
+++ src/sys/kern/vfs_vnode.c	Sun Oct  2 13:00:06 2011
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_vnode.c,v 1.11 2011/09/29 20:51:38 christos Exp $	*/
+/*	$NetBSD: vfs_vnode.c,v 1.12 2011/10/02 13:00:06 hannken Exp $	*/
 
 /*-
  * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
@@ -76,7 +76,6 @@
  *	starts in one of the following ways:
  *
  *	- Allocation, via getnewvnode(9) and/or vnalloc(9).
- *	- Recycle from a free list, via getnewvnode(9) -> getcleanvnode(9).
  *	- Reclamation of inactive vnode, via vget(9).
  *
  *	The life-cycle ends when the last reference is dropped, usually
@@ -121,7 +120,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.11 2011/09/29 20:51:38 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.12 2011/10/02 13:00:06 hannken Exp $");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
@@ -159,8 +158,10 @@ static kcondvar_t	vrele_cv		__cacheline_
 static lwp_t *		vrele_lwp		__cacheline_aligned;
 static int		vrele_pending		__cacheline_aligned;
 static int		vrele_gen		__cacheline_aligned;
+static kcondvar_t	vdrain_cv		__cacheline_aligned;
 
-static vnode_t *	getcleanvnode(void);
+static int		cleanvnode(void);
+static void		vdrain_thread(void *);
 static void		vrele_thread(void *);
 static void		vnpanic(vnode_t *, const char *, ...)
     __attribute__((__format__(__printf__, 2, 3)));
@@ -183,7 +184,11 @@ vfs_vnode_sysinit(void)
 	TAILQ_INIT(&vrele_list);
 
 	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&vdrain_cv, "vdrain");
 	cv_init(&vrele_cv, "vrele");
+	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
+	    NULL, NULL, "vdrain");
+	KASSERT(error == 0);
 	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
 	    NULL, &vrele_lwp, "vrele");
 	KASSERT(error == 0);
@@ -249,13 +254,12 @@ vnfree(vnode_t *vp)
 }
 
 /*
- * getcleanvnode: grab a vnode from freelist and clean it.
+ * cleanvnode: grab a vnode from freelist, clean and free it.
  *
  * => Releases vnode_free_list_lock.
- * => Returns referenced vnode on success.
  */
-static vnode_t *
-getcleanvnode(void)
+static int
+cleanvnode(void)
 {
 	vnode_t *vp;
 	vnodelst_t *listhd;
@@ -288,7 +292,7 @@ try_nextlist:
 			goto try_nextlist;
 		}
 		mutex_exit(&vnode_free_list_lock);
-		return NULL;
+		return EBUSY;
 	}
 
 	/* Remove it from the freelist. */
@@ -300,7 +304,7 @@ try_nextlist:
 
 	/*
 	 * The vnode is still associated with a file system, so we must
-	 * clean it out before reusing it.  We need to add a reference
+	 * clean it out before freeing it.  We need to add a reference
 	 * before doing this.  If the vnode gains another reference while
 	 * being cleaned out then we lose - retry.
 	 */
@@ -308,15 +312,7 @@ try_nextlist:
 	vclean(vp, DOCLOSE);
 	KASSERT(vp->v_usecount >= 1 + VC_XLOCK);
 	atomic_add_int(&vp->v_usecount, -VC_XLOCK);
-	if (vp->v_usecount == 1) {
-		/* We're about to dirty it. */
-		vp->v_iflag &= ~VI_CLEAN;
-		mutex_exit(vp->v_interlock);
-		if (vp->v_type == VBLK || vp->v_type == VCHR) {
-			spec_node_destroy(vp);
-		}
-		vp->v_type = VNON;
-	} else {
+	if (vp->v_usecount > 1) {
 		/*
 		 * Don't return to freelist - the holder of the last
 		 * reference will destroy it.
@@ -326,17 +322,26 @@ try_nextlist:
 		goto retry;
 	}
 
+	KASSERT((vp->v_iflag & VI_CLEAN) == VI_CLEAN);
+	mutex_exit(vp->v_interlock);
+	if (vp->v_type == VBLK || vp->v_type == VCHR) {
+		spec_node_destroy(vp);
+	}
+	vp->v_type = VNON;
+
 	KASSERT(vp->v_data == NULL);
 	KASSERT(vp->v_uobj.uo_npages == 0);
 	KASSERT(TAILQ_EMPTY(&vp->v_uobj.memq));
 	KASSERT(vp->v_numoutput == 0);
 	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
 
-	return vp;
+	vrele(vp);
+
+	return 0;
 }
 
 /*
- * getnewvnode: return the next vnode from the free list.
+ * getnewvnode: return a fresh vnode.
  *
  * => Returns referenced vnode, moved into the mount queue.
  * => Shares the interlock specified by 'slock', if it is not NULL.
@@ -346,9 +351,8 @@ getnewvnode(enum vtagtype tag, struct mo
     kmutex_t *slock, vnode_t **vpp)
 {
 	struct uvm_object *uobj;
-	static int toggle;
 	vnode_t *vp;
-	int error = 0, tryalloc;
+	int error = 0;
 
 try_again:
 	if (mp != NULL) {
@@ -361,80 +365,28 @@ try_again:
 			return error;
 	}
 
-	/*
-	 * We must choose whether to allocate a new vnode or recycle an
-	 * existing one. The criterion for allocating a new one is that
-	 * the total number of vnodes is less than the number desired or
-	 * there are no vnodes on either free list. Generally we only
-	 * want to recycle vnodes that have no buffers associated with
-	 * them, so we look first on the vnode_free_list. If it is empty,
-	 * we next consider vnodes with referencing buffers on the
-	 * vnode_hold_list. The toggle ensures that half the time we
-	 * will use a buffer from the vnode_hold_list, and half the time
-	 * we will allocate a new one unless the list has grown to twice
-	 * the desired size. We are reticent to recycle vnodes from the
-	 * vnode_hold_list because we will lose the identity of all its
-	 * referencing buffers.
-	 */
-
 	vp = NULL;
 
+	/* Allocate a new vnode. */
 	mutex_enter(&vnode_free_list_lock);
-
-	toggle ^= 1;
-	if (numvnodes > 2 * desiredvnodes)
-		toggle = 0;
-
-	tryalloc = numvnodes < desiredvnodes ||
-	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
-	    (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
-
-	if (tryalloc) {
-		/* Allocate a new vnode. */
-		numvnodes++;
+	numvnodes++;
+	if (numvnodes > desiredvnodes + desiredvnodes / 10)
+		cv_signal(&vdrain_cv);
+	mutex_exit(&vnode_free_list_lock);
+	if ((vp = vnalloc(NULL)) == NULL) {
+		mutex_enter(&vnode_free_list_lock);
+		numvnodes--;
 		mutex_exit(&vnode_free_list_lock);
-		if ((vp = vnalloc(NULL)) == NULL) {
-			mutex_enter(&vnode_free_list_lock);
-			numvnodes--;
-		} else
-			vp->v_usecount = 1;
-	}
-
-	if (vp == NULL) {
-		/* Recycle and get vnode clean. */
-		vp = getcleanvnode();
-		if (vp == NULL) {
-			if (mp != NULL) {
-				vfs_unbusy(mp, false, NULL);
-			}
-			if (tryalloc) {
-				printf("WARNING: unable to allocate new "
-				    "vnode, retrying...\n");
-				kpause("newvn", false, hz, NULL);
-				goto try_again;
-			}
-			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
-			*vpp = 0;
-			return ENFILE;
-		}
-		if ((vp->v_iflag & VI_LOCKSHARE) != 0 || slock) {
-			/* We must remove vnode from the old mount point. */
-			if (vp->v_mount) {
-				vfs_insmntque(vp, NULL);
-			}
-			/* Allocate a new interlock, if it was shared. */
-			if (vp->v_iflag & VI_LOCKSHARE) {
-				uvm_obj_setlock(&vp->v_uobj, NULL);
-				vp->v_iflag &= ~VI_LOCKSHARE;
-			}
+		if (mp != NULL) {
+			vfs_unbusy(mp, false, NULL);
 		}
-		vp->v_iflag = 0;
-		vp->v_vflag = 0;
-		vp->v_uflag = 0;
-		vp->v_socket = NULL;
+		printf("WARNING: unable to allocate new vnode, retrying...\n");
+		kpause("newvn", false, hz, NULL);
+		goto try_again;
 	}
 
-	KASSERT(vp->v_usecount == 1);
+	vp->v_usecount = 1;
+
 	KASSERT(vp->v_freelisthd == NULL);
 	KASSERT(LIST_EMPTY(&vp->v_nclist));
 	KASSERT(LIST_EMPTY(&vp->v_dnclist));
@@ -493,6 +445,29 @@ ungetnewvnode(vnode_t *vp)
 }
 
 /*
+ * Helper thread to keep the number of vnodes below desiredvnodes.
+ */
+static void
+vdrain_thread(void *cookie)
+{
+	int error;
+
+	mutex_enter(&vnode_free_list_lock);
+
+	for (;;) {
+		cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
+		while (numvnodes > desiredvnodes) {
+			error = cleanvnode();
+			if (error)
+				kpause("vndsbusy", false, hz, NULL);
+			mutex_enter(&vnode_free_list_lock);
+			if (error)
+				break;
+		}
+	}
+}
+
+/*
  * Remove a vnode from its freelist.
  */
 void
@@ -1204,17 +1179,19 @@ vwait(vnode_t *vp, int flags)
 int
 vfs_drainvnodes(long target)
 {
+	int error;
 
-	while (numvnodes > target) {
-		vnode_t *vp;
+	mutex_enter(&vnode_free_list_lock);
 
+	while (numvnodes > target) {
+		error = cleanvnode();
+		if (error != 0)
+			return error;
 		mutex_enter(&vnode_free_list_lock);
-		vp = getcleanvnode();
-		if (vp == NULL) {
-			return EBUSY;
-		}
-		ungetnewvnode(vp);
 	}
+
+	mutex_exit(&vnode_free_list_lock);
+
 	return 0;
 }
 

Reply via email to