Module Name:    src
Committed By:   manu
Date:           Sat Jul 21 05:17:11 UTC 2012

Modified Files:
        src/lib/libpuffs: dispatcher.c puffs.h
        src/sys/fs/puffs: puffs_msgif.c puffs_msgif.h puffs_sys.h
            puffs_vfsops.c puffs_vnops.c

Log Message:
- Improve PUFFS_KFLAG_CACHE_FS_TTL by reclaiming older inactive nodes.

The normal kernel behavior is to retain inactive nodes in the freelist
until it runs out of vnodes. This has some merit for local filesystems,
where the cost of an allocation is about the same as the cost of a
lookup. But that situation is not true for distributed filesystems.
On the other hand, keeping inactive nodes for a long time holds memory
in the file server process, and when the kernel runs out of vnodes, it
produces reclaim avalanches that increase latency for other operations.

We do not reclaim inactive vnodes immediately either, as they may be
looked up again shortly. Instead we introduce a grace time and we
reclaim nodes that have been inactive beyond the grace time.

- Fix lookup/reclaim race condition.

The above improvement uncovered a race condition between lookup and
reclaim. If we reclaim a vnode associated with a userland cookie while
a lookup returning that same cookie is in progress, then the kernel ends
up with a vnode associated with a cookie that has been reclaimed in
userland. The next operation on the cookie will crash (or at least
confuse) the filesystem.

We fix this by introducing a lookup count in the kernel and userland. On
reclaim, the kernel sends the count, which enables userland to detect
situations where it initiated a lookup that is not completed in the kernel.
In such a situation, the reclaim must be ignored, as the node is about
to be looked up again.


To generate a diff of this commit:
cvs rdiff -u -r1.41 -r1.42 src/lib/libpuffs/dispatcher.c
cvs rdiff -u -r1.122 -r1.123 src/lib/libpuffs/puffs.h
cvs rdiff -u -r1.89 -r1.90 src/sys/fs/puffs/puffs_msgif.c
cvs rdiff -u -r1.78 -r1.79 src/sys/fs/puffs/puffs_msgif.h
cvs rdiff -u -r1.79 -r1.80 src/sys/fs/puffs/puffs_sys.h
cvs rdiff -u -r1.101 -r1.102 src/sys/fs/puffs/puffs_vfsops.c
cvs rdiff -u -r1.166 -r1.167 src/sys/fs/puffs/puffs_vnops.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/lib/libpuffs/dispatcher.c
diff -u src/lib/libpuffs/dispatcher.c:1.41 src/lib/libpuffs/dispatcher.c:1.42
--- src/lib/libpuffs/dispatcher.c:1.41	Wed Jun 27 13:25:23 2012
+++ src/lib/libpuffs/dispatcher.c	Sat Jul 21 05:17:10 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: dispatcher.c,v 1.41 2012/06/27 13:25:23 manu Exp $	*/
+/*	$NetBSD: dispatcher.c,v 1.42 2012/07/21 05:17:10 manu Exp $	*/
 
 /*
  * Copyright (c) 2006, 2007, 2008 Antti Kantee.  All Rights Reserved.
@@ -31,7 +31,7 @@
 
 #include <sys/cdefs.h>
 #if !defined(lint)
-__RCSID("$NetBSD: dispatcher.c,v 1.41 2012/06/27 13:25:23 manu Exp $");
+__RCSID("$NetBSD: dispatcher.c,v 1.42 2012/07/21 05:17:10 manu Exp $");
 #endif /* !lint */
 
 #include <sys/types.h>
@@ -301,6 +301,12 @@ dispatch(struct puffs_cc *pcc)
 						    &pcn.pcn_po_full);
 				}
 			}
+
+			if (!error) {
+				if (pn == NULL)
+					pn = PU_CMAP(pu, auxt->pvnr_newnode);
+				pn->pn_nlookup++;
+			}
 			break;
 		}
 
@@ -810,12 +816,28 @@ dispatch(struct puffs_cc *pcc)
 
 		case PUFFS_VN_RECLAIM:
 		{
-
+			struct puffs_vnmsg_reclaim *auxt = auxbuf;
+			struct puffs_node *pn;
+		
 			if (pops->puffs_node_reclaim == NULL) {
 				error = 0;
 				break;
 			}
 
+			/*
+			 * This fixes a race condition, 
+			 * where a node in reclaimed by kernel 
+			 * after a lookup request is sent, 
+			 * but before the reply, leaving the kernel
+			 * with a invalid vnode/cookie reference.
+			 */
+			pn = PU_CMAP(pu, opcookie);
+			pn->pn_nlookup -= auxt->pvnr_nlookup;
+			if (pn->pn_nlookup >= 1) {
+				error = 0;
+				break;
+			}
+
 			error = pops->puffs_node_reclaim(pu, opcookie);
 			break;
 		}

Index: src/lib/libpuffs/puffs.h
diff -u src/lib/libpuffs/puffs.h:1.122 src/lib/libpuffs/puffs.h:1.123
--- src/lib/libpuffs/puffs.h:1.122	Wed Jun 27 13:25:23 2012
+++ src/lib/libpuffs/puffs.h	Sat Jul 21 05:17:10 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: puffs.h,v 1.122 2012/06/27 13:25:23 manu Exp $	*/
+/*	$NetBSD: puffs.h,v 1.123 2012/07/21 05:17:10 manu Exp $	*/
 
 /*
  * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
@@ -79,6 +79,7 @@ struct puffs_kcache {
 struct puffs_node {
 	off_t			pn_size;
 	int			pn_flags;
+	int			pn_nlookup;
 	struct vattr		pn_va;
 
 	void			*pn_data;	/* private data		*/

Index: src/sys/fs/puffs/puffs_msgif.c
diff -u src/sys/fs/puffs/puffs_msgif.c:1.89 src/sys/fs/puffs/puffs_msgif.c:1.90
--- src/sys/fs/puffs/puffs_msgif.c:1.89	Wed Oct 19 01:39:29 2011
+++ src/sys/fs/puffs/puffs_msgif.c	Sat Jul 21 05:17:10 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: puffs_msgif.c,v 1.89 2011/10/19 01:39:29 manu Exp $	*/
+/*	$NetBSD: puffs_msgif.c,v 1.90 2012/07/21 05:17:10 manu Exp $	*/
 
 /*
  * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
@@ -30,9 +30,10 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.89 2011/10/19 01:39:29 manu Exp $");
+__KERNEL_RCSID(0, "$NetBSD: puffs_msgif.c,v 1.90 2012/07/21 05:17:10 manu Exp $");
 
 #include <sys/param.h>
+#include <sys/kernel.h>
 #include <sys/atomic.h>
 #include <sys/kmem.h>
 #include <sys/kthread.h>
@@ -94,6 +95,8 @@ static pool_cache_t parkpc;
 static int totalpark;
 #endif
 
+int puffs_sopreq_expire_timeout = PUFFS_SOPREQ_EXPIRE_TIMEOUT;
+
 static int
 makepark(void *arg, void *obj, int flags)
 {
@@ -839,6 +842,30 @@ puffsop_msg(void *this, struct puffs_req
 	puffs_msgpark_release1(park, 2);
 }
 
+/*
+ * Node expiry. We come here after an inactive on an unexpired node.
+ * The expiry has been queued and is done in sop thread.
+ */
+static bool
+puffsop_expire(struct puffs_mount *pmp, puffs_cookie_t cookie)
+{
+	struct vnode *vp;
+
+	KASSERT(PUFFS_USE_FS_TTL(pmp));
+
+	/* 
+	 * If it still exists and has no reference,
+	 * vrele should cause it to be reclaimed.
+	 * Otherwise, we have nothing to do.
+	 */
+	if (puffs_cookie2vnode(pmp, cookie, 0, 0, &vp) == 0) {
+		VPTOPP(vp)->pn_stat &= ~PNODE_SOPEXP;
+		vrele(vp); 
+	}
+
+	return false;
+}
+
 static void
 puffsop_flush(struct puffs_mount *pmp, struct puffs_flush *pf)
 {
@@ -959,6 +986,7 @@ puffs_msgif_dispatch(void *this, struct 
 		psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP);
 		memcpy(&psopr->psopr_pf, pf, sizeof(*pf));
 		psopr->psopr_sopreq = PUFFS_SOPREQ_FLUSH;
+		psopr->psopr_at = hardclock_ticks;
 
 		mutex_enter(&pmp->pmp_sopmtx);
 		if (pmp->pmp_sopthrcount == 0) {
@@ -966,7 +994,7 @@ puffs_msgif_dispatch(void *this, struct 
 			kmem_free(psopr, sizeof(*psopr));
 			puffs_msg_sendresp(pmp, preq, ENXIO);
 		} else {
-			TAILQ_INSERT_TAIL(&pmp->pmp_sopreqs,
+			TAILQ_INSERT_TAIL(&pmp->pmp_sopfastreqs,
 			    psopr, psopr_entries);
 			cv_signal(&pmp->pmp_sopcv);
 			mutex_exit(&pmp->pmp_sopmtx);
@@ -983,6 +1011,7 @@ puffs_msgif_dispatch(void *this, struct 
 		psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP);
 		psopr->psopr_preq = *preq;
 		psopr->psopr_sopreq = PUFFS_SOPREQ_UNMOUNT;
+		psopr->psopr_at = hardclock_ticks;
 
 		mutex_enter(&pmp->pmp_sopmtx);
 		if (pmp->pmp_sopthrcount == 0) {
@@ -990,7 +1019,7 @@ puffs_msgif_dispatch(void *this, struct 
 			kmem_free(psopr, sizeof(*psopr));
 			puffs_msg_sendresp(pmp, preq, ENXIO);
 		} else {
-			TAILQ_INSERT_TAIL(&pmp->pmp_sopreqs,
+			TAILQ_INSERT_TAIL(&pmp->pmp_sopfastreqs,
 			    psopr, psopr_entries);
 			cv_signal(&pmp->pmp_sopcv);
 			mutex_exit(&pmp->pmp_sopmtx);
@@ -1014,6 +1043,8 @@ puffs_msgif_dispatch(void *this, struct 
  * server, i.e. a long-term kernel lock which will be released only
  * once the file server acknowledges a request
  */
+#define TIMED_OUT(expire) \
+    ((int)((unsigned int)hardclock_ticks - (unsigned int)expire) > 0)
 void
 puffs_sop_thread(void *arg)
 {
@@ -1022,12 +1053,36 @@ puffs_sop_thread(void *arg)
 	struct puffs_sopreq *psopr;
 	bool keeprunning;
 	bool unmountme = false;
+	int timeo;
+
+	timeo = PUFFS_USE_FS_TTL(pmp) ? puffs_sopreq_expire_timeout : 0;
 
 	mutex_enter(&pmp->pmp_sopmtx);
 	for (keeprunning = true; keeprunning; ) {
-		while ((psopr = TAILQ_FIRST(&pmp->pmp_sopreqs)) == NULL)
-			cv_wait(&pmp->pmp_sopcv, &pmp->pmp_sopmtx);
-		TAILQ_REMOVE(&pmp->pmp_sopreqs, psopr, psopr_entries);
+		/*
+		 * We have a higher priority queue for flush and umount
+		 * and a lower priority queue for reclaims. Request are
+		 * not honoured before clock reaches psopr_at. This code
+		 * assumes that requests are ordered by psopr_at in queues.
+		 */
+		do {
+			psopr = TAILQ_FIRST(&pmp->pmp_sopfastreqs);
+			if ((psopr != NULL) && TIMED_OUT(psopr->psopr_at)) {
+				TAILQ_REMOVE(&pmp->pmp_sopfastreqs,
+					     psopr, psopr_entries);
+				break;
+			}
+
+			psopr = TAILQ_FIRST(&pmp->pmp_sopslowreqs);
+			if ((psopr != NULL) && TIMED_OUT(psopr->psopr_at)) {
+				TAILQ_REMOVE(&pmp->pmp_sopslowreqs,
+					     psopr, psopr_entries);
+				break;
+			}
+
+			cv_timedwait(&pmp->pmp_sopcv, &pmp->pmp_sopmtx, timeo);
+		} while (1 /* CONSTCOND */);
+
 		mutex_exit(&pmp->pmp_sopmtx);
 
 		switch (psopr->psopr_sopreq) {
@@ -1037,6 +1092,9 @@ puffs_sop_thread(void *arg)
 		case PUFFS_SOPREQ_FLUSH:
 			puffsop_flush(pmp, &psopr->psopr_pf);
 			break;
+		case PUFFS_SOPREQ_EXPIRE:
+			puffsop_expire(pmp, psopr->psopr_ck);
+			break;
 		case PUFFS_SOPREQ_UNMOUNT:
 			puffs_msg_sendresp(pmp, &psopr->psopr_preq, 0);
 
@@ -1058,8 +1116,16 @@ puffs_sop_thread(void *arg)
 	/*
 	 * Purge remaining ops.
 	 */
-	while ((psopr = TAILQ_FIRST(&pmp->pmp_sopreqs)) != NULL) {
-		TAILQ_REMOVE(&pmp->pmp_sopreqs, psopr, psopr_entries);
+	while ((psopr = TAILQ_FIRST(&pmp->pmp_sopfastreqs)) != NULL) {
+		TAILQ_REMOVE(&pmp->pmp_sopfastreqs, psopr, psopr_entries);
+		mutex_exit(&pmp->pmp_sopmtx);
+		puffs_msg_sendresp(pmp, &psopr->psopr_preq, ENXIO);
+		kmem_free(psopr, sizeof(*psopr));
+		mutex_enter(&pmp->pmp_sopmtx);
+	}
+
+	while ((psopr = TAILQ_FIRST(&pmp->pmp_sopslowreqs)) != NULL) {
+		TAILQ_REMOVE(&pmp->pmp_sopslowreqs, psopr, psopr_entries);
 		mutex_exit(&pmp->pmp_sopmtx);
 		puffs_msg_sendresp(pmp, &psopr->psopr_preq, ENXIO);
 		kmem_free(psopr, sizeof(*psopr));

Index: src/sys/fs/puffs/puffs_msgif.h
diff -u src/sys/fs/puffs/puffs_msgif.h:1.78 src/sys/fs/puffs/puffs_msgif.h:1.79
--- src/sys/fs/puffs/puffs_msgif.h:1.78	Sun Apr  8 15:04:41 2012
+++ src/sys/fs/puffs/puffs_msgif.h	Sat Jul 21 05:17:10 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: puffs_msgif.h,v 1.78 2012/04/08 15:04:41 manu Exp $	*/
+/*	$NetBSD: puffs_msgif.h,v 1.79 2012/07/21 05:17:10 manu Exp $	*/
 
 /*
  * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
@@ -567,6 +567,8 @@ struct puffs_vnmsg_readlink {
 
 struct puffs_vnmsg_reclaim {
 	struct puffs_req	pvn_pr;
+
+	int			pvnr_nlookup;		/* OUT */
 };
 
 struct puffs_vnmsg_inactive {

Index: src/sys/fs/puffs/puffs_sys.h
diff -u src/sys/fs/puffs/puffs_sys.h:1.79 src/sys/fs/puffs/puffs_sys.h:1.80
--- src/sys/fs/puffs/puffs_sys.h:1.79	Sun Apr  8 15:04:41 2012
+++ src/sys/fs/puffs/puffs_sys.h	Sat Jul 21 05:17:10 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: puffs_sys.h,v 1.79 2012/04/08 15:04:41 manu Exp $	*/
+/*	$NetBSD: puffs_sys.h,v 1.80 2012/07/21 05:17:10 manu Exp $	*/
 
 /*
  * Copyright (c) 2005, 2006  Antti Kantee.  All Rights Reserved.
@@ -101,23 +101,30 @@ struct puffs_newcookie {
 	LIST_ENTRY(puffs_newcookie) pnc_entries;
 };
 
+#define PUFFS_SOPREQ_EXPIRE_TIMEOUT 1000
+extern int puffs_sopreq_expire_timeout;
+
 enum puffs_sopreqtype {
 	PUFFS_SOPREQSYS_EXIT,
 	PUFFS_SOPREQ_FLUSH,
 	PUFFS_SOPREQ_UNMOUNT,
+	PUFFS_SOPREQ_EXPIRE,
 };
 
 struct puffs_sopreq {
 	union {
 		struct puffs_req preq;
 		struct puffs_flush pf;
+		puffs_cookie_t ck;
 	} psopr_u;
 
 	enum puffs_sopreqtype psopr_sopreq;
 	TAILQ_ENTRY(puffs_sopreq) psopr_entries;
+	int psopr_at;
 };
 #define psopr_preq psopr_u.preq
 #define psopr_pf psopr_u.pf
+#define psopr_ck psopr_u.ck
 
 TAILQ_HEAD(puffs_wq, puffs_msgpark);
 LIST_HEAD(puffs_node_hashlist, puffs_node);
@@ -168,7 +175,8 @@ struct puffs_mount {
 	kmutex_t			pmp_sopmtx;
 	kcondvar_t			pmp_sopcv;
 	int				pmp_sopthrcount;
-	TAILQ_HEAD(, puffs_sopreq)	pmp_sopreqs;
+	TAILQ_HEAD(, puffs_sopreq)	pmp_sopfastreqs;
+	TAILQ_HEAD(, puffs_sopreq)	pmp_sopslowreqs;
 	bool				pmp_docompat;
 };
 
@@ -178,10 +186,11 @@ struct puffs_mount {
 #define PUFFSTAT_DYING		3 /* Do you want your possessions identified? */
 
 
-#define PNODE_NOREFS	0x01	/* no backend reference			*/
-#define PNODE_DYING	0x02	/* NOREFS + inactive			*/
-#define PNODE_FAF	0x04	/* issue all operations as FAF		*/
-#define PNODE_DOINACT	0x08	/* if inactive-on-demand, call inactive */
+#define PNODE_NOREFS	0x001	/* no backend reference			*/
+#define PNODE_DYING	0x002	/* NOREFS + inactive			*/
+#define PNODE_FAF	0x004	/* issue all operations as FAF		*/
+#define PNODE_DOINACT 	0x008	/* if inactive-on-demand, call inactive */
+#define PNODE_SOPEXP	0x100	/* Node reclaim postponed in sop thread	*/
 
 #define PNODE_METACACHE_ATIME	0x10	/* cache atime metadata */
 #define PNODE_METACACHE_CTIME	0x20	/* cache atime metadata */
@@ -194,6 +203,7 @@ struct puffs_node {
 
 	kmutex_t	pn_mtx;
 	int		pn_refcount;
+	int		pn_nlookup;
 
 	puffs_cookie_t	pn_cookie;	/* userspace pnode cookie	*/
 	struct vnode	*pn_vp;		/* backpointer to vnode		*/
@@ -215,6 +225,7 @@ struct puffs_node {
 	kmutex_t	pn_sizemtx;	/* size modification mutex	*/
 	
 	int		pn_cn_timeout;	/* path cache */
+	int		pn_cn_grace;	/* grace time before reclaim */
 	int		pn_va_timeout;	/* attribute cache */
 	struct vattr *	pn_va_cache;	/* attribute cache */
 

Index: src/sys/fs/puffs/puffs_vfsops.c
diff -u src/sys/fs/puffs/puffs_vfsops.c:1.101 src/sys/fs/puffs/puffs_vfsops.c:1.102
--- src/sys/fs/puffs/puffs_vfsops.c:1.101	Sun Apr  8 15:04:41 2012
+++ src/sys/fs/puffs/puffs_vfsops.c	Sat Jul 21 05:17:11 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: puffs_vfsops.c,v 1.101 2012/04/08 15:04:41 manu Exp $	*/
+/*	$NetBSD: puffs_vfsops.c,v 1.102 2012/07/21 05:17:11 manu Exp $	*/
 
 /*
  * Copyright (c) 2005, 2006  Antti Kantee.  All Rights Reserved.
@@ -30,9 +30,10 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: puffs_vfsops.c,v 1.101 2012/04/08 15:04:41 manu Exp $");
+__KERNEL_RCSID(0, "$NetBSD: puffs_vfsops.c,v 1.102 2012/07/21 05:17:11 manu Exp $");
 
 #include <sys/param.h>
+#include <sys/kernel.h>
 #include <sys/mount.h>
 #include <sys/malloc.h>
 #include <sys/extattr.h>
@@ -306,7 +307,8 @@ puffs_vfsop_mount(struct mount *mp, cons
 	cv_init(&pmp->pmp_sopcv, "puffsop");
 	TAILQ_INIT(&pmp->pmp_msg_touser);
 	TAILQ_INIT(&pmp->pmp_msg_replywait);
-	TAILQ_INIT(&pmp->pmp_sopreqs);
+	TAILQ_INIT(&pmp->pmp_sopfastreqs);
+	TAILQ_INIT(&pmp->pmp_sopslowreqs);
 
 	if ((error = kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
 	    puffs_sop_thread, pmp, NULL, "puffsop")) != 0)
@@ -420,6 +422,7 @@ puffs_vfsop_unmount(struct mount *mp, in
 		KASSERT(curlwp != uvm.pagedaemon_lwp);
 		psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP);
 		psopr->psopr_sopreq = PUFFS_SOPREQSYS_EXIT;
+		psopr->psopr_at = hardclock_ticks;
 		mutex_enter(&pmp->pmp_sopmtx);
 		if (pmp->pmp_sopthrcount == 0) {
 			mutex_exit(&pmp->pmp_sopmtx);
@@ -427,7 +430,7 @@ puffs_vfsop_unmount(struct mount *mp, in
 			mutex_enter(&pmp->pmp_sopmtx);
 			KASSERT(pmp->pmp_sopthrcount == 0);
 		} else {
-			TAILQ_INSERT_TAIL(&pmp->pmp_sopreqs,
+			TAILQ_INSERT_TAIL(&pmp->pmp_sopfastreqs,
 			    psopr, psopr_entries);
 			cv_signal(&pmp->pmp_sopcv);
 		}

Index: src/sys/fs/puffs/puffs_vnops.c
diff -u src/sys/fs/puffs/puffs_vnops.c:1.166 src/sys/fs/puffs/puffs_vnops.c:1.167
--- src/sys/fs/puffs/puffs_vnops.c:1.166	Wed Apr 18 00:42:50 2012
+++ src/sys/fs/puffs/puffs_vnops.c	Sat Jul 21 05:17:11 2012
@@ -1,4 +1,4 @@
-/*	$NetBSD: puffs_vnops.c,v 1.166 2012/04/18 00:42:50 manu Exp $	*/
+/*	$NetBSD: puffs_vnops.c,v 1.167 2012/07/21 05:17:11 manu Exp $	*/
 
 /*
  * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: puffs_vnops.c,v 1.166 2012/04/18 00:42:50 manu Exp $");
+__KERNEL_RCSID(0, "$NetBSD: puffs_vnops.c,v 1.167 2012/07/21 05:17:11 manu Exp $");
 
 #include <sys/param.h>
 #include <sys/buf.h>
@@ -414,7 +414,7 @@ static int callremove(struct puffs_mount
 static int callrmdir(struct puffs_mount *, puffs_cookie_t, puffs_cookie_t,
 			   struct componentname *);
 static void callinactive(struct puffs_mount *, puffs_cookie_t, int);
-static void callreclaim(struct puffs_mount *, puffs_cookie_t);
+static void callreclaim(struct puffs_mount *, puffs_cookie_t, int);
 static int  flushvncache(struct vnode *, off_t, off_t, bool);
 static void update_va(struct vnode *, struct vattr *, struct vattr *,
 		      struct timespec *, struct timespec *, int);
@@ -446,7 +446,7 @@ puffs_abortbutton(struct puffs_mount *pm
 	}
 
 	callinactive(pmp, ck, 0);
-	callreclaim(pmp, ck);
+	callreclaim(pmp, ck, 0);
 }
 
 /*
@@ -507,16 +507,15 @@ puffs_vnop_lookup(void *v)
 	/*
 	 * Check if someone fed it into the cache
 	 */
-	if (PUFFS_USE_NAMECACHE(pmp)) {
+	if (!isdot && PUFFS_USE_NAMECACHE(pmp)) {
 		error = cache_lookup(dvp, ap->a_vpp, cnp);
 
 		if ((error == 0) && PUFFS_USE_FS_TTL(pmp)) {
-
 			cvp = *ap->a_vpp;
 			cpn = VPTOPP(cvp);
-			if (TIMED_OUT(cpn->pn_cn_timeout)) {
-				cache_purge1(cvp, NULL, PURGE_CHILDREN);
 
+			if (TIMED_OUT(cpn->pn_cn_timeout)) {
+				cache_purge(cvp);
 				/*
 				 * cached vnode (cvp) is still locked
 				 * so that we can reuse it upon a new
@@ -534,7 +533,7 @@ puffs_vnop_lookup(void *v)
 		if ((error == ENOENT) && PUFFS_USE_FS_TTL(pmp))
 			error = -1;
 
-		if (error >= 0) 
+		if (error >= 0)
 			return error;
 	}
 
@@ -543,9 +542,6 @@ puffs_vnop_lookup(void *v)
 		if (cnp->cn_nameiop == RENAME && (cnp->cn_flags & ISLASTCN))
 			return EISDIR;
 
-		if (cvp != NULL) 
-			vput(cvp);
-
 		vp = ap->a_dvp;
 		vref(vp);
 		*ap->a_vpp = vp;
@@ -612,31 +608,47 @@ puffs_vnop_lookup(void *v)
 	}
 
 	/*
-	 * On successfull relookup, do not create a new node.
+	 * Check if we looked up the cached vnode
 	 */
-	if (cvp != NULL) {
+	vp = NULL;
+	if (cvp && (VPTOPP(cvp)->pn_cookie == lookup_msg->pvnr_newnode)) {
+		int grace;
+
+		/*
+		 * Bump grace time of this node so that it does not get 
+		 * reclaimed too fast. We try to increase a bit more the
+		 * lifetime of busiest * nodes - with some limits.
+		 */
+		grace = 10 * puffs_sopreq_expire_timeout;
+		cpn->pn_cn_grace = hardclock_ticks + grace;
 		vp = cvp;
-	} else {
+	}
+
+	/*
+	 * No cached vnode available, or the cached vnode does not
+	 * match the userland cookie anymore: is the node known?
+	 */
+	if (vp == NULL) {
 		error = puffs_cookie2vnode(pmp, lookup_msg->pvnr_newnode,
 					   1, 1, &vp);
+	}
 
-		if (error == PUFFS_NOSUCHCOOKIE) {
-			error = puffs_getvnode(dvp->v_mount,
-			    lookup_msg->pvnr_newnode, lookup_msg->pvnr_vtype,
-			    lookup_msg->pvnr_size, lookup_msg->pvnr_rdev, &vp);
-			if (error) {
-				puffs_abortbutton(pmp, PUFFS_ABORT_LOOKUP,
-				    VPTOPNC(dvp), lookup_msg->pvnr_newnode,
-				    ap->a_cnp);
-				goto out;
-			}
-
-			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
-		} else if (error) {
-			puffs_abortbutton(pmp, PUFFS_ABORT_LOOKUP, VPTOPNC(dvp),
-			    lookup_msg->pvnr_newnode, ap->a_cnp);
+	if (error == PUFFS_NOSUCHCOOKIE) {
+		error = puffs_getvnode(dvp->v_mount,
+		    lookup_msg->pvnr_newnode, lookup_msg->pvnr_vtype,
+		    lookup_msg->pvnr_size, lookup_msg->pvnr_rdev, &vp);
+		if (error) {
+			puffs_abortbutton(pmp, PUFFS_ABORT_LOOKUP,
+			    VPTOPNC(dvp), lookup_msg->pvnr_newnode,
+			    ap->a_cnp);
 			goto out;
 		}
+
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	} else if (error) {
+		puffs_abortbutton(pmp, PUFFS_ABORT_LOOKUP, VPTOPNC(dvp),
+		    lookup_msg->pvnr_newnode, ap->a_cnp);
+		goto out;
 	}
 
 	/*
@@ -649,6 +661,7 @@ puffs_vnop_lookup(void *v)
 			  va_ttl, cn_ttl, SETATTR_CHSIZE);
 	}
 
+	KASSERT(lookup_msg->pvnr_newnode == VPTOPP(vp)->pn_cookie);
 	*ap->a_vpp = vp;
 
 	if ((cnp->cn_flags & MAKEENTRY) != 0 && PUFFS_USE_NAMECACHE(pmp))
@@ -661,14 +674,12 @@ puffs_vnop_lookup(void *v)
 		cnp->cn_consume = MIN(lookup_msg->pvnr_cn.pkcn_consume,
 		    strlen(cnp->cn_nameptr) - cnp->cn_namelen);
 
+	VPTOPP(vp)->pn_nlookup++;
  out:
 	if (cvp != NULL) {
 		mutex_exit(&cpn->pn_sizemtx);
-	 	/*
-		 * We had a cached vnode but new lookup failed, 	
-		 * unlock it and let it die now.
-		 */
-		if (error != 0)
+
+		if (error || (cvp != vp))
 			vput(cvp);
 	}
 
@@ -928,8 +939,10 @@ update_va(struct vnode *vp, struct vattr
 {
 	struct puffs_node *pn = VPTOPP(vp);
 
-	if (TTL_VALID(cn_ttl))
+	if (TTL_VALID(cn_ttl)) {
 		pn->pn_cn_timeout = TTL_TO_TIMEOUT(cn_ttl);
+		pn->pn_cn_grace = MAX(pn->pn_cn_timeout, pn->pn_cn_grace);
+	}
 
 	/*
 	 * Don't listen to the file server regarding special device
@@ -1198,6 +1211,7 @@ puffs_vnop_inactive(void *v)
 	struct vnode *vp = ap->a_vp;
 	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
 	struct puffs_node *pnode;
+	bool recycle = false;
 	int error;
 
 	pnode = vp->v_data;
@@ -1221,9 +1235,62 @@ puffs_vnop_inactive(void *v)
 	 */
 	if (pnode->pn_stat & PNODE_NOREFS) {
 		pnode->pn_stat |= PNODE_DYING;
-		*ap->a_recycle = true;
+		recycle = true;
+	}
+
+	/*
+	 * Handle node TTL. 
+	 * If grace has already timed out, make it reclaimed.
+	 * Otherwise, we queue its expiration by sop thread, so
+	 * that it does not remain for ages in the freelist, 
+	 * holding memory in userspace, while we will have 
+	 * to look it up again anyway.
+	 */ 
+	if (PUFFS_USE_FS_TTL(pmp) && !(vp->v_vflag & VV_ROOT) && !recycle) {
+		bool incache = !TIMED_OUT(pnode->pn_cn_timeout);
+		bool ingrace = !TIMED_OUT(pnode->pn_cn_grace);
+		bool reclaimqueued = pnode->pn_stat & PNODE_SOPEXP;
+
+		if (!incache && !ingrace && !reclaimqueued) {
+			pnode->pn_stat |= PNODE_DYING;
+			recycle = true;
+		}
+
+		if (!recycle && !reclaimqueued) {
+			struct puffs_sopreq *psopr;
+			int at = MAX(pnode->pn_cn_grace, pnode->pn_cn_timeout);
+
+			KASSERT(curlwp != uvm.pagedaemon_lwp);
+			psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP);
+			psopr->psopr_ck = VPTOPNC(pnode->pn_vp);
+			psopr->psopr_sopreq = PUFFS_SOPREQ_EXPIRE;
+			psopr->psopr_at = at;
+
+			mutex_enter(&pmp->pmp_sopmtx);
+
+			/*
+			 * If thread has disapeared, just give up. The
+			 * fs is being unmounted and the node will be 
+			 * be reclaimed anyway.
+			 *
+			 * Otherwise, we queue the request but do not
+			 * immediatly signal the thread, as the node
+			 * has not been expired yet.
+			 */
+			if (pmp->pmp_sopthrcount == 0) {
+				kmem_free(psopr, sizeof(*psopr));
+			} else {
+				TAILQ_INSERT_TAIL(&pmp->pmp_sopslowreqs,
+				    psopr, psopr_entries); 
+				pnode->pn_stat |= PNODE_SOPEXP;
+			}
+
+			mutex_exit(&pmp->pmp_sopmtx);
+		}
 	}
 
+	*ap->a_recycle = recycle;
+
 	mutex_exit(&pnode->pn_sizemtx);
 	VOP_UNLOCK(vp);
 
@@ -1231,7 +1298,7 @@ puffs_vnop_inactive(void *v)
 }
 
 static void
-callreclaim(struct puffs_mount *pmp, puffs_cookie_t ck)
+callreclaim(struct puffs_mount *pmp, puffs_cookie_t ck, int nlookup)
 {
 	PUFFS_MSG_VARS(vn, reclaim);
 
@@ -1239,11 +1306,13 @@ callreclaim(struct puffs_mount *pmp, puf
 		return;
 
 	PUFFS_MSG_ALLOC(vn, reclaim);
+	reclaim_msg->pvnr_nlookup = nlookup;
 	puffs_msg_setfaf(park_reclaim);
 	puffs_msg_setinfo(park_reclaim, PUFFSOP_VN, PUFFS_VN_RECLAIM, ck);
 
 	puffs_msg_enqueue(pmp, park_reclaim);
 	PUFFS_MSG_RELEASE(reclaim);
+	return;
 }
 
 /*
@@ -1283,10 +1352,15 @@ puffs_vnop_reclaim(void *v)
 	 */
 	mutex_enter(&pmp->pmp_lock);
 	LIST_REMOVE(pnode, pn_hashent);
+	if (PUFFS_USE_NAMECACHE(pmp))
+		cache_purge(vp);
 	mutex_exit(&pmp->pmp_lock);
 
-	if (notifyserver)
-		callreclaim(MPTOPUFFSMP(vp->v_mount), VPTOPNC(vp));
+	if (notifyserver) {
+		int nlookup = VPTOPP(vp)->pn_nlookup;
+
+		callreclaim(MPTOPUFFSMP(vp->v_mount), VPTOPNC(vp), nlookup);
+	}
 
 	puffs_putvnode(vp);
 	vp->v_data = NULL;

Reply via email to