Ok let's try this again:
>
> I could use some assistance in testing this, particularly on some of
> the more odd architectures.
>
> This diff makes a bunch of changes to the vfs name cache:
>
> 1) it gets rid of the global hash table and reverse hash table for namecache
> entries. namecache entries
> are now allocated and freed in two global LRU's - one for regular, and one
> for negative entries.
>
> 2) Entries are no longer searched in the global lists, instead they are kept
> track of in the relevant
> vnodes. Since each vnode can be either the parent (directory) vnode of a
> namecache entry, or the target of
> the entry, we keep track of it in both ways in the vnode. We now use a rb
> tree to search the namecache from
> a directory vnode, and keep a list of which entries that we are the target
> vnode.
>
> 3) (most importantly) namecache entries can now be allocated and freed.
>
> 4) cache_purge now actually does something rather than depending on vnode
> horror to work. when recycling a vnode
> cache_purge will now correctly clear the name cache entries associated with
> the vnode. (before it basically
> didn't do anything, and depended on us noticing the vnodes were being
> recycled underneath us)
>
> This has been beat on a bunch, and appears not to slow anything down. I
> do require some testing
> and reports on other arch's particularly the likes of sparc, vax, and strange
> things if possible
>
> Thanks,
> -Bob
I had this ready to go after c2k9, but with the instability in the
tree I never committed it. I'd like some people to beat on it again please.
This is a first step toward making vnodes something more generic and
sane where hopefully we can allocate and deallocate them without major
pain. I really need testing of this. - now updated to apply again.
-Bob
Index: kern/vfs_cache.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_cache.c,v
retrieving revision 1.30
diff -u -r1.30 vfs_cache.c
--- kern/vfs_cache.c 9 Jul 2009 22:29:56 -0000 1.30
+++ kern/vfs_cache.c 9 Aug 2009 18:30:12 -0000
@@ -68,26 +68,59 @@
/*
* Structures associated with name caching.
*/
-LIST_HEAD(nchashhead, namecache) *nchashtbl;
u_long nchash; /* size of hash table - 1 */
-long numcache; /* number of cache entries allocated */
-TAILQ_HEAD(, namecache) nclruhead; /* LRU chain */
-struct nchstats nchstats; /* cache effectiveness statistics */
-
-LIST_HEAD(ncvhashhead, namecache) *ncvhashtbl;
-u_long ncvhash; /* size of hash table - 1 */
-
-#define NCHASH(dvp, cnp) \
- hash32_buf(&(dvp)->v_id, sizeof((dvp)->v_id), (cnp)->cn_hash) & nchash
+long numcache; /* total number of cache entries allocated */
+long numneg; /* number of negative cache entries */
-#define NCVHASH(vp) (vp)->v_id & ncvhash
+TAILQ_HEAD(, namecache) nclruhead; /* Regular Entry LRU chain */
+TAILQ_HEAD(, namecache) nclruneghead; /* Negative Entry LRU chain */
+struct nchstats nchstats; /* cache effectiveness statistics */
int doingcache = 1; /* 1 => enable the cache */
struct pool nch_pool;
+void cache_zap(struct namecache *);
u_long nextvnodeid;
+static int
+namecache_compare(struct namecache *n1, struct namecache *n2)
+{
+ if (n1->nc_nlen == n2->nc_nlen)
+ return (memcmp(n1->nc_name, n2->nc_name, n1->nc_nlen));
+ else
+ return (n1->nc_nlen - n2->nc_nlen);
+}
+
+RB_GENERATE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
+
+/*
+ * blow away a namecache entry
+ */
+void
+cache_zap(struct namecache *ncp)
+{
+ struct vnode *dvp = NULL;
+
+ if (ncp->nc_vp != NULL) {
+ TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+ numcache--;
+ } else {
+ TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
+ numneg--;
+ }
+ if (ncp->nc_dvp) {
+ RB_REMOVE(namecache_rb_cache, &ncp->nc_dvp->v_nc_tree, ncp);
+ if (RB_EMPTY(&ncp->nc_dvp->v_nc_tree))
+ dvp = ncp->nc_dvp;
+ }
+ if (ncp->nc_vp)
+ TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_me);
+ pool_put(&nch_pool, ncp);
+ if (dvp)
+ vdrop(dvp);
+}
+
/*
* Look for a name in the cache. We don't do this if the segment name is
* long, simply so the cache can avoid holding long names (which would
@@ -107,7 +140,7 @@
cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
struct namecache *ncp;
- struct nchashhead *ncpp;
+ struct namecache n;
struct vnode *vp;
struct proc *p = curproc;
u_long vpid;
@@ -125,14 +158,11 @@
return (-1);
}
- ncpp = &nchashtbl[NCHASH(dvp, cnp)];
- LIST_FOREACH(ncp, ncpp, nc_hash) {
- if (ncp->nc_dvp == dvp &&
- ncp->nc_dvpid == dvp->v_id &&
- ncp->nc_nlen == cnp->cn_namelen &&
- !memcmp(ncp->nc_name, cnp->cn_nameptr, (u_int)ncp->nc_nlen))
- break;
- }
+ /* lookup in directory vnode's redblack tree */
+ n.nc_nlen = cnp->cn_namelen;
+ memcpy(n.nc_name, cnp->cn_nameptr, n.nc_nlen);
+ ncp = RB_FIND(namecache_rb_cache, &dvp->v_nc_tree, &n);
+
if (ncp == NULL) {
nchstats.ncs_miss++;
return (-1);
@@ -145,13 +175,10 @@
(cnp->cn_flags & ISLASTCN) == 0) {
nchstats.ncs_neghits++;
/*
- * Move this slot to end of LRU chain,
- * if not already there.
+ * Move this slot to end of the negative LRU chain,
*/
- if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
- TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
- TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
- }
+ TAILQ_REMOVE(&nclruneghead, ncp, nc_neg);
+ TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
return (ENOENT);
} else {
nchstats.ncs_badhits++;
@@ -220,12 +247,10 @@
nchstats.ncs_goodhits++;
/*
- * Move this slot to end of LRU chain, if not already there.
+ * Move this slot to end of the regular LRU chain.
*/
- if (TAILQ_NEXT(ncp, nc_lru) != NULL) {
- TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
- TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
- }
+ TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
+ TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
*vpp = vp;
return (0);
@@ -235,16 +260,7 @@
* the cache entry is invalid, or otherwise don't
* want cache entry to exist.
*/
- TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
- LIST_REMOVE(ncp, nc_hash);
- ncp->nc_hash.le_prev = NULL;
-
- if (ncp->nc_vhash.le_prev != NULL) {
- LIST_REMOVE(ncp, nc_vhash);
- ncp->nc_vhash.le_prev = NULL;
- }
-
- TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
+ cache_zap(ncp);
return (-1);
}
@@ -267,62 +283,51 @@
cache_revlookup(struct vnode *vp, struct vnode **dvpp, char **bpp, char *bufp)
{
struct namecache *ncp;
- struct vnode *dvp;
- struct ncvhashhead *nvcpp;
+ struct vnode *dvp = NULL;
char *bp;
if (!doingcache)
goto out;
-
- nvcpp = &ncvhashtbl[NCVHASH(vp)];
-
- LIST_FOREACH(ncp, nvcpp, nc_vhash) {
- if (ncp->nc_vp == vp &&
- ncp->nc_vpid == vp->v_id &&
- (dvp = ncp->nc_dvp) != NULL &&
- /* avoid pesky '.' entries.. */
- dvp != vp && ncp->nc_dvpid == dvp->v_id) {
-
+ TAILQ_FOREACH(ncp, &vp->v_cache_dst, nc_me) {
+ dvp = ncp->nc_dvp;
+ if (dvp && dvp != vp)
+ goto found;
+ }
+ goto miss;
+found:
#ifdef DIAGNOSTIC
- if (ncp->nc_nlen == 1 &&
- ncp->nc_name[0] == '.')
- panic("cache_revlookup: found entry for .");
-
- if (ncp->nc_nlen == 2 &&
- ncp->nc_name[0] == '.' &&
- ncp->nc_name[1] == '.')
- panic("cache_revlookup: found entry for ..");
+ if (ncp->nc_nlen == 1 &&
+ ncp->nc_name[0] == '.')
+ panic("cache_revlookup: found entry for .");
+ if (ncp->nc_nlen == 2 &&
+ ncp->nc_name[0] == '.' &&
+ ncp->nc_name[1] == '.')
+ panic("cache_revlookup: found entry for ..");
#endif
- nchstats.ncs_revhits++;
-
- if (bufp != NULL) {
- bp = *bpp;
- bp -= ncp->nc_nlen;
- if (bp <= bufp) {
- *dvpp = NULL;
- return (ERANGE);
- }
- memcpy(bp, ncp->nc_name, ncp->nc_nlen);
- *bpp = bp;
- }
+ nchstats.ncs_revhits++;
- *dvpp = dvp;
-
- /*
- * XXX: Should we vget() here to have more
- * consistent semantics with cache_lookup()?
- *
- * For MP safety it might be necessary to do
- * this here, while also protecting hash
- * tables themselves to provide some sort of
- * sane inter locking.
- */
- return (0);
+ if (bufp != NULL) {
+ bp = *bpp;
+ bp -= ncp->nc_nlen;
+ if (bp <= bufp) {
+ *dvpp = NULL;
+ return (ERANGE);
}
+ memcpy(bp, ncp->nc_name, ncp->nc_nlen);
+ *bpp = bp;
}
- nchstats.ncs_revmiss++;
- out:
+ *dvpp = dvp;
+
+ /*
+ * XXX: Should we vget() here to have more
+ * consistent semantics with cache_lookup()?
+ */
+ return (0);
+
+miss:
+ nchstats.ncs_revmiss++;
+out:
*dvpp = NULL;
return (-1);
}
@@ -334,57 +339,49 @@
cache_enter(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
{
struct namecache *ncp;
- struct nchashhead *ncpp;
- struct ncvhashhead *nvcpp;
if (!doingcache || cnp->cn_namelen > NCHNAMLEN)
return;
/*
- * Free the cache slot at head of lru chain.
+ * allocate, or recycle (free and allocate) an ncp.
*/
- if (numcache < desiredvnodes) {
- ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
- numcache++;
- } else if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL) {
- TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
- if (ncp->nc_hash.le_prev != NULL) {
- LIST_REMOVE(ncp, nc_hash);
- ncp->nc_hash.le_prev = NULL;
- }
- if (ncp->nc_vhash.le_prev != NULL) {
- LIST_REMOVE(ncp, nc_vhash);
- ncp->nc_vhash.le_prev = NULL;
- }
- } else
- return;
+ if (numcache >= desiredvnodes) {
+ if ((ncp = TAILQ_FIRST(&nclruhead)) != NULL)
+ cache_zap(ncp);
+ else if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
+ cache_zap(ncp);
+ else
+ panic("wtf? leak?");
+ }
+ ncp = pool_get(&nch_pool, PR_WAITOK|PR_ZERO);
+
/* grab the vnode we just found */
ncp->nc_vp = vp;
if (vp)
ncp->nc_vpid = vp->v_id;
+
/* fill in cache info */
ncp->nc_dvp = dvp;
ncp->nc_dvpid = dvp->v_id;
ncp->nc_nlen = cnp->cn_namelen;
bcopy(cnp->cn_nameptr, ncp->nc_name, (unsigned)ncp->nc_nlen);
- TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
- ncpp = &nchashtbl[NCHASH(dvp, cnp)];
- LIST_INSERT_HEAD(ncpp, ncp, nc_hash);
-
- /*
- * Create reverse-cache entries (used in getcwd) for
- * directories.
- */
-
- ncp->nc_vhash.le_prev = NULL;
- ncp->nc_vhash.le_next = NULL;
-
- if (vp && vp != dvp && vp->v_type == VDIR &&
- (ncp->nc_nlen > 2 ||
- (ncp->nc_nlen > 1 && ncp->nc_name[1] != '.') ||
- (ncp->nc_nlen > 0 && ncp->nc_name[0] != '.'))) {
- nvcpp = &ncvhashtbl[NCVHASH(vp)];
- LIST_INSERT_HEAD(nvcpp, ncp, nc_vhash);
+ if (RB_EMPTY(&dvp->v_nc_tree)) {
+ vhold(dvp);
+ }
+ if (RB_INSERT(namecache_rb_cache, &dvp->v_nc_tree, ncp) != NULL)
+ panic("Ich habe eine Rotweinflasche in meinem Arsch\n");
+ if (vp) {
+ TAILQ_INSERT_TAIL(&nclruhead, ncp, nc_lru);
+ numcache++;
+ TAILQ_INSERT_TAIL(&vp->v_cache_dst, ncp, nc_me);
+ } else {
+ TAILQ_INSERT_TAIL(&nclruneghead, ncp, nc_neg);
+ numneg++;
+ }
+ if (numneg > desiredvnodes) {
+ if ((ncp = TAILQ_FIRST(&nclruneghead)) != NULL)
+ cache_zap(ncp);
}
}
@@ -394,10 +391,8 @@
void
nchinit()
{
-
TAILQ_INIT(&nclruhead);
- nchashtbl = hashinit(desiredvnodes, M_CACHE, M_WAITOK, &nchash);
- ncvhashtbl = hashinit(desiredvnodes/8, M_CACHE, M_WAITOK, &ncvhash);
+ TAILQ_INIT(&nclruneghead);
pool_init(&nch_pool, sizeof(struct namecache), 0, 0, 0, "nchpl",
&pool_allocator_nointr);
}
@@ -410,18 +405,16 @@
cache_purge(struct vnode *vp)
{
struct namecache *ncp;
- struct nchashhead *ncpp;
+ while ((ncp = TAILQ_FIRST(&vp->v_cache_dst)))
+ cache_zap(ncp);
+ while ((ncp = RB_ROOT(&vp->v_nc_tree)))
+ cache_zap(ncp);
+
+ /* XXX this blows goats */
vp->v_id = ++nextvnodeid;
- if (nextvnodeid != 0)
- return;
- for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) {
- LIST_FOREACH(ncp, ncpp, nc_hash) {
- ncp->nc_vpid = 0;
- ncp->nc_dvpid = 0;
- }
- }
- vp->v_id = ++nextvnodeid;
+ if (vp->v_id == 0)
+ vp->v_id = ++nextvnodeid;
}
/*
@@ -437,6 +430,7 @@
{
struct namecache *ncp, *nxtcp;
+ /* whack the regular entries */
for (ncp = TAILQ_FIRST(&nclruhead); ncp != TAILQ_END(&nclruhead);
ncp = nxtcp) {
if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
@@ -444,19 +438,20 @@
continue;
}
/* free the resources we had */
- ncp->nc_vp = NULL;
- ncp->nc_dvp = NULL;
- TAILQ_REMOVE(&nclruhead, ncp, nc_lru);
- if (ncp->nc_hash.le_prev != NULL) {
- LIST_REMOVE(ncp, nc_hash);
- ncp->nc_hash.le_prev = NULL;
- }
- if (ncp->nc_vhash.le_prev != NULL) {
- LIST_REMOVE(ncp, nc_vhash);
- ncp->nc_vhash.le_prev = NULL;
- }
+ cache_zap(ncp);
/* cause rescan of list, it may have altered */
nxtcp = TAILQ_FIRST(&nclruhead);
- TAILQ_INSERT_HEAD(&nclruhead, ncp, nc_lru);
+ }
+ /* whack the negative entries */
+ for (ncp = TAILQ_FIRST(&nclruneghead); ncp != TAILQ_END(&nclruneghead);
+ ncp = nxtcp) {
+ if (ncp->nc_dvp == NULL || ncp->nc_dvp->v_mount != mp) {
+ nxtcp = TAILQ_NEXT(ncp, nc_neg);
+ continue;
+ }
+ /* free the resources we had */
+ cache_zap(ncp);
+ /* cause rescan of list, it may have altered */
+ nxtcp = TAILQ_FIRST(&nclruneghead);
}
}
Index: kern/vfs_subr.c
===================================================================
RCS file: /cvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.180
diff -u -r1.180 vfs_subr.c
--- kern/vfs_subr.c 2 Aug 2009 16:28:40 -0000 1.180
+++ kern/vfs_subr.c 9 Aug 2009 18:30:12 -0000
@@ -360,6 +360,8 @@
splx(s);
vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
RB_INIT(&vp->v_bufs_tree);
+ RB_INIT(&vp->v_nc_tree);
+ TAILQ_INIT(&vp->v_cache_dst);
numvnodes++;
} else {
for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
Index: sys/namei.h
===================================================================
RCS file: /cvs/src/sys/sys/namei.h,v
retrieving revision 1.22
diff -u -r1.22 namei.h
--- sys/namei.h 29 Aug 2008 08:57:28 -0000 1.22
+++ sys/namei.h 9 Aug 2009 18:30:12 -0000
@@ -36,6 +36,11 @@
#define _SYS_NAMEI_H_
#include <sys/queue.h>
+#include <sys/tree.h>
+
+struct namecache;
+struct namecache_rb_cache;
+RB_PROTOTYPE(namecache_rb_cache, namecache, n_rbcache, namecache_compare);
/*
* Encapsulation of namei parameters.
@@ -156,9 +161,10 @@
#define NCHNAMLEN 31 /* maximum name segment length we bother with */
struct namecache {
- LIST_ENTRY(namecache) nc_hash; /* hash chain */
- LIST_ENTRY(namecache) nc_vhash; /* (reverse) directory hash chain */
- TAILQ_ENTRY(namecache) nc_lru; /* LRU chain */
+ TAILQ_ENTRY(namecache) nc_lru; /* Regular Entry LRU chain */
+ TAILQ_ENTRY(namecache) nc_neg; /* Negative Entry LRU chain */
+ RB_ENTRY(namecache) n_rbcache; /* Namecache rb tree from vnode */
+ TAILQ_ENTRY(namecache) nc_me; /* ncp's referring to me */
struct vnode *nc_dvp; /* vnode of parent of name */
u_long nc_dvpid; /* capability number of nc_dvp */
struct vnode *nc_vp; /* vnode the name refers to */
Index: sys/vnode.h
===================================================================
RCS file: /cvs/src/sys/sys/vnode.h,v
retrieving revision 1.102
diff -u -r1.102 vnode.h
--- sys/vnode.h 2 Aug 2009 16:28:40 -0000 1.102
+++ sys/vnode.h 9 Aug 2009 18:30:12 -0000
@@ -36,6 +36,7 @@
#include <sys/types.h>
#include <sys/queue.h>
#include <sys/lock.h>
+#include <sys/namei.h>
#include <sys/selinfo.h>
#include <sys/tree.h>
@@ -82,6 +83,7 @@
LIST_HEAD(buflists, buf);
RB_HEAD(buf_rb_bufs, buf);
+RB_HEAD(namecache_rb_cache, namecache);
struct vnode {
struct uvm_vnode v_uvm; /* uvm data */
@@ -110,6 +112,10 @@
struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */
} v_un;
+ /* VFS namecache */
+ struct namecache_rb_cache v_nc_tree;
+ TAILQ_HEAD(, namecache) v_cache_dst; /* cache entries to us */
+
enum vtagtype v_tag; /* type of underlying data */
void *v_data; /* private data for fs */
struct selinfo v_selectinfo; /* identity of poller(s) */
@@ -247,8 +253,9 @@
extern int maxvnodes; /* XXX number of vnodes to allocate */
extern time_t syncdelay; /* time to delay syncing vnodes */
extern int rushjob; /* # of slots syncer should run ASAP */
+extern void vhold(struct vnode *);
+extern void vdrop(struct vnode *);
#endif /* _KERNEL */
-
/*
* Mods for exensibility.