Module Name: src
Committed By: ad
Date: Sat Sep 9 18:27:59 UTC 2023
Modified Files:
src/sys/kern: vfs_cache.c
src/sys/sys: namei.src
src/usr.bin/pmap: main.h pmap.c
Log Message:
- Shrink namecache entries to 64 bytes on 32-bit platforms and use 32-bit
key values there for speed (remains 128 bytes & 64-bits on _LP64).
- Comments.
To generate a diff of this commit:
cvs rdiff -u -r1.154 -r1.155 src/sys/kern/vfs_cache.c
cvs rdiff -u -r1.60 -r1.61 src/sys/sys/namei.src
cvs rdiff -u -r1.7 -r1.8 src/usr.bin/pmap/main.h
cvs rdiff -u -r1.57 -r1.58 src/usr.bin/pmap/pmap.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/kern/vfs_cache.c
diff -u src/sys/kern/vfs_cache.c:1.154 src/sys/kern/vfs_cache.c:1.155
--- src/sys/kern/vfs_cache.c:1.154 Sat Apr 29 10:07:22 2023
+++ src/sys/kern/vfs_cache.c Sat Sep 9 18:27:59 2023
@@ -1,7 +1,7 @@
-/* $NetBSD: vfs_cache.c,v 1.154 2023/04/29 10:07:22 riastradh Exp $ */
+/* $NetBSD: vfs_cache.c,v 1.155 2023/09/09 18:27:59 ad Exp $ */
/*-
- * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2019, 2020, 2023 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -79,10 +79,9 @@
* we are not sensible, and use a per-directory data structure to index
* names, but the cache otherwise functions the same.
*
- * The index is a red-black tree. There are no special concurrency
- * requirements placed on it, because it's per-directory and protected
- * by the namecache's per-directory locks. It should therefore not be
- * difficult to experiment with other types of index.
+ * The index is a red-black tree. It should not be difficult to
+ * experiment with other types of index, however note that a tree
+ * can trivially be made to support lockless lookup.
*
* Each cached name is stored in a struct namecache, along with a
* pointer to the associated vnode (nc_vp). Names longer than a
@@ -91,6 +90,19 @@
* in struct namecache. If it is a "negative" entry, (i.e. for a name
* that is known NOT to exist) the vnode pointer will be NULL.
*
+ * In practice this implementation is not any slower than the hash
+ * table that preceded it and in some cases it significantly
+ * outperforms the hash table. Some reasons why this might be:
+ *
+ * - natural partitioning provided by the file system structure, which
+ * the prior implementation discarded (global hash table).
+ * - worst case tree traversal of O(log n), the hash table could have
+ * many collisions.
+ * - minimized cache misses & total L2/L3 CPU cache footprint; struct
+ * namecache and vnode_impl_t are laid out to keep cache footprint
+ * minimal in the lookup path; no hash table buckets to cache.
+ * - minimized number of conditionals & string comparisons.
+ *
* For a directory with 3 cached names for 3 distinct vnodes, the
* various vnodes and namecache structs would be connected like this
* (the root is at the bottom of the diagram):
@@ -167,12 +179,10 @@
* used during reverse lookup)
*
* 3) cache_lru_lock (LRU list direction, used during reclaim)
- *
- * 4) vp->v_interlock (what the cache entry points to)
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.154 2023/04/29 10:07:22 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.155 2023/09/09 18:27:59 ad Exp $");
#define __NAMECACHE_PRIVATE
#ifdef _KERNEL_OPT
@@ -202,6 +212,16 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,
#include <miscfs/genfs/genfs.h>
+/*
+ * Assert that data structure layout hasn't changed unintentionally.
+ */
+#ifdef _LP64
+CTASSERT(sizeof(struct namecache) == 128);
+#else
+CTASSERT(sizeof(struct namecache) == 64);
+#endif
+CTASSERT(NC_NLEN_MASK >= MAXPATHLEN);
+
static void cache_activate(struct namecache *);
static void cache_update_stats(void *);
static int cache_compare_nodes(void *, const void *, const void *);
@@ -262,7 +282,7 @@ static kmutex_t cache_stat_lock __cachel
*/
int cache_lru_maxdeact __read_mostly = 2; /* max # to deactivate */
int cache_lru_maxscan __read_mostly = 64; /* max # to scan/reclaim */
-int cache_maxlen __read_mostly = USHRT_MAX; /* max name length to cache */
+int cache_maxlen __read_mostly = NC_NLEN_MASK; /* max name length to cache */
int cache_stat_interval __read_mostly = 300; /* in seconds */
/*
@@ -328,8 +348,8 @@ cache_compare_nodes(void *context, const
if (nc1->nc_key > nc2->nc_key) {
return 1;
}
- KASSERT(nc1->nc_nlen == nc2->nc_nlen);
- return memcmp(nc1->nc_name, nc2->nc_name, nc1->nc_nlen);
+ KASSERT(NC_NLEN(nc1) == NC_NLEN(nc2));
+ return memcmp(nc1->nc_name, nc2->nc_name, NC_NLEN(nc1));
}
/*
@@ -337,15 +357,15 @@ cache_compare_nodes(void *context, const
* the key value to try and improve uniqueness, and so that length doesn't
* need to be compared separately for string comparisons.
*/
-static inline uint64_t
+static uintptr_t
cache_key(const char *name, size_t nlen)
{
- uint64_t key;
+ uintptr_t key;
- KASSERT(nlen <= USHRT_MAX);
+ KASSERT((nlen & ~NC_NLEN_MASK) == 0);
key = hash32_buf(name, nlen, HASH32_STR_INIT);
- return (key << 32) | nlen;
+ return (key << NC_NLEN_BITS) | (uintptr_t)nlen;
}
/*
@@ -358,13 +378,13 @@ cache_remove(struct namecache *ncp, cons
{
struct vnode *vp, *dvp = ncp->nc_dvp;
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
+ size_t namelen = NC_NLEN(ncp);
KASSERT(rw_write_held(&dvi->vi_nc_lock));
- KASSERT(cache_key(ncp->nc_name, ncp->nc_nlen) == ncp->nc_key);
+ KASSERT(cache_key(ncp->nc_name, namelen) == ncp->nc_key);
KASSERT(rb_tree_find_node(&dvi->vi_nc_tree, ncp) == ncp);
- SDT_PROBE(vfs, namecache, invalidate, done, ncp,
- 0, 0, 0, 0);
+ SDT_PROBE(vfs, namecache, invalidate, done, ncp, 0, 0, 0, 0);
/*
* Remove from the vnode's list. This excludes cache_revlookup(),
@@ -391,8 +411,8 @@ cache_remove(struct namecache *ncp, cons
mutex_exit(&cache_lru_lock);
/* Finally, free it. */
- if (ncp->nc_nlen > NCHNAMLEN) {
- size_t sz = offsetof(struct namecache, nc_name[ncp->nc_nlen]);
+ if (namelen > NCHNAMLEN) {
+ size_t sz = offsetof(struct namecache, nc_name[namelen]);
kmem_free(ncp, sz);
} else {
pool_cache_put(cache_pool, ncp);
@@ -404,13 +424,15 @@ cache_remove(struct namecache *ncp, cons
*/
static struct namecache * __noinline
cache_lookup_entry(struct vnode *dvp, const char *name, size_t namelen,
- uint64_t key)
+ uintptr_t key)
{
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
struct rb_node *node = dvi->vi_nc_tree.rbt_root;
struct namecache *ncp;
- int lrulist, diff;
+ enum cache_lru_id lrulist;
+ int diff;
+ KASSERT(namelen <= MAXPATHLEN);
KASSERT(rw_lock_held(&dvi->vi_nc_lock));
/*
@@ -430,7 +452,7 @@ cache_lookup_entry(struct vnode *dvp, co
KASSERT((void *)&ncp->nc_tree == (void *)ncp);
KASSERT(ncp->nc_dvp == dvp);
if (ncp->nc_key == key) {
- KASSERT(ncp->nc_nlen == namelen);
+ KASSERT(NC_NLEN(ncp) == namelen);
diff = memcmp(ncp->nc_name, name, namelen);
if (__predict_true(diff == 0)) {
break;
@@ -511,7 +533,7 @@ cache_lookup(struct vnode *dvp, const ch
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
struct namecache *ncp;
struct vnode *vp;
- uint64_t key;
+ uintptr_t key;
int error;
bool hit;
krw_t op;
@@ -564,7 +586,7 @@ cache_lookup(struct vnode *dvp, const ch
COUNT(ncs_badhits);
return false;
}
- if (ncp->nc_vp == NULL) {
+ if ((vp = ncp->nc_vp) == NULL) {
if (iswht_ret != NULL) {
/*
* Restore the ISWHITEOUT flag saved earlier.
@@ -592,7 +614,6 @@ cache_lookup(struct vnode *dvp, const ch
rw_exit(&dvi->vi_nc_lock);
return hit;
}
- vp = ncp->nc_vp;
error = vcache_tryvget(vp);
rw_exit(&dvi->vi_nc_lock);
if (error) {
@@ -638,7 +659,8 @@ cache_lookup_linked(struct vnode *dvp, c
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
struct namecache *ncp;
krwlock_t *oldlock, *newlock;
- uint64_t key;
+ struct vnode *vp;
+ uintptr_t key;
int error;
KASSERT(namelen != cache_mp_nlen || name == cache_mp_name);
@@ -727,7 +749,7 @@ cache_lookup_linked(struct vnode *dvp, c
name, namelen, 0, 0);
return false;
}
- if (ncp->nc_vp == NULL) {
+ if ((vp = ncp->nc_vp) == NULL) {
/* found negative entry; vn is already null from above */
KASSERT(namelen != cache_mp_nlen);
KASSERT(name != cache_mp_name);
@@ -749,7 +771,7 @@ cache_lookup_linked(struct vnode *dvp, c
if (newlock) {
*plock = newlock;
}
- *vn_ret = ncp->nc_vp;
+ *vn_ret = vp;
return true;
}
@@ -772,8 +794,9 @@ cache_revlookup(struct vnode *vp, struct
{
vnode_impl_t *vi = VNODE_TO_VIMPL(vp);
struct namecache *ncp;
+ enum cache_lru_id lrulist;
struct vnode *dvp;
- int error, nlen, lrulist;
+ int error, nlen;
char *bp;
KASSERT(vp != NULL);
@@ -812,12 +835,12 @@ cache_revlookup(struct vnode *vp, struct
TAILQ_FOREACH(ncp, &vi->vi_nc_list, nc_list) {
KASSERT(ncp->nc_vp == vp);
KASSERT(ncp->nc_dvp != NULL);
- nlen = ncp->nc_nlen;
+ nlen = NC_NLEN(ncp);
/*
* Ignore mountpoint entries.
*/
- if (ncp->nc_nlen == cache_mp_nlen) {
+ if (nlen == cache_mp_nlen) {
continue;
}
@@ -929,7 +952,6 @@ cache_enter(struct vnode *dvp, struct vn
ncp->nc_vp = vp;
ncp->nc_dvp = dvp;
ncp->nc_key = cache_key(name, namelen);
- ncp->nc_nlen = namelen;
ncp->nc_whiteout = ((cnflags & ISWHITEOUT) != 0);
memcpy(ncp->nc_name, name, namelen);
@@ -941,7 +963,7 @@ cache_enter(struct vnode *dvp, struct vn
oncp = rb_tree_insert_node(&dvi->vi_nc_tree, ncp);
if (oncp != ncp) {
KASSERT(oncp->nc_key == ncp->nc_key);
- KASSERT(oncp->nc_nlen == ncp->nc_nlen);
+ KASSERT(NC_NLEN(oncp) == NC_NLEN(ncp));
KASSERT(memcmp(oncp->nc_name, name, namelen) == 0);
cache_remove(oncp, true);
oncp = rb_tree_insert_node(&dvi->vi_nc_tree, ncp);
@@ -1107,12 +1129,11 @@ nchinit(void)
void
cache_cpu_init(struct cpu_info *ci)
{
- void *p;
size_t sz;
- sz = roundup2(sizeof(struct nchcpu), coherency_unit) + coherency_unit;
- p = kmem_zalloc(sz, KM_SLEEP);
- ci->ci_data.cpu_nch = (void *)roundup2((uintptr_t)p, coherency_unit);
+ sz = roundup2(sizeof(struct nchcpu), coherency_unit);
+ ci->ci_data.cpu_nch = kmem_zalloc(sz, KM_SLEEP);
+ KASSERT(((uintptr_t)ci->ci_data.cpu_nch & (coherency_unit - 1)) == 0);
}
/*
@@ -1232,7 +1253,7 @@ cache_purge_name(struct vnode *dvp, cons
{
vnode_impl_t *dvi = VNODE_TO_VIMPL(dvp);
struct namecache *ncp;
- uint64_t key;
+ uintptr_t key;
SDT_PROBE(vfs, namecache, purge, name, name, namelen, 0, 0, 0);
@@ -1524,7 +1545,7 @@ namecache_print(struct vnode *vp, void (
for (id = 0; id < LRU_COUNT; id++) {
TAILQ_FOREACH(ncp, &cache_lru.list[id], nc_lru) {
if (ncp->nc_vp == vp) {
- (*pr)("name %.*s\n", ncp->nc_nlen,
+ (*pr)("name %.*s\n", NC_NLEN(ncp),
ncp->nc_name);
dvp = ncp->nc_dvp;
}
@@ -1537,7 +1558,7 @@ namecache_print(struct vnode *vp, void (
for (id = 0; id < LRU_COUNT; id++) {
TAILQ_FOREACH(ncp, &cache_lru.list[id], nc_lru) {
if (ncp->nc_vp == dvp) {
- (*pr)("parent %.*s\n", ncp->nc_nlen,
+ (*pr)("parent %.*s\n", NC_NLEN(ncp),
ncp->nc_name);
}
}
Index: src/sys/sys/namei.src
diff -u src/sys/sys/namei.src:1.60 src/sys/sys/namei.src:1.61
--- src/sys/sys/namei.src:1.60 Tue Jun 29 22:39:21 2021
+++ src/sys/sys/namei.src Sat Sep 9 18:27:59 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: namei.src,v 1.60 2021/06/29 22:39:21 dholland Exp $ */
+/* $NetBSD: namei.src,v 1.61 2023/09/09 18:27:59 ad Exp $ */
/*
* Copyright (c) 1985, 1989, 1991, 1993
@@ -200,12 +200,24 @@ NAMEIFL PARAMASK 0x02ff800 /* mask of pa
#define NCHNAMLEN sizeof(((struct namecache *)NULL)->nc_name)
/*
+ * The uintptr_t-sized key value computed for each name consists of name
+ * length and a hash value. On 32-bit platforms the top NC_NLEN_BITS
+ * bits of the 32-bit hash value are lobbed off.
+ */
+
+#define NC_NLEN_BITS 11
+#define NC_NLEN_MASK ((1 << NC_NLEN_BITS) - 1)
+#define NC_NLEN(ncp) ((ncp)->nc_key & NC_NLEN_MASK)
+
+/*
* Namecache entry.
*
* This structure describes the elements in the cache of recent names looked
- * up by namei. It's carefully sized to take up 128 bytes on _LP64, to make
- * good use of space and the CPU caches. Items used during RB tree lookup
- * (nc_tree, nc_key) are clustered at the start of the structure.
+ * up by namei. It's carefully sized to take up 128 bytes on _LP64 and 64
+ * bytes on 32-bit machines, to make good use of space and the CPU caches.
+ *
+ * Items used during RB tree lookup (nc_tree, nc_key) are clustered at the
+ * start of the structure to minimise cache misses during lookup.
*
* Field markings and their corresponding locks:
*
@@ -216,17 +228,20 @@ NAMEIFL PARAMASK 0x02ff800 /* mask of pa
*/
struct namecache {
struct rb_node nc_tree; /* d red-black tree, must be first */
- uint64_t nc_key; /* - hashed key value */
+ uintptr_t nc_key; /* - hashed key value */
TAILQ_ENTRY(namecache) nc_list; /* v nc_vp's list of cache entries */
TAILQ_ENTRY(namecache) nc_lru; /* l pseudo-lru chain */
struct vnode *nc_dvp; /* - vnode of parent of name */
struct vnode *nc_vp; /* - vnode the name refers to */
- int nc_lrulist; /* l which LRU list it's on */
- u_short nc_nlen; /* - length of the name */
- char nc_whiteout; /* - true if a whiteout */
- char nc_name[41]; /* - segment name */
-};
+ u_char nc_lrulist; /* l LRU list entry is on */
+ u_char nc_whiteout; /* - whiteout indicator */
+#ifdef _LP64
+ char nc_name[46]; /* - segment name */
+#else
+ char nc_name[22]; /* - segment name */
#endif
+};
+#endif /* __NAMECACHE_PRIVATE */
#ifdef _KERNEL
#include <sys/pool.h>
Index: src/usr.bin/pmap/main.h
diff -u src/usr.bin/pmap/main.h:1.7 src/usr.bin/pmap/main.h:1.8
--- src/usr.bin/pmap/main.h:1.7 Sun Aug 21 07:46:52 2022
+++ src/usr.bin/pmap/main.h Sat Sep 9 18:27:59 2023
@@ -1,4 +1,4 @@
-/* $NetBSD: main.h,v 1.7 2022/08/21 07:46:52 mlelstv Exp $ */
+/* $NetBSD: main.h,v 1.8 2023/09/09 18:27:59 ad Exp $ */
/*
* Copyright (c) 2003 The NetBSD Foundation, Inc.
@@ -36,9 +36,6 @@ extern u_long kernel_map_addr;
extern void *uvm_vnodeops, *uvm_deviceops, *aobj_pager, *ubc_pager;
extern rlim_t maxssiz;
-LIST_HEAD(nchashhead, namecache);
-extern struct nchashhead *nchashtbl;
-
struct cache_entry {
LIST_ENTRY(cache_entry) ce_next;
struct vnode *ce_vp, *ce_pvp;
Index: src/usr.bin/pmap/pmap.c
diff -u src/usr.bin/pmap/pmap.c:1.57 src/usr.bin/pmap/pmap.c:1.58
--- src/usr.bin/pmap/pmap.c:1.57 Sun Aug 21 07:46:52 2022
+++ src/usr.bin/pmap/pmap.c Sat Sep 9 18:27:59 2023
@@ -1,7 +1,7 @@
-/* $NetBSD: pmap.c,v 1.57 2022/08/21 07:46:52 mlelstv Exp $ */
+/* $NetBSD: pmap.c,v 1.58 2023/09/09 18:27:59 ad Exp $ */
/*
- * Copyright (c) 2002, 2003, 2020 The NetBSD Foundation, Inc.
+ * Copyright (c) 2002, 2003, 2020, 2023 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -31,7 +31,7 @@
#include <sys/cdefs.h>
#ifndef lint
-__RCSID("$NetBSD: pmap.c,v 1.57 2022/08/21 07:46:52 mlelstv Exp $");
+__RCSID("$NetBSD: pmap.c,v 1.58 2023/09/09 18:27:59 ad Exp $");
#endif
#include <string.h>
@@ -874,7 +874,7 @@ search_cache(kvm_t *kd, struct vnode *vp
(vi.vi_vnode.v_vflag & VV_ROOT) != 0)
break;
/* Otherwise pull first NCHNAMLEN chars of name. */
- nlen = MIN((size_t)nc.nc_nlen, NCHNAMLEN);
+ nlen = MIN(NC_NLEN(&nc), NCHNAMLEN);
/* too small */
if ((size_t)(o - buf) < nlen + (o != e ? 1 : 0))
break;