Module Name:	src
Committed By:	dennis
Date:		Sun Dec  7 22:23:38 UTC 2014
Modified Files:
	src/sys/kern: vfs_cache.c

Log Message:
Take 2.  Do a fairly mechanical reversion of the locking protocol
to that of revision 1.94.  Add a comment documenting my best guess
about the locking requirements in this subsystem.

Don't take locks solely for the sake of stats counter increments;
the locking around the increment of ->ncs_pass2 and/or ->ncs_2passes
caused a deadlock.  Defer fixing stats-keeping to allow this change
to be easily backed out of.


To generate a diff of this commit:
cvs rdiff -u -r1.101 -r1.102 src/sys/kern/vfs_cache.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
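The locking protocol this change restores can be pictured with a small
userspace analogue.  The sketch below is not the kernel code: it is a
minimal pthreads model with hypothetical names (global_lock standing in
for namecache_lock, struct percpu for struct nchcpu, and enter/lookup/
reclaim for cache_enter()/cache_lookup()/cache_reclaim()).  It shows
writers serialized on one global lock, lookups taking only their own
per-CPU lock, and reclaim taking every lock to idle the subsystem
before freeing entries and sampling stats:

#include <pthread.h>

#define NCPU 4

/* Stands in for namecache_lock: excludes other writers and reclaim. */
static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stands in for struct nchcpu: one lock plus stats per CPU. */
struct percpu {
	pthread_mutex_t lock;
	unsigned long hits;
	unsigned long misses;
};
static struct percpu cpus[NCPU];

/* cache_enter() analogue: writers exclude each other via the global
 * lock; entries are linked so concurrent lookups stay safe. */
static void
enter(void)
{
	pthread_mutex_lock(&global_lock);
	/* ... insert the new entry into the hash list ... */
	pthread_mutex_unlock(&global_lock);
}

/* cache_lookup() analogue: only the caller's per-CPU lock is taken,
 * so lookups run in parallel with enter() and with each other. */
static int
lookup(int cpu)
{
	struct percpu *pc = &cpus[cpu];
	int found = 0;

	pthread_mutex_lock(&pc->lock);
	/* ... hash lookup; set found on a hit ... */
	if (found)
		pc->hits++;
	else
		pc->misses++;
	pthread_mutex_unlock(&pc->lock);
	return found;
}

/* cache_reclaim() analogue: the global lock holds off writers, then
 * every per-CPU lock holds off lookups.  With all locks held the
 * subsystem is idle, so entries can be freed and the per-CPU stats
 * sampled, added to a subsystem total, and zeroed. */
static void
reclaim(unsigned long *hits, unsigned long *misses)
{
	int i;

	pthread_mutex_lock(&global_lock);
	for (i = 0; i < NCPU; i++)
		pthread_mutex_lock(&cpus[i].lock);

	for (i = 0; i < NCPU; i++) {
		*hits += cpus[i].hits;
		*misses += cpus[i].misses;
		cpus[i].hits = cpus[i].misses = 0;
		/* ... free stale entries: nothing can reference them ... */
	}

	for (i = 0; i < NCPU; i++)
		pthread_mutex_unlock(&cpus[i].lock);
	pthread_mutex_unlock(&global_lock);
}

int
main(void)
{
	unsigned long hits = 0, misses = 0;
	int i;

	for (i = 0; i < NCPU; i++)
		pthread_mutex_init(&cpus[i].lock, NULL);
	enter();
	lookup(0);
	reclaim(&hits, &misses);
	return 0;
}

The point of the split is that the hot path (lookup) only ever touches
a lock private to its CPU, while the rare reclaim pass pays the full
cost of quiescing everything.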
Modified files:

Index: src/sys/kern/vfs_cache.c
diff -u src/sys/kern/vfs_cache.c:1.101 src/sys/kern/vfs_cache.c:1.102
--- src/sys/kern/vfs_cache.c:1.101	Wed Dec  3 01:30:32 2014
+++ src/sys/kern/vfs_cache.c	Sun Dec  7 22:23:38 2014
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_cache.c,v 1.101 2014/12/03 01:30:32 christos Exp $	*/
+/*	$NetBSD: vfs_cache.c,v 1.102 2014/12/07 22:23:38 dennis Exp $	*/
 
 /*-
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -58,7 +58,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.101 2014/12/03 01:30:32 christos Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,v 1.102 2014/12/07 22:23:38 dennis Exp $");
 
 #include "opt_ddb.h"
 #include "opt_revcache.h"
@@ -102,7 +102,39 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_cache.c,
  */
 
 /*
- * Per-cpu namecache data.
+ * The locking in this subsystem works as follows:
+ *
+ * When an entry is added to the cache, via cache_enter(),
+ * namecache_lock is taken to exclude other writers.  The new
+ * entry is added to the hash list in a way which permits
+ * concurrent lookups and invalidations in the cache done on
+ * other CPUs to continue in parallel.
+ *
+ * When a lookup is done in the cache, via cache_lookup() or
+ * cache_lookup_raw(), the per-cpu lock below is taken.  This
+ * protects calls to cache_lookup_entry() and cache_invalidate()
+ * against cache_reclaim() but allows lookups to continue in
+ * parallel with cache_enter().
+ *
+ * cache_revlookup() takes namecache_lock to exclude cache_enter()
+ * and cache_reclaim() since the list it operates on is not
+ * maintained to allow concurrent reads.
+ *
+ * When cache_reclaim() is called namecache_lock is held to hold
+ * off calls to cache_enter()/cache_revlookup() and each of the
+ * per-cpu locks is taken to hold off lookups.  Holding all these
+ * locks essentially idles the subsystem, ensuring there are no
+ * concurrent references to the cache entries being freed.
+ *
+ * As a side effect while running cache_reclaim(), once the per-cpu
+ * locks are held the per-cpu stats are sampled, added to the
+ * subsystem total and zeroed.  Only some of the per-cpu stats are
+ * incremented with the per-cpu lock held, however, and attempting
+ * to add locking around the remaining counters currently
+ * incremented without a lock can cause deadlock, so we don't
+ * do that.  XXX Fix this up in a later revision.
+ *
+ * Per-cpu namecache data is defined next.
  */
 struct nchcpu {
 	kmutex_t	cpu_lock;
@@ -286,7 +318,8 @@ cache_unlock_cpus(void)
 }
 
 /*
- * Find a single cache entry and return it locked.
+ * Find a single cache entry and return it locked.  'namecache_lock' or
+ * at least one of the per-CPU locks must be held.
  */
 static struct namecache *
 cache_lookup_entry(const struct vnode *dvp, const char *name, size_t namelen)
@@ -296,7 +329,6 @@ cache_lookup_entry(const struct vnode *d
 	nchash_t hash;
 
 	KASSERT(dvp != NULL);
-	KASSERT(mutex_owned(namecache_lock));
 
 	hash = cache_hash(name, namelen);
 	ncpp = &nchashtbl[NCHASH2(hash, dvp)];
@@ -388,27 +420,22 @@ cache_lookup(struct vnode *dvp, const ch
 	}
 
 	cpup = curcpu()->ci_data.cpu_nch;
+	mutex_enter(&cpup->cpu_lock);
 	if (__predict_false(namelen > NCHNAMLEN)) {
-		mutex_enter(&cpup->cpu_lock);
 		COUNT(cpup->cpu_stats, ncs_long);
 		mutex_exit(&cpup->cpu_lock);
 		/* found nothing */
 		return 0;
 	}
-	mutex_enter(namecache_lock);
 	ncp = cache_lookup_entry(dvp, name, namelen);
-	mutex_exit(namecache_lock);
 	if (__predict_false(ncp == NULL)) {
-		mutex_enter(&cpup->cpu_lock);
 		COUNT(cpup->cpu_stats, ncs_miss);
 		mutex_exit(&cpup->cpu_lock);
 		/* found nothing */
 		return 0;
 	}
 	if ((cnflags & MAKEENTRY) == 0) {
-		mutex_enter(&cpup->cpu_lock);
 		COUNT(cpup->cpu_stats, ncs_badhits);
-		mutex_exit(&cpup->cpu_lock);
 		/*
 		 * Last component and we are renaming or deleting,
 		 * the cache entry is invalid, or otherwise don't
@@ -416,6 +443,7 @@ cache_lookup(struct vnode *dvp, const ch
 		 */
 		cache_invalidate(ncp);
 		mutex_exit(&ncp->nc_lock);
+		mutex_exit(&cpup->cpu_lock);
 		/* found nothing */
 		return 0;
 	}
@@ -432,16 +460,13 @@ cache_lookup(struct vnode *dvp, const ch
 
 		if (__predict_true(nameiop != CREATE ||
 		    (cnflags & ISLASTCN) == 0)) {
-			mutex_enter(&cpup->cpu_lock);
 			COUNT(cpup->cpu_stats, ncs_neghits);
-			mutex_exit(&cpup->cpu_lock);
 			mutex_exit(&ncp->nc_lock);
+			mutex_exit(&cpup->cpu_lock);
 			/* found neg entry; vn is already null from above */
 			return 1;
 		} else {
-			mutex_enter(&cpup->cpu_lock);
 			COUNT(cpup->cpu_stats, ncs_badhits);
-			mutex_exit(&cpup->cpu_lock);
 			/*
 			 * Last component and we are renaming or
 			 * deleting, the cache entry is invalid,
@@ -450,6 +475,7 @@ cache_lookup(struct vnode *dvp, const ch
 			 */
 			cache_invalidate(ncp);
 			mutex_exit(&ncp->nc_lock);
+			mutex_exit(&cpup->cpu_lock);
 			/* found nothing */
 			return 0;
 		}
@@ -458,6 +484,7 @@ cache_lookup(struct vnode *dvp, const ch
 	vp = ncp->nc_vp;
 	mutex_enter(vp->v_interlock);
 	mutex_exit(&ncp->nc_lock);
+	mutex_exit(&cpup->cpu_lock);
 	error = vget(vp, LK_NOWAIT);
 	if (error) {
 		KASSERT(error == EBUSY);
@@ -465,9 +492,7 @@ cache_lookup(struct vnode *dvp, const ch
 		 * This vnode is being cleaned out.
 		 * XXX badhits?
 		 */
-		mutex_enter(&cpup->cpu_lock);
 		COUNT(cpup->cpu_stats, ncs_falsehits);
-		mutex_exit(&cpup->cpu_lock);
 		/* found nothing */
 		return 0;
 	}
@@ -481,9 +506,7 @@ cache_lookup(struct vnode *dvp, const ch
 #endif /* DEBUG */
 
 	/* We don't have the right lock, but this is only for stats. */
-	mutex_enter(&cpup->cpu_lock);
 	COUNT(cpup->cpu_stats, ncs_goodhits);
-	mutex_exit(&cpup->cpu_lock);
 
 	/* found it */
 	*vn_ret = vp;
@@ -512,18 +535,15 @@ cache_lookup_raw(struct vnode *dvp, cons
 	}
 
 	cpup = curcpu()->ci_data.cpu_nch;
+	mutex_enter(&cpup->cpu_lock);
 	if (__predict_false(namelen > NCHNAMLEN)) {
-		mutex_enter(&cpup->cpu_lock);
 		COUNT(cpup->cpu_stats, ncs_long);
 		mutex_exit(&cpup->cpu_lock);
 		/* found nothing */
 		return 0;
 	}
-	mutex_enter(namecache_lock);
 	ncp = cache_lookup_entry(dvp, name, namelen);
-	mutex_exit(namecache_lock);
 	if (__predict_false(ncp == NULL)) {
-		mutex_enter(&cpup->cpu_lock);
 		COUNT(cpup->cpu_stats, ncs_miss);
 		mutex_exit(&cpup->cpu_lock);
 		/* found nothing */
@@ -539,15 +559,15 @@ cache_lookup_raw(struct vnode *dvp, cons
 			/*cnp->cn_flags |= ncp->nc_flags;*/
 			*iswht_ret = (ncp->nc_flags & ISWHITEOUT) != 0;
 		}
-		mutex_enter(&cpup->cpu_lock);
 		COUNT(cpup->cpu_stats, ncs_neghits);
-		mutex_exit(&cpup->cpu_lock);
 		mutex_exit(&ncp->nc_lock);
+		mutex_exit(&cpup->cpu_lock);
 		/* found negative entry; vn is already null from above */
 		return 1;
 	}
 	mutex_enter(vp->v_interlock);
 	mutex_exit(&ncp->nc_lock);
+	mutex_exit(&cpup->cpu_lock);
 	error = vget(vp, LK_NOWAIT);
 	if (error) {
 		KASSERT(error == EBUSY);
@@ -555,17 +575,13 @@ cache_lookup_raw(struct vnode *dvp, cons
 		 * This vnode is being cleaned out.
 		 * XXX badhits?
 		 */
-		mutex_enter(&cpup->cpu_lock);
 		COUNT(cpup->cpu_stats, ncs_falsehits);
-		mutex_exit(&cpup->cpu_lock);
 		/* found nothing */
 		return 0;
	}
 
 	/* Unlocked, but only for stats. */
-	mutex_enter(&cpup->cpu_lock);
 	COUNT(cpup->cpu_stats, ncs_goodhits); /* XXX can be "badhits" */
-	mutex_exit(&cpup->cpu_lock);
 
 	/* found it */
 	*vn_ret = vp;
@@ -617,9 +633,7 @@ cache_revlookup(struct vnode *vp, struct
 			    ncp->nc_name[1] == '.')
 				panic("cache_revlookup: found entry for ..");
 #endif
-			mutex_enter(&cpup->cpu_lock);
 			COUNT(cpup->cpu_stats, ncs_revhits);
-			mutex_exit(&cpup->cpu_lock);
 			nlen = ncp->nc_nlen;
 
 			if (bufp) {
@@ -651,9 +665,7 @@ cache_revlookup(struct vnode *vp, struct
 		}
 		mutex_exit(&ncp->nc_lock);
 	}
-	mutex_enter(&cpup->cpu_lock);
 	COUNT(cpup->cpu_stats, ncs_revmiss);
-	mutex_exit(&cpup->cpu_lock);
 	mutex_exit(namecache_lock);
 out:
 	*dvpp = NULL;
@@ -1101,9 +1113,7 @@ namecache_count_pass2(void)
 {
 	struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;
 
-	mutex_enter(&cpup->cpu_lock);
 	COUNT(cpup->cpu_stats, ncs_pass2);
-	mutex_exit(&cpup->cpu_lock);
 }
 
 void
@@ -1111,9 +1121,7 @@ namecache_count_2passes(void)
 {
 	struct nchcpu *cpup = curcpu()->ci_data.cpu_nch;
 
-	mutex_enter(&cpup->cpu_lock);
 	COUNT(cpup->cpu_stats, ncs_2passes);
-	mutex_exit(&cpup->cpu_lock);
 }
 
 static int
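The stats compromise the log message defers fixing can be sketched in
the same hypothetical userspace style.  In the sketch below (all names
invented: struct stats, count_pass2, sample_and_zero), a counter such
as ncs_pass2 is bumped without the per-CPU lock, so a bump can race
with the sample-and-zero pass done by reclaim and occasionally be
lost.  That is tolerable for a statistic, and it avoids the lock
acquisition that the log message reports as having deadlocked:

/* A per-CPU stats record; 'pass2' stands in for ncs_pass2. */
struct stats {
	unsigned long pass2;
};

/* namecache_count_pass2() analogue: no lock is taken around the bump.
 * The count is lossy by design; per the log message, wrapping this in
 * the per-CPU lock is what caused the deadlock being backed out. */
static void
count_pass2(struct stats *st)
{
	st->pass2++;
}

/* Reclaim-side sampling: runs with the per-CPU lock held, but that
 * lock does not exclude the unlocked increment above, so a bump that
 * lands between the read and the store below is simply dropped. */
static unsigned long
sample_and_zero(struct stats *st)
{
	unsigned long v = st->pass2;

	st->pass2 = 0;
	return v;
}

int
main(void)
{
	struct stats st = { 0 };

	count_pass2(&st);
	return (int)sample_and_zero(&st);
}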