Module Name: src
Committed By: ad
Date: Sun Feb 23 22:14:04 UTC 2020
Modified Files:
src/sys/compat/netbsd32: netbsd32_fs.c
src/sys/kern: kern_exec.c kern_proc.c uipc_usrreq.c vfs_cwd.c
vfs_getcwd.c vfs_lookup.c vfs_mount.c vfs_subr.c vfs_syscalls.c
vfs_vnode.c vnode_if.sh
src/sys/miscfs/genfs: genfs_vnops.c
src/sys/miscfs/procfs: procfs_vnops.c
src/sys/sys: filedesc.h vfs_syscalls.h vnode.h vnode_impl.h
Log Message:
Merge from ad-namecache:
- Have a stab at clustering the members of vnode_t and vnode_impl_t in a
more cache-conscious way. With that done, go back to adjusting v_usecount
with atomics and keep vi_lock directly in vnode_impl_t (saves KVA).
- Allow VOP_LOCK(LK_NONE) for the benefit of VFS_VGET() and VFS_ROOT().
Make sure LK_UPGRADE always comes with LK_NOWAIT.
- Make cwdinfo use mostly lockless.
To generate a diff of this commit:
cvs rdiff -u -r1.86 -r1.87 src/sys/compat/netbsd32/netbsd32_fs.c
cvs rdiff -u -r1.492 -r1.493 src/sys/kern/kern_exec.c
cvs rdiff -u -r1.241 -r1.242 src/sys/kern/kern_proc.c
cvs rdiff -u -r1.196 -r1.197 src/sys/kern/uipc_usrreq.c
cvs rdiff -u -r1.4 -r1.5 src/sys/kern/vfs_cwd.c
cvs rdiff -u -r1.54 -r1.55 src/sys/kern/vfs_getcwd.c
cvs rdiff -u -r1.213 -r1.214 src/sys/kern/vfs_lookup.c
cvs rdiff -u -r1.74 -r1.75 src/sys/kern/vfs_mount.c
cvs rdiff -u -r1.480 -r1.481 src/sys/kern/vfs_subr.c
cvs rdiff -u -r1.541 -r1.542 src/sys/kern/vfs_syscalls.c
cvs rdiff -u -r1.111 -r1.112 src/sys/kern/vfs_vnode.c
cvs rdiff -u -r1.68 -r1.69 src/sys/kern/vnode_if.sh
cvs rdiff -u -r1.201 -r1.202 src/sys/miscfs/genfs/genfs_vnops.c
cvs rdiff -u -r1.208 -r1.209 src/sys/miscfs/procfs/procfs_vnops.c
cvs rdiff -u -r1.65 -r1.66 src/sys/sys/filedesc.h
cvs rdiff -u -r1.26 -r1.27 src/sys/sys/vfs_syscalls.h
cvs rdiff -u -r1.288 -r1.289 src/sys/sys/vnode.h
cvs rdiff -u -r1.20 -r1.21 src/sys/sys/vnode_impl.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/compat/netbsd32/netbsd32_fs.c
diff -u src/sys/compat/netbsd32/netbsd32_fs.c:1.86 src/sys/compat/netbsd32/netbsd32_fs.c:1.87
--- src/sys/compat/netbsd32/netbsd32_fs.c:1.86 Fri Jan 31 09:01:23 2020
+++ src/sys/compat/netbsd32/netbsd32_fs.c Sun Feb 23 22:14:03 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: netbsd32_fs.c,v 1.86 2020/01/31 09:01:23 maxv Exp $ */
+/* $NetBSD: netbsd32_fs.c,v 1.87 2020/02/23 22:14:03 ad Exp $ */
/*
* Copyright (c) 1998, 2001 Matthew R. Green
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.86 2020/01/31 09:01:23 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.87 2020/02/23 22:14:03 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -740,13 +740,12 @@ netbsd32___getcwd(struct lwp *l, const s
syscallarg(char *) bufp;
syscallarg(size_t) length;
} */
- struct proc *p = l->l_proc;
int error;
char *path;
char *bp, *bend;
int len = (int)SCARG(uap, length);
int lenused;
- struct cwdinfo *cwdi;
+ struct vnode *dvp;
if (len > MAXPATHLEN*4)
len = MAXPATHLEN*4;
@@ -764,11 +763,10 @@ netbsd32___getcwd(struct lwp *l, const s
* limit it to N/2 vnodes for an N byte buffer.
*/
#define GETCWD_CHECK_ACCESS 0x0001
- cwdi = p->p_cwdi;
- rw_enter(&cwdi->cwdi_lock, RW_READER);
- error = getcwd_common (cwdi->cwdi_cdir, NULL, &bp, path, len/2,
+ dvp = cwdcdir();
+ error = getcwd_common (dvp, NULL, &bp, path, len/2,
GETCWD_CHECK_ACCESS, l);
- rw_exit(&cwdi->cwdi_lock);
+ vrele(dvp);
if (error)
goto out;
Index: src/sys/kern/kern_exec.c
diff -u src/sys/kern/kern_exec.c:1.492 src/sys/kern/kern_exec.c:1.493
--- src/sys/kern/kern_exec.c:1.492 Sat Feb 15 17:13:55 2020
+++ src/sys/kern/kern_exec.c Sun Feb 23 22:14:03 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: kern_exec.c,v 1.492 2020/02/15 17:13:55 ad Exp $ */
+/* $NetBSD: kern_exec.c,v 1.493 2020/02/23 22:14:03 ad Exp $ */
/*-
- * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.492 2020/02/15 17:13:55 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.493 2020/02/23 22:14:03 ad Exp $");
#include "opt_exec.h"
#include "opt_execfmt.h"
@@ -672,7 +672,7 @@ exec_makepathbuf(struct lwp *l, const ch
char *path, *bp;
size_t len, tlen;
int error;
- struct cwdinfo *cwdi;
+ struct vnode *dvp;
path = PNBUF_GET();
if (seg == UIO_SYSSPACE) {
@@ -698,11 +698,10 @@ exec_makepathbuf(struct lwp *l, const ch
memmove(bp, path, len);
*(--bp) = '/';
- cwdi = l->l_proc->p_cwdi;
- rw_enter(&cwdi->cwdi_lock, RW_READER);
- error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path, MAXPATHLEN / 2,
+ dvp = cwdcdir();
+ error = getcwd_common(dvp, NULL, &bp, path, MAXPATHLEN / 2,
GETCWD_CHECK_ACCESS, l);
- rw_exit(&cwdi->cwdi_lock);
+ vrele(dvp);
if (error)
goto err;
@@ -1119,6 +1118,7 @@ static void
emulexec(struct lwp *l, struct exec_package *epp)
{
struct proc *p = l->l_proc;
+ struct cwdinfo *cwdi;
/* The emulation root will usually have been found when we looked
* for the elf interpreter (or similar), if not look now. */
@@ -1127,9 +1127,10 @@ emulexec(struct lwp *l, struct exec_pack
emul_find_root(l, epp);
/* Any old emulation root got removed by fdcloseexec */
- rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
- p->p_cwdi->cwdi_edir = epp->ep_emul_root;
- rw_exit(&p->p_cwdi->cwdi_lock);
+ KASSERT(p == curproc);
+ cwdi = cwdenter(RW_WRITER);
+ cwdi->cwdi_edir = epp->ep_emul_root;
+ cwdexit(cwdi);
epp->ep_emul_root = NULL;
if (epp->ep_interp != NULL)
vrele(epp->ep_interp);
Index: src/sys/kern/kern_proc.c
diff -u src/sys/kern/kern_proc.c:1.241 src/sys/kern/kern_proc.c:1.242
--- src/sys/kern/kern_proc.c:1.241 Fri Feb 21 00:26:22 2020
+++ src/sys/kern/kern_proc.c Sun Feb 23 22:14:03 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: kern_proc.c,v 1.241 2020/02/21 00:26:22 joerg Exp $ */
+/* $NetBSD: kern_proc.c,v 1.242 2020/02/23 22:14:03 ad Exp $ */
/*-
- * Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.241 2020/02/21 00:26:22 joerg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.242 2020/02/23 22:14:03 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_kstack.h"
@@ -106,6 +106,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,
#include <sys/exec.h>
#include <sys/cpu.h>
#include <sys/compat_stub.h>
+#include <sys/vnode.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm.h>
@@ -476,7 +477,7 @@ proc0_init(void)
p->p_cred = cred0;
/* Create the CWD info. */
- rw_init(&cwdi0.cwdi_lock);
+ mutex_init(&cwdi0.cwdi_lock, MUTEX_DEFAULT, IPL_NONE);
/* Create the limits structures. */
mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE);
@@ -2594,7 +2595,7 @@ fill_cwd(struct lwp *l, pid_t pid, void
struct proc *p;
char *path;
char *bp, *bend;
- struct cwdinfo *cwdi;
+ const struct cwdinfo *cwdi;
struct vnode *vp;
size_t len, lenused;
@@ -2609,11 +2610,12 @@ fill_cwd(struct lwp *l, pid_t pid, void
bend = bp;
*(--bp) = '\0';
- cwdi = p->p_cwdi;
- rw_enter(&cwdi->cwdi_lock, RW_READER);
+ cwdi = cwdlock(p);
vp = cwdi->cwdi_cdir;
+ vref(vp);
+ cwdunlock(p);
error = getcwd_common(vp, NULL, &bp, path, len/2, 0, l);
- rw_exit(&cwdi->cwdi_lock);
+ vrele(vp);
if (error)
goto out;
Index: src/sys/kern/uipc_usrreq.c
diff -u src/sys/kern/uipc_usrreq.c:1.196 src/sys/kern/uipc_usrreq.c:1.197
--- src/sys/kern/uipc_usrreq.c:1.196 Sat Feb 1 02:23:23 2020
+++ src/sys/kern/uipc_usrreq.c Sun Feb 23 22:14:03 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: uipc_usrreq.c,v 1.196 2020/02/01 02:23:23 riastradh Exp $ */
+/* $NetBSD: uipc_usrreq.c,v 1.197 2020/02/23 22:14:03 ad Exp $ */
/*-
- * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc.
+ * Copyright (c) 1998, 2000, 2004, 2008, 2009, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -96,7 +96,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.196 2020/02/01 02:23:23 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.197 2020/02/23 22:14:03 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_compat_netbsd.h"
@@ -1395,6 +1395,7 @@ unp_externalize(struct mbuf *rights, str
{
struct cmsghdr * const cm = mtod(rights, struct cmsghdr *);
struct proc * const p = l->l_proc;
+ struct vnode *rvp = NULL;
file_t **rp;
int error = 0;
@@ -1404,9 +1405,11 @@ unp_externalize(struct mbuf *rights, str
goto noop;
int * const fdp = kmem_alloc(nfds * sizeof(int), KM_SLEEP);
- rw_enter(&p->p_cwdi->cwdi_lock, RW_READER);
+
+ KASSERT(l == curlwp);
/* Make sure the recipient should be able to see the files.. */
+ rvp = cwdrdir();
rp = (file_t **)CMSG_DATA(cm);
for (size_t i = 0; i < nfds; i++) {
file_t * const fp = *rp++;
@@ -1420,16 +1423,15 @@ unp_externalize(struct mbuf *rights, str
* sure it's inside the subtree we're allowed
* to access.
*/
- if (p->p_cwdi->cwdi_rdir != NULL && fp->f_type == DTYPE_VNODE) {
+ if (rvp != NULL && fp->f_type == DTYPE_VNODE) {
vnode_t *vp = fp->f_vnode;
- if ((vp->v_type == VDIR) &&
- !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) {
+ if ((vp->v_type == VDIR) && !vn_isunder(vp, rvp, l)) {
error = EPERM;
goto out;
}
}
}
-
+
restart:
/*
* First loop -- allocate file descriptor table slots for the
@@ -1506,7 +1508,6 @@ unp_externalize(struct mbuf *rights, str
cm->cmsg_len = CMSG_LEN(0);
rights->m_len = CMSG_SPACE(0);
}
- rw_exit(&p->p_cwdi->cwdi_lock);
kmem_free(fdp, nfds * sizeof(int));
noop:
@@ -1516,6 +1517,10 @@ unp_externalize(struct mbuf *rights, str
KASSERT(cm->cmsg_len <= rights->m_len);
memset(&mtod(rights, char *)[cm->cmsg_len], 0, rights->m_len -
cm->cmsg_len);
+
+ /* Async release since in the networking code. */
+ if (rvp != NULL)
+ vrele_async(rvp);
return error;
}
Index: src/sys/kern/vfs_cwd.c
diff -u src/sys/kern/vfs_cwd.c:1.4 src/sys/kern/vfs_cwd.c:1.5
--- src/sys/kern/vfs_cwd.c:1.4 Tue Feb 15 15:54:28 2011
+++ src/sys/kern/vfs_cwd.c Sun Feb 23 22:14:03 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: vfs_cwd.c,v 1.4 2011/02/15 15:54:28 pooka Exp $ */
+/* $NetBSD: vfs_cwd.c,v 1.5 2020/02/23 22:14:03 ad Exp $ */
/*-
- * Copyright (c) 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,13 +31,14 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_cwd.c,v 1.4 2011/02/15 15:54:28 pooka Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_cwd.c,v 1.5 2020/02/23 22:14:03 ad Exp $");
#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/vnode.h>
+#include <sys/xcall.h>
static int cwdi_ctor(void *, void *, int);
static void cwdi_dtor(void *, void *);
@@ -64,9 +65,8 @@ cwdinit(void)
struct cwdinfo *copy;
cwdi = pool_cache_get(cwdi_cache, PR_WAITOK);
- copy = curproc->p_cwdi;
- rw_enter(&copy->cwdi_lock, RW_READER);
+ copy = cwdenter(RW_READER);
cwdi->cwdi_cdir = copy->cwdi_cdir;
if (cwdi->cwdi_cdir)
vref(cwdi->cwdi_cdir);
@@ -78,7 +78,7 @@ cwdinit(void)
vref(cwdi->cwdi_edir);
cwdi->cwdi_cmask = copy->cwdi_cmask;
cwdi->cwdi_refcnt = 1;
- rw_exit(&copy->cwdi_lock);
+ cwdexit(copy);
return (cwdi);
}
@@ -88,7 +88,7 @@ cwdi_ctor(void *arg, void *obj, int flag
{
struct cwdinfo *cwdi = obj;
- rw_init(&cwdi->cwdi_lock);
+ mutex_init(&cwdi->cwdi_lock, MUTEX_DEFAULT, IPL_NONE);
return 0;
}
@@ -98,7 +98,7 @@ cwdi_dtor(void *arg, void *obj)
{
struct cwdinfo *cwdi = obj;
- rw_destroy(&cwdi->cwdi_lock);
+ mutex_destroy(&cwdi->cwdi_lock);
}
/*
@@ -159,3 +159,120 @@ cwdexec(struct proc *p)
vrele(p->p_cwdi->cwdi_edir);
}
}
+
+/*
+ * Used when curlwp wants to use or update its cwdinfo, and needs to prevent
+ * concurrent changes.
+ *
+ * "op" is either RW_READER or RW_WRITER indicating the kind of lock
+ * required. If a read lock on the cwdinfo is requested, then curlwp must
+ * not block while holding the lock, or the cwdinfo could become stale.
+ * It's okay to block while holding a write lock.
+ */
+struct cwdinfo *
+cwdenter(krw_t op)
+{
+ struct cwdinfo *cwdi = curproc->p_cwdi;
+
+ if (__predict_true(op == RW_READER)) {
+ /*
+ * Disable preemption to hold off the writer side's xcall,
+ * then observe the lock. If it's already taken, we need to
+ * join in the melee. Otherwise we're good to go; keeping
+ * the xcall at bay with kpreempt_disable() will prevent any
+ * changes while the caller is pondering the cwdinfo.
+ */
+ kpreempt_disable();
+ if (__predict_true(mutex_owner(&cwdi->cwdi_lock) == NULL)) {
+ membar_consumer();
+ return cwdi;
+ }
+ kpreempt_enable();
+ mutex_enter(&cwdi->cwdi_lock);
+ } else {
+ /*
+ * About to make changes. If there's more than one
+ * reference on the cwdinfo, or curproc has more than one
+ * LWP, then LWPs other than curlwp can also see the
+ * cwdinfo. Run a cross call to get all LWPs out of the
+ * read section.
+ */
+ mutex_enter(&cwdi->cwdi_lock);
+ if (cwdi->cwdi_refcnt + curproc->p_nlwps > 2)
+ xc_barrier(0);
+ }
+ return cwdi;
+}
+
+/*
+ * Release a lock previously taken with cwdenter().
+ */
+void
+cwdexit(struct cwdinfo *cwdi)
+{
+ struct lwp *l = curlwp;
+
+ KASSERT(cwdi == l->l_proc->p_cwdi);
+
+ if (__predict_true(mutex_owner(&cwdi->cwdi_lock) != l))
+ kpreempt_enable();
+ else
+ mutex_exit(&cwdi->cwdi_lock);
+}
+
+/*
+ * Called when there is a need to inspect some other process' cwdinfo. Used
+ * by procfs and sysctl. This gets you a read lock; the cwdinfo must NOT be
+ * changed.
+ */
+const struct cwdinfo *
+cwdlock(struct proc *p)
+{
+ struct cwdinfo *cwdi = p->p_cwdi;
+
+ mutex_enter(&cwdi->cwdi_lock);
+ return cwdi;
+}
+
+/*
+ * Release a lock acquired with cwdlock().
+ */
+void
+cwdunlock(struct proc *p)
+{
+ struct cwdinfo *cwdi = p->p_cwdi;
+
+ mutex_exit(&cwdi->cwdi_lock);
+}
+
+/*
+ * Get a reference to the current working directory and return it.
+ */
+struct vnode *
+cwdcdir(void)
+{
+ struct cwdinfo *cwdi;
+ struct vnode *vp;
+
+ cwdi = cwdenter(RW_READER);
+ if ((vp = cwdi->cwdi_cdir) != NULL)
+ vref(vp);
+ cwdexit(cwdi);
+ return vp;
+}
+
+/*
+ * Get a reference to the root directory and return it.
+ */
+struct vnode *
+cwdrdir(void)
+{
+ struct cwdinfo *cwdi;
+ struct vnode *vp;
+
+ cwdi = cwdenter(RW_READER);
+ if ((vp = cwdi->cwdi_rdir) != NULL)
+ vref(vp);
+ cwdexit(cwdi);
+ return vp;
+}
Index: src/sys/kern/vfs_getcwd.c
diff -u src/sys/kern/vfs_getcwd.c:1.54 src/sys/kern/vfs_getcwd.c:1.55
--- src/sys/kern/vfs_getcwd.c:1.54 Wed Jan 8 12:04:56 2020
+++ src/sys/kern/vfs_getcwd.c Sun Feb 23 22:14:03 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_getcwd.c,v 1.54 2020/01/08 12:04:56 ad Exp $ */
+/* $NetBSD: vfs_getcwd.c,v 1.55 2020/02/23 22:14:03 ad Exp $ */
/*-
* Copyright (c) 1999 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c,v 1.54 2020/01/08 12:04:56 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c,v 1.55 2020/02/23 22:14:03 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -503,7 +503,7 @@ sys___getcwd(struct lwp *l, const struct
char *bp, *bend;
int len = SCARG(uap, length);
int lenused;
- struct cwdinfo *cwdi;
+ struct vnode *dvp;
if (len > MAXPATHLEN * 4)
len = MAXPATHLEN * 4;
@@ -520,11 +520,10 @@ sys___getcwd(struct lwp *l, const struct
* Since each entry takes up at least 2 bytes in the output buffer,
* limit it to N/2 vnodes for an N byte buffer.
*/
- cwdi = l->l_proc->p_cwdi;
- rw_enter(&cwdi->cwdi_lock, RW_READER);
- error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path,
+ dvp = cwdcdir();
+ error = getcwd_common(dvp, NULL, &bp, path,
len/2, GETCWD_CHECK_ACCESS, l);
- rw_exit(&cwdi->cwdi_lock);
+ vrele(dvp);
if (error)
goto out;
Index: src/sys/kern/vfs_lookup.c
diff -u src/sys/kern/vfs_lookup.c:1.213 src/sys/kern/vfs_lookup.c:1.214
--- src/sys/kern/vfs_lookup.c:1.213 Fri Jan 17 20:08:09 2020
+++ src/sys/kern/vfs_lookup.c Sun Feb 23 22:14:03 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_lookup.c,v 1.213 2020/01/17 20:08:09 ad Exp $ */
+/* $NetBSD: vfs_lookup.c,v 1.214 2020/02/23 22:14:03 ad Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@@ -37,7 +37,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.213 2020/01/17 20:08:09 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.214 2020/02/23 22:14:03 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_magiclinks.h"
@@ -535,7 +535,6 @@ namei_getstartdir(struct namei_state *st
struct nameidata *ndp = state->ndp;
struct componentname *cnp = state->cnp;
struct cwdinfo *cwdi; /* pointer to cwd state */
- struct lwp *self = curlwp; /* thread doing namei() */
struct vnode *rootdir, *erootdir, *curdir, *startdir;
if (state->root_referenced) {
@@ -546,8 +545,8 @@ namei_getstartdir(struct namei_state *st
state->root_referenced = 0;
}
- cwdi = self->l_proc->p_cwdi;
- rw_enter(&cwdi->cwdi_lock, RW_READER);
+ /* NB: must not block while inspecting the cwdinfo. */
+ cwdi = cwdenter(RW_READER);
/* root dir */
if (cwdi->cwdi_rdir == NULL || (cnp->cn_flags & NOCHROOT)) {
@@ -605,7 +604,7 @@ namei_getstartdir(struct namei_state *st
vref(state->ndp->ni_erootdir);
state->root_referenced = 1;
- rw_exit(&cwdi->cwdi_lock);
+ cwdexit(cwdi);
return startdir;
}
Index: src/sys/kern/vfs_mount.c
diff -u src/sys/kern/vfs_mount.c:1.74 src/sys/kern/vfs_mount.c:1.75
--- src/sys/kern/vfs_mount.c:1.74 Fri Jan 17 20:08:09 2020
+++ src/sys/kern/vfs_mount.c Sun Feb 23 22:14:03 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: vfs_mount.c,v 1.74 2020/01/17 20:08:09 ad Exp $ */
+/* $NetBSD: vfs_mount.c,v 1.75 2020/02/23 22:14:03 ad Exp $ */
/*-
- * Copyright (c) 1997-2019 The NetBSD Foundation, Inc.
+ * Copyright (c) 1997-2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.74 2020/01/17 20:08:09 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.75 2020/02/23 22:14:03 ad Exp $");
#include <sys/param.h>
#include <sys/kernel.h>
@@ -90,6 +90,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,
#include <sys/systm.h>
#include <sys/vfs_syscalls.h>
#include <sys/vnode_impl.h>
+#include <sys/xcall.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>
@@ -675,18 +676,23 @@ mount_checkdirs(vnode_t *olddp)
rele2 = NULL;
atomic_inc_uint(&cwdi->cwdi_refcnt);
mutex_exit(proc_lock);
- rw_enter(&cwdi->cwdi_lock, RW_WRITER);
- if (cwdi->cwdi_cdir == olddp) {
- rele1 = cwdi->cwdi_cdir;
- vref(newdp);
- cwdi->cwdi_cdir = newdp;
- }
- if (cwdi->cwdi_rdir == olddp) {
- rele2 = cwdi->cwdi_rdir;
- vref(newdp);
- cwdi->cwdi_rdir = newdp;
+ mutex_enter(&cwdi->cwdi_lock);
+ if (cwdi->cwdi_cdir == olddp ||
+ cwdi->cwdi_rdir == olddp) {
+ /* XXX belongs in vfs_cwd.c, but rump. */
+ xc_barrier(0);
+ if (cwdi->cwdi_cdir == olddp) {
+ rele1 = cwdi->cwdi_cdir;
+ vref(newdp);
+ cwdi->cwdi_cdir = newdp;
+ }
+ if (cwdi->cwdi_rdir == olddp) {
+ rele2 = cwdi->cwdi_rdir;
+ vref(newdp);
+ cwdi->cwdi_rdir = newdp;
+ }
}
- rw_exit(&cwdi->cwdi_lock);
+ mutex_exit(&cwdi->cwdi_lock);
cwdfree(cwdi);
if (rele1 != NULL)
vrele(rele1);
Index: src/sys/kern/vfs_subr.c
diff -u src/sys/kern/vfs_subr.c:1.480 src/sys/kern/vfs_subr.c:1.481
--- src/sys/kern/vfs_subr.c:1.480 Sun Feb 23 15:46:41 2020
+++ src/sys/kern/vfs_subr.c Sun Feb 23 22:14:03 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: vfs_subr.c,v 1.480 2020/02/23 15:46:41 ad Exp $ */
+/* $NetBSD: vfs_subr.c,v 1.481 2020/02/23 22:14:03 ad Exp $ */
/*-
- * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008, 2019
+ * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008, 2019, 2020
* The NetBSD Foundation, Inc.
* All rights reserved.
*
@@ -69,7 +69,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.480 2020/02/23 15:46:41 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.481 2020/02/23 22:14:03 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_ddb.h"
@@ -1111,7 +1111,7 @@ vprint_common(struct vnode *vp, const ch
vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
(*pr)("%ssize %" PRIx64 " writesize %" PRIx64 " numoutput %d\n",
prefix, vp->v_size, vp->v_writesize, vp->v_numoutput);
- (*pr)("%sdata %p lock %p\n", prefix, vp->v_data, vip->vi_lock);
+ (*pr)("%sdata %p lock %p\n", prefix, vp->v_data, &vip->vi_lock);
(*pr)("%sstate %s key(%p %zd)", prefix, vstate_name(vip->vi_state),
vip->vi_key.vk_mount, vip->vi_key.vk_key_len);
@@ -1215,24 +1215,25 @@ set_statvfs_info(const char *onp, int uk
size_t size;
struct statvfs *sfs = &mp->mnt_stat;
int (*fun)(const void *, void *, size_t, size_t *);
+ struct vnode *rvp;
(void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
sizeof(mp->mnt_stat.f_fstypename));
if (onp) {
- struct cwdinfo *cwdi = l->l_proc->p_cwdi;
fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
- if (cwdi->cwdi_rdir != NULL) {
+ KASSERT(l == curlwp);
+ rvp = cwdrdir();
+ if (rvp != NULL) {
size_t len;
char *bp;
char *path = PNBUF_GET();
bp = path + MAXPATHLEN;
*--bp = '\0';
- rw_enter(&cwdi->cwdi_lock, RW_READER);
- error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
+ error = getcwd_common(rvp, rootvnode, &bp,
path, MAXPATHLEN / 2, 0, l);
- rw_exit(&cwdi->cwdi_lock);
+ vrele(rvp);
if (error) {
PNBUF_PUT(path);
return error;
@@ -1544,7 +1545,7 @@ vfs_vnode_lock_print(void *vlock, int fu
for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp)) {
TAILQ_FOREACH(vip, &mp->mnt_vnodelist, vi_mntvnodes) {
- if (vip->vi_lock == vlock ||
+ if (&vip->vi_lock == vlock ||
VIMPL_TO_VNODE(vip)->v_interlock == vlock)
vfs_vnode_print(VIMPL_TO_VNODE(vip), full, pr);
}
Index: src/sys/kern/vfs_syscalls.c
diff -u src/sys/kern/vfs_syscalls.c:1.541 src/sys/kern/vfs_syscalls.c:1.542
--- src/sys/kern/vfs_syscalls.c:1.541 Sat Feb 22 08:58:39 2020
+++ src/sys/kern/vfs_syscalls.c Sun Feb 23 22:14:04 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: vfs_syscalls.c,v 1.541 2020/02/22 08:58:39 maxv Exp $ */
+/* $NetBSD: vfs_syscalls.c,v 1.542 2020/02/23 22:14:04 ad Exp $ */
/*-
- * Copyright (c) 2008, 2009, 2019 The NetBSD Foundation, Inc.
+ * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -70,7 +70,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.541 2020/02/22 08:58:39 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.542 2020/02/23 22:14:04 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_fileassoc.h"
@@ -1100,7 +1100,7 @@ int
dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags,
int root)
{
- struct cwdinfo *cwdi = l->l_proc->p_cwdi;
+ struct vnode *rvp;
int error = 0;
/*
@@ -1111,19 +1111,20 @@ dostatvfs(struct mount *mp, struct statv
if (flags == MNT_NOWAIT || flags == MNT_LAZY ||
(flags != MNT_WAIT && flags != 0)) {
memcpy(sp, &mp->mnt_stat, sizeof(*sp));
- goto done;
- }
-
- /* Get the filesystem stats now */
- memset(sp, 0, sizeof(*sp));
- if ((error = VFS_STATVFS(mp, sp)) != 0) {
- return error;
+ rvp = NULL;
+ } else {
+ /* Get the filesystem stats now */
+ memset(sp, 0, sizeof(*sp));
+ if ((error = VFS_STATVFS(mp, sp)) != 0) {
+ return error;
+ }
+ KASSERT(l == curlwp);
+ rvp = cwdrdir();
+ if (rvp == NULL)
+ (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
}
- if (cwdi->cwdi_rdir == NULL)
- (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat));
-done:
- if (cwdi->cwdi_rdir != NULL) {
+ if (rvp != NULL) {
size_t len;
char *bp;
char c;
@@ -1131,12 +1132,11 @@ done:
bp = path + MAXPATHLEN;
*--bp = '\0';
- rw_enter(&cwdi->cwdi_lock, RW_READER);
- error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path,
+ error = getcwd_common(rvp, rootvnode, &bp, path,
MAXPATHLEN / 2, 0, l);
- rw_exit(&cwdi->cwdi_lock);
if (error) {
PNBUF_PUT(path);
+ vrele(rvp);
return error;
}
len = strlen(bp);
@@ -1161,6 +1161,7 @@ done:
}
}
PNBUF_PUT(path);
+ vrele(rvp);
}
sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK;
return error;
@@ -1330,7 +1331,6 @@ sys_fchdir(struct lwp *l, const struct s
/* {
syscallarg(int) fd;
} */
- struct proc *p = l->l_proc;
struct cwdinfo *cwdi;
struct vnode *vp, *tdp;
struct mount *mp;
@@ -1370,8 +1370,7 @@ sys_fchdir(struct lwp *l, const struct s
* Disallow changing to a directory not under the process's
* current root directory (if there is one).
*/
- cwdi = p->p_cwdi;
- rw_enter(&cwdi->cwdi_lock, RW_WRITER);
+ cwdi = cwdenter(RW_WRITER);
if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) {
vrele(vp);
error = EPERM; /* operation not permitted */
@@ -1379,7 +1378,7 @@ sys_fchdir(struct lwp *l, const struct s
vrele(cwdi->cwdi_cdir);
cwdi->cwdi_cdir = vp;
}
- rw_exit(&cwdi->cwdi_lock);
+ cwdexit(cwdi);
out:
fd_putfile(fd);
@@ -1393,7 +1392,6 @@ sys_fchdir(struct lwp *l, const struct s
int
sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval)
{
- struct proc *p = l->l_proc;
struct vnode *vp;
file_t *fp;
int error, fd = SCARG(uap, fd);
@@ -1414,8 +1412,7 @@ sys_fchroot(struct lwp *l, const struct
if (error)
goto out;
vref(vp);
-
- change_root(p->p_cwdi, vp, l);
+ change_root(vp);
out:
fd_putfile(fd);
@@ -1432,19 +1429,19 @@ sys_chdir(struct lwp *l, const struct sy
/* {
syscallarg(const char *) path;
} */
- struct proc *p = l->l_proc;
struct cwdinfo *cwdi;
int error;
- struct vnode *vp;
+ struct vnode *vp, *ovp;
- if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
- &vp, l)) != 0)
+ error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l);
+ if (error != 0)
return (error);
- cwdi = p->p_cwdi;
- rw_enter(&cwdi->cwdi_lock, RW_WRITER);
- vrele(cwdi->cwdi_cdir);
+
+ cwdi = cwdenter(RW_WRITER);
+ ovp = cwdi->cwdi_cdir;
cwdi->cwdi_cdir = vp;
- rw_exit(&cwdi->cwdi_lock);
+ cwdexit(cwdi);
+ vrele(ovp);
return (0);
}
@@ -1458,20 +1455,17 @@ sys_chroot(struct lwp *l, const struct s
/* {
syscallarg(const char *) path;
} */
- struct proc *p = l->l_proc;
int error;
struct vnode *vp;
if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT,
KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0)
return (error);
- if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE,
- &vp, l)) != 0)
- return (error);
-
- change_root(p->p_cwdi, vp, l);
- return (0);
+ error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l);
+ if (error == 0)
+ change_root(vp);
+ return error;
}
/*
@@ -1479,14 +1473,16 @@ sys_chroot(struct lwp *l, const struct s
* NB: callers need to properly authorize the change root operation.
*/
void
-change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l)
+change_root(struct vnode *vp)
{
- struct proc *p = l->l_proc;
+ struct cwdinfo *cwdi;
kauth_cred_t ncred;
+ struct lwp *l = curlwp;
+ struct proc *p = l->l_proc;
ncred = kauth_cred_alloc();
- rw_enter(&cwdi->cwdi_lock, RW_WRITER);
+ cwdi = cwdenter(RW_WRITER);
if (cwdi->cwdi_rdir != NULL)
vrele(cwdi->cwdi_rdir);
cwdi->cwdi_rdir = vp;
@@ -1505,7 +1501,7 @@ change_root(struct cwdinfo *cwdi, struct
vref(vp);
cwdi->cwdi_cdir = vp;
}
- rw_exit(&cwdi->cwdi_lock);
+ cwdexit(cwdi);
/* Get a write lock on the process credential. */
proc_crmod_enter();
@@ -4676,21 +4672,15 @@ sys_umask(struct lwp *l, const struct sy
/* {
syscallarg(mode_t) newmask;
} */
- struct proc *p = l->l_proc;
- struct cwdinfo *cwdi;
/*
- * cwdi->cwdi_cmask will be read unlocked elsewhere. What's
- * important is that we serialize changes to the mask. The
- * rw_exit() will issue a write memory barrier on our behalf,
- * and force the changes out to other CPUs (as it must use an
- * atomic operation, draining the local CPU's store buffers).
- */
- cwdi = p->p_cwdi;
- rw_enter(&cwdi->cwdi_lock, RW_WRITER);
- *retval = cwdi->cwdi_cmask;
- cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS;
- rw_exit(&cwdi->cwdi_lock);
+ * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of
+ * serialization with those reads is required. It's important to
+ * return a coherent answer for the caller of umask() though, and
+ * the atomic operation accomplishes that.
+ */
+ *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask,
+ SCARG(uap, newmask) & ALLPERMS);
return (0);
}
Index: src/sys/kern/vfs_vnode.c
diff -u src/sys/kern/vfs_vnode.c:1.111 src/sys/kern/vfs_vnode.c:1.112
--- src/sys/kern/vfs_vnode.c:1.111 Sun Feb 23 15:46:41 2020
+++ src/sys/kern/vfs_vnode.c Sun Feb 23 22:14:04 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_vnode.c,v 1.111 2020/02/23 15:46:41 ad Exp $ */
+/* $NetBSD: vfs_vnode.c,v 1.112 2020/02/23 22:14:04 ad Exp $ */
/*-
* Copyright (c) 1997-2011, 2019, 2020 The NetBSD Foundation, Inc.
@@ -143,10 +143,19 @@
* as vput(9), routines. Common points holding references are e.g.
* file openings, current working directory, mount points, etc.
*
+ * Note on v_usecount and its locking
+ *
+ * At nearly all points it is known that v_usecount could be zero,
+ * the vnode_t::v_interlock will be held. To change the count away
+ * from zero, the interlock must be held. To change from a non-zero
+ * value to zero, again the interlock must be held.
+ *
+ * Changing the usecount from a non-zero value to a non-zero value can
+ * safely be done using atomic operations, without the interlock held.
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.111 2020/02/23 15:46:41 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.112 2020/02/23 22:14:04 ad Exp $");
#ifdef _KERNEL_OPT
#include "opt_pax.h"
@@ -677,6 +686,27 @@ vdrain_thread(void *cookie)
}
/*
+ * Try to drop reference on a vnode. Abort if we are releasing the
+ * last reference. Note: this _must_ succeed if not the last reference.
+ */
+static bool
+vtryrele(vnode_t *vp)
+{
+ u_int use, next;
+
+ for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) {
+ if (__predict_false(use == 1)) {
+ return false;
+ }
+ KASSERT(use > 1);
+ next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
+ if (__predict_true(next == use)) {
+ return true;
+ }
+ }
+}
+
+/*
* vput: unlock and release the reference.
*/
void
@@ -684,7 +714,20 @@ vput(vnode_t *vp)
{
int lktype;
- if ((vp->v_vflag & VV_LOCKSWORK) == 0) {
+ /*
+ * Do an unlocked check of v_usecount. If it looks like we're not
+ * about to drop the last reference, then unlock the vnode and try
+ * to drop the reference. If it ends up being the last reference
+ * after all, vrelel() can fix it all up. Most of the time this
+ * will all go to plan.
+ */
+ if (atomic_load_relaxed(&vp->v_usecount) > 1) {
+ VOP_UNLOCK(vp);
+ if (vtryrele(vp)) {
+ return;
+ }
+ lktype = LK_NONE;
+ } else if ((vp->v_vflag & VV_LOCKSWORK) == 0) {
lktype = LK_EXCLUSIVE;
} else {
lktype = VOP_ISLOCKED(vp);
@@ -713,14 +756,15 @@ vrelel(vnode_t *vp, int flags, int lktyp
}
/*
- * If not the last reference, just drop the reference count
- * and unlock.
+ * If not the last reference, just drop the reference count and
+ * unlock. VOP_UNLOCK() is called here without a vnode reference
+ * held, but is ok as the hold of v_interlock will stop the vnode
+ * from disappearing.
*/
- if (vp->v_usecount > 1) {
+ if (vtryrele(vp)) {
if (lktype != LK_NONE) {
VOP_UNLOCK(vp);
}
- vp->v_usecount--;
mutex_exit(vp->v_interlock);
return;
}
@@ -802,8 +846,7 @@ vrelel(vnode_t *vp, int flags, int lktyp
mutex_enter(vp->v_interlock);
VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED);
if (!recycle) {
- if (vp->v_usecount > 1) {
- vp->v_usecount--;
+ if (vtryrele(vp)) {
mutex_exit(vp->v_interlock);
return;
}
@@ -834,8 +877,7 @@ vrelel(vnode_t *vp, int flags, int lktyp
KASSERT(vp->v_usecount > 0);
}
- vp->v_usecount--;
- if (vp->v_usecount != 0) {
+ if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
/* Gained another reference while being reclaimed. */
mutex_exit(vp->v_interlock);
return;
@@ -862,6 +904,9 @@ void
vrele(vnode_t *vp)
{
+ if (vtryrele(vp)) {
+ return;
+ }
mutex_enter(vp->v_interlock);
vrelel(vp, 0, LK_NONE);
}
@@ -873,6 +918,9 @@ void
vrele_async(vnode_t *vp)
{
+ if (vtryrele(vp)) {
+ return;
+ }
mutex_enter(vp->v_interlock);
vrelel(vp, VRELEL_ASYNC, LK_NONE);
}
@@ -880,16 +928,16 @@ vrele_async(vnode_t *vp)
/*
* Vnode reference, where a reference is already held by some other
* object (for example, a file structure).
+ *
+ * NB: we have lockless code sequences that rely on this not blocking.
*/
void
vref(vnode_t *vp)
{
- KASSERT(vp->v_usecount != 0);
+ KASSERT(atomic_load_relaxed(&vp->v_usecount) != 0);
- mutex_enter(vp->v_interlock);
- vp->v_usecount++;
- mutex_exit(vp->v_interlock);
+ atomic_inc_uint(&vp->v_usecount);
}
/*
@@ -907,6 +955,18 @@ vholdl(vnode_t *vp)
}
/*
+ * Page or buffer structure gets a reference.
+ */
+void
+vhold(vnode_t *vp)
+{
+
+ mutex_enter(vp->v_interlock);
+ vholdl(vp);
+ mutex_exit(vp->v_interlock);
+}
+
+/*
* Page or buffer structure frees a reference.
* Called with v_interlock held.
*/
@@ -926,6 +986,18 @@ holdrelel(vnode_t *vp)
}
/*
+ * Page or buffer structure frees a reference.
+ */
+void
+holdrele(vnode_t *vp)
+{
+
+ mutex_enter(vp->v_interlock);
+ holdrelel(vp);
+ mutex_exit(vp->v_interlock);
+}
+
+/*
* Recycle an unused vnode if caller holds the last reference.
*/
bool
@@ -1027,7 +1099,7 @@ vrevoke(vnode_t *vp)
if (VSTATE_GET(vp) == VS_RECLAIMED) {
mutex_exit(vp->v_interlock);
} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
- vp->v_usecount++;
+ atomic_inc_uint(&vp->v_usecount);
mutex_exit(vp->v_interlock);
vgone(vp);
} else {
@@ -1082,8 +1154,8 @@ static void
vcache_init(void)
{
- vcache_pool = pool_cache_init(sizeof(vnode_impl_t), 0, 0, 0,
- "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
+ vcache_pool = pool_cache_init(sizeof(vnode_impl_t), coherency_unit,
+ 0, 0, "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
KASSERT(vcache_pool != NULL);
mutex_init(&vcache_lock, MUTEX_DEFAULT, IPL_NONE);
cv_init(&vcache_cv, "vcache");
@@ -1154,7 +1226,7 @@ vcache_alloc(void)
vp = VIMPL_TO_VNODE(vip);
memset(vip, 0, sizeof(*vip));
- vip->vi_lock = rw_obj_alloc();
+ rw_init(&vip->vi_lock);
vp->v_interlock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
/* SLIST_INIT(&vip->vi_hash); */
@@ -1220,7 +1292,7 @@ vcache_free(vnode_impl_t *vip)
spec_node_destroy(vp);
mutex_obj_free(vp->v_interlock);
- rw_obj_free(vip->vi_lock);
+ rw_destroy(&vip->vi_lock);
uvm_obj_destroy(&vp->v_uobj, true);
cv_destroy(&vp->v_cv);
pool_cache_put(vcache_pool, vip);
@@ -1244,8 +1316,10 @@ vcache_tryvget(vnode_t *vp)
error = ENOENT;
else if (__predict_false(VSTATE_GET(vp) != VS_LOADED))
error = EBUSY;
+ else if (vp->v_usecount == 0)
+ vp->v_usecount = 1;
else
- vp->v_usecount++;
+ atomic_inc_uint(&vp->v_usecount);
mutex_exit(vp->v_interlock);
@@ -1279,7 +1353,10 @@ vcache_vget(vnode_t *vp)
return ENOENT;
}
VSTATE_ASSERT(vp, VS_LOADED);
- vp->v_usecount++;
+ if (vp->v_usecount == 0)
+ vp->v_usecount = 1;
+ else
+ atomic_inc_uint(&vp->v_usecount);
mutex_exit(vp->v_interlock);
return 0;
Index: src/sys/kern/vnode_if.sh
diff -u src/sys/kern/vnode_if.sh:1.68 src/sys/kern/vnode_if.sh:1.69
--- src/sys/kern/vnode_if.sh:1.68 Sun Dec 1 13:56:29 2019
+++ src/sys/kern/vnode_if.sh Sun Feb 23 22:14:04 2020
@@ -29,7 +29,7 @@ copyright="\
* SUCH DAMAGE.
*/
"
-SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.68 2019/12/01 13:56:29 ad Exp $'
+SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.69 2020/02/23 22:14:04 ad Exp $'
# Script to produce VFS front-end sugar.
#
@@ -481,7 +481,7 @@ function bodynorm() {
}
if (fstrans == "LOCK")
printf("\terror = vop_pre(%s, &mp, &mpsafe, %s);\n",
- argname[0], "(flags & (LK_UPGRADE|LK_DOWNGRADE) ? FST_NO : (flags & LK_NOWAIT ? FST_TRY : FST_YES))");
+ argname[0], "(!(flags & (LK_SHARED|LK_EXCLUSIVE)) ? FST_NO : (flags & LK_NOWAIT ? FST_TRY : FST_YES))");
else if (fstrans == "UNLOCK")
printf("\terror = vop_pre(%s, &mp, &mpsafe, FST_%s);\n",
argname[0], "NO");
Index: src/sys/miscfs/genfs/genfs_vnops.c
diff -u src/sys/miscfs/genfs/genfs_vnops.c:1.201 src/sys/miscfs/genfs/genfs_vnops.c:1.202
--- src/sys/miscfs/genfs/genfs_vnops.c:1.201 Sun Feb 23 15:46:41 2020
+++ src/sys/miscfs/genfs/genfs_vnops.c Sun Feb 23 22:14:04 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: genfs_vnops.c,v 1.201 2020/02/23 15:46:41 ad Exp $ */
+/* $NetBSD: genfs_vnops.c,v 1.202 2020/02/23 22:14:04 ad Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -57,7 +57,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.201 2020/02/23 15:46:41 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: genfs_vnops.c,v 1.202 2020/02/23 22:14:04 ad Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -293,21 +293,19 @@ genfs_deadlock(void *v)
return ENOENT;
if (ISSET(flags, LK_DOWNGRADE)) {
- rw_downgrade(vip->vi_lock);
+ rw_downgrade(&vip->vi_lock);
} else if (ISSET(flags, LK_UPGRADE)) {
- if (!rw_tryupgrade(vip->vi_lock)) {
- if (ISSET(flags, LK_NOWAIT))
- return EBUSY;
- rw_exit(vip->vi_lock);
- rw_enter(vip->vi_lock, RW_WRITER);
+ KASSERT(ISSET(flags, LK_NOWAIT));
+ if (!rw_tryupgrade(&vip->vi_lock)) {
+ return EBUSY;
}
- } else {
+ } else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
if (ISSET(flags, LK_NOWAIT)) {
- if (!rw_tryenter(vip->vi_lock, op))
+ if (!rw_tryenter(&vip->vi_lock, op))
return EBUSY;
} else {
- rw_enter(vip->vi_lock, op);
+ rw_enter(&vip->vi_lock, op);
}
}
VSTATE_ASSERT_UNLOCKED(vp, VS_RECLAIMED);
@@ -326,7 +324,7 @@ genfs_deadunlock(void *v)
vnode_t *vp = ap->a_vp;
vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
- rw_exit(vip->vi_lock);
+ rw_exit(&vip->vi_lock);
return 0;
}
@@ -347,21 +345,19 @@ genfs_lock(void *v)
krw_t op;
if (ISSET(flags, LK_DOWNGRADE)) {
- rw_downgrade(vip->vi_lock);
+ rw_downgrade(&vip->vi_lock);
} else if (ISSET(flags, LK_UPGRADE)) {
- if (!rw_tryupgrade(vip->vi_lock)) {
- if (ISSET(flags, LK_NOWAIT))
- return EBUSY;
- rw_exit(vip->vi_lock);
- rw_enter(vip->vi_lock, RW_WRITER);
+ KASSERT(ISSET(flags, LK_NOWAIT));
+ if (!rw_tryupgrade(&vip->vi_lock)) {
+ return EBUSY;
}
- } else {
+ } else if ((flags & (LK_EXCLUSIVE | LK_SHARED)) != 0) {
op = (ISSET(flags, LK_EXCLUSIVE) ? RW_WRITER : RW_READER);
if (ISSET(flags, LK_NOWAIT)) {
- if (!rw_tryenter(vip->vi_lock, op))
+ if (!rw_tryenter(&vip->vi_lock, op))
return EBUSY;
} else {
- rw_enter(vip->vi_lock, op);
+ rw_enter(&vip->vi_lock, op);
}
}
VSTATE_ASSERT_UNLOCKED(vp, VS_ACTIVE);
@@ -380,7 +376,7 @@ genfs_unlock(void *v)
vnode_t *vp = ap->a_vp;
vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
- rw_exit(vip->vi_lock);
+ rw_exit(&vip->vi_lock);
return 0;
}
@@ -397,10 +393,10 @@ genfs_islocked(void *v)
vnode_t *vp = ap->a_vp;
vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
- if (rw_write_held(vip->vi_lock))
+ if (rw_write_held(&vip->vi_lock))
return LK_EXCLUSIVE;
- if (rw_read_held(vip->vi_lock))
+ if (rw_read_held(&vip->vi_lock))
return LK_SHARED;
return 0;
Index: src/sys/miscfs/procfs/procfs_vnops.c
diff -u src/sys/miscfs/procfs/procfs_vnops.c:1.208 src/sys/miscfs/procfs/procfs_vnops.c:1.209
--- src/sys/miscfs/procfs/procfs_vnops.c:1.208 Sat Feb 1 02:23:04 2020
+++ src/sys/miscfs/procfs/procfs_vnops.c Sun Feb 23 22:14:04 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: procfs_vnops.c,v 1.208 2020/02/01 02:23:04 riastradh Exp $ */
+/* $NetBSD: procfs_vnops.c,v 1.209 2020/02/23 22:14:04 ad Exp $ */
/*-
- * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc.
+ * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
@@ -105,7 +105,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.208 2020/02/01 02:23:04 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.209 2020/02/23 22:14:04 ad Exp $");
#include <sys/param.h>
#include <sys/atomic.h>
@@ -558,7 +558,7 @@ static void
procfs_dir(pfstype t, struct lwp *caller, struct proc *target, char **bpp,
char *path, size_t len)
{
- struct cwdinfo *cwdi;
+ const struct cwdinfo *cwdi;
struct vnode *vp, *rvp;
char *bp;
@@ -567,26 +567,25 @@ procfs_dir(pfstype t, struct lwp *caller
* we are interested in to prevent it from disappearing
* before getcwd_common() below.
*/
- rw_enter(&target->p_cwdi->cwdi_lock, RW_READER);
+ cwdi = cwdlock(target);
switch (t) {
case PFScwd:
- vp = target->p_cwdi->cwdi_cdir;
+ vp = cwdi->cwdi_cdir;
break;
case PFSchroot:
- vp = target->p_cwdi->cwdi_rdir;
+ vp = cwdi->cwdi_rdir;
break;
default:
- rw_exit(&target->p_cwdi->cwdi_lock);
+ cwdunlock(target);
return;
}
if (vp != NULL)
vref(vp);
- rw_exit(&target->p_cwdi->cwdi_lock);
+ cwdunlock(target);
- cwdi = caller->l_proc->p_cwdi;
- rw_enter(&cwdi->cwdi_lock, RW_READER);
+ KASSERT(caller == curlwp);
- rvp = cwdi->cwdi_rdir;
+ rvp = cwdrdir();
bp = bpp ? *bpp : NULL;
/*
@@ -599,12 +598,15 @@ procfs_dir(pfstype t, struct lwp *caller
*bpp = bp;
}
vrele(vp);
- rw_exit(&cwdi->cwdi_lock);
+ if (rvp != NULL)
+ vrele(rvp);
return;
}
- if (rvp == NULL)
+ if (rvp == NULL) {
rvp = rootvnode;
+ vref(rvp);
+ }
if (vp == NULL || getcwd_common(vp, rvp, bp ? &bp : NULL, path,
len / 2, 0, caller) != 0) {
if (bpp) {
@@ -618,7 +620,8 @@ procfs_dir(pfstype t, struct lwp *caller
if (vp != NULL)
vrele(vp);
- rw_exit(&cwdi->cwdi_lock);
+ if (rvp != NULL)
+ vrele(rvp);
}
/*
@@ -1647,7 +1650,7 @@ procfs_readlink(void *v)
len = strlen(bp);
} else {
file_t *fp;
- struct vnode *vxp, *vp;
+ struct vnode *vxp, *rvp;
if ((error = procfs_proc_lock(pfs->pfs_pid, &pown, ESRCH)) != 0)
return error;
@@ -1680,14 +1683,13 @@ procfs_readlink(void *v)
if (vxp->v_tag == VT_PROCFS) {
*--bp = '/';
} else {
- rw_enter(&curproc->p_cwdi->cwdi_lock,
- RW_READER);
- vp = curproc->p_cwdi->cwdi_rdir;
- if (vp == NULL)
- vp = rootvnode;
- error = getcwd_common(vxp, vp, &bp, path,
+ if ((rvp = cwdrdir()) == NULL) {
+ rvp = rootvnode;
+ vref(rvp);
+ }
+ error = getcwd_common(vxp, rvp, &bp, path,
MAXPATHLEN / 2, 0, curlwp);
- rw_exit(&curproc->p_cwdi->cwdi_lock);
+ vrele(rvp);
}
if (error)
break;
Index: src/sys/sys/filedesc.h
diff -u src/sys/sys/filedesc.h:1.65 src/sys/sys/filedesc.h:1.66
--- src/sys/sys/filedesc.h:1.65 Sun Oct 6 07:15:34 2019
+++ src/sys/sys/filedesc.h Sun Feb 23 22:14:04 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: filedesc.h,v 1.65 2019/10/06 07:15:34 mlelstv Exp $ */
+/* $NetBSD: filedesc.h,v 1.66 2020/02/23 22:14:04 ad Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -168,8 +168,8 @@ typedef struct cwdinfo {
struct vnode *cwdi_cdir; /* current directory */
struct vnode *cwdi_rdir; /* root directory */
struct vnode *cwdi_edir; /* emulation root (if known) */
- krwlock_t cwdi_lock; /* lock on entire struct */
- u_short cwdi_cmask; /* mask for file creation */
+ kmutex_t cwdi_lock; /* lock on entire struct */
+ u_int cwdi_cmask; /* mask for file creation */
u_int cwdi_refcnt; /* reference count */
} cwdinfo_t;
@@ -215,11 +215,17 @@ int pipe1(struct lwp *, int *, int);
int dodup(struct lwp *, int, int, int, register_t *);
void cwd_sys_init(void);
-struct cwdinfo *cwdinit(void);
+struct cwdinfo *cwdinit(void);
void cwdshare(proc_t *);
void cwdunshare(proc_t *);
void cwdfree(struct cwdinfo *);
void cwdexec(struct proc *);
+struct cwdinfo *cwdenter(krw_t);
+void cwdexit(struct cwdinfo *);
+const struct cwdinfo *cwdlock(struct proc *);
+void cwdunlock(struct proc *);
+struct vnode *cwdcdir(void);
+struct vnode *cwdrdir(void);
#define GETCWD_CHECK_ACCESS 0x0001
int getcwd_common(struct vnode *, struct vnode *, char **, char *, int,
Index: src/sys/sys/vfs_syscalls.h
diff -u src/sys/sys/vfs_syscalls.h:1.26 src/sys/sys/vfs_syscalls.h:1.27
--- src/sys/sys/vfs_syscalls.h:1.26 Thu Sep 26 01:34:16 2019
+++ src/sys/sys/vfs_syscalls.h Sun Feb 23 22:14:04 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_syscalls.h,v 1.26 2019/09/26 01:34:16 christos Exp $ */
+/* $NetBSD: vfs_syscalls.h,v 1.27 2020/02/23 22:14:04 ad Exp $ */
/*
* Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc.
@@ -82,7 +82,7 @@ int do_sys_quotactl(const char *, const
void do_sys_sync(struct lwp *);
int chdir_lookup(const char *, int, struct vnode **, struct lwp *);
-void change_root(struct cwdinfo *, struct vnode *, struct lwp *);
+void change_root(struct vnode *);
extern const char *const mountcompatnames[];
extern const u_int nmountcompatnames;
Index: src/sys/sys/vnode.h
diff -u src/sys/sys/vnode.h:1.288 src/sys/sys/vnode.h:1.289
--- src/sys/sys/vnode.h:1.288 Sun Feb 23 15:46:42 2020
+++ src/sys/sys/vnode.h Sun Feb 23 22:14:04 2020
@@ -1,4 +1,4 @@
-/* $NetBSD: vnode.h,v 1.288 2020/02/23 15:46:42 ad Exp $ */
+/* $NetBSD: vnode.h,v 1.289 2020/02/23 22:14:04 ad Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@@ -138,22 +138,36 @@ LIST_HEAD(buflists, buf);
* it from v_data.
*/
struct vnode {
- struct uvm_object v_uobj; /* i the VM object */
- kmutex_t *v_interlock; /* - vnode interlock */
- kcondvar_t v_cv; /* i synchronization */
+ /*
+ * VM system related items.
+ */
+ struct uvm_object v_uobj; /* u the VM object */
voff_t v_size; /* i+u size of file */
voff_t v_writesize; /* i+u new size after write */
- int v_iflag; /* i VI_* flags */
- int v_vflag; /* v VV_* flags */
+
+ /*
+ * Unstable items get their own cache line.
+ * On _LP64 this fills the space nicely.
+ */
+ kcondvar_t v_cv /* i synchronization */
+ __aligned(COHERENCY_UNIT);
+ int v_iflag; /* i+u VI_* flags */
int v_uflag; /* k VU_* flags */
int v_usecount; /* i reference count */
int v_numoutput; /* i # of pending writes */
int v_writecount; /* i ref count of writers */
int v_holdcnt; /* i page & buffer refs */
- struct mount *v_mount; /* v ptr to vfs we are in */
- int (**v_op)(void *); /* - vnode operations vector */
struct buflists v_cleanblkhd; /* i+b clean blocklist head */
struct buflists v_dirtyblkhd; /* i+b dirty blocklist head */
+
+ /*
+ * The remaining items are largely stable.
+ */
+ int v_vflag /* v VV_* flags */
+ __aligned(COHERENCY_UNIT);
+ kmutex_t *v_interlock; /* - vnode interlock */
+ struct mount *v_mount; /* v ptr to vfs we are in */
+ int (**v_op)(void *); /* : vnode operations vector */
union {
struct mount *vu_mountedhere;/* v ptr to vfs (VDIR) */
struct socket *vu_socket; /* v unix ipc (VSOCK) */
@@ -327,36 +341,11 @@ extern const int vttoif_tab[];
#define VDEAD_NOWAIT 0x0001 /* vdead_check: do not sleep */
void holdrelel(struct vnode *);
+void holdrele(struct vnode *);
void vholdl(struct vnode *);
+void vhold(struct vnode *);
void vref(struct vnode *);
-static __inline void holdrele(struct vnode *) __unused;
-static __inline void vhold(struct vnode *) __unused;
-
-/*
- * decrease buf or page ref
- */
-static __inline void
-holdrele(struct vnode *vp)
-{
-
- mutex_enter(vp->v_interlock);
- holdrelel(vp);
- mutex_exit(vp->v_interlock);
-}
-
-/*
- * increase buf or page ref
- */
-static __inline void
-vhold(struct vnode *vp)
-{
-
- mutex_enter(vp->v_interlock);
- vholdl(vp);
- mutex_exit(vp->v_interlock);
-}
-
#define NULLVP ((struct vnode *)NULL)
static __inline void
Index: src/sys/sys/vnode_impl.h
diff -u src/sys/sys/vnode_impl.h:1.20 src/sys/sys/vnode_impl.h:1.21
--- src/sys/sys/vnode_impl.h:1.20 Wed Jan 8 12:04:56 2020
+++ src/sys/sys/vnode_impl.h Sun Feb 23 22:14:04 2020
@@ -1,7 +1,7 @@
-/* $NetBSD: vnode_impl.h,v 1.20 2020/01/08 12:04:56 ad Exp $ */
+/* $NetBSD: vnode_impl.h,v 1.21 2020/02/23 22:14:04 ad Exp $ */
/*-
- * Copyright (c) 2016, 2019 The NetBSD Foundation, Inc.
+ * Copyright (c) 2016, 2019, 2020 The NetBSD Foundation, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,6 +32,7 @@
#include <sys/vnode.h>
struct namecache;
+struct nchnode;
enum vnode_state {
VS_ACTIVE, /* Assert only, fs node attached and usecount > 0. */
@@ -56,27 +57,53 @@ struct vcache_key {
* Reading or writing any of these items requires holding the appropriate
* lock. Field markings and the corresponding locks:
*
+ * - stable throughout the life of the vnode
* c vcache_lock
* d vdrain_lock
* i v_interlock
+ * l vi_nc_listlock
* m mnt_vnodelock
* n namecache_lock
* s syncer_data_lock
*/
struct vnode_impl {
struct vnode vi_vnode;
- enum vnode_state vi_state; /* i: current state */
- struct vnodelst *vi_lrulisthd; /* d: current lru list head */
- TAILQ_ENTRY(vnode_impl) vi_lrulist; /* d: lru list */
- LIST_HEAD(, namecache) vi_dnclist; /* n: namecaches (children) */
+
+ /*
+ * Largely stable data.
+ */
+ struct vcache_key vi_key; /* c vnode cache key */
+
+ /*
+ * Namecache. Give it a separate line so activity doesn't impinge
+ * on the stable stuff (pending merge of ad-namecache branch).
+ */
+ LIST_HEAD(, namecache) vi_dnclist /* n: namecaches (children) */
+ __aligned(COHERENCY_UNIT);
TAILQ_HEAD(, namecache) vi_nclist; /* n: namecaches (parent) */
- int vi_synclist_slot; /* s: synclist slot index */
- int vi_lrulisttm; /* i: time of lru enqueue */
- TAILQ_ENTRY(vnode_impl) vi_synclist; /* s: vnodes with dirty bufs */
- TAILQ_ENTRY(vnode_impl) vi_mntvnodes; /* m: vnodes for mount point */
- SLIST_ENTRY(vnode_impl) vi_hash; /* c: vnode cache list */
- krwlock_t *vi_lock; /* -: lock for this vnode */
- struct vcache_key vi_key; /* c: vnode cache key */
+
+ /*
+ * vnode cache, LRU and syncer. This all changes with some
+ * regularity so keep it together.
+ */
+ struct vnodelst *vi_lrulisthd /* d current lru list head */
+ __aligned(COHERENCY_UNIT);
+ TAILQ_ENTRY(vnode_impl) vi_lrulist; /* d lru list */
+ int vi_synclist_slot; /* s synclist slot index */
+ int vi_lrulisttm; /* i time of lru enqueue */
+ TAILQ_ENTRY(vnode_impl) vi_synclist; /* s vnodes with dirty bufs */
+ SLIST_ENTRY(vnode_impl) vi_hash; /* c vnode cache list */
+ enum vnode_state vi_state; /* i current state */
+
+ /*
+ * Locks and expensive to access items which can be expected to
+ * generate a cache miss.
+ */
+ krwlock_t vi_lock /* - lock for this vnode */
+ __aligned(COHERENCY_UNIT);
+ krwlock_t vi_nc_lock; /* - lock on node */
+ krwlock_t vi_nc_listlock; /* - lock on nn_list */
+ TAILQ_ENTRY(vnode_impl) vi_mntvnodes; /* m vnodes for mount point */
};
typedef struct vnode_impl vnode_impl_t;