Module Name: src Committed By: ad Date: Sat Jan 25 15:54:04 UTC 2020
Modified Files: src/sys/compat/netbsd32 [ad-namecache]: netbsd32_fs.c src/sys/kern [ad-namecache]: kern_exec.c kern_proc.c uipc_usrreq.c vfs_cwd.c vfs_getcwd.c vfs_lookup.c vfs_mount.c vfs_subr.c vfs_syscalls.c vfs_vnode.c src/sys/miscfs/procfs [ad-namecache]: procfs_vnops.c src/sys/sys [ad-namecache]: filedesc.h vfs_syscalls.h Log Message: Make cwdinfo use mostly lockless, and largely hide the details in vfs_cwd.c. To generate a diff of this commit: cvs rdiff -u -r1.85 -r1.85.2.1 src/sys/compat/netbsd32/netbsd32_fs.c cvs rdiff -u -r1.485.2.1 -r1.485.2.2 src/sys/kern/kern_exec.c cvs rdiff -u -r1.239 -r1.239.2.1 src/sys/kern/kern_proc.c cvs rdiff -u -r1.194 -r1.194.4.1 src/sys/kern/uipc_usrreq.c cvs rdiff -u -r1.4 -r1.4.62.1 src/sys/kern/vfs_cwd.c cvs rdiff -u -r1.53.2.3 -r1.53.2.4 src/sys/kern/vfs_getcwd.c cvs rdiff -u -r1.212.4.8 -r1.212.4.9 src/sys/kern/vfs_lookup.c cvs rdiff -u -r1.73.2.1 -r1.73.2.2 src/sys/kern/vfs_mount.c cvs rdiff -u -r1.478.2.2 -r1.478.2.3 src/sys/kern/vfs_subr.c cvs rdiff -u -r1.539.2.2 -r1.539.2.3 src/sys/kern/vfs_syscalls.c cvs rdiff -u -r1.105.2.5 -r1.105.2.6 src/sys/kern/vfs_vnode.c cvs rdiff -u -r1.207 -r1.207.2.1 src/sys/miscfs/procfs/procfs_vnops.c cvs rdiff -u -r1.65 -r1.65.2.1 src/sys/sys/filedesc.h cvs rdiff -u -r1.26 -r1.26.2.1 src/sys/sys/vfs_syscalls.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/compat/netbsd32/netbsd32_fs.c diff -u src/sys/compat/netbsd32/netbsd32_fs.c:1.85 src/sys/compat/netbsd32/netbsd32_fs.c:1.85.2.1 --- src/sys/compat/netbsd32/netbsd32_fs.c:1.85 Thu Sep 26 01:32:09 2019 +++ src/sys/compat/netbsd32/netbsd32_fs.c Sat Jan 25 15:54:03 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: netbsd32_fs.c,v 1.85 2019/09/26 01:32:09 christos Exp $ */ +/* $NetBSD: netbsd32_fs.c,v 1.85.2.1 2020/01/25 15:54:03 ad Exp $ */ /* * Copyright (c) 1998, 2001 Matthew R. Green @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.85 2019/09/26 01:32:09 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: netbsd32_fs.c,v 1.85.2.1 2020/01/25 15:54:03 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -740,13 +740,12 @@ netbsd32___getcwd(struct lwp *l, const s syscallarg(char *) bufp; syscallarg(size_t) length; } */ - struct proc *p = l->l_proc; int error; char *path; char *bp, *bend; int len = (int)SCARG(uap, length); int lenused; - struct cwdinfo *cwdi; + struct vnode *dvp; if (len > MAXPATHLEN*4) len = MAXPATHLEN*4; @@ -764,11 +763,10 @@ netbsd32___getcwd(struct lwp *l, const s * limit it to N/2 vnodes for an N byte buffer. */ #define GETCWD_CHECK_ACCESS 0x0001 - cwdi = p->p_cwdi; - rw_enter(&cwdi->cwdi_lock, RW_READER); - error = getcwd_common (cwdi->cwdi_cdir, NULL, &bp, path, len/2, + dvp = cwdcdir(); + error = getcwd_common (dvp, NULL, &bp, path, len/2, GETCWD_CHECK_ACCESS, l); - rw_exit(&cwdi->cwdi_lock); + vrele(dvp); if (error) goto out; Index: src/sys/kern/kern_exec.c diff -u src/sys/kern/kern_exec.c:1.485.2.1 src/sys/kern/kern_exec.c:1.485.2.2 --- src/sys/kern/kern_exec.c:1.485.2.1 Fri Jan 17 21:47:35 2020 +++ src/sys/kern/kern_exec.c Sat Jan 25 15:54:03 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: kern_exec.c,v 1.485.2.1 2020/01/17 21:47:35 ad Exp $ */ +/* $NetBSD: kern_exec.c,v 1.485.2.2 2020/01/25 15:54:03 ad Exp $ */ /*- - * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc. 
+ * Copyright (c) 2008, 2019, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -62,7 +62,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.485.2.1 2020/01/17 21:47:35 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.485.2.2 2020/01/25 15:54:03 ad Exp $"); #include "opt_exec.h" #include "opt_execfmt.h" @@ -664,7 +664,7 @@ exec_makepathbuf(struct lwp *l, const ch char *path, *bp; size_t len, tlen; int error; - struct cwdinfo *cwdi; + struct vnode *dvp; path = PNBUF_GET(); if (seg == UIO_SYSSPACE) { @@ -690,11 +690,10 @@ exec_makepathbuf(struct lwp *l, const ch memmove(bp, path, len); *(--bp) = '/'; - cwdi = l->l_proc->p_cwdi; - rw_enter(&cwdi->cwdi_lock, RW_READER); - error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path, MAXPATHLEN / 2, + dvp = cwdcdir(); + error = getcwd_common(dvp, NULL, &bp, path, MAXPATHLEN / 2, GETCWD_CHECK_ACCESS, l); - rw_exit(&cwdi->cwdi_lock); + vrele(dvp); if (error) goto err; @@ -1111,6 +1110,7 @@ static void emulexec(struct lwp *l, struct exec_package *epp) { struct proc *p = l->l_proc; + struct cwdinfo *cwdi; /* The emulation root will usually have been found when we looked * for the elf interpreter (or similar), if not look now. 
*/ @@ -1119,9 +1119,10 @@ emulexec(struct lwp *l, struct exec_pack emul_find_root(l, epp); /* Any old emulation root got removed by fdcloseexec */ - rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER); - p->p_cwdi->cwdi_edir = epp->ep_emul_root; - rw_exit(&p->p_cwdi->cwdi_lock); + KASSERT(p == curproc); + cwdi = cwdenter(RW_WRITER); + cwdi->cwdi_edir = epp->ep_emul_root; + cwdexit(cwdi); epp->ep_emul_root = NULL; if (epp->ep_interp != NULL) vrele(epp->ep_interp); Index: src/sys/kern/kern_proc.c diff -u src/sys/kern/kern_proc.c:1.239 src/sys/kern/kern_proc.c:1.239.2.1 --- src/sys/kern/kern_proc.c:1.239 Tue Dec 31 13:07:13 2019 +++ src/sys/kern/kern_proc.c Sat Jan 25 15:54:03 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: kern_proc.c,v 1.239 2019/12/31 13:07:13 ad Exp $ */ +/* $NetBSD: kern_proc.c,v 1.239.2.1 2020/01/25 15:54:03 ad Exp $ */ /*- - * Copyright (c) 1999, 2006, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -62,7 +62,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.239 2019/12/31 13:07:13 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.239.2.1 2020/01/25 15:54:03 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_kstack.h" @@ -106,6 +106,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_proc.c, #include <sys/exec.h> #include <sys/cpu.h> #include <sys/compat_stub.h> +#include <sys/vnode.h> #include <uvm/uvm_extern.h> #include <uvm/uvm.h> @@ -470,7 +471,7 @@ proc0_init(void) p->p_cred = cred0; /* Create the CWD info. */ - rw_init(&cwdi0.cwdi_lock); + mutex_init(&cwdi0.cwdi_lock, MUTEX_DEFAULT, IPL_NONE); /* Create the limits structures. 
*/ mutex_init(&limit0.pl_lock, MUTEX_DEFAULT, IPL_NONE); @@ -2588,7 +2589,7 @@ fill_cwd(struct lwp *l, pid_t pid, void struct proc *p; char *path; char *bp, *bend; - struct cwdinfo *cwdi; + const struct cwdinfo *cwdi; struct vnode *vp; size_t len, lenused; @@ -2603,11 +2604,12 @@ fill_cwd(struct lwp *l, pid_t pid, void bend = bp; *(--bp) = '\0'; - cwdi = p->p_cwdi; - rw_enter(&cwdi->cwdi_lock, RW_READER); + cwdi = cwdlock(p); vp = cwdi->cwdi_cdir; + vref(vp); + cwdunlock(p); error = getcwd_common(vp, NULL, &bp, path, len/2, 0, l); - rw_exit(&cwdi->cwdi_lock); + vrele(vp); if (error) goto out; Index: src/sys/kern/uipc_usrreq.c diff -u src/sys/kern/uipc_usrreq.c:1.194 src/sys/kern/uipc_usrreq.c:1.194.4.1 --- src/sys/kern/uipc_usrreq.c:1.194 Mon Jul 29 09:42:17 2019 +++ src/sys/kern/uipc_usrreq.c Sat Jan 25 15:54:03 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: uipc_usrreq.c,v 1.194 2019/07/29 09:42:17 maxv Exp $ */ +/* $NetBSD: uipc_usrreq.c,v 1.194.4.1 2020/01/25 15:54:03 ad Exp $ */ /*- - * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc. + * Copyright (c) 1998, 2000, 2004, 2008, 2009, 2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -96,7 +96,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.194 2019/07/29 09:42:17 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.194.4.1 2020/01/25 15:54:03 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_compat_netbsd.h" @@ -1395,6 +1395,7 @@ unp_externalize(struct mbuf *rights, str { struct cmsghdr * const cm = mtod(rights, struct cmsghdr *); struct proc * const p = l->l_proc; + struct vnode *rvp = NULL; file_t **rp; int error = 0; @@ -1404,9 +1405,11 @@ unp_externalize(struct mbuf *rights, str goto noop; int * const fdp = kmem_alloc(nfds * sizeof(int), KM_SLEEP); - rw_enter(&p->p_cwdi->cwdi_lock, RW_READER); + + KASSERT(l == curlwp); /* Make sure the recipient should be able to see the files.. 
*/ + rvp = cwdrdir(); rp = (file_t **)CMSG_DATA(cm); for (size_t i = 0; i < nfds; i++) { file_t * const fp = *rp++; @@ -1420,15 +1423,15 @@ unp_externalize(struct mbuf *rights, str * sure it's inside the subtree we're allowed * to access. */ - if (p->p_cwdi->cwdi_rdir != NULL && fp->f_type == DTYPE_VNODE) { + if (rvp != NULL && fp->f_type == DTYPE_VNODE) { vnode_t *vp = fp->f_vnode; - if ((vp->v_type == VDIR) && - !vn_isunder(vp, p->p_cwdi->cwdi_rdir, l)) { + if ((vp->v_type == VDIR) && !vn_isunder(vp, rvp, l)) { error = EPERM; goto out; } } } + restart: /* @@ -1506,7 +1509,6 @@ unp_externalize(struct mbuf *rights, str cm->cmsg_len = CMSG_LEN(0); rights->m_len = CMSG_SPACE(0); } - rw_exit(&p->p_cwdi->cwdi_lock); kmem_free(fdp, nfds * sizeof(int)); noop: @@ -1516,6 +1518,10 @@ unp_externalize(struct mbuf *rights, str KASSERT(cm->cmsg_len <= rights->m_len); memset(&mtod(rights, char *)[cm->cmsg_len], 0, rights->m_len - cm->cmsg_len); + + /* Async release please since in the networking code. */ + if (rvp != NULL) + vrele_async(rvp); return error; } Index: src/sys/kern/vfs_cwd.c diff -u src/sys/kern/vfs_cwd.c:1.4 src/sys/kern/vfs_cwd.c:1.4.62.1 --- src/sys/kern/vfs_cwd.c:1.4 Tue Feb 15 15:54:28 2011 +++ src/sys/kern/vfs_cwd.c Sat Jan 25 15:54:03 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: vfs_cwd.c,v 1.4 2011/02/15 15:54:28 pooka Exp $ */ +/* $NetBSD: vfs_cwd.c,v 1.4.62.1 2020/01/25 15:54:03 ad Exp $ */ /*- - * Copyright (c) 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -31,13 +31,14 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_cwd.c,v 1.4 2011/02/15 15:54:28 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_cwd.c,v 1.4.62.1 2020/01/25 15:54:03 ad Exp $"); #include <sys/param.h> #include <sys/atomic.h> #include <sys/filedesc.h> #include <sys/proc.h> #include <sys/vnode.h> +#include <sys/xcall.h> static int cwdi_ctor(void *, void *, int); static void cwdi_dtor(void *, void *); @@ -64,9 +65,8 @@ cwdinit(void) struct cwdinfo *copy; cwdi = pool_cache_get(cwdi_cache, PR_WAITOK); - copy = curproc->p_cwdi; - rw_enter(&copy->cwdi_lock, RW_READER); + copy = cwdenter(RW_READER); cwdi->cwdi_cdir = copy->cwdi_cdir; if (cwdi->cwdi_cdir) vref(cwdi->cwdi_cdir); @@ -78,7 +78,7 @@ cwdinit(void) vref(cwdi->cwdi_edir); cwdi->cwdi_cmask = copy->cwdi_cmask; cwdi->cwdi_refcnt = 1; - rw_exit(&copy->cwdi_lock); + cwdexit(copy); return (cwdi); } @@ -88,7 +88,7 @@ cwdi_ctor(void *arg, void *obj, int flag { struct cwdinfo *cwdi = obj; - rw_init(&cwdi->cwdi_lock); + mutex_init(&cwdi->cwdi_lock, MUTEX_DEFAULT, IPL_NONE); return 0; } @@ -98,7 +98,7 @@ cwdi_dtor(void *arg, void *obj) { struct cwdinfo *cwdi = obj; - rw_destroy(&cwdi->cwdi_lock); + mutex_destroy(&cwdi->cwdi_lock); } /* @@ -159,3 +159,120 @@ cwdexec(struct proc *p) vrele(p->p_cwdi->cwdi_edir); } } + +/* + * Used when curlwp wants to use or update its cwdinfo, and needs to prevent + * concurrent changes. + * + * "op" is either RW_READER or RW_WRITER indicating the kind of lock + * required. If a read lock on the cwdinfo is requested, then curlwp must + * not block while holding the lock, or the cwdinfo could become stale. + * It's okay to block while holding a write lock. + */ +struct cwdinfo * +cwdenter(krw_t op) +{ + struct cwdinfo *cwdi = curproc->p_cwdi; + + if (__predict_true(op == RW_READER)) { + /* + * Disable preemption to hold off the writer side's xcall, + * then observe the lock. 
If it's already taken, we need to + * join in the melee. Otherwise we're good to go; keeping + * the xcall at bay with kpreempt_disable() will prevent any + * changes while the caller is pondering the cwdinfo. + */ + kpreempt_disable(); + if (__predict_true(mutex_owner(&cwdi->cwdi_lock) == NULL)) + return cwdi; + kpreempt_enable(); + mutex_enter(&cwdi->cwdi_lock); + } else { + /* + * About to make changes. If there's more than one + * reference on the cwdinfo, or curproc has more than one + * LWP, then LWPs other than curlwp can also see the + * cwdinfo. Run a cross call to get all LWPs out of the + * read section. This also acts as a global memory barrier, + * meaning we don't need to do anything special with on + * the reader side. + */ + mutex_enter(&cwdi->cwdi_lock); + if (cwdi->cwdi_refcnt + curproc->p_nlwps > 2) + xc_barrier(0); + } + return cwdi; +} + +/* + * Release a lock previously taken with cwdenter(). + */ +void +cwdexit(struct cwdinfo *cwdi) +{ + struct lwp *l = curlwp; + + KASSERT(cwdi == l->l_proc->p_cwdi); + + if (__predict_true(mutex_owner(&cwdi->cwdi_lock) != l)) + kpreempt_enable(); + else + mutex_exit(&cwdi->cwdi_lock); +} + +/* + * Called when there is a need to inspect some other process' cwdinfo. Used + * by procfs and sysctl. This gets you a read lock; the cwdinfo must NOT be + * changed. + */ +const struct cwdinfo * +cwdlock(struct proc *p) +{ + struct cwdinfo *cwdi = p->p_cwdi; + + mutex_enter(&cwdi->cwdi_lock); + return cwdi; +} + +/* + * Release a lock acquired with cwdlock(). + */ +void +cwdunlock(struct proc *p) +{ + struct cwdinfo *cwdi = p->p_cwdi; + + mutex_exit(&cwdi->cwdi_lock); +} + +/* + * Get a reference to the current working directory and return it. + */ +struct vnode * +cwdcdir(void) +{ + struct cwdinfo *cwdi; + struct vnode *vp; + + cwdi = cwdenter(RW_READER); + if ((vp = cwdi->cwdi_cdir) != NULL) + vref(vp); + cwdexit(cwdi); + return vp; +} + +/* + * Get a reference to the root directory and return it. 
+ */ +struct vnode * +cwdrdir(void) +{ + struct cwdinfo *cwdi; + struct vnode *vp; + + cwdi = cwdenter(RW_READER); + if ((vp = cwdi->cwdi_rdir) != NULL) + vref(vp); + cwdexit(cwdi); + return vp; +} Index: src/sys/kern/vfs_getcwd.c diff -u src/sys/kern/vfs_getcwd.c:1.53.2.3 src/sys/kern/vfs_getcwd.c:1.53.2.4 --- src/sys/kern/vfs_getcwd.c:1.53.2.3 Sun Jan 19 21:19:25 2020 +++ src/sys/kern/vfs_getcwd.c Sat Jan 25 15:54:03 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_getcwd.c,v 1.53.2.3 2020/01/19 21:19:25 ad Exp $ */ +/* $NetBSD: vfs_getcwd.c,v 1.53.2.4 2020/01/25 15:54:03 ad Exp $ */ /*- * Copyright (c) 1999, 2020 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c,v 1.53.2.3 2020/01/19 21:19:25 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_getcwd.c,v 1.53.2.4 2020/01/25 15:54:03 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -479,7 +479,7 @@ sys___getcwd(struct lwp *l, const struct char *bp, *bend; int len = SCARG(uap, length); int lenused; - struct cwdinfo *cwdi; + struct vnode *dvp; if (len > MAXPATHLEN * 4) len = MAXPATHLEN * 4; @@ -496,11 +496,10 @@ sys___getcwd(struct lwp *l, const struct * Since each entry takes up at least 2 bytes in the output buffer, * limit it to N/2 vnodes for an N byte buffer. 
*/ - cwdi = l->l_proc->p_cwdi; - rw_enter(&cwdi->cwdi_lock, RW_READER); - error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path, + dvp = cwdcdir(); + error = getcwd_common(dvp, NULL, &bp, path, len/2, GETCWD_CHECK_ACCESS, l); - rw_exit(&cwdi->cwdi_lock); + vrele(dvp); if (error) goto out; Index: src/sys/kern/vfs_lookup.c diff -u src/sys/kern/vfs_lookup.c:1.212.4.8 src/sys/kern/vfs_lookup.c:1.212.4.9 --- src/sys/kern/vfs_lookup.c:1.212.4.8 Fri Jan 24 16:05:37 2020 +++ src/sys/kern/vfs_lookup.c Sat Jan 25 15:54:03 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_lookup.c,v 1.212.4.8 2020/01/24 16:05:37 ad Exp $ */ +/* $NetBSD: vfs_lookup.c,v 1.212.4.9 2020/01/25 15:54:03 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1993 @@ -37,7 +37,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.212.4.8 2020/01/24 16:05:37 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_lookup.c,v 1.212.4.9 2020/01/25 15:54:03 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_magiclinks.h" @@ -536,7 +536,6 @@ namei_getstartdir(struct namei_state *st struct nameidata *ndp = state->ndp; struct componentname *cnp = state->cnp; struct cwdinfo *cwdi; /* pointer to cwd state */ - struct lwp *self = curlwp; /* thread doing namei() */ struct vnode *rootdir, *erootdir, *curdir, *startdir; if (state->root_referenced) { @@ -547,8 +546,8 @@ namei_getstartdir(struct namei_state *st state->root_referenced = 0; } - cwdi = self->l_proc->p_cwdi; - rw_enter(&cwdi->cwdi_lock, RW_READER); + /* NB: we must not block while holding the cwdi read locked. 
*/ + cwdi = cwdenter(RW_READER); /* root dir */ if (cwdi->cwdi_rdir == NULL || (cnp->cn_flags & NOCHROOT)) { @@ -606,7 +605,7 @@ namei_getstartdir(struct namei_state *st vref(state->ndp->ni_erootdir); state->root_referenced = 1; - rw_exit(&cwdi->cwdi_lock); + cwdexit(cwdi); return startdir; } Index: src/sys/kern/vfs_mount.c diff -u src/sys/kern/vfs_mount.c:1.73.2.1 src/sys/kern/vfs_mount.c:1.73.2.2 --- src/sys/kern/vfs_mount.c:1.73.2.1 Fri Jan 17 21:47:35 2020 +++ src/sys/kern/vfs_mount.c Sat Jan 25 15:54:03 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: vfs_mount.c,v 1.73.2.1 2020/01/17 21:47:35 ad Exp $ */ +/* $NetBSD: vfs_mount.c,v 1.73.2.2 2020/01/25 15:54:03 ad Exp $ */ /*- - * Copyright (c) 1997-2019 The NetBSD Foundation, Inc. + * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -67,7 +67,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.73.2.1 2020/01/17 21:47:35 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.73.2.2 2020/01/25 15:54:03 ad Exp $"); #include <sys/param.h> #include <sys/kernel.h> @@ -90,6 +90,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c, #include <sys/systm.h> #include <sys/vfs_syscalls.h> #include <sys/vnode_impl.h> +#include <sys/xcall.h> #include <miscfs/genfs/genfs.h> #include <miscfs/specfs/specdev.h> @@ -675,18 +676,23 @@ mount_checkdirs(vnode_t *olddp) rele2 = NULL; atomic_inc_uint(&cwdi->cwdi_refcnt); mutex_exit(proc_lock); - rw_enter(&cwdi->cwdi_lock, RW_WRITER); - if (cwdi->cwdi_cdir == olddp) { - rele1 = cwdi->cwdi_cdir; - vref(newdp); - cwdi->cwdi_cdir = newdp; - } - if (cwdi->cwdi_rdir == olddp) { - rele2 = cwdi->cwdi_rdir; - vref(newdp); - cwdi->cwdi_rdir = newdp; + mutex_enter(&cwdi->cwdi_lock); + if (cwdi->cwdi_cdir == olddp || + cwdi->cwdi_rdir == olddp) { + /* XXX belongs in vfs_cwd.c, but rump. 
*/ + xc_barrier(0); + if (cwdi->cwdi_cdir == olddp) { + rele1 = cwdi->cwdi_cdir; + vref(newdp); + cwdi->cwdi_cdir = newdp; + } + if (cwdi->cwdi_rdir == olddp) { + rele2 = cwdi->cwdi_rdir; + vref(newdp); + cwdi->cwdi_rdir = newdp; + } } - rw_exit(&cwdi->cwdi_lock); + mutex_exit(&cwdi->cwdi_lock); cwdfree(cwdi); if (rele1 != NULL) vrele(rele1); Index: src/sys/kern/vfs_subr.c diff -u src/sys/kern/vfs_subr.c:1.478.2.2 src/sys/kern/vfs_subr.c:1.478.2.3 --- src/sys/kern/vfs_subr.c:1.478.2.2 Fri Jan 24 16:05:22 2020 +++ src/sys/kern/vfs_subr.c Sat Jan 25 15:54:03 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: vfs_subr.c,v 1.478.2.2 2020/01/24 16:05:22 ad Exp $ */ +/* $NetBSD: vfs_subr.c,v 1.478.2.3 2020/01/25 15:54:03 ad Exp $ */ /*- - * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008, 2019 + * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008, 2019, 2020 * The NetBSD Foundation, Inc. * All rights reserved. * @@ -69,7 +69,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.478.2.2 2020/01/24 16:05:22 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.478.2.3 2020/01/25 15:54:03 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_ddb.h" @@ -1215,24 +1215,25 @@ set_statvfs_info(const char *onp, int uk size_t size; struct statvfs *sfs = &mp->mnt_stat; int (*fun)(const void *, void *, size_t, size_t *); + struct vnode *rvp; (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname, sizeof(mp->mnt_stat.f_fstypename)); if (onp) { - struct cwdinfo *cwdi = l->l_proc->p_cwdi; fun = (ukon == UIO_SYSSPACE) ? 
copystr : copyinstr; - if (cwdi->cwdi_rdir != NULL) { + KASSERT(l == curlwp); + rvp = cwdrdir(); + if (rvp != NULL) { size_t len; char *bp; char *path = PNBUF_GET(); bp = path + MAXPATHLEN; *--bp = '\0'; - rw_enter(&cwdi->cwdi_lock, RW_READER); - error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, + error = getcwd_common(rvp, rootvnode, &bp, path, MAXPATHLEN / 2, 0, l); - rw_exit(&cwdi->cwdi_lock); + vrele(rvp); if (error) { PNBUF_PUT(path); return error; Index: src/sys/kern/vfs_syscalls.c diff -u src/sys/kern/vfs_syscalls.c:1.539.2.2 src/sys/kern/vfs_syscalls.c:1.539.2.3 --- src/sys/kern/vfs_syscalls.c:1.539.2.2 Sun Jan 19 21:23:36 2020 +++ src/sys/kern/vfs_syscalls.c Sat Jan 25 15:54:03 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: vfs_syscalls.c,v 1.539.2.2 2020/01/19 21:23:36 ad Exp $ */ +/* $NetBSD: vfs_syscalls.c,v 1.539.2.3 2020/01/25 15:54:03 ad Exp $ */ /*- - * Copyright (c) 2008, 2009, 2019 The NetBSD Foundation, Inc. + * Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -70,7 +70,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.539.2.2 2020/01/19 21:23:36 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.539.2.3 2020/01/25 15:54:03 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_fileassoc.h" @@ -1100,7 +1100,7 @@ int dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, int root) { - struct cwdinfo *cwdi = l->l_proc->p_cwdi; + struct vnode *rvp; int error = 0; /* @@ -1111,19 +1111,20 @@ dostatvfs(struct mount *mp, struct statv if (flags == MNT_NOWAIT || flags == MNT_LAZY || (flags != MNT_WAIT && flags != 0)) { memcpy(sp, &mp->mnt_stat, sizeof(*sp)); - goto done; - } - - /* Get the filesystem stats now */ - memset(sp, 0, sizeof(*sp)); - if ((error = VFS_STATVFS(mp, sp)) != 0) { - return error; + rvp = NULL; + } else { + /* Get the filesystem stats now */ + memset(sp, 0, sizeof(*sp)); + if ((error = VFS_STATVFS(mp, sp)) != 0) { + return error; + } + KASSERT(l == curlwp); + rvp = cwdrdir(); + if (rvp == NULL) + (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); } - if (cwdi->cwdi_rdir == NULL) - (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); -done: - if (cwdi->cwdi_rdir != NULL) { + if (rvp != NULL) { size_t len; char *bp; char c; @@ -1131,12 +1132,11 @@ done: bp = path + MAXPATHLEN; *--bp = '\0'; - rw_enter(&cwdi->cwdi_lock, RW_READER); - error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, + error = getcwd_common(rvp, rootvnode, &bp, path, MAXPATHLEN / 2, 0, l); - rw_exit(&cwdi->cwdi_lock); if (error) { PNBUF_PUT(path); + vrele(rvp); return error; } len = strlen(bp); @@ -1161,6 +1161,7 @@ done: } } PNBUF_PUT(path); + vrele(rvp); } sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; return error; @@ -1330,7 +1331,6 @@ sys_fchdir(struct lwp *l, const struct s /* { syscallarg(int) fd; } */ - struct proc *p = l->l_proc; struct cwdinfo *cwdi; struct vnode *vp, *tdp; struct mount *mp; @@ 
-1370,8 +1370,7 @@ sys_fchdir(struct lwp *l, const struct s * Disallow changing to a directory not under the process's * current root directory (if there is one). */ - cwdi = p->p_cwdi; - rw_enter(&cwdi->cwdi_lock, RW_WRITER); + cwdi = cwdenter(RW_WRITER); if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { vrele(vp); error = EPERM; /* operation not permitted */ @@ -1379,7 +1378,7 @@ sys_fchdir(struct lwp *l, const struct s vrele(cwdi->cwdi_cdir); cwdi->cwdi_cdir = vp; } - rw_exit(&cwdi->cwdi_lock); + cwdexit(cwdi); out: fd_putfile(fd); @@ -1393,7 +1392,6 @@ sys_fchdir(struct lwp *l, const struct s int sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) { - struct proc *p = l->l_proc; struct vnode *vp; file_t *fp; int error, fd = SCARG(uap, fd); @@ -1414,8 +1412,7 @@ sys_fchroot(struct lwp *l, const struct if (error) goto out; vref(vp); - - change_root(p->p_cwdi, vp, l); + change_root(vp); out: fd_putfile(fd); @@ -1432,19 +1429,19 @@ sys_chdir(struct lwp *l, const struct sy /* { syscallarg(const char *) path; } */ - struct proc *p = l->l_proc; struct cwdinfo *cwdi; int error; - struct vnode *vp; + struct vnode *vp, *ovp; - if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, - &vp, l)) != 0) + error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); + if (error != 0) return (error); - cwdi = p->p_cwdi; - rw_enter(&cwdi->cwdi_lock, RW_WRITER); - vrele(cwdi->cwdi_cdir); + + cwdi = cwdenter(RW_WRITER); + ovp = cwdi->cwdi_cdir; cwdi->cwdi_cdir = vp; - rw_exit(&cwdi->cwdi_lock); + cwdexit(cwdi); + vrele(ovp); return (0); } @@ -1458,20 +1455,17 @@ sys_chroot(struct lwp *l, const struct s /* { syscallarg(const char *) path; } */ - struct proc *p = l->l_proc; int error; struct vnode *vp; if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) return (error); - if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, - &vp, l)) != 0) - return (error); - 
change_root(p->p_cwdi, vp, l); - - return (0); + error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, &vp, l); + if (error == 0) + change_root(vp); + return error; } /* @@ -1479,14 +1473,16 @@ sys_chroot(struct lwp *l, const struct s * NB: callers need to properly authorize the change root operation. */ void -change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) +change_root(struct vnode *vp) { - struct proc *p = l->l_proc; + struct cwdinfo *cwdi; kauth_cred_t ncred; + struct lwp *l = curlwp; + struct proc *p = l->l_proc; ncred = kauth_cred_alloc(); - rw_enter(&cwdi->cwdi_lock, RW_WRITER); + cwdi = cwdenter(RW_WRITER); if (cwdi->cwdi_rdir != NULL) vrele(cwdi->cwdi_rdir); cwdi->cwdi_rdir = vp; @@ -1505,7 +1501,7 @@ change_root(struct cwdinfo *cwdi, struct vref(vp); cwdi->cwdi_cdir = vp; } - rw_exit(&cwdi->cwdi_lock); + cwdexit(cwdi); /* Get a write lock on the process credential. */ proc_crmod_enter(); @@ -4674,21 +4670,15 @@ sys_umask(struct lwp *l, const struct sy /* { syscallarg(mode_t) newmask; } */ - struct proc *p = l->l_proc; - struct cwdinfo *cwdi; /* - * cwdi->cwdi_cmask will be read unlocked elsewhere. What's - * important is that we serialize changes to the mask. The - * rw_exit() will issue a write memory barrier on our behalf, - * and force the changes out to other CPUs (as it must use an - * atomic operation, draining the local CPU's store buffers). - */ - cwdi = p->p_cwdi; - rw_enter(&cwdi->cwdi_lock, RW_WRITER); - *retval = cwdi->cwdi_cmask; - cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; - rw_exit(&cwdi->cwdi_lock); + * cwdi->cwdi_cmask will be read unlocked elsewhere, and no kind of + * serialization with those reads is required. All that's important + * is that we get the correct answer for the caller of umask() and + * the atomic operation accomplishes. 
+ */ + *retval = atomic_swap_uint(&curproc->p_cwdi->cwdi_cmask, + SCARG(uap, newmask) & ALLPERMS); return (0); } @@ -4699,7 +4689,7 @@ dorevoke(struct vnode *vp, kauth_cred_t struct vattr vattr; int error, fs_decision; - vn_lock(vp, LK_SHARED | LK_RETRY); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_GETATTR(vp, &vattr, cred); VOP_UNLOCK(vp); if (error != 0) Index: src/sys/kern/vfs_vnode.c diff -u src/sys/kern/vfs_vnode.c:1.105.2.5 src/sys/kern/vfs_vnode.c:1.105.2.6 --- src/sys/kern/vfs_vnode.c:1.105.2.5 Fri Jan 24 16:05:22 2020 +++ src/sys/kern/vfs_vnode.c Sat Jan 25 15:54:03 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_vnode.c,v 1.105.2.5 2020/01/24 16:05:22 ad Exp $ */ +/* $NetBSD: vfs_vnode.c,v 1.105.2.6 2020/01/25 15:54:03 ad Exp $ */ /*- * Copyright (c) 1997-2011, 2019 The NetBSD Foundation, Inc. @@ -154,7 +154,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.105.2.5 2020/01/24 16:05:22 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.105.2.6 2020/01/25 15:54:03 ad Exp $"); #include <sys/param.h> #include <sys/kernel.h> @@ -912,6 +912,8 @@ vrele_async(vnode_t *vp) /* * Vnode reference, where a reference is already held by some other * object (for example, a file structure). + * + * NB: we have lockless code sequences that rely on this not blocking. */ void vref(vnode_t *vp) Index: src/sys/miscfs/procfs/procfs_vnops.c diff -u src/sys/miscfs/procfs/procfs_vnops.c:1.207 src/sys/miscfs/procfs/procfs_vnops.c:1.207.2.1 --- src/sys/miscfs/procfs/procfs_vnops.c:1.207 Thu Aug 29 06:43:13 2019 +++ src/sys/miscfs/procfs/procfs_vnops.c Sat Jan 25 15:54:04 2020 @@ -1,7 +1,7 @@ -/* $NetBSD: procfs_vnops.c,v 1.207 2019/08/29 06:43:13 hannken Exp $ */ +/* $NetBSD: procfs_vnops.c,v 1.207.2.1 2020/01/25 15:54:04 ad Exp $ */ /*- - * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. * All rights reserved. 
* * This code is derived from software contributed to The NetBSD Foundation @@ -105,7 +105,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.207 2019/08/29 06:43:13 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.207.2.1 2020/01/25 15:54:04 ad Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -557,7 +557,7 @@ static void procfs_dir(pfstype t, struct lwp *caller, struct proc *target, char **bpp, char *path, size_t len) { - struct cwdinfo *cwdi; + const struct cwdinfo *cwdi; struct vnode *vp, *rvp; char *bp; @@ -566,26 +566,25 @@ procfs_dir(pfstype t, struct lwp *caller * we are interested in to prevent it from disappearing * before getcwd_common() below. */ - rw_enter(&target->p_cwdi->cwdi_lock, RW_READER); + cwdi = cwdlock(target); switch (t) { case PFScwd: - vp = target->p_cwdi->cwdi_cdir; + vp = cwdi->cwdi_cdir; break; case PFSchroot: - vp = target->p_cwdi->cwdi_rdir; + vp = cwdi->cwdi_rdir; break; default: - rw_exit(&target->p_cwdi->cwdi_lock); + cwdunlock(target); return; } if (vp != NULL) vref(vp); - rw_exit(&target->p_cwdi->cwdi_lock); + cwdunlock(target); - cwdi = caller->l_proc->p_cwdi; - rw_enter(&cwdi->cwdi_lock, RW_READER); + KASSERT(caller == curlwp); - rvp = cwdi->cwdi_rdir; + rvp = cwdrdir(); bp = bpp ? *bpp : NULL; /* @@ -598,12 +597,15 @@ procfs_dir(pfstype t, struct lwp *caller *bpp = bp; } vrele(vp); - rw_exit(&cwdi->cwdi_lock); + if (rvp != NULL) + vrele(rvp); return; } - if (rvp == NULL) + if (rvp == NULL) { rvp = rootvnode; + vref(rvp); + } if (vp == NULL || getcwd_common(vp, rvp, bp ? 
&bp : NULL, path, len / 2, 0, caller) != 0) { if (bpp) { @@ -617,7 +619,8 @@ procfs_dir(pfstype t, struct lwp *caller if (vp != NULL) vrele(vp); - rw_exit(&cwdi->cwdi_lock); + if (rvp != NULL) + vrele(rvp); } /* @@ -1646,7 +1649,7 @@ procfs_readlink(void *v) len = strlen(bp); } else { file_t *fp; - struct vnode *vxp, *vp; + struct vnode *vxp, *rvp; if ((error = procfs_proc_lock(pfs->pfs_pid, &pown, ESRCH)) != 0) return error; @@ -1679,14 +1682,13 @@ procfs_readlink(void *v) if (vxp->v_tag == VT_PROCFS) { *--bp = '/'; } else { - rw_enter(&curproc->p_cwdi->cwdi_lock, - RW_READER); - vp = curproc->p_cwdi->cwdi_rdir; - if (vp == NULL) - vp = rootvnode; - error = getcwd_common(vxp, vp, &bp, path, + if ((rvp = cwdrdir()) == NULL) { + rvp = rootvnode; + vref(rvp); + } + error = getcwd_common(vxp, rvp, &bp, path, MAXPATHLEN / 2, 0, curlwp); - rw_exit(&curproc->p_cwdi->cwdi_lock); + vrele(rvp); } if (error) break; Index: src/sys/sys/filedesc.h diff -u src/sys/sys/filedesc.h:1.65 src/sys/sys/filedesc.h:1.65.2.1 --- src/sys/sys/filedesc.h:1.65 Sun Oct 6 07:15:34 2019 +++ src/sys/sys/filedesc.h Sat Jan 25 15:54:04 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: filedesc.h,v 1.65 2019/10/06 07:15:34 mlelstv Exp $ */ +/* $NetBSD: filedesc.h,v 1.65.2.1 2020/01/25 15:54:04 ad Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. 
@@ -168,8 +168,8 @@ typedef struct cwdinfo { struct vnode *cwdi_cdir; /* current directory */ struct vnode *cwdi_rdir; /* root directory */ struct vnode *cwdi_edir; /* emulation root (if known) */ - krwlock_t cwdi_lock; /* lock on entire struct */ - u_short cwdi_cmask; /* mask for file creation */ + kmutex_t cwdi_lock; /* lock on entire struct */ + u_int cwdi_cmask; /* mask for file creation */ u_int cwdi_refcnt; /* reference count */ } cwdinfo_t; @@ -215,11 +215,17 @@ int pipe1(struct lwp *, int *, int); int dodup(struct lwp *, int, int, int, register_t *); void cwd_sys_init(void); -struct cwdinfo *cwdinit(void); +struct cwdinfo *cwdinit(void); void cwdshare(proc_t *); void cwdunshare(proc_t *); void cwdfree(struct cwdinfo *); void cwdexec(struct proc *); +struct cwdinfo *cwdenter(krw_t); +void cwdexit(struct cwdinfo *); +const struct cwdinfo *cwdlock(struct proc *); +void cwdunlock(struct proc *); +struct vnode *cwdcdir(void); +struct vnode *cwdrdir(void); #define GETCWD_CHECK_ACCESS 0x0001 int getcwd_common(struct vnode *, struct vnode *, char **, char *, int, Index: src/sys/sys/vfs_syscalls.h diff -u src/sys/sys/vfs_syscalls.h:1.26 src/sys/sys/vfs_syscalls.h:1.26.2.1 --- src/sys/sys/vfs_syscalls.h:1.26 Thu Sep 26 01:34:16 2019 +++ src/sys/sys/vfs_syscalls.h Sat Jan 25 15:54:04 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_syscalls.h,v 1.26 2019/09/26 01:34:16 christos Exp $ */ +/* $NetBSD: vfs_syscalls.h,v 1.26.2.1 2020/01/25 15:54:04 ad Exp $ */ /* * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -82,7 +82,7 @@ int do_sys_quotactl(const char *, const void do_sys_sync(struct lwp *); int chdir_lookup(const char *, int, struct vnode **, struct lwp *); -void change_root(struct cwdinfo *, struct vnode *, struct lwp *); +void change_root(struct vnode *); extern const char *const mountcompatnames[]; extern const u_int nmountcompatnames;