Module Name: src
Committed By: thorpej
Date: Mon Jul 18 04:30:31 UTC 2022

Modified Files:
    src/sys/fs/union: union_subr.c
    src/sys/kern: vfs_vnode.c vfs_vnops.c vnode_if.sh
    src/sys/miscfs/genfs: layer_vfsops.c
    src/sys/sys: param.h vnode.h vnode_impl.h
    src/tests/lib/libc/kevent_nullmnt: t_nullmnt.sh

Log Message:
Make kqueue event status for vnodes shareable, and for stacked file
systems like nullfs, make the upper vnode share that status with the
lower vnode.

And, lo, NetBSD 9.99.99.

Fixes PR kern/56713.

To generate a diff of this commit:
cvs rdiff -u -r1.81 -r1.82 src/sys/fs/union/union_subr.c
cvs rdiff -u -r1.143 -r1.144 src/sys/kern/vfs_vnode.c
cvs rdiff -u -r1.233 -r1.234 src/sys/kern/vfs_vnops.c
cvs rdiff -u -r1.75 -r1.76 src/sys/kern/vnode_if.sh
cvs rdiff -u -r1.54 -r1.55 src/sys/miscfs/genfs/layer_vfsops.c
cvs rdiff -u -r1.711 -r1.712 src/sys/sys/param.h
cvs rdiff -u -r1.301 -r1.302 src/sys/sys/vnode.h
cvs rdiff -u -r1.23 -r1.24 src/sys/sys/vnode_impl.h
cvs rdiff -u -r1.5 -r1.6 src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files: Index: src/sys/fs/union/union_subr.c diff -u src/sys/fs/union/union_subr.c:1.81 src/sys/fs/union/union_subr.c:1.82 --- src/sys/fs/union/union_subr.c:1.81 Sat Mar 19 13:53:32 2022 +++ src/sys/fs/union/union_subr.c Mon Jul 18 04:30:30 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: union_subr.c,v 1.81 2022/03/19 13:53:32 hannken Exp $ */ +/* $NetBSD: union_subr.c,v 1.82 2022/07/18 04:30:30 thorpej Exp $ */ /* * Copyright (c) 1994 @@ -72,7 +72,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.81 2022/03/19 13:53:32 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.82 2022/07/18 04:30:30 thorpej Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -232,10 +232,11 @@ union_newupper(struct union_node *un, st unlock_ap.a_desc = VDESC(vop_unlock); unlock_ap.a_vp = UNIONTOV(un); genfs_unlock(&unlock_ap); - /* Update union vnode interlock & vmobjlock. */ + /* Update union vnode interlock, vmobjlock, & klist. */ vshareilock(UNIONTOV(un), uppervp); rw_obj_hold(uppervp->v_uobj.vmobjlock); uvm_obj_setlock(&UNIONTOV(un)->v_uobj, uppervp->v_uobj.vmobjlock); + vshareklist(UNIONTOV(un), uppervp); mutex_exit(&un->un_lock); if (ohash != nhash) { LIST_INSERT_HEAD(&uhashtbl[nhash], un, un_cache); @@ -577,6 +578,7 @@ union_loadvnode(struct mount *mp, struct vshareilock(vp, svp); rw_obj_hold(svp->v_uobj.vmobjlock); uvm_obj_setlock(&vp->v_uobj, svp->v_uobj.vmobjlock); + vshareklist(vp, svp); /* detect the root vnode (and aliases) */ if ((un->un_uppervp == um->um_uppervp) && Index: src/sys/kern/vfs_vnode.c diff -u src/sys/kern/vfs_vnode.c:1.143 src/sys/kern/vfs_vnode.c:1.144 --- src/sys/kern/vfs_vnode.c:1.143 Sat Apr 9 23:45:45 2022 +++ src/sys/kern/vfs_vnode.c Mon Jul 18 04:30:30 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_vnode.c,v 1.143 2022/04/09 23:45:45 riastradh Exp $ */ +/* $NetBSD: vfs_vnode.c,v 1.144 2022/07/18 04:30:30 thorpej Exp $ */ /*- * Copyright (c) 1997-2011, 2019, 2020 The NetBSD Foundation, Inc. 
@@ -148,7 +148,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.143 2022/04/09 23:45:45 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.144 2022/07/18 04:30:30 thorpej Exp $"); #ifdef _KERNEL_OPT #include "opt_pax.h" @@ -457,7 +457,8 @@ vnalloc_marker(struct mount *mp) vp->v_mount = mp; vp->v_type = VBAD; vp->v_interlock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); - klist_init(&vp->v_klist); + klist_init(&vip->vi_klist.vk_klist); + vp->v_klist = &vip->vi_klist; vip->vi_state = VS_MARKER; return vp; @@ -475,7 +476,7 @@ vnfree_marker(vnode_t *vp) KASSERT(vip->vi_state == VS_MARKER); mutex_obj_free(vp->v_interlock); uvm_obj_destroy(&vp->v_uobj, true); - klist_fini(&vp->v_klist); + klist_fini(&vip->vi_klist.vk_klist); pool_cache_put(vcache_pool, vip); } @@ -1391,7 +1392,8 @@ vcache_alloc(void) vp->v_interlock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 1); - klist_init(&vp->v_klist); + klist_init(&vip->vi_klist.vk_klist); + vp->v_klist = &vip->vi_klist; cv_init(&vp->v_cv, "vnode"); cache_vnode_init(vp); @@ -1453,7 +1455,9 @@ vcache_free(vnode_impl_t *vip) mutex_obj_free(vp->v_interlock); rw_destroy(&vip->vi_lock); uvm_obj_destroy(&vp->v_uobj, true); - klist_fini(&vp->v_klist); + KASSERT(vp->v_klist == &vip->vi_klist || + SLIST_EMPTY(&vip->vi_klist.vk_klist)); + klist_fini(&vip->vi_klist.vk_klist); cv_destroy(&vp->v_cv); cache_vnode_fini(vp); pool_cache_put(vcache_pool, vip); @@ -1916,7 +1920,7 @@ vcache_reclaim(vnode_t *vp) * Don't check for interest in NOTE_REVOKE; it's always posted * because it sets EV_EOF. */ - KNOTE(&vp->v_klist, NOTE_REVOKE); + KNOTE(&vp->v_klist->vk_klist, NOTE_REVOKE); mutex_exit(vp->v_interlock); /* @@ -2095,3 +2099,28 @@ vshareilock(vnode_t *tvp, vnode_t *fvp) tvp->v_interlock = fvp->v_interlock; mutex_obj_free(oldlock); } + +void +vshareklist(vnode_t *tvp, vnode_t *fvp) +{ + /* + * If two vnodes share klist state, they must also share + * an interlock. 
+ */ + KASSERT(tvp->v_interlock == fvp->v_interlock); + + /* + * We make the following assumptions: + * + * ==> Some other synchronization is happening outside of + * our view to make this safe. + * + * ==> That the "to" vnode will have the necessary references + * on the "from" vnode so that the storage for the klist + * won't be yanked out from beneath us (the vnode_impl). + * + * ==> If "from" is also sharing, we then assume that "from" + * has the necessary references, and so on. + */ + tvp->v_klist = fvp->v_klist; +} Index: src/sys/kern/vfs_vnops.c diff -u src/sys/kern/vfs_vnops.c:1.233 src/sys/kern/vfs_vnops.c:1.234 --- src/sys/kern/vfs_vnops.c:1.233 Wed Jul 6 13:52:24 2022 +++ src/sys/kern/vfs_vnops.c Mon Jul 18 04:30:30 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: vfs_vnops.c,v 1.233 2022/07/06 13:52:24 riastradh Exp $ */ +/* $NetBSD: vfs_vnops.c,v 1.234 2022/07/18 04:30:30 thorpej Exp $ */ /*- * Copyright (c) 2009 The NetBSD Foundation, Inc. @@ -66,7 +66,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.233 2022/07/06 13:52:24 riastradh Exp $"); +__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.234 2022/07/18 04:30:30 thorpej Exp $"); #include "veriexec.h" @@ -79,7 +79,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c, #include <sys/proc.h> #include <sys/mount.h> #include <sys/namei.h> -#include <sys/vnode.h> +#include <sys/vnode_impl.h> #include <sys/ioctl.h> #include <sys/tty.h> #include <sys/poll.h> @@ -1428,9 +1428,17 @@ vn_knote_to_interest(const struct knote void vn_knote_attach(struct vnode *vp, struct knote *kn) { + struct vnode_klist *vk = vp->v_klist; long interest = 0; /* + * In the case of layered / stacked file systems, knotes + * should only ever be associated with the base vnode. + */ + KASSERT(kn->kn_hook == vp); + KASSERT(vp->v_klist == &VNODE_TO_VIMPL(vp)->vi_klist); + + /* * We maintain a bitmask of the kevents that there is interest in, * to minimize the impact of having watchers. 
It's silly to have * to traverse vn_klist every time a read or write happens simply @@ -1439,18 +1447,23 @@ vn_knote_attach(struct vnode *vp, struct */ mutex_enter(vp->v_interlock); - SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext); - SLIST_FOREACH(kn, &vp->v_klist, kn_selnext) { + SLIST_INSERT_HEAD(&vk->vk_klist, kn, kn_selnext); + SLIST_FOREACH(kn, &vk->vk_klist, kn_selnext) { interest |= vn_knote_to_interest(kn); } - vp->v_klist_interest = interest; + vk->vk_interest = interest; mutex_exit(vp->v_interlock); } void vn_knote_detach(struct vnode *vp, struct knote *kn) { - int interest = 0; + struct vnode_klist *vk = vp->v_klist; + long interest = 0; + + /* See above. */ + KASSERT(kn->kn_hook == vp); + KASSERT(vp->v_klist == &VNODE_TO_VIMPL(vp)->vi_klist); /* * We special case removing the head of the list, because: @@ -1464,16 +1477,16 @@ vn_knote_detach(struct vnode *vp, struct */ mutex_enter(vp->v_interlock); - if (__predict_true(kn == SLIST_FIRST(&vp->v_klist))) { - SLIST_REMOVE_HEAD(&vp->v_klist, kn_selnext); - SLIST_FOREACH(kn, &vp->v_klist, kn_selnext) { + if (__predict_true(kn == SLIST_FIRST(&vk->vk_klist))) { + SLIST_REMOVE_HEAD(&vk->vk_klist, kn_selnext); + SLIST_FOREACH(kn, &vk->vk_klist, kn_selnext) { interest |= vn_knote_to_interest(kn); } - vp->v_klist_interest = interest; + vk->vk_interest = interest; } else { struct knote *thiskn, *nextkn, *prevkn = NULL; - SLIST_FOREACH_SAFE(thiskn, &vp->v_klist, kn_selnext, nextkn) { + SLIST_FOREACH_SAFE(thiskn, &vk->vk_klist, kn_selnext, nextkn) { if (thiskn == kn) { KASSERT(kn != NULL); KASSERT(prevkn != NULL); @@ -1484,7 +1497,7 @@ vn_knote_detach(struct vnode *vp, struct prevkn = thiskn; } } - vp->v_klist_interest = interest; + vk->vk_interest = interest; } mutex_exit(vp->v_interlock); } Index: src/sys/kern/vnode_if.sh diff -u src/sys/kern/vnode_if.sh:1.75 src/sys/kern/vnode_if.sh:1.76 --- src/sys/kern/vnode_if.sh:1.75 Tue May 3 13:54:18 2022 +++ src/sys/kern/vnode_if.sh Mon Jul 18 04:30:30 2022 @@ -29,7 +29,7 
@@ copyright="\ * SUCH DAMAGE. */ " -SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.75 2022/05/03 13:54:18 hannken Exp $' +SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.76 2022/07/18 04:30:30 thorpej Exp $' # Script to produce VFS front-end sugar. # @@ -444,7 +444,7 @@ do { \\ */ \\ mutex_enter((thisvp)->v_interlock); \\ if (__predict_true((e) == 0)) { \\ - knote(&(thisvp)->v_klist, (n)); \\ + knote(&(thisvp)->v_klist->vk_klist, (n)); \\ } \\ holdrelel((thisvp)); \\ mutex_exit((thisvp)->v_interlock); \\ @@ -557,7 +557,7 @@ do { \\ * meaningless from the watcher's perspective. \\ */ \\ if (__predict_true(thisvp->v_op != dead_vnodeop_p)) { \\ - knote(&thisvp->v_klist, \\ + knote(&thisvp->v_klist->vk_klist, \\ ((ap)->a_fflag & FWRITE) \\ ? NOTE_CLOSE_WRITE : NOTE_CLOSE); \\ } \\ Index: src/sys/miscfs/genfs/layer_vfsops.c diff -u src/sys/miscfs/genfs/layer_vfsops.c:1.54 src/sys/miscfs/genfs/layer_vfsops.c:1.55 --- src/sys/miscfs/genfs/layer_vfsops.c:1.54 Sun Feb 23 15:46:41 2020 +++ src/sys/miscfs/genfs/layer_vfsops.c Mon Jul 18 04:30:30 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: layer_vfsops.c,v 1.54 2020/02/23 15:46:41 ad Exp $ */ +/* $NetBSD: layer_vfsops.c,v 1.55 2022/07/18 04:30:30 thorpej Exp $ */ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -74,7 +74,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: layer_vfsops.c,v 1.54 2020/02/23 15:46:41 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: layer_vfsops.c,v 1.55 2022/07/18 04:30:30 thorpej Exp $"); #include <sys/param.h> #include <sys/sysctl.h> @@ -205,10 +205,11 @@ layerfs_loadvnode(struct mount *mp, stru xp = kmem_alloc(lmp->layerm_size, KM_SLEEP); - /* Share the interlock and vmobjlock with the lower node. */ + /* Share the interlock, vmobjlock, and klist with the lower node. 
*/ vshareilock(vp, lowervp); rw_obj_hold(lowervp->v_uobj.vmobjlock); uvm_obj_setlock(&vp->v_uobj, lowervp->v_uobj.vmobjlock); + vshareklist(vp, lowervp); vp->v_tag = lmp->layerm_tag; vp->v_type = lowervp->v_type; Index: src/sys/sys/param.h diff -u src/sys/sys/param.h:1.711 src/sys/sys/param.h:1.712 --- src/sys/sys/param.h:1.711 Mon Jun 20 08:38:56 2022 +++ src/sys/sys/param.h Mon Jul 18 04:30:30 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: param.h,v 1.711 2022/06/20 08:38:56 yamaguchi Exp $ */ +/* $NetBSD: param.h,v 1.712 2022/07/18 04:30:30 thorpej Exp $ */ /*- * Copyright (c) 1982, 1986, 1989, 1993 @@ -67,7 +67,7 @@ * 2.99.9 (299000900) */ -#define __NetBSD_Version__ 999009800 /* NetBSD 9.99.98 */ +#define __NetBSD_Version__ 999009900 /* NetBSD 9.99.99 */ #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \ (m) * 1000000) + (p) * 100) <= __NetBSD_Version__) Index: src/sys/sys/vnode.h diff -u src/sys/sys/vnode.h:1.301 src/sys/sys/vnode.h:1.302 --- src/sys/sys/vnode.h:1.301 Fri Mar 25 08:56:36 2022 +++ src/sys/sys/vnode.h Mon Jul 18 04:30:30 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: vnode.h,v 1.301 2022/03/25 08:56:36 hannken Exp $ */ +/* $NetBSD: vnode.h,v 1.302 2022/07/18 04:30:30 thorpej Exp $ */ /*- * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc. @@ -179,8 +179,8 @@ struct vnode { enum vtype v_type; /* - vnode type */ enum vtagtype v_tag; /* - type of underlying data */ void *v_data; /* - private data for fs */ - struct klist v_klist; /* i notes attached to vnode */ - long v_klist_interest; /* i what the noes are interested in */ + struct vnode_klist *v_klist; /* i kevent / knote info */ + void *v_segvguard; /* e for PAX_SEGVGUARD */ }; #define v_mountedhere v_un.vu_mountedhere @@ -190,6 +190,19 @@ struct vnode { #define v_ractx v_un.vu_ractx typedef struct vnode vnode_t; + +/* + * Structure that encompasses the kevent state for a vnode. This is + * carved out as a separate structure because some vnodes may share + * this state with one another. + * + * N.B. 
if two vnodes share a vnode_klist, then they must also share + * v_interlock. + */ +struct vnode_klist { + struct klist vk_klist; /* i notes attached to vnode */ + long vk_interest; /* i what the notes are interested in */ +}; #endif /* @@ -415,7 +428,7 @@ void vref(struct vnode *); * Macro to determine kevent interest on a vnode. */ #define VN_KEVENT_INTEREST(vp, n) \ - ((vp)->v_klist_interest != 0) + (((vp)->v_klist->vk_interest & (n)) != 0) static inline void VN_KNOTE(struct vnode *vp, long hint) @@ -429,7 +442,7 @@ VN_KNOTE(struct vnode *vp, long hint) */ if (__predict_false(VN_KEVENT_INTEREST(vp, hint))) { mutex_enter(vp->v_interlock); - knote(&vp->v_klist, hint); + knote(&vp->v_klist->vk_klist, hint); mutex_exit(vp->v_interlock); } } @@ -594,6 +607,7 @@ int vdead_check(struct vnode *, int); void vrevoke(struct vnode *); void vremfree(struct vnode *); void vshareilock(struct vnode *, struct vnode *); +void vshareklist(struct vnode *, struct vnode *); int vrefcnt(struct vnode *); int vcache_get(struct mount *, const void *, size_t, struct vnode **); int vcache_new(struct mount *, struct vnode *, Index: src/sys/sys/vnode_impl.h diff -u src/sys/sys/vnode_impl.h:1.23 src/sys/sys/vnode_impl.h:1.24 --- src/sys/sys/vnode_impl.h:1.23 Sun Mar 22 14:38:37 2020 +++ src/sys/sys/vnode_impl.h Mon Jul 18 04:30:30 2022 @@ -1,4 +1,4 @@ -/* $NetBSD: vnode_impl.h,v 1.23 2020/03/22 14:38:37 ad Exp $ */ +/* $NetBSD: vnode_impl.h,v 1.24 2022/07/18 04:30:30 thorpej Exp $ */ /*- * Copyright (c) 2016, 2019, 2020 The NetBSD Foundation, Inc. @@ -77,6 +77,12 @@ struct vnode_impl { struct vcache_key vi_key; /* c vnode cache key */ /* + * The vnode klist is accessed frequently, but rarely + * modified. + */ + struct vnode_klist vi_klist; /* i kevent / knote state */ + + /* * vnode cache, LRU and syncer. This all changes with some * regularity so keep it together. 
*/ Index: src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh diff -u src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh:1.5 src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh:1.6 --- src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh:1.5 Sat Jun 4 20:32:49 2022 +++ src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh Mon Jul 18 04:30:30 2022 @@ -33,7 +33,6 @@ nullmnt_upper_lower_head() } nullmnt_upper_lower_body() { - atf_expect_fail "PR kern/56713" nullmnt_common lower_dir upper_dir } nullmnt_upper_lower_cleanup() @@ -48,7 +47,6 @@ nullmnt_upper_upper_head() } nullmnt_upper_upper_body() { - atf_expect_fail "PR kern/56713" nullmnt_common upper_dir upper_dir } nullmnt_upper_upper_cleanup()