Module Name:    src
Committed By:   thorpej
Date:           Mon Jul 18 04:30:31 UTC 2022

Modified Files:
        src/sys/fs/union: union_subr.c
        src/sys/kern: vfs_vnode.c vfs_vnops.c vnode_if.sh
        src/sys/miscfs/genfs: layer_vfsops.c
        src/sys/sys: param.h vnode.h vnode_impl.h
        src/tests/lib/libc/kevent_nullmnt: t_nullmnt.sh

Log Message:
Make the kqueue event state for vnodes (the knote list and its interest
mask) shareable, and for stacked file systems like nullfs, make the upper
vnode share that state with the lower vnode.

And, lo, NetBSD 9.99.99.

Fixes PR kern/56713.


To generate a diff of this commit:
cvs rdiff -u -r1.81 -r1.82 src/sys/fs/union/union_subr.c
cvs rdiff -u -r1.143 -r1.144 src/sys/kern/vfs_vnode.c
cvs rdiff -u -r1.233 -r1.234 src/sys/kern/vfs_vnops.c
cvs rdiff -u -r1.75 -r1.76 src/sys/kern/vnode_if.sh
cvs rdiff -u -r1.54 -r1.55 src/sys/miscfs/genfs/layer_vfsops.c
cvs rdiff -u -r1.711 -r1.712 src/sys/sys/param.h
cvs rdiff -u -r1.301 -r1.302 src/sys/sys/vnode.h
cvs rdiff -u -r1.23 -r1.24 src/sys/sys/vnode_impl.h
cvs rdiff -u -r1.5 -r1.6 src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/fs/union/union_subr.c
diff -u src/sys/fs/union/union_subr.c:1.81 src/sys/fs/union/union_subr.c:1.82
--- src/sys/fs/union/union_subr.c:1.81	Sat Mar 19 13:53:32 2022
+++ src/sys/fs/union/union_subr.c	Mon Jul 18 04:30:30 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: union_subr.c,v 1.81 2022/03/19 13:53:32 hannken Exp $	*/
+/*	$NetBSD: union_subr.c,v 1.82 2022/07/18 04:30:30 thorpej Exp $	*/
 
 /*
  * Copyright (c) 1994
@@ -72,7 +72,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.81 2022/03/19 13:53:32 hannken Exp $");
+__KERNEL_RCSID(0, "$NetBSD: union_subr.c,v 1.82 2022/07/18 04:30:30 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -232,10 +232,11 @@ union_newupper(struct union_node *un, st
 	unlock_ap.a_desc = VDESC(vop_unlock);
 	unlock_ap.a_vp = UNIONTOV(un);
 	genfs_unlock(&unlock_ap);
-	/* Update union vnode interlock & vmobjlock. */
+	/* Update union vnode interlock, vmobjlock, & klist. */
 	vshareilock(UNIONTOV(un), uppervp);
 	rw_obj_hold(uppervp->v_uobj.vmobjlock);
 	uvm_obj_setlock(&UNIONTOV(un)->v_uobj, uppervp->v_uobj.vmobjlock);
+	vshareklist(UNIONTOV(un), uppervp);
 	mutex_exit(&un->un_lock);
 	if (ohash != nhash) {
 		LIST_INSERT_HEAD(&uhashtbl[nhash], un, un_cache);
@@ -577,6 +578,7 @@ union_loadvnode(struct mount *mp, struct
 	vshareilock(vp, svp);
 	rw_obj_hold(svp->v_uobj.vmobjlock);
 	uvm_obj_setlock(&vp->v_uobj, svp->v_uobj.vmobjlock);
+	vshareklist(vp, svp);
 
 	/* detect the root vnode (and aliases) */
 	if ((un->un_uppervp == um->um_uppervp) &&

Index: src/sys/kern/vfs_vnode.c
diff -u src/sys/kern/vfs_vnode.c:1.143 src/sys/kern/vfs_vnode.c:1.144
--- src/sys/kern/vfs_vnode.c:1.143	Sat Apr  9 23:45:45 2022
+++ src/sys/kern/vfs_vnode.c	Mon Jul 18 04:30:30 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_vnode.c,v 1.143 2022/04/09 23:45:45 riastradh Exp $	*/
+/*	$NetBSD: vfs_vnode.c,v 1.144 2022/07/18 04:30:30 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1997-2011, 2019, 2020 The NetBSD Foundation, Inc.
@@ -148,7 +148,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.143 2022/04/09 23:45:45 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.144 2022/07/18 04:30:30 thorpej Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_pax.h"
@@ -457,7 +457,8 @@ vnalloc_marker(struct mount *mp)
 	vp->v_mount = mp;
 	vp->v_type = VBAD;
 	vp->v_interlock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
-	klist_init(&vp->v_klist);
+	klist_init(&vip->vi_klist.vk_klist);
+	vp->v_klist = &vip->vi_klist;
 	vip->vi_state = VS_MARKER;
 
 	return vp;
@@ -475,7 +476,7 @@ vnfree_marker(vnode_t *vp)
 	KASSERT(vip->vi_state == VS_MARKER);
 	mutex_obj_free(vp->v_interlock);
 	uvm_obj_destroy(&vp->v_uobj, true);
-	klist_fini(&vp->v_klist);
+	klist_fini(&vip->vi_klist.vk_klist);
 	pool_cache_put(vcache_pool, vip);
 }
 
@@ -1391,7 +1392,8 @@ vcache_alloc(void)
 	vp->v_interlock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 
 	uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 1);
-	klist_init(&vp->v_klist);
+	klist_init(&vip->vi_klist.vk_klist);
+	vp->v_klist = &vip->vi_klist;
 	cv_init(&vp->v_cv, "vnode");
 	cache_vnode_init(vp);
 
@@ -1453,7 +1455,9 @@ vcache_free(vnode_impl_t *vip)
 	mutex_obj_free(vp->v_interlock);
 	rw_destroy(&vip->vi_lock);
 	uvm_obj_destroy(&vp->v_uobj, true);
-	klist_fini(&vp->v_klist);
+	KASSERT(vp->v_klist == &vip->vi_klist ||
+		SLIST_EMPTY(&vip->vi_klist.vk_klist));
+	klist_fini(&vip->vi_klist.vk_klist);
 	cv_destroy(&vp->v_cv);
 	cache_vnode_fini(vp);
 	pool_cache_put(vcache_pool, vip);
@@ -1916,7 +1920,7 @@ vcache_reclaim(vnode_t *vp)
 	 * Don't check for interest in NOTE_REVOKE; it's always posted
 	 * because it sets EV_EOF.
 	 */
-	KNOTE(&vp->v_klist, NOTE_REVOKE);
+	KNOTE(&vp->v_klist->vk_klist, NOTE_REVOKE);
 	mutex_exit(vp->v_interlock);
 
 	/*
@@ -2095,3 +2099,28 @@ vshareilock(vnode_t *tvp, vnode_t *fvp)
 	tvp->v_interlock = fvp->v_interlock;
 	mutex_obj_free(oldlock);
 }
+
+void
+vshareklist(vnode_t *tvp, vnode_t *fvp)
+{
+	/*
+	 * If two vnodes share klist state, they must also share
+	 * an interlock.
+	 */
+	KASSERT(tvp->v_interlock == fvp->v_interlock);
+
+	/*
+	 * We make the following assumptions:
+	 *
+	 * ==> Some other synchronization is happening outside of
+	 *     our view to make this safe.
+	 *
+	 * ==> That the "to" vnode will have the necessary references
+	 *     on the "from" vnode so that the storage for the klist
+	 *     won't be yanked out from beneath us (the vnode_impl).
+	 *
+	 * ==> If "from" is also sharing, we then assume that "from"
+	 *     has the necessary references, and so on.
+	 */
+	tvp->v_klist = fvp->v_klist;
+}

Index: src/sys/kern/vfs_vnops.c
diff -u src/sys/kern/vfs_vnops.c:1.233 src/sys/kern/vfs_vnops.c:1.234
--- src/sys/kern/vfs_vnops.c:1.233	Wed Jul  6 13:52:24 2022
+++ src/sys/kern/vfs_vnops.c	Mon Jul 18 04:30:30 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: vfs_vnops.c,v 1.233 2022/07/06 13:52:24 riastradh Exp $	*/
+/*	$NetBSD: vfs_vnops.c,v 1.234 2022/07/18 04:30:30 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2009 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.233 2022/07/06 13:52:24 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.234 2022/07/18 04:30:30 thorpej Exp $");
 
 #include "veriexec.h"
 
@@ -79,7 +79,7 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
-#include <sys/vnode.h>
+#include <sys/vnode_impl.h>
 #include <sys/ioctl.h>
 #include <sys/tty.h>
 #include <sys/poll.h>
@@ -1428,9 +1428,17 @@ vn_knote_to_interest(const struct knote 
 void
 vn_knote_attach(struct vnode *vp, struct knote *kn)
 {
+	struct vnode_klist *vk = vp->v_klist;
 	long interest = 0;
 
 	/*
+	 * In the case of layered / stacked file systems, knotes
+	 * should only ever be associated with the base vnode.
+	 */
+	KASSERT(kn->kn_hook == vp);
+	KASSERT(vp->v_klist == &VNODE_TO_VIMPL(vp)->vi_klist);
+
+	/*
 	 * We maintain a bitmask of the kevents that there is interest in,
 	 * to minimize the impact of having watchers.  It's silly to have
 	 * to traverse vn_klist every time a read or write happens simply
@@ -1439,18 +1447,23 @@ vn_knote_attach(struct vnode *vp, struct
 	 */
 
 	mutex_enter(vp->v_interlock);
-	SLIST_INSERT_HEAD(&vp->v_klist, kn, kn_selnext);
-	SLIST_FOREACH(kn, &vp->v_klist, kn_selnext) {
+	SLIST_INSERT_HEAD(&vk->vk_klist, kn, kn_selnext);
+	SLIST_FOREACH(kn, &vk->vk_klist, kn_selnext) {
 		interest |= vn_knote_to_interest(kn);
 	}
-	vp->v_klist_interest = interest;
+	vk->vk_interest = interest;
 	mutex_exit(vp->v_interlock);
 }
 
 void
 vn_knote_detach(struct vnode *vp, struct knote *kn)
 {
-	int interest = 0;
+	struct vnode_klist *vk = vp->v_klist;
+	long interest = 0;
+
+	/* See above. */
+	KASSERT(kn->kn_hook == vp);
+	KASSERT(vp->v_klist == &VNODE_TO_VIMPL(vp)->vi_klist);
 
 	/*
 	 * We special case removing the head of the list, because:
@@ -1464,16 +1477,16 @@ vn_knote_detach(struct vnode *vp, struct
 	 */
 
 	mutex_enter(vp->v_interlock);
-	if (__predict_true(kn == SLIST_FIRST(&vp->v_klist))) {
-		SLIST_REMOVE_HEAD(&vp->v_klist, kn_selnext);
-		SLIST_FOREACH(kn, &vp->v_klist, kn_selnext) {
+	if (__predict_true(kn == SLIST_FIRST(&vk->vk_klist))) {
+		SLIST_REMOVE_HEAD(&vk->vk_klist, kn_selnext);
+		SLIST_FOREACH(kn, &vk->vk_klist, kn_selnext) {
 			interest |= vn_knote_to_interest(kn);
 		}
-		vp->v_klist_interest = interest;
+		vk->vk_interest = interest;
 	} else {
 		struct knote *thiskn, *nextkn, *prevkn = NULL;
 
-		SLIST_FOREACH_SAFE(thiskn, &vp->v_klist, kn_selnext, nextkn) {
+		SLIST_FOREACH_SAFE(thiskn, &vk->vk_klist, kn_selnext, nextkn) {
 			if (thiskn == kn) {
 				KASSERT(kn != NULL);
 				KASSERT(prevkn != NULL);
@@ -1484,7 +1497,7 @@ vn_knote_detach(struct vnode *vp, struct
 				prevkn = thiskn;
 			}
 		}
-		vp->v_klist_interest = interest;
+		vk->vk_interest = interest;
 	}
 	mutex_exit(vp->v_interlock);
 }

Index: src/sys/kern/vnode_if.sh
diff -u src/sys/kern/vnode_if.sh:1.75 src/sys/kern/vnode_if.sh:1.76
--- src/sys/kern/vnode_if.sh:1.75	Tue May  3 13:54:18 2022
+++ src/sys/kern/vnode_if.sh	Mon Jul 18 04:30:30 2022
@@ -29,7 +29,7 @@ copyright="\
  * SUCH DAMAGE.
  */
 "
-SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.75 2022/05/03 13:54:18 hannken Exp $'
+SCRIPT_ID='$NetBSD: vnode_if.sh,v 1.76 2022/07/18 04:30:30 thorpej Exp $'
 
 # Script to produce VFS front-end sugar.
 #
@@ -444,7 +444,7 @@ do {									\\
 	 */								\\
 	mutex_enter((thisvp)->v_interlock);				\\
 	if (__predict_true((e) == 0)) {					\\
-		knote(&(thisvp)->v_klist, (n));				\\
+		knote(&(thisvp)->v_klist->vk_klist, (n));		\\
 	}								\\
 	holdrelel((thisvp));						\\
 	mutex_exit((thisvp)->v_interlock);				\\
@@ -557,7 +557,7 @@ do {									\\
 		 * meaningless from the watcher's perspective.		\\
 		 */							\\
 		if (__predict_true(thisvp->v_op != dead_vnodeop_p)) {	\\
-			knote(&thisvp->v_klist,				\\
+			knote(&thisvp->v_klist->vk_klist,		\\
 			    ((ap)->a_fflag & FWRITE)			\\
 			    ? NOTE_CLOSE_WRITE : NOTE_CLOSE);		\\
 		}							\\

Index: src/sys/miscfs/genfs/layer_vfsops.c
diff -u src/sys/miscfs/genfs/layer_vfsops.c:1.54 src/sys/miscfs/genfs/layer_vfsops.c:1.55
--- src/sys/miscfs/genfs/layer_vfsops.c:1.54	Sun Feb 23 15:46:41 2020
+++ src/sys/miscfs/genfs/layer_vfsops.c	Mon Jul 18 04:30:30 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: layer_vfsops.c,v 1.54 2020/02/23 15:46:41 ad Exp $	*/
+/*	$NetBSD: layer_vfsops.c,v 1.55 2022/07/18 04:30:30 thorpej Exp $	*/
 
 /*
  * Copyright (c) 1999 National Aeronautics & Space Administration
@@ -74,7 +74,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: layer_vfsops.c,v 1.54 2020/02/23 15:46:41 ad Exp $");
+__KERNEL_RCSID(0, "$NetBSD: layer_vfsops.c,v 1.55 2022/07/18 04:30:30 thorpej Exp $");
 
 #include <sys/param.h>
 #include <sys/sysctl.h>
@@ -205,10 +205,11 @@ layerfs_loadvnode(struct mount *mp, stru
 
 	xp = kmem_alloc(lmp->layerm_size, KM_SLEEP);
 
-	/* Share the interlock and vmobjlock with the lower node. */
+	/* Share the interlock, vmobjlock, and klist with the lower node. */
 	vshareilock(vp, lowervp);
 	rw_obj_hold(lowervp->v_uobj.vmobjlock);
 	uvm_obj_setlock(&vp->v_uobj, lowervp->v_uobj.vmobjlock);
+	vshareklist(vp, lowervp);
 
 	vp->v_tag = lmp->layerm_tag;
 	vp->v_type = lowervp->v_type;

Index: src/sys/sys/param.h
diff -u src/sys/sys/param.h:1.711 src/sys/sys/param.h:1.712
--- src/sys/sys/param.h:1.711	Mon Jun 20 08:38:56 2022
+++ src/sys/sys/param.h	Mon Jul 18 04:30:30 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: param.h,v 1.711 2022/06/20 08:38:56 yamaguchi Exp $	*/
+/*	$NetBSD: param.h,v 1.712 2022/07/18 04:30:30 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993
@@ -67,7 +67,7 @@
  *	2.99.9		(299000900)
  */
 
-#define	__NetBSD_Version__	999009800	/* NetBSD 9.99.98 */
+#define	__NetBSD_Version__	999009900	/* NetBSD 9.99.99 */
 
 #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \
     (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)

Index: src/sys/sys/vnode.h
diff -u src/sys/sys/vnode.h:1.301 src/sys/sys/vnode.h:1.302
--- src/sys/sys/vnode.h:1.301	Fri Mar 25 08:56:36 2022
+++ src/sys/sys/vnode.h	Mon Jul 18 04:30:30 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: vnode.h,v 1.301 2022/03/25 08:56:36 hannken Exp $	*/
+/*	$NetBSD: vnode.h,v 1.302 2022/07/18 04:30:30 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
@@ -179,8 +179,8 @@ struct vnode {
 	enum vtype	v_type;			/* -   vnode type */
 	enum vtagtype	v_tag;			/* -   type of underlying data */
 	void 		*v_data;		/* -   private data for fs */
-	struct klist	v_klist;		/* i   notes attached to vnode */
-	long		v_klist_interest;	/* i   what the noes are interested in */
+	struct vnode_klist *v_klist;		/* i   kevent / knote info */
+
 	void		*v_segvguard;		/* e   for PAX_SEGVGUARD */
 };
 #define	v_mountedhere	v_un.vu_mountedhere
@@ -190,6 +190,19 @@ struct vnode {
 #define	v_ractx		v_un.vu_ractx
 
 typedef struct vnode vnode_t;
+
+/*
+ * Structure that encompasses the kevent state for a vnode.  This is
+ * carved out as a separate structure because some vnodes may share
+ * this state with one another.
+ *
+ * N.B. if two vnodes share a vnode_klist, then they must also share
+ * v_interlock.
+ */
+struct vnode_klist {
+	struct klist	vk_klist;	/* i   notes attached to vnode */
+	long		vk_interest;	/* i   what the notes are interested in */
+};
 #endif
 
 /*
@@ -415,7 +428,7 @@ void vref(struct vnode *);
  * Macro to determine kevent interest on a vnode.
  */
 #define	VN_KEVENT_INTEREST(vp, n)					\
-	((vp)->v_klist_interest != 0)
+	(((vp)->v_klist->vk_interest & (n)) != 0)
 
 static inline void
 VN_KNOTE(struct vnode *vp, long hint)
@@ -429,7 +442,7 @@ VN_KNOTE(struct vnode *vp, long hint)
 	 */
 	if (__predict_false(VN_KEVENT_INTEREST(vp, hint))) {
 		mutex_enter(vp->v_interlock);
-		knote(&vp->v_klist, hint);
+		knote(&vp->v_klist->vk_klist, hint);
 		mutex_exit(vp->v_interlock);
 	}
 }
@@ -594,6 +607,7 @@ int	vdead_check(struct vnode *, int);
 void	vrevoke(struct vnode *);
 void	vremfree(struct vnode *);
 void	vshareilock(struct vnode *, struct vnode *);
+void	vshareklist(struct vnode *, struct vnode *);
 int	vrefcnt(struct vnode *);
 int	vcache_get(struct mount *, const void *, size_t, struct vnode **);
 int	vcache_new(struct mount *, struct vnode *,

Index: src/sys/sys/vnode_impl.h
diff -u src/sys/sys/vnode_impl.h:1.23 src/sys/sys/vnode_impl.h:1.24
--- src/sys/sys/vnode_impl.h:1.23	Sun Mar 22 14:38:37 2020
+++ src/sys/sys/vnode_impl.h	Mon Jul 18 04:30:30 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: vnode_impl.h,v 1.23 2020/03/22 14:38:37 ad Exp $	*/
+/*	$NetBSD: vnode_impl.h,v 1.24 2022/07/18 04:30:30 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2016, 2019, 2020 The NetBSD Foundation, Inc.
@@ -77,6 +77,12 @@ struct vnode_impl {
 	struct vcache_key vi_key;		/* c   vnode cache key */
 
 	/*
+	 * The vnode klist is accessed frequently, but rarely
+	 * modified.
+	 */
+	struct vnode_klist vi_klist;		/* i   kevent / knote state */
+
+	/*
 	 * vnode cache, LRU and syncer.  This all changes with some
 	 * regularity so keep it together.
 	 */

Index: src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh
diff -u src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh:1.5 src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh:1.6
--- src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh:1.5	Sat Jun  4 20:32:49 2022
+++ src/tests/lib/libc/kevent_nullmnt/t_nullmnt.sh	Mon Jul 18 04:30:30 2022
@@ -33,7 +33,6 @@ nullmnt_upper_lower_head()
 }
 nullmnt_upper_lower_body()
 {
-	atf_expect_fail "PR kern/56713"
 	nullmnt_common lower_dir upper_dir
 } 
 nullmnt_upper_lower_cleanup()
@@ -48,7 +47,6 @@ nullmnt_upper_upper_head()
 }
 nullmnt_upper_upper_body()
 {
-	atf_expect_fail "PR kern/56713"
 	nullmnt_common upper_dir upper_dir
 } 
 nullmnt_upper_upper_cleanup()

Reply via email to