The NFS poll(2)/select(2) and kqueue(2) behaviors are inconsistent with
each other. The diff below uses the kernel-only NOTE_IMM hint to make the
kqueue handlers behave like the current poll handler: when NOTE_IMM is
set, the poller is bypassed.
The new EVFILT_WRITE handler does not check for NOTE_IMM because it is
unlikely to introduce a regression.
Is this the preferred approach? OK?
Index: nfs/nfs_kq.c
===================================================================
RCS file: /cvs/src/sys/nfs/nfs_kq.c,v
retrieving revision 1.30
diff -u -p -r1.30 nfs_kq.c
--- nfs/nfs_kq.c 7 Apr 2020 13:27:52 -0000 1.30
+++ nfs/nfs_kq.c 31 May 2020 08:43:36 -0000
@@ -50,9 +50,12 @@
#include <nfs/nfs_var.h>
void nfs_kqpoll(void *);
+int nfs_kqwatch(struct vnode *);
+void nfs_kqunwatch(struct vnode *);
void filt_nfsdetach(struct knote *);
int filt_nfsread(struct knote *, long);
+int filt_nfswrite(struct knote *, long);
int filt_nfsvnode(struct knote *, long);
struct kevq {
@@ -182,11 +185,19 @@ void
filt_nfsdetach(struct knote *kn)
{
struct vnode *vp = (struct vnode *)kn->kn_hook;
- struct kevq *ke;
klist_remove(&vp->v_selectinfo.si_note, kn);
/* Remove the vnode from watch list */
+ if ((kn->kn_sfflags & NOTE_IMM) == 0)
+ nfs_kqunwatch(vp);
+}
+
+void
+nfs_kqunwatch(struct vnode *vp)
+{
+ struct kevq *ke;
+
rw_enter_write(&nfskevq_lock);
SLIST_FOREACH(ke, &kevlist, kev_link) {
if (ke->vp == vp) {
@@ -238,6 +249,22 @@ filt_nfsread(struct knote *kn, long hint
}
int
+filt_nfswrite(struct knote *kn, long hint)
+{
+ /*
+ * filesystem is gone, so set the EOF flag and schedule
+ * the knote for deletion.
+ */
+ if (hint == NOTE_REVOKE) {
+ kn->kn_flags |= (EV_EOF | EV_ONESHOT);
+ return (1);
+ }
+
+ kn->kn_data = 0;
+ return (1);
+}
+
+int
filt_nfsvnode(struct knote *kn, long hint)
{
if (kn->kn_sfflags & hint)
@@ -256,6 +283,13 @@ static const struct filterops nfsread_fi
.f_event = filt_nfsread,
};
+const struct filterops nfswrite_filtops = {
+ .f_flags = FILTEROP_ISFD,
+ .f_attach = NULL,
+ .f_detach = filt_nfsdetach,
+ .f_event = filt_nfswrite,
+};
+
static const struct filterops nfsvnode_filtops = {
.f_flags = FILTEROP_ISFD,
.f_attach = NULL,
@@ -269,10 +303,6 @@ nfs_kqfilter(void *v)
struct vop_kqfilter_args *ap = v;
struct vnode *vp;
struct knote *kn;
- struct kevq *ke;
- int error = 0;
- struct vattr attr;
- struct proc *p = curproc; /* XXX */
vp = ap->a_vp;
kn = ap->a_kn;
@@ -286,6 +316,9 @@ nfs_kqfilter(void *v)
case EVFILT_READ:
kn->kn_fop = &nfsread_filtops;
break;
+ case EVFILT_WRITE:
+ kn->kn_fop = &nfswrite_filtops;
+ break;
case EVFILT_VNODE:
kn->kn_fop = &nfsvnode_filtops;
break;
@@ -298,7 +331,27 @@ nfs_kqfilter(void *v)
/*
* Put the vnode to watched list.
*/
-
+ if ((kn->kn_sfflags & NOTE_IMM) == 0) {
+ int error;
+
+ error = nfs_kqwatch(vp);
+ if (error)
+ return (error);
+ }
+
+ klist_insert(&vp->v_selectinfo.si_note, kn);
+
+ return (0);
+}
+
+int
+nfs_kqwatch(struct vnode *vp)
+{
+ struct proc *p = curproc; /* XXX */
+ struct vattr attr;
+ struct kevq *ke;
+ int error = 0;
+
/*
* Fetch current attributes. It's only needed when the vnode
* is not watched yet, but we need to do this without lock
@@ -338,8 +391,6 @@ nfs_kqfilter(void *v)
/* kick the poller */
wakeup(pnfskq);
-
- klist_insert(&vp->v_selectinfo.si_note, kn);
out:
rw_exit_write(&nfskevq_lock);