When a knote's kn_status is updated, it is necessary to lock the kqueue
that owns the knote, to ensure proper serialization. filt_proc() has
a mistake in this, and the following diff fixes it.

proc_filtops is MP-unsafe and all its callbacks run with the kernel
locked. The kernel lock should provide sufficient memory synchronization
for filt_procdetach() to check condition (kn->kn_status & KN_DETACHED)
without kq_lock; the value of kn_status seen by filt_procdetach() should
be at least as recent as seen by the latest filt_proc(NOTE_EXIT) call.

Also, the splitting of the splhigh() section in filt_proc() should be
harmless. The KN_DETACHED flag is only checked by filt_procdetach()
which runs in process context.

OK?

Index: kern/kern_event.c
===================================================================
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.166
diff -u -p -r1.166 kern_event.c
--- kern/kern_event.c   11 Jun 2021 04:29:54 -0000      1.166
+++ kern/kern_event.c   14 Jun 2021 13:37:55 -0000
@@ -331,6 +331,8 @@ filt_procdetach(struct knote *kn)
        struct process *pr = kn->kn_ptr.p_process;
        int s;
 
+       KERNEL_ASSERT_LOCKED();
+
        if (kn->kn_status & KN_DETACHED)
                return;
 
@@ -342,6 +344,7 @@ filt_procdetach(struct knote *kn)
 int
 filt_proc(struct knote *kn, long hint)
 {
+       struct kqueue *kq = kn->kn_kq;
        u_int event;
 
        /*
@@ -363,8 +366,11 @@ filt_proc(struct knote *kn, long hint)
                struct process *pr = kn->kn_ptr.p_process;
                int s;
 
-               s = splhigh();
+               mtx_enter(&kq->kq_lock);
                kn->kn_status |= KN_DETACHED;
+               mtx_leave(&kq->kq_lock);
+
+               s = splhigh();
                kn->kn_flags |= (EV_EOF | EV_ONESHOT);
                kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig);
                klist_remove_locked(&pr->ps_klist, kn);
@@ -391,7 +397,7 @@ filt_proc(struct knote *kn, long hint)
                kev.fflags = kn->kn_sfflags;
                kev.data = kn->kn_id;                   /* parent */
                kev.udata = kn->kn_udata;               /* preserve udata */
-               error = kqueue_register(kn->kn_kq, &kev, NULL);
+               error = kqueue_register(kq, &kev, NULL);
                if (error)
                        kn->kn_fflags |= NOTE_TRACKERR;
        }
Index: sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.55
diff -u -p -r1.55 event.h
--- sys/event.h 2 Jun 2021 13:56:28 -0000       1.55
+++ sys/event.h 14 Jun 2021 13:37:55 -0000
@@ -228,6 +228,7 @@ struct filterops {
  * Locking:
  *     I       immutable after creation
  *     o       object lock
+ *     q       kn_kq->kq_lock
  */
 struct knote {
        SLIST_ENTRY(knote)      kn_link;        /* for fd */
@@ -235,7 +236,7 @@ struct knote {
        TAILQ_ENTRY(knote)      kn_tqe;
        struct                  kqueue *kn_kq;  /* [I] which queue we are on */
        struct                  kevent kn_kevent;
-       int                     kn_status;
+       int                     kn_status;      /* [q] */
        int                     kn_sfflags;     /* [o] saved filter flags */
        __int64_t               kn_sdata;       /* [o] saved data field */
        union {

Reply via email to