On Mon, Jun 14, 2021 at 11:44:15PM +0200, Martin Pieuchot wrote:
> On 14/06/21(Mon) 13:45, Visa Hankala wrote:
> > When a knote's kn_status is updated, it is necessary to lock the kqueue
> > that owns the knote, to ensure proper serialization. filt_proc() has
> > a mistake in this, and the following diff fixes it.
>
> The fix is here to ensure `kn_status' cannot be written by two different
> threads at the same time, right?
That is correct. Even though knotes of EVFILT_PROC / proc_filtops are
processed with the kernel locked, the knotes can be knote_acquire()'ed
without the kernel lock once kevent(2) is unlocked.
> > proc_filtops is MP-unsafe and all its callbacks run with the kernel
> > locked. The kernel lock should provide sufficient memory synchronization
> > for filt_procdetach() to check condition (kn->kn_status & KN_DETACHED)
> > without kq_lock; the value of kn_status seen by filt_procdetach() should
> > be at least as recent as seen by the latest filt_proc(NOTE_EXIT) call.
>
> I understand that protecting the read in filt_procdetach() is not
> strictly necessary, but isn't it too clever? I haven't seen any other
> piece of code that access `kn_status' w/o holding `kq_lock'. Unless you
> have pending plans for this, I'd suggest we also grab the lock there.
Well, I don't know if it is too clever. I think that the critical section
is longer than what the mutex covers, and rely on the kernel lock to
provide the serialization between filt_proc() and filt_procdetach().
However, here is an updated diff:
Index: kern/kern_event.c
===================================================================
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.166
diff -u -p -r1.166 kern_event.c
--- kern/kern_event.c 11 Jun 2021 04:29:54 -0000 1.166
+++ kern/kern_event.c 15 Jun 2021 12:51:41 -0000
@@ -328,10 +328,15 @@ filt_procattach(struct knote *kn)
void
filt_procdetach(struct knote *kn)
{
+ struct kqueue *kq = kn->kn_kq;
struct process *pr = kn->kn_ptr.p_process;
- int s;
+ int s, status;
- if (kn->kn_status & KN_DETACHED)
+ mtx_enter(&kq->kq_lock);
+ status = kn->kn_status;
+ mtx_leave(&kq->kq_lock);
+
+ if (status & KN_DETACHED)
return;
s = splhigh();
@@ -342,6 +347,7 @@ filt_procdetach(struct knote *kn)
int
filt_proc(struct knote *kn, long hint)
{
+ struct kqueue *kq = kn->kn_kq;
u_int event;
/*
@@ -363,8 +369,11 @@ filt_proc(struct knote *kn, long hint)
struct process *pr = kn->kn_ptr.p_process;
int s;
- s = splhigh();
+ mtx_enter(&kq->kq_lock);
kn->kn_status |= KN_DETACHED;
+ mtx_leave(&kq->kq_lock);
+
+ s = splhigh();
kn->kn_flags |= (EV_EOF | EV_ONESHOT);
kn->kn_data = W_EXITCODE(pr->ps_xexit, pr->ps_xsig);
klist_remove_locked(&pr->ps_klist, kn);
@@ -391,7 +400,7 @@ filt_proc(struct knote *kn, long hint)
kev.fflags = kn->kn_sfflags;
kev.data = kn->kn_id; /* parent */
kev.udata = kn->kn_udata; /* preserve udata */
- error = kqueue_register(kn->kn_kq, &kev, NULL);
+ error = kqueue_register(kq, &kev, NULL);
if (error)
kn->kn_fflags |= NOTE_TRACKERR;
}
Index: sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.55
diff -u -p -r1.55 event.h
--- sys/event.h 2 Jun 2021 13:56:28 -0000 1.55
+++ sys/event.h 15 Jun 2021 12:51:41 -0000
@@ -228,6 +228,7 @@ struct filterops {
* Locking:
* I immutable after creation
* o object lock
+ * q kn_kq->kq_lock
*/
struct knote {
SLIST_ENTRY(knote) kn_link; /* for fd */
@@ -235,7 +236,7 @@ struct knote {
TAILQ_ENTRY(knote) kn_tqe;
struct kqueue *kn_kq; /* [I] which queue we are on */
struct kevent kn_kevent;
- int kn_status;
+ int kn_status; /* [q] */
int kn_sfflags; /* [o] saved filter flags */
__int64_t kn_sdata; /* [o] saved data field */
union {