On 22-05-08 05:20:03, Visa Hankala wrote:
> On Sat, May 07, 2022 at 07:47:12PM +1000, Joel Sing wrote:
> > After upgrading an OpenBSD arm64 (pine64+ board) Go builder to 7.1, the
> > following has been triggered twice (once every couple of days):
> > 
> > panic: kernel diagnostic assertion "kn->kn_kq == kq" failed: file 
> > "/usr/src/sys/kern/kern_event.c", line 1811
> > Stopped at      panic+0x160:    cmp     w21, #0x0
> >     TID    PID    UID     PRFLAGS     PFLAGS  CPU  COMMAND
> >  152330  84924   1001         0x3  0x4000000    3  http.test
> > *427942  85055   1001         0x3  0x4000000    1  net.test
> >  460437  53928      0     0x14000      0x200    2  tztq
> >  197440  95098      0     0x14000      0x200    0  softnet
> > db_enter() at panic+0x15c
> > panic() at __assert+0x24
> > panic() at knote_remove+0x2c4
> > knote_remove() at knote_fdclose+0x88
> > knote_fdclose() at fdrelease+0x94
> > fdrelease() at svc_handler+0x2d4
> > svc_handler() at do_el0_sync+0xa0
> > 
> > This was stable with 7.0, hence seems to be a regression between 7.0 and 7.1
> > (presumably unlocking related).
> 
> There is a race condition in knote_remove(). The function does not
> take into account that another thread might resize kq_knlist[] while
> knote_remove() clears a knote.
> 
> I think the flaw has been present since r1.109 of kern_event.c when I
> made the subsystem allow sleeping inside event filters. I guess the
> recent unlockings now enable such timings that trigger the error.
> 
> Joel, could you test the following patch?
> 
> For a speedier trial, the bug's incidence rate can be increased by
> lowering the value of KQEXTENT in sys/sys/eventvar.h. Even a value
> as low as 1 should work (though one can ask if this will actually
> test the original issue).

Thanks - I recompiled with KQEXTENT = 8 and was able to reproduce
the problem in less than an hour. Applying the diff, the machine
has been up and running for nearly 48 hours without an issue.

Diff makes sense - ok jsing@

> Index: sys/kern/kern_event.c
> ===================================================================
> RCS file: src/sys/kern/kern_event.c,v
> retrieving revision 1.187
> diff -u -p -r1.187 kern_event.c
> --- sys/kern/kern_event.c     6 May 2022 13:12:16 -0000       1.187
> +++ sys/kern/kern_event.c     8 May 2022 04:31:21 -0000
> @@ -121,8 +121,8 @@ void      knote_dequeue(struct knote *kn);
>  int  knote_acquire(struct knote *kn, struct klist *, int);
>  void knote_release(struct knote *kn);
>  void knote_activate(struct knote *kn);
> -void knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list,
> -         int purge);
> +void knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist,
> +         int idx, int purge);
>  
>  void filt_kqdetach(struct knote *kn);
>  int  filt_kqueue(struct knote *kn, long hint);
> @@ -1563,10 +1563,10 @@ kqueue_purge(struct proc *p, struct kque
>  
>       mtx_enter(&kq->kq_lock);
>       for (i = 0; i < kq->kq_knlistsize; i++)
> -             knote_remove(p, kq, &kq->kq_knlist[i], 1);
> +             knote_remove(p, kq, &kq->kq_knlist, i, 1);
>       if (kq->kq_knhashmask != 0) {
>               for (i = 0; i < kq->kq_knhashmask + 1; i++)
> -                     knote_remove(p, kq, &kq->kq_knhash[i], 1);
> +                     knote_remove(p, kq, &kq->kq_knhash, i, 1);
>       }
>       mtx_leave(&kq->kq_lock);
>  }
> @@ -1789,13 +1789,15 @@ knote(struct klist *list, long hint)
>   * remove all knotes from a specified knlist
>   */
>  void
> -knote_remove(struct proc *p, struct kqueue *kq, struct knlist *list, int 
> purge)
> +knote_remove(struct proc *p, struct kqueue *kq, struct knlist **plist, int 
> idx,
> +    int purge)
>  {
>       struct knote *kn;
>  
>       MUTEX_ASSERT_LOCKED(&kq->kq_lock);
>  
> -     while ((kn = SLIST_FIRST(list)) != NULL) {
> +     /* Always fetch array pointer as another thread can resize kq_knlist. */
> +     while ((kn = SLIST_FIRST(*plist + idx)) != NULL) {
>               KASSERT(kn->kn_kq == kq);
>  
>               if (!purge) {
> @@ -1863,7 +1865,7 @@ knote_fdclose(struct proc *p, int fd)
>       LIST_FOREACH(kq, &fdp->fd_kqlist, kq_next) {
>               mtx_enter(&kq->kq_lock);
>               if (fd < kq->kq_knlistsize)
> -                     knote_remove(p, kq, &kq->kq_knlist[fd], 0);
> +                     knote_remove(p, kq, &kq->kq_knlist, fd, 0);
>               mtx_leave(&kq->kq_lock);
>       }
>  }

Reply via email to