On 10/04/20(Fri) 14:43, Martin Pieuchot wrote: > Diff below reduces kqueue_scan() to the collect of events on a given > kqueue and let its caller, sys_kevent(), responsible for the copyout(9). > > Apart from the code simplification, this refactoring clearly separates > kqueue_scan() from the syscall logic. That should allow us to re-use > the function in a different context and to address its need for locking > independently. > > Since the number of events that are ready to be collected can be bigger > than the size of the array, the pair kqueue_scan()/copyout() may be > called multiple times. In that case, successive calls should no longer > block, this is performed by using a zero, but not NULL, timeout which > correspond to a non-blocking scan. > > This is the next piece of the ongoing work to move select/poll/kqueue. > I'd like to be sure it doesn't introduce any regression.
Updated diff based on recent changes and incorporating feedbacks from visa@. Comments? Oks? Index: kern/kern_event.c =================================================================== RCS file: /cvs/src/sys/kern/kern_event.c,v retrieving revision 1.132 diff -u -p -r1.132 kern_event.c --- kern/kern_event.c 17 May 2020 10:53:14 -0000 1.132 +++ kern/kern_event.c 25 May 2020 11:54:08 -0000 @@ -62,9 +62,6 @@ void KQREF(struct kqueue *); void KQRELE(struct kqueue *); int kqueue_sleep(struct kqueue *, struct timespec *); -int kqueue_scan(struct kqueue_scan_state *scan, int maxevents, - struct kevent *ulistp, struct timespec *timeout, - struct proc *p, int *retval); int kqueue_read(struct file *, struct uio *, int); int kqueue_write(struct file *, struct uio *, int); @@ -545,6 +542,7 @@ sys_kevent(struct proc *p, void *v, regi struct timespec ts; struct timespec *tsp = NULL; int i, n, nerrors, error; + int ready, total; struct kevent kev[KQ_NEVENTS]; if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL) @@ -573,9 +571,9 @@ sys_kevent(struct proc *p, void *v, regi kq = fp->f_data; nerrors = 0; - while (SCARG(uap, nchanges) > 0) { - n = SCARG(uap, nchanges) > KQ_NEVENTS ? - KQ_NEVENTS : SCARG(uap, nchanges); + while ((n = SCARG(uap, nchanges)) > 0) { + if (n > nitems(kev)) + n = nitems(kev); error = copyin(SCARG(uap, changelist), kev, n * sizeof(struct kevent)); if (error) @@ -611,14 +609,41 @@ sys_kevent(struct proc *p, void *v, regi goto done; } + KQREF(kq); FRELE(fp, p); + /* + * Collect as many events as we can. The timeout on successive + * loops is disabled (kqueue_scan() becomes non-blocking). + */ + total = 0; + error = 0; kqueue_scan_setup(&scan, kq); - error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist), - tsp, p, &n); + while ((n = SCARG(uap, nevents) - total) > 0) { + if (n > nitems(kev)) + n = nitems(kev); + ready = kqueue_scan(&scan, n, kev, tsp, p, &error); + if (ready == 0) + break; + error = copyout(kev, SCARG(uap, eventlist) + total, + sizeof(struct kevent) * ready); +#ifdef KTRACE + if (KTRPOINT(p, KTR_STRUCT)) + ktrevent(p, kev, ready); +#endif + total += ready; + /* + * Stop if there was an error or if we had enough + * place to collect all events that were ready. + */ + if (error || ready < n) + break; + tsp = &ts; /* successive loops non-blocking */ + timespecclear(tsp); + } kqueue_scan_finish(&scan); KQRELE(kq); - *retval = n; + *retval = total; return (error); done: @@ -872,15 +897,18 @@ kqueue_sleep(struct kqueue *kq, struct t return (error); } +/* + * Scan the kqueue, blocking if necessary until the target time is reached. + * If tsp is NULL we block indefinitely. If tsp->ts_secs/nsecs are both + * 0 we do not block at all. + */ int kqueue_scan(struct kqueue_scan_state *scan, int maxevents, - struct kevent *ulistp, struct timespec *tsp, struct proc *p, int *retval) + struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp) { - struct kevent *kevp; struct knote *kn; struct kqueue *kq = scan->kqs_kq; int s, count, nkev = 0, error = 0; - struct kevent kev[KQ_NEVENTS]; count = maxevents; if (count == 0) @@ -892,7 +920,6 @@ retry: goto done; } - kevp = &kev[0]; s = splhigh(); if (kq->kq_count == 0) { if ((tsp != NULL && !timespecisset(tsp)) || @@ -933,14 +960,8 @@ retry: while (count) { kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe); if (kn->kn_filter == EVFILT_MARKER) { - if (kn == &scan->kqs_end) { - TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, - kn_tqe); - splx(s); - if (scan->kqs_nevent == 0) - goto retry; - goto done; - } + if (kn == &scan->kqs_end) + break; /* Move start marker past another thread's marker. */ TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); @@ -976,6 +997,9 @@ retry: count--; scan->kqs_nevent++; + /* + * Post-event action on the note + */ if (kn->kn_flags & EV_ONESHOT) { splx(s); kn->kn_fop->f_detach(kn); @@ -1001,35 +1025,14 @@ retry: knote_release(kn); } kqueue_check(kq); - if (nkev == KQ_NEVENTS) { - splx(s); -#ifdef KTRACE - if (KTRPOINT(p, KTR_STRUCT)) - ktrevent(p, kev, nkev); -#endif - error = copyout(kev, ulistp, - sizeof(struct kevent) * nkev); - ulistp += nkev; - nkev = 0; - kevp = &kev[0]; - s = splhigh(); - if (error) - break; - } } TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe); splx(s); + if (scan->kqs_nevent == 0) + goto retry; done: - if (nkev != 0) { -#ifdef KTRACE - if (KTRPOINT(p, KTR_STRUCT)) - ktrevent(p, kev, nkev); -#endif - error = copyout(kev, ulistp, - sizeof(struct kevent) * nkev); - } - *retval = maxevents - count; - return (error); + *errorp = error; + return (nkev); } void Index: sys/event.h =================================================================== RCS file: /cvs/src/sys/sys/event.h,v retrieving revision 1.37 diff -u -p -r1.37 event.h --- sys/event.h 17 May 2020 10:53:14 -0000 1.37 +++ sys/event.h 25 May 2020 11:34:34 -0000 @@ -211,6 +211,8 @@ extern int kqueue_register(struct kqueue struct kevent *kev, struct proc *p); extern void kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *); extern void kqueue_scan_finish(struct kqueue_scan_state *); +extern int kqueue_scan(struct kqueue_scan_state *, int, struct kevent *, + struct timespec *, struct proc *, int *); extern int filt_seltrue(struct knote *kn, long hint); extern int seltrue_kqfilter(dev_t, struct knote *); extern void klist_insert(struct klist *, struct knote *);