On 10/04/20(Fri) 14:43, Martin Pieuchot wrote:
> Diff below reduces kqueue_scan() to the collect of events on a given
> kqueue and let its caller, sys_kevent(), responsible for the copyout(9).
> 
> Apart from the code simplification, this refactoring clearly separates
> kqueue_scan() from the syscall logic.  That should allow us to re-use
> the function in a different context and to address its need for locking
> independently.
> 
> Since the number of events that are ready to be collected can be bigger
> than the size of the array, the pair kqueue_scan()/copyout() may be
> called multiple times.  In that case, successive calls should no longer
> block, this is performed by using a zero, but not NULL, timeout which
> correspond to a non-blocking scan.
> 
> This is the next piece of the ongoing work to move select/poll/kqueue.
> I'd like to be sure it doesn't introduce any regression.

Updated diff based on recent changes and incorporating feedbacks from
visa@.

Comments?  Oks?

Index: kern/kern_event.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.132
diff -u -p -r1.132 kern_event.c
--- kern/kern_event.c   17 May 2020 10:53:14 -0000      1.132
+++ kern/kern_event.c   25 May 2020 11:54:08 -0000
@@ -62,9 +62,6 @@ void  KQREF(struct kqueue *);
 void   KQRELE(struct kqueue *);
 
 int    kqueue_sleep(struct kqueue *, struct timespec *);
-int    kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
-                   struct kevent *ulistp, struct timespec *timeout,
-                   struct proc *p, int *retval);
 
 int    kqueue_read(struct file *, struct uio *, int);
 int    kqueue_write(struct file *, struct uio *, int);
@@ -545,6 +542,7 @@ sys_kevent(struct proc *p, void *v, regi
        struct timespec ts;
        struct timespec *tsp = NULL;
        int i, n, nerrors, error;
+       int ready, total;
        struct kevent kev[KQ_NEVENTS];
 
        if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
@@ -573,9 +571,9 @@ sys_kevent(struct proc *p, void *v, regi
        kq = fp->f_data;
        nerrors = 0;
 
-       while (SCARG(uap, nchanges) > 0) {
-               n = SCARG(uap, nchanges) > KQ_NEVENTS ?
-                   KQ_NEVENTS : SCARG(uap, nchanges);
+       while ((n = SCARG(uap, nchanges)) > 0) {
+               if (n > nitems(kev))
+                       n = nitems(kev);
                error = copyin(SCARG(uap, changelist), kev,
                    n * sizeof(struct kevent));
                if (error)
@@ -611,14 +609,41 @@ sys_kevent(struct proc *p, void *v, regi
                goto done;
        }
 
+
        KQREF(kq);
        FRELE(fp, p);
+       /*
+        * Collect as many events as we can.  The timeout on successive
+        * loops is disabled (kqueue_scan() becomes non-blocking).
+        */
+       total = 0;
+       error = 0;
        kqueue_scan_setup(&scan, kq);
-       error = kqueue_scan(&scan, SCARG(uap, nevents), SCARG(uap, eventlist),
-           tsp, p, &n);
+       while ((n = SCARG(uap, nevents) - total) > 0) {
+               if (n > nitems(kev))
+                       n = nitems(kev);
+               ready = kqueue_scan(&scan, n, kev, tsp, p, &error);
+               if (ready == 0)
+                       break;
+               error = copyout(kev, SCARG(uap, eventlist) + total,
+                   sizeof(struct kevent) * ready);
+#ifdef KTRACE
+               if (KTRPOINT(p, KTR_STRUCT))
+                       ktrevent(p, kev, ready);
+#endif
+               total += ready;
+               /*
+                * Stop if there was an error or if we had enough
+                * place to collect all events that were ready.
+                */
+               if (error || ready < n)
+                       break;
+               tsp = &ts;              /* successive loops non-blocking */
+               timespecclear(tsp);
+       }
        kqueue_scan_finish(&scan);
        KQRELE(kq);
-       *retval = n;
+       *retval = total;
        return (error);
 
  done:
@@ -872,15 +897,18 @@ kqueue_sleep(struct kqueue *kq, struct t
        return (error);
 }
 
+/*
+ * Scan the kqueue, blocking if necessary until the target time is reached.
+ * If tsp is NULL we block indefinitely.  If tsp->ts_secs/nsecs are both
+ * 0 we do not block at all.
+ */
 int
 kqueue_scan(struct kqueue_scan_state *scan, int maxevents,
-    struct kevent *ulistp, struct timespec *tsp, struct proc *p, int *retval)
+    struct kevent *kevp, struct timespec *tsp, struct proc *p, int *errorp)
 {
-       struct kevent *kevp;
        struct knote *kn;
        struct kqueue *kq = scan->kqs_kq;
        int s, count, nkev = 0, error = 0;
-       struct kevent kev[KQ_NEVENTS];
 
        count = maxevents;
        if (count == 0)
@@ -892,7 +920,6 @@ retry:
                goto done;
        }
 
-       kevp = &kev[0];
        s = splhigh();
        if (kq->kq_count == 0) {
                if ((tsp != NULL && !timespecisset(tsp)) ||
@@ -933,14 +960,8 @@ retry:
        while (count) {
                kn = TAILQ_NEXT(&scan->kqs_start, kn_tqe);
                if (kn->kn_filter == EVFILT_MARKER) {
-                       if (kn == &scan->kqs_end) {
-                               TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start,
-                                   kn_tqe);
-                               splx(s);
-                               if (scan->kqs_nevent == 0)
-                                       goto retry;
-                               goto done;
-                       }
+                       if (kn == &scan->kqs_end)
+                               break;
 
                        /* Move start marker past another thread's marker. */
                        TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
@@ -976,6 +997,9 @@ retry:
                count--;
                scan->kqs_nevent++;
 
+               /*
+                * Post-event action on the note
+                */
                if (kn->kn_flags & EV_ONESHOT) {
                        splx(s);
                        kn->kn_fop->f_detach(kn);
@@ -1001,35 +1025,14 @@ retry:
                        knote_release(kn);
                }
                kqueue_check(kq);
-               if (nkev == KQ_NEVENTS) {
-                       splx(s);
-#ifdef KTRACE
-                       if (KTRPOINT(p, KTR_STRUCT))
-                               ktrevent(p, kev, nkev);
-#endif
-                       error = copyout(kev, ulistp,
-                           sizeof(struct kevent) * nkev);
-                       ulistp += nkev;
-                       nkev = 0;
-                       kevp = &kev[0];
-                       s = splhigh();
-                       if (error)
-                               break;
-               }
        }
        TAILQ_REMOVE(&kq->kq_head, &scan->kqs_start, kn_tqe);
        splx(s);
+       if (scan->kqs_nevent == 0)
+               goto retry;
 done:
-       if (nkev != 0) {
-#ifdef KTRACE
-               if (KTRPOINT(p, KTR_STRUCT))
-                       ktrevent(p, kev, nkev);
-#endif
-               error = copyout(kev, ulistp,
-                   sizeof(struct kevent) * nkev);
-       }
-       *retval = maxevents - count;
-       return (error);
+       *errorp = error;
+       return (nkev);
 }
 
 void
Index: sys/event.h
===================================================================
RCS file: /cvs/src/sys/sys/event.h,v
retrieving revision 1.37
diff -u -p -r1.37 event.h
--- sys/event.h 17 May 2020 10:53:14 -0000      1.37
+++ sys/event.h 25 May 2020 11:34:34 -0000
@@ -211,6 +211,8 @@ extern int  kqueue_register(struct kqueue
                    struct kevent *kev, struct proc *p);
 extern void    kqueue_scan_setup(struct kqueue_scan_state *, struct kqueue *);
 extern void    kqueue_scan_finish(struct kqueue_scan_state *);
+extern int     kqueue_scan(struct kqueue_scan_state *, int, struct kevent *,
+                   struct timespec *, struct proc *, int *);
 extern int     filt_seltrue(struct knote *kn, long hint);
 extern int     seltrue_kqfilter(dev_t, struct knote *);
 extern void    klist_insert(struct klist *, struct knote *);

Reply via email to