On Wed, Aug 31, 2022 at 04:48:37PM -0400, aisha wrote:
> I've added a patch which adds support for NOTE_{,U,M,N}SECONDS for
> EVFILT_TIMER in the kqueue interface.

It sort of makes sense to add an option to specify timeouts in
sub-millisecond precision. It feels complete overengineering to add
multiple time units on the level of the kernel interface. However,
it looks that FreeBSD and NetBSD have already done this following
macOS' lead...

> I've also added the NOTE_ABSTIME but haven't done any actual implementation
> there as I am not sure how the `data` field should be interpreted (is it
> absolute time in seconds since epoch?).

I think FreeBSD and NetBSD take NOTE_ABSTIME as time since the epoch.

Below is a revised patch that takes into account some corner cases.
It tries to be API-compatible with FreeBSD and NetBSD. I have adjusted
the NOTE_{,M,U,N}SECONDS flags so that they are enum-like.

The manual page bits are from NetBSD.

It is quite late to introduce a feature like this within this release
cycle. Until now, the timer code has ignored the fflags field. There
might be pieces of software that are careless with struct kevent and
that would break as a result of this patch. Programs that are widely
used on different BSDs are probably fine already, though.

Index: lib/libc/sys/kqueue.2
===================================================================
RCS file: src/lib/libc/sys/kqueue.2,v
retrieving revision 1.46
diff -u -p -r1.46 kqueue.2
--- lib/libc/sys/kqueue.2       31 Mar 2022 17:27:16 -0000      1.46
+++ lib/libc/sys/kqueue.2       10 Sep 2022 13:01:36 -0000
@@ -457,17 +457,71 @@ Establishes an arbitrary timer identifie
 .Fa ident .
 When adding a timer,
 .Fa data
-specifies the timeout period in milliseconds.
-The timer will be periodic unless
+specifies the timeout period in units described below, or, if
+.Dv NOTE_ABSTIME
+is set in
+.Va fflags ,
+specifies the absolute time at which the timer should fire.
+The timer will repeat unless
 .Dv EV_ONESHOT
-is specified.
+is set in
+.Va flags
+or
+.Dv NOTE_ABSTIME
+is set in
+.Va fflags .
 On return,
 .Fa data
 contains the number of times the timeout has expired since the last call to
 .Fn kevent .
-This filter automatically sets the
+This filter automatically sets
 .Dv EV_CLEAR
-flag internally.
+in
+.Va flags
+for periodic timers.
+Timers created with
+.Dv NOTE_ABSTIME
+remain activated on the kqueue once the absolute time has passed unless
+.Dv EV_CLEAR
+or
+.Dv EV_ONESHOT
+are also specified.
+.Pp
+The filter accepts the following flags in the
+.Va fflags
+argument:
+.Bl -tag -width NOTE_MSECONDS
+.It Dv NOTE_SECONDS
+The timer value in
+.Va data
+is expressed in seconds.
+.It Dv NOTE_MSECONDS
+The timer value in
+.Va data
+is expressed in milliseconds.
+.It Dv NOTE_USECONDS
+The timer value in
+.Va data
+is expressed in microseconds.
+.It Dv NOTE_NSECONDS
+The timer value in
+.Va data
+is expressed in nanoseconds.
+.It Dv NOTE_ABSTIME
+The timer value is an absolute time with
+.Dv CLOCK_REALTIME
+as the reference clock.
+.El
+.Pp
+Note that
+.Dv NOTE_SECONDS ,
+.Dv NOTE_MSECONDS ,
+.Dv NOTE_USECONDS ,
+and
+.Dv NOTE_NSECONDS
+are mutually exclusive; behavior is undefined if more than one are specified.
+If a timer value unit is not specified, the default is
+.Dv NOTE_MSECONDS .
 .Pp
 If an existing timer is re-added, the existing timer and related pending events
 will be cancelled.
@@ -557,6 +611,7 @@ No memory was available to register the 
 The specified process to attach to does not exist.
 .El
 .Sh SEE ALSO
+.Xr clock_gettime 2 ,
 .Xr poll 2 ,
 .Xr read 2 ,
 .Xr select 2 ,
Index: regress/sys/kern/kqueue/kqueue-timer.c
===================================================================
RCS file: src/regress/sys/kern/kqueue/kqueue-timer.c,v
retrieving revision 1.4
diff -u -p -r1.4 kqueue-timer.c
--- regress/sys/kern/kqueue/kqueue-timer.c      12 Jun 2021 13:30:14 -0000      
1.4
+++ regress/sys/kern/kqueue/kqueue-timer.c      10 Sep 2022 13:01:37 -0000
@@ -22,6 +22,7 @@
 #include <err.h>
 #include <errno.h>
 #include <stdio.h>
+#include <stdint.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
@@ -31,9 +32,13 @@
 int
 do_timer(void)
 {
-       int kq, n;
+       static const int units[] = {
+               NOTE_SECONDS, NOTE_MSECONDS, NOTE_USECONDS, NOTE_NSECONDS
+       };
        struct kevent ev;
-       struct timespec ts;
+       struct timespec ts, start, end, now;
+       int64_t usecs;
+       int i, kq, n;
 
        ASS((kq = kqueue()) >= 0,
            warn("kqueue"));
@@ -68,6 +73,125 @@ do_timer(void)
        n = kevent(kq, NULL, 0, &ev, 1, &ts);
        ASSX(n == 1);
 
+       /* Test with different time units */
+
+       for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
+               memset(&ev, 0, sizeof(ev));
+               ev.filter = EVFILT_TIMER;
+               ev.flags = EV_ADD | EV_ENABLE;
+               ev.fflags = units[i];
+               ev.data = 1;
+
+               n = kevent(kq, &ev, 1, NULL, 0, NULL);
+               ASSX(n != -1);
+
+               ts.tv_sec = 2;                  /* wait 2s for kqueue timeout */
+               ts.tv_nsec = 0;
+
+               n = kevent(kq, NULL, 0, &ev, 1, &ts);
+               ASSX(n == 1);
+
+               /* Delete timer to clear EV_CLEAR */
+
+               memset(&ev, 0, sizeof(ev));
+               ev.filter = EVFILT_TIMER;
+               ev.flags = EV_DELETE;
+
+               n = kevent(kq, &ev, 1, NULL, 0, NULL);
+               ASSX(n != -1);
+
+               /* Test with NOTE_ABSTIME, deadline in the future */
+
+               clock_gettime(CLOCK_MONOTONIC, &start);
+
+               clock_gettime(CLOCK_REALTIME, &now);
+               memset(&ev, 0, sizeof(ev));
+               ev.filter = EVFILT_TIMER;
+               ev.flags = EV_ADD | EV_ENABLE;
+               ev.fflags = NOTE_ABSTIME | units[i];
+
+               switch (units[i]) {
+               case NOTE_SECONDS:
+                       ev.data = now.tv_sec + 1;
+                       break;
+               case NOTE_MSECONDS:
+                       ev.data = now.tv_sec * 1000 + now.tv_nsec / 1000000
+                            + 100;
+                       break;
+               case NOTE_USECONDS:
+                       ev.data = now.tv_sec * 1000000 + now.tv_nsec / 1000
+                           + 100 * 1000;
+                       break;
+               case NOTE_NSECONDS:
+                       ev.data = now.tv_sec * 1000000000 + now.tv_nsec
+                           + 100 * 1000000;
+                       break;
+               }
+
+               n = kevent(kq, &ev, 1, NULL, 0, NULL);
+               ASSX(n != -1);
+
+               ts.tv_sec = 2;                  /* wait 2s for kqueue timeout */
+               ts.tv_nsec = 0;
+
+               n = kevent(kq, NULL, 0, &ev, 1, &ts);
+               ASSX(n == 1);
+
+               clock_gettime(CLOCK_MONOTONIC, &end);
+               timespecsub(&end, &start, &ts);
+               usecs = ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
+               ASSX(usecs > 0);
+               ASSX(usecs < 1500000);          /* allow wide margin */
+
+               /* Test with NOTE_ABSTIME, deadline in the past. */
+
+               clock_gettime(CLOCK_MONOTONIC, &start);
+
+               memset(&ev, 0, sizeof(ev));
+               ev.filter = EVFILT_TIMER;
+               ev.flags = EV_ADD | EV_ENABLE;
+               ev.fflags = NOTE_ABSTIME | units[i];
+
+               clock_gettime(CLOCK_REALTIME, &now);
+               switch (units[i]) {
+               case NOTE_SECONDS:
+                       ev.data = now.tv_sec - 1;
+                       break;
+               case NOTE_MSECONDS:
+                       ev.data = now.tv_sec * 1000 + now.tv_nsec / 1000000
+                            - 100;
+                       break;
+               case NOTE_USECONDS:
+                       ev.data = now.tv_sec * 1000000 + now.tv_nsec / 1000
+                           - 100 * 1000;
+                       break;
+               case NOTE_NSECONDS:
+                       ev.data = now.tv_sec * 1000000000 + now.tv_nsec
+                           - 100 * 1000000;
+                       break;
+               }
+
+               n = kevent(kq, &ev, 1, NULL, 0, NULL);
+               ASSX(n != -1);
+
+               n = kevent(kq, NULL, 0, &ev, 1, &ts);
+               ASSX(n == 1);
+
+               clock_gettime(CLOCK_MONOTONIC, &end);
+               timespecsub(&end, &start, &ts);
+               usecs = ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
+               ASSX(usecs > 0);
+               ASSX(usecs < 100000);           /* allow wide margin */
+
+               /* Test that the event remains active */
+
+               ts.tv_sec = 2;                  /* wait 2s for kqueue timeout */
+               ts.tv_nsec = 0;
+
+               n = kevent(kq, NULL, 0, &ev, 1, &ts);
+               ASSX(n == 1);
+       }
+
        return (0);
 }
 
@@ -96,6 +220,37 @@ do_invalid_timer(void)
                    (long long)invalid_ts[i].tv_sec, invalid_ts[i].tv_nsec));
        }
 
+       /* Test invalid fflags */
+
+       memset(&ev, 0, sizeof(ev));
+       ev.filter = EVFILT_TIMER;
+       ev.flags = EV_ADD | EV_ENABLE;
+       ev.fflags = ~NOTE_SECONDS;
+       ev.data = 1;
+
+       n = kevent(kq, &ev, 1, NULL, 0, NULL);
+       ASSX(n == -1 && errno == EINVAL);
+
+       memset(&ev, 0, sizeof(ev));
+       ev.filter = EVFILT_TIMER;
+       ev.flags = EV_ADD | EV_ENABLE;
+       ev.fflags = NOTE_MSECONDS;
+       ev.data = 500;
+
+       n = kevent(kq, &ev, 1, NULL, 0, NULL);
+       ASSX(n == 0);
+
+       /* Modify the existing timer */
+
+       memset(&ev, 0, sizeof(ev));
+       ev.filter = EVFILT_TIMER;
+       ev.flags = EV_ADD | EV_ENABLE;
+       ev.fflags = ~NOTE_SECONDS;
+       ev.data = 1;
+
+       n = kevent(kq, &ev, 1, NULL, 0, NULL);
+       ASSX(n == -1 && errno == EINVAL);
+
        return (0);
 }
 
Index: sys/kern/kern_event.c
===================================================================
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.193
diff -u -p -r1.193 kern_event.c
--- sys/kern/kern_event.c       14 Aug 2022 01:58:27 -0000      1.193
+++ sys/kern/kern_event.c       10 Sep 2022 13:01:38 -0000
@@ -449,17 +449,61 @@ filt_proc(struct knote *kn, long hint)
        return (kn->kn_fflags != 0);
 }
 
+#define NOTE_TIMER_UNITMASK \
+       (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS|NOTE_NSECONDS)
+
+static int
+filt_timervalidate(int sfflags, int64_t sdata, struct timespec *ts)
+{
+       if (sfflags & ~(NOTE_TIMER_UNITMASK | NOTE_ABSTIME))
+               return (EINVAL);
+
+       switch (sfflags & NOTE_TIMER_UNITMASK) {
+       case NOTE_SECONDS:
+               ts->tv_sec = sdata;
+               ts->tv_nsec = 0;
+               break;
+       case NOTE_MSECONDS:
+               ts->tv_sec = sdata / 1000;
+               ts->tv_nsec = (sdata % 1000) * 1000000;
+               break;
+       case NOTE_USECONDS:
+               ts->tv_sec = sdata / 1000000;
+               ts->tv_nsec = (sdata % 1000000) * 1000;
+               break;
+       case NOTE_NSECONDS:
+               ts->tv_sec = sdata / 1000000000;
+               ts->tv_nsec = sdata % 1000000000;
+               break;
+       default:
+               return (EINVAL);
+       }
+
+       return (0);
+}
+
 static void
-filt_timer_timeout_add(struct knote *kn)
+filt_timeradd(struct knote *kn, struct timespec *ts)
 {
-       struct timeval tv;
+       struct timespec expiry, now;
        struct timeout *to = kn->kn_hook;
        int tticks;
 
-       tv.tv_sec = kn->kn_sdata / 1000;
-       tv.tv_usec = (kn->kn_sdata % 1000) * 1000;
-       tticks = tvtohz(&tv);
-       /* Remove extra tick from tvtohz() if timeout has fired before. */
+       if (kn->kn_sfflags & NOTE_ABSTIME) {
+               nanotime(&now);
+               if (timespeccmp(ts, &now, >)) {
+                       timespecsub(ts, &now, &expiry);
+                       /* XXX timeout_at_ts */
+                       timeout_add(to, tstohz(&expiry));
+               } else {
+                       /* Expire immediately. */
+                       filt_timerexpire(kn);
+               }
+               return;
+       }
+
+       tticks = tstohz(ts);
+       /* Remove extra tick from tstohz() if timeout has fired before. */
        if (timeout_triggered(to))
                tticks--;
        timeout_add(to, (tticks > 0) ? tticks : 1);
@@ -468,6 +512,7 @@ filt_timer_timeout_add(struct knote *kn)
 void
 filt_timerexpire(void *knx)
 {
+       struct timespec ts;
        struct knote *kn = knx;
        struct kqueue *kq = kn->kn_kq;
 
@@ -476,28 +521,37 @@ filt_timerexpire(void *knx)
        knote_activate(kn);
        mtx_leave(&kq->kq_lock);
 
-       if ((kn->kn_flags & EV_ONESHOT) == 0)
-               filt_timer_timeout_add(kn);
+       if ((kn->kn_flags & EV_ONESHOT) == 0 &&
+           (kn->kn_sfflags & NOTE_ABSTIME) == 0) {
+               (void)filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts);
+               filt_timeradd(kn, &ts);
+       }
 }
 
-
 /*
- * data contains amount of time to sleep, in milliseconds
+ * data contains amount of time to sleep
  */
 int
 filt_timerattach(struct knote *kn)
 {
+       struct timespec ts;
        struct timeout *to;
+       int error;
+
+       error = filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts);
+       if (error != 0)
+               return (error);
 
        if (kq_ntimeouts > kq_timeoutmax)
                return (ENOMEM);
        kq_ntimeouts++;
 
-       kn->kn_flags |= EV_CLEAR;       /* automatically set */
+       if ((kn->kn_sfflags & NOTE_ABSTIME) == 0)
+               kn->kn_flags |= EV_CLEAR;       /* automatically set */
        to = malloc(sizeof(*to), M_KEVENT, M_WAITOK);
        timeout_set(to, filt_timerexpire, kn);
        kn->kn_hook = to;
-       filt_timer_timeout_add(kn);
+       filt_timeradd(kn, &ts);
 
        return (0);
 }
@@ -516,8 +570,17 @@ filt_timerdetach(struct knote *kn)
 int
 filt_timermodify(struct kevent *kev, struct knote *kn)
 {
+       struct timespec ts;
        struct kqueue *kq = kn->kn_kq;
        struct timeout *to = kn->kn_hook;
+       int error;
+
+       error = filt_timervalidate(kev->fflags, kev->data, &ts);
+       if (error != 0) {
+               kev->flags |= EV_ERROR;
+               kev->data = error;
+               return (0);
+       }
 
        /* Reset the timer. Any pending events are discarded. */
 
@@ -533,7 +596,7 @@ filt_timermodify(struct kevent *kev, str
        knote_assign(kev, kn);
        /* Reinit timeout to invoke tick adjustment again. */
        timeout_set(to, filt_timerexpire, kn);
-       filt_timer_timeout_add(kn);
+       filt_timeradd(kn, &ts);
 
        return (0);
 }
Index: sys/sys/event.h
===================================================================
RCS file: src/sys/sys/event.h,v
retrieving revision 1.67
diff -u -p -r1.67 event.h
--- sys/sys/event.h     31 Mar 2022 01:41:22 -0000      1.67
+++ sys/sys/event.h     10 Sep 2022 13:01:38 -0000
@@ -122,6 +122,13 @@ struct kevent {
 /* data/hint flags for EVFILT_DEVICE, shared with userspace */
 #define NOTE_CHANGE    0x00000001              /* device change event */
 
+/* additional flags for EVFILT_TIMER */
+#define NOTE_MSECONDS  0x00000000              /* data is milliseconds */
+#define NOTE_SECONDS   0x00000001              /* data is seconds */
+#define NOTE_USECONDS  0x00000002              /* data is microseconds */
+#define NOTE_NSECONDS  0x00000003              /* data is nanoseconds */
+#define NOTE_ABSTIME   0x00000010              /* timeout is absolute */
+
 /*
  * This is currently visible to userland to work around broken
  * programs which pull in <sys/proc.h> or <sys/selinfo.h>.
Index: usr.bin/kdump/mksubr
===================================================================
RCS file: src/usr.bin/kdump/mksubr,v
retrieving revision 1.38
diff -u -p -r1.38 mksubr
--- usr.bin/kdump/mksubr        22 Feb 2022 17:35:01 -0000      1.38
+++ usr.bin/kdump/mksubr        10 Sep 2022 13:01:38 -0000
@@ -559,6 +559,29 @@ _EOF_
                printf "\t\tif_print_or(fflags, %s, or);\n", $i }'
 cat <<_EOF_
                break;
+       case EVFILT_TIMER:
+#define NOTE_TIMER_UNITMASK \
+    (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS|NOTE_NSECONDS)
+               switch (fflags & NOTE_TIMER_UNITMASK) {
+               case NOTE_SECONDS:
+                       printf("NOTE_SECONDS");
+                       break;
+               case NOTE_MSECONDS:
+                       printf("NOTE_MSECONDS");
+                       break;
+               case NOTE_USECONDS:
+                       printf("NOTE_USECONDS");
+                       break;
+               case NOTE_NSECONDS:
+                       printf("NOTE_NSECONDS");
+                       break;
+               default:
+                       printf("invalid");
+                       break;
+               }
+               or = 1;
+               if_print_or(fflags, NOTE_ABSTIME, or);
+               break;
        }
        printf(">");
 }

Reply via email to