On Wed, Aug 31, 2022 at 04:48:37PM -0400, aisha wrote: > I've added a patch which adds support for NOTE_{,U,M,N}SECONDS for > EVFILT_TIMER in the kqueue interface.
It sort of makes sense to add an option to specify timeouts in sub-millisecond precision. It feels complete overengineering to add multiple time units on the level of the kernel interface. However, it looks that FreeBSD and NetBSD have already done this following macOS' lead... > I've also added the NOTE_ABSTIME but haven't done any actual implementation > there as I am not sure how the `data` field should be interpreted (is it > absolute time in seconds since epoch?). I think FreeBSD and NetBSD take NOTE_ABSTIME as time since the epoch. Below is a revised patch that takes into account some corner cases. It tries to be API-compatible with FreeBSD and NetBSD. I have adjusted the NOTE_{,M,U,N}SECONDS flags so that they are enum-like. The manual page bits are from NetBSD. It is quite late to introduce a feature like this within this release cycle. Until now, the timer code has ignored the fflags field. There might be pieces of software that are careless with struct kevent and that would break as a result of this patch. Programs that are widely used on different BSDs are probably fine already, though. Index: lib/libc/sys/kqueue.2 =================================================================== RCS file: src/lib/libc/sys/kqueue.2,v retrieving revision 1.46 diff -u -p -r1.46 kqueue.2 --- lib/libc/sys/kqueue.2 31 Mar 2022 17:27:16 -0000 1.46 +++ lib/libc/sys/kqueue.2 10 Sep 2022 13:01:36 -0000 @@ -457,17 +457,71 @@ Establishes an arbitrary timer identifie .Fa ident . When adding a timer, .Fa data -specifies the timeout period in milliseconds. -The timer will be periodic unless +specifies the timeout period in units described below, or, if +.Dv NOTE_ABSTIME +is set in +.Va fflags , +specifies the absolute time at which the timer should fire. +The timer will repeat unless .Dv EV_ONESHOT -is specified. +is set in +.Va flags +or +.Dv NOTE_ABSTIME +is set in +.Va fflags . On return, .Fa data contains the number of times the timeout has expired since the last call to .Fn kevent . -This filter automatically sets the +This filter automatically sets .Dv EV_CLEAR -flag internally. +in +.Va flags +for periodic timers. +Timers created with +.Dv NOTE_ABSTIME +remain activated on the kqueue once the absolute time has passed unless +.Dv EV_CLEAR +or +.Dv EV_ONESHOT +are also specified. +.Pp +The filter accepts the following flags in the +.Va fflags +argument: +.Bl -tag -width NOTE_MSECONDS +.It Dv NOTE_SECONDS +The timer value in +.Va data +is expressed in seconds. +.It Dv NOTE_MSECONDS +The timer value in +.Va data +is expressed in milliseconds. +.It Dv NOTE_USECONDS +The timer value in +.Va data +is expressed in microseconds. +.It Dv NOTE_NSECONDS +The timer value in +.Va data +is expressed in nanoseconds. +.It Dv NOTE_ABSTIME +The timer value is an absolute time with +.Dv CLOCK_REALTIME +as the reference clock. +.El +.Pp +Note that +.Dv NOTE_SECONDS , +.Dv NOTE_MSECONDS , +.Dv NOTE_USECONDS , +and +.Dv NOTE_NSECONDS +are mutually exclusive; behavior is undefined if more than one are specified. +If a timer value unit is not specified, the default is +.Dv NOTE_MSECONDS . .Pp If an existing timer is re-added, the existing timer and related pending events will be cancelled. @@ -557,6 +611,7 @@ No memory was available to register the The specified process to attach to does not exist. .El .Sh SEE ALSO +.Xr clock_gettime 2 , .Xr poll 2 , .Xr read 2 , .Xr select 2 , Index: regress/sys/kern/kqueue/kqueue-timer.c =================================================================== RCS file: src/regress/sys/kern/kqueue/kqueue-timer.c,v retrieving revision 1.4 diff -u -p -r1.4 kqueue-timer.c --- regress/sys/kern/kqueue/kqueue-timer.c 12 Jun 2021 13:30:14 -0000 1.4 +++ regress/sys/kern/kqueue/kqueue-timer.c 10 Sep 2022 13:01:37 -0000 @@ -22,6 +22,7 @@ #include <err.h> #include <errno.h> #include <stdio.h> +#include <stdint.h> #include <string.h> #include <time.h> #include <unistd.h> @@ -31,9 +32,13 @@ int do_timer(void) { - int kq, n; + static const int units[] = { + NOTE_SECONDS, NOTE_MSECONDS, NOTE_USECONDS, NOTE_NSECONDS + }; struct kevent ev; - struct timespec ts; + struct timespec ts, start, end, now; + int64_t usecs; + int i, kq, n; ASS((kq = kqueue()) >= 0, warn("kqueue")); @@ -68,6 +73,125 @@ do_timer(void) n = kevent(kq, NULL, 0, &ev, 1, &ts); ASSX(n == 1); + /* Test with different time units */ + + for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) { + memset(&ev, 0, sizeof(ev)); + ev.filter = EVFILT_TIMER; + ev.flags = EV_ADD | EV_ENABLE; + ev.fflags = units[i]; + ev.data = 1; + + n = kevent(kq, &ev, 1, NULL, 0, NULL); + ASSX(n != -1); + + ts.tv_sec = 2; /* wait 2s for kqueue timeout */ + ts.tv_nsec = 0; + + n = kevent(kq, NULL, 0, &ev, 1, &ts); + ASSX(n == 1); + + /* Delete timer to clear EV_CLEAR */ + + memset(&ev, 0, sizeof(ev)); + ev.filter = EVFILT_TIMER; + ev.flags = EV_DELETE; + + n = kevent(kq, &ev, 1, NULL, 0, NULL); + ASSX(n != -1); + + /* Test with NOTE_ABSTIME, deadline in the future */ + + clock_gettime(CLOCK_MONOTONIC, &start); + + clock_gettime(CLOCK_REALTIME, &now); + memset(&ev, 0, sizeof(ev)); + ev.filter = EVFILT_TIMER; + ev.flags = EV_ADD | EV_ENABLE; + ev.fflags = NOTE_ABSTIME | units[i]; + + switch (units[i]) { + case NOTE_SECONDS: + ev.data = now.tv_sec + 1; + break; + case NOTE_MSECONDS: + ev.data = now.tv_sec * 1000 + now.tv_nsec / 1000000 + + 100; + break; + case NOTE_USECONDS: + ev.data = now.tv_sec * 1000000 + now.tv_nsec / 1000 + + 100 * 1000; + break; + case NOTE_NSECONDS: + ev.data = now.tv_sec * 1000000000 + now.tv_nsec + + 100 * 1000000; + break; + } + + n = kevent(kq, &ev, 1, NULL, 0, NULL); + ASSX(n != -1); + + ts.tv_sec = 2; /* wait 2s for kqueue timeout */ + ts.tv_nsec = 0; + + n = kevent(kq, NULL, 0, &ev, 1, &ts); + ASSX(n == 1); + + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &ts); + usecs = ts.tv_sec * 1000000 + ts.tv_nsec / 1000; + ASSX(usecs > 0); + ASSX(usecs < 1500000); /* allow wide margin */ + + /* Test with NOTE_ABSTIME, deadline in the past. */ + + clock_gettime(CLOCK_MONOTONIC, &start); + + memset(&ev, 0, sizeof(ev)); + ev.filter = EVFILT_TIMER; + ev.flags = EV_ADD | EV_ENABLE; + ev.fflags = NOTE_ABSTIME | units[i]; + + clock_gettime(CLOCK_REALTIME, &now); + switch (units[i]) { + case NOTE_SECONDS: + ev.data = now.tv_sec - 1; + break; + case NOTE_MSECONDS: + ev.data = now.tv_sec * 1000 + now.tv_nsec / 1000000 + - 100; + break; + case NOTE_USECONDS: + ev.data = now.tv_sec * 1000000 + now.tv_nsec / 1000 + - 100 * 1000; + break; + case NOTE_NSECONDS: + ev.data = now.tv_sec * 1000000000 + now.tv_nsec + - 100 * 1000000; + break; + } + + n = kevent(kq, &ev, 1, NULL, 0, NULL); + ASSX(n != -1); + + n = kevent(kq, NULL, 0, &ev, 1, &ts); + ASSX(n == 1); + + clock_gettime(CLOCK_MONOTONIC, &end); + timespecsub(&end, &start, &ts); + usecs = ts.tv_sec * 1000000 + ts.tv_nsec / 1000; + ASSX(usecs > 0); + ASSX(usecs < 100000); /* allow wide margin */ + + /* Test that the event remains active */ + + ts.tv_sec = 2; /* wait 2s for kqueue timeout */ + ts.tv_nsec = 0; + + n = kevent(kq, NULL, 0, &ev, 1, &ts); + ASSX(n == 1); + } + return (0); } @@ -96,6 +220,37 @@ do_invalid_timer(void) (long long)invalid_ts[i].tv_sec, invalid_ts[i].tv_nsec)); } + /* Test invalid fflags */ + + memset(&ev, 0, sizeof(ev)); + ev.filter = EVFILT_TIMER; + ev.flags = EV_ADD | EV_ENABLE; + ev.fflags = ~NOTE_SECONDS; + ev.data = 1; + + n = kevent(kq, &ev, 1, NULL, 0, NULL); + ASSX(n == -1 && errno == EINVAL); + + memset(&ev, 0, sizeof(ev)); + ev.filter = EVFILT_TIMER; + ev.flags = EV_ADD | EV_ENABLE; + ev.fflags = NOTE_MSECONDS; + ev.data = 500; + + n = kevent(kq, &ev, 1, NULL, 0, NULL); + ASSX(n == 0); + + /* Modify the existing timer */ + + memset(&ev, 0, sizeof(ev)); + ev.filter = EVFILT_TIMER; + ev.flags = EV_ADD | EV_ENABLE; + ev.fflags = ~NOTE_SECONDS; + ev.data = 1; + + n = kevent(kq, &ev, 1, NULL, 0, NULL); + ASSX(n == -1 && errno == EINVAL); + return (0); } Index: sys/kern/kern_event.c =================================================================== RCS file: src/sys/kern/kern_event.c,v retrieving revision 1.193 diff -u -p -r1.193 kern_event.c --- sys/kern/kern_event.c 14 Aug 2022 01:58:27 -0000 1.193 +++ sys/kern/kern_event.c 10 Sep 2022 13:01:38 -0000 @@ -449,17 +449,61 @@ filt_proc(struct knote *kn, long hint) return (kn->kn_fflags != 0); } +#define NOTE_TIMER_UNITMASK \ + (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS|NOTE_NSECONDS) + +static int +filt_timervalidate(int sfflags, int64_t sdata, struct timespec *ts) +{ + if (sfflags & ~(NOTE_TIMER_UNITMASK | NOTE_ABSTIME)) + return (EINVAL); + + switch (sfflags & NOTE_TIMER_UNITMASK) { + case NOTE_SECONDS: + ts->tv_sec = sdata; + ts->tv_nsec = 0; + break; + case NOTE_MSECONDS: + ts->tv_sec = sdata / 1000; + ts->tv_nsec = (sdata % 1000) * 1000000; + break; + case NOTE_USECONDS: + ts->tv_sec = sdata / 1000000; + ts->tv_nsec = (sdata % 1000000) * 1000; + break; + case NOTE_NSECONDS: + ts->tv_sec = sdata / 1000000000; + ts->tv_nsec = sdata % 1000000000; + break; + default: + return (EINVAL); + } + + return (0); +} + static void -filt_timer_timeout_add(struct knote *kn) +filt_timeradd(struct knote *kn, struct timespec *ts) { - struct timeval tv; + struct timespec expiry, now; struct timeout *to = kn->kn_hook; int tticks; - tv.tv_sec = kn->kn_sdata / 1000; - tv.tv_usec = (kn->kn_sdata % 1000) * 1000; - tticks = tvtohz(&tv); - /* Remove extra tick from tvtohz() if timeout has fired before. */ + if (kn->kn_sfflags & NOTE_ABSTIME) { + nanotime(&now); + if (timespeccmp(ts, &now, >)) { + timespecsub(ts, &now, &expiry); + /* XXX timeout_at_ts */ + timeout_add(to, tstohz(&expiry)); + } else { + /* Expire immediately. */ + filt_timerexpire(kn); + } + return; + } + + tticks = tstohz(ts); + /* Remove extra tick from tstohz() if timeout has fired before. */ if (timeout_triggered(to)) tticks--; timeout_add(to, (tticks > 0) ? tticks : 1); @@ -468,6 +512,7 @@ filt_timer_timeout_add(struct knote *kn) void filt_timerexpire(void *knx) { + struct timespec ts; struct knote *kn = knx; struct kqueue *kq = kn->kn_kq; @@ -476,28 +521,37 @@ filt_timerexpire(void *knx) knote_activate(kn); mtx_leave(&kq->kq_lock); - if ((kn->kn_flags & EV_ONESHOT) == 0) - filt_timer_timeout_add(kn); + if ((kn->kn_flags & EV_ONESHOT) == 0 && + (kn->kn_sfflags & NOTE_ABSTIME) == 0) { + (void)filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); + filt_timeradd(kn, &ts); + } } - /* - * data contains amount of time to sleep, in milliseconds + * data contains amount of time to sleep */ int filt_timerattach(struct knote *kn) { + struct timespec ts; struct timeout *to; + int error; + + error = filt_timervalidate(kn->kn_sfflags, kn->kn_sdata, &ts); + if (error != 0) + return (error); if (kq_ntimeouts > kq_timeoutmax) return (ENOMEM); kq_ntimeouts++; - kn->kn_flags |= EV_CLEAR; /* automatically set */ + if ((kn->kn_sfflags & NOTE_ABSTIME) == 0) + kn->kn_flags |= EV_CLEAR; /* automatically set */ to = malloc(sizeof(*to), M_KEVENT, M_WAITOK); timeout_set(to, filt_timerexpire, kn); kn->kn_hook = to; - filt_timer_timeout_add(kn); + filt_timeradd(kn, &ts); return (0); } @@ -516,8 +570,17 @@ filt_timerdetach(struct knote *kn) int filt_timermodify(struct kevent *kev, struct knote *kn) { + struct timespec ts; struct kqueue *kq = kn->kn_kq; struct timeout *to = kn->kn_hook; + int error; + + error = filt_timervalidate(kev->fflags, kev->data, &ts); + if (error != 0) { + kev->flags |= EV_ERROR; + kev->data = error; + return (0); + } /* Reset the timer. Any pending events are discarded. */ @@ -533,7 +596,7 @@ filt_timermodify(struct kevent *kev, str knote_assign(kev, kn); /* Reinit timeout to invoke tick adjustment again. */ timeout_set(to, filt_timerexpire, kn); - filt_timer_timeout_add(kn); + filt_timeradd(kn, &ts); return (0); } Index: sys/sys/event.h =================================================================== RCS file: src/sys/sys/event.h,v retrieving revision 1.67 diff -u -p -r1.67 event.h --- sys/sys/event.h 31 Mar 2022 01:41:22 -0000 1.67 +++ sys/sys/event.h 10 Sep 2022 13:01:38 -0000 @@ -122,6 +122,13 @@ struct kevent { /* data/hint flags for EVFILT_DEVICE, shared with userspace */ #define NOTE_CHANGE 0x00000001 /* device change event */ +/* additional flags for EVFILT_TIMER */ +#define NOTE_MSECONDS 0x00000000 /* data is milliseconds */ +#define NOTE_SECONDS 0x00000001 /* data is seconds */ +#define NOTE_USECONDS 0x00000002 /* data is microseconds */ +#define NOTE_NSECONDS 0x00000003 /* data is nanoseconds */ +#define NOTE_ABSTIME 0x00000010 /* timeout is absolute */ + /* * This is currently visible to userland to work around broken * programs which pull in <sys/proc.h> or <sys/selinfo.h>. Index: usr.bin/kdump/mksubr =================================================================== RCS file: src/usr.bin/kdump/mksubr,v retrieving revision 1.38 diff -u -p -r1.38 mksubr --- usr.bin/kdump/mksubr 22 Feb 2022 17:35:01 -0000 1.38 +++ usr.bin/kdump/mksubr 10 Sep 2022 13:01:38 -0000 @@ -559,6 +559,29 @@ _EOF_ printf "\t\tif_print_or(fflags, %s, or);\n", $i }' cat <<_EOF_ break; + case EVFILT_TIMER: +#define NOTE_TIMER_UNITMASK \ + (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS|NOTE_NSECONDS) + switch (fflags & NOTE_TIMER_UNITMASK) { + case NOTE_SECONDS: + printf("NOTE_SECONDS"); + break; + case NOTE_MSECONDS: + printf("NOTE_MSECONDS"); + break; + case NOTE_USECONDS: + printf("NOTE_USECONDS"); + break; + case NOTE_NSECONDS: + printf("NOTE_NSECONDS"); + break; + default: + printf("invalid"); + break; + } + or = 1; + if_print_or(fflags, NOTE_ABSTIME, or); + break; } printf(">"); }