On Sat, Jul 25, 2020 at 08:46:08PM -0500, Scott Cheloha wrote:
> 
> [...]
> 
> I want to add clock-based timeouts to the kernel because tick-based
> timeouts suffer from a few problems:
> 
> [...]
> 
> Basically, ticks are a poor approximation for the system clock.  We
> should use the real thing where possible.
> 
> [...]
> 
> Thoughts on this approach?  Thoughts on the proposed API?

6 week bump.

Attached is a rebased and streamlined diff.

Let's try again:

This patch adds support for timeouts scheduled against the hardware
timecounter.  I call these "kclock timeouts".  They are distinct from
the current tick-based timeouts because ticks are "software time", not
"real time".

For now we have one kclock, KCLOCK_UPTIME, which corresponds to
nanouptime(9).  In the future I intend to add support for runtime and
UTC kclocks.
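
For reference, these are the clock identifiers the patch below adds
to sys/timeout.h:

        #define KCLOCK_NONE    (-1)            /* dummy clock for sanity checks */
        #define KCLOCK_UPTIME  0               /* uptime clock; time since boot */
        #define KCLOCK_MAX     1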

Why do we want kclock timeouts at all?

1. Kclock timeouts expire at an actual time, not a tick.  They
   have nanosecond resolution and are NTP-sensitive.  Thus, they
   will *never* fire early.

2. One upshot of nanosecond resolution is that we don't need to
   "round up" to the next tick when scheduling a timeout to prevent
   early execution.  The extra resolution allows us to reduce
   latency in some contexts.

3. Kclock timeouts cover the entire range of the kernel timeline.
   We can remove the "tick loops" like the one in sys_nanosleep();
   see the sketch after this list.

4. Kclock timeouts are scheduled as absolute deadlines.  This makes
   supporting absolute timeouts trivial, which means we can add support
   for clock_nanosleep(2) and the absolute pthread timeouts to the
   kernel.
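
To illustrate point 3: because the sleep interfaces take an int
count of ticks, code that needs to wait longer than INT_MAX ticks
has to reschedule itself in a loop.  A minimal sketch of that
pattern follows.  It is hypothetical, not the actual sys_nanosleep()
code, and assumes a caller-supplied struct timespec "request"
holding the amount of time to sleep:

        /*
         * Hypothetical "tick loop": chop a long sleep into
         * chunks of at most INT_MAX ticks and resleep until
         * the deadline on the uptime clock has passed.
         */
        struct timespec deadline, now, remaining;
        int error;

        nanouptime(&now);
        timespecadd(&now, &request, &deadline);
        for (;;) {
                nanouptime(&now);
                if (timespeccmp(&deadline, &now, <=))
                        break;
                timespecsub(&deadline, &now, &remaining);
                error = tsleep(&deadline, PWAIT, "sleep",
                    MIN(tstohz(&remaining), INT_MAX));
                if (error != EWOULDBLOCK)
                        break;
        }

A kclock timeout eliminates the loop: a 64-bit nanosecond deadline
covers the whole kernel timeline in one shot.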

Kclock timeouts aren't actually used anywhere yet, so merging this
patch will not break anything like last time (CC bluhm@).

In a subsequent diff I will put them to use in tsleep_nsec(9) etc.
This will enable those interfaces to block for less than a tick, which
in turn will allow userspace to block for less than a tick in e.g.
futex(2) and poll(2).  pd@ has verified that this fixes the "time
problem" in OpenBSD vmm(4) VMs (CC pd@).

You initialize kclock timeouts with timeout_set_kclock().  You
schedule them with timeout_in_nsec(), a relative timeout interface
that accepts a count of nanoseconds.  If your timeout is in some
other unit (seconds, milliseconds, whatever), you must convert it
to nanoseconds before scheduling.  Something like this will work:

        timeout_in_nsec(&my_timeout, SEC_TO_NSEC(1));

There won't be a flavored API supporting every conceivable time unit.
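
A fuller usage sketch, assuming a hypothetical my_handler callback
and the MSEC_TO_NSEC() conversion macro from sys/time.h:

        void
        my_handler(void *arg)
        {
                /*
                 * With no flags this runs from softclock() at
                 * IPL_SOFTCLOCK.  Pass TIMEOUT_PROC in the flags
                 * argument to run from the softclock thread instead.
                 */
        }

        struct timeout my_timeout;

        /* Initialize against the uptime clock, no flags. */
        timeout_set_kclock(&my_timeout, my_handler, NULL, 0, KCLOCK_UPTIME);

        /* Fire in 10 milliseconds. */
        timeout_in_nsec(&my_timeout, MSEC_TO_NSEC(10));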

In the future I will expose an absolute timeout interface and a
periodic timeout rescheduling interface.  We don't need either of
these interfaces to start, though.

Tick-based timeouts and kclock-based timeouts are *not* compatible.
You cannot schedule a kclock timeout with timeout_add(9).  You cannot
schedule a tick-based timeout with timeout_in_nsec(9).  I have added
KASSERTs to prevent this.

Scheduling a kclock timeout with timeout_in_nsec() is more costly than
scheduling a tick-based timeout with timeout_add(9) because you have
to read the hardware timecounter.  The cost will vary with your clock:
bad clocks have lots of overhead, good clocks have low-to-no overhead.
The programmer will need to decide if the potential overhead is too
high when employing these timeouts.  In most cases the overhead will
not be a problem.  The network stack is one spot where it might be.

Processing the kclock timeout wheel during hardclock(9) adds
negligible overhead to that routine.

Processing a kclock timeout during softclock() is roughly 4 times as
expensive as processing a tick-based timeout.  At idle on my 2GHz
amd64 machine tick-based timeouts take ~125 cycles to process while
kclock timeouts take ~500 cycles.  The average cost seems to drop as
more kclock timeouts are processed, though I can't really explain why.

Thoughts?  ok?

Index: kern/kern_timeout.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_timeout.c,v
retrieving revision 1.79
diff -u -p -r1.79 kern_timeout.c
--- kern/kern_timeout.c 7 Aug 2020 00:45:25 -0000       1.79
+++ kern/kern_timeout.c 4 Sep 2020 22:41:12 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: kern_timeout.c,v 1.79 2020/08/07 00:45:25 cheloha Exp $ */
+/*     $OpenBSD: kern_timeout.c,v 1.77 2020/08/01 08:40:20 anton Exp $ */
 /*
  * Copyright (c) 2001 Thomas Nordin <nor...@openbsd.org>
  * Copyright (c) 2000-2001 Artur Grabowski <a...@openbsd.org>
@@ -64,16 +64,27 @@ struct timeoutstat tostat;          /* [T] stati
  * of the global variable "ticks" when the timeout should be called. There are
  * four levels with 256 buckets each.
  */
-#define BUCKETS 1024
+#define WHEELCOUNT 4
 #define WHEELSIZE 256
 #define WHEELMASK 255
 #define WHEELBITS 8
+#define BUCKETS (WHEELCOUNT * WHEELSIZE)
 
-struct circq timeout_wheel[BUCKETS];   /* [T] Queues of timeouts */
+struct circq timeout_wheel[BUCKETS];   /* [T] Tick-based timeouts */
+struct circq timeout_wheel_kc[BUCKETS];        /* [T] Clock-based timeouts */
 struct circq timeout_new;              /* [T] New, unscheduled timeouts */
 struct circq timeout_todo;             /* [T] Due or needs rescheduling */
 struct circq timeout_proc;             /* [T] Due + needs process context */
 
+time_t timeout_level_width[WHEELCOUNT];        /* [I] Wheel level width (seconds) */
+struct timespec tick_ts;               /* [I] Length of a tick (1/hz secs) */
+
+struct kclock {
+       struct timespec kc_lastscan;    /* [T] Clock time at last wheel scan */
+       struct timespec kc_late;        /* [T] Late if due prior */
+       struct timespec kc_offset;      /* [T] Offset from primary kclock */
+} timeout_kclock[KCLOCK_MAX];
+
 #define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)
 
 #define BUCKET(rel, abs)                                               \
@@ -155,9 +166,15 @@ struct lock_type timeout_spinlock_type =
        ((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
 #endif
 
+void kclock_nanotime(int, struct timespec *);
 void softclock(void *);
 void softclock_create_thread(void *);
+void softclock_process_kclock_timeout(struct timeout *, int);
+void softclock_process_tick_timeout(struct timeout *, int);
 void softclock_thread(void *);
+uint32_t timeout_bucket(struct timeout *);
+uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
+void timeout_run(struct timeout *);
 void timeout_proc_barrier(void *);
 
 /*
@@ -207,13 +224,19 @@ timeout_sync_leave(int needsproc)
 void
 timeout_startup(void)
 {
-       int b;
+       int b, level;
 
        CIRCQ_INIT(&timeout_new);
        CIRCQ_INIT(&timeout_todo);
        CIRCQ_INIT(&timeout_proc);
        for (b = 0; b < nitems(timeout_wheel); b++)
                CIRCQ_INIT(&timeout_wheel[b]);
+       for (b = 0; b < nitems(timeout_wheel_kc); b++)
+               CIRCQ_INIT(&timeout_wheel_kc[b]);
+
+       for (level = 0; level < nitems(timeout_level_width); level++)
+               timeout_level_width[level] = 2 << (level * WHEELBITS);
+       NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
 }
 
 void
@@ -229,25 +252,39 @@ timeout_proc_init(void)
        kthread_create_deferred(softclock_create_thread, NULL);
 }
 
+static inline void
+_timeout_set(struct timeout *to, void (*fn)(void *), void *arg, int flags,
+    int kclock)
+{
+       to->to_func = fn;
+       to->to_arg = arg;
+       to->to_flags = flags | TIMEOUT_INITIALIZED;
+       to->to_kclock = kclock;
+}
+
 void
 timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
 {
-       timeout_set_flags(new, fn, arg, 0);
+       _timeout_set(new, fn, arg, 0, KCLOCK_NONE);
 }
 
 void
 timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int flags)
 {
-       to->to_func = fn;
-       to->to_arg = arg;
-       to->to_process = NULL;
-       to->to_flags = flags | TIMEOUT_INITIALIZED;
+       _timeout_set(to, fn, arg, flags, KCLOCK_NONE);
 }
 
 void
 timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
 {
-       timeout_set_flags(new, fn, arg, TIMEOUT_PROC);
+       _timeout_set(new, fn, arg, TIMEOUT_PROC, KCLOCK_NONE);
+}
+
+void
+timeout_set_kclock(struct timeout *to, void (*fn)(void *), void *arg, int flags,
+    int kclock)
+{
+       _timeout_set(to, fn, arg, flags | TIMEOUT_KCLOCK, kclock);
 }
 
 int
@@ -257,6 +294,8 @@ timeout_add(struct timeout *new, int to_
        int ret = 1;
 
        KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
+       KASSERT(!ISSET(new->to_flags, TIMEOUT_KCLOCK));
+       KASSERT(new->to_kclock == KCLOCK_NONE);
        KASSERT(to_ticks >= 0);
 
        mtx_enter(&timeout_mutex);
@@ -356,6 +395,65 @@ timeout_add_nsec(struct timeout *to, int
 }
 
 int
+timeout_at_ts(struct timeout *to, const struct timespec *abstime)
+{
+       struct timespec old_abstime;
+       int ret = 1;
+
+       KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED | TIMEOUT_KCLOCK));
+       KASSERT(to->to_kclock != KCLOCK_NONE);
+
+       mtx_enter(&timeout_mutex);
+
+       old_abstime = to->to_abstime;
+       to->to_abstime = *abstime;
+       CLR(to->to_flags, TIMEOUT_TRIGGERED);
+
+       if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
+               if (timespeccmp(abstime, &old_abstime, <)) {
+                       CIRCQ_REMOVE(&to->to_list);
+                       CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
+               }
+               tostat.tos_readded++;
+               ret = 0;
+       } else {
+               SET(to->to_flags, TIMEOUT_ONQUEUE);
+               CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
+       }
+#if NKCOV > 0
+       to->to_process = curproc->p_p;
+#endif
+       tostat.tos_added++;
+       mtx_leave(&timeout_mutex);
+
+       return ret;
+}
+
+int
+timeout_in_nsec(struct timeout *to, uint64_t nsecs)
+{
+       struct timespec deadline, interval, now;
+
+       kclock_nanotime(to->to_kclock, &now);
+       NSEC_TO_TIMESPEC(nsecs, &interval);
+       timespecadd(&now, &interval, &deadline);
+
+       return timeout_at_ts(to, &deadline);
+}
+
+void
+kclock_nanotime(int kclock, struct timespec *now)
+{
+       switch (kclock) {
+       case KCLOCK_UPTIME:
+               nanouptime(now);
+               break;
+       default:
+               panic("invalid kclock: 0x%x", kclock);
+       }
+}
+
+int
 timeout_del(struct timeout *to)
 {
        int ret = 0;
@@ -425,6 +523,35 @@ timeout_proc_barrier(void *arg)
        cond_signal(c);
 }
 
+uint32_t
+timeout_bucket(struct timeout *to)
+{
+       struct kclock *kc = &timeout_kclock[to->to_kclock];
+       struct timespec diff;
+       uint32_t level;
+
+       KASSERT(ISSET(to->to_flags, TIMEOUT_KCLOCK));
+       KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));
+
+       timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
+       for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
+               if (diff.tv_sec < timeout_level_width[level])
+                       break;
+       }
+       return level * WHEELSIZE + timeout_maskwheel(level, &to->to_abstime);
+}
+
+uint32_t
+timeout_maskwheel(uint32_t level, const struct timespec *abstime)
+{
+       uint32_t hi, lo;
+
+       hi = abstime->tv_sec << 7;
+       lo = abstime->tv_nsec / 7812500;
+
+       return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
+}
+
 /*
  * This is called from hardclock() on the primary CPU at the start of
  * every tick.
@@ -432,7 +559,15 @@ timeout_proc_barrier(void *arg)
 void
 timeout_hardclock_update(void)
 {
-       int need_softclock = 1;
+       struct timespec elapsed, now;
+       struct kclock *kc;
+       struct timespec *lastscan;
+       int b, done, first, i, last, level, need_softclock, off;
+
+       kclock_nanotime(KCLOCK_UPTIME, &now);
+       lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
+       timespecsub(&now, lastscan, &elapsed);
+       need_softclock = 1;
 
        mtx_enter(&timeout_mutex);
 
@@ -446,6 +581,44 @@ timeout_hardclock_update(void)
                }
        }
 
+       /*
+        * Dump the buckets that expired while we were away.
+        *
+        * If the elapsed time has exceeded a level's limit then we need
+        * to dump every bucket in the level.  We have necessarily completed
+        * a lap of that level, too, so we need to process buckets in the
+        * next level.
+        *
+        * Otherwise we need to compare indices: if the index of the first
+        * expired bucket is greater than that of the last then we have
+        * completed a lap of the level and need to process buckets in the
+        * next level.
+        */
+       for (level = 0; level < nitems(timeout_level_width); level++) {
+               first = timeout_maskwheel(level, lastscan);
+               if (elapsed.tv_sec >= timeout_level_width[level]) {
+                       last = (first == 0) ? WHEELSIZE - 1 : first - 1;
+                       done = 0;
+               } else {
+                       last = timeout_maskwheel(level, &now);
+                       done = first <= last;
+               }
+               off = level * WHEELSIZE;
+               for (b = first;; b = (b + 1) % WHEELSIZE) {
+                       CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
+                       if (b == last)
+                               break;
+               }
+               if (done)
+                       break;
+       }
+
+       for (i = 0; i < nitems(timeout_kclock); i++) {
+               kc = &timeout_kclock[i];
+               timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
+               timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
+       }
+
        if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
                need_softclock = 0;
 
@@ -485,6 +658,51 @@ timeout_run(struct timeout *to)
        mtx_enter(&timeout_mutex);
 }
 
+void
+softclock_process_kclock_timeout(struct timeout *to, int new)
+{
+       struct kclock *kc = &timeout_kclock[to->to_kclock];
+       
+       if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
+               tostat.tos_scheduled++;
+               if (!new)
+                       tostat.tos_rescheduled++;
+               CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
+                   &to->to_list);
+               return;
+       }
+       if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
+               tostat.tos_late++;
+       if (ISSET(to->to_flags, TIMEOUT_PROC)) {
+               CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+               return;
+       }
+       timeout_run(to);
+       tostat.tos_run_softclock++;
+}
+
+void
+softclock_process_tick_timeout(struct timeout *to, int new)
+{
+       int delta = to->to_time - ticks;
+
+       if (delta > 0) {
+               tostat.tos_scheduled++;
+               if (!new)
+                       tostat.tos_rescheduled++;
+               CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
+               return;
+       }
+       if (!new && delta < 0)
+               tostat.tos_late++;
+       if (ISSET(to->to_flags, TIMEOUT_PROC)) {
+               CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+               return;
+       }
+       timeout_run(to);
+       tostat.tos_run_softclock++;
+}
+
 /*
  * Timeouts are processed here instead of timeout_hardclock_update()
  * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
@@ -494,9 +712,8 @@ timeout_run(struct timeout *to)
 void
 softclock(void *arg)
 {
-       struct circq *bucket;
        struct timeout *first_new, *to;
-       int delta, needsproc, new;
+       int needsproc, new;
 
        first_new = NULL;
        new = 0;
@@ -510,28 +727,10 @@ softclock(void *arg)
                CIRCQ_REMOVE(&to->to_list);
                if (to == first_new)
                        new = 1;
-
-               /*
-                * If due run it or defer execution to the thread,
-                * otherwise insert it into the right bucket.
-                */
-               delta = to->to_time - ticks;
-               if (delta > 0) {
-                       bucket = &BUCKET(delta, to->to_time);
-                       CIRCQ_INSERT_TAIL(bucket, &to->to_list);
-                       tostat.tos_scheduled++;
-                       if (!new)
-                               tostat.tos_rescheduled++;
-                       continue;
-               }
-               if (!new && delta < 0)
-                       tostat.tos_late++;
-               if (ISSET(to->to_flags, TIMEOUT_PROC)) {
-                       CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
-                       continue;
-               }
-               timeout_run(to);
-               tostat.tos_run_softclock++;
+               if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
+                       softclock_process_kclock_timeout(to, new);
+               else
+                       softclock_process_tick_timeout(to, new);
        }
        tostat.tos_softclocks++;
        needsproc = !CIRCQ_EMPTY(&timeout_proc);
@@ -630,52 +829,114 @@ timeout_sysctl(void *oldp, size_t *oldle
 }
 
 #ifdef DDB
+const char *db_kclock(int);
 void db_show_callout_bucket(struct circq *);
+void db_show_timeout(struct timeout *, struct circq *);
+const char *db_timespec(const struct timespec *);
+
+const char *
+db_kclock(int kclock)
+{
+       switch (kclock) {
+       case KCLOCK_UPTIME:
+               return "uptime";
+       default:
+               return "invalid";
+       }
+}
+
+const char *
+db_timespec(const struct timespec *ts)
+{
+       static char buf[32];
+       struct timespec tmp, zero;
+
+       if (ts->tv_sec >= 0) {
+               snprintf(buf, sizeof(buf), "%lld.%09ld",
+                   ts->tv_sec, ts->tv_nsec);
+               return buf;
+       }
+
+       timespecclear(&zero);
+       timespecsub(&zero, ts, &tmp);
+       snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
+       return buf;
+}
 
 void
 db_show_callout_bucket(struct circq *bucket)
 {
-       char buf[8];
-       struct timeout *to;
        struct circq *p;
+
+       CIRCQ_FOREACH(p, bucket)
+               db_show_timeout(timeout_from_circq(p), bucket);
+}
+
+void
+db_show_timeout(struct timeout *to, struct circq *bucket)
+{
+       struct timespec remaining;
+       struct kclock *kc;
+       char buf[8];
        db_expr_t offset;
+       struct circq *wheel;
        char *name, *where;
        int width = sizeof(long) * 2;
 
-       CIRCQ_FOREACH(p, bucket) {
-               to = timeout_from_circq(p);
-               db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
-               name = name ? name : "?";
-               if (bucket == &timeout_todo)
-                       where = "softint";
-               else if (bucket == &timeout_proc)
-                       where = "thread";
-               else if (bucket == &timeout_new)
-                       where = "new";
-               else {
-                       snprintf(buf, sizeof(buf), "%3ld/%1ld",
-                           (bucket - timeout_wheel) % WHEELSIZE,
-                           (bucket - timeout_wheel) / WHEELSIZE);
-                       where = buf;
-               }
-               db_printf("%9d  %7s  0x%0*lx  %s\n",
-                   to->to_time - ticks, where, width, (ulong)to->to_arg, name);
+       db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
+       name = name ? name : "?";
+       if (bucket == &timeout_new)
+               where = "new";
+       else if (bucket == &timeout_todo)
+               where = "softint";
+       else if (bucket == &timeout_proc)
+               where = "thread";
+       else {
+               if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
+                       wheel = timeout_wheel_kc;
+               else
+                       wheel = timeout_wheel;
+               snprintf(buf, sizeof(buf), "%3ld/%1ld",
+                   (bucket - wheel) % WHEELSIZE,
+                   (bucket - wheel) / WHEELSIZE);
+               where = buf;
+       }
+       if (ISSET(to->to_flags, TIMEOUT_KCLOCK)) {
+               kc = &timeout_kclock[to->to_kclock];
+               timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
+               db_printf("%20s  %8s  %7s  0x%0*lx  %s\n",
+                   db_timespec(&remaining), db_kclock(to->to_kclock), where,
+                   width, (ulong)to->to_arg, name);
+       } else {
+               db_printf("%20d  %8s  %7s  0x%0*lx  %s\n",
+                   to->to_time - ticks, "ticks", where,
+                   width, (ulong)to->to_arg, name);
        }
 }
 
 void
 db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
 {
+       struct kclock *kc;
        int width = sizeof(long) * 2 + 2;
-       int b;
-
-       db_printf("ticks now: %d\n", ticks);
-       db_printf("%9s  %7s  %*s  func\n", "ticks", "wheel", width, "arg");
+       int b, i;
 
+       db_printf("%20s  %8s\n", "lastscan", "clock");
+       db_printf("%20d  %8s\n", ticks, "ticks");
+       for (i = 0; i < nitems(timeout_kclock); i++) {
+               kc = &timeout_kclock[i];
+               db_printf("%20s  %8s\n",
+                   db_timespec(&kc->kc_lastscan), db_kclock(i));
+       }
+       db_printf("\n");        
+       db_printf("%20s  %8s  %7s  %*s  %s\n",
+           "remaining", "clock", "wheel", width, "arg", "func");
        db_show_callout_bucket(&timeout_new);
        db_show_callout_bucket(&timeout_todo);
        db_show_callout_bucket(&timeout_proc);
        for (b = 0; b < nitems(timeout_wheel); b++)
                db_show_callout_bucket(&timeout_wheel[b]);
+       for (b = 0; b < nitems(timeout_wheel_kc); b++)
+               db_show_callout_bucket(&timeout_wheel_kc[b]);
 }
 #endif
Index: sys/timeout.h
===================================================================
RCS file: /cvs/src/sys/sys/timeout.h,v
retrieving revision 1.39
diff -u -p -r1.39 timeout.h
--- sys/timeout.h       7 Aug 2020 00:45:25 -0000       1.39
+++ sys/timeout.h       4 Sep 2020 22:41:12 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: timeout.h,v 1.39 2020/08/07 00:45:25 cheloha Exp $    */
+/*     $OpenBSD: timeout.h,v 1.38 2020/08/01 08:40:20 anton Exp $      */
 /*
  * Copyright (c) 2000-2001 Artur Grabowski <a...@openbsd.org>
  * All rights reserved. 
@@ -51,6 +51,8 @@
  * These functions may be called in interrupt context (anything below splhigh).
  */
 
+#include <sys/time.h>
+
 struct circq {
        struct circq *next;             /* next element */
        struct circq *prev;             /* previous element */
@@ -58,13 +60,15 @@ struct circq {
 
 struct timeout {
        struct circq to_list;                   /* timeout queue, don't move */
+       struct timespec to_abstime;             /* absolute time to run at */
        void (*to_func)(void *);                /* function to call */
        void *to_arg;                           /* function argument */
-       int to_time;                            /* ticks on event */
-       int to_flags;                           /* misc flags */
 #if 1 /* NKCOV > 0 */
        struct process *to_process;             /* kcov identifier */
 #endif
+       int to_time;                            /* ticks on event */
+       int to_flags;                           /* misc flags */
+       int to_kclock;                          /* abstime's kernel clock */
 };
 
 /*
@@ -74,6 +78,7 @@ struct timeout {
 #define TIMEOUT_ONQUEUE                0x02    /* on any timeout queue */
 #define TIMEOUT_INITIALIZED    0x04    /* initialized */
 #define TIMEOUT_TRIGGERED      0x08    /* running or ran */
+#define TIMEOUT_KCLOCK         0x10    /* clock-based timeout */
 
 struct timeoutstat {
        uint64_t tos_added;             /* timeout_add*(9) calls */
@@ -103,25 +108,43 @@ int timeout_sysctl(void *, size_t *, voi
 #define timeout_initialized(to) ((to)->to_flags & TIMEOUT_INITIALIZED)
 #define timeout_triggered(to) ((to)->to_flags & TIMEOUT_TRIGGERED)
 
-#define TIMEOUT_INITIALIZER_FLAGS(fn, arg, flags) {                    \
+#define KCLOCK_NONE    (-1)            /* dummy clock for sanity checks */
+#define KCLOCK_UPTIME  0               /* uptime clock; time since boot */
+#define KCLOCK_MAX     1
+
+#define _TIMEOUT_INITIALIZER(fn, arg, flags, kclock) {                 \
        .to_list = { NULL, NULL },                                      \
+       .to_abstime = { .tv_sec = 0, .tv_nsec = 0 },                    \
        .to_func = (fn),                                                \
        .to_arg = (arg),                                                \
        .to_time = 0,                                                   \
-       .to_flags = (flags) | TIMEOUT_INITIALIZED                       \
+       .to_flags = (flags) | TIMEOUT_INITIALIZED,                      \
+       .to_kclock = (kclock)                                           \
 }
 
-#define TIMEOUT_INITIALIZER(_f, _a) TIMEOUT_INITIALIZER_FLAGS((_f), (_a), 0)
+#define TIMEOUT_INITIALIZER_KCLOCK(fn, arg, flags, kclock)             \
+    _TIMEOUT_INITIALIZER((fn), (arg), (flags) | TIMEOUT_KCLOCK, (kclock))
+
+#define TIMEOUT_INITIALIZER_FLAGS(fn, arg, flags)                      \
+    _TIMEOUT_INITIALIZER((fn), (arg), (flags), KCLOCK_NONE)
+
+#define TIMEOUT_INITIALIZER(_f, _a)                                    \
+    _TIMEOUT_INITIALIZER((_f), (_a), 0, KCLOCK_NONE)
 
 void timeout_set(struct timeout *, void (*)(void *), void *);
 void timeout_set_flags(struct timeout *, void (*)(void *), void *, int);
+void timeout_set_kclock(struct timeout *, void (*)(void *), void *, int, int);
 void timeout_set_proc(struct timeout *, void (*)(void *), void *);
+
 int timeout_add(struct timeout *, int);
 int timeout_add_tv(struct timeout *, const struct timeval *);
 int timeout_add_sec(struct timeout *, int);
 int timeout_add_msec(struct timeout *, int);
 int timeout_add_usec(struct timeout *, int);
 int timeout_add_nsec(struct timeout *, int);
+
+int timeout_in_nsec(struct timeout *, uint64_t);
+
 int timeout_del(struct timeout *);
 int timeout_del_barrier(struct timeout *);
 void timeout_barrier(struct timeout *);
