On Fri, Sep 04, 2020 at 05:55:40PM -0500, Scott Cheloha wrote:
> On Sat, Jul 25, 2020 at 08:46:08PM -0500, Scott Cheloha wrote:
> >
> > [...]
> >
> > I want to add clock-based timeouts to the kernel because tick-based
> > timeouts suffer from a few problems:
> >
> > [...]
> >
> > Basically, ticks are a poor approximation for the system clock.  We
> > should use the real thing where possible.
> >
> > [...]
> >
> > Thoughts on this approach?  Thoughts on the proposed API?
>
> 6 week bump.
>
> Attached is a rebased and streamlined diff.
>
> [...]
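Before the diff, a quick sketch of how the proposed API is meant to be
used.  This is illustrative only: the mydrv_* names and the 500ms period
are invented, and only timeout_set_kclock(), timeout_in_nsec(), and
KCLOCK_UPTIME come from the diff below.  A kclock timeout is bound to a
clock when it is initialized and is armed with a relative interval in
nanoseconds; rearming from the callback yields a periodic timeout that
tracks the uptime clock instead of ticks:

#include <sys/timeout.h>

struct mydrv_softc {
        struct timeout sc_tick;
};

/* Timeout callback: do the periodic work, then rearm 500ms out. */
void
mydrv_tick(void *arg)
{
        struct mydrv_softc *sc = arg;

        /* ... periodic work ... */

        timeout_in_nsec(&sc->sc_tick, 500 * 1000 * 1000ULL);
}

void
mydrv_attach(struct mydrv_softc *sc)
{
        /* Bind the timeout to the uptime clock, then arm it. */
        timeout_set_kclock(&sc->sc_tick, mydrv_tick, sc, 0, KCLOCK_UPTIME);
        timeout_in_nsec(&sc->sc_tick, 500 * 1000 * 1000ULL);
}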
Updated diff fixes a pasto.

Index: kern/kern_timeout.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_timeout.c,v
retrieving revision 1.79
diff -u -p -r1.79 kern_timeout.c
--- kern/kern_timeout.c 7 Aug 2020 00:45:25 -0000      1.79
+++ kern/kern_timeout.c 5 Sep 2020 01:54:42 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: kern_timeout.c,v 1.79 2020/08/07 00:45:25 cheloha Exp $      */
+/*     $OpenBSD: kern_timeout.c,v 1.77 2020/08/01 08:40:20 anton Exp $        */
 /*
  * Copyright (c) 2001 Thomas Nordin <nor...@openbsd.org>
  * Copyright (c) 2000-2001 Artur Grabowski <a...@openbsd.org>
@@ -64,16 +64,27 @@ struct timeoutstat tostat;          /* [T] stati
  * of the global variable "ticks" when the timeout should be called. There are
  * four levels with 256 buckets each.
  */
-#define BUCKETS 1024
+#define WHEELCOUNT 4
 #define WHEELSIZE 256
 #define WHEELMASK 255
 #define WHEELBITS 8
+#define BUCKETS (WHEELCOUNT * WHEELSIZE)

-struct circq timeout_wheel[BUCKETS];   /* [T] Queues of timeouts */
+struct circq timeout_wheel[BUCKETS];   /* [T] Tick-based timeouts */
+struct circq timeout_wheel_kc[BUCKETS];        /* [T] Clock-based timeouts */
 struct circq timeout_new;              /* [T] New, unscheduled timeouts */
 struct circq timeout_todo;             /* [T] Due or needs rescheduling */
 struct circq timeout_proc;             /* [T] Due + needs process context */

+time_t timeout_level_width[WHEELCOUNT];        /* [I] Wheel level width (seconds) */
+struct timespec tick_ts;               /* [I] Length of a tick (1/hz secs) */
+
+struct kclock {
+       struct timespec kc_lastscan;    /* [T] Clock time at last wheel scan */
+       struct timespec kc_late;        /* [T] Late if due prior */
+       struct timespec kc_offset;      /* [T] Offset from primary kclock */
+} timeout_kclock[KCLOCK_MAX];
+
 #define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

 #define BUCKET(rel, abs)                                               \
@@ -155,9 +166,15 @@ struct lock_type timeout_spinlock_type =
        ((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
 #endif

+void kclock_nanotime(int, struct timespec *);
 void softclock(void *);
 void softclock_create_thread(void *);
+void softclock_process_kclock_timeout(struct timeout *, int);
+void softclock_process_tick_timeout(struct timeout *, int);
 void softclock_thread(void *);
+uint32_t timeout_bucket(struct timeout *);
+uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
+void timeout_run(struct timeout *);
 void timeout_proc_barrier(void *);

 /*
@@ -207,13 +224,19 @@ timeout_sync_leave(int needsproc)
 void
 timeout_startup(void)
 {
-       int b;
+       int b, level;

        CIRCQ_INIT(&timeout_new);
        CIRCQ_INIT(&timeout_todo);
        CIRCQ_INIT(&timeout_proc);
        for (b = 0; b < nitems(timeout_wheel); b++)
                CIRCQ_INIT(&timeout_wheel[b]);
+       for (b = 0; b < nitems(timeout_wheel_kc); b++)
+               CIRCQ_INIT(&timeout_wheel_kc[b]);
+
+       for (level = 0; level < nitems(timeout_level_width); level++)
+               timeout_level_width[level] = 2 << (level * WHEELBITS);
+       NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
 }

 void
@@ -229,25 +252,39 @@ timeout_proc_init(void)
        kthread_create_deferred(softclock_create_thread, NULL);
 }

+static inline void
+_timeout_set(struct timeout *to, void (*fn)(void *), void *arg, int flags,
+    int kclock)
+{
+       to->to_func = fn;
+       to->to_arg = arg;
+       to->to_flags = flags | TIMEOUT_INITIALIZED;
+       to->to_kclock = kclock;
+}
+
 void
 timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
 {
-       timeout_set_flags(new, fn, arg, 0);
+       _timeout_set(new, fn, arg, 0, KCLOCK_NONE);
 }

 void
 timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int flags)
 {
-       to->to_func = fn;
-       to->to_arg = arg;
-       to->to_process = NULL;
-       to->to_flags = flags | TIMEOUT_INITIALIZED;
+       _timeout_set(to, fn, arg, flags, KCLOCK_NONE);
 }

 void
 timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
 {
-       timeout_set_flags(new, fn, arg, TIMEOUT_PROC);
+       _timeout_set(new, fn, arg, TIMEOUT_PROC, KCLOCK_NONE);
+}
+
+void
+timeout_set_kclock(struct timeout *to, void (*fn)(void *), void *arg, int flags,
+    int kclock)
+{
+       _timeout_set(to, fn, arg, flags | TIMEOUT_KCLOCK, kclock);
 }

 int
@@ -257,6 +294,8 @@ timeout_add(struct timeout *new, int to_
        int ret = 1;

        KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
+       KASSERT(!ISSET(new->to_flags, TIMEOUT_KCLOCK));
+       KASSERT(new->to_kclock == KCLOCK_NONE);
        KASSERT(to_ticks >= 0);

        mtx_enter(&timeout_mutex);
@@ -356,6 +395,65 @@ timeout_add_nsec(struct timeout *to, int
 }

 int
+timeout_at_ts(struct timeout *to, const struct timespec *abstime)
+{
+       struct timespec old_abstime;
+       int ret = 1;
+
+       KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED | TIMEOUT_KCLOCK));
+       KASSERT(to->to_kclock != KCLOCK_NONE);
+
+       mtx_enter(&timeout_mutex);
+
+       old_abstime = to->to_abstime;
+       to->to_abstime = *abstime;
+       CLR(to->to_flags, TIMEOUT_TRIGGERED);
+
+       if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
+               if (timespeccmp(abstime, &old_abstime, <)) {
+                       CIRCQ_REMOVE(&to->to_list);
+                       CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
+               }
+               tostat.tos_readded++;
+               ret = 0;
+       } else {
+               SET(to->to_flags, TIMEOUT_ONQUEUE);
+               CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
+       }
+#if NKCOV > 0
+       to->to_process = curproc->p_p;
+#endif
+       tostat.tos_added++;
+       mtx_leave(&timeout_mutex);
+
+       return ret;
+}
+
+int
+timeout_in_nsec(struct timeout *to, uint64_t nsecs)
+{
+       struct timespec deadline, interval, now;
+
+       kclock_nanotime(to->to_kclock, &now);
+       NSEC_TO_TIMESPEC(nsecs, &interval);
+       timespecadd(&now, &interval, &deadline);
+
+       return timeout_at_ts(to, &deadline);
+}
+
+void
+kclock_nanotime(int kclock, struct timespec *now)
+{
+       switch (kclock) {
+       case KCLOCK_UPTIME:
+               nanouptime(now);
+               break;
+       default:
+               panic("invalid kclock: 0x%x", kclock);
+       }
+}
+
+int
 timeout_del(struct timeout *to)
 {
        int ret = 0;
@@ -425,6 +523,35 @@ timeout_proc_barrier(void *arg)
        cond_signal(c);
 }

+uint32_t
+timeout_bucket(struct timeout *to)
+{
+       struct kclock *kc = &timeout_kclock[to->to_kclock];
+       struct timespec diff;
+       uint32_t level;
+
+       KASSERT(ISSET(to->to_flags, TIMEOUT_KCLOCK));
+       KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));
+
+       timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
+       for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
+               if (diff.tv_sec < timeout_level_width[level])
+                       break;
+       }
+       return level * WHEELSIZE + timeout_maskwheel(level, &to->to_abstime);
+}
+
+uint32_t
+timeout_maskwheel(uint32_t level, const struct timespec *abstime)
+{
+       uint32_t hi, lo;
+
+       hi = abstime->tv_sec << 7;
+       lo = abstime->tv_nsec / 7812500;
+
+       return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
+}
+
 /*
  * This is called from hardclock() on the primary CPU at the start of
  * every tick.
@@ -432,7 +559,15 @@ timeout_proc_barrier(void *arg)
 void
 timeout_hardclock_update(void)
 {
-       int need_softclock = 1;
+       struct timespec elapsed, now;
+       struct kclock *kc;
+       struct timespec *lastscan;
+       int b, done, first, i, last, level, need_softclock, off;
+
+       kclock_nanotime(KCLOCK_UPTIME, &now);
+       lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
+       timespecsub(&now, lastscan, &elapsed);
+       need_softclock = 1;

        mtx_enter(&timeout_mutex);

@@ -446,6 +581,44 @@ timeout_hardclock_update(void)
                }
        }

+       /*
+        * Dump the buckets that expired while we were away.
+        *
+        * If the elapsed time has exceeded a level's limit then we need
+        * to dump every bucket in the level.  We have necessarily completed
+        * a lap of that level, too, so we need to process buckets in the
+        * next level.
+        *
+        * Otherwise we need to compare indices: if the index of the first
+        * expired bucket is greater than that of the last then we have
+        * completed a lap of the level and need to process buckets in the
+        * next level.
+        */
+       for (level = 0; level < nitems(timeout_level_width); level++) {
+               first = timeout_maskwheel(level, lastscan);
+               if (elapsed.tv_sec >= timeout_level_width[level]) {
+                       last = (first == 0) ? WHEELSIZE - 1 : first - 1;
+                       done = 0;
+               } else {
+                       last = timeout_maskwheel(level, &now);
+                       done = first <= last;
+               }
+               off = level * WHEELSIZE;
+               for (b = first;; b = (b + 1) % WHEELSIZE) {
+                       CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
+                       if (b == last)
+                               break;
+               }
+               if (done)
+                       break;
+       }
+
+       for (i = 0; i < nitems(timeout_kclock); i++) {
+               kc = &timeout_kclock[i];
+               timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
+               timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
+       }
+
        if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
                need_softclock = 0;

@@ -485,6 +658,51 @@ timeout_run(struct timeout *to)
        mtx_enter(&timeout_mutex);
 }

+void
+softclock_process_kclock_timeout(struct timeout *to, int new)
+{
+       struct kclock *kc = &timeout_kclock[to->to_kclock];
+
+       if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
+               tostat.tos_scheduled++;
+               if (!new)
+                       tostat.tos_rescheduled++;
+               CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
+                   &to->to_list);
+               return;
+       }
+       if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
+               tostat.tos_late++;
+       if (ISSET(to->to_flags, TIMEOUT_PROC)) {
+               CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+               return;
+       }
+       timeout_run(to);
+       tostat.tos_run_softclock++;
+}
+
+void
+softclock_process_tick_timeout(struct timeout *to, int new)
+{
+       int delta = to->to_time - ticks;
+
+       if (delta > 0) {
+               tostat.tos_scheduled++;
+               if (!new)
+                       tostat.tos_rescheduled++;
+               CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
+               return;
+       }
+       if (!new && delta < 0)
+               tostat.tos_late++;
+       if (ISSET(to->to_flags, TIMEOUT_PROC)) {
+               CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+               return;
+       }
+       timeout_run(to);
+       tostat.tos_run_softclock++;
+}
+
 /*
  * Timeouts are processed here instead of timeout_hardclock_update()
  * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
@@ -494,9 +712,8 @@ timeout_run(struct timeout *to)
 void
 softclock(void *arg)
 {
-       struct circq *bucket;
        struct timeout *first_new, *to;
-       int delta, needsproc, new;
+       int needsproc, new;

        first_new = NULL;
        new = 0;
@@ -510,28 +727,10 @@ softclock(void *arg)
                CIRCQ_REMOVE(&to->to_list);
                if (to == first_new)
                        new = 1;
-
-               /*
-                * If due run it or defer execution to the thread,
-                * otherwise insert it into the right bucket.
-                */
-               delta = to->to_time - ticks;
-               if (delta > 0) {
-                       bucket = &BUCKET(delta, to->to_time);
-                       CIRCQ_INSERT_TAIL(bucket, &to->to_list);
-                       tostat.tos_scheduled++;
-                       if (!new)
-                               tostat.tos_rescheduled++;
-                       continue;
-               }
-               if (!new && delta < 0)
-                       tostat.tos_late++;
-               if (ISSET(to->to_flags, TIMEOUT_PROC)) {
-                       CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
-                       continue;
-               }
-               timeout_run(to);
-               tostat.tos_run_softclock++;
+               if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
+                       softclock_process_kclock_timeout(to, new);
+               else
+                       softclock_process_tick_timeout(to, new);
        }
        tostat.tos_softclocks++;
        needsproc = !CIRCQ_EMPTY(&timeout_proc);
@@ -630,52 +829,114 @@ timeout_sysctl(void *oldp, size_t *oldle
 }

 #ifdef DDB
+const char *db_kclock(int);
 void db_show_callout_bucket(struct circq *);
+void db_show_timeout(struct timeout *, struct circq *);
+const char *db_timespec(const struct timespec *);
+
+const char *
+db_kclock(int kclock)
+{
+       switch (kclock) {
+       case KCLOCK_UPTIME:
+               return "uptime";
+       default:
+               return "invalid";
+       }
+}
+
+const char *
+db_timespec(const struct timespec *ts)
+{
+       static char buf[32];
+       struct timespec tmp, zero;
+
+       if (ts->tv_sec >= 0) {
+               snprintf(buf, sizeof(buf), "%lld.%09ld",
+                   ts->tv_sec, ts->tv_nsec);
+               return buf;
+       }
+
+       timespecclear(&zero);
+       timespecsub(&zero, ts, &tmp);
+       snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
+       return buf;
+}

 void
 db_show_callout_bucket(struct circq *bucket)
 {
-       char buf[8];
-       struct timeout *to;
        struct circq *p;
+
+       CIRCQ_FOREACH(p, bucket)
+               db_show_timeout(timeout_from_circq(p), bucket);
+}
+
+void
+db_show_timeout(struct timeout *to, struct circq *bucket)
+{
+       struct timespec remaining;
+       struct kclock *kc;
+       char buf[8];
        db_expr_t offset;
+       struct circq *wheel;
        char *name, *where;
        int width = sizeof(long) * 2;

-       CIRCQ_FOREACH(p, bucket) {
-               to = timeout_from_circq(p);
-               db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
-               name = name ? name : "?";
-               if (bucket == &timeout_todo)
-                       where = "softint";
-               else if (bucket == &timeout_proc)
-                       where = "thread";
-               else if (bucket == &timeout_new)
-                       where = "new";
-               else {
-                       snprintf(buf, sizeof(buf), "%3ld/%1ld",
-                           (bucket - timeout_wheel) % WHEELSIZE,
-                           (bucket - timeout_wheel) / WHEELSIZE);
-                       where = buf;
-               }
-               db_printf("%9d %7s 0x%0*lx %s\n",
-                   to->to_time - ticks, where, width, (ulong)to->to_arg, name);
+       db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
+       name = name ? name : "?";
+       if (bucket == &timeout_new)
+               where = "new";
+       else if (bucket == &timeout_todo)
+               where = "softint";
+       else if (bucket == &timeout_proc)
+               where = "thread";
+       else {
+               if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
+                       wheel = timeout_wheel_kc;
+               else
+                       wheel = timeout_wheel;
+               snprintf(buf, sizeof(buf), "%3ld/%1ld",
+                   (bucket - wheel) % WHEELSIZE,
+                   (bucket - wheel) / WHEELSIZE);
+               where = buf;
+       }
+       if (ISSET(to->to_flags, TIMEOUT_KCLOCK)) {
+               kc = &timeout_kclock[to->to_kclock];
+               timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
+               db_printf("%20s %8s %7s 0x%0*lx %s\n",
+                   db_timespec(&remaining), db_kclock(to->to_kclock), where,
+                   width, (ulong)to->to_arg, name);
+       } else {
+               db_printf("%20d %8s %7s 0x%0*lx %s\n",
+                   to->to_time - ticks, "ticks", where,
+                   width, (ulong)to->to_arg, name);
        }
 }

 void
 db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
 {
+       struct kclock *kc;
        int width = sizeof(long) * 2 + 2;
-       int b;
-
-       db_printf("ticks now: %d\n", ticks);
-       db_printf("%9s %7s %*s func\n", "ticks", "wheel", width, "arg");
+       int b, i;
+
+       db_printf("%20s %8s\n", "lastscan", "clock");
+       db_printf("%20d %8s\n", ticks, "ticks");
+       for (i = 0; i < nitems(timeout_kclock); i++) {
+               kc = &timeout_kclock[i];
+               db_printf("%20s %8s\n",
+                   db_timespec(&kc->kc_lastscan), db_kclock(i));
+       }
+       db_printf("\n");
+       db_printf("%20s %8s %7s %*s %s\n",
+           "remaining", "clock", "wheel", width, "arg", "func");

        db_show_callout_bucket(&timeout_new);
        db_show_callout_bucket(&timeout_todo);
        db_show_callout_bucket(&timeout_proc);
        for (b = 0; b < nitems(timeout_wheel); b++)
                db_show_callout_bucket(&timeout_wheel[b]);
+       for (b = 0; b < nitems(timeout_wheel_kc); b++)
+               db_show_callout_bucket(&timeout_wheel_kc[b]);
 }

 #endif

Index: sys/timeout.h
===================================================================
RCS file: /cvs/src/sys/sys/timeout.h,v
retrieving revision 1.39
diff -u -p -r1.39 timeout.h
--- sys/timeout.h       7 Aug 2020 00:45:25 -0000      1.39
+++ sys/timeout.h       5 Sep 2020 01:54:42 -0000
@@ -1,4 +1,4 @@
-/*     $OpenBSD: timeout.h,v 1.39 2020/08/07 00:45:25 cheloha Exp $   */
+/*     $OpenBSD: timeout.h,v 1.38 2020/08/01 08:40:20 anton Exp $     */
 /*
  * Copyright (c) 2000-2001 Artur Grabowski <a...@openbsd.org>
  * All rights reserved.
@@ -51,6 +51,8 @@
  * These functions may be called in interrupt context (anything below splhigh).
  */

+#include <sys/time.h>
+
 struct circq {
        struct circq *next;             /* next element */
        struct circq *prev;             /* previous element */
@@ -58,13 +60,15 @@ struct circq {

 struct timeout {
        struct circq to_list;                   /* timeout queue, don't move */
+       struct timespec to_abstime;             /* absolute time to run at */
        void (*to_func)(void *);                /* function to call */
        void *to_arg;                           /* function argument */
-       int to_time;                            /* ticks on event */
-       int to_flags;                           /* misc flags */
 #if 1 /* NKCOV > 0 */
        struct process *to_process;             /* kcov identifier */
 #endif
+       int to_time;                            /* ticks on event */
+       int to_flags;                           /* misc flags */
+       int to_kclock;                          /* abstime's kernel clock */
 };

 /*
@@ -74,6 +78,7 @@ struct timeout {
 #define TIMEOUT_ONQUEUE                0x02    /* on any timeout queue */
 #define TIMEOUT_INITIALIZED    0x04    /* initialized */
 #define TIMEOUT_TRIGGERED      0x08    /* running or ran */
+#define TIMEOUT_KCLOCK         0x10    /* clock-based timeout */

 struct timeoutstat {
        uint64_t tos_added;             /* timeout_add*(9) calls */
@@ -103,25 +108,43 @@ int timeout_sysctl(void *, size_t *, voi
 #define timeout_initialized(to) ((to)->to_flags & TIMEOUT_INITIALIZED)
 #define timeout_triggered(to) ((to)->to_flags & TIMEOUT_TRIGGERED)

-#define TIMEOUT_INITIALIZER_FLAGS(fn, arg, flags) {                    \
+#define KCLOCK_NONE    (-1)            /* dummy clock for sanity checks */
+#define KCLOCK_UPTIME  0               /* uptime clock; time since boot */
+#define KCLOCK_MAX     1
+
+#define _TIMEOUT_INITIALIZER(fn, arg, flags, kclock) {                 \
        .to_list = { NULL, NULL },                                      \
+       .to_abstime = { .tv_sec = 0, .tv_nsec = 0 },                    \
        .to_func = (fn),                                                \
        .to_arg = (arg),                                                \
        .to_time = 0,                                                   \
-       .to_flags = (flags) | TIMEOUT_INITIALIZED                       \
+       .to_flags = (flags) | TIMEOUT_INITIALIZED,                      \
+       .to_kclock = (kclock)                                           \
 }

-#define TIMEOUT_INITIALIZER(_f, _a) TIMEOUT_INITIALIZER_FLAGS((_f), (_a), 0)
+#define TIMEOUT_INITIALIZER_KCLOCK(fn, arg, flags, kclock)             \
+       _TIMEOUT_INITIALIZER((fn), (arg), (flags) | TIMEOUT_KCLOCK, (kclock))
+
+#define TIMEOUT_INITIALIZER_FLAGS(fn, arg, flags)                      \
+       _TIMEOUT_INITIALIZER((fn), (arg), (flags), KCLOCK_NONE)
+
+#define TIMEOUT_INITIALIZER(_f, _a)                                    \
+       _TIMEOUT_INITIALIZER((_f), (_a), 0, KCLOCK_NONE)

 void timeout_set(struct timeout *, void (*)(void *), void *);
 void timeout_set_flags(struct timeout *, void (*)(void *), void *, int);
+void timeout_set_kclock(struct timeout *, void (*)(void *), void *, int, int);
 void timeout_set_proc(struct timeout *, void (*)(void *), void *);
+
 int timeout_add(struct timeout *, int);
 int timeout_add_tv(struct timeout *, const struct timeval *);
 int timeout_add_sec(struct timeout *, int);
 int timeout_add_msec(struct timeout *, int);
 int timeout_add_usec(struct timeout *, int);
 int timeout_add_nsec(struct timeout *, int);
+
+int timeout_in_nsec(struct timeout *, uint64_t);
+
 int timeout_del(struct timeout *);
 int timeout_del_barrier(struct timeout *);
 void timeout_barrier(struct timeout *);
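A note on the hashing in timeout_maskwheel() above, since the constant is
not obvious: 7812500 is 1000000000 / 128, so (hi | lo) is the absolute
timestamp expressed in 1/128-second units, with tv_sec in the upper 25
bits and a 7-bit subsecond fraction below it.  Each wheel level then
consumes the next 8 bits of that value, which is where the level widths
set in timeout_startup() come from: 2 << (level * WHEELBITS) seconds,
i.e. 2s for level 0, 512s for level 1, and so on.  The following is a
standalone userland sketch of that hashing (illustrative only, not part
of the diff):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define WHEELBITS       8
#define WHEELMASK       255

/* Same arithmetic as timeout_maskwheel() in the diff. */
static uint32_t
maskwheel(uint32_t level, const struct timespec *abstime)
{
        uint32_t hi = abstime->tv_sec << 7;             /* whole seconds */
        uint32_t lo = abstime->tv_nsec / 7812500;       /* 1/128s fraction */

        return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}

int
main(void)
{
        /* 100.5s => (100 << 7) | 64 = 12864; buckets 64, 50, 0, 0. */
        struct timespec ts = { .tv_sec = 100, .tv_nsec = 500000000 };
        uint32_t level;

        for (level = 0; level < 4; level++)
                printf("level %u: bucket %u\n", level, maskwheel(level, &ts));
        return 0;
}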