Paul Irofti <p...@irofti.net> wrote:

> Here is a third version of the diff. With this robert@ is able to use
> chrome with almost all the calls to kernel clock_gettime gone. I
> think the number dropped from 600,000 to 400. Robert can give you more
> details. But the idea is that it is very fast now. Zoom zoom.

But the 


> The diff includes a temporary hack for /sbin/init, the only known issue
> remaining, to call the kernel syscall directly.

This is wrong.  It means you haven't found the function start_init()
in init_main.c yet.

>       - moved timekeep.h inside sys/time.h

Namespace issues must be considered.  That structure could collide.
The name is probably poorly chosen, and the structure may need _ or __
naming to avoid potential collision.

>       - ignoring the ELF auxv numbering as it is not an issue;
>         deraadt@ says we do not run binaries across BSDs so no need to
>         worry

That is my thought.  People who have knowledge of cross-compile should
speak up.

>       - the clock quality issue was brought back; afaics the current
>         diff does exactly what the syscall is doing and the
>         information is updated inside every tc_windup() which makes this
>         almost an exact replica of the syscall for the clocks in libc;
>         for the other clocks we go to the syscall and everything is
>         the same as before
> 
> I thought that is what Scott said about the last bit as well. If I
> misunderstood and clock quality is lost somewhere, I would appreciate
> someone with better technical understanding point out the exact issues
> in the code. Perhaps again Scott?

Let me make it clear this is very important.  Changing this structure
later will be a very painful ABI break.  The high-resolution issue brought
up by kettenis MUST be considered in the first commit.  There is no point
committing drafts of this which simply result in painful ABI breaks in the
short term.  Meaning, this must not be rushed.

But secondly, committing a design which *blocks work* on resolution
improvement is nasty.  This design is looking at "something changed
slowly", and there is no way to improve it.

Final point:

I think tc_clock_gettime() in the kernel and userland reading in
WRAP(clock_gettime) are very non-atomic.  What prevents the structure
assignment from reading half of an old timespec, and half of a new timespec?
As higher bytes in the timespec increase, lower bytes could be read which
decrease.  It seems this is depending on consistent cache behaviour...
which might not be the case.


> diff --git lib/libc/asr/asr.c lib/libc/asr/asr.c
> index cd056c85719..2b25d49f32a 100644
> --- lib/libc/asr/asr.c
> +++ lib/libc/asr/asr.c
> @@ -196,11 +196,11 @@ poll_intrsafe(struct pollfd *fds, nfds_t nfds, int 
> timeout)
>       struct timespec pollstart, pollend, elapsed;
>       int r;
>  
> -     if (clock_gettime(CLOCK_MONOTONIC, &pollstart))
> +     if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollstart))
>               return -1;
>  
>       while ((r = poll(fds, 1, timeout)) == -1 && errno == EINTR) {
> -             if (clock_gettime(CLOCK_MONOTONIC, &pollend))
> +             if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &pollend))
>                       return -1;
>               timespecsub(&pollend, &pollstart, &elapsed);
>               timeout -= elapsed.tv_sec * 1000 + elapsed.tv_nsec / 1000000;
> @@ -418,7 +418,7 @@ asr_check_reload(struct asr *asr)
>               asr->a_rtime = 0;
>       }
>  
> -     if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
> +     if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
>               return;
>  
>       if ((ts.tv_sec - asr->a_rtime) < RELOAD_DELAY && asr->a_rtime != 0)
> diff --git lib/libc/crypt/bcrypt.c lib/libc/crypt/bcrypt.c
> index 82de8fa33b7..02fd3013cc1 100644
> --- lib/libc/crypt/bcrypt.c
> +++ lib/libc/crypt/bcrypt.c
> @@ -248,9 +248,9 @@ _bcrypt_autorounds(void)
>       char buf[_PASSWORD_LEN];
>       int duration;
>  
> -     clock_gettime(CLOCK_THREAD_CPUTIME_ID, &before);
> +     WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &before);
>       bcrypt_newhash("testpassword", r, buf, sizeof(buf));
> -     clock_gettime(CLOCK_THREAD_CPUTIME_ID, &after);
> +     WRAP(clock_gettime)(CLOCK_THREAD_CPUTIME_ID, &after);
>  
>       duration = after.tv_sec - before.tv_sec;
>       duration *= 1000000;
> diff --git lib/libc/dlfcn/dlfcn_stubs.c lib/libc/dlfcn/dlfcn_stubs.c
> index 78d728f66cb..7b75ec4582a 100644
> --- lib/libc/dlfcn/dlfcn_stubs.c
> +++ lib/libc/dlfcn/dlfcn_stubs.c
> @@ -80,10 +80,14 @@ dlerror(void)
>       return "Wrong dl symbols!\n";
>  }
>  
> +extern void *elf_aux_timekeep;
> +extern int find_timekeep(void);
> +
>  int
>  dl_iterate_phdr(int (*callback)(struct dl_phdr_info *, size_t, void *),
>       void *data)
>  {
> +     find_timekeep();
>       if (_dl_cb != NULL && _dl_cb->dl_iterate_phdr != NULL)
>               return _dl_cb->dl_iterate_phdr(callback, data);
>  #ifndef PIC
> diff --git lib/libc/dlfcn/init.c lib/libc/dlfcn/init.c
> index 270f54aada5..0238bb50b0b 100644
> --- lib/libc/dlfcn/init.c
> +++ lib/libc/dlfcn/init.c
> @@ -69,6 +69,9 @@ extern Elf_Ehdr __executable_start[] __attribute__((weak));
>  /* provide definitions for these */
>  const dl_cb *_dl_cb __relro = NULL;
>  
> +extern void *elf_aux_timekeep;
> +extern int find_timekeep(void);
> +
>  void _libc_preinit(int, char **, char **, dl_cb_cb *) __dso_hidden;
>  void
>  _libc_preinit(int argc, char **argv, char **envp, dl_cb_cb *cb)
> @@ -126,6 +129,7 @@ _libc_preinit(int argc, char **argv, char **envp, 
> dl_cb_cb *cb)
>       if (cb == NULL)
>               setup_static_tib(phdr, phnum);
>  #endif /* !PIC */
> +     find_timekeep();
>  }
>  
>  /* ARM just had to be different... */
> diff --git lib/libc/gen/times.c lib/libc/gen/times.c
> index 02e4dd44b5c..36841810d1b 100644
> --- lib/libc/gen/times.c
> +++ lib/libc/gen/times.c
> @@ -52,7 +52,7 @@ times(struct tms *tp)
>               return ((clock_t)-1);
>       tp->tms_cutime = CONVTCK(ru.ru_utime);
>       tp->tms_cstime = CONVTCK(ru.ru_stime);
> -     if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1)
> +     if (WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts) == -1)
>               return ((clock_t)-1);
>       return (ts.tv_sec * CLK_TCK + ts.tv_nsec / (1000000000 / CLK_TCK));
>  }
> diff --git lib/libc/gen/timespec_get.c lib/libc/gen/timespec_get.c
> index 520a5954025..845cbe80356 100644
> --- lib/libc/gen/timespec_get.c
> +++ lib/libc/gen/timespec_get.c
> @@ -37,7 +37,7 @@ timespec_get(struct timespec *ts, int base)
>  {
>       switch (base) {
>       case TIME_UTC:
> -             if (clock_gettime(CLOCK_REALTIME, ts) == -1)
> +             if (WRAP(clock_gettime)(CLOCK_REALTIME, ts) == -1)
>                       return 0;
>               break;
>       default:
> diff --git lib/libc/hidden/time.h lib/libc/hidden/time.h
> index 18c49f8fcb9..d8e1e0caf64 100644
> --- lib/libc/hidden/time.h
> +++ lib/libc/hidden/time.h
> @@ -29,7 +29,7 @@ PROTO_NORMAL(asctime_r);
>  PROTO_STD_DEPRECATED(clock);
>  PROTO_DEPRECATED(clock_getcpuclockid);
>  PROTO_NORMAL(clock_getres);
> -PROTO_NORMAL(clock_gettime);
> +PROTO_WRAP(clock_gettime);
>  PROTO_NORMAL(clock_settime);
>  PROTO_STD_DEPRECATED(ctime);
>  PROTO_DEPRECATED(ctime_r);
> diff --git lib/libc/net/res_random.c lib/libc/net/res_random.c
> index 763e420bb88..9babb28470a 100644
> --- lib/libc/net/res_random.c
> +++ lib/libc/net/res_random.c
> @@ -219,7 +219,7 @@ res_initid(void)
>       if (ru_prf != NULL)
>               arc4random_buf(ru_prf, sizeof(*ru_prf));
>  
> -     clock_gettime(CLOCK_MONOTONIC, &ts);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
>       ru_reseed = ts.tv_sec + RU_OUT;
>       ru_msb = ru_msb == 0x8000 ? 0 : 0x8000; 
>  }
> @@ -232,7 +232,7 @@ __res_randomid(void)
>       u_int r;
>       static void *randomid_mutex;
>  
> -     clock_gettime(CLOCK_MONOTONIC, &ts);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &ts);
>       pid = getpid();
>  
>       _MUTEX_LOCK(&randomid_mutex);
> diff --git lib/libc/rpc/clnt_tcp.c lib/libc/rpc/clnt_tcp.c
> index 8e6ef515b0e..927b4bf2028 100644
> --- lib/libc/rpc/clnt_tcp.c
> +++ lib/libc/rpc/clnt_tcp.c
> @@ -393,12 +393,12 @@ readtcp(struct ct_data *ct, caddr_t buf, int len)
>       pfd[0].events = POLLIN;
>       TIMEVAL_TO_TIMESPEC(&ct->ct_wait, &wait);
>       delta = wait;
> -     clock_gettime(CLOCK_MONOTONIC, &start);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>       for (;;) {
>               r = ppoll(pfd, 1, &delta, NULL);
>               save_errno = errno;
>  
> -             clock_gettime(CLOCK_MONOTONIC, &after);
> +             WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>               timespecsub(&start, &after, &duration);
>               timespecsub(&wait, &duration, &delta);
>               if (delta.tv_sec < 0 || !timespecisset(&delta))
> diff --git lib/libc/rpc/clnt_udp.c lib/libc/rpc/clnt_udp.c
> index 68d01674410..92e1d5c350d 100644
> --- lib/libc/rpc/clnt_udp.c
> +++ lib/libc/rpc/clnt_udp.c
> @@ -265,7 +265,7 @@ send_again:
>       reply_msg.acpted_rply.ar_results.where = resultsp;
>       reply_msg.acpted_rply.ar_results.proc = xresults;
>  
> -     clock_gettime(CLOCK_MONOTONIC, &start);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>       for (;;) {
>               switch (ppoll(pfd, 1, &wait, NULL)) {
>               case 0:
> @@ -283,7 +283,7 @@ send_again:
>                       /* FALLTHROUGH */
>               case -1:
>                       if (errno == EINTR) {
> -                             clock_gettime(CLOCK_MONOTONIC, &after);
> +                             WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>                               timespecsub(&after, &start, &duration);
>                               timespecadd(&time_waited, &duration, 
> &time_waited);
>                               if (timespeccmp(&time_waited, &timeout, <))
> diff --git lib/libc/rpc/svc_tcp.c lib/libc/rpc/svc_tcp.c
> index f9d7a70938f..6c99db84359 100644
> --- lib/libc/rpc/svc_tcp.c
> +++ lib/libc/rpc/svc_tcp.c
> @@ -342,7 +342,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
>        * A timeout is fatal for the connection.
>        */
>       delta = wait_per_try;
> -     clock_gettime(CLOCK_MONOTONIC, &start);
> +     WRAP(clock_gettime)(CLOCK_MONOTONIC, &start);
>       pfd[0].fd = sock;
>       pfd[0].events = POLLIN;
>       do {
> @@ -351,7 +351,7 @@ readtcp(SVCXPRT *xprt, caddr_t buf, int len)
>               case -1:
>                       if (errno != EINTR)
>                               goto fatal_err;
> -                     clock_gettime(CLOCK_MONOTONIC, &after);
> +                     WRAP(clock_gettime)(CLOCK_MONOTONIC, &after);
>                       timespecsub(&after, &start, &duration);
>                       timespecsub(&wait_per_try, &duration, &delta);
>                       if (delta.tv_sec < 0 || !timespecisset(&delta))
> diff --git lib/libc/shlib_version lib/libc/shlib_version
> index 06f98b01084..5fb0770494f 100644
> --- lib/libc/shlib_version
> +++ lib/libc/shlib_version
> @@ -1,4 +1,4 @@
>  major=96
> -minor=0
> +minor=1
>  # note: If changes were made to include/thread_private.h or if system calls
>  # were added/changed then librthread/shlib_version must also be updated.
> diff --git lib/libc/sys/Makefile.inc lib/libc/sys/Makefile.inc
> index 34769576ced..d0b5dd1bdcd 100644
> --- lib/libc/sys/Makefile.inc
> +++ lib/libc/sys/Makefile.inc
> @@ -12,7 +12,8 @@ SRCS+=      Ovfork.S brk.S ${CERROR} \
>  
>  # glue to offer userland wrappers for some syscalls
>  SRCS+=       posix_madvise.c pthread_sigmask.c \
> -     w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c
> +     w_fork.c w_sigaction.c w_sigprocmask.c w_sigsuspend.c w_vfork.c \
> +     w_clock_gettime.c
>  
>  # glue for compat with old syscall interfaces.
>  SRCS+=       ftruncate.c lseek.c mquery.c mmap.c ptrace.c semctl.c 
> truncate.c \
> @@ -43,7 +44,7 @@ SRCS+=      ${CANCEL:%=w_%.c} w_pread.c w_preadv.c 
> w_pwrite.c w_pwritev.c
>  ASM= __semctl.o __syscall.o __thrsigdivert.o \
>       access.o acct.o adjfreq.o adjtime.o \
>       bind.o chdir.o chflags.o chflagsat.o chmod.o chown.o chroot.o \
> -     clock_getres.o clock_gettime.o clock_settime.o \
> +     clock_getres.o clock_settime.o \
>       dup.o dup2.o dup3.o \
>       execve.o \
>       faccessat.o fchdir.o fchflags.o fchmod.o fchmodat.o fchown.o \
> @@ -109,7 +110,7 @@ PPSEUDO_NOERR=${PSEUDO_NOERR:.o=.po}
>  SPSEUDO_NOERR=${PSEUDO_NOERR:.o=.so}
>  DPSEUDO_NOERR=${PSEUDO_NOERR:.o=.do}
>  
> -HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o}
> +HIDDEN= ___realpath.o ___getcwd.o fork.o sigaction.o _ptrace.o ${CANCEL:=.o} 
> clock_gettime.o
>  PHIDDEN=${HIDDEN:.o=.po}
>  SHIDDEN=${HIDDEN:.o=.so}
>  DHIDDEN=${HIDDEN:.o=.do}
> diff --git lib/libc/sys/w_clock_gettime.c lib/libc/sys/w_clock_gettime.c
> new file mode 100644
> index 00000000000..061dcd47dce
> --- /dev/null
> +++ lib/libc/sys/w_clock_gettime.c
> @@ -0,0 +1,109 @@
> +/*   $OpenBSD$ */
> +/*
> + * Copyright (c) 2020 Paul Irofti <p...@irofti.net>
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include <stdlib.h>
> +#include <time.h>
> +#include <err.h>
> +
> +#include <sys/time.h>
> +
> +void *elf_aux_timekeep;
> +
> +
> +/*
> + * Needed exec_elf implementation.
> + * To be exposed by the kernel later if needed.
> + */
> +
> +#include <sys/exec_elf.h>
> +
> +typedef struct {
> +     uint32_t        au_id;                          /* 32-bit id */
> +     uint64_t        au_v;                           /* 64-bit value */
> +} AuxInfo;
> +
> +enum AuxID {
> +     AUX_null = 0,
> +     AUX_ignore = 1,
> +     AUX_execfd = 2,
> +     AUX_phdr = 3,                   /* &phdr[0] */
> +     AUX_phent = 4,                  /* sizeof(phdr[0]) */
> +     AUX_phnum = 5,                  /* # phdr entries */
> +     AUX_pagesz = 6,                 /* PAGESIZE */
> +     AUX_base = 7,                   /* ld.so base addr */
> +     AUX_flags = 8,                  /* processor flags */
> +     AUX_entry = 9,                  /* a.out entry */
> +     AUX_sun_uid = 2000,             /* euid */
> +     AUX_sun_ruid = 2001,            /* ruid */
> +     AUX_sun_gid = 2002,             /* egid */
> +     AUX_sun_rgid = 2003,            /* rgid */
> +     AUX_openbsd_timekeep = 2004,    /* userland clock_gettime */
> +};
> +
> +
> +/*
> + * Helper functions.
> + */
> +
> +int
> +find_timekeep(void)
> +{
> +     Elf_Addr *stackp;
> +     AuxInfo *auxv;
> +
> +     stackp = (Elf_Addr *)environ;
> +     while (*stackp++) ;             /* pass environment */
> +
> +     /* look-up timekeep auxv */
> +     for (auxv = (AuxInfo *)stackp; auxv->au_id != AUX_null; auxv++)
> +             if (auxv->au_id == AUX_openbsd_timekeep) {
> +                     elf_aux_timekeep = (void *)auxv->au_v;
> +                     return 0;
> +             }
> +
> +     warnx("%s", "Could not find auxv!");
> +     return -1;
> +}
> +
> +int
> +WRAP(clock_gettime)(clockid_t clock_id, struct timespec *tp)
> +{
> +     struct timekeep *timekeep;
> +
> +     if (elf_aux_timekeep == NULL && find_timekeep())
> +             return clock_gettime(clock_id, tp);
> +     timekeep = elf_aux_timekeep;
> +
> +     switch (clock_id) {
> +     case CLOCK_REALTIME:
> +             *tp = timekeep->tp_realtime;
> +             break;
> +     case CLOCK_UPTIME:
> +             *tp = timekeep->tp_uptime;
> +             break;
> +     case CLOCK_MONOTONIC:
> +             *tp = timekeep->tp_monotonic;
> +             break;
> +     case CLOCK_BOOTTIME:
> +             *tp = timekeep->tp_boottime;
> +             break;
> +     default:
> +             return clock_gettime(clock_id, tp);
> +     }
> +     return 0;
> +}
> +DEF_WRAP(clock_gettime);
> diff --git lib/libc/thread/synch.h lib/libc/thread/synch.h
> index 788890add89..df2239438d2 100644
> --- lib/libc/thread/synch.h
> +++ lib/libc/thread/synch.h
> @@ -33,7 +33,7 @@ _twait(volatile uint32_t *p, int val, clockid_t clockid, 
> const struct timespec *
>       if (abs == NULL)
>               return futex(p, FUTEX_WAIT_PRIVATE, val, NULL, NULL);
>  
> -     if (abs->tv_nsec >= 1000000000 || clock_gettime(clockid, &rel))
> +     if (abs->tv_nsec >= 1000000000 || WRAP(clock_gettime)(clockid, &rel))
>               return (EINVAL);
>  
>       rel.tv_sec = abs->tv_sec - rel.tv_sec;
> diff --git sbin/init/init.c sbin/init/init.c
> index 72d929706d3..c595d33bfac 100644
> --- sbin/init/init.c
> +++ sbin/init/init.c
> @@ -38,6 +38,7 @@
>  #include <sys/sysctl.h>
>  #include <sys/time.h>
>  #include <sys/tree.h>
> +#include <sys/syscall.h>
>  #include <sys/wait.h>
>  #include <machine/cpu.h>
>  
> @@ -1039,7 +1040,7 @@ start_getty(session_t *sp)
>       }
>  
>       if (timespecisset(&sp->se_started)) {
> -             clock_gettime(CLOCK_MONOTONIC, &current_time);
> +             syscall(SYS_clock_gettime, CLOCK_MONOTONIC, &current_time);
>               timespecsub(&current_time, &sp->se_started, &elapsed);
>               if (elapsed.tv_sec < GETTY_SPACING) {
>                       warning(
> @@ -1103,7 +1104,7 @@ collect_child(pid_t pid)
>       }
>  
>       sp->se_process = pid;
> -     clock_gettime(CLOCK_MONOTONIC, &sp->se_started);
> +     syscall(SYS_clock_gettime, CLOCK_MONOTONIC, &sp->se_started);
>       add_session(sp);
>  }
>  
> @@ -1170,7 +1171,7 @@ f_multi_user(void)
>                       break;
>               }
>               sp->se_process = pid;
> -             clock_gettime(CLOCK_MONOTONIC, &sp->se_started);
> +             syscall(SYS_clock_gettime, CLOCK_MONOTONIC, &sp->se_started);
>               add_session(sp);
>       }
>  
> diff --git sys/kern/exec_elf.c sys/kern/exec_elf.c
> index 9b5b8eb3acf..59bc923a6fb 100644
> --- sys/kern/exec_elf.c
> +++ sys/kern/exec_elf.c
> @@ -124,7 +124,7 @@ extern char *syscallnames[];
>  /*
>   * How many entries are in the AuxInfo array we pass to the process?
>   */
> -#define ELF_AUX_ENTRIES      8
> +#define ELF_AUX_ENTRIES      9
>  
>  /*
>   * This is the OpenBSD ELF emul
> @@ -860,6 +860,10 @@ exec_elf_fixup(struct proc *p, struct exec_package *epp)
>               a->au_v = ap->arg_entry;
>               a++;
>  
> +             a->au_id = AUX_openbsd_timekeep;
> +             a->au_v = p->p_p->ps_timekeep;
> +             a++;
> +
>               a->au_id = AUX_null;
>               a->au_v = 0;
>               a++;
> diff --git sys/kern/kern_exec.c sys/kern/kern_exec.c
> index 20480c2fc28..ee34c86d05b 100644
> --- sys/kern/kern_exec.c
> +++ sys/kern/kern_exec.c
> @@ -64,6 +64,11 @@
>  #include <uvm/uvm_extern.h>
>  #include <machine/tcb.h>
>  
> +#include <sys/time.h>
> +
> +struct uvm_object *timekeep_object;
> +struct timekeep* timekeep;
> +
>  void unveil_destroy(struct process *ps);
>  
>  const struct kmem_va_mode kv_exec = {
> @@ -76,6 +81,11 @@ const struct kmem_va_mode kv_exec = {
>   */
>  int exec_sigcode_map(struct process *, struct emul *);
>  
> +/*
> + * Map the shared timekeep page.
> + */
> +int exec_timekeep_map(struct process *);
> +
>  /*
>   * If non-zero, stackgap_random specifies the upper limit of the random gap 
> size
>   * added to the fixed stack position. Must be n^2.
> @@ -684,6 +694,9 @@ sys_execve(struct proc *p, void *v, register_t *retval)
>       /* map the process's signal trampoline code */
>       if (exec_sigcode_map(pr, pack.ep_emul))
>               goto free_pack_abort;
> +     /* map the process's timekeep page */
> +     if (exec_timekeep_map(pr))
> +             goto free_pack_abort;
>  
>  #ifdef __HAVE_EXEC_MD_MAP
>       /* perform md specific mappings that process might need */
> @@ -863,3 +876,38 @@ exec_sigcode_map(struct process *pr, struct emul *e)
>  
>       return (0);
>  }
> +
> +int exec_timekeep_map(struct process *pr)
> +{
> +     size_t timekeep_sz = sizeof(struct timekeep);
> +
> +     /*
> +      * Similar to the sigcode object, except that there is a single timekeep
> +      * object, and not one per emulation.
> +      */
> +     if (timekeep_object == NULL) {
> +             vaddr_t va;
> +
> +             timekeep_object = uao_create(timekeep_sz, 0);
> +             uao_reference(timekeep_object);
> +
> +             if (uvm_map(kernel_map, &va, round_page(timekeep_sz), 
> timekeep_object,
> +                 0, 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | 
> PROT_WRITE,
> +                 MAP_INHERIT_SHARE, MADV_RANDOM, 0))) {
> +                     uao_detach(timekeep_object);
> +                     return (ENOMEM);
> +             }
> +
> +             timekeep = (struct timekeep *)va;
> +     }
> +
> +     uao_reference(timekeep_object);
> +     if (uvm_map(&pr->ps_vmspace->vm_map, &pr->ps_timekeep, 
> round_page(timekeep_sz),
> +         timekeep_object, 0, 0, UVM_MAPFLAG(PROT_READ, PROT_READ,
> +         MAP_INHERIT_COPY, MADV_RANDOM, 0))) {
> +             uao_detach(timekeep_object);
> +             return (ENOMEM);
> +     }
> +
> +     return (0);
> +}
> diff --git sys/kern/kern_tc.c sys/kern/kern_tc.c
> index bcf8f689625..5f3ba524042 100644
> --- sys/kern/kern_tc.c
> +++ sys/kern/kern_tc.c
> @@ -35,6 +35,7 @@
>  #include <sys/queue.h>
>  #include <sys/malloc.h>
>  #include <dev/rndvar.h>
> +#include <sys/time.h>
>  
>  /*
>   * A large step happens on boot.  This constant detects such steps.
> @@ -209,6 +210,31 @@ microuptime(struct timeval *tvp)
>       BINTIME_TO_TIMEVAL(&bt, tvp);
>  }
>  
> +void
> +tc_clock_gettime(void)
> +{
> +     struct bintime bt;
> +
> +     if (timekeep == NULL)
> +             return;
> +
> +     /* CLOCK_REALTIME */
> +     nanotime(&timekeep->tp_realtime);
> +
> +     /* CLOCK_UPTIME */
> +     binuptime(&bt);
> +     bintimesub(&bt, &naptime, &bt);
> +     BINTIME_TO_TIMESPEC(&bt, &timekeep->tp_uptime);
> +
> +     /* CLOCK_MONOTONIC */
> +     nanouptime(&timekeep->tp_monotonic);
> +
> +     /* CLOCK_BOOTTIME */
> +     timekeep->tp_boottime = timekeep->tp_monotonic;
> +
> +     return;
> +}
> +
>  void
>  bintime(struct bintime *bt)
>  {
> @@ -613,6 +639,8 @@ tc_windup(struct bintime *new_boottime, struct bintime 
> *new_offset,
>       time_uptime = th->th_offset.sec;
>       membar_producer();
>       timehands = th;
> +
> +     tc_clock_gettime();
>  }
>  
>  /* Report or change the active timecounter hardware. */
> diff --git sys/sys/exec_elf.h sys/sys/exec_elf.h
> index a40e0510273..f55b75f1e84 100644
> --- sys/sys/exec_elf.h
> +++ sys/sys/exec_elf.h
> @@ -691,7 +691,8 @@ enum AuxID {
>       AUX_sun_uid = 2000,             /* euid */
>       AUX_sun_ruid = 2001,            /* ruid */
>       AUX_sun_gid = 2002,             /* egid */
> -     AUX_sun_rgid = 2003             /* rgid */
> +     AUX_sun_rgid = 2003,            /* rgid */
> +     AUX_openbsd_timekeep = 2004,    /* userland clock_gettime */
>  };
>  
>  struct elf_args {
> diff --git sys/sys/proc.h sys/sys/proc.h
> index 357c0c0d52c..93a79a220db 100644
> --- sys/sys/proc.h
> +++ sys/sys/proc.h
> @@ -248,6 +248,8 @@ struct process {
>       u_int   ps_rtableid;            /* Process routing table/domain. */
>       char    ps_nice;                /* Process "nice" value. */
>  
> +     vaddr_t ps_timekeep;            /* User pointer to timekeep */
> +
>       struct uprof {                  /* profile arguments */
>               caddr_t pr_base;        /* buffer base */
>               size_t  pr_size;        /* buffer size */
> diff --git sys/sys/time.h sys/sys/time.h
> index 564bae30b48..aab80121743 100644
> --- sys/sys/time.h
> +++ sys/sys/time.h
> @@ -163,6 +163,13 @@ struct clockinfo {
>  };
>  #endif /* __BSD_VISIBLE */
>  
> +struct timekeep {
> +     struct timespec tp_realtime;
> +     struct timespec tp_uptime;
> +     struct timespec tp_monotonic;
> +     struct timespec tp_boottime;
> +};
> +
>  #if defined(_KERNEL) || defined(_STANDALONE)
>  #include <sys/_time.h>
>  
> @@ -393,6 +400,8 @@ TIMESPEC_TO_NSEC(const struct timespec *ts)
>       return ts->tv_sec * 1000000000ULL + ts->tv_nsec;
>  }
>  
> +extern struct uvm_object *timekeep_object;
> +extern struct timekeep *timekeep;
>  #else /* !_KERNEL */
>  #include <time.h>
>  
> 

Reply via email to