On 04/28 15:09:56, Petri Savolainen wrote: > Use 64 bit HW time counter when available. It is used on > x86 when invariant TSC CPU flag indicates that TSC frequency > is constant. Otherwise, the system time is used as before. Direct > HW time counter usage avoids system call, and related latency > and performance issues. > > Signed-off-by: Petri Savolainen <petri.savolai...@linaro.org> > --- > platform/linux-generic/Makefile.am | 1 + > platform/linux-generic/arch/arm/odp_cpu_arch.c | 16 ++ > platform/linux-generic/arch/default/odp_cpu_arch.c | 16 ++ > platform/linux-generic/arch/mips64/odp_cpu_arch.c | 16 ++ > platform/linux-generic/arch/powerpc/odp_cpu_arch.c | 16 ++ > platform/linux-generic/arch/x86/cpu_flags.c | 9 + > platform/linux-generic/arch/x86/odp_cpu_arch.c | 59 ++++ > .../include/odp/api/plat/time_types.h | 23 +- > platform/linux-generic/include/odp_time_internal.h | 24 ++ > platform/linux-generic/odp_time.c | 300 > ++++++++++++++++----- > 10 files changed, 415 insertions(+), 65 deletions(-) > create mode 100644 platform/linux-generic/include/odp_time_internal.h > > diff --git a/platform/linux-generic/Makefile.am > b/platform/linux-generic/Makefile.am > index ab74c14c..cd7afba2 100644 > --- a/platform/linux-generic/Makefile.am > +++ b/platform/linux-generic/Makefile.am > @@ -172,6 +172,7 @@ noinst_HEADERS = \ > ${srcdir}/include/odp_schedule_if.h \ > ${srcdir}/include/odp_sorted_list_internal.h \ > ${srcdir}/include/odp_shm_internal.h \ > + ${srcdir}/include/odp_time_internal.h \ > ${srcdir}/include/odp_timer_internal.h \ > ${srcdir}/include/odp_timer_wheel_internal.h \ > ${srcdir}/include/odp_traffic_mngr_internal.h \ > diff --git a/platform/linux-generic/arch/arm/odp_cpu_arch.c > b/platform/linux-generic/arch/arm/odp_cpu_arch.c > index 2ac223e0..c31f9084 100644 > --- a/platform/linux-generic/arch/arm/odp_cpu_arch.c > +++ b/platform/linux-generic/arch/arm/odp_cpu_arch.c > @@ -13,6 +13,7 @@ > #include <odp/api/hints.h> > #include <odp/api/system_info.h> 
> #include <odp_debug_internal.h> > +#include <odp_time_internal.h> > > #define GIGA 1000000000 > > @@ -46,3 +47,18 @@ uint64_t odp_cpu_cycles_resolution(void) > { > return 1; > } > + > +int cpu_has_global_time(void) > +{ > + return 0; > +} > + > +uint64_t cpu_global_time(void) > +{ > + return 0; > +} > + > +uint64_t cpu_global_time_freq(void) > +{ > + return 0; > +} > diff --git a/platform/linux-generic/arch/default/odp_cpu_arch.c > b/platform/linux-generic/arch/default/odp_cpu_arch.c > index 2ac223e0..c31f9084 100644 > --- a/platform/linux-generic/arch/default/odp_cpu_arch.c > +++ b/platform/linux-generic/arch/default/odp_cpu_arch.c > @@ -13,6 +13,7 @@ > #include <odp/api/hints.h> > #include <odp/api/system_info.h> > #include <odp_debug_internal.h> > +#include <odp_time_internal.h> > > #define GIGA 1000000000 > > @@ -46,3 +47,18 @@ uint64_t odp_cpu_cycles_resolution(void) > { > return 1; > } > + > +int cpu_has_global_time(void) > +{ > + return 0; > +} > + > +uint64_t cpu_global_time(void) > +{ > + return 0; > +} > + > +uint64_t cpu_global_time_freq(void) > +{ > + return 0; > +} > diff --git a/platform/linux-generic/arch/mips64/odp_cpu_arch.c > b/platform/linux-generic/arch/mips64/odp_cpu_arch.c > index 646acf9c..f7eafa0f 100644 > --- a/platform/linux-generic/arch/mips64/odp_cpu_arch.c > +++ b/platform/linux-generic/arch/mips64/odp_cpu_arch.c > @@ -7,6 +7,7 @@ > #include <odp/api/cpu.h> > #include <odp/api/hints.h> > #include <odp/api/system_info.h> > +#include <odp_time_internal.h> > > uint64_t odp_cpu_cycles(void) > { > @@ -29,3 +30,18 @@ uint64_t odp_cpu_cycles_resolution(void) > { > return 1; > } > + > +int cpu_has_global_time(void) > +{ > + return 0; > +} > + > +uint64_t cpu_global_time(void) > +{ > + return 0; > +} > + > +uint64_t cpu_global_time_freq(void) > +{ > + return 0; > +} > diff --git a/platform/linux-generic/arch/powerpc/odp_cpu_arch.c > b/platform/linux-generic/arch/powerpc/odp_cpu_arch.c > index 2ac223e0..c31f9084 100644 > --- 
a/platform/linux-generic/arch/powerpc/odp_cpu_arch.c > +++ b/platform/linux-generic/arch/powerpc/odp_cpu_arch.c > @@ -13,6 +13,7 @@ > #include <odp/api/hints.h> > #include <odp/api/system_info.h> > #include <odp_debug_internal.h> > +#include <odp_time_internal.h> > > #define GIGA 1000000000 > > @@ -46,3 +47,18 @@ uint64_t odp_cpu_cycles_resolution(void) > { > return 1; > } > + > +int cpu_has_global_time(void) > +{ > + return 0; > +} > + > +uint64_t cpu_global_time(void) > +{ > + return 0; > +} > + > +uint64_t cpu_global_time_freq(void) > +{ > + return 0; > +} > diff --git a/platform/linux-generic/arch/x86/cpu_flags.c > b/platform/linux-generic/arch/x86/cpu_flags.c > index 954dac27..a492a35b 100644 > --- a/platform/linux-generic/arch/x86/cpu_flags.c > +++ b/platform/linux-generic/arch/x86/cpu_flags.c > @@ -39,6 +39,7 @@ > > #include <arch/x86/cpu_flags.h> > #include <odp_debug_internal.h> > +#include <odp_time_internal.h> > #include <stdio.h> > #include <stdint.h> > > @@ -357,3 +358,11 @@ void cpu_flags_print_all(void) > str[len] = '\0'; > ODP_PRINT("%s", str); > } > + > +int cpu_has_global_time(void) > +{ > + if (cpu_get_flag_enabled(RTE_CPUFLAG_INVTSC) > 0) > + return 1; > + > + return 0; > +} > diff --git a/platform/linux-generic/arch/x86/odp_cpu_arch.c > b/platform/linux-generic/arch/x86/odp_cpu_arch.c > index c8cf27b6..9ba601a3 100644 > --- a/platform/linux-generic/arch/x86/odp_cpu_arch.c > +++ b/platform/linux-generic/arch/x86/odp_cpu_arch.c > @@ -3,7 +3,14 @@ > * > * SPDX-License-Identifier: BSD-3-Clause > */ > + > +#include <odp_posix_extensions.h> > + > #include <odp/api/cpu.h> > +#include <odp_time_internal.h> > +#include <odp_debug_internal.h> > + > +#include <time.h> > > uint64_t odp_cpu_cycles(void) > { > @@ -31,3 +38,55 @@ uint64_t odp_cpu_cycles_resolution(void) > { > return 1; > } > + > +uint64_t cpu_global_time(void) > +{ > + return odp_cpu_cycles(); > +} > + > +#define SEC_IN_NS 1000000000ULL > + > +/* Measure TSC frequency. 
Frequency information registers are defined for > x86, > + * but those are often not enumerated. */ > +uint64_t cpu_global_time_freq(void) > +{ > + struct timespec sleep, ts1, ts2; > + uint64_t t1, t2, ts_nsec, cycles, hz; > + int i; > + uint64_t avg = 0; > + int rounds = 4; > + > + for (i = 0; i < rounds; i++) { > + sleep.tv_sec = 0; > + sleep.tv_nsec = SEC_IN_NS / 10; > + > + if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts1)) { > + ODP_DBG("clock_gettime failed\n"); > + return 0; > + } > + > + t1 = cpu_global_time(); > + > + if (nanosleep(&sleep, NULL) < 0) { > + ODP_DBG("nanosleep failed\n"); > + return 0; > + } > + > + if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts2)) { > + ODP_DBG("clock_gettime failed\n"); > + return 0; > + } > + > + t2 = cpu_global_time(); > + > + ts_nsec = (ts2.tv_sec - ts1.tv_sec) * SEC_IN_NS; > + ts_nsec += ts2.tv_nsec - ts1.tv_nsec; > + > + cycles = t2 - t1; > + > + hz = (cycles * SEC_IN_NS) / ts_nsec; > + avg += hz; > + } > + > + return avg / rounds; > +} > diff --git a/platform/linux-generic/include/odp/api/plat/time_types.h > b/platform/linux-generic/include/odp/api/plat/time_types.h > index 4847f3b1..1cafb1f7 100644 > --- a/platform/linux-generic/include/odp/api/plat/time_types.h > +++ b/platform/linux-generic/include/odp/api/plat/time_types.h > @@ -26,11 +26,28 @@ extern "C" { > * the linux timespec structure, which is dependent on POSIX extension level. 
> */ > typedef struct odp_time_t { > - int64_t tv_sec; /**< @internal Seconds */ > - int64_t tv_nsec; /**< @internal Nanoseconds */ > + union { > + /** @internal Posix timespec */ > + struct { > + /** @internal Seconds */ > + int64_t tv_sec; > + > + /** @internal Nanoseconds */ > + int64_t tv_nsec; > + } spec; > + > + /** @internal HW time counter */ > + struct { > + /** @internal Counter value */ > + uint64_t count; > + > + /** @internal Reserved */ > + uint64_t reserved; > + } hw; > + }; > } odp_time_t; > > -#define ODP_TIME_NULL ((odp_time_t){0, 0}) > +#define ODP_TIME_NULL ((odp_time_t){.spec = {0, 0} }) > > /** > * @} > diff --git a/platform/linux-generic/include/odp_time_internal.h > b/platform/linux-generic/include/odp_time_internal.h > new file mode 100644 > index 00000000..99ac7977 > --- /dev/null > +++ b/platform/linux-generic/include/odp_time_internal.h > @@ -0,0 +1,24 @@ > +/* Copyright (c) 2017, Linaro Limited > + * All rights reserved. > + * > + * SPDX-License-Identifier: BSD-3-Clause > + */ > + > +#ifndef ODP_TIME_INTERNAL_H_ > +#define ODP_TIME_INTERNAL_H_ > + > +#ifdef __cplusplus > +extern "C" { > +#endif > + > +#include <stdint.h> > + > +int cpu_has_global_time(void); > +uint64_t cpu_global_time(void); > +uint64_t cpu_global_time_freq(void); > + > +#ifdef __cplusplus > +} > +#endif > + > +#endif > diff --git a/platform/linux-generic/odp_time.c > b/platform/linux-generic/odp_time.c > index 0e5966c0..ac82175d 100644 > --- a/platform/linux-generic/odp_time.c > +++ b/platform/linux-generic/odp_time.c > @@ -10,36 +10,39 @@ > #include <odp/api/time.h> > #include <odp/api/hints.h> > #include <odp_debug_internal.h> > +#include <odp_time_internal.h> > +#include <string.h> > +#include <inttypes.h> > > -static odp_time_t start_time; > +typedef struct time_global_t { > + odp_time_t start_time; > + int use_hw; > + uint64_t hw_start; > + uint64_t hw_freq_hz; > +} time_global_t; > > -static inline > -uint64_t time_to_ns(odp_time_t time) > -{ > - uint64_t ns; > 
- > - ns = time.tv_sec * ODP_TIME_SEC_IN_NS; > - ns += time.tv_nsec; > +static time_global_t global; > > - return ns; > -} > +/* > + * Posix timespec based functions > + */ > > -static inline odp_time_t time_diff(odp_time_t t2, odp_time_t t1) > +static inline odp_time_t time_spec_diff(odp_time_t t2, odp_time_t t1) > { > odp_time_t time; > > - time.tv_sec = t2.tv_sec - t1.tv_sec; > - time.tv_nsec = t2.tv_nsec - t1.tv_nsec; > + time.spec.tv_sec = t2.spec.tv_sec - t1.spec.tv_sec; > + time.spec.tv_nsec = t2.spec.tv_nsec - t1.spec.tv_nsec; > > - if (time.tv_nsec < 0) { > - time.tv_nsec += ODP_TIME_SEC_IN_NS; > - --time.tv_sec; > + if (time.spec.tv_nsec < 0) { > + time.spec.tv_nsec += ODP_TIME_SEC_IN_NS; > + --time.spec.tv_sec; > } > > return time; > } > > -static inline odp_time_t time_local(void) > +static inline odp_time_t time_spec_cur(void) > { > int ret; > odp_time_t time; > @@ -49,77 +52,234 @@ static inline odp_time_t time_local(void) > if (odp_unlikely(ret != 0)) > ODP_ABORT("clock_gettime failed\n"); > > - time.tv_sec = sys_time.tv_sec; > - time.tv_nsec = sys_time.tv_nsec; > + time.spec.tv_sec = sys_time.tv_sec; > + time.spec.tv_nsec = sys_time.tv_nsec; > > - return time_diff(time, start_time); > + return time_spec_diff(time, global.start_time); > } > > -static inline int time_cmp(odp_time_t t2, odp_time_t t1) > +static inline uint64_t time_spec_res(void) > { > - if (t2.tv_sec < t1.tv_sec) > + int ret; > + struct timespec tres; > + > + ret = clock_getres(CLOCK_MONOTONIC_RAW, &tres); > + if (odp_unlikely(ret != 0)) > + ODP_ABORT("clock_getres failed\n"); > + > + return ODP_TIME_SEC_IN_NS / (uint64_t)tres.tv_nsec; > +} > + > +static inline int time_spec_cmp(odp_time_t t2, odp_time_t t1) > +{ > + if (t2.spec.tv_sec < t1.spec.tv_sec) > return -1; > > - if (t2.tv_sec > t1.tv_sec) > + if (t2.spec.tv_sec > t1.spec.tv_sec) > return 1; > > - return t2.tv_nsec - t1.tv_nsec; > + return t2.spec.tv_nsec - t1.spec.tv_nsec; > } > > -static inline odp_time_t 
time_sum(odp_time_t t1, odp_time_t t2) > +static inline odp_time_t time_spec_sum(odp_time_t t1, odp_time_t t2) > { > odp_time_t time; > > - time.tv_sec = t2.tv_sec + t1.tv_sec; > - time.tv_nsec = t2.tv_nsec + t1.tv_nsec; > + time.spec.tv_sec = t2.spec.tv_sec + t1.spec.tv_sec; > + time.spec.tv_nsec = t2.spec.tv_nsec + t1.spec.tv_nsec; > > - if (time.tv_nsec >= (long)ODP_TIME_SEC_IN_NS) { > - time.tv_nsec -= ODP_TIME_SEC_IN_NS; > - ++time.tv_sec; > + if (time.spec.tv_nsec >= (long)ODP_TIME_SEC_IN_NS) { > + time.spec.tv_nsec -= ODP_TIME_SEC_IN_NS; > + ++time.spec.tv_sec; > } > > return time; > } > > -static inline odp_time_t time_local_from_ns(uint64_t ns) > +static inline uint64_t time_spec_to_ns(odp_time_t time) > +{ > + uint64_t ns; > + > + ns = time.spec.tv_sec * ODP_TIME_SEC_IN_NS; > + ns += time.spec.tv_nsec; > + > + return ns; > +} > + > +static inline odp_time_t time_spec_from_ns(uint64_t ns) > { > odp_time_t time; > > - time.tv_sec = ns / ODP_TIME_SEC_IN_NS; > - time.tv_nsec = ns - time.tv_sec * ODP_TIME_SEC_IN_NS; > + time.spec.tv_sec = ns / ODP_TIME_SEC_IN_NS; > + time.spec.tv_nsec = ns - time.spec.tv_sec * ODP_TIME_SEC_IN_NS; > > return time; > } > > -static inline void time_wait_until(odp_time_t time) > +/* > + * HW time counter based functions > + */ > + > +static inline odp_time_t time_hw_cur(void) > { > - odp_time_t cur; > + odp_time_t time; > > - do { > - cur = time_local(); > - } while (time_cmp(time, cur) > 0); > + time.hw.count = cpu_global_time() - global.hw_start;
Computing the offset here is unnecessarily expensive. The simplest and lowest-overhead solution is to just store the raw value read from HW and convert it at a later point in time. However, such a raw counter value no longer matches what odp_time_t is meant to represent. That is why I introduced odp_tick_t in the timer RFC and design doc posted to the list *several* times. > + > + return time; > } > > -static inline uint64_t time_local_res(void) > +static inline uint64_t time_hw_res(void) > { > - int ret; > - struct timespec tres; > + /* Promise a bit lower resolution than average cycle counter > + * frequency */ > + return global.hw_freq_hz / 10; > +} > > - ret = clock_getres(CLOCK_MONOTONIC_RAW, &tres); > - if (odp_unlikely(ret != 0)) > - ODP_ABORT("clock_getres failed\n"); > +static inline int time_hw_cmp(odp_time_t t2, odp_time_t t1) > +{ > + if (odp_likely(t2.hw.count > t1.hw.count)) > + return 1; > > - return ODP_TIME_SEC_IN_NS / (uint64_t)tres.tv_nsec; > + if (t2.hw.count < t1.hw.count) > + return -1; > + > + return 0; > +} > + > +static inline odp_time_t time_hw_diff(odp_time_t t2, odp_time_t t1) > +{ > + odp_time_t time; > + > + time.hw.count = t2.hw.count - t1.hw.count; > + > + return time; > +} > + > +static inline odp_time_t time_hw_sum(odp_time_t t1, odp_time_t t2) > +{ > + odp_time_t time; > + > + time.hw.count = t1.hw.count + t2.hw.count; > + > + return time; > +} > + > +static inline uint64_t time_hw_to_ns(odp_time_t time) > +{ > + uint64_t nsec; > + uint64_t freq_hz = global.hw_freq_hz; > + uint64_t count = time.hw.count; > + uint64_t sec = 0; > + > + if (count >= freq_hz) { > + sec = count / freq_hz; > + count = count - sec * freq_hz; > + } > + > + nsec = (ODP_TIME_SEC_IN_NS * count) / freq_hz; > + > + return (sec * ODP_TIME_SEC_IN_NS) + nsec; > +} > + > +static inline odp_time_t time_hw_from_ns(uint64_t ns) > +{ > + odp_time_t time; > + uint64_t count; > + uint64_t freq_hz = global.hw_freq_hz; > + uint64_t sec = 0; > + > + if (ns >= ODP_TIME_SEC_IN_NS) { > + sec = ns / ODP_TIME_SEC_IN_NS;
> + ns = ns - sec * ODP_TIME_SEC_IN_NS; > + } > + > + count = sec * freq_hz; > + count += (ns * freq_hz) / ODP_TIME_SEC_IN_NS; > + > + time.hw.reserved = 0; > + time.hw.count = count; > + > + return time; > +} > + > +/* > + * Common functions > + */ > + > +static inline odp_time_t time_cur(void) > +{ > + if (global.use_hw) > + return time_hw_cur(); > + > + return time_spec_cur(); > +} > + > +static inline uint64_t time_res(void) > +{ > + if (global.use_hw) > + return time_hw_res(); > + > + return time_spec_res(); > +} > + > +static inline int time_cmp(odp_time_t t2, odp_time_t t1) > +{ > + if (global.use_hw) > + return time_hw_cmp(t2, t1); > + > + return time_spec_cmp(t2, t1); > +} > + > +static inline odp_time_t time_diff(odp_time_t t2, odp_time_t t1) > +{ > + if (global.use_hw) > + return time_hw_diff(t2, t1); > + > + return time_spec_diff(t2, t1); > +} > + > +static inline odp_time_t time_sum(odp_time_t t1, odp_time_t t2) > +{ > + if (global.use_hw) > + return time_hw_sum(t1, t2); > + > + return time_spec_sum(t1, t2); > +} > + > +static inline uint64_t time_to_ns(odp_time_t time) > +{ > + if (global.use_hw) > + return time_hw_to_ns(time); > + > + return time_spec_to_ns(time); > +} > + > +static inline odp_time_t time_from_ns(uint64_t ns) > +{ > + if (global.use_hw) > + return time_hw_from_ns(ns); > + > + return time_spec_from_ns(ns); > +} > + > +static inline void time_wait_until(odp_time_t time) > +{ > + odp_time_t cur; > + > + do { > + cur = time_cur(); > + } while (time_cmp(time, cur) > 0); > } > > odp_time_t odp_time_local(void) > { > - return time_local(); > + return time_cur(); > } > > odp_time_t odp_time_global(void) > { > - return time_local(); > + return time_cur(); > } > > odp_time_t odp_time_diff(odp_time_t t2, odp_time_t t1) > @@ -134,12 +294,12 @@ uint64_t odp_time_to_ns(odp_time_t time) > > odp_time_t odp_time_local_from_ns(uint64_t ns) > { > - return time_local_from_ns(ns); > + return time_from_ns(ns); > } > > odp_time_t 
odp_time_global_from_ns(uint64_t ns) > { > - return time_local_from_ns(ns); > + return time_from_ns(ns); > } > > int odp_time_cmp(odp_time_t t2, odp_time_t t1) > @@ -154,18 +314,18 @@ odp_time_t odp_time_sum(odp_time_t t1, odp_time_t t2) > > uint64_t odp_time_local_res(void) > { > - return time_local_res(); > + return time_res(); > } > > uint64_t odp_time_global_res(void) > { > - return time_local_res(); > + return time_res(); > } > > void odp_time_wait_ns(uint64_t ns) > { > - odp_time_t cur = time_local(); > - odp_time_t wait = time_local_from_ns(ns); > + odp_time_t cur = time_cur(); > + odp_time_t wait = time_from_ns(ns); > odp_time_t end_time = time_sum(cur, wait); > > time_wait_until(end_time); > @@ -178,15 +338,31 @@ void odp_time_wait_until(odp_time_t time) > > int odp_time_init_global(void) > { > - int ret; > - struct timespec time; > - > - ret = clock_gettime(CLOCK_MONOTONIC_RAW, &time); > - if (ret) { > - start_time = ODP_TIME_NULL; > - } else { > - start_time.tv_sec = time.tv_sec; > - start_time.tv_nsec = time.tv_nsec; > + struct timespec sys_time; > + int ret = 0; > + > + memset(&global, 0, sizeof(time_global_t)); > + > + if (cpu_has_global_time()) { > + global.use_hw = 1; > + global.hw_freq_hz = cpu_global_time_freq(); > + > + if (global.hw_freq_hz == 0) > + return -1; > + > + printf("HW time counter freq: %" PRIu64 " hz\n\n", > + global.hw_freq_hz); > + > + global.hw_start = cpu_global_time(); > + return 0; > + } > + > + global.start_time = ODP_TIME_NULL; > + > + ret = clock_gettime(CLOCK_MONOTONIC_RAW, &sys_time); > + if (ret == 0) { > + global.start_time.spec.tv_sec = sys_time.tv_sec; > + global.start_time.spec.tv_nsec = sys_time.tv_nsec; > } > > return ret; > -- > 2.11.0 >