This patch adds an early clock feature to x86 platforms. tsc_early_init(): Determines the offset, shift and multiplier for the early clock based on the TSC frequency.
tsc_early_fini(): Implements the finishing part of the early TSC feature and prints a message about the offset, which can be useful for finding out how much time was spent in POST and the boot manager (if the TSC starts from 0 during boot). sched_clock_early(): TSC-based implementation of the early clock; it is called from sched_clock(). Call tsc_early_init() to initialize the early boot time stamp functionality on the supported x86 platforms, and call tsc_early_fini() to finish this feature after the permanent clock has been initialized. The supported x86 systems are those where the TSC frequency is determined early in boot. Signed-off-by: Pavel Tatashin <pasha.tatas...@oracle.com> --- arch/x86/include/asm/paravirt.h | 2 +- arch/x86/kernel/tsc.c | 85 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 283efcaac8af..b4ba220163ce 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -171,7 +171,7 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) static inline unsigned long long paravirt_sched_clock(void) { - return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock); + return PVOP_CALL0(unsigned long long, pv_time_ops.active_sched_clock); } struct static_key; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index dbce6fa32aa9..24da1ff96481 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -181,6 +181,80 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_ local_irq_restore(flags); } +#ifdef CONFIG_X86_TSC +static struct cyc2ns_data cyc2ns_early; + +static u64 sched_clock_early(void) +{ + u64 ns = mul_u64_u32_shr(rdtsc(), cyc2ns_early.cyc2ns_mul, + cyc2ns_early.cyc2ns_shift); + return ns + cyc2ns_early.cyc2ns_offset; +} + +#ifdef CONFIG_PARAVIRT +static inline void __init tsc_early_enable(void) +{ + pv_time_ops.active_sched_clock = sched_clock_early; +} + +static inline void 
__init tsc_early_disable(void) +{ + pv_time_ops.active_sched_clock = pv_time_ops.sched_clock; +} +#else /* CONFIG_PARAVIRT */ +/* + * For native clock we use two switches static and dynamic, the static switch is + * initially true, so we check the dynamic switch, which is initially false. + * Later when early clock is disabled, we can alter the static switch in order + * to avoid branch check on every sched_clock() call. + */ +static bool __tsc_early; +static DEFINE_STATIC_KEY_TRUE(__tsc_early_static); + +static inline void __init tsc_early_enable(void) +{ + __tsc_early = true; +} + +static inline void __init tsc_early_disable(void) +{ + __tsc_early = false; + static_branch_disable(&__tsc_early_static); +} +#endif /* CONFIG_PARAVIRT */ + +/* + * Initialize clock for early time stamps + */ +static void __init tsc_early_init(unsigned int khz) +{ + clocks_calc_mult_shift(&cyc2ns_early.cyc2ns_mul, + &cyc2ns_early.cyc2ns_shift, + khz, NSEC_PER_MSEC, 0); + cyc2ns_early.cyc2ns_offset = -sched_clock_early(); + tsc_early_enable(); +} + +static void __init tsc_early_fini(void) +{ + unsigned long long t; + unsigned long r; + + /* We did not have early sched clock if multiplier is 0 */ + if (cyc2ns_early.cyc2ns_mul == 0) { + tsc_early_disable(); + return; + } + + t = -cyc2ns_early.cyc2ns_offset; + r = do_div(t, NSEC_PER_SEC); + + tsc_early_disable(); + __sched_clock_offset = sched_clock_early() - sched_clock(); + pr_info("sched clock early is finished, offset [%lld.%09lds]\n", t, r); +} +#endif /* CONFIG_X86_TSC */ + /* * Scheduler clock - returns current time in nanosec units. 
*/ @@ -193,6 +267,13 @@ u64 native_sched_clock(void) return cycles_2_ns(tsc_now); } +#if !defined(CONFIG_PARAVIRT) && defined(CONFIG_X86_TSC) + if (static_branch_unlikely(&__tsc_early_static)) { + if (__tsc_early) + return sched_clock_early(); + } +#endif /* !CONFIG_PARAVIRT && CONFIG_X86_TSC */ + /* * Fall back to jiffies if there's no TSC available: * ( But note that we still use it if the TSC is marked @@ -1274,6 +1355,7 @@ void __init tsc_early_delay_calibrate(void) lpj = tsc_khz * 1000; do_div(lpj, HZ); loops_per_jiffy = lpj; + tsc_early_init(tsc_khz); } void __init tsc_init(void) @@ -1283,6 +1365,7 @@ void __init tsc_init(void) if (!boot_cpu_has(X86_FEATURE_TSC)) { setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); + tsc_early_fini(); return; } @@ -1302,6 +1385,7 @@ void __init tsc_init(void) if (!tsc_khz) { mark_tsc_unstable("could not calculate TSC khz"); setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); + tsc_early_fini(); return; } @@ -1341,6 +1425,7 @@ void __init tsc_init(void) mark_tsc_unstable("TSCs unsynchronized"); detect_art(); + tsc_early_fini(); } #ifdef CONFIG_SMP -- 2.15.0