Looking at the asm for native_sched_clock() I noticed we don't inline enough. Mostly caused by sharing code with cyc2ns_read_begin(), which we didn't used to do. So mark all that __force_inline to make it DTRT.
Fixes: 59eaef78bfea ("x86/tsc: Remodel cyc2ns to use seqcount_latch()") Reported-by: Eric Dumazet <eduma...@google.com> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> --- arch/x86/kernel/tsc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -57,7 +57,7 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); -void cyc2ns_read_begin(struct cyc2ns_data *data) +void __always_inline cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -74,7 +74,7 @@ void cyc2ns_read_begin(struct cyc2ns_dat } while (unlikely(seq != this_cpu_read(cyc2ns.seq.sequence))); } -void cyc2ns_read_end(void) +void __always_inline cyc2ns_read_end(void) { preempt_enable_notrace(); } @@ -103,7 +103,7 @@ void cyc2ns_read_end(void) * -johns...@us.ibm.com "math is hard, lets go shopping!" */ -static inline unsigned long long cycles_2_ns(unsigned long long cyc) +static __always_inline unsigned long long cycles_2_ns(unsigned long long cyc) { struct cyc2ns_data data; unsigned long long ns;