Hi Peter, On Mon, May 11, 2020 at 11:25:19AM +0200, Peter Zijlstra wrote: > On Mon, May 11, 2020 at 11:22:00AM +0200, Peter Zijlstra wrote: > > > (_completely_ untested) > > > > --- > > arch/arm64/kernel/perf_event.c | 27 ++++++++++++++++++--------- > > include/linux/sched_clock.h | 28 ++++++++++++++++++++++++++++ > > kernel/time/sched_clock.c | 41 > > +++++++++++++---------------------------- > > 3 files changed, 59 insertions(+), 37 deletions(-) > > > > diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c > > index 4d7879484cec..81a49a916660 100644 > > --- a/arch/arm64/kernel/perf_event.c > > +++ b/arch/arm64/kernel/perf_event.c > > @@ -1165,28 +1165,37 @@ device_initcall(armv8_pmu_driver_init) > > void arch_perf_update_userpage(struct perf_event *event, > > struct perf_event_mmap_page *userpg, u64 now) > > { > > - u32 freq; > > - u32 shift; > > + struct clock_read_data *rd; > > + unsigned int seq; > > > > /* > > * Internal timekeeping for enabled/running/stopped times > > * is always computed with the sched_clock. > > */ > > - freq = arch_timer_get_rate(); > > userpg->cap_user_time = 1; > > + userpg->cap_user_time_zero = 1; > > + > > + do { > > + rd = sched_clock_read_begin(&seq); > > + > > + userpg->time_mult = rd->mult; > > + userpg->time_shift = rd->shift; > > + userpg->time_offset = rd->epoch_ns; > > ^^^^^^^ wants to be time_zero > > > + > > + userpg->time_zero -= (rd->epoch_cyc * rd->shift) >> rd->shift; > > + > > + } while (sched_clock_read_retry(seq)); > > + > > + userpg->time_offset = userpf->time_zero - now; > > > > - clocks_calc_mult_shift(&userpg->time_mult, &shift, freq, > > - NSEC_PER_SEC, 0); > > And that ^^^ was complete crap. > > > /* > > * time_shift is not expected to be greater than 31 due to > > * the original published conversion algorithm shifting a > > * 32-bit value (now specifies a 64-bit value) - refer > > * perf_event_mmap_page documentation in perf_event.h. 
> > */ > > - if (shift == 32) { > > - shift = 31; > > + if (userpg->time_shift == 32) { > > + userpg->time_shift = 31; > > userpg->time_mult >>= 1; > > } > > - userpg->time_shift = (u16)shift; > > - userpg->time_offset = -now; > > }
I have verified this change; it works as expected on my Arm64 board. I have also pasted the updated code below, which builds successfully after some minor fixes. I am not sure how to proceed: will you merge this, or do you want me to send out formal patches (or only for the Arm64 part)? P.S. It's a shame I still missed your suggestions in the previous thread even though you had provided enough info, and thank you for the help! ---8<--- diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 4d7879484cec..5a34e9264c5b 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -19,6 +19,7 @@ #include <linux/of.h> #include <linux/perf/arm_pmu.h> #include <linux/platform_device.h> +#include <linux/sched_clock.h> #include <linux/smp.h> /* ARMv8 Cortex-A53 specific event types. */ @@ -1165,28 +1166,26 @@ device_initcall(armv8_pmu_driver_init) void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { - u32 freq; - u32 shift; + struct clock_read_data *rd; + unsigned int seq; /* * Internal timekeeping for enabled/running/stopped times * is always computed with the sched_clock. */ - freq = arch_timer_get_rate(); userpg->cap_user_time = 1; + userpg->cap_user_time_zero = 1; - clocks_calc_mult_shift(&userpg->time_mult, &shift, freq, - NSEC_PER_SEC, 0); - /* - * time_shift is not expected to be greater than 31 due to - * the original published conversion algorithm shifting a - * 32-bit value (now specifies a 64-bit value) - refer - * perf_event_mmap_page documentation in perf_event.h. 
- */ - if (shift == 32) { - shift = 31; - userpg->time_mult >>= 1; - } - userpg->time_shift = (u16)shift; - userpg->time_offset = -now; + do { + rd = sched_clock_read_begin(&seq); + + userpg->time_mult = rd->mult; + userpg->time_shift = rd->shift; + userpg->time_zero = rd->epoch_ns; + + userpg->time_zero -= (rd->epoch_cyc * rd->mult) >> rd->shift; + + } while (sched_clock_read_retry(seq)); + + userpg->time_offset = userpg->time_zero - now; } diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 0bb04a96a6d4..528718e4ed52 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -6,6 +6,34 @@ #define LINUX_SCHED_CLOCK #ifdef CONFIG_GENERIC_SCHED_CLOCK +/** + * struct clock_read_data - data required to read from sched_clock() + * + * @epoch_ns: sched_clock() value at last update + * @epoch_cyc: Clock cycle value at last update. + * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit + * clocks. + * @read_sched_clock: Current clock source (or dummy source when suspended). + * @mult: Multipler for scaled math conversion. + * @shift: Shift value for scaled math conversion. + * + * Care must be taken when updating this structure; it is read by + * some very hot code paths. It occupies <=40 bytes and, when combined + * with the seqcount used to synchronize access, comfortably fits into + * a 64 byte cache line. 
+ */ +struct clock_read_data { + u64 epoch_ns; + u64 epoch_cyc; + u64 sched_clock_mask; + u64 (*read_sched_clock)(void); + u32 mult; + u32 shift; +}; + +extern struct clock_read_data *sched_clock_read_begin(unsigned int *seq); +extern int sched_clock_read_retry(unsigned int seq); + extern void generic_sched_clock_init(void); extern void sched_clock_register(u64 (*read)(void), int bits, diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index fa3f800d7d76..0acaadc3156c 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -19,31 +19,6 @@ #include "timekeeping.h" -/** - * struct clock_read_data - data required to read from sched_clock() - * - * @epoch_ns: sched_clock() value at last update - * @epoch_cyc: Clock cycle value at last update. - * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit - * clocks. - * @read_sched_clock: Current clock source (or dummy source when suspended). - * @mult: Multipler for scaled math conversion. - * @shift: Shift value for scaled math conversion. - * - * Care must be taken when updating this structure; it is read by - * some very hot code paths. It occupies <=40 bytes and, when combined - * with the seqcount used to synchronize access, comfortably fits into - * a 64 byte cache line. 
- */ -struct clock_read_data { - u64 epoch_ns; - u64 epoch_cyc; - u64 sched_clock_mask; - u64 (*read_sched_clock)(void); - u32 mult; - u32 shift; -}; - /** * struct clock_data - all data needed for sched_clock() (including * registration of a new clock source) @@ -93,6 +68,17 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift) return (cyc * mult) >> shift; } +struct clock_read_data *sched_clock_read_begin(unsigned int *seq) +{ + *seq = raw_read_seqcount(&cd.seq); + return cd.read_data + (*seq & 1); +} + +int sched_clock_read_retry(unsigned int seq) +{ + return read_seqcount_retry(&cd.seq, seq); +} + unsigned long long notrace sched_clock(void) { u64 cyc, res; @@ -100,13 +86,12 @@ unsigned long long notrace sched_clock(void) struct clock_read_data *rd; do { - seq = raw_read_seqcount(&cd.seq); - rd = cd.read_data + (seq & 1); + rd = sched_clock_read_begin(&seq); cyc = (rd->read_sched_clock() - rd->epoch_cyc) & rd->sched_clock_mask; res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift); - } while (read_seqcount_retry(&cd.seq, seq)); + } while (sched_clock_read_retry(seq)); return res; }