On Fri, Apr 19, 2019 at 8:58 AM Josh Poimboeuf <jpoim...@redhat.com> wrote: > > I still don't like using regs->bp because it results in different code > paths for FP and ORC. In the FP case, the regs are treated like real > regs even though they're fake. > > Something like the below would be much simpler. Would this work? I don't > know if any other code relies on the fake regs->bp or regs->sp.
Works perfectly. My only concern is that FP path used to work very well, not sure it's a good idea to change it, and this may bring some extra overhead for FP path. > > (BTW, tomorrow is a US holiday so I may not be very responsive until > Monday.) > > diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c > index de1a924a4914..f315425d8468 100644 > --- a/arch/x86/events/core.c > +++ b/arch/x86/events/core.c > @@ -2382,6 +2382,15 @@ void arch_perf_update_userpage(struct perf_event > *event, > cyc2ns_read_end(); > } > > +/* > + * Determine whether the regs were taken from an irq/exception handler rather > + * than from perf_arch_fetch_caller_regs(). > + */ > +static bool perf_hw_regs(struct pt_regs *regs) > +{ > + return regs->flags & X86_EFLAGS_FIXED; > +} > + > void > perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs > *regs) > { > @@ -2393,11 +2402,15 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx > *entry, struct pt_regs *re > return; > } > > - if (perf_callchain_store(entry, regs->ip)) > - return; > + if (perf_hw_regs(regs)) { > + if (perf_callchain_store(entry, regs->ip)) > + return; > + unwind_start(&state, current, regs, NULL); > + } else { > + unwind_start(&state, current, NULL, (void *)regs->sp); > + } > > - for (unwind_start(&state, current, regs, NULL); !unwind_done(&state); > - unwind_next_frame(&state)) { > + for (; !unwind_done(&state); unwind_next_frame(&state)) { > addr = unwind_get_return_address(&state); > if (!addr || perf_callchain_store(entry, addr)) > return; > diff --git a/arch/x86/include/asm/perf_event.h > b/arch/x86/include/asm/perf_event.h > index 04768a3a5454..1392d5e6e8d6 100644 > --- a/arch/x86/include/asm/perf_event.h > +++ b/arch/x86/include/asm/perf_event.h > @@ -308,14 +308,9 @@ extern unsigned long perf_misc_flags(struct pt_regs > *regs); > */ > #define perf_arch_fetch_caller_regs(regs, __ip) { \ > (regs)->ip = (__ip); \ > - (regs)->bp = caller_frame_pointer(); \ > + (regs)->sp = (unsigned long)__builtin_frame_address(0); \ > (regs)->cs = __KERNEL_CS; \ > regs->flags = 0; \ > - asm volatile( \ > - _ASM_MOV "%%"_ASM_SP ", %0\n" \ > - : "=m" ((regs)->sp) \ > - :: "memory" \ > - ); \ > } > > struct perf_guest_switch_msr { > diff --git a/arch/x86/include/asm/stacktrace.h > b/arch/x86/include/asm/stacktrace.h > index d6d758a187b6..a8d0cdf48616 100644 > --- a/arch/x86/include/asm/stacktrace.h > +++ b/arch/x86/include/asm/stacktrace.h > @@ -100,19 +100,6 @@ struct stack_frame_ia32 { > u32 return_address; > }; > > -static inline unsigned long caller_frame_pointer(void) > -{ > - struct stack_frame *frame; > - > - frame = __builtin_frame_address(0); > - > -#ifdef CONFIG_FRAME_POINTER > - frame = frame->next_frame; > -#endif > - > - return (unsigned long)frame; > -} > - > void show_opcodes(struct pt_regs *regs, const char *loglvl); > void show_ip(struct pt_regs *regs, const char *loglvl); > #endif /* _ASM_X86_STACKTRACE_H */ > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index f3864e1c5569..0f560069aeec 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -1062,7 +1062,7 @@ static inline void perf_arch_fetch_caller_regs(struct > pt_regs *regs, unsigned lo > * the nth caller. We only need a few of the regs: > * - ip for PERF_SAMPLE_IP > * - cs for user_mode() tests > - * - bp for callchains > + * - sp for callchains > * - eflags, for future purposes, just in case > */ > static inline void perf_fetch_caller_regs(struct pt_regs *regs) -- Best Regards, Kairui Song