Le 19/06/2023 à 11:47, Naveen N Rao a écrit : > With ppc64 -mprofile-kernel and ppc32 -pg, profiling instructions to > call into ftrace are emitted right at function entry. The instruction > sequence used is minimal to reduce overhead. Crucially, a stackframe is > not created for the function being traced. This breaks stack unwinding > since the function being traced does not have a stackframe for itself. > As such, it never shows up in the backtrace: > > /sys/kernel/debug/tracing # echo 1 > /proc/sys/kernel/stack_tracer_enabled > /sys/kernel/debug/tracing # cat stack_trace > Depth Size Location (17 entries) > ----- ---- -------- > 0) 4144 32 ftrace_call+0x4/0x44 > 1) 4112 432 get_page_from_freelist+0x26c/0x1ad0 > 2) 3680 496 __alloc_pages+0x290/0x1280 > 3) 3184 336 __folio_alloc+0x34/0x90 > 4) 2848 176 vma_alloc_folio+0xd8/0x540 > 5) 2672 272 __handle_mm_fault+0x700/0x1cc0 > 6) 2400 208 handle_mm_fault+0xf0/0x3f0 > 7) 2192 80 ___do_page_fault+0x3e4/0xbe0 > 8) 2112 160 do_page_fault+0x30/0xc0 > 9) 1952 256 data_access_common_virt+0x210/0x220 > 10) 1696 400 0xc00000000f16b100 > 11) 1296 384 load_elf_binary+0x804/0x1b80 > 12) 912 208 bprm_execve+0x2d8/0x7e0 > 13) 704 64 do_execveat_common+0x1d0/0x2f0 > 14) 640 160 sys_execve+0x54/0x70 > 15) 480 64 system_call_exception+0x138/0x350 > 16) 416 416 system_call_common+0x160/0x2c4 > > Fix this by having ftrace create a dummy stackframe for the function > being traced. Since this is only relevant when ftrace is active, we nop > out the instruction to store LR in the LR save area in the profiling > instruction sequence on ppc32 (and in ppc64 with older gcc versions). > Instead, in ftrace, we store LR in the LR save area of the previous > stackframe, and create a minimal stackframe to represent the function > being traced. With this, backtraces now capture the function being > traced: > > /sys/kernel/debug/tracing # cat stack_trace > Depth Size Location (17 entries) > ----- ---- -------- > 0) 3888 32 _raw_spin_trylock+0x8/0x70 > 1) 3856 576 get_page_from_freelist+0x26c/0x1ad0 > 2) 3280 64 __alloc_pages+0x290/0x1280 > 3) 3216 336 __folio_alloc+0x34/0x90 > 4) 2880 176 vma_alloc_folio+0xd8/0x540 > 5) 2704 416 __handle_mm_fault+0x700/0x1cc0 > 6) 2288 96 handle_mm_fault+0xf0/0x3f0 > 7) 2192 48 ___do_page_fault+0x3e4/0xbe0 > 8) 2144 192 do_page_fault+0x30/0xc0 > 9) 1952 608 data_access_common_virt+0x210/0x220 > 10) 1344 16 0xc0000000334bbb50 > 11) 1328 416 load_elf_binary+0x804/0x1b80 > 12) 912 64 bprm_execve+0x2d8/0x7e0 > 13) 848 176 do_execveat_common+0x1d0/0x2f0 > 14) 672 192 sys_execve+0x54/0x70 > 15) 480 64 system_call_exception+0x138/0x350 > 16) 416 416 system_call_common+0x160/0x2c4 > > This results in two additional stores in the ftrace entry code, but > produces reliable backtraces. Note that this change now aligns with > other architectures (arm64, s390, x86). > > Signed-off-by: Naveen N Rao <nav...@kernel.org>
Reviewed-by: Christophe Leroy <christophe.le...@csgroup.eu> > --- > arch/powerpc/kernel/trace/ftrace.c | 6 ++++-- > arch/powerpc/kernel/trace/ftrace_entry.S | 11 ++++++++--- > 2 files changed, 12 insertions(+), 5 deletions(-) > > diff --git a/arch/powerpc/kernel/trace/ftrace.c > b/arch/powerpc/kernel/trace/ftrace.c > index 82010629cf887c..2956196c98ffdc 100644 > --- a/arch/powerpc/kernel/trace/ftrace.c > +++ b/arch/powerpc/kernel/trace/ftrace.c > @@ -229,13 +229,15 @@ int ftrace_init_nop(struct module *mod, struct > dyn_ftrace *rec) > /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ > ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); > if (!ret) > - ret = ftrace_validate_inst(ip - 4, > ppc_inst(PPC_RAW_STW(_R0, _R1, 4))); > + ret = ftrace_modify_code(ip - 4, > ppc_inst(PPC_RAW_STW(_R0, _R1, 4)), > + ppc_inst(PPC_RAW_NOP())); > } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { > /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl > _mcount' */ > ret = ftrace_read_inst(ip - 4, &old); > if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) { > ret = ftrace_validate_inst(ip - 8, > ppc_inst(PPC_RAW_MFLR(_R0))); > - ret |= ftrace_validate_inst(ip - 4, > ppc_inst(PPC_RAW_STD(_R0, _R1, 16))); > + ret |= ftrace_modify_code(ip - 4, > ppc_inst(PPC_RAW_STD(_R0, _R1, 16)), > + ppc_inst(PPC_RAW_NOP())); > } > } else { > return -EINVAL; > diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S > b/arch/powerpc/kernel/trace/ftrace_entry.S > index bab3ab1368a33f..05e981fb526c2e 100644 > --- a/arch/powerpc/kernel/trace/ftrace_entry.S > +++ b/arch/powerpc/kernel/trace/ftrace_entry.S > @@ -33,6 +33,11 @@ > * and then arrange for the ftrace function to be called. > */ > .macro ftrace_regs_entry allregs > + /* Create a minimal stack frame for representing B */ > + PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1) > + /* Save the original return address in A's stack frame */ > + PPC_STL r0, LRSAVE+STACK_FRAME_MIN_SIZE(r1) > + > /* Create our stack frame + pt_regs */ > PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) > > @@ -41,8 +46,6 @@ > SAVE_GPRS(3, 10, r1) > > #ifdef CONFIG_PPC64 > - /* Save the original return address in A's stack frame */ > - std r0, LRSAVE+SWITCH_FRAME_SIZE(r1) > /* Ok to continue? */ > lbz r3, PACA_FTRACE_ENABLED(r13) > cmpdi r3, 0 > @@ -77,6 +80,8 @@ > mflr r7 > /* Save it as pt_regs->nip */ > PPC_STL r7, _NIP(r1) > + /* Also save it in B's stackframe header for proper unwind */ > + PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) > /* Save the read LR in pt_regs->link */ > PPC_STL r0, _LINK(r1) > > @@ -142,7 +147,7 @@ > #endif > > /* Pop our stack frame */ > - addi r1, r1, SWITCH_FRAME_SIZE > + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE > > #ifdef CONFIG_LIVEPATCH_64 > /* Based on the cmpd above, if the NIP was altered handle livepatch > */