You're right. Would pushing an extra register be an adequate fix?
On Fri, Aug 2, 2019 at 7:24 AM Robin Murphy <robin.mur...@arm.com> wrote: > > On 02/08/2019 00:10, Nathan Huckleberry wrote: > > The stackframe setup when compiled with clang is different. > > Since the stack unwinder expects the gcc stackframe setup it > > fails to print backtraces. This patch adds support for the > > clang stackframe setup. > > > > Cc: clang-built-li...@googlegroups.com > > Suggested-by: Tri Vo <tr...@google.com> > > Signed-off-by: Nathan Huckleberry <nh...@google.com> > > --- > > arch/arm/Kconfig.debug | 4 +- > > arch/arm/Makefile | 2 +- > > arch/arm/lib/backtrace.S | 134 ++++++++++++++++++++++++++++++++++++--- > > 3 files changed, 128 insertions(+), 12 deletions(-) > > > > diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug > > index 85710e078afb..92fca7463e21 100644 > > --- a/arch/arm/Kconfig.debug > > +++ b/arch/arm/Kconfig.debug > > @@ -56,7 +56,7 @@ choice > > > > config UNWINDER_FRAME_POINTER > > bool "Frame pointer unwinder" > > - depends on !THUMB2_KERNEL && !CC_IS_CLANG > > + depends on !THUMB2_KERNEL > > select ARCH_WANT_FRAME_POINTERS > > select FRAME_POINTER > > help > > @@ -1872,7 +1872,7 @@ config DEBUG_UNCOMPRESS > > When this option is set, the selected DEBUG_LL output method > > will be re-used for normal decompressor output on multiplatform > > kernels. > > - > > + > > > > config UNCOMPRESS_INCLUDE > > string > > diff --git a/arch/arm/Makefile b/arch/arm/Makefile > > index c3624ca6c0bc..a593d9c4e18a 100644 > > --- a/arch/arm/Makefile > > +++ b/arch/arm/Makefile > > @@ -36,7 +36,7 @@ KBUILD_CFLAGS += $(call > > cc-option,-mno-unaligned-access) > > endif > > > > ifeq ($(CONFIG_FRAME_POINTER),y) > > -KBUILD_CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog > > +KBUILD_CFLAGS +=-fno-omit-frame-pointer $(call cc-option,-mapcs,) > > $(call cc-option,-mno-sched-prolog,) > > endif > > > > ifeq ($(CONFIG_CPU_BIG_ENDIAN),y) > > diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S > > index 1d5210eb4776..fd64eec9f6ae 100644 > > --- a/arch/arm/lib/backtrace.S > > +++ b/arch/arm/lib/backtrace.S > > @@ -14,10 +14,7 @@ > > @ fp is 0 or stack frame > > > > #define frame r4 > > -#define sv_fp r5 > > -#define sv_pc r6 > > #define mask r7 > > -#define offset r8 > > > > ENTRY(c_backtrace) > > > > @@ -25,7 +22,8 @@ ENTRY(c_backtrace) > > ret lr > > ENDPROC(c_backtrace) > > #else > > - stmfd sp!, {r4 - r8, lr} @ Save an extra register so > > we have a location... > > + stmfd sp!, {r4 - r8, fp, lr} @ Save an extra register > > Note that the Procedure Call Standard for EABI requires that SP be > 8-byte-aligned at a public interface. Pushing an odd number of registers > here looks like it will make the subsequent calls to dump_backtrace_* > and printk violate that condition. > > Robin. > > > + @ so we have a location.. > > movs frame, r0 @ if frame pointer is zero > > beq no_frame @ we have no stack frames > > > > @@ -35,11 +33,119 @@ ENDPROC(c_backtrace) > > THUMB( orreq mask, #0x03 ) > > movne mask, #0 @ mask for 32-bit > > > > -1: stmfd sp!, {pc} @ calculate offset of PC > > stored > > - ldr r0, [sp], #4 @ by stmfd for this CPU > > - adr r1, 1b > > - sub offset, r0, r1 > > > > +#if defined(CONFIG_CC_IS_CLANG) > > +/* > > + * Clang does not store pc or sp in function prologues > > + * so we don't know exactly where the function > > + * starts. > > + * We can treat the current frame's lr as the saved pc and the > > + * preceding frame's lr as the lr, but we can't > > + * trace the most recent call. > > + * Inserting a false stack frame allows us to reference the > > + * function called last in the stacktrace. > > + * If the call instruction was a bl we can look at the callers > > + * branch instruction to calculate the saved pc. > > + * We can recover the pc in most cases, but in cases such as > > + * calling function pointers we cannot. In this > > + * case, default to using the lr. This will be > > + * some address in the function, but will not > > + * be the function start. > > + * Unfortunately due to the stack frame layout we can't dump > > + * r0 - r3, but these are less frequently saved. > > + * > > + * Stack frame layout: > > + * <larger addresses> > > + * saved lr > > + * frame => saved fp > > + * optionally saved caller registers (r4 - r10) > > + * optionally saved arguments (r0 - r3) > > + * <top of stack frame> > > + * <smaller addressses> > > + * > > + * Functions start with the following code sequence: > > + * corrected pc => stmfd sp!, {..., fp, lr} > > + * add fp, sp, #x > > + * stmfd sp!, {r0 - r3} (optional) > > + */ > > +#define sv_fp r5 > > +#define sv_pc r6 > > +#define sv_lr r8 > > + add frame, sp, #20 @ switch to false frame > > +for_each_frame: tst frame, mask @ Check for address > > exceptions > > + bne no_frame > > + > > +1001: ldr sv_pc, [frame, #4] @ get saved 'pc' > > +1002: ldr sv_fp, [frame, #0] @ get saved fp > > + > > + teq sv_fp, #0 @ make sure next frame exists > > + beq no_frame > > + > > +1003: ldr sv_lr, [sv_fp, #4] @ get saved lr from > > next frame > > + > > + //try to find function start > > + ldr r0, [sv_lr, #-4] @ get call instruction > > + ldr r3, .Ldsi+8 > > + and r2, r3, r0 @ is this a bl call > > + teq r2, r3 > > + bne finished_setup @ give up if it's not > > + and r0, #0xffffff @ get call offset 24-bit int > > + lsl r0, r0, #8 @ sign extend offset > > + asr r0, r0, #8 > > + ldr sv_pc, [sv_fp, #4] @ get lr address > > + add sv_pc, sv_pc, #-4 @ get call instruction address > > + add sv_pc, sv_pc, #8 @ take care of prefetch > > + add sv_pc, sv_pc, r0, lsl #2 @ find function start > > + b finished_setup > > + > > +finished_setup: > > + > > + bic sv_pc, sv_pc, mask @ mask PC/LR for the mode > > + > > +1004: mov r0, sv_pc > > + > > + mov r1, sv_lr > > + mov r2, frame > > + bic r1, r1, mask @ mask PC/LR for the mode > > + bl dump_backtrace_entry > > + > > +1005: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., > > fp, lr} > > + ldr r3, .Ldsi @ instruction exists, > > + teq r3, r1, lsr #11 > > + ldr r0, [frame] @ locals are stored in > > + @ the preceding frame > > + subeq r0, r0, #4 > > + bleq dump_backtrace_stm @ dump saved registers > > + > > + teq sv_fp, #0 @ zero saved fp means > > + beq no_frame @ no further frames > > + > > + cmp sv_fp, frame @ next frame must be > > + mov frame, sv_fp @ above the current frame > > + bhi for_each_frame > > + > > +1006: adr r0, .Lbad > > + mov r1, frame > > + bl printk > > +no_frame: ldmfd sp!, {r4 - r8, fp, pc} > > +ENDPROC(c_backtrace) > > + .pushsection __ex_table,"a" > > + .align 3 > > + .long 1001b, 1006b > > + .long 1002b, 1006b > > + .long 1003b, 1006b > > + .long 1004b, 1006b > > + .popsection > > + > > +.Lbad: .asciz "Backtrace aborted due to bad frame pointer > > <%p>\n" > > + .align > > +.Ldsi: .word 0xe92d4800 >> 11 @ stmfd sp!, {... fp, > > lr} > > + .word 0xe92d0000 >> 11 @ stmfd sp!, {} > > + .word 0x0b000000 @ bl if these bits are set > > + > > +ENDPROC(c_backtrace) > > + > > +#else > > /* > > * Stack frame layout: > > * optionally saved caller registers (r4 - r10) > > @@ -55,6 +161,15 @@ ENDPROC(c_backtrace) > > * stmfd sp!, {r0 - r3} (optional) > > * corrected pc => stmfd sp!, {..., fp, ip, lr, pc} > > */ > > +#define sv_fp r5 > > +#define sv_pc r6 > > +#define offset r8 > > + > > +1: stmfd sp!, {pc} @ calculate offset of PC > > stored > > + ldr r0, [sp], #4 @ by stmfd for this CPU > > + adr r1, 1b > > + sub offset, r0, r1 > > + > > for_each_frame: tst frame, mask @ Check for address > > exceptions > > bne no_frame > > > > @@ -98,7 +213,7 @@ for_each_frame: tst frame, mask @ > > Check for address exceptions > > 1006: adr r0, .Lbad > > mov r1, frame > > bl printk > > -no_frame: ldmfd sp!, {r4 - r8, pc} > > +no_frame: ldmfd sp!, {r4 - r8, fp, pc} > > ENDPROC(c_backtrace) > > > > .pushsection __ex_table,"a" > > @@ -115,3 +230,4 @@ ENDPROC(c_backtrace) > > .word 0xe92d0000 >> 11 @ stmfd sp!, {} > > > > #endif > > +#endif > >