On Sat, Dec 10, 2016 at 06:28:02PM +0100, Borislav Petkov wrote:
> On Sat, Dec 10, 2016 at 11:04:44AM -0600, Josh Poimboeuf wrote:
> > That's a new one. Was there anything else printed?
>
> It is the first line that appears in dmesg when I boot:
>
> [ 0.000000] WARNING: kernel stack frame pointer at ffffffffb5e03f40 in
> swapper:0 has bad value (null)
> [ 0.000000] Linux version 4.9.0-rc8+ (boris@gondor) (gcc version 6.2.0
> 20161109 (Debian 6.2.0-13) ) #1 SMP PREEMPT Sat Dec 10 13:25:29 CET 2016
> [ 0.000000] Command line: BOOT_IMAGE=/boot/vmlinuz-4.9.0-rc8+
> root=/dev/sda7 ro earlyprintk=ttyS0,115200 console=ttyS0,115200 console=tty0
> root=/dev/sda7 log_buf_len=10M resume=/dev/sda5 no_console_suspend
> ignore_loglevel
> [ 0.000000] KERNEL supported cpus:
> [ 0.000000] Intel GenuineIntel
> [ 0.000000] AMD AuthenticAMD
> [ 0.000000] Centaur CentaurHauls
> [ 0.000000] x86/fpu: Supporting XSAVE feature 0x001: 'x87 floating point
> registers'
> ...
>
> > Were you doing anything special when it happened?
>
> Not really - just booting ;-)
>
> > Do you see it reliably?
>
> 2 of 2 boots.
>
> > I should probably figure out a way to dump more data for that warning.
>
> Sure, I can test patches.
Can you try with this?

diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index c5a7f3a..6fa75b1 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -12,7 +12,7 @@ struct unwind_state {
 	struct task_struct *task;
 	int graph_idx;
 #ifdef CONFIG_FRAME_POINTER
-	unsigned long *bp;
+	unsigned long *bp, *orig_sp;
 	struct pt_regs *regs;
 #else
 	unsigned long *sp;
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index ea7b7f9..1c93984 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -6,6 +6,36 @@
 
 #define FRAME_HEADER_SIZE (sizeof(long) * 2)
 
+static void unwind_dump(struct unwind_state *state, unsigned long *sp)
+{
+	static bool dumped_before = false;
+	bool prev_zero, zero = false;
+	unsigned long word;
+
+	if (dumped_before)
+		return;
+
+	dumped_before = true;
+
+	printk_deferred("unwind stack type:%d next_sp:%p mask:%lx graph_idx:%d\n",
+			state->stack_info.type, state->stack_info.next_sp,
+			state->stack_mask, state->graph_idx);
+
+	for (sp = state->orig_sp; sp < state->stack_info.end; sp++) {
+		prev_zero = zero;
+		word = READ_ONCE_NOCHECK(*sp);
+		zero = word == 0;
+
+		if (zero) {
+			if (!prev_zero)
+				printk_deferred("%p: %016x ...\n", sp, 0);
+			continue;
+		}
+
+		printk_deferred("%p: %016lx (%pB)\n", sp, word, (void *)word);
+	}
+}
+
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
 	unsigned long addr;
@@ -25,6 +55,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
 			"WARNING: unrecognized kernel stack return address %p at %p in %s:%d\n",
 			(void *)addr, addr_p, state->task->comm,
 			state->task->pid);
+		unwind_dump(state, addr_p);
 		return 0;
 	}
 
@@ -67,6 +98,7 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 			       size_t len)
 {
 	struct stack_info *info = &state->stack_info;
+	enum stack_type orig_type = info->type;
 
 	/*
 	 * If addr isn't on the current stack, switch to the next one.
@@ -80,6 +112,9 @@ static bool update_stack_state(struct unwind_state *state, void *addr,
 			    &state->stack_mask))
 		return false;
 
+	if (!state->orig_sp || info->type != orig_type)
+		state->orig_sp = addr;
+
 	return true;
 }
 
@@ -178,11 +213,13 @@ bool unwind_next_frame(struct unwind_state *state)
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
 			state->regs, state->task->comm,
 			state->task->pid, next_frame);
+		unwind_dump(state, (unsigned long *)state->regs);
 	} else {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n",
 			state->bp, state->task->comm,
 			state->task->pid, next_frame);
+		unwind_dump(state, state->bp);
 	}
 the_end:
 	state->stack_info.type = STACK_TYPE_UNKNOWN;