As explained by commit daf00ae71dad ("powerpc/traps: restore recoverability of machine_check interrupts"), die() can't be called from within nmi_enter to nicely kill a process context that was interrupted. nmi_exit must be called first.
This adds a function die_mce which takes care of this for machine check handlers. Signed-off-by: Nicholas Piggin <npig...@gmail.com> --- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/kernel/traps.c | 21 +++++++++++++++------ arch/powerpc/platforms/powernv/opal.c | 2 +- arch/powerpc/platforms/pseries/ras.c | 2 +- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 8f09ddae9305..c10ae0a9bbaf 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -118,6 +118,7 @@ void do_bad_page_fault_segv(struct pt_regs *regs); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); +void die_mce(const char *str, struct pt_regs *regs, long err); extern bool die_will_crash(void); extern void panic_flush_kmsg_start(void); extern void panic_flush_kmsg_end(void); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6691774fe1fb..f9ef183a5454 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -789,6 +789,19 @@ int machine_check_generic(struct pt_regs *regs) } #endif /* everything else */ +void die_mce(const char *str, struct pt_regs *regs, long err) +{ + /* + * The machine check wants to kill the interrupted context, but + * do_exit() checks for in_interrupt() and panics in that case, so + * exit the irq/nmi before calling die. + */ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) + nmi_exit(); + die(str, regs, err); +} +NOKPROBE_SYMBOL(die_mce); + void machine_check_exception(struct pt_regs *regs) { int recover = 0; @@ -831,15 +844,11 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - if (nmi) nmi_exit(); - - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) - die("Unrecoverable Machine check", regs, SIGBUS); - - return; + die_mce("Unrecoverable Machine check", regs, SIGBUS); bail: if (nmi) nmi_exit(); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index c61c3b62c8c6..303d7c775740 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -624,7 +624,7 @@ static int opal_recover_mce(struct pt_regs *regs, */ recovered = 0; } else { - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); recovered = 1; } } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 149cec2212e6..2d9f985fd13a 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -813,7 +813,7 @@ static int recover_mce(struct pt_regs *regs, struct machine_check_event *evt) */ recovered = 0; } else { - die("Machine check", regs, SIGBUS); + die_mce("Machine check", regs, SIGBUS); recovered = 1; } } -- 2.23.0