Re: [PATCH v3 11/15] powerpc/64s: machine check interrupt update NMI accounting
Excerpts from Christophe Leroy's message of April 7, 2020 3:37 pm: > > > Le 07/04/2020 à 07:16, Nicholas Piggin a écrit : >> machine_check_early is taken as an NMI, so nmi_enter is used there. >> machine_check_exception is no longer taken as an NMI (it's invoked >> via irq_work in the case a machine check hits in kernel mode), so >> remove the nmi_enter from that case. > > Euh ... Is that also the case for PPC32 ? > > AFAIK machine_check_exception() is called as an NMI on PPC32. Sorry I missed your comment. You're right, I'll make this change depend on 64S. Thanks for reviewing them. Thanks, Nick
Re: [PATCH v3 11/15] powerpc/64s: machine check interrupt update NMI accounting
Hi Nicholas, I love your patch! Yet something to improve: [auto build test ERROR on powerpc/next] [also build test ERROR on next-20200412] [cannot apply to tip/perf/core v5.6] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system. BTW, we also suggest to use '--base' option to specify the base tree in git format-patch, please see https://stackoverflow.com/a/37406982] url: https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64-machine-check-and-system-reset-fixes/20200407-134803 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-randconfig-a001-20200412 (attached as .config) compiler: powerpc64-linux-gcc (GCC) 9.3.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree GCC_VERSION=9.3.0 make.cross ARCH=powerpc If you fix the issue, kindly add following tag as appropriate Reported-by: kbuild test robot All errors (new ones prefixed by >>): In file included from include/linux/kernel.h:15, from include/asm-generic/bug.h:19, from arch/powerpc/include/asm/bug.h:109, from include/linux/bug.h:5, from arch/powerpc/include/asm/mmu.h:130, from arch/powerpc/include/asm/paca.h:18, from arch/powerpc/include/asm/current.h:13, from include/linux/sched.h:12, from arch/powerpc/kernel/process.c:14: arch/powerpc/kernel/process.c: In function 'show_regs': >> arch/powerpc/kernel/process.c:1424:74: error: 'struct paca_struct' has no >> member named 'in_nmi' 1424 | pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce); | ^~ include/linux/printk.h:317:26: note: in definition of macro 'pr_cont' 317 | printk(KERN_CONT fmt, ##__VA_ARGS__) | ^~~ >> arch/powerpc/kernel/process.c:1424:99: error: 'struct paca_struct' has no >> member named 'in_mce' 1424 | pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce); | ^~ include/linux/printk.h:317:26: note: in definition of macro 'pr_cont' 317 | printk(KERN_CONT fmt, ##__VA_ARGS__) | ^~~ vim +1424 arch/powerpc/kernel/process.c 1400 1401 void show_regs(struct pt_regs * regs) 1402 { 1403 int i, trap; 1404 1405 show_regs_print_info(KERN_DEFAULT); 1406 1407 printk("NIP: "REG" LR: "REG" CTR: "REG"\n", 1408 regs->nip, regs->link, regs->ctr); 1409 printk("REGS: %px TRAP: %04lx %s (%s)\n", 1410 regs, regs->trap, print_tainted(), init_utsname()->release); 1411 printk("MSR: "REG" ", regs->msr); 1412 print_msr_bits(regs->msr); 1413 pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); 1414 trap = TRAP(regs); 1415 if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR)) 1416 pr_cont("CFAR: "REG" ", regs->orig_gpr3); 1417 if (trap == 0x200 || trap == 0x300 || trap == 0x600) 1418 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) 1419 pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr); 1420 #else 1421 pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); 1422 #endif 1423 #ifdef CONFIG_PPC64 > 1424 pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, > (int)get_paca()->in_nmi, (int)get_paca()->in_mce); 1425 #endif 1426 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 1427 if (MSR_TM_ACTIVE(regs->msr)) 1428 pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch); 1429 #endif 1430 1431 for (i = 0; i < 32; i++) { 1432 if ((i % REGS_PER_LINE) == 0) 1433 pr_cont("\nGPR%02d: ", i); 1434 pr_cont(REG " ", regs->gpr[i]); 1435 if (i == LAST_VOLATILE && !FULL_REGS(regs)) 1436 break; 1437 } 1438 pr_cont("\n"); 1439 #ifdef CONFIG_KALLSYMS 1440 /* 1441 * Lookup NIP late so we have the best change of getting the 1442 * above info out without failing 1443 */ 1444 printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); 1445 printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); 1446 #endif 1447 show_stack(current, (unsigned long *)
Re: [PATCH v3 11/15] powerpc/64s: machine check interrupt update NMI accounting
Le 07/04/2020 à 07:16, Nicholas Piggin a écrit : machine_check_early is taken as an NMI, so nmi_enter is used there. machine_check_exception is no longer taken as an NMI (it's invoked via irq_work in the case a machine check hits in kernel mode), so remove the nmi_enter from that case. Euh ... Is that also the case for PPC32 ? AFAIK machine_check_exception() is called as an NMI on PPC32. Christophe In NMI context, hash faults don't try to refill the hash table, which can lead to crashes accessing non-pinned kernel pages. System reset still has this potential problem. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/mce.c | 7 +++ arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/traps.c | 13 + 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 8077b5fb18a7..be7e3f92a7b5 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -574,6 +574,9 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); long machine_check_early(struct pt_regs *regs) { long handled = 0; + bool nested = in_nmi(); + if (!nested) + nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -582,6 +585,10 @@ long machine_check_early(struct pt_regs *regs) */ if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); + + if (!nested) + nmi_exit(); + return handled; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9c21288f8645..44410dd3029f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1421,7 +1421,7 @@ void show_regs(struct pt_regs * regs) pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); #endif #ifdef CONFIG_PPC64 - pr_cont("IRQMASK: %lx ", regs->softe); + pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3fca22276bb1..9f221772eb73 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -823,9 +823,6 @@ int machine_check_generic(struct pt_regs *regs) void machine_check_exception(struct pt_regs *regs) { int recover = 0; - bool nested = in_nmi(); - if (!nested) - nmi_enter(); __this_cpu_inc(irq_stat.mce_exceptions); @@ -851,20 +848,12 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - if (!nested) - nmi_exit(); - die("Machine check", regs, SIGBUS); +bail: /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) nmi_panic(regs, "Unrecoverable Machine check"); - - return; - -bail: - if (!nested) - nmi_exit(); } void SMIException(struct pt_regs *regs)
[PATCH v3 11/15] powerpc/64s: machine check interrupt update NMI accounting
machine_check_early is taken as an NMI, so nmi_enter is used there. machine_check_exception is no longer taken as an NMI (it's invoked via irq_work in the case a machine check hits in kernel mode), so remove the nmi_enter from that case. In NMI context, hash faults don't try to refill the hash table, which can lead to crashes accessing non-pinned kernel pages. System reset still has this potential problem. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/mce.c | 7 +++ arch/powerpc/kernel/process.c | 2 +- arch/powerpc/kernel/traps.c | 13 + 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 8077b5fb18a7..be7e3f92a7b5 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -574,6 +574,9 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); long machine_check_early(struct pt_regs *regs) { long handled = 0; + bool nested = in_nmi(); + if (!nested) + nmi_enter(); hv_nmi_check_nonrecoverable(regs); @@ -582,6 +585,10 @@ long machine_check_early(struct pt_regs *regs) */ if (ppc_md.machine_check_early) handled = ppc_md.machine_check_early(regs); + + if (!nested) + nmi_exit(); + return handled; } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9c21288f8645..44410dd3029f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1421,7 +1421,7 @@ void show_regs(struct pt_regs * regs) pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); #endif #ifdef CONFIG_PPC64 - pr_cont("IRQMASK: %lx ", regs->softe); + pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, (int)get_paca()->in_nmi, (int)get_paca()->in_mce); #endif #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (MSR_TM_ACTIVE(regs->msr)) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3fca22276bb1..9f221772eb73 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -823,9 +823,6 @@ int machine_check_generic(struct pt_regs *regs) void machine_check_exception(struct pt_regs *regs) { int recover = 0; - bool nested = in_nmi(); - if (!nested) - nmi_enter(); __this_cpu_inc(irq_stat.mce_exceptions); @@ -851,20 +848,12 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - if (!nested) - nmi_exit(); - die("Machine check", regs, SIGBUS); +bail: /* Must die if the interrupt is not recoverable */ if (!(regs->msr & MSR_RI)) nmi_panic(regs, "Unrecoverable Machine check"); - - return; - -bail: - if (!nested) - nmi_exit(); } void SMIException(struct pt_regs *regs) -- 2.23.0