Re: [PATCH v3 11/15] powerpc/64s: machine check interrupt update NMI accounting

2020-05-05 Thread Nicholas Piggin
Excerpts from Christophe Leroy's message of April 7, 2020 3:37 pm:
> 
> 
> Le 07/04/2020 à 07:16, Nicholas Piggin a écrit :
>> machine_check_early is taken as an NMI, so nmi_enter is used there.
>> machine_check_exception is no longer taken as an NMI (it's invoked
>> via irq_work in the case a machine check hits in kernel mode), so
>> remove the nmi_enter from that case.
> 
> Euh ... Is that also the case for PPC32 ?
> 
> AFAIK machine_check_exception() is called as an NMI on PPC32.

Sorry I missed your comment.  You're right, I'll make this change
depend on 64S. Thanks for reviewing them.

Thanks,
Nick


Re: [PATCH v3 11/15] powerpc/64s: machine check interrupt update NMI accounting

2020-04-12 Thread kbuild test robot
Hi Nicholas,

I love your patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on next-20200412]
[cannot apply to tip/perf/core v5.6]
[if your patch is applied to the wrong git tree, please drop us a note to help
improve the system. BTW, we also suggest to use '--base' option to specify the
base tree in git format-patch, please see https://stackoverflow.com/a/37406982]

url:
https://github.com/0day-ci/linux/commits/Nicholas-Piggin/powerpc-64-machine-check-and-system-reset-fixes/20200407-134803
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-randconfig-a001-20200412 (attached as .config)
compiler: powerpc64-linux-gcc (GCC) 9.3.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=9.3.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   In file included from include/linux/kernel.h:15,
from include/asm-generic/bug.h:19,
from arch/powerpc/include/asm/bug.h:109,
from include/linux/bug.h:5,
from arch/powerpc/include/asm/mmu.h:130,
from arch/powerpc/include/asm/paca.h:18,
from arch/powerpc/include/asm/current.h:13,
from include/linux/sched.h:12,
from arch/powerpc/kernel/process.c:14:
   arch/powerpc/kernel/process.c: In function 'show_regs':
>> arch/powerpc/kernel/process.c:1424:74: error: 'struct paca_struct' has no 
>> member named 'in_nmi'
1424 |  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
(int)get_paca()->in_nmi, (int)get_paca()->in_mce);
 |  
^~
   include/linux/printk.h:317:26: note: in definition of macro 'pr_cont'
 317 |  printk(KERN_CONT fmt, ##__VA_ARGS__)
 |  ^~~
>> arch/powerpc/kernel/process.c:1424:99: error: 'struct paca_struct' has no 
>> member named 'in_mce'
1424 |  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
(int)get_paca()->in_nmi, (int)get_paca()->in_mce);
 |  
 ^~
   include/linux/printk.h:317:26: note: in definition of macro 'pr_cont'
 317 |  printk(KERN_CONT fmt, ##__VA_ARGS__)
 |  ^~~

vim +1424 arch/powerpc/kernel/process.c

  1400  
  1401  void show_regs(struct pt_regs * regs)
  1402  {
  1403  int i, trap;
  1404  
  1405  show_regs_print_info(KERN_DEFAULT);
  1406  
  1407  printk("NIP:  "REG" LR: "REG" CTR: "REG"\n",
  1408 regs->nip, regs->link, regs->ctr);
  1409  printk("REGS: %px TRAP: %04lx   %s  (%s)\n",
  1410 regs, regs->trap, print_tainted(), 
init_utsname()->release);
  1411  printk("MSR:  "REG" ", regs->msr);
  1412  print_msr_bits(regs->msr);
  1413  pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
  1414  trap = TRAP(regs);
  1415  if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
  1416  pr_cont("CFAR: "REG" ", regs->orig_gpr3);
  1417  if (trap == 0x200 || trap == 0x300 || trap == 0x600)
  1418  #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
  1419  pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, 
regs->dsisr);
  1420  #else
  1421  pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, 
regs->dsisr);
  1422  #endif
  1423  #ifdef CONFIG_PPC64
> 1424  pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
> (int)get_paca()->in_nmi, (int)get_paca()->in_mce);
  1425  #endif
  1426  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
  1427  if (MSR_TM_ACTIVE(regs->msr))
  1428  pr_cont("\nPACATMSCRATCH: %016llx ", 
get_paca()->tm_scratch);
  1429  #endif
  1430  
  1431  for (i = 0;  i < 32;  i++) {
  1432  if ((i % REGS_PER_LINE) == 0)
  1433  pr_cont("\nGPR%02d: ", i);
  1434  pr_cont(REG " ", regs->gpr[i]);
  1435  if (i == LAST_VOLATILE && !FULL_REGS(regs))
  1436  break;
  1437  }
  1438  pr_cont("\n");
  1439  #ifdef CONFIG_KALLSYMS
  1440  /*
  1441   * Lookup NIP late so we have the best change of getting the
  1442   * above info out without failing
  1443   */
  1444  printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
  1445  printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
  1446  #endif
  1447  show_stack(current, (unsigned long *) 

Re: [PATCH v3 11/15] powerpc/64s: machine check interrupt update NMI accounting

2020-04-06 Thread Christophe Leroy




Le 07/04/2020 à 07:16, Nicholas Piggin a écrit :

machine_check_early is taken as an NMI, so nmi_enter is used there.
machine_check_exception is no longer taken as an NMI (it's invoked
via irq_work in the case a machine check hits in kernel mode), so
remove the nmi_enter from that case.


Euh ... Is that also the case for PPC32 ?

AFAIK machine_check_exception() is called as an NMI on PPC32.

Christophe



In NMI context, hash faults don't try to refill the hash table, which
can lead to crashes accessing non-pinned kernel pages. System reset
still has this potential problem.

Signed-off-by: Nicholas Piggin 
---
  arch/powerpc/kernel/mce.c |  7 +++
  arch/powerpc/kernel/process.c |  2 +-
  arch/powerpc/kernel/traps.c   | 13 +
  3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 8077b5fb18a7..be7e3f92a7b5 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -574,6 +574,9 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
  long machine_check_early(struct pt_regs *regs)
  {
long handled = 0;
+   bool nested = in_nmi();
+   if (!nested)
+   nmi_enter();
  
  	hv_nmi_check_nonrecoverable(regs);
  
@@ -582,6 +585,10 @@ long machine_check_early(struct pt_regs *regs)

 */
if (ppc_md.machine_check_early)
handled = ppc_md.machine_check_early(regs);
+
+   if (!nested)
+   nmi_exit();
+
return handled;
  }
  
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c

index 9c21288f8645..44410dd3029f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1421,7 +1421,7 @@ void show_regs(struct pt_regs * regs)
pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
  #endif
  #ifdef CONFIG_PPC64
-   pr_cont("IRQMASK: %lx ", regs->softe);
+   pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
(int)get_paca()->in_nmi, (int)get_paca()->in_mce);
  #endif
  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (MSR_TM_ACTIVE(regs->msr))
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 3fca22276bb1..9f221772eb73 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -823,9 +823,6 @@ int machine_check_generic(struct pt_regs *regs)
  void machine_check_exception(struct pt_regs *regs)
  {
int recover = 0;
-   bool nested = in_nmi();
-   if (!nested)
-   nmi_enter();
  
  	__this_cpu_inc(irq_stat.mce_exceptions);
  
@@ -851,20 +848,12 @@ void machine_check_exception(struct pt_regs *regs)

if (check_io_access(regs))
goto bail;
  
-	if (!nested)

-   nmi_exit();
-
die("Machine check", regs, SIGBUS);
  
+bail:

/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
nmi_panic(regs, "Unrecoverable Machine check");
-
-   return;
-
-bail:
-   if (!nested)
-   nmi_exit();
  }
  
  void SMIException(struct pt_regs *regs)




[PATCH v3 11/15] powerpc/64s: machine check interrupt update NMI accounting

2020-04-06 Thread Nicholas Piggin
machine_check_early is taken as an NMI, so nmi_enter is used there.
machine_check_exception is no longer taken as an NMI (it's invoked
via irq_work in the case a machine check hits in kernel mode), so
remove the nmi_enter from that case.

In NMI context, hash faults don't try to refill the hash table, which
can lead to crashes accessing non-pinned kernel pages. System reset
still has this potential problem.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/mce.c |  7 +++
 arch/powerpc/kernel/process.c |  2 +-
 arch/powerpc/kernel/traps.c   | 13 +
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 8077b5fb18a7..be7e3f92a7b5 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -574,6 +574,9 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info);
 long machine_check_early(struct pt_regs *regs)
 {
long handled = 0;
+   bool nested = in_nmi();
+   if (!nested)
+   nmi_enter();
 
hv_nmi_check_nonrecoverable(regs);
 
@@ -582,6 +585,10 @@ long machine_check_early(struct pt_regs *regs)
 */
if (ppc_md.machine_check_early)
handled = ppc_md.machine_check_early(regs);
+
+   if (!nested)
+   nmi_exit();
+
return handled;
 }
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9c21288f8645..44410dd3029f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1421,7 +1421,7 @@ void show_regs(struct pt_regs * regs)
pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
 #endif
 #ifdef CONFIG_PPC64
-   pr_cont("IRQMASK: %lx ", regs->softe);
+   pr_cont("IRQMASK: %lx IN_NMI:%d IN_MCE:%d", regs->softe, 
(int)get_paca()->in_nmi, (int)get_paca()->in_mce);
 #endif
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (MSR_TM_ACTIVE(regs->msr))
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 3fca22276bb1..9f221772eb73 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -823,9 +823,6 @@ int machine_check_generic(struct pt_regs *regs)
 void machine_check_exception(struct pt_regs *regs)
 {
int recover = 0;
-   bool nested = in_nmi();
-   if (!nested)
-   nmi_enter();
 
__this_cpu_inc(irq_stat.mce_exceptions);
 
@@ -851,20 +848,12 @@ void machine_check_exception(struct pt_regs *regs)
if (check_io_access(regs))
goto bail;
 
-   if (!nested)
-   nmi_exit();
-
die("Machine check", regs, SIGBUS);
 
+bail:
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
nmi_panic(regs, "Unrecoverable Machine check");
-
-   return;
-
-bail:
-   if (!nested)
-   nmi_exit();
 }
 
 void SMIException(struct pt_regs *regs)
-- 
2.23.0