Re: [PATCH v6 5/8] powerpc/pseries: flush SLB contents on SLB MCE errors.

2018-08-02 Thread Nicholas Piggin
On Thu, 2 Aug 2018 10:30:08 +0530
Mahesh Jagannath Salgaonkar  wrote:

> On 08/01/2018 11:28 AM, Nicholas Piggin wrote:
> > On Wed, 04 Jul 2018 23:28:21 +0530
> > Mahesh J Salgaonkar  wrote:
> >   
> >> From: Mahesh Salgaonkar 
> >>
> >> On pseries, as of today, the system crashes if we get a machine check
> >> exception due to SLB errors. These are soft errors and can be fixed by
> >> flushing the SLBs, so the kernel can continue to function instead of
> >> crashing. We do this in real mode before turning on the MMU; otherwise
> >> we would run into nested machine checks. This patch now fetches the
> >> RTAS error log in real mode and flushes the SLBs on SLB errors.
> >>
> >> Signed-off-by: Mahesh Salgaonkar 
> >> ---
> >>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |1 
> >>  arch/powerpc/include/asm/machdep.h|1 
> >>  arch/powerpc/kernel/exceptions-64s.S  |   42 +
> >>  arch/powerpc/kernel/mce.c |   16 +++-
> >>  arch/powerpc/mm/slb.c |6 +++
> >>  arch/powerpc/platforms/pseries/pseries.h  |1 
> >>  arch/powerpc/platforms/pseries/ras.c  |   51 +
> >>  arch/powerpc/platforms/pseries/setup.c|1 
> >>  8 files changed, 116 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
> >> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> >> index 50ed64fba4ae..cc00a7088cf3 100644
> >> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> >> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> >> @@ -487,6 +487,7 @@ extern void hpte_init_native(void);
> >>  
> >>  extern void slb_initialize(void);
> >>  extern void slb_flush_and_rebolt(void);
> >> +extern void slb_flush_and_rebolt_realmode(void);
> >>  
> >>  extern void slb_vmalloc_update(void);
> >>  extern void slb_set_size(u16 size);
> >> diff --git a/arch/powerpc/include/asm/machdep.h 
> >> b/arch/powerpc/include/asm/machdep.h
> >> index ffe7c71e1132..fe447e0d4140 100644
> >> --- a/arch/powerpc/include/asm/machdep.h
> >> +++ b/arch/powerpc/include/asm/machdep.h
> >> @@ -108,6 +108,7 @@ struct machdep_calls {
> >>  
> >>/* Early exception handlers called in realmode */
> >>int (*hmi_exception_early)(struct pt_regs *regs);
> >> +  int (*machine_check_early)(struct pt_regs *regs);
> >>  
> >>/* Called during machine check exception to retrive fixup address. */
> >>bool(*mce_check_early_recovery)(struct pt_regs *regs);
> >> diff --git a/arch/powerpc/kernel/exceptions-64s.S 
> >> b/arch/powerpc/kernel/exceptions-64s.S
> >> index f283958129f2..0038596b7906 100644
> >> --- a/arch/powerpc/kernel/exceptions-64s.S
> >> +++ b/arch/powerpc/kernel/exceptions-64s.S
> >> @@ -332,6 +332,9 @@ TRAMP_REAL_BEGIN(machine_check_pSeries)
> >>  machine_check_fwnmi:
> >>SET_SCRATCH0(r13)   /* save r13 */
> >>EXCEPTION_PROLOG_0(PACA_EXMC)
> >> +BEGIN_FTR_SECTION
> >> +  b   machine_check_pSeries_early
> >> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
> >>  machine_check_pSeries_0:
> >>EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
> >>/*
> >> @@ -343,6 +346,45 @@ machine_check_pSeries_0:
> >>  
> >>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
> >>  
> >> +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> >> +BEGIN_FTR_SECTION
> >> +  EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> >> +  mr  r10,r1  /* Save r1 */
> >> +  ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency stack */
> >> +  subir1,r1,INT_FRAME_SIZE/* alloc stack frame*/
> >> +  mfspr   r11,SPRN_SRR0   /* Save SRR0 */
> >> +  mfspr   r12,SPRN_SRR1   /* Save SRR1 */
> >> +  EXCEPTION_PROLOG_COMMON_1()
> >> +  EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> >> +  EXCEPTION_PROLOG_COMMON_3(0x200)
> >> +  addir3,r1,STACK_FRAME_OVERHEAD
> >> +  BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
> >> +
> >> +  /* Move original SRR0 and SRR1 into the respective regs */
> >> +  ld  r9,_MSR(r1)
> >> +  mtspr   SPRN_SRR1,r9
> >> +  ld  r3,_NIP(r1)
> >> +  mtspr   SPRN_SRR0,r3
> >> +  ld  r9,_CTR(r1)
> >> +  mtctr   r9
> >> +  ld  r9,_XER(r1)
> >> +  mtxer   r9
> >> +  ld  r9,_LINK(r1)
> >> +  mtlrr9
> >> +  REST_GPR(0, r1)
> >> +  REST_8GPRS(2, r1)
> >> +  REST_GPR(10, r1)
> >> +  ld  r11,_CCR(r1)
> >> +  mtcrr11
> >> +  REST_GPR(11, r1)
> >> +  REST_2GPRS(12, r1)
> >> +  /* restore original r1. */
> >> +  ld  r1,GPR1(r1)
> >> +  SET_SCRATCH0(r13)   /* save r13 */
> >> +  EXCEPTION_PROLOG_0(PACA_EXMC)
> >> +  b   machine_check_pSeries_0
> >> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
> >> +
> >>  EXC_COMMON_BEGIN(machine_check_common)
> >>/*
> >> * Machine check is different because we use a different
> >> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> >> index efdd16a79075..221271c96a57 100644
> >> --- a/arch/powerpc/kernel/mce.c

Re: [PATCH v6 5/8] powerpc/pseries: flush SLB contents on SLB MCE errors.

2018-08-01 Thread Mahesh Jagannath Salgaonkar
On 08/01/2018 11:28 AM, Nicholas Piggin wrote:
> On Wed, 04 Jul 2018 23:28:21 +0530
> Mahesh J Salgaonkar  wrote:
> 
>> From: Mahesh Salgaonkar 
>>
>> On pseries, as of today, the system crashes if we get a machine check
>> exception due to SLB errors. These are soft errors and can be fixed by
>> flushing the SLBs, so the kernel can continue to function instead of
>> crashing. We do this in real mode before turning on the MMU; otherwise
>> we would run into nested machine checks. This patch now fetches the
>> RTAS error log in real mode and flushes the SLBs on SLB errors.
>>
>> Signed-off-by: Mahesh Salgaonkar 
>> ---
>>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |1 
>>  arch/powerpc/include/asm/machdep.h|1 
>>  arch/powerpc/kernel/exceptions-64s.S  |   42 +
>>  arch/powerpc/kernel/mce.c |   16 +++-
>>  arch/powerpc/mm/slb.c |6 +++
>>  arch/powerpc/platforms/pseries/pseries.h  |1 
>>  arch/powerpc/platforms/pseries/ras.c  |   51 +
>>  arch/powerpc/platforms/pseries/setup.c|1 
>>  8 files changed, 116 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
>> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
>> index 50ed64fba4ae..cc00a7088cf3 100644
>> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
>> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
>> @@ -487,6 +487,7 @@ extern void hpte_init_native(void);
>>  
>>  extern void slb_initialize(void);
>>  extern void slb_flush_and_rebolt(void);
>> +extern void slb_flush_and_rebolt_realmode(void);
>>  
>>  extern void slb_vmalloc_update(void);
>>  extern void slb_set_size(u16 size);
>> diff --git a/arch/powerpc/include/asm/machdep.h 
>> b/arch/powerpc/include/asm/machdep.h
>> index ffe7c71e1132..fe447e0d4140 100644
>> --- a/arch/powerpc/include/asm/machdep.h
>> +++ b/arch/powerpc/include/asm/machdep.h
>> @@ -108,6 +108,7 @@ struct machdep_calls {
>>  
>>  /* Early exception handlers called in realmode */
>>  int (*hmi_exception_early)(struct pt_regs *regs);
>> +int (*machine_check_early)(struct pt_regs *regs);
>>  
>>  /* Called during machine check exception to retrive fixup address. */
>>  bool(*mce_check_early_recovery)(struct pt_regs *regs);
>> diff --git a/arch/powerpc/kernel/exceptions-64s.S 
>> b/arch/powerpc/kernel/exceptions-64s.S
>> index f283958129f2..0038596b7906 100644
>> --- a/arch/powerpc/kernel/exceptions-64s.S
>> +++ b/arch/powerpc/kernel/exceptions-64s.S
>> @@ -332,6 +332,9 @@ TRAMP_REAL_BEGIN(machine_check_pSeries)
>>  machine_check_fwnmi:
>>  SET_SCRATCH0(r13)   /* save r13 */
>>  EXCEPTION_PROLOG_0(PACA_EXMC)
>> +BEGIN_FTR_SECTION
>> +b   machine_check_pSeries_early
>> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>>  machine_check_pSeries_0:
>>  EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
>>  /*
>> @@ -343,6 +346,45 @@ machine_check_pSeries_0:
>>  
>>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
>>  
>> +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
>> +BEGIN_FTR_SECTION
>> +EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>> +mr  r10,r1  /* Save r1 */
>> +ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency stack */
>> +subir1,r1,INT_FRAME_SIZE/* alloc stack frame*/
>> +mfspr   r11,SPRN_SRR0   /* Save SRR0 */
>> +mfspr   r12,SPRN_SRR1   /* Save SRR1 */
>> +EXCEPTION_PROLOG_COMMON_1()
>> +EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
>> +EXCEPTION_PROLOG_COMMON_3(0x200)
>> +addir3,r1,STACK_FRAME_OVERHEAD
>> +BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
>> +
>> +/* Move original SRR0 and SRR1 into the respective regs */
>> +ld  r9,_MSR(r1)
>> +mtspr   SPRN_SRR1,r9
>> +ld  r3,_NIP(r1)
>> +mtspr   SPRN_SRR0,r3
>> +ld  r9,_CTR(r1)
>> +mtctr   r9
>> +ld  r9,_XER(r1)
>> +mtxer   r9
>> +ld  r9,_LINK(r1)
>> +mtlrr9
>> +REST_GPR(0, r1)
>> +REST_8GPRS(2, r1)
>> +REST_GPR(10, r1)
>> +ld  r11,_CCR(r1)
>> +mtcrr11
>> +REST_GPR(11, r1)
>> +REST_2GPRS(12, r1)
>> +/* restore original r1. */
>> +ld  r1,GPR1(r1)
>> +SET_SCRATCH0(r13)   /* save r13 */
>> +EXCEPTION_PROLOG_0(PACA_EXMC)
>> +b   machine_check_pSeries_0
>> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>> +
>>  EXC_COMMON_BEGIN(machine_check_common)
>>  /*
>>   * Machine check is different because we use a different
>> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
>> index efdd16a79075..221271c96a57 100644
>> --- a/arch/powerpc/kernel/mce.c
>> +++ b/arch/powerpc/kernel/mce.c
>> @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
>>  {
>>  long handled = 0;
>>  
>> -__this_cpu_inc(irq_stat.mce_exceptions);
>> +/*
>> + 

Re: [PATCH v6 5/8] powerpc/pseries: flush SLB contents on SLB MCE errors.

2018-08-01 Thread Nicholas Piggin
On Wed, 04 Jul 2018 23:28:21 +0530
Mahesh J Salgaonkar  wrote:

> From: Mahesh Salgaonkar 
> 
> On pseries, as of today, the system crashes if we get a machine check
> exception due to SLB errors. These are soft errors and can be fixed by
> flushing the SLBs, so the kernel can continue to function instead of
> crashing. We do this in real mode before turning on the MMU; otherwise
> we would run into nested machine checks. This patch now fetches the
> RTAS error log in real mode and flushes the SLBs on SLB errors.
> 
> Signed-off-by: Mahesh Salgaonkar 
> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |1 
>  arch/powerpc/include/asm/machdep.h|1 
>  arch/powerpc/kernel/exceptions-64s.S  |   42 +
>  arch/powerpc/kernel/mce.c |   16 +++-
>  arch/powerpc/mm/slb.c |6 +++
>  arch/powerpc/platforms/pseries/pseries.h  |1 
>  arch/powerpc/platforms/pseries/ras.c  |   51 +
>  arch/powerpc/platforms/pseries/setup.c|1 
>  8 files changed, 116 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> index 50ed64fba4ae..cc00a7088cf3 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -487,6 +487,7 @@ extern void hpte_init_native(void);
>  
>  extern void slb_initialize(void);
>  extern void slb_flush_and_rebolt(void);
> +extern void slb_flush_and_rebolt_realmode(void);
>  
>  extern void slb_vmalloc_update(void);
>  extern void slb_set_size(u16 size);
> diff --git a/arch/powerpc/include/asm/machdep.h 
> b/arch/powerpc/include/asm/machdep.h
> index ffe7c71e1132..fe447e0d4140 100644
> --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -108,6 +108,7 @@ struct machdep_calls {
>  
>   /* Early exception handlers called in realmode */
>   int (*hmi_exception_early)(struct pt_regs *regs);
> + int (*machine_check_early)(struct pt_regs *regs);
>  
>   /* Called during machine check exception to retrive fixup address. */
>   bool(*mce_check_early_recovery)(struct pt_regs *regs);
> diff --git a/arch/powerpc/kernel/exceptions-64s.S 
> b/arch/powerpc/kernel/exceptions-64s.S
> index f283958129f2..0038596b7906 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -332,6 +332,9 @@ TRAMP_REAL_BEGIN(machine_check_pSeries)
>  machine_check_fwnmi:
>   SET_SCRATCH0(r13)   /* save r13 */
>   EXCEPTION_PROLOG_0(PACA_EXMC)
> +BEGIN_FTR_SECTION
> + b   machine_check_pSeries_early
> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>  machine_check_pSeries_0:
>   EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
>   /*
> @@ -343,6 +346,45 @@ machine_check_pSeries_0:
>  
>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
>  
> +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> +BEGIN_FTR_SECTION
> + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> + mr  r10,r1  /* Save r1 */
> + ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency stack */
> + subir1,r1,INT_FRAME_SIZE/* alloc stack frame*/
> + mfspr   r11,SPRN_SRR0   /* Save SRR0 */
> + mfspr   r12,SPRN_SRR1   /* Save SRR1 */
> + EXCEPTION_PROLOG_COMMON_1()
> + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> + EXCEPTION_PROLOG_COMMON_3(0x200)
> + addir3,r1,STACK_FRAME_OVERHEAD
> + BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
> +
> + /* Move original SRR0 and SRR1 into the respective regs */
> + ld  r9,_MSR(r1)
> + mtspr   SPRN_SRR1,r9
> + ld  r3,_NIP(r1)
> + mtspr   SPRN_SRR0,r3
> + ld  r9,_CTR(r1)
> + mtctr   r9
> + ld  r9,_XER(r1)
> + mtxer   r9
> + ld  r9,_LINK(r1)
> + mtlrr9
> + REST_GPR(0, r1)
> + REST_8GPRS(2, r1)
> + REST_GPR(10, r1)
> + ld  r11,_CCR(r1)
> + mtcrr11
> + REST_GPR(11, r1)
> + REST_2GPRS(12, r1)
> + /* restore original r1. */
> + ld  r1,GPR1(r1)
> + SET_SCRATCH0(r13)   /* save r13 */
> + EXCEPTION_PROLOG_0(PACA_EXMC)
> + b   machine_check_pSeries_0
> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
> +
>  EXC_COMMON_BEGIN(machine_check_common)
>   /*
>* Machine check is different because we use a different
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index efdd16a79075..221271c96a57 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
>  {
>   long handled = 0;
>  
> - __this_cpu_inc(irq_stat.mce_exceptions);
> + /*
> +  * For pSeries we count mce when we go into virtual mode machine
> +  * check handler. Hence skip it. Also, We can't 

Re: [PATCH v6 5/8] powerpc/pseries: flush SLB contents on SLB MCE errors.

2018-07-10 Thread Michal Suchánek
Hello,

On Wed, 04 Jul 2018 23:28:21 +0530
"Mahesh J Salgaonkar"  wrote:

> From: Mahesh Salgaonkar 
> 
> On pseries, as of today, the system crashes if we get a machine check
> exception due to SLB errors. These are soft errors and can be fixed
> by flushing the SLBs, so the kernel can continue to function instead of
> crashing. We do this in real mode before turning on the MMU; otherwise
> we would run into nested machine checks. This patch now fetches the
> RTAS error log in real mode and flushes the SLBs on SLB errors.
> 
> Signed-off-by: Mahesh Salgaonkar 
> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |1 
>  arch/powerpc/include/asm/machdep.h|1 
>  arch/powerpc/kernel/exceptions-64s.S  |   42 +
>  arch/powerpc/kernel/mce.c |   16 +++-
>  arch/powerpc/mm/slb.c |6 +++
>  arch/powerpc/platforms/pseries/pseries.h  |1 
>  arch/powerpc/platforms/pseries/ras.c  |   51 +
>  arch/powerpc/platforms/pseries/setup.c|1 
>  8 files changed, 116 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index
> 50ed64fba4ae..cc00a7088cf3 100644 ---
> a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++
> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -487,6 +487,7 @@
> extern void hpte_init_native(void); 
>  extern void slb_initialize(void);
>  extern void slb_flush_and_rebolt(void);
> +extern void slb_flush_and_rebolt_realmode(void);
>  
>  extern void slb_vmalloc_update(void);
>  extern void slb_set_size(u16 size);
> diff --git a/arch/powerpc/include/asm/machdep.h
> b/arch/powerpc/include/asm/machdep.h index ffe7c71e1132..fe447e0d4140
> 100644 --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -108,6 +108,7 @@ struct machdep_calls {
>  
>   /* Early exception handlers called in realmode */
>   int (*hmi_exception_early)(struct pt_regs
> *regs);
> + int (*machine_check_early)(struct pt_regs
> *regs); 
>   /* Called during machine check exception to retrive fixup
> address. */ bool  (*mce_check_early_recovery)(struct
> pt_regs *regs); diff --git a/arch/powerpc/kernel/exceptions-64s.S
> b/arch/powerpc/kernel/exceptions-64s.S index
> f283958129f2..0038596b7906 100644 ---
> a/arch/powerpc/kernel/exceptions-64s.S +++
> b/arch/powerpc/kernel/exceptions-64s.S @@ -332,6 +332,9 @@
> TRAMP_REAL_BEGIN(machine_check_pSeries) machine_check_fwnmi:
>   SET_SCRATCH0(r13)   /* save r13 */
>   EXCEPTION_PROLOG_0(PACA_EXMC)
> +BEGIN_FTR_SECTION
> + b   machine_check_pSeries_early
> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>  machine_check_pSeries_0:
>   EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
>   /*
> @@ -343,6 +346,45 @@ machine_check_pSeries_0:
>  
>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
>  
> +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> +BEGIN_FTR_SECTION
> + EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> + mr  r10,r1  /* Save r1 */
> + ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency
> stack */
> + subir1,r1,INT_FRAME_SIZE/* alloc stack
> frame */
> + mfspr   r11,SPRN_SRR0   /* Save SRR0 */
> + mfspr   r12,SPRN_SRR1   /* Save SRR1 */
> + EXCEPTION_PROLOG_COMMON_1()
> + EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> + EXCEPTION_PROLOG_COMMON_3(0x200)
> + addir3,r1,STACK_FRAME_OVERHEAD
> + BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI
> */ +
> + /* Move original SRR0 and SRR1 into the respective regs */
> + ld  r9,_MSR(r1)
> + mtspr   SPRN_SRR1,r9
> + ld  r3,_NIP(r1)
> + mtspr   SPRN_SRR0,r3
> + ld  r9,_CTR(r1)
> + mtctr   r9
> + ld  r9,_XER(r1)
> + mtxer   r9
> + ld  r9,_LINK(r1)
> + mtlrr9
> + REST_GPR(0, r1)
> + REST_8GPRS(2, r1)
> + REST_GPR(10, r1)
> + ld  r11,_CCR(r1)
> + mtcrr11
> + REST_GPR(11, r1)
> + REST_2GPRS(12, r1)
> + /* restore original r1. */
> + ld  r1,GPR1(r1)
> + SET_SCRATCH0(r13)   /* save r13 */
> + EXCEPTION_PROLOG_0(PACA_EXMC)
> + b   machine_check_pSeries_0
> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
> +
>  EXC_COMMON_BEGIN(machine_check_common)
>   /*
>* Machine check is different because we use a different
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index efdd16a79075..221271c96a57 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
>  {
>   long handled = 0;
>  
> - __this_cpu_inc(irq_stat.mce_exceptions);
> + /*
> +  * For pSeries we count mce when we go into virtual mode
> machine
> +  * check handler. Hence skip it. Also, We can't access per
> cpu
> +  * 

[PATCH v6 5/8] powerpc/pseries: flush SLB contents on SLB MCE errors.

2018-07-04 Thread Mahesh J Salgaonkar
From: Mahesh Salgaonkar 

On pseries, as of today, the system crashes if we get a machine check
exception due to SLB errors. These are soft errors and can be fixed by
flushing the SLBs, so the kernel can continue to function instead of
crashing. We do this in real mode before turning on the MMU; otherwise
we would run into nested machine checks. This patch now fetches the
RTAS error log in real mode and flushes the SLBs on SLB errors.

Signed-off-by: Mahesh Salgaonkar 
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |1 
 arch/powerpc/include/asm/machdep.h|1 
 arch/powerpc/kernel/exceptions-64s.S  |   42 +
 arch/powerpc/kernel/mce.c |   16 +++-
 arch/powerpc/mm/slb.c |6 +++
 arch/powerpc/platforms/pseries/pseries.h  |1 
 arch/powerpc/platforms/pseries/ras.c  |   51 +
 arch/powerpc/platforms/pseries/setup.c|1 
 8 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 50ed64fba4ae..cc00a7088cf3 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -487,6 +487,7 @@ extern void hpte_init_native(void);
 
 extern void slb_initialize(void);
 extern void slb_flush_and_rebolt(void);
+extern void slb_flush_and_rebolt_realmode(void);
 
 extern void slb_vmalloc_update(void);
 extern void slb_set_size(u16 size);
diff --git a/arch/powerpc/include/asm/machdep.h 
b/arch/powerpc/include/asm/machdep.h
index ffe7c71e1132..fe447e0d4140 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -108,6 +108,7 @@ struct machdep_calls {
 
/* Early exception handlers called in realmode */
int (*hmi_exception_early)(struct pt_regs *regs);
+   int (*machine_check_early)(struct pt_regs *regs);
 
/* Called during machine check exception to retrive fixup address. */
bool(*mce_check_early_recovery)(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index f283958129f2..0038596b7906 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -332,6 +332,9 @@ TRAMP_REAL_BEGIN(machine_check_pSeries)
 machine_check_fwnmi:
SET_SCRATCH0(r13)   /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+   b   machine_check_pSeries_early
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 machine_check_pSeries_0:
EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
/*
@@ -343,6 +346,45 @@ machine_check_pSeries_0:
 
 TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
 
+TRAMP_REAL_BEGIN(machine_check_pSeries_early)
+BEGIN_FTR_SECTION
+   EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
+   mr  r10,r1  /* Save r1 */
+   ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency stack */
+   subir1,r1,INT_FRAME_SIZE/* alloc stack frame*/
+   mfspr   r11,SPRN_SRR0   /* Save SRR0 */
+   mfspr   r12,SPRN_SRR1   /* Save SRR1 */
+   EXCEPTION_PROLOG_COMMON_1()
+   EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+   EXCEPTION_PROLOG_COMMON_3(0x200)
+   addir3,r1,STACK_FRAME_OVERHEAD
+   BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI */
+
+   /* Move original SRR0 and SRR1 into the respective regs */
+   ld  r9,_MSR(r1)
+   mtspr   SPRN_SRR1,r9
+   ld  r3,_NIP(r1)
+   mtspr   SPRN_SRR0,r3
+   ld  r9,_CTR(r1)
+   mtctr   r9
+   ld  r9,_XER(r1)
+   mtxer   r9
+   ld  r9,_LINK(r1)
+   mtlrr9
+   REST_GPR(0, r1)
+   REST_8GPRS(2, r1)
+   REST_GPR(10, r1)
+   ld  r11,_CCR(r1)
+   mtcrr11
+   REST_GPR(11, r1)
+   REST_2GPRS(12, r1)
+   /* restore original r1. */
+   ld  r1,GPR1(r1)
+   SET_SCRATCH0(r13)   /* save r13 */
+   EXCEPTION_PROLOG_0(PACA_EXMC)
+   b   machine_check_pSeries_0
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+
 EXC_COMMON_BEGIN(machine_check_common)
/*
 * Machine check is different because we use a different
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index efdd16a79075..221271c96a57 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -488,9 +488,21 @@ long machine_check_early(struct pt_regs *regs)
 {
long handled = 0;
 
-   __this_cpu_inc(irq_stat.mce_exceptions);
+   /*
+* For pSeries we count mce when we go into virtual mode machine
+* check handler. Hence skip it. Also, We can't access per cpu
+* variables in real mode for LPAR.
+*/
+   if (early_cpu_has_feature(CPU_FTR_HVMODE))
+   __this_cpu_inc(irq_stat.mce_exceptions);
 
-   if (cur_cpu_spec