Hello,

On Tue, 07 Aug 2018 19:47:14 +0530
"Mahesh J Salgaonkar" <mah...@linux.vnet.ibm.com> wrote:

> From: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>
> 
> On pseries, as of today system crashes if we get a machine check
> exceptions due to SLB errors. These are soft errors and can be fixed
> by flushing the SLBs so the kernel can continue to function instead of
> system crash. We do this in real mode before turning on MMU. Otherwise
> we would run into nested machine checks. This patch now fetches the
> rtas error log in real mode and flushes the SLBs on SLB errors.
> 
> Signed-off-by: Mahesh Salgaonkar <mah...@linux.vnet.ibm.com>
> Signed-off-by: Michal Suchanek <msucha...@suse.com>
> ---
> 
> Changes in V7:
> - Fold Michal's patch into this patch.
> - Handle MSR_RI=0 and evil context case in MC handler.
> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |    1 
>  arch/powerpc/include/asm/machdep.h            |    1 
>  arch/powerpc/kernel/exceptions-64s.S          |  112
> +++++++++++++++++++++++++
> arch/powerpc/kernel/mce.c                     |   15 +++
> arch/powerpc/mm/slb.c                         |    6 +
> arch/powerpc/platforms/powernv/setup.c        |   11 ++
> arch/powerpc/platforms/pseries/pseries.h      |    1
> arch/powerpc/platforms/pseries/ras.c          |   51 +++++++++++
> arch/powerpc/platforms/pseries/setup.c        |    1 9 files changed,
> 195 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index
> 50ed64fba4ae..cc00a7088cf3 100644 ---
> a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++
> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -487,6 +487,7 @@
> extern void hpte_init_native(void); 
>  extern void slb_initialize(void);
>  extern void slb_flush_and_rebolt(void);
> +extern void slb_flush_and_rebolt_realmode(void);
>  
>  extern void slb_vmalloc_update(void);
>  extern void slb_set_size(u16 size);
> diff --git a/arch/powerpc/include/asm/machdep.h
> b/arch/powerpc/include/asm/machdep.h index a47de82fb8e2..b4831f1338db
> 100644 --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -108,6 +108,7 @@ struct machdep_calls {
>  
>       /* Early exception handlers called in realmode */
>       int             (*hmi_exception_early)(struct pt_regs
> *regs);
> +     long            (*machine_check_early)(struct pt_regs
> *regs); 
>       /* Called during machine check exception to retrive fixup
> address. */ bool              (*mce_check_early_recovery)(struct
> pt_regs *regs); diff --git a/arch/powerpc/kernel/exceptions-64s.S
> b/arch/powerpc/kernel/exceptions-64s.S index
> 285c6465324a..cb06f219570a 100644 ---
> a/arch/powerpc/kernel/exceptions-64s.S +++
> b/arch/powerpc/kernel/exceptions-64s.S @@ -332,6 +332,9 @@
> TRAMP_REAL_BEGIN(machine_check_pSeries) machine_check_fwnmi:
>       SET_SCRATCH0(r13)               /* save r13 */
>       EXCEPTION_PROLOG_0(PACA_EXMC)
> +BEGIN_FTR_SECTION
> +     b       machine_check_pSeries_early
> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
>  machine_check_pSeries_0:
>       EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
>       /*
> @@ -343,6 +346,90 @@ machine_check_pSeries_0:
>  
>  TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
>  
> +TRAMP_REAL_BEGIN(machine_check_pSeries_early)
> +BEGIN_FTR_SECTION
> +     EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> +     mr      r10,r1                  /* Save r1 */
> +     ld      r1,PACAMCEMERGSP(r13)   /* Use MC emergency
> stack */
> +     subi    r1,r1,INT_FRAME_SIZE    /* alloc stack
> frame         */
> +     mfspr   r11,SPRN_SRR0           /* Save SRR0 */
> +     mfspr   r12,SPRN_SRR1           /* Save SRR1 */
> +     EXCEPTION_PROLOG_COMMON_1()
> +     EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
> +     EXCEPTION_PROLOG_COMMON_3(0x200)
> +     addi    r3,r1,STACK_FRAME_OVERHEAD
> +     BRANCH_LINK_TO_FAR(machine_check_early) /* Function call ABI
> */
> +     ld      r12,_MSR(r1)
> +     andi.   r11,r12,MSR_PR          /* See if coming
> from user. */
> +     bne     2f                      /* continue in V mode
> if we are. */ +
> +     /*
> +      * At this point we are not sure about what context we come
> from.
> +      * We may be in the middle of swithing stack. r1 may not be
> valid.
> +      * Hence stay on emergency stack, call
> machine_check_exception and
> +      * return from the interrupt.
> +      * But before that, check if this is an un-recoverable
> exception.
> +      * If yes, then stay on emergency stack and panic.
> +      */
> +     andi.   r11,r12,MSR_RI
> +     bne     1f
> +
> +     /*
> +      * Check if we have successfully handled/recovered from
> error, if not
> +      * then stay on emergency stack and panic.
> +      */
> +     cmpdi   r3,0            /* see if we handled MCE
> successfully */
> +     bne     1f              /* if handled then return from
> interrupt */ +
> +     LOAD_HANDLER(r10,unrecover_mce)
> +     mtspr   SPRN_SRR0,r10
> +     ld      r10,PACAKMSR(r13)
> +     /*
> +      * We are going down. But there are chances that we might
> get hit by
> +      * another MCE during panic path and we may run into
> unstable state
> +      * with no way out. Hence, turn ME bit off while going down,
> so that
> +      * when another MCE is hit during panic path, hypervisor will
> +      * power cycle the lpar, instead of getting into MCE loop.
> +      */
> +     li      r3,MSR_ME
> +     andc    r10,r10,r3              /* Turn off MSR_ME */
> +     mtspr   SPRN_SRR1,r10
> +     RFI_TO_KERNEL
> +     b       .
> +
> +     /* Stay on emergency stack and return from interrupt. */
> +1:   LOAD_HANDLER(r10,mce_return)
> +     mtspr   SPRN_SRR0,r10
> +     ld      r10,PACAKMSR(r13)
> +     mtspr   SPRN_SRR1,r10
> +     RFI_TO_KERNEL
> +     b       .

I think that the logic should be inverted here. That is, we should check
for unrecoverable and unhandled exceptions and jump to unrecover_mce if
either is found, and fall through to mce_return otherwise.

Thanks

Michal


> +
> +     /* Move original SRR0 and SRR1 into the respective regs */
> +2:   ld      r9,_MSR(r1)
> +     mtspr   SPRN_SRR1,r9
> +     ld      r3,_NIP(r1)
> +     mtspr   SPRN_SRR0,r3
> +     ld      r9,_CTR(r1)
> +     mtctr   r9
> +     ld      r9,_XER(r1)
> +     mtxer   r9
> +     ld      r9,_LINK(r1)
> +     mtlr    r9
> +     REST_GPR(0, r1)
> +     REST_8GPRS(2, r1)
> +     REST_GPR(10, r1)
> +     ld      r11,_CCR(r1)
> +     mtcr    r11
> +     REST_GPR(11, r1)
> +     REST_2GPRS(12, r1)
> +     /* restore original r1. */
> +     ld      r1,GPR1(r1)
> +     SET_SCRATCH0(r13)               /* save r13 */
> +     EXCEPTION_PROLOG_0(PACA_EXMC)
> +     b       machine_check_pSeries_0
> +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
> +
>  EXC_COMMON_BEGIN(machine_check_common)
>       /*
>        * Machine check is different because we use a different
> @@ -536,6 +623,31 @@ EXC_COMMON_BEGIN(unrecover_mce)
>       bl      unrecoverable_exception
>       b       1b
>  
> +EXC_COMMON_BEGIN(mce_return)
> +     /* Invoke machine_check_exception to print MCE event and
> return. */
> +     addi    r3,r1,STACK_FRAME_OVERHEAD
> +     bl      machine_check_exception
> +     ld      r9,_MSR(r1)
> +     mtspr   SPRN_SRR1,r9
> +     ld      r3,_NIP(r1)
> +     mtspr   SPRN_SRR0,r3
> +     ld      r9,_CTR(r1)
> +     mtctr   r9
> +     ld      r9,_XER(r1)
> +     mtxer   r9
> +     ld      r9,_LINK(r1)
> +     mtlr    r9
> +     REST_GPR(0, r1)
> +     REST_8GPRS(2, r1)
> +     REST_GPR(10, r1)
> +     ld      r11,_CCR(r1)
> +     mtcr    r11
> +     REST_GPR(11, r1)
> +     REST_2GPRS(12, r1)
> +     /* restore original r1. */
> +     ld      r1,GPR1(r1)
> +     RFI_TO_KERNEL
> +     b       .
>  
>  EXC_REAL(data_access, 0x300, 0x80)
>  EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
> diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
> index efdd16a79075..ae17d8aa60c4 100644
> --- a/arch/powerpc/kernel/mce.c
> +++ b/arch/powerpc/kernel/mce.c
> @@ -488,10 +488,19 @@ long machine_check_early(struct pt_regs *regs)
>  {
>       long handled = 0;
>  
> -     __this_cpu_inc(irq_stat.mce_exceptions);
> +     /*
> +      * For pSeries we count mce when we go into virtual mode
> machine
> +      * check handler. Hence skip it. Also, We can't access per
> cpu
> +      * variables in real mode for LPAR.
> +      */
> +     if (early_cpu_has_feature(CPU_FTR_HVMODE))
> +             __this_cpu_inc(irq_stat.mce_exceptions);
>  
> -     if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
> -             handled = cur_cpu_spec->machine_check_early(regs);
> +     /*
> +      * See if platform is capable of handling machine check.
> +      */
> +     if (ppc_md.machine_check_early)
> +             handled = ppc_md.machine_check_early(regs);
>       return handled;
>  }
>  
> diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
> index cb796724a6fc..e89f675f1b5e 100644
> --- a/arch/powerpc/mm/slb.c
> +++ b/arch/powerpc/mm/slb.c
> @@ -145,6 +145,12 @@ void slb_flush_and_rebolt(void)
>       get_paca()->slb_cache_ptr = 0;
>  }
>  
> +void slb_flush_and_rebolt_realmode(void)
> +{
> +     __slb_flush_and_rebolt();
> +     get_paca()->slb_cache_ptr = 0;
> +}
> +
>  void slb_vmalloc_update(void)
>  {
>       unsigned long vflags;
> diff --git a/arch/powerpc/platforms/powernv/setup.c
> b/arch/powerpc/platforms/powernv/setup.c index
> f96df0a25d05..b74c93bc2e55 100644 ---
> a/arch/powerpc/platforms/powernv/setup.c +++
> b/arch/powerpc/platforms/powernv/setup.c @@ -431,6 +431,16 @@ static
> unsigned long pnv_get_proc_freq(unsigned int cpu) return ret_freq;
>  }
>  
> +static long pnv_machine_check_early(struct pt_regs *regs)
> +{
> +     long handled = 0;
> +
> +     if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
> +             handled = cur_cpu_spec->machine_check_early(regs);
> +
> +     return handled;
> +}
> +
>  define_machine(powernv) {
>       .name                   = "PowerNV",
>       .probe                  = pnv_probe,
> @@ -442,6 +452,7 @@ define_machine(powernv) {
>       .machine_shutdown       = pnv_shutdown,
>       .power_save             = NULL,
>       .calibrate_decr         = generic_calibrate_decr,
> +     .machine_check_early    = pnv_machine_check_early,
>  #ifdef CONFIG_KEXEC_CORE
>       .kexec_cpu_down         = pnv_kexec_cpu_down,
>  #endif
> diff --git a/arch/powerpc/platforms/pseries/pseries.h
> b/arch/powerpc/platforms/pseries/pseries.h index
> 60db2ee511fb..ec2a5f61d4a4 100644 ---
> a/arch/powerpc/platforms/pseries/pseries.h +++
> b/arch/powerpc/platforms/pseries/pseries.h @@ -24,6 +24,7 @@ struct
> pt_regs; 
>  extern int pSeries_system_reset_exception(struct pt_regs *regs);
>  extern int pSeries_machine_check_exception(struct pt_regs *regs);
> +extern long pSeries_machine_check_realmode(struct pt_regs *regs);
>  
>  #ifdef CONFIG_SMP
>  extern void smp_init_pseries(void);
> diff --git a/arch/powerpc/platforms/pseries/ras.c
> b/arch/powerpc/platforms/pseries/ras.c index
> 851ce326874a..e4420f7c8fda 100644 ---
> a/arch/powerpc/platforms/pseries/ras.c +++
> b/arch/powerpc/platforms/pseries/ras.c @@ -427,6 +427,35 @@ int
> pSeries_system_reset_exception(struct pt_regs *regs) return 0; /*
> need to perform reset */ }
>  
> +static int mce_handle_error(struct rtas_error_log *errp)
> +{
> +     struct pseries_errorlog *pseries_log;
> +     struct pseries_mc_errorlog *mce_log;
> +     int disposition = rtas_error_disposition(errp);
> +     uint8_t error_type;
> +
> +     if (!rtas_error_extended(errp))
> +             goto out;
> +
> +     pseries_log = get_pseries_errorlog(errp,
> PSERIES_ELOG_SECT_ID_MCE);
> +     if (pseries_log == NULL)
> +             goto out;
> +
> +     mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
> +     error_type = rtas_mc_error_type(mce_log);
> +
> +     if ((disposition == RTAS_DISP_NOT_RECOVERED) &&
> +                     (error_type == PSERIES_MC_ERROR_TYPE_SLB)) {
> +             /* Store the old slb content someplace. */
> +             slb_flush_and_rebolt_realmode();
> +             disposition = RTAS_DISP_FULLY_RECOVERED;
> +             rtas_set_disposition_recovered(errp);
> +     }
> +
> +out:
> +     return disposition;
> +}
> +
>  /*
>   * Process MCE rtas errlog event.
>   */
> @@ -503,11 +532,31 @@ int pSeries_machine_check_exception(struct
> pt_regs *regs) struct rtas_error_log *errp;
>  
>       if (fwnmi_active) {
> -             errp = fwnmi_get_errinfo(regs);
>               fwnmi_release_errinfo();
> +             errp = fwnmi_get_errlog();
>               if (errp && recover_mce(regs, errp))
>                       return 1;
>       }
>  
>       return 0;
>  }
> +
> +long pSeries_machine_check_realmode(struct pt_regs *regs)
> +{
> +     struct rtas_error_log *errp;
> +     int disposition;
> +
> +     if (fwnmi_active) {
> +             errp = fwnmi_get_errinfo(regs);
> +             /*
> +              * Call to fwnmi_release_errinfo() in real mode
> causes kernel
> +              * to panic. Hence we will call it as soon as we go
> into
> +              * virtual mode.
> +              */
> +             disposition = mce_handle_error(errp);
> +             if (disposition == RTAS_DISP_FULLY_RECOVERED)
> +                     return 1;
> +     }
> +
> +     return 0;
> +}
> diff --git a/arch/powerpc/platforms/pseries/setup.c
> b/arch/powerpc/platforms/pseries/setup.c index
> b42087cd8c6b..7a9421d089d8 100644 ---
> a/arch/powerpc/platforms/pseries/setup.c +++
> b/arch/powerpc/platforms/pseries/setup.c @@ -1000,6 +1000,7 @@
> define_machine(pseries) { .calibrate_decr             =
> generic_calibrate_decr, .progress             = rtas_progress,
>       .system_reset_exception = pSeries_system_reset_exception,
> +     .machine_check_early    = pSeries_machine_check_realmode,
>       .machine_check_exception = pSeries_machine_check_exception,
>  #ifdef CONFIG_KEXEC_CORE
>       .machine_kexec          = pSeries_machine_kexec,
> 
> 

Reply via email to