From: Aravind Gopalakrishnan <[email protected]>

For upcoming processors with the Scalable MCA feature, we need to check
the "succor" CPUID bit and the TCC bit in the MCx_STATUS register in
order to grade an MCE's severity.

Signed-off-by: Aravind Gopalakrishnan <[email protected]>
Cc: Aravind Gopalakrishnan <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: linux-edac <[email protected]>
Cc: x86-ml <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Yazen Ghannam <[email protected]>
[ Simplify code flow, shorten comments. ]
Signed-off-by: Borislav Petkov <[email protected]>
---
 arch/x86/kernel/cpu/mcheck/mce-severity.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5119766d9889..631356c8cca4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,6 +204,33 @@ static int error_context(struct mce *m)
        return IN_KERNEL;
 }
 
+static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+{
+       u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+       u32 low, high;
+
+       /*
+        * Severity depends on two bits:
+        * - the "succor" CPUID bit (data poisoning support), and
+        * - the TCC bit (Task Context Corrupt) in MCi_STATUS.
+        * No succor, or a non-zero rdmsr_safe() on MCx_CONFIG, means panic.
+        */
+       if (!mce_flags.succor)
+               return MCE_PANIC_SEVERITY;
+
+       if (rdmsr_safe(addr, &low, &high))
+               return MCE_PANIC_SEVERITY;
+
+       /* MCAX set in MCx_CONFIG, TCC set in status and IN_KERNEL: panic. */
+       if ((low & MCI_CONFIG_MCAX) &&
+           (m->status & MCI_STATUS_TCC) &&
+           (err_ctx == IN_KERNEL))
+               return MCE_PANIC_SEVERITY;
+
+        /* ...otherwise invoke hwpoison handler. */
+       return MCE_AR_SEVERITY;
+}
+
 /*
  * See AMD Error Scope Hierarchy table in a newer BKDG. For example
  * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
                 * to at least kill process to prolong system operation.
                 */
                if (mce_flags.overflow_recov) {
+                       if (mce_flags.smca)
+                               return mce_severity_amd_smca(m, ctx);
+
                        /* software can try to contain */
                        if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == 
IN_KERNEL))
                                return MCE_PANIC_SEVERITY;
-- 
2.7.3

Reply via email to