[GIT PULL] RAS changes for v4.7

Ingo Molnar Mon, 16 May 2016 10:05:11 -0700

Linus,

Please pull the latest ras-core-for-linus git tree from:


   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git ras-core-for-linus

   # HEAD: 754a92305980b1fecffe033dd3fdc49c37f8e4b0 x86/RAS: Add SMCA support to AMD Error Injector

Main changes in this cycle were:

 - AMD MCE/RAS handling updates (Yazen Ghannam, Aravind Gopalakrishnan)
 - Cleanups (Borislav Petkov)
 - logging fix (Tony Luck)

 Thanks,

        Ingo

------------------>
Aravind Gopalakrishnan (3):
      x86/mce: Log MCEs after a warm reset on AMD, Fam17h and later
      x86/mce: Grade uncorrected errors for SMCA-enabled systems
      x86/mce: Carve out writes to MCx_STATUS and MCx_CTL

Borislav Petkov (2):
      x86/RAS: Rename AMD MCE injector config item
      x86/mce/AMD: Save an indentation level in prepare_threshold_block()

Davidlohr Bueso (1):
      x86/mce: Remove explicit smp_rmb() when starting CPUs sync

Tony Luck (1):
      x86/mce: Look in genpool instead of mcelog for pending error records

Yazen Ghannam (9):
      x86/mce: Define vendor-specific MSR accessors
      x86/mce: Detect and use SMCA-specific msr_ops
      x86/mce: Detect local MCEs properly
      x86/mce/AMD: Log Deferred Errors using SMCA MCA_DE{STAT,ADDR} registers
      x86/mce/AMD: Disable LogDeferredInMcaStat for SMCA systems
      x86/cpu: Add detection of AMD RAS Capabilities
      x86/mce: Update AMD mcheck init to use cpu_has() facilities
      EDAC, mce_amd: Detect SMCA using X86_FEATURE_SMCA
      x86/RAS: Add SMCA support to AMD Error Injector


 arch/x86/include/asm/cpufeature.h         |   1 +
 arch/x86/include/asm/cpufeatures.h        |   7 +-
 arch/x86/include/asm/mce.h                |  19 ++++
 arch/x86/kernel/cpu/common.c              |  10 +-
 arch/x86/kernel/cpu/mcheck/mce-genpool.c  |  46 +++++++++
 arch/x86/kernel/cpu/mcheck/mce-internal.h |  15 +++
 arch/x86/kernel/cpu/mcheck/mce-severity.c |  30 ++++++
 arch/x86/kernel/cpu/mcheck/mce.c          | 160 +++++++++++++++++++++---------
 arch/x86/kernel/cpu/mcheck/mce_amd.c      |  94 ++++++++++++------
 arch/x86/ras/Kconfig                      |   2 +-
 arch/x86/ras/Makefile                     |   2 +-
 arch/x86/ras/mce_amd_inj.c                |  31 ++++--
 drivers/edac/mce_amd.c                    |   9 +-
 13 files changed, 331 insertions(+), 95 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 3636ec06c887..53ac9bbf2064 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -27,6 +27,7 @@ enum cpuid_leafs
        CPUID_6_EAX,
        CPUID_8000_000A_EDX,
        CPUID_7_ECX,
+       CPUID_8000_0007_EBX,
 };
 
 #ifdef CONFIG_X86_FEATURE_NAMES
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 8f9afefd2dc5..d4e5018e9a44 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -12,7 +12,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS       17      /* N 32-bit words worth of info */
+#define NCAPINTS       18      /* N 32-bit words worth of info */
 #define NBUGINTS       1       /* N 32-bit bug flags */
 
 /*
@@ -280,6 +280,11 @@
 #define X86_FEATURE_PKU                (16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE      (16*32+ 4) /* OS Protection Keys Enable */
 
+/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR     (17*32+1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA       (17*32+3) /* Scalable MCA */
+
 /*
  * BUG word(s)
  */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 92b6f651fa4f..8bf766ef0e18 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -104,13 +104,23 @@
 #define MCE_LOG_SIGNATURE      "MACHINECHECK"
 
 /* AMD Scalable MCA */
+#define MSR_AMD64_SMCA_MC0_CTL         0xc0002000
+#define MSR_AMD64_SMCA_MC0_STATUS      0xc0002001
+#define MSR_AMD64_SMCA_MC0_ADDR                0xc0002002
 #define MSR_AMD64_SMCA_MC0_MISC0       0xc0002003
 #define MSR_AMD64_SMCA_MC0_CONFIG      0xc0002004
 #define MSR_AMD64_SMCA_MC0_IPID                0xc0002005
+#define MSR_AMD64_SMCA_MC0_DESTAT      0xc0002008
+#define MSR_AMD64_SMCA_MC0_DEADDR      0xc0002009
 #define MSR_AMD64_SMCA_MC0_MISC1       0xc000200a
+#define MSR_AMD64_SMCA_MCx_CTL(x)      (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_STATUS(x)   (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_ADDR(x)     (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISC(x)     (MSR_AMD64_SMCA_MC0_MISC0 + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_CONFIG(x)   (MSR_AMD64_SMCA_MC0_CONFIG + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_IPID(x)     (MSR_AMD64_SMCA_MC0_IPID + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DESTAT(x)   (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_DEADDR(x)   (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
 #define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + 
(0x10*(x)))
 
 /*
@@ -168,9 +178,18 @@ struct mce_vendor_flags {
 
              __reserved_0      : 61;
 };
+
+struct mca_msr_regs {
+       u32 (*ctl)      (int bank);
+       u32 (*status)   (int bank);
+       u32 (*addr)     (int bank);
+       u32 (*misc)     (int bank);
+};
+
 extern struct mce_vendor_flags mce_flags;
 
 extern struct mca_config mca_cfg;
+extern struct mca_msr_regs msr_ops;
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8394b3d1f94f..dbc6f066e231 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -717,6 +717,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
                }
        }
 
+       if (c->extended_cpuid_level >= 0x80000007) {
+               cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+
+               c->x86_capability[CPUID_8000_0007_EBX] = ebx;
+               c->x86_power = edx;
+       }
+
        if (c->extended_cpuid_level >= 0x80000008) {
                cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
 
@@ -729,9 +736,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
                c->x86_phys_bits = 36;
 #endif
 
-       if (c->extended_cpuid_level >= 0x80000007)
-               c->x86_power = cpuid_edx(0x80000007);
-
        if (c->extended_cpuid_level >= 0x8000000a)
                c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
index 2658e2af74ec..93d824ec3120 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -26,6 +26,52 @@ static struct gen_pool *mce_evt_pool;
 static LLIST_HEAD(mce_event_llist);
 static char gen_pool_buf[MCE_POOLSZ];
 
+/*
+ * Compare the record "t" with each of the records on list "l" to see if
+ * an equivalent one is present in the list.
+ */
+static bool is_duplicate_mce_record(struct mce_evt_llist *t, struct 
mce_evt_llist *l)
+{
+       struct mce_evt_llist *node;
+       struct mce *m1, *m2;
+
+       m1 = &t->mce;
+
+       llist_for_each_entry(node, &l->llnode, llnode) {
+               m2 = &node->mce;
+
+               if (!mce_cmp(m1, m2))
+                       return true;
+       }
+       return false;
+}
+
+/*
+ * The system has panicked - we'd like to peruse the list of MCE records
+ * that have been queued, but not seen by anyone yet.  The list is in
+ * reverse time order, so we need to reverse it. While doing that we can
+ * also drop duplicate records (these were logged because some banks are
+ * shared between cores or by all threads on a socket).
+ */
+struct llist_node *mce_gen_pool_prepare_records(void)
+{
+       struct llist_node *head;
+       LLIST_HEAD(new_head);
+       struct mce_evt_llist *node, *t;
+
+       head = llist_del_all(&mce_event_llist);
+       if (!head)
+               return NULL;
+
+       /* squeeze out duplicates while reversing order */
+       llist_for_each_entry_safe(node, t, head, llnode) {
+               if (!is_duplicate_mce_record(node, t))
+                       llist_add(&node->llnode, &new_head);
+       }
+
+       return new_head.first;
+}
+
 void mce_gen_pool_process(void)
 {
        struct llist_node *head;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 547720efd923..cd74a3f00aea 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -35,6 +35,7 @@ void mce_gen_pool_process(void);
 bool mce_gen_pool_empty(void);
 int mce_gen_pool_add(struct mce *mce);
 int mce_gen_pool_init(void);
+struct llist_node *mce_gen_pool_prepare_records(void);
 
 extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool 
is_excp);
 struct dentry *mce_get_debugfs_dir(void);
@@ -81,3 +82,17 @@ static inline int apei_clear_mce(u64 record_id)
 #endif
 
 void mce_inject_log(struct mce *m);
+
+/*
+ * We consider records to be equivalent if bank+status+addr+misc all match.
+ * This is only used when the system is going down because of a fatal error
+ * to avoid cluttering the console log with essentially repeated information.
+ * In normal processing all errors seen are logged.
+ */
+static inline bool mce_cmp(struct mce *m1, struct mce *m2)
+{
+       return m1->bank != m2->bank ||
+               m1->status != m2->status ||
+               m1->addr != m2->addr ||
+               m1->misc != m2->misc;
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 5119766d9889..631356c8cca4 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -204,6 +204,33 @@ static int error_context(struct mce *m)
        return IN_KERNEL;
 }
 
+static int mce_severity_amd_smca(struct mce *m, int err_ctx)
+{
+       u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
+       u32 low, high;
+
+       /*
+        * We need to look at the following bits:
+        * - "succor" bit (data poisoning support), and
+        * - TCC bit (Task Context Corrupt)
+        * in MCi_STATUS to determine error severity.
+        */
+       if (!mce_flags.succor)
+               return MCE_PANIC_SEVERITY;
+
+       if (rdmsr_safe(addr, &low, &high))
+               return MCE_PANIC_SEVERITY;
+
+       /* TCC (Task context corrupt). If set and if IN_KERNEL, panic. */
+       if ((low & MCI_CONFIG_MCAX) &&
+           (m->status & MCI_STATUS_TCC) &&
+           (err_ctx == IN_KERNEL))
+               return MCE_PANIC_SEVERITY;
+
+        /* ...otherwise invoke hwpoison handler. */
+       return MCE_AR_SEVERITY;
+}
+
 /*
  * See AMD Error Scope Hierarchy table in a newer BKDG. For example
  * 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
@@ -225,6 +252,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, 
char **msg, bool is_exc
                 * to at least kill process to prolong system operation.
                 */
                if (mce_flags.overflow_recov) {
+                       if (mce_flags.smca)
+                               return mce_severity_amd_smca(m, ctx);
+
                        /* software can try to contain */
                        if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == 
IN_KERNEL))
                                return MCE_PANIC_SEVERITY;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index f0c921b03e42..92e5e37d97bf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -161,7 +161,6 @@ void mce_log(struct mce *mce)
        if (!mce_gen_pool_add(mce))
                irq_work_queue(&mce_irq_work);
 
-       mce->finished = 0;
        wmb();
        for (;;) {
                entry = mce_log_get_idx_check(mcelog.next);
@@ -194,7 +193,6 @@ void mce_log(struct mce *mce)
        mcelog.entry[entry].finished = 1;
        wmb();
 
-       mce->finished = 1;
        set_bit(0, &mce_need_notify);
 }
 
@@ -224,6 +222,53 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
 
+static inline u32 ctl_reg(int bank)
+{
+       return MSR_IA32_MCx_CTL(bank);
+}
+
+static inline u32 status_reg(int bank)
+{
+       return MSR_IA32_MCx_STATUS(bank);
+}
+
+static inline u32 addr_reg(int bank)
+{
+       return MSR_IA32_MCx_ADDR(bank);
+}
+
+static inline u32 misc_reg(int bank)
+{
+       return MSR_IA32_MCx_MISC(bank);
+}
+
+static inline u32 smca_ctl_reg(int bank)
+{
+       return MSR_AMD64_SMCA_MCx_CTL(bank);
+}
+
+static inline u32 smca_status_reg(int bank)
+{
+       return MSR_AMD64_SMCA_MCx_STATUS(bank);
+}
+
+static inline u32 smca_addr_reg(int bank)
+{
+       return MSR_AMD64_SMCA_MCx_ADDR(bank);
+}
+
+static inline u32 smca_misc_reg(int bank)
+{
+       return MSR_AMD64_SMCA_MCx_MISC(bank);
+}
+
+struct mca_msr_regs msr_ops = {
+       .ctl    = ctl_reg,
+       .status = status_reg,
+       .addr   = addr_reg,
+       .misc   = misc_reg
+};
+
 static void print_mce(struct mce *m)
 {
        int ret = 0;
@@ -290,7 +335,9 @@ static void wait_for_panic(void)
 
 static void mce_panic(const char *msg, struct mce *final, char *exp)
 {
-       int i, apei_err = 0;
+       int apei_err = 0;
+       struct llist_node *pending;
+       struct mce_evt_llist *l;
 
        if (!fake_panic) {
                /*
@@ -307,11 +354,10 @@ static void mce_panic(const char *msg, struct mce *final, 
char *exp)
                if (atomic_inc_return(&mce_fake_panicked) > 1)
                        return;
        }
+       pending = mce_gen_pool_prepare_records();
        /* First print corrected ones that are still unlogged */
-       for (i = 0; i < MCE_LOG_LEN; i++) {
-               struct mce *m = &mcelog.entry[i];
-               if (!(m->status & MCI_STATUS_VAL))
-                       continue;
+       llist_for_each_entry(l, pending, llnode) {
+               struct mce *m = &l->mce;
                if (!(m->status & MCI_STATUS_UC)) {
                        print_mce(m);
                        if (!apei_err)
@@ -319,13 +365,11 @@ static void mce_panic(const char *msg, struct mce *final, 
char *exp)
                }
        }
        /* Now print uncorrected but with the final one last */
-       for (i = 0; i < MCE_LOG_LEN; i++) {
-               struct mce *m = &mcelog.entry[i];
-               if (!(m->status & MCI_STATUS_VAL))
-                       continue;
+       llist_for_each_entry(l, pending, llnode) {
+               struct mce *m = &l->mce;
                if (!(m->status & MCI_STATUS_UC))
                        continue;
-               if (!final || memcmp(m, final, sizeof(struct mce))) {
+               if (!final || mce_cmp(m, final)) {
                        print_mce(m);
                        if (!apei_err)
                                apei_err = apei_write_mce(m);
@@ -356,11 +400,11 @@ static int msr_to_offset(u32 msr)
 
        if (msr == mca_cfg.rip_msr)
                return offsetof(struct mce, ip);
-       if (msr == MSR_IA32_MCx_STATUS(bank))
+       if (msr == msr_ops.status(bank))
                return offsetof(struct mce, status);
-       if (msr == MSR_IA32_MCx_ADDR(bank))
+       if (msr == msr_ops.addr(bank))
                return offsetof(struct mce, addr);
-       if (msr == MSR_IA32_MCx_MISC(bank))
+       if (msr == msr_ops.misc(bank))
                return offsetof(struct mce, misc);
        if (msr == MSR_IA32_MCG_STATUS)
                return offsetof(struct mce, mcgstatus);
@@ -523,9 +567,9 @@ static struct notifier_block mce_srao_nb = {
 static void mce_read_aux(struct mce *m, int i)
 {
        if (m->status & MCI_STATUS_MISCV)
-               m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+               m->misc = mce_rdmsrl(msr_ops.misc(i));
        if (m->status & MCI_STATUS_ADDRV) {
-               m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+               m->addr = mce_rdmsrl(msr_ops.addr(i));
 
                /*
                 * Mask the reported address by the reported granularity.
@@ -607,7 +651,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
                m.tsc = 0;
 
                barrier();
-               m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+               m.status = mce_rdmsrl(msr_ops.status(i));
                if (!(m.status & MCI_STATUS_VAL))
                        continue;
 
@@ -654,7 +698,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t 
*b)
                /*
                 * Clear state for this bank.
                 */
-               mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+               mce_wrmsrl(msr_ops.status(i), 0);
        }
 
        /*
@@ -679,7 +723,7 @@ static int mce_no_way_out(struct mce *m, char **msg, 
unsigned long *validp,
        char *tmp;
 
        for (i = 0; i < mca_cfg.banks; i++) {
-               m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+               m->status = mce_rdmsrl(msr_ops.status(i));
                if (m->status & MCI_STATUS_VAL) {
                        __set_bit(i, validp);
                        if (quirk_no_way_out)
@@ -830,9 +874,9 @@ static int mce_start(int *no_way_out)
 
        atomic_add(*no_way_out, &global_nwo);
        /*
-        * global_nwo should be updated before mce_callin
+        * Rely on the implied barrier below, such that global_nwo
+        * is updated before mce_callin.
         */
-       smp_wmb();
        order = atomic_inc_return(&mce_callin);
 
        /*
@@ -957,7 +1001,7 @@ static void mce_clear_state(unsigned long *toclear)
 
        for (i = 0; i < mca_cfg.banks; i++) {
                if (test_bit(i, toclear))
-                       mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+                       mce_wrmsrl(msr_ops.status(i), 0);
        }
 }
 
@@ -994,11 +1038,12 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
        int i;
        int worst = 0;
        int severity;
+
        /*
         * Establish sequential order between the CPUs entering the machine
         * check handler.
         */
-       int order;
+       int order = -1;
        /*
         * If no_way_out gets set, there is no safe way to recover from this
         * MCE.  If mca_cfg.tolerant is cranked up, we'll try anyway.
@@ -1012,7 +1057,12 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
        DECLARE_BITMAP(toclear, MAX_NR_BANKS);
        DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
        char *msg = "Unknown";
-       int lmce = 0;
+
+       /*
+        * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
+        * on Intel.
+        */
+       int lmce = 1;
 
        /* If this CPU is offline, just bail out. */
        if (cpu_is_offline(smp_processor_id())) {
@@ -1051,19 +1101,20 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
                kill_it = 1;
 
        /*
-        * Check if this MCE is signaled to only this logical processor
+        * Check if this MCE is signaled to only this logical processor,
+        * on Intel only.
         */
-       if (m.mcgstatus & MCG_STATUS_LMCES)
-               lmce = 1;
-       else {
-               /*
-                * Go through all the banks in exclusion of the other CPUs.
-                * This way we don't report duplicated events on shared banks
-                * because the first one to see it will clear it.
-                * If this is a Local MCE, then no need to perform rendezvous.
-                */
+       if (m.cpuvendor == X86_VENDOR_INTEL)
+               lmce = m.mcgstatus & MCG_STATUS_LMCES;
+
+       /*
+        * Go through all banks in exclusion of the other CPUs. This way we
+        * don't report duplicated events on shared banks because the first one
+        * to see it will clear it. If this is a Local MCE, then no need to
+        * perform rendezvous.
+        */
+       if (!lmce)
                order = mce_start(&no_way_out);
-       }
 
        for (i = 0; i < cfg->banks; i++) {
                __clear_bit(i, toclear);
@@ -1076,7 +1127,7 @@ void do_machine_check(struct pt_regs *regs, long 
error_code)
                m.addr = 0;
                m.bank = i;
 
-               m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+               m.status = mce_rdmsrl(msr_ops.status(i));
                if ((m.status & MCI_STATUS_VAL) == 0)
                        continue;
 
@@ -1420,7 +1471,6 @@ static void __mcheck_cpu_init_generic(void)
        enum mcp_flags m_fl = 0;
        mce_banks_t all_banks;
        u64 cap;
-       int i;
 
        if (!mca_cfg.bootlog)
                m_fl = MCP_DONTLOG;
@@ -1436,14 +1486,19 @@ static void __mcheck_cpu_init_generic(void)
        rdmsrl(MSR_IA32_MCG_CAP, cap);
        if (cap & MCG_CTL_P)
                wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+}
+
+static void __mcheck_cpu_init_clear_banks(void)
+{
+       int i;
 
        for (i = 0; i < mca_cfg.banks; i++) {
                struct mce_bank *b = &mce_banks[i];
 
                if (!b->init)
                        continue;
-               wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
-               wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
+               wrmsrl(msr_ops.ctl(i), b->ctl);
+               wrmsrl(msr_ops.status(i), 0);
        }
 }
 
@@ -1495,7 +1550,7 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 
*c)
                         */
                        clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
                }
-               if (c->x86 <= 17 && cfg->bootlog < 0) {
+               if (c->x86 < 17 && cfg->bootlog < 0) {
                        /*
                         * Lots of broken BIOS around that don't clear them
                         * by default and leave crap in there. Don't log:
@@ -1628,11 +1683,19 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 
*c)
                break;
 
        case X86_VENDOR_AMD: {
-               u32 ebx = cpuid_ebx(0x80000007);
+               mce_flags.overflow_recov = !!cpu_has(c, 
X86_FEATURE_OVERFLOW_RECOV);
+               mce_flags.succor         = !!cpu_has(c, X86_FEATURE_SUCCOR);
+               mce_flags.smca           = !!cpu_has(c, X86_FEATURE_SMCA);
 
-               mce_flags.overflow_recov = !!(ebx & BIT(0));
-               mce_flags.succor         = !!(ebx & BIT(1));
-               mce_flags.smca           = !!(ebx & BIT(3));
+               /*
+                * Install proper ops for Scalable MCA enabled processors
+                */
+               if (mce_flags.smca) {
+                       msr_ops.ctl     = smca_ctl_reg;
+                       msr_ops.status  = smca_status_reg;
+                       msr_ops.addr    = smca_addr_reg;
+                       msr_ops.misc    = smca_misc_reg;
+               }
                mce_amd_feature_init(c);
 
                break;
@@ -1717,6 +1780,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 
        __mcheck_cpu_init_generic();
        __mcheck_cpu_init_vendor(c);
+       __mcheck_cpu_init_clear_banks();
        __mcheck_cpu_init_timer();
 }
 
@@ -2082,7 +2146,7 @@ static void mce_disable_error_reporting(void)
                struct mce_bank *b = &mce_banks[i];
 
                if (b->init)
-                       wrmsrl(MSR_IA32_MCx_CTL(i), 0);
+                       wrmsrl(msr_ops.ctl(i), 0);
        }
        return;
 }
@@ -2121,6 +2185,7 @@ static void mce_syscore_resume(void)
 {
        __mcheck_cpu_init_generic();
        __mcheck_cpu_init_vendor(raw_cpu_ptr(&cpu_info));
+       __mcheck_cpu_init_clear_banks();
 }
 
 static struct syscore_ops mce_syscore_ops = {
@@ -2138,6 +2203,7 @@ static void mce_cpu_restart(void *data)
        if (!mce_available(raw_cpu_ptr(&cpu_info)))
                return;
        __mcheck_cpu_init_generic();
+       __mcheck_cpu_init_clear_banks();
        __mcheck_cpu_init_timer();
 }
 
@@ -2413,7 +2479,7 @@ static void mce_reenable_cpu(void *h)
                struct mce_bank *b = &mce_banks[i];
 
                if (b->init)
-                       wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+                       wrmsrl(msr_ops.ctl(i), b->ctl);
        }
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9d656fd436ef..10b0661651e0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -54,14 +54,6 @@
 /* Threshold LVT offset is at MSR0xC0000410[15:12] */
 #define SMCA_THR_LVT_OFF       0xF000
 
-/*
- * OS is required to set the MCAX bit to acknowledge that it is now using the
- * new MSR ranges and new registers under each bank. It also means that the OS
- * will configure deferred errors in the new MCx_CONFIG register. If the bit is
- * not set, uncorrectable errors will cause a system panic.
- */
-#define SMCA_MCAX_EN_OFF       0x1
-
 static const char * const th_names[] = {
        "load_store",
        "insn_fetch",
@@ -333,7 +325,7 @@ static u32 get_block_address(u32 current_addr, u32 low, u32 
high,
        /* Fall back to method we used for older processors: */
        switch (block) {
        case 0:
-               addr = MSR_IA32_MCx_MISC(bank);
+               addr = msr_ops.misc(bank);
                break;
        case 1:
                offset = ((low & MASK_BLKPTR_LO) >> 21);
@@ -351,6 +343,7 @@ prepare_threshold_block(unsigned int bank, unsigned int 
block, u32 addr,
                        int offset, u32 misc_high)
 {
        unsigned int cpu = smp_processor_id();
+       u32 smca_low, smca_high, smca_addr;
        struct threshold_block b;
        int new;
 
@@ -369,24 +362,49 @@ prepare_threshold_block(unsigned int bank, unsigned int 
block, u32 addr,
 
        b.interrupt_enable = 1;
 
-       if (mce_flags.smca) {
-               u32 smca_low, smca_high;
-               u32 smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
+       if (!mce_flags.smca) {
+               new = (misc_high & MASK_LVTOFF_HI) >> 20;
+               goto set_offset;
+       }
 
-               if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
-                       smca_high |= SMCA_MCAX_EN_OFF;
-                       wrmsr(smca_addr, smca_low, smca_high);
-               }
+       smca_addr = MSR_AMD64_SMCA_MCx_CONFIG(bank);
 
-               /* Gather LVT offset for thresholding: */
-               if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
-                       goto out;
+       if (!rdmsr_safe(smca_addr, &smca_low, &smca_high)) {
+               /*
+                * OS is required to set the MCAX bit to acknowledge that it is
+                * now using the new MSR ranges and new registers under each
+                * bank. It also means that the OS will configure deferred
+                * errors in the new MCx_CONFIG register. If the bit is not set,
+                * uncorrectable errors will cause a system panic.
+                *
+                * MCA_CONFIG[MCAX] is bit 32 (0 in the high portion of the 
MSR.)
+                */
+               smca_high |= BIT(0);
 
-               new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
-       } else {
-               new = (misc_high & MASK_LVTOFF_HI) >> 20;
+               /*
+                * SMCA logs Deferred Error information in MCA_DE{STAT,ADDR}
+                * registers with the option of additionally logging to
+                * MCA_{STATUS,ADDR} if MCA_CONFIG[LogDeferredInMcaStat] is set.
+                *
+                * This bit is usually set by BIOS to retain the old behavior
+                * for OSes that don't use the new registers. Linux supports the
+                * new registers so let's disable that additional logging here.
+                *
+                * MCA_CONFIG[LogDeferredInMcaStat] is bit 34 (bit 2 in the high
+                * portion of the MSR).
+                */
+               smca_high &= ~BIT(2);
+
+               wrmsr(smca_addr, smca_low, smca_high);
        }
 
+       /* Gather LVT offset for thresholding: */
+       if (rdmsr_safe(MSR_CU_DEF_ERR, &smca_low, &smca_high))
+               goto out;
+
+       new = (smca_low & SMCA_THR_LVT_OFF) >> 12;
+
+set_offset:
        offset = setup_APIC_mce_threshold(offset, new);
 
        if ((offset == new) && (mce_threshold_vector != 
amd_threshold_interrupt))
@@ -430,12 +448,23 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
                deferred_error_interrupt_enable(c);
 }
 
-static void __log_error(unsigned int bank, bool threshold_err, u64 misc)
+static void
+__log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc)
 {
+       u32 msr_status = msr_ops.status(bank);
+       u32 msr_addr = msr_ops.addr(bank);
        struct mce m;
        u64 status;
 
-       rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+       WARN_ON_ONCE(deferred_err && threshold_err);
+
+       if (deferred_err && mce_flags.smca) {
+               msr_status = MSR_AMD64_SMCA_MCx_DESTAT(bank);
+               msr_addr = MSR_AMD64_SMCA_MCx_DEADDR(bank);
+       }
+
+       rdmsrl(msr_status, status);
+
        if (!(status & MCI_STATUS_VAL))
                return;
 
@@ -448,10 +477,11 @@ static void __log_error(unsigned int bank, bool 
threshold_err, u64 misc)
                m.misc = misc;
 
        if (m.status & MCI_STATUS_ADDRV)
-               rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr);
+               rdmsrl(msr_addr, m.addr);
 
        mce_log(&m);
-       wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
+
+       wrmsrl(msr_status, 0);
 }
 
 static inline void __smp_deferred_error_interrupt(void)
@@ -479,17 +509,21 @@ asmlinkage __visible void 
smp_trace_deferred_error_interrupt(void)
 /* APIC interrupt handler for deferred errors */
 static void amd_deferred_error_interrupt(void)
 {
-       u64 status;
        unsigned int bank;
+       u32 msr_status;
+       u64 status;
 
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
-               rdmsrl(MSR_IA32_MCx_STATUS(bank), status);
+               msr_status = (mce_flags.smca) ? MSR_AMD64_SMCA_MCx_DESTAT(bank)
+                                             : msr_ops.status(bank);
+
+               rdmsrl(msr_status, status);
 
                if (!(status & MCI_STATUS_VAL) ||
                    !(status & MCI_STATUS_DEFERRED))
                        continue;
 
-               __log_error(bank, false, 0);
+               __log_error(bank, true, false, 0);
                break;
        }
 }
@@ -544,7 +578,7 @@ static void amd_threshold_interrupt(void)
        return;
 
 log:
-       __log_error(bank, true, ((u64)high << 32) | low);
+       __log_error(bank, false, true, ((u64)high << 32) | low);
 }
 
 /*
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
index df280da34825..d957d5f21a86 100644
--- a/arch/x86/ras/Kconfig
+++ b/arch/x86/ras/Kconfig
@@ -1,4 +1,4 @@
-config AMD_MCE_INJ
+config MCE_AMD_INJ
        tristate "Simple MCE injection interface for AMD processors"
        depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB
        default n
diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile
index dd2c98b84037..5f94546db280 100644
--- a/arch/x86/ras/Makefile
+++ b/arch/x86/ras/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_AMD_MCE_INJ)              += mce_amd_inj.o
+obj-$(CONFIG_MCE_AMD_INJ)              += mce_amd_inj.o
 
diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 9e02dcaef683..e69f4701a076 100644
--- a/arch/x86/ras/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -290,14 +290,33 @@ static void do_inject(void)
        wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
                     (u32)mcg_status, (u32)(mcg_status >> 32));
 
-       wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
-                    (u32)i_mce.status, (u32)(i_mce.status >> 32));
+       if (boot_cpu_has(X86_FEATURE_SMCA)) {
+               if (inj_type == DFR_INT_INJ) {
+                       wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b),
+                                    (u32)i_mce.status, (u32)(i_mce.status >> 
32));
+
+                       wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b),
+                                    (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+               } else {
+                       wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b),
+                                    (u32)i_mce.status, (u32)(i_mce.status >> 
32));
+
+                       wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b),
+                                    (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+               }
+
+               wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b),
+                            (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+       } else {
+               wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b),
+                            (u32)i_mce.status, (u32)(i_mce.status >> 32));
 
-       wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
-                    (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
+               wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b),
+                            (u32)i_mce.addr, (u32)(i_mce.addr >> 32));
 
-       wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
-                    (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+               wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b),
+                            (u32)i_mce.misc, (u32)(i_mce.misc >> 32));
+       }
 
        toggle_hw_mce_inject(cpu, false);
 
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 49768c08ac07..9b6800a79c7f 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1052,7 +1052,6 @@ int amd_decode_mce(struct notifier_block *nb, unsigned 
long val, void *data)
        struct mce *m = (struct mce *)data;
        struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
        int ecc;
-       u32 ebx = cpuid_ebx(0x80000007);
 
        if (amd_filter_mce(m))
                return NOTIFY_STOP;
@@ -1075,7 +1074,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned 
long val, void *data)
                        ((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
                        ((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
 
-       if (!!(ebx & BIT(3))) {
+       if (boot_cpu_has(X86_FEATURE_SMCA)) {
                u32 low, high;
                u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
 
@@ -1094,7 +1093,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned 
long val, void *data)
        if (m->status & MCI_STATUS_ADDRV)
                pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, 
m->addr);
 
-       if (!!(ebx & BIT(3))) {
+       if (boot_cpu_has(X86_FEATURE_SMCA)) {
                decode_smca_errors(m);
                goto err_code;
        }
@@ -1149,7 +1148,6 @@ static struct notifier_block amd_mce_dec_nb = {
 static int __init mce_amd_init(void)
 {
        struct cpuinfo_x86 *c = &boot_cpu_data;
-       u32 ebx;
 
        if (c->x86_vendor != X86_VENDOR_AMD)
                return -ENODEV;
@@ -1205,9 +1203,8 @@ static int __init mce_amd_init(void)
                break;
 
        case 0x17:
-               ebx = cpuid_ebx(0x80000007);
                xec_mask = 0x3f;
-               if (!(ebx & BIT(3))) {
+               if (!boot_cpu_has(X86_FEATURE_SMCA)) {
                        printk(KERN_WARNING "Decoding supported only on 
Scalable MCA processors.\n");
                        goto err_out;
                }

[GIT PULL] RAS changes for v4.7

Reply via email to