From: Yazen Ghannam <[email protected]> The MCA_IPID register uniquely identifies a bank's type and instance on Scalable MCA systems. We should save the value of this register in struct mce along with the other relevant error information. This ensures that we can decode errors without relying on system software to correlate the bank to the type.
Signed-off-by: Yazen Ghannam <[email protected]> Cc: Aravind Gopalakrishnan <[email protected]> Cc: Steven Rostedt <[email protected]> Cc: Tony Luck <[email protected]> Cc: linux-edac <[email protected]> Cc: x86-ml <[email protected]> Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Borislav Petkov <[email protected]> --- arch/x86/include/uapi/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 8 ++++++-- arch/x86/kernel/cpu/mcheck/mce_amd.c | 8 ++++++-- include/trace/events/mce.h | 5 ++++- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 8c75fbc94c3f..69a6e07e3149 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -27,6 +27,7 @@ struct mce { __u32 apicid; /* CPU initial apic ID */ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ + __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7f11ea5b75fa..84cb9b5859e2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -582,8 +582,12 @@ static void mce_read_aux(struct mce *m, int i) } } - if (mce_flags.smca && (m->status & MCI_STATUS_SYNDV)) - m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); + if (mce_flags.smca) { + m->ipid = mce_rdmsrl(MSR_AMD64_SMCA_MCx_IPID(i)); + + if (m->status & MCI_STATUS_SYNDV) + m->synd = mce_rdmsrl(MSR_AMD64_SMCA_MCx_SYND(i)); + } } static bool memory_error(struct mce *m) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 16766e09c2b7..d2f92ab5322f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -564,8 +564,12 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) if (m.status & MCI_STATUS_ADDRV) rdmsrl(msr_addr, m.addr); - if (mce_flags.smca && (m.status & MCI_STATUS_SYNDV)) - rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); + if (mce_flags.smca) { + rdmsrl(MSR_AMD64_SMCA_MCx_IPID(bank), m.ipid); + + if (m.status & MCI_STATUS_SYNDV) + rdmsrl(MSR_AMD64_SMCA_MCx_SYND(bank), m.synd); + } mce_log(&m); diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h index 8be5268caf28..70f02149808c 100644 --- a/include/trace/events/mce.h +++ b/include/trace/events/mce.h @@ -21,6 +21,7 @@ TRACE_EVENT(mce_record, __field( u64, addr ) __field( u64, misc ) __field( u64, synd ) + __field( u64, ipid ) __field( u64, ip ) __field( u64, tsc ) __field( u64, walltime ) @@ -40,6 +41,7 @@ TRACE_EVENT(mce_record, __entry->addr = m->addr; __entry->misc = m->misc; __entry->synd = m->synd; + __entry->ipid = m->ipid; __entry->ip = m->ip; __entry->tsc = m->tsc; __entry->walltime = m->time; @@ -52,10 +54,11 @@ TRACE_EVENT(mce_record, __entry->cpuvendor = m->cpuvendor; ), - TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", + TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, IPID: %016Lx, ADDR/MISC/SYND: %016Lx/%016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", __entry->cpu, __entry->mcgcap, __entry->mcgstatus, __entry->bank, __entry->status, + __entry->ipid, __entry->addr, __entry->misc, __entry->synd, __entry->cs, __entry->ip, __entry->tsc, -- 2.10.0

