On Fri, Oct 30, 2020 at 12:04:03PM -0700, Luck, Tony wrote:

Bah, didn't notice this conversation didn't include LKML.

> The Xeon versions of Sandy Bridge, Ivy Bridge and Haswell support an
> optional additional error logging mode which is enabled by an MSR.
>
> Previously this mode was enabled from the mcelog(8) tool via /dev/cpu,
> but the kernel is now very picky about which MSRs may be written. So
> move the enabling into the kernel.
>
> Suggested-by: Boris Petkov <b...@alien8.de>
> Signed-off-by: Tony Luck <tony.l...@intel.com>
> ---
>
> N.B. I don't have any of these old systems in my lab any more. So
> this is untested :-(
>
>  arch/x86/include/asm/msr-index.h |  1 +
>  arch/x86/kernel/cpu/mce/intel.c  | 20 ++++++++++++++++++++
>  2 files changed, 21 insertions(+)
>
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 972a34d93505..b2dd2648c0e2 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -139,6 +139,7 @@
>  #define MSR_IA32_MCG_CAP		0x00000179
>  #define MSR_IA32_MCG_STATUS		0x0000017a
>  #define MSR_IA32_MCG_CTL		0x0000017b
> +#define MSR_ERROR_CONTROL		0x0000017f
>  #define MSR_IA32_MCG_EXT_CTL		0x000004d0
>
>  #define MSR_OFFCORE_RSP_0		0x000001a6
> diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
> index abe9fe0fb851..b47883e364b4 100644
> --- a/arch/x86/kernel/cpu/mce/intel.c
> +++ b/arch/x86/kernel/cpu/mce/intel.c
> @@ -509,12 +509,32 @@ static void intel_ppin_init(struct cpuinfo_x86 *c)
>  	}
>  }
>
> +/*
> + * Enable additional error logs from the integrated
> + * memory controller on processors that support this.
> + */
> +static void intel_imc_init(struct cpuinfo_x86 *c)
> +{
> +	u64 error_control;
> +
> +	switch (c->x86_model) {
> +	case INTEL_FAM6_SANDYBRIDGE_X:
> +	case INTEL_FAM6_IVYBRIDGE_X:
> +	case INTEL_FAM6_HASWELL_X:
> +		rdmsrl(MSR_ERROR_CONTROL, error_control);
> +		error_control |= 2;
> +		wrmsrl(MSR_ERROR_CONTROL, error_control);
> +		break;
> +	}
> +}
> +
>  void mce_intel_feature_init(struct cpuinfo_x86 *c)
>  {
>  	intel_init_thermal(c);
>  	intel_init_cmci();
>  	intel_init_lmce();
>  	intel_ppin_init(c);
> +	intel_imc_init(c);
>  }
>
>  void mce_intel_feature_clear(struct cpuinfo_x86 *c)
> --
> 2.21.1
>