AMD64 APM Volume 2 (9.3.2 Error-Reporting Register Banks) documents that the size of the threshold counter is implementation-dependent, and that implementations with fewer than 16 bits fill the most significant unimplemented bits with zeros. So THRESHOLD_MAX should be detected at initialization time rather than being a constant.
On the other hand, Error Counter (ERRCT) is bits 47:32, and accordingly MASK_ERR_COUNT_HI should be 0x0000FFFF instead of 0x00000FFF. Signed-off-by: Chen Yucong <sla...@gmail.com> --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 60 ++++++++++++++++++++++++++++++++-- 1 file changed, 58 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5d4999f..c6552d2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -33,8 +33,10 @@ #include <asm/mce.h> #include <asm/msr.h> +static u32 threshold_max; + #define NR_BLOCKS 9 -#define THRESHOLD_MAX 0xFFF +#define THRESHOLD_MAX threshold_max #define INT_TYPE_APIC 0x00020000 #define MASK_VALID_HI 0x80000000 #define MASK_CNTP_HI 0x40000000 @@ -43,7 +45,7 @@ #define MASK_COUNT_EN_HI 0x00080000 #define MASK_INT_TYPE_HI 0x00060000 #define MASK_OVERFLOW_HI 0x00010000 -#define MASK_ERR_COUNT_HI 0x00000FFF +#define MASK_ERR_COUNT_HI 0x0000FFFF #define MASK_BLKPTR_LO 0xFF000000 #define MCG_XBLK_ADDR 0xC0000400 @@ -135,6 +137,54 @@ static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi) return 1; }; +static u32 get_threshold(int bank, int block) +{ + u64 cap; + u32 low = 0, high = 0, old_high = 0, address = 0; + + rdmsrl(MSR_IA32_MCG_CAP, cap); + if (bank < 0 || bank >= (cap & MCG_BANKCNT_MASK)) + return 0; + + if (block < 0 || block >= NR_BLOCKS) + return 0; + + address = MSR_IA32_MCx_MISC(bank); + rdmsr_safe(address, &low, &high); + + if (block != 0) { + address = (low & MASK_BLKPTR_LO) >> 21; + if (!address) + return 0; + + address = address + MCG_XBLK_ADDR + block - 1; + rdmsr_safe(address, &low, &high); + } + + if (!(high & MASK_VALID_HI)) + return 0; + + if (!(high & MASK_CNTP_HI) || + (high & MASK_LOCKED_HI)) + return 0; + + /* read original value and save it for restoring */ + old_high = high; + + /* + * write all 1s to ERR_COUNT field and then reread it for getting + * the maximum of threshold + */ + 
high = (high & ~MASK_ERR_COUNT_HI) | MASK_ERR_COUNT_HI; + wrmsr_safe(address, low, high); + rdmsr_safe(address, &low, &high); + + /* restore the original value */ + wrmsr_safe(address, low, old_high); + + return high & MASK_ERR_COUNT_HI; +} + /* * Called via smp_call_function_single(), must be called with correct * cpu affinity. @@ -214,6 +264,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int bank, block; int offset = -1; + /* + * bank 4 supports APIC LVT interrupts implicitly since forever. + * So we can use bank4 for detecting the threshold. + */ + threshold_max = get_threshold(4, 0); + for (bank = 0; bank < mca_cfg.banks; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { if (block == 0) -- 1.7.10.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/