From: Abhishek Paliwal <abhishek.pali...@aricent.com>

From: David Daney <david.da...@cavium.com>
Use COP0_ERRCTL for L1 Dcache parity and TLB parity checks for OCTEON3. For OCTEON II CPUs, add a Bus Error handler for the Write Buffer parity condition. Then make this log the errors with the EDAC system. Signed-off-by: David Daney <david.da...@cavium.com> Signed-off-by: Leonid Rosenboim <lrosenb...@caviumnetworks.com> Signed-off-by: Chandrakala Chavva <ccha...@caviumnetworks.com> Signed-off-by: Abhishek Paliwal <abhishek.pali...@aricent.com> --- arch/mips/cavium-octeon/setup.c | 12 ++- arch/mips/include/asm/mipsregs.h | 4 + arch/mips/include/asm/octeon/octeon.h | 12 +++ arch/mips/include/asm/traps.h | 9 ++ arch/mips/kernel/traps.c | 11 ++- arch/mips/mm/c-octeon.c | 170 ++++++++++++++++++++++++++++++++-- arch/mips/mm/cex-oct.S | 37 ++++++++ drivers/edac/octeon_edac-pc.c | 34 ++++--- 8 files changed, 267 insertions(+), 22 deletions(-) diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index fc0b0d2..7027044 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -381,7 +381,10 @@ void octeon_check_cpu_bist(void) pr_err("Core%d BIST Failure: CacheErr(icache) = 0x%llx\n", coreid, bist_val); - bist_val = read_octeon_c0_dcacheerr(); + if (current_cpu_type() == CPU_CAVIUM_OCTEON3) + bist_val = read_octeon_c0_errctl(); + else + bist_val = read_octeon_c0_dcacheerr(); if (bist_val & 1) pr_err("Core%d L1 Dcache parity error: " "CacheErr(dcache) = 0x%llx\n", @@ -392,7 +395,9 @@ void octeon_check_cpu_bist(void) if (bist_val & mask) pr_err("Core%d BIST Failure: COP0_CVM_MEM_CTL = 0x%llx\n", coreid, bist_val); - + if (current_cpu_type() == CPU_CAVIUM_OCTEON3) + write_octeon_c0_errctl(1); + else write_octeon_c0_dcacheerr(0); } @@ -583,7 +588,8 @@ void octeon_user_io_init(void) cvmmemctl.s.cvmsegenau = 0; /* Enable TLB parity error reporting on OCTEON II */ - if (current_cpu_type() == CPU_CAVIUM_OCTEON2) + if (current_cpu_type() == CPU_CAVIUM_OCTEON2 || + current_cpu_type() == CPU_CAVIUM_OCTEON3) cvmmemctl.s.tlbperrena = 
1; write_c0_cvmmemctl(cvmmemctl.u64); diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 327f989..b7feb87 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -69,6 +69,7 @@ #define CP0_DEPC $24 #define CP0_PERFORMANCE $25 #define CP0_ECC $26 +#define CP0_ERRCTL $26 #define CP0_CACHEERR $27 #define CP0_TAGLO $28 #define CP0_TAGHI $29 @@ -1203,6 +1204,9 @@ do { \ #define read_octeon_c0_dcacheerr() __read_64bit_c0_register($27, 1) #define write_octeon_c0_dcacheerr(val) __write_64bit_c0_register($27, 1, val) +#define read_octeon_c0_errctl() __read_64bit_c0_register($26, 0) +#define write_octeon_c0_errctl(val) __write_64bit_c0_register($26, 0, val) + /* BMIPS3300 */ #define read_c0_brcm_config_0() __read_32bit_c0_register($22, 0) #define write_c0_brcm_config_0(val) __write_32bit_c0_register($22, 0, val) diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h index c809f86..58e8beb 100644 --- a/arch/mips/include/asm/octeon/octeon.h +++ b/arch/mips/include/asm/octeon/octeon.h @@ -8,7 +8,10 @@ #ifndef __ASM_OCTEON_OCTEON_H #define __ASM_OCTEON_OCTEON_H +#include <linux/irqflags.h> +#include <linux/notifier.h> #include <asm/octeon/cvmx.h> +#include <linux/irq.h> extern uint64_t octeon_bootmem_alloc_range_phys(uint64_t size, uint64_t alignment, @@ -265,4 +268,13 @@ void octeon_irq_set_ip4_handler(octeon_irq_ip4_handler_t); extern void octeon_fixup_irqs(void); +int register_co_cache_error_notifier(struct notifier_block *nb); +int unregister_co_cache_error_notifier(struct notifier_block *nb); +#define CO_CACHE_ERROR_RECOVERABLE 0 +#define CO_CACHE_ERROR_UNRECOVERABLE 1 +#define CO_CACHE_ERROR_WB_PARITY 2 +#define CO_CACHE_ERROR_TLB_PARITY 3 + +extern unsigned long long cache_err_dcache[NR_CPUS]; + #endif /* __ASM_OCTEON_OCTEON_H */ diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h index f41cf3e..6b896c7 100644 --- a/arch/mips/include/asm/traps.h 
+++ b/arch/mips/include/asm/traps.h @@ -21,6 +21,15 @@ extern void (*board_be_init)(void); extern int (*board_be_handler)(struct pt_regs *regs, int is_fixup); +/* + * Possible status responses for a board_mcheck_handler backend. + */ +#define MIPS_MC_DISCARD 0 /* return with no action */ +#define MIPS_MC_NOT_HANDLED 1 /* default handling */ +#define MIPS_MC_FATAL 2 /* treat as an unrecoverable error */ + +extern int (*board_mcheck_handler)(struct pt_regs *regs); + extern void (*board_nmi_handler_setup)(void); extern void (*board_ejtag_handler_setup)(void); extern void (*board_bind_eic_interrupt)(int irq, int regset); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 81e6ae0..321657d 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -88,6 +88,7 @@ extern asmlinkage void handle_reserved(void); void (*board_be_init)(void); int (*board_be_handler)(struct pt_regs *regs, int is_fixup); +int (*board_mcheck_handler)(struct pt_regs *regs); void (*board_nmi_handler_setup)(void); void (*board_ejtag_handler_setup)(void); void (*board_bind_eic_interrupt)(int irq, int regset); @@ -1232,7 +1233,15 @@ asmlinkage void do_mcheck(struct pt_regs *regs) enum ctx_state prev_state; prev_state = exception_enter(); - show_regs(regs); + if (board_mcheck_handler) { + int resp = board_mcheck_handler(regs); + if (resp == MIPS_MC_DISCARD) + return; + if (resp == MIPS_MC_FATAL) + multi_match = 0; + } + + show_registers(regs); if (multi_match) { printk("Index : %0x\n", read_c0_index()); diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c index f41a5c5..14d91ee 100644 --- a/arch/mips/mm/c-octeon.c +++ b/arch/mips/mm/c-octeon.c @@ -31,6 +31,8 @@ unsigned long long cache_err_dcache[NR_CPUS]; EXPORT_SYMBOL_GPL(cache_err_dcache); +static RAW_NOTIFIER_HEAD(co_cache_error_chain); + /** * Octeon automatically flushes the dcache on tlb changes, so * from Linux's viewpoint it acts much like a physically @@ -176,6 +178,148 @@ static void 
octeon_flush_kernel_vmap_range(unsigned long vaddr, int size) BUG(); } +/* + * Octeon specific bus error handler, as write buffer parity errors + * trigger bus errors. These are fatal since the copy in the write buffer + * is the only copy of the data. + */ +static int octeon2_be_handler(struct pt_regs *regs, int is_fixup) +{ + u64 dcache_err; + u64 wbfperr_mask = 1ULL << 1; + + dcache_err = read_octeon_c0_dcacheerr(); + if (dcache_err & wbfperr_mask) { + int rv = raw_notifier_call_chain(&co_cache_error_chain, + CO_CACHE_ERROR_WB_PARITY, + NULL); + if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { + unsigned int coreid = cvmx_get_core_num(); + + pr_err("Core%u: Write buffer parity error:\n", coreid); + pr_err("CacheErr (Dcache) == %llx\n", dcache_err); + } + + write_octeon_c0_dcacheerr(wbfperr_mask); + return MIPS_BE_FATAL; + } + if (is_fixup) + return MIPS_BE_FIXUP; + else + return MIPS_BE_FATAL; +} + +/* + * Octeon specific MachineCheck handler, as TLB parity errors + * trigger MachineCheck errors. + */ +static int octeon2_mcheck_handler(struct pt_regs *regs) +{ + u64 dcache_err; + u64 tlbperr_mask = 1ULL << 5; + dcache_err = read_octeon_c0_dcacheerr(); + if (dcache_err & tlbperr_mask) { + int rv; + union octeon_cvmemctl cvmmemctl; + + /* Clear the indicator */ + write_octeon_c0_dcacheerr(tlbperr_mask); + /* + * Blow everything away to (hopefully) write good + * parity to all TLB entries + */ + local_flush_tlb_all(); + /* Reenable TLB parity error reporting. 
*/ + cvmmemctl.u64 = read_c0_cvmmemctl(); + cvmmemctl.s.tlbperrena = 1; + write_c0_cvmmemctl(cvmmemctl.u64); + + rv = raw_notifier_call_chain(&co_cache_error_chain, + CO_CACHE_ERROR_TLB_PARITY, + NULL); + if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { + unsigned int coreid = cvmx_get_core_num(); + + pr_err("Core%u: TLB parity error:\n", coreid); + return MIPS_MC_FATAL; + } + + return MIPS_MC_DISCARD; + } + return MIPS_MC_NOT_HANDLED; +} + +/* + * Octeon3 specific bus error handler, as write buffer parity errors + * trigger bus errors. These are fatal since the copy in the write buffer + * is the only copy of the data. + */ +static int octeon3_be_handler(struct pt_regs *regs, int is_fixup) +{ + u64 dcache_err; + u64 wbfperr_mask = 1ULL << 9; + + dcache_err = read_octeon_c0_errctl(); + if (dcache_err & wbfperr_mask) { + int rv = raw_notifier_call_chain(&co_cache_error_chain, + CO_CACHE_ERROR_WB_PARITY, + NULL); + if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { + unsigned int coreid = cvmx_get_core_num(); + + pr_err("Core%u: Write buffer parity error:\n", coreid); + pr_err("CacheErr (Dcache) == %llx\n", dcache_err); + } + + write_octeon_c0_errctl(wbfperr_mask); + return MIPS_BE_FATAL; + } + if (is_fixup) + return MIPS_BE_FIXUP; + else + return MIPS_BE_FATAL; +} + +/* + * Octeon3 specific MachineCheck handler, as TLB parity errors + * trigger MachineCheck errors. + */ +static int octeon3_mcheck_handler(struct pt_regs *regs) +{ + u64 dcache_err; + u64 tlbperr_mask = 1ULL << 14; + dcache_err = read_octeon_c0_errctl(); + if (dcache_err & tlbperr_mask) { + int rv; + union octeon_cvmemctl cvmmemctl; + + /* Clear the indicator */ + write_octeon_c0_errctl(tlbperr_mask); + /* + * Blow everything away to (hopefully) write good + * parity to all TLB entries + */ + local_flush_tlb_all(); + /* Reenable TLB parity error reporting. 
*/ + cvmmemctl.u64 = read_c0_cvmmemctl(); + cvmmemctl.s.tlbperrena = 1; + write_c0_cvmmemctl(cvmmemctl.u64); + + rv = raw_notifier_call_chain(&co_cache_error_chain, + CO_CACHE_ERROR_TLB_PARITY, + NULL); + if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) { + unsigned int coreid = cvmx_get_core_num(); + + pr_err("Core%u: TLB parity error:\n", coreid); + return MIPS_MC_FATAL; + } + + return MIPS_MC_DISCARD; + } + return MIPS_MC_NOT_HANDLED; +} + /** * Probe Octeon's caches * @@ -223,6 +367,9 @@ static void probe_octeon(void) c->dcache.sets = 8; dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; c->options |= MIPS_CPU_PREFETCH; + + board_be_handler = octeon2_be_handler; + board_mcheck_handler = octeon2_mcheck_handler; break; case CPU_CAVIUM_OCTEON3: @@ -237,6 +384,9 @@ static void probe_octeon(void) c->dcache.sets = 8; dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz; c->options |= MIPS_CPU_PREFETCH; + + board_be_handler = octeon3_be_handler; + board_mcheck_handler = octeon3_mcheck_handler; break; default: @@ -268,8 +418,13 @@ static void probe_octeon(void) static void octeon_cache_error_setup(void) { - extern char except_vec2_octeon; - set_handler(0x100, &except_vec2_octeon, 0x80); + if (current_cpu_type() == CPU_CAVIUM_OCTEON3) { + extern char except_vec2_octeon3; + set_handler(0x100, &except_vec2_octeon3, 0x80); + } else { + extern char except_vec2_octeon; + set_handler(0x100, &except_vec2_octeon, 0x80); + } } /** @@ -304,8 +459,6 @@ void octeon_cache_init(void) /* * Handle a cache error exception */ -static RAW_NOTIFIER_HEAD(co_cache_error_chain); - int register_co_cache_error_notifier(struct notifier_block *nb) { return raw_notifier_chain_register(&co_cache_error_chain, nb); @@ -330,7 +483,10 @@ static void co_cache_error_call_notifiers(unsigned long val) dcache_err = cache_err_dcache[coreid]; cache_err_dcache[coreid] = 0; } else { - dcache_err = read_octeon_c0_dcacheerr(); + if (current_cpu_type() == CPU_CAVIUM_OCTEON3) + dcache_err = 
read_octeon_c0_errctl(); + else + dcache_err = read_octeon_c0_dcacheerr(); } pr_err("Core%lu: Cache error exception:\n", coreid); @@ -353,7 +509,7 @@ static void co_cache_error_call_notifiers(unsigned long val) asmlinkage void cache_parity_error_octeon_recoverable(void) { - co_cache_error_call_notifiers(0); + co_cache_error_call_notifiers(CO_CACHE_ERROR_RECOVERABLE); } /** @@ -362,6 +518,6 @@ asmlinkage void cache_parity_error_octeon_recoverable(void) asmlinkage void cache_parity_error_octeon_non_recoverable(void) { - co_cache_error_call_notifiers(1); + co_cache_error_call_notifiers(CO_CACHE_ERROR_UNRECOVERABLE); panic("Can't handle cache error: nested exception"); } diff --git a/arch/mips/mm/cex-oct.S b/arch/mips/mm/cex-oct.S index 9029092..e73fec3 100644 --- a/arch/mips/mm/cex-oct.S +++ b/arch/mips/mm/cex-oct.S @@ -51,6 +51,43 @@ .set pop END(except_vec2_octeon) + /* Handle cache error for OCTEON3. */ + LEAF(except_vec2_octeon3) + + .set push + .set mips64r2 + .set noreorder + .set noat + + + /* due to an errata we need to read the COP0 CacheErr (Dcache) + * before any cache/DRAM access */ + + rdhwr k0, $0 /* get core_id */ + PTR_LA k1, cache_err_dcache + sll k0, k0, 3 + PTR_ADDU k1, k0, k1 /* k1 = &cache_err_dcache[core_id] */ + + dmfc0 k0, CP0_ERRCTL + sd k0, (k1) + andi k0, 1 /* Write 1 to clear Dcache parity error */ + dmtc0 k0, CP0_ERRCTL + + /* check whether this is a nested exception */ + mfc0 k1, CP0_STATUS + andi k1, k1, ST0_EXL + beqz k1, 2f + nop + j cache_parity_error_octeon_non_recoverable + nop + + /* exception is recoverable */ +2 : j handle_cache_err + nop + + .set pop + END(except_vec2_octeon3) + /* We need to jump to handle_cache_err so that the previous handler * can fit within 0x80 bytes. We also move from 0xFFFFFFFFAXXXXXXX * space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached). 
*/ diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c index 380cbe2..5f41e2f 100644 --- a/drivers/edac/octeon_edac-pc.c +++ b/drivers/edac/octeon_edac-pc.c @@ -19,13 +19,9 @@ #include "edac_module.h" #include <asm/octeon/cvmx.h> +#include <asm/octeon/octeon.h> #include <asm/mipsregs.h> -extern int register_co_cache_error_notifier(struct notifier_block *nb); -extern int unregister_co_cache_error_notifier(struct notifier_block *nb); - -extern unsigned long long cache_err_dcache[NR_CPUS]; - struct co_cache_error { struct notifier_block notifier; struct edac_device_ctl_info *ed; @@ -47,11 +43,26 @@ static int co_cache_error_event(struct notifier_block *this, u64 icache_err = read_octeon_c0_icacheerr(); u64 dcache_err; - if (event) { + switch (event) { + case CO_CACHE_ERROR_UNRECOVERABLE: dcache_err = cache_err_dcache[core]; cache_err_dcache[core] = 0; - } else { - dcache_err = read_octeon_c0_dcacheerr(); + break; + case CO_CACHE_ERROR_RECOVERABLE: + if (current_cpu_type() == CPU_CAVIUM_OCTEON3) + dcache_err = read_octeon_c0_errctl(); + else + dcache_err = read_octeon_c0_dcacheerr(); + break; + case CO_CACHE_ERROR_WB_PARITY: + edac_device_printk(p->ed, KERN_ERR, + "CacheErr (WB Parity): core %d/cpu %d\n", + core, cpu); + edac_device_handle_ue(p->ed, cpu, 2, "write-buffer"); + return NOTIFY_STOP; + default: + WARN(1, "Unknown event: %lu\n", event); + return NOTIFY_BAD; } if (icache_err & 1) { @@ -73,12 +84,13 @@ static int co_cache_error_event(struct notifier_block *this, edac_device_handle_ce(p->ed, cpu, 0, "dcache"); /* Clear the error indication */ - if (current_cpu_type() == CPU_CAVIUM_OCTEON2) + if (current_cpu_type() == CPU_CAVIUM_OCTEON3) + write_octeon_c0_errctl(1); + else if (current_cpu_type() == CPU_CAVIUM_OCTEON2) write_octeon_c0_dcacheerr(1); else write_octeon_c0_dcacheerr(0); } - return NOTIFY_STOP; } @@ -93,7 +105,7 @@ static int co_cache_error_probe(struct platform_device *pdev) platform_set_drvdata(pdev, p); p->ed = 
edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(), - "cache", 2, 0, NULL, 0, + "cache", 3, 0, NULL, 0, edac_device_alloc_index()); if (!p->ed) goto err; -- 1.8.1.4 -- _______________________________________________ linux-yocto mailing list linux-yocto@yoctoproject.org https://lists.yoctoproject.org/listinfo/linux-yocto