From: Abhishek Paliwal <abhishek.pali...@aricent.com>

From: David Daney <david.da...@cavium.com>

Use COP0_ERRCTL for L1 Dcache parity and TLB parity checks for OCTEON3.

For OCTEON II and OCTEON3 CPUs, add a Bus Error handler for the Write
Buffer parity condition and a Machine Check handler for the TLB parity
condition.  Write Buffer parity errors are logged through the EDAC system.

Signed-off-by: David Daney <david.da...@cavium.com>
Signed-off-by: Leonid Rosenboim <lrosenb...@caviumnetworks.com>
Signed-off-by: Chandrakala Chavva <ccha...@caviumnetworks.com>
Signed-off-by: Abhishek Paliwal <abhishek.pali...@aricent.com>
---
 arch/mips/cavium-octeon/setup.c       |  12 ++-
 arch/mips/include/asm/mipsregs.h      |   4 +
 arch/mips/include/asm/octeon/octeon.h |  12 +++
 arch/mips/include/asm/traps.h         |   9 ++
 arch/mips/kernel/traps.c              |  11 ++-
 arch/mips/mm/c-octeon.c               | 170 ++++++++++++++++++++++++++++++++--
 arch/mips/mm/cex-oct.S                |  37 ++++++++
 drivers/edac/octeon_edac-pc.c         |  34 ++++---
 8 files changed, 267 insertions(+), 22 deletions(-)

diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c
index fc0b0d2..7027044 100644
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@@ -381,7 +381,10 @@ void octeon_check_cpu_bist(void)
                pr_err("Core%d BIST Failure: CacheErr(icache) = 0x%llx\n",
                       coreid, bist_val);
 
-       bist_val = read_octeon_c0_dcacheerr();
+       if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+               bist_val = read_octeon_c0_errctl();
+       else
+               bist_val = read_octeon_c0_dcacheerr();
        if (bist_val & 1)
                pr_err("Core%d L1 Dcache parity error: "
                       "CacheErr(dcache) = 0x%llx\n",
@@ -392,7 +395,9 @@ void octeon_check_cpu_bist(void)
        if (bist_val & mask)
                pr_err("Core%d BIST Failure: COP0_CVM_MEM_CTL = 0x%llx\n",
                       coreid, bist_val);
-
+       if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+               write_octeon_c0_errctl(1);
+       else
                write_octeon_c0_dcacheerr(0);
 }
 
@@ -583,7 +588,8 @@ void octeon_user_io_init(void)
        cvmmemctl.s.cvmsegenau = 0;
 
        /* Enable TLB parity error reporting on OCTEON II */
-       if (current_cpu_type() == CPU_CAVIUM_OCTEON2)
+       if (current_cpu_type() == CPU_CAVIUM_OCTEON2 ||
+           current_cpu_type() == CPU_CAVIUM_OCTEON3)
                cvmmemctl.s.tlbperrena = 1;
 
        write_c0_cvmmemctl(cvmmemctl.u64);
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 327f989..b7feb87 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -69,6 +69,7 @@
 #define CP0_DEPC $24
 #define CP0_PERFORMANCE $25
 #define CP0_ECC $26
+#define CP0_ERRCTL $26
 #define CP0_CACHEERR $27
 #define CP0_TAGLO $28
 #define CP0_TAGHI $29
@@ -1203,6 +1204,9 @@ do {                                                      
                \
 #define read_octeon_c0_dcacheerr()     __read_64bit_c0_register($27, 1)
 #define write_octeon_c0_dcacheerr(val) __write_64bit_c0_register($27, 1, val)
 
+#define read_octeon_c0_errctl()                __read_64bit_c0_register($26, 0)
+#define write_octeon_c0_errctl(val)    __write_64bit_c0_register($26, 0, val)
+
 /* BMIPS3300 */
 #define read_c0_brcm_config_0()                __read_32bit_c0_register($22, 0)
 #define write_c0_brcm_config_0(val)    __write_32bit_c0_register($22, 0, val)
diff --git a/arch/mips/include/asm/octeon/octeon.h 
b/arch/mips/include/asm/octeon/octeon.h
index c809f86..58e8beb 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -8,7 +8,10 @@
 #ifndef __ASM_OCTEON_OCTEON_H
 #define __ASM_OCTEON_OCTEON_H
 
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
 #include <asm/octeon/cvmx.h>
+#include <linux/irq.h>
 
 extern uint64_t octeon_bootmem_alloc_range_phys(uint64_t size,
                                                uint64_t alignment,
@@ -265,4 +268,13 @@ void octeon_irq_set_ip4_handler(octeon_irq_ip4_handler_t);
 
 extern void octeon_fixup_irqs(void);
 
+int register_co_cache_error_notifier(struct notifier_block *nb);
+int unregister_co_cache_error_notifier(struct notifier_block *nb);
+#define CO_CACHE_ERROR_RECOVERABLE 0
+#define CO_CACHE_ERROR_UNRECOVERABLE 1
+#define CO_CACHE_ERROR_WB_PARITY 2
+#define CO_CACHE_ERROR_TLB_PARITY 3
+
+extern unsigned long long cache_err_dcache[NR_CPUS];
+
 #endif /* __ASM_OCTEON_OCTEON_H */
diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h
index f41cf3e..6b896c7 100644
--- a/arch/mips/include/asm/traps.h
+++ b/arch/mips/include/asm/traps.h
@@ -21,6 +21,15 @@
 extern void (*board_be_init)(void);
 extern int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
 
+/*
+ * Possible status responses for a board_mcheck_handler backend.
+ */
+#define MIPS_MC_DISCARD        0               /* return with no action */
+#define MIPS_MC_NOT_HANDLED    1       /* default handling */
+#define MIPS_MC_FATAL  2               /* treat as an unrecoverable error */
+
+extern int (*board_mcheck_handler)(struct pt_regs *regs);
+
 extern void (*board_nmi_handler_setup)(void);
 extern void (*board_ejtag_handler_setup)(void);
 extern void (*board_bind_eic_interrupt)(int irq, int regset);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 81e6ae0..321657d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -88,6 +88,7 @@ extern asmlinkage void handle_reserved(void);
 
 void (*board_be_init)(void);
 int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+int (*board_mcheck_handler)(struct pt_regs *regs);
 void (*board_nmi_handler_setup)(void);
 void (*board_ejtag_handler_setup)(void);
 void (*board_bind_eic_interrupt)(int irq, int regset);
@@ -1232,7 +1233,21 @@ asmlinkage void do_mcheck(struct pt_regs *regs)
        enum ctx_state prev_state;
 
        prev_state = exception_enter();
-       show_regs(regs);
+       /* Let the board handler handle or classify the machine check first. */
+       if (board_mcheck_handler) {
+               int resp = board_mcheck_handler(regs);
+
+               if (resp == MIPS_MC_DISCARD) {
+                       /* Handled: balance exception_enter() before resuming. */
+                       exception_exit(prev_state);
+                       return;
+               }
+               /* Fatal board error: skip the multi-match dump below. */
+               if (resp == MIPS_MC_FATAL)
+                       multi_match = 0;
+       }
+
+       show_registers(regs);
 
        if (multi_match) {
                printk("Index   : %0x\n", read_c0_index());
diff --git a/arch/mips/mm/c-octeon.c b/arch/mips/mm/c-octeon.c
index f41a5c5..14d91ee 100644
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@@ -31,6 +31,8 @@
 unsigned long long cache_err_dcache[NR_CPUS];
 EXPORT_SYMBOL_GPL(cache_err_dcache);
 
+static RAW_NOTIFIER_HEAD(co_cache_error_chain);
+
 /**
  * Octeon automatically flushes the dcache on tlb changes, so
  * from Linux's viewpoint it acts much like a physically
@@ -176,6 +178,148 @@ static void octeon_flush_kernel_vmap_range(unsigned long 
vaddr, int size)
        BUG();
 }
 
+/*
+ * OCTEON II bus error handler.  A write buffer parity error (bit 1 of
+ * CacheErr(dcache)) is always MIPS_BE_FATAL, since the write buffer held the
+ * only copy of the data.  Logged here unless a notifier returns NOTIFY_OK.
+ */
+static int octeon2_be_handler(struct pt_regs *regs, int is_fixup)
+{
+       u64 dcache_err;
+       u64 wbfperr_mask = 1ULL << 1;
+
+       dcache_err = read_octeon_c0_dcacheerr();
+       if (dcache_err & wbfperr_mask) {
+               int rv = raw_notifier_call_chain(&co_cache_error_chain,
+                                               CO_CACHE_ERROR_WB_PARITY,
+                                               NULL);
+               if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) {
+                       unsigned int coreid = cvmx_get_core_num();
+
+                       pr_err("Core%u: Write buffer parity error:\n", coreid);
+                       pr_err("CacheErr (Dcache) == %llx\n", dcache_err);
+               }
+               /* Write the flag bit back to clear the indicator. */
+               write_octeon_c0_dcacheerr(wbfperr_mask);
+               return MIPS_BE_FATAL;
+       }
+       if (is_fixup)
+               return MIPS_BE_FIXUP;
+       else
+               return MIPS_BE_FATAL;
+}
+
+/*
+ * OCTEON II MachineCheck handler; TLB parity errors (CacheErr(dcache) bit 5)
+ * raise MachineCheck.  Returns one of the MIPS_MC_* codes to do_mcheck().
+ */
+static int octeon2_mcheck_handler(struct pt_regs *regs)
+{
+       u64 dcache_err;
+       u64 tlbperr_mask = 1ULL << 5;
+       dcache_err = read_octeon_c0_dcacheerr();
+       if (dcache_err & tlbperr_mask) {
+               int rv;
+               union octeon_cvmemctl cvmmemctl;
+
+               /* Clear the indicator */
+               write_octeon_c0_dcacheerr(tlbperr_mask);
+               /*
+                * Blow everything away to (hopefully) write good
+                * parity to all TLB entries
+                */
+               local_flush_tlb_all();
+               /* Reenable TLB parity error reporting. */
+               cvmmemctl.u64 = read_c0_cvmmemctl();
+               cvmmemctl.s.tlbperrena = 1;
+               write_c0_cvmmemctl(cvmmemctl.u64);
+               /* Fatal unless a notifier returns NOTIFY_OK (stop bit ignored). */
+               rv = raw_notifier_call_chain(&co_cache_error_chain,
+                                               CO_CACHE_ERROR_TLB_PARITY,
+                                               NULL);
+               if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) {
+                       unsigned int coreid = cvmx_get_core_num();
+
+                       pr_err("Core%u: TLB parity error:\n", coreid);
+                       return MIPS_MC_FATAL;
+               }
+
+               return MIPS_MC_DISCARD;
+       }
+       return MIPS_MC_NOT_HANDLED;
+}
+
+/*
+ * OCTEON3 bus error handler.  A write buffer parity error (ErrCtl bit 9) is
+ * always MIPS_BE_FATAL: the write buffer held the only copy of the data.
+ * NOTE(review): the log text below still names CacheErr (Dcache), not ErrCtl.
+ */
+static int octeon3_be_handler(struct pt_regs *regs, int is_fixup)
+{
+       u64 dcache_err;
+       u64 wbfperr_mask = 1ULL << 9;
+
+       dcache_err = read_octeon_c0_errctl();
+       if (dcache_err & wbfperr_mask) {
+               int rv = raw_notifier_call_chain(&co_cache_error_chain,
+                                               CO_CACHE_ERROR_WB_PARITY,
+                                               NULL);
+               if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) {
+                       unsigned int coreid = cvmx_get_core_num();
+
+                       pr_err("Core%u: Write buffer parity error:\n", coreid);
+                       pr_err("CacheErr (Dcache) == %llx\n", dcache_err);
+               }
+               /* Write the flag bit back to clear the indicator. */
+               write_octeon_c0_errctl(wbfperr_mask);
+               return MIPS_BE_FATAL;
+       }
+       if (is_fixup)
+               return MIPS_BE_FIXUP;
+       else
+               return MIPS_BE_FATAL;
+}
+
+/*
+ * OCTEON3 MachineCheck handler; TLB parity errors (ErrCtl bit 14) raise
+ * MachineCheck.  Returns one of the MIPS_MC_* codes to do_mcheck().
+ */
+static int octeon3_mcheck_handler(struct pt_regs *regs)
+{
+       u64 dcache_err;
+       u64 tlbperr_mask = 1ULL << 14;
+       dcache_err = read_octeon_c0_errctl();
+       if (dcache_err & tlbperr_mask) {
+               int rv;
+               union octeon_cvmemctl cvmmemctl;
+
+               /* Clear the indicator */
+               write_octeon_c0_errctl(tlbperr_mask);
+               /*
+                * Blow everything away to (hopefully) write good
+                * parity to all TLB entries
+                */
+               local_flush_tlb_all();
+               /* Reenable TLB parity error reporting. */
+               cvmmemctl.u64 = read_c0_cvmmemctl();
+               cvmmemctl.s.tlbperrena = 1;
+               write_c0_cvmmemctl(cvmmemctl.u64);
+               /* Fatal unless a notifier returns NOTIFY_OK (stop bit ignored). */
+               rv = raw_notifier_call_chain(&co_cache_error_chain,
+                                               CO_CACHE_ERROR_TLB_PARITY,
+                                               NULL);
+               if ((rv & ~NOTIFY_STOP_MASK) != NOTIFY_OK) {
+                       unsigned int coreid = cvmx_get_core_num();
+
+                       pr_err("Core%u: TLB parity error:\n", coreid);
+                       return MIPS_MC_FATAL;
+               }
+
+               return MIPS_MC_DISCARD;
+       }
+       return MIPS_MC_NOT_HANDLED;
+}
+
 /**
  * Probe Octeon's caches
  *
@@ -223,6 +367,9 @@ static void probe_octeon(void)
                c->dcache.sets = 8;
                dcache_size = c->dcache.sets * c->dcache.ways * 
c->dcache.linesz;
                c->options |= MIPS_CPU_PREFETCH;
+
+               board_be_handler = octeon2_be_handler;
+               board_mcheck_handler = octeon2_mcheck_handler;
                break;
 
        case CPU_CAVIUM_OCTEON3:
@@ -237,6 +384,9 @@ static void probe_octeon(void)
                c->dcache.sets = 8;
                dcache_size = c->dcache.sets * c->dcache.ways * 
c->dcache.linesz;
                c->options |= MIPS_CPU_PREFETCH;
+
+               board_be_handler = octeon3_be_handler;
+               board_mcheck_handler = octeon3_mcheck_handler;
                break;
 
        default:
@@ -268,8 +418,13 @@ static void probe_octeon(void)
 
 static void  octeon_cache_error_setup(void)
 {
-       extern char except_vec2_octeon;
-       set_handler(0x100, &except_vec2_octeon, 0x80);
+       if (current_cpu_type() == CPU_CAVIUM_OCTEON3) {
+               extern char except_vec2_octeon3;
+               set_handler(0x100, &except_vec2_octeon3, 0x80);
+       } else {
+               extern char except_vec2_octeon;
+               set_handler(0x100, &except_vec2_octeon, 0x80);
+       }
 }
 
 /**
@@ -304,8 +459,6 @@ void octeon_cache_init(void)
 /*
  * Handle a cache error exception
  */
-static RAW_NOTIFIER_HEAD(co_cache_error_chain);
-
 int register_co_cache_error_notifier(struct notifier_block *nb)
 {
        return raw_notifier_chain_register(&co_cache_error_chain, nb);
@@ -330,7 +483,10 @@ static void co_cache_error_call_notifiers(unsigned long 
val)
                        dcache_err = cache_err_dcache[coreid];
                        cache_err_dcache[coreid] = 0;
                } else {
-                       dcache_err = read_octeon_c0_dcacheerr();
+                       if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+                               dcache_err = read_octeon_c0_errctl();
+                       else
+                               dcache_err = read_octeon_c0_dcacheerr();
                }
 
                pr_err("Core%lu: Cache error exception:\n", coreid);
@@ -353,7 +509,7 @@ static void co_cache_error_call_notifiers(unsigned long val)
 
 asmlinkage void cache_parity_error_octeon_recoverable(void)
 {
-       co_cache_error_call_notifiers(0);
+       co_cache_error_call_notifiers(CO_CACHE_ERROR_RECOVERABLE);
 }
 
 /**
@@ -362,6 +518,6 @@ asmlinkage void cache_parity_error_octeon_recoverable(void)
 
 asmlinkage void cache_parity_error_octeon_non_recoverable(void)
 {
-       co_cache_error_call_notifiers(1);
+       co_cache_error_call_notifiers(CO_CACHE_ERROR_UNRECOVERABLE);
        panic("Can't handle cache error: nested exception");
 }
diff --git a/arch/mips/mm/cex-oct.S b/arch/mips/mm/cex-oct.S
index 9029092..e73fec3 100644
--- a/arch/mips/mm/cex-oct.S
+++ b/arch/mips/mm/cex-oct.S
@@ -51,6 +51,43 @@
        .set    pop
        END(except_vec2_octeon)
 
+       /* Cache error exception vector for OCTEON3 (uses COP0 ErrCtl). */
+       LEAF(except_vec2_octeon3)
+
+       .set    push
+       .set    mips64r2
+       .set    noreorder
+       .set    noat
+
+
+       /* Due to an erratum we need to read the error register (COP0
+        * ErrCtl here) before any cache/DRAM access -- TODO confirm  */
+
+       rdhwr   k0, $0        /* get core_id */
+       PTR_LA  k1, cache_err_dcache
+       sll     k0, k0, 3     /* core_id * 8: entries are 64-bit */
+       PTR_ADDU k1, k0, k1    /* k1 = &cache_err_dcache[core_id] */
+
+       dmfc0   k0, CP0_ERRCTL  /* snapshot ErrCtl */
+       sd      k0, (k1)        /* cache_err_dcache[core_id] = ErrCtl */
+       andi    k0, 1           /* Write 1 to clear Dcache parity error */
+       dmtc0   k0, CP0_ERRCTL
+
+       /* check whether this is a nested exception */
+       mfc0    k1, CP0_STATUS
+       andi    k1, k1, ST0_EXL /* non-zero => nested exception */
+       beqz    k1, 2f
+       nop                     /* branch delay slot (noreorder) */
+       j       cache_parity_error_octeon_non_recoverable
+       nop                     /* jump delay slot */
+
+       /* exception is recoverable */
+2 :    j       handle_cache_err
+       nop                     /* jump delay slot */
+
+       .set    pop
+       END(except_vec2_octeon3)
+
  /* We need to jump to handle_cache_err so that the previous handler
   * can fit within 0x80 bytes. We also move from 0xFFFFFFFFAXXXXXXX
   * space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached). */
diff --git a/drivers/edac/octeon_edac-pc.c b/drivers/edac/octeon_edac-pc.c
index 380cbe2..5f41e2f 100644
--- a/drivers/edac/octeon_edac-pc.c
+++ b/drivers/edac/octeon_edac-pc.c
@@ -19,13 +19,9 @@
 #include "edac_module.h"
 
 #include <asm/octeon/cvmx.h>
+#include <asm/octeon/octeon.h>
 #include <asm/mipsregs.h>
 
-extern int register_co_cache_error_notifier(struct notifier_block *nb);
-extern int unregister_co_cache_error_notifier(struct notifier_block *nb);
-
-extern unsigned long long cache_err_dcache[NR_CPUS];
-
 struct co_cache_error {
        struct notifier_block notifier;
        struct edac_device_ctl_info *ed;
@@ -47,11 +43,26 @@ static int  co_cache_error_event(struct notifier_block 
*this,
        u64 icache_err = read_octeon_c0_icacheerr();
        u64 dcache_err;
 
-       if (event) {
+       switch (event) {
+       case CO_CACHE_ERROR_UNRECOVERABLE:
                dcache_err = cache_err_dcache[core];
                cache_err_dcache[core] = 0;
-       } else {
-               dcache_err = read_octeon_c0_dcacheerr();
+               break;
+       case CO_CACHE_ERROR_RECOVERABLE:
+               if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+                       dcache_err = read_octeon_c0_errctl();
+               else
+                       dcache_err = read_octeon_c0_dcacheerr();
+               break;
+       case CO_CACHE_ERROR_WB_PARITY:
+               edac_device_printk(p->ed, KERN_ERR,
+                                       "CacheErr (WB Parity): core %d/cpu 
%d\n",
+                                       core, cpu);
+               edac_device_handle_ue(p->ed, cpu, 2, "write-buffer");
+               return NOTIFY_STOP;
+       default:
+               WARN(1, "Unknown event: %lu\n", event);
+               return NOTIFY_BAD;
        }
 
        if (icache_err & 1) {
@@ -73,12 +84,13 @@ static int  co_cache_error_event(struct notifier_block 
*this,
                        edac_device_handle_ce(p->ed, cpu, 0, "dcache");
 
                /* Clear the error indication */
-               if (current_cpu_type() == CPU_CAVIUM_OCTEON2)
+               if (current_cpu_type() == CPU_CAVIUM_OCTEON3)
+                       write_octeon_c0_errctl(1);
+               else if (current_cpu_type() == CPU_CAVIUM_OCTEON2)
                        write_octeon_c0_dcacheerr(1);
                else
                        write_octeon_c0_dcacheerr(0);
        }
-
        return NOTIFY_STOP;
 }
 
@@ -93,7 +105,7 @@ static int co_cache_error_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, p);
 
        p->ed = edac_device_alloc_ctl_info(0, "cpu", num_possible_cpus(),
-                                          "cache", 2, 0, NULL, 0,
+                                          "cache", 3, 0, NULL, 0,
                                           edac_device_alloc_index());
        if (!p->ed)
                goto err;
-- 
1.8.1.4

-- 
_______________________________________________
linux-yocto mailing list
linux-yocto@yoctoproject.org
https://lists.yoctoproject.org/listinfo/linux-yocto

Reply via email to