From: John Jacques <john.jacq...@intel.com> - Added arm64 edac support. - Adjusted AXM55xx edac driver for AXM56xx with registers, regions, configs, naming, kbuild. - Separated source code for AXM56xx and AXM55xx. - Added MPR registers dump (page1) functionality. - Fixes (error handling, synchronization, etc). - Improved module error handling and naming.
Signed-off-by: Marek Majtyka <marekx.majt...@intel.com> Signed-off-by: John Jacques <john.jacq...@intel.com> --- .../devicetree/bindings/arm/axxia/edac.txt | 14 + arch/arm64/Kconfig | 1 + arch/arm64/boot/dts/intel/axm5616-victoria.dts | 8 + arch/arm64/boot/dts/intel/axm56xx.dtsi | 18 + arch/arm64/include/asm/edac.h | 13 + drivers/edac/Kconfig | 14 +- drivers/edac/Makefile | 3 +- drivers/edac/axxia_edac-mc.c | 19 +- drivers/edac/axxia_edac-mc_56xx.c | 1479 ++++++++++++++++++++ 9 files changed, 1551 insertions(+), 18 deletions(-) create mode 100644 Documentation/devicetree/bindings/arm/axxia/edac.txt create mode 100644 arch/arm64/include/asm/edac.h create mode 100644 drivers/edac/axxia_edac-mc_56xx.c diff --git a/Documentation/devicetree/bindings/arm/axxia/edac.txt b/Documentation/devicetree/bindings/arm/axxia/edac.txt new file mode 100644 index 0000000..65078eb --- /dev/null +++ b/Documentation/devicetree/bindings/arm/axxia/edac.txt @@ -0,0 +1,14 @@ +Axxia Error Detection & Correction [EDAC] +The EDAC accesses a range of registers in the memory controllers. 
+ +Required properties: +- compatible : should contain "intel,smmon" +- interrupts : Should contain the SYSMEM controller IRQ + +Example: + sm0: sm0@00220000 { + compatible = "intel,smmon"; + reg = <0 0x00220000 0 0x1000>; + syscon = <&syscon>; + interrupts = <0 238 4>; + }; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 03d63b3..a386e46 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -24,6 +24,7 @@ config ARM64 select COMMON_CLK select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS + select EDAC_SUPPORT select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP diff --git a/arch/arm64/boot/dts/intel/axm5616-victoria.dts b/arch/arm64/boot/dts/intel/axm5616-victoria.dts index 9c787b4..89fa952 100644 --- a/arch/arm64/boot/dts/intel/axm5616-victoria.dts +++ b/arch/arm64/boot/dts/intel/axm5616-victoria.dts @@ -205,3 +205,11 @@ &trng { status = "okay"; }; + +&sm1 { + status = "okay"; +}; + +&sm0 { + status = "okay"; +}; diff --git a/arch/arm64/boot/dts/intel/axm56xx.dtsi b/arch/arm64/boot/dts/intel/axm56xx.dtsi index 4ce9e92..7c531ab 100644 --- a/arch/arm64/boot/dts/intel/axm56xx.dtsi +++ b/arch/arm64/boot/dts/intel/axm56xx.dtsi @@ -38,6 +38,8 @@ i2c3 = &i2c3; gpdma0 = &gpdma0; gpdma1 = &gpdma1; + sm0 = &sm0; + sm1 = &sm1; }; clocks { @@ -94,6 +96,22 @@ reg = <0x80 0x02c00000 0 0x40000>; }; + sm0: sm0@00220000 { + compatible = "intel,smmon"; + reg = <0 0x00220000 0 0x1000>; + syscon = <&syscon>; + interrupts = <0 238 4>; + status = "disabled"; + }; + + sm1: sm1@000f0000 { + compatible = "intel,smmon"; + reg = <0 0x000f0000 0 0x1000>; + syscon = <&syscon>; + interrupts = <0 239 4>; + status = "disabled"; + }; + reset: reset@8002C02008 { compatible = "intel,axm56xx-reset"; syscon = <&syscon>; diff --git a/arch/arm64/include/asm/edac.h b/arch/arm64/include/asm/edac.h new file mode 100644 index 0000000..8218fa8 --- /dev/null +++ b/arch/arm64/include/asm/edac.h @@ -0,0 +1,13 @@ +#ifndef ASM_EDAC_H 
+#define ASM_EDAC_H +/* + * ECC atomic, DMA, SMP and interrupt safe scrub function. + * Implements the per arch atomic_scrub() that EDAC use for software + * ECC scrubbing. It reads memory and then writes back the original + * value, allowing the hardware to detect and correct memory errors. + */ +static inline void atomic_scrub(void *va, u32 size) +{ + WARN_ONCE(1, "not implemented"); +} +#endif diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 235bd4d..6863d2a 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -386,8 +386,18 @@ config EDAC_OCTEON_PCI Support for error detection and correction on the Cavium Octeon family of SOCs. -config EDAC_AXXIA_SYSMEM - tristate "AXXIA EDAC SysMem Controller" +config EDAC_AXXIA_SYSMEM_5600 + depends on ARCH_AXXIA + bool "AXXIA EDAC SysMem Controller for 5600" + help + Support for System Memory Denali controller error + detection on the AXXIA AXM56xx devices. This enables + the System Memory error detection. System Memory error + detection is interrupt driven. + +config EDAC_AXXIA_SYSMEM_5500 + bool "AXXIA EDAC SysMem Controller for 5500" + depends on ARCH_AXXIA help Support for System Memory Denali controller error detection on the AXXIA AXM55xx devices. 
This enables diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 901da02..65dfcf8 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -16,7 +16,8 @@ ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif -obj-$(CONFIG_EDAC_AXXIA_SYSMEM) += axxia_edac-mc.o +obj-$(CONFIG_EDAC_AXXIA_SYSMEM_5500) += axxia_edac-mc.o +obj-$(CONFIG_EDAC_AXXIA_SYSMEM_5600) += axxia_edac-mc_56xx.o obj-$(CONFIG_EDAC_AXXIA_L3) += axxia_edac-l3.o obj-$(CONFIG_EDAC_AXXIA_L2_CPU) += axxia_edac-l2_cpu.o obj-$(CONFIG_EDAC_GHES) += ghes_edac.o diff --git a/drivers/edac/axxia_edac-mc.c b/drivers/edac/axxia_edac-mc.c index 30c7843..b3f5241 100644 --- a/drivers/edac/axxia_edac-mc.c +++ b/drivers/edac/axxia_edac-mc.c @@ -1,23 +1,12 @@ /* * drivers/edac/axxia_edac-mc.c * - * EDAC Driver for Avago's Axxia 5500 System Memory Controller + * EDAC Driver for Intel's Axxia 5600 Memory Controller * - * Copyright (C) 2010 LSI Inc. + * Copyright (C) 2016 Intel * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * This file may be distributed under the terms of the + * GNU General Public License. 
* */ diff --git a/drivers/edac/axxia_edac-mc_56xx.c b/drivers/edac/axxia_edac-mc_56xx.c new file mode 100644 index 0000000..54dac0e --- /dev/null +++ b/drivers/edac/axxia_edac-mc_56xx.c @@ -0,0 +1,1479 @@ +/* + * drivers/edac/axxia_edac-mc.c + * + * EDAC Driver for Intel's Axxia 5600 System Memory Controller + * + * Copyright (C) 2016 Intel Inc. + * + * This file may be distributed under the terms of the + * GNU General Public License. + */ + +#include <linux/module.h> +#include <linux/kthread.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/lsi-ncr.h> +#include <linux/edac.h> +#include <linux/of_platform.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/irq.h> +#include <linux/platform_device.h> +#include <linux/reboot.h> +#include <linux/mfd/syscon.h> +#include <linux/regmap.h> +#include <linux/interrupt.h> +#include "edac_core.h" +#include "edac_module.h" + +#define FMT "%s: syscon lookup failed hence using hardcoded register address\n" + +#define MPR_FMT9 "\n%3d %#010x %#010x %#010x %#010x"\ + " %#010x %#010x %#010x %#010x %#010x" + +#define MPR_FMT16 " %#010x %#010x %#010x %#010x"\ + " %#010x %#010x %#010x %#010x %#010x" + +#define MPR_HDR9 "Lp. 
dram0 dram1 dram2 dram3"\ + " dram4 dram5 dram6 dram7 dram8" + +#define MPR_HDR18 " dram9 dram10 dram11 dram12"\ + " dram13 dram14 dram15 dram16 dram17" + +#define INTEL_EDAC_MOD_STR "axxia56xx_edac" + +#define APB2_SER3_PHY_ADDR 0x008002c00000ULL +#define APB2_SER3_PHY_SIZE 110008 + +#define SM_MPR_PAGE 0x1 + +#define SM_56XX_DENALI_CTL_00 0x0 +#define SM_56XX_DENALI_CTL_57 0xe4 +#define SM_56XX_DENALI_CTL_117 0x1d4 +#define SM_56XX_DENALI_CTL_123 0x1ec + +/* INT STATUS */ +#define SM_56XX_DENALI_CTL_366 0x5b8 +#define SM_56XX_DENALI_CTL_367 0x5bc + +/* INT ACK */ +#define SM_56XX_DENALI_CTL_368 0x5c0 +#define SM_56XX_DENALI_CTL_369 0x5c4 + +/* INT MASK */ +#define SM_56XX_DENALI_CTL_370 0x5c8 +#define SM_56XX_DENALI_CTL_371 0x5cc + +/* MPR PAGE */ +#define SM_56XX_DENALI_CTL_58 0xe8 +#define SM_56XX_DENALI_CTL_59 0xec +#define SM_56XX_DENALI_CTL_60 0xf0 +#define SM_56XX_DENALI_CTL_61 0xf4 +#define SM_56XX_DENALI_CTL_62 0xf8 + +/* TODO CHECK */ +#define APB2_PERSIST_SCRATCH 0xdc +#define SMEM_PERSIST_SCRATCH_BIT (0x1 << 3) + +#define IRQ_NAME_LEN 16 +#define MEMORY_CONTROLLERS 4 +#define MAX_DQ 18 +#define MAX_CS 4 +#define MPR_CIRCULAR_BUF_LEN 16 +#define MPR_PAGE_BYTES 4 +#define MPR_ERRORS 2 /* CRC, CA Parity error */ + +#define SM_INT_MASK_LOW (0xfbbfef01) +#define SM_INT_MASK_ALL_LOW (0xffffffff) +#define SM_INT_MASK_HIGH (0x1) +#define SM_INT_MASK_ALL_HIGH (0x7) + +static int log = 1; +module_param(log, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(log, "Log each error to kernel log."); + +static int force_restart = 1; +module_param(force_restart, int, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(force_restart, "Machine restart on fatal error."); + +static atomic64_t mc_counter = ATOMIC_INIT(0); +/* + Bit [34] = Logical OR of all lower bits. + Bit [33] = A CRC error occurred on the write data bus. + Bit [32] = The software-initiated control word write has completed. + Bit [31] = The user-initiated DLL resync has completed. 
+ Bit [30] = A state change has been detected on the + dfi_init_complete signal after initialization. + Bit [29] = The assertion of the INHIBIT_DRAM_CMD parameter has + successfully inhibited the command queue. + Bit [28] = The register interface-initiated mode register write has + completed and another mode register write may be issued. + Bit [27] = A Low Power Interface (LPI) timeout error has occurred. + Bit [26] = MPR read command, initiated with a software MPR_READ request, + is complete. + Bit [25] = Error received from the PHY on the DFI bus. + Bit [24] = RESERVED + Bit [23] = RESERVED + Bit [22] = A parity error has been detected on the address/control bus + on a registered DIMM. + Bit [21] = The leveling operation has completed. + Bit [20] = A read leveling gate training operation has been requested. + Bit [19] = A read leveling operation has been requested. + Bit [18] = A write leveling operation has been requested. + Bit [17] = A DFI update error has occurred. Error information can be + found in the UPDATE_ERROR_STATUS parameter. + Bit [16] = A write leveling error has occurred. Error information can + be found in the WRLVL_ERROR_STATUS parameter. + Bit [15] = A read leveling gate training error has occurred. Error + information can be found in the RDLVL_ERROR_STATUS parameter. + Bit [14] = A read leveling error has occurred. Error information can be + found in the RDLVL_ERROR_STATUS parameter. + Bit [13] = The user has programmed an invalid setting associated with + user words per burst. + Examples: Setting param_reduc when burst length = 2. A 1:2 + MC:PHY clock ratio with burst length = 2. + Bit [12] = A wrap cycle crossing a DRAM page has been detected. This + is unsupported & may result in memory data corruption. + Bit [11] = A write was attempted to a writeprotected region. + Bit [10] = The BIST operation has been completed. + Bit [9] = The low power operation has been completed. + Bit [8] = The MC initialization has been completed. 
+ Bit [7] = An error occurred on the port command channel. + Bit [6] = Multiple uncorrectable ECC events have been detected. + Bit [5] = An uncorrectable ECC event has been detected. + Bit [4] = Multiple correctable ECC events have been detected. + Bit [3] = A correctable ECC event has been detected. + Bit [2] = Multiple accesses outside the defined PHYSICAL memory space + have occurred. + Bit [1] = A memory access outside the defined PHYSICAL memory space + has occurred. + Bit [0] = The memory reset is valid on the DFI bus. + + Of these 1, 2, 3, 4, 5, 6, 7, 12, 22 and 26 are of interest. +*/ + +/* + * MPR dump processing - overview. + * + * As ALERT_N does not have information about failing cs + * one need to collect dumps for all available cs. Below given example + * for two cs0/cs1. + * + * SMEM MC smmon_isr smmon_wq + * | | | + * | | | + * |ALERT_N - int_status bit [33] | + * |------------------>| | + * | |schedule smmon_wq | + * | |------------------>| + * | | |if(dump_in_progress==0) + * | | | dump_in_progress=1 + * | | | + * |CTL_57 cs0 page1 (trigger dump) | + * |<--------------------------------------| + * | | |wait + * |int_status bit [26]| | + * |------------------>| | + * | |wake up smmon_wq | + * | |------------------>| + * |read MPR CTL_58-78 | | + * |<--------------------------------------|collect cs0 MPR page1 + * | | | + * | | | + * |CTL_57 cs1 page1 (trigger dump) | + * |<--------------------------------------| + * | | |wait + * |int_status bit [26]| | + * |------------------>| | + * | |wake up smmon_wq | + * | |------------------>| + * |read MPR CTL_58-78 | | + * |<--------------------------------------|collect cs1 MPR page1 + * | | | + * | | |process dumps + * | | |dump_in_progress=0 + * | | | + */ + +/* INT_STATUS */ +struct __packed sm_56xx_denali_ctl_366 +{ + unsigned int int_status; +}; + +struct __packed sm_56xx_denali_ctl_367 +{ +#ifdef CPU_BIG_ENDIAN + unsigned obsolete1 : 24; + unsigned reserved0 : 5; + unsigned int_status : 3; +#else /* 
Little Endian */ + unsigned int_status : 3; + unsigned reserved0 : 5; + unsigned obsolete1 : 24; +#endif +}; + +/* ACK */ +struct __packed sm_56xx_denali_ctl_368 +{ + unsigned int int_ack; +}; + +struct __packed sm_56xx_denali_ctl_369 +{ +#ifdef CPU_BIG_ENDIAN + unsigned obsolete1 : 24; + unsigned reserved0 : 6; + unsigned int_ack : 2; +#else /* Little Endian */ + unsigned int_ack : 2; + unsigned reserved0 : 6; + unsigned obsolete1 : 24; +#endif +}; +/* MASK */ + +struct __packed sm_56xx_denali_ctl_370 +{ + unsigned int int_mask; +}; + +struct __packed sm_56xx_denali_ctl_371 +{ +#ifdef CPU_BIG_ENDIAN + unsigned reserved0 : 4; + unsigned odt_rd_map_cs3 : 4; + unsigned reserved1 : 4; + unsigned odt_wr_map_cs2 : 4; + unsigned reserved2 : 4; + unsigned odt_rd_map_cs2 : 4; + unsigned reserved3 : 5; + unsigned int_mask : 3; +#else /* Little Endian */ + unsigned int_mask : 3; + unsigned reserved3 : 5; + unsigned odt_rd_map_cs2 : 4; + unsigned reserved2 : 4; + unsigned odt_wr_map_cs2 : 4; + unsigned reserved1 : 4; + unsigned odt_rd_map_cs3 : 4; + unsigned reserved0 : 4; +#endif +}; + +/* DRAM CLASS */ +struct __packed sm_56xx_denali_ctl_00 +{ +#ifdef CPU_BIG_ENDIAN + unsigned version : 16; + unsigned reserved0 : 4; + unsigned dram_class : 4; + unsigned reserved1 : 7; + unsigned start : 1; +#else /* Little Endian */ + unsigned start : 1; + unsigned reserved1 : 7; + unsigned dram_class : 4; + unsigned reserved0 : 4; + unsigned version : 16; +#endif +}; + +/* Trigger MPR */ +struct __packed sm_56xx_denali_ctl_57 +{ +#ifdef CPU_BIG_ENDIAN + unsigned obsolete3 : 8; + unsigned reserved2 : 3; + unsigned read_mpr : 5; + unsigned reserved3 : 7; + unsigned reserved1 : 1; + unsigned mrw_status : 8; +#else /* Little Endian */ + unsigned mrw_status : 8; + unsigned reserved1 : 1; + unsigned reserved3 : 7; + unsigned read_mpr : 5; + unsigned reserved2 : 3; + unsigned obsolete3 : 8; +#endif +}; + +struct __packed sm_56xx_denali_ctl_117 +{ +#ifdef CPU_BIG_ENDIAN + unsigned reserved0 : 5; + 
unsigned row_diff : 3; + unsigned reserved1 : 6; + unsigned bank_diff : 2; + unsigned reserved2 : 7; + unsigned zqcs_rotate : 1; + unsigned reserved3 : 7; + unsigned zq_in_progress : 1; +#else /* Little Endian */ + unsigned zq_in_progress : 1; + unsigned reserved3 : 7; + unsigned zqcs_rotate : 1; + unsigned reserved2 : 7; + unsigned bank_diff : 2; + unsigned reserved1 : 6; + unsigned row_diff : 3; + unsigned reserved0 : 5; +#endif +}; + +struct __packed sm_56xx_denali_ctl_123 +{ +#ifdef CPU_BIG_ENDIAN + unsigned reserved0 : 5; + unsigned memdata_ratio_0 : 3; + unsigned reserved1 : 7; + unsigned reduc : 1; + unsigned reserved2 : 4; + unsigned burst_on_fly_bit : 4; + unsigned reserved3 : 4; + unsigned cs_map : 4; +#else /* Little Endian */ + unsigned cs_map : 4; + unsigned reserved3 : 4; + unsigned burst_on_fly_bit : 4; + unsigned reserved2 : 4; + unsigned reduc : 1; + unsigned reserved1 : 7; + unsigned memdata_ratio_0 : 3; + unsigned reserved0 : 5; +#endif +}; + +struct __packed mpr_dump { +/*! @brief This specifies 8 bits of page information from 4 registers per + * DRAM as per JEDEC + */ + u8 dram_0_page[MPR_PAGE_BYTES]; /* X9-SMEM/X9-CMEM, XLF-SMEM/XLF-CMEM */ + u8 dram_1_page[MPR_PAGE_BYTES]; /* X9-SMEM/X9-CMEM, XLF-SMEM/XLF-CMEM */ + u8 dram_2_page[MPR_PAGE_BYTES]; /* X9-SMEM, XLF-SMEM */ + u8 dram_3_page[MPR_PAGE_BYTES]; /* X9-SMEM, XLF-SMEM */ + u8 dram_4_page[MPR_PAGE_BYTES]; /* X9-SMEM, XLF-SMEM */ + u8 dram_5_page[MPR_PAGE_BYTES]; /* X9-SMEM */ + u8 dram_6_page[MPR_PAGE_BYTES]; /* X9-SMEM */ + u8 dram_7_page[MPR_PAGE_BYTES]; /* X9-SMEM */ + u8 dram_8_page[MPR_PAGE_BYTES]; /* X9-SMEM */ + u8 dram_9_page[MPR_PAGE_BYTES]; + u8 dram_10_page[MPR_PAGE_BYTES]; + u8 dram_11_page[MPR_PAGE_BYTES]; + u8 dram_12_page[MPR_PAGE_BYTES]; + u8 dram_13_page[MPR_PAGE_BYTES]; + u8 dram_14_page[MPR_PAGE_BYTES]; + u8 dram_15_page[MPR_PAGE_BYTES]; + u8 dram_16_page[MPR_PAGE_BYTES]; + u8 dram_17_page[MPR_PAGE_BYTES]; + +/*! @brief specifies which MPR Page(n) to read per JEDEC. 
2-bit width */ + u8 mpr_page_id; + u8 cs; +}; + +enum events { + EV_ILLEGAL = 0, + EV_MULT_ILLEGAL, + EV_CORR_ECC, + EV_MULT_CORR_ECC, + EV_UNCORR_ECC, + EV_MULT_UNCORR_ECC, + EV_PORT_ERROR, + EV_WRAP_ERROR, + EV_PARITY_ERROR, + NR_EVENTS +}; + + +static char *block_name[] = { + "illegal", + "mult_illegal", + "ecc", + "mult_ecc", + "uncorr_ecc", + "mult_uncorr_ecc", + "port_error", + "wrap_error", + "parity_error", + "alert_n_cs0_dram0_ca_par_error", + "alert_n_cs0_dram0_crc_error", + "alert_n_cs0_dram1_ca_par_error", + "alert_n_cs0_dram1_crc_error", + "alert_n_cs0_dram2_ca_par_error", + "alert_n_cs0_dram2_crc_error", + "alert_n_cs0_dram3_ca_par_error", + "alert_n_cs0_dram3_crc_error", + "alert_n_cs0_dram4_ca_par_error", + "alert_n_cs0_dram4_crc_error", + "alert_n_cs0_dram5_ca_par_error", + "alert_n_cs0_dram5_crc_error", + "alert_n_cs0_dram6_ca_par_error", + "alert_n_cs0_dram6_crc_error", + "alert_n_cs0_dram7_ca_par_error", + "alert_n_cs0_dram7_crc_error", + "alert_n_cs0_dram8_ca_par_error", + "alert_n_cs0_dram8_crc_error", + "alert_n_cs0_dram9_ca_par_error", + "alert_n_cs0_dram9_crc_error", + "alert_n_cs0_dram10_ca_par_error", + "alert_n_cs0_dram10_crc_error", + "alert_n_cs0_dram11_ca_par_error", + "alert_n_cs0_dram11_crc_error", + "alert_n_cs0_dram12_ca_par_error", + "alert_n_cs0_dram12_crc_error", + "alert_n_cs0_dram13_ca_par_error", + "alert_n_cs0_dram13_crc_error", + "alert_n_cs0_dram14_ca_par_error", + "alert_n_cs0_dram14_crc_error", + "alert_n_cs0_dram15_ca_par_error", + "alert_n_cs0_dram15_crc_error", + "alert_n_cs0_dram16_ca_par_error", + "alert_n_cs0_dram16_crc_error", + "alert_n_cs0_dram17_ca_par_error", + "alert_n_cs0_dram17_crc_error", + "alert_n_cs1_dram0_ca_par_error", + "alert_n_cs1_dram0_crc_error", + "alert_n_cs1_dram1_ca_par_error", + "alert_n_cs1_dram1_crc_error", + "alert_n_cs1_dram2_ca_par_error", + "alert_n_cs1_dram2_crc_error", + "alert_n_cs1_dram3_ca_par_error", + "alert_n_cs1_dram3_crc_error", + "alert_n_cs1_dram4_ca_par_error", + 
"alert_n_cs1_dram4_crc_error", + "alert_n_cs1_dram5_ca_par_error", + "alert_n_cs1_dram5_crc_error", + "alert_n_cs1_dram6_ca_par_error", + "alert_n_cs1_dram6_crc_error", + "alert_n_cs1_dram7_ca_par_error", + "alert_n_cs1_dram7_crc_error", + "alert_n_cs1_dram8_ca_par_error", + "alert_n_cs1_dram8_crc_error", + "alert_n_cs1_dram9_ca_par_error", + "alert_n_cs1_dram9_crc_error", + "alert_n_cs1_dram10_ca_par_error", + "alert_n_cs1_dram10_crc_error", + "alert_n_cs1_dram11_ca_par_error", + "alert_n_cs1_dram11_crc_error", + "alert_n_cs1_dram12_ca_par_error", + "alert_n_cs1_dram12_crc_error", + "alert_n_cs1_dram13_ca_par_error", + "alert_n_cs1_dram13_crc_error", + "alert_n_cs1_dram14_ca_par_error", + "alert_n_cs1_dram14_crc_error", + "alert_n_cs1_dram15_ca_par_error", + "alert_n_cs1_dram15_crc_error", + "alert_n_cs1_dram16_ca_par_error", + "alert_n_cs1_dram16_crc_error", + "alert_n_cs1_dram17_ca_par_error", + "alert_n_cs1_dram17_crc_error", + "alert_n_cs2_dram0_ca_par_error", + "alert_n_cs2_dram0_crc_error", + "alert_n_cs2_dram1_ca_par_error", + "alert_n_cs2_dram1_crc_error", + "alert_n_cs2_dram2_ca_par_error", + "alert_n_cs2_dram2_crc_error", + "alert_n_cs2_dram3_ca_par_error", + "alert_n_cs2_dram3_crc_error", + "alert_n_cs2_dram4_ca_par_error", + "alert_n_cs2_dram4_crc_error", + "alert_n_cs2_dram5_ca_par_error", + "alert_n_cs2_dram5_crc_error", + "alert_n_cs2_dram6_ca_par_error", + "alert_n_cs2_dram6_crc_error", + "alert_n_cs2_dram7_ca_par_error", + "alert_n_cs2_dram7_crc_error", + "alert_n_cs2_dram8_ca_par_error", + "alert_n_cs2_dram8_crc_error", + "alert_n_cs2_dram9_ca_par_error", + "alert_n_cs2_dram9_crc_error", + "alert_n_cs2_dram10_ca_par_error", + "alert_n_cs2_dram10_crc_error", + "alert_n_cs2_dram11_ca_par_error", + "alert_n_cs2_dram11_crc_error", + "alert_n_cs2_dram12_ca_par_error", + "alert_n_cs2_dram12_crc_error", + "alert_n_cs2_dram13_ca_par_error", + "alert_n_cs2_dram13_crc_error", + "alert_n_cs2_dram14_ca_par_error", + "alert_n_cs2_dram14_crc_error", + 
"alert_n_cs2_dram15_ca_par_error", + "alert_n_cs2_dram15_crc_error", + "alert_n_cs2_dram16_ca_par_error", + "alert_n_cs2_dram16_crc_error", + "alert_n_cs2_dram17_ca_par_error", + "alert_n_cs2_dram17_crc_error", + "alert_n_cs3_dram0_ca_par_error", + "alert_n_cs3_dram0_crc_error", + "alert_n_cs3_dram1_ca_par_error", + "alert_n_cs3_dram1_crc_error", + "alert_n_cs3_dram2_ca_par_error", + "alert_n_cs3_dram2_crc_error", + "alert_n_cs3_dram3_ca_par_error", + "alert_n_cs3_dram3_crc_error", + "alert_n_cs3_dram4_ca_par_error", + "alert_n_cs3_dram4_crc_error", + "alert_n_cs3_dram5_ca_par_error", + "alert_n_cs3_dram5_crc_error", + "alert_n_cs3_dram6_ca_par_error", + "alert_n_cs3_dram6_crc_error", + "alert_n_cs3_dram7_ca_par_error", + "alert_n_cs3_dram7_crc_error", + "alert_n_cs3_dram8_ca_par_error", + "alert_n_cs3_dram8_crc_error", + "alert_n_cs3_dram9_ca_par_error", + "alert_n_cs3_dram9_crc_error", + "alert_n_cs3_dram10_ca_par_error", + "alert_n_cs3_dram10_crc_error", + "alert_n_cs3_dram11_ca_par_error", + "alert_n_cs3_dram11_crc_error", + "alert_n_cs3_dram12_ca_par_error", + "alert_n_cs3_dram12_crc_error", + "alert_n_cs3_dram13_ca_par_error", + "alert_n_cs3_dram13_crc_error", + "alert_n_cs3_dram14_ca_par_error", + "alert_n_cs3_dram14_crc_error", + "alert_n_cs3_dram15_ca_par_error", + "alert_n_cs3_dram15_crc_error", + "alert_n_cs3_dram16_ca_par_error", + "alert_n_cs3_dram16_crc_error", + "alert_n_cs3_dram17_ca_par_error", + "alert_n_cs3_dram17_crc_error" +}; +/* + * index = (sizeof(event_mask)/sizoef(event_mask[1]) + * + #cs*36 + (par?1:0)*18 + #dram + */ + + +static const u32 event_mask[NR_EVENTS] = { + [EV_ILLEGAL] = 0x00000002, + [EV_MULT_ILLEGAL] = 0x00000004, + [EV_CORR_ECC] = 0x00000008, + [EV_MULT_CORR_ECC] = 0x00000010, + [EV_UNCORR_ECC] = 0x00000020, + [EV_MULT_UNCORR_ECC] = 0x00000040, + [EV_PORT_ERROR] = 0x00000080, + [EV_WRAP_ERROR] = 0x00001000, + [EV_PARITY_ERROR] = 0x00400000, +}; + +static const struct event_logging { + int fatal; + const char *level; + const 
char *name; +} event_logging[NR_EVENTS] = { + [EV_ILLEGAL] = {0, KERN_ERR, "Illegal access"}, + [EV_MULT_ILLEGAL] = {0, KERN_ERR, "Illegal access"}, + [EV_CORR_ECC] = {0, KERN_NOTICE, "Correctable ECC error"}, + [EV_MULT_CORR_ECC] = {0, KERN_NOTICE, "Correctable ECC error"}, + [EV_UNCORR_ECC] = {1, KERN_CRIT, "Uncorrectable ECC error"}, + [EV_MULT_UNCORR_ECC] = {1, KERN_CRIT, "Uncorrectable ECC error"}, + [EV_PORT_ERROR] = {0, KERN_CRIT, "Port error"}, + [EV_WRAP_ERROR] = {0, KERN_CRIT, "Wrap error"}, + [EV_PARITY_ERROR] = {0, KERN_CRIT, "Parity error"}, +}; + +/* Private structure for common edac device */ +struct event_counter { + atomic_t counter; + int edac_block_idx; +}; + +struct mc_edac_data { + struct event_counter events[NR_EVENTS]; + struct event_counter alerts[MAX_CS][MAX_DQ][MPR_ERRORS]; + u8 mpr_page1[MPR_CIRCULAR_BUF_LEN][MAX_DQ][MPR_PAGE_BYTES]; + struct mpr_dump mpr; + char irq_name[IRQ_NAME_LEN]; + int cs_count; + int dram_count; + int irq; + int latest_mpr_page1_idx; + raw_spinlock_t mpr_data_lock; + struct mutex edac_sysfs_data_lock; + wait_queue_head_t dump_wq; + wait_queue_head_t event_wq; + atomic_t dump_ready; + atomic_t event_ready; + atomic_t dump_in_progress; + +}; + +struct intel_edac_dev_info { + struct platform_device *pdev; + struct mc_edac_data *data; + char *ctl_name; + char *blk_name; + struct work_struct offload_alerts; + struct work_struct offload_events; + int is_ddr4; + int edac_idx; + u32 sm_region; + struct regmap *syscon; + void __iomem *apb2ser3_region; + struct edac_device_ctl_info *edac_dev; + void (*check)(struct edac_device_ctl_info *edac_dev); +}; + + +static ssize_t mpr1_dump_show(struct edac_device_ctl_info + *edac_dev, char *data) +{ + u8 (*mpr_page1)[MAX_DQ][MPR_PAGE_BYTES]; + unsigned long flags; + char *buf = NULL; + ssize_t count = 0; + ssize_t total_count = 0; + int i = 0, j = 0; + int latest_mpr_page1_idx; + struct intel_edac_dev_info *dev_info = edac_dev->pvt_info; + + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + 
if (buf == NULL) + goto no_mem_buffer; + + mpr_page1 = kmalloc_array(MPR_CIRCULAR_BUF_LEN, sizeof(*mpr_page1), + GFP_KERNEL); + if (mpr_page1 == NULL) + goto no_mem_dump; + + /* + * If this causes a performance issue use rcu list, where each node + * is a 2-dimensional array instead. + */ + raw_spin_lock_irqsave(&dev_info->data->mpr_data_lock, flags); + latest_mpr_page1_idx = dev_info->data->latest_mpr_page1_idx; + memcpy(mpr_page1, dev_info->data->mpr_page1, + MPR_CIRCULAR_BUF_LEN * MAX_DQ * MPR_PAGE_BYTES); + raw_spin_unlock_irqrestore(&dev_info->data->mpr_data_lock, flags); + + /* Now process on copied data ... */ + count = scnprintf(buf+total_count, PAGE_SIZE-total_count, + "%s", MPR_HDR9); + + total_count += (count > 0 ? count : 0); + + if (dev_info->data->dram_count == MAX_DQ) { + count = scnprintf(buf+total_count, PAGE_SIZE-total_count, + "%s", MPR_HDR18); + total_count += (count > 0 ? count : 0); + } + + + for (i = 0; i < MPR_CIRCULAR_BUF_LEN; ++i) { + j = (latest_mpr_page1_idx + + MPR_CIRCULAR_BUF_LEN - i) % MPR_CIRCULAR_BUF_LEN; + + /* x8 base */ + count = scnprintf(buf+total_count, + PAGE_SIZE-total_count, + MPR_FMT9, i+1, + *((u32 *) &mpr_page1[j][0]), + *((u32 *) &mpr_page1[j][1]), + *((u32 *) &mpr_page1[j][2]), + *((u32 *) &mpr_page1[j][3]), + *((u32 *) &mpr_page1[j][4]), + *((u32 *) &mpr_page1[j][5]), + *((u32 *) &mpr_page1[j][6]), + *((u32 *) &mpr_page1[j][7]), + *((u32 *) &mpr_page1[j][8])); + + total_count += (count > 0 ? count : 0); + + /* x16 addition */ + if (dev_info->data->dram_count == MAX_DQ) { + count = scnprintf(buf+total_count, + PAGE_SIZE-total_count, + MPR_FMT16, + *((u32 *) &mpr_page1[j][9]), + *((u32 *) &mpr_page1[j][10]), + *((u32 *) &mpr_page1[j][11]), + *((u32 *) &mpr_page1[j][12]), + *((u32 *) &mpr_page1[j][13]), + *((u32 *) &mpr_page1[j][14]), + *((u32 *) &mpr_page1[j][15]), + *((u32 *) &mpr_page1[j][16]), + *((u32 *) &mpr_page1[j][17])); + + total_count += (count > 0 ? 
count : 0); + } + } + + total_count = scnprintf(data, PAGE_SIZE, "%s\n", buf); + + /* free resources */ + kfree(mpr_page1); + kfree(buf); + return total_count; + +no_mem_dump: + pr_info("Could not allocate memory for smem MPR dump.\n"); + kfree(buf); + return 0; +no_mem_buffer: + pr_err("Could not allocate memory for smem MPR buffer.\n"); + return 0; +} + +static struct edac_dev_sysfs_attribute device_block_attr[] = { + { + .attr = { + .name = "mpr_page1", + .mode = (S_IRUGO | S_IWUSR) + }, + .show = mpr1_dump_show, + .store = NULL}, + /* End of list */ + { + .attr = {.name = NULL} + } +}; + +static void axxia_mc_sysfs_attributes(struct edac_device_ctl_info *edac_dev) +{ + edac_dev->sysfs_attributes = &device_block_attr[0]; +} + +static inline void __attribute__((always_inline)) +handle_events(struct intel_edac_dev_info *edac_dev, + struct sm_56xx_denali_ctl_366 *denali_ctl_366) +{ + unsigned long set_val; + int i; + + for (i = 0; i < NR_EVENTS; ++i) { + if ((denali_ctl_366->int_status & event_mask[i]) != 0) { + if (force_restart && event_logging[i].fatal) { + if (IS_ERR(edac_dev->syscon)) { + set_val = readl( + edac_dev->apb2ser3_region + + APB2_PERSIST_SCRATCH); + /* set bit 3 in pscratch reg */ + set_val = set_val + | SMEM_PERSIST_SCRATCH_BIT; + writel(set_val, + edac_dev->apb2ser3_region + + APB2_PERSIST_SCRATCH); + } else { + regmap_update_bits(edac_dev->syscon, + APB2_PERSIST_SCRATCH, + SMEM_PERSIST_SCRATCH_BIT, + SMEM_PERSIST_SCRATCH_BIT); + } + pr_emerg("%s uncorrectable error\n", + edac_dev->ctl_name); + machine_restart(NULL); + } + atomic_inc(&edac_dev->data->events[i].counter); + } + } +} + +static inline void __attribute__((always_inline)) +inc_alert_counter(struct event_counter (*alert)[MAX_DQ][MPR_ERRORS], + int cs, int dram, u8 val) +{ + /* PARITY */ + if (val & 0x40) + atomic_inc(&alert[cs][dram][0].counter); + + /* CRC */ + if (val & 0x80) + atomic_inc(&alert[cs][dram][1].counter); +} + +static inline void __attribute__((always_inline)) 
+store_mpr_dump(struct intel_edac_dev_info *edac_dev, int cs) +{ + struct mpr_dump *mpr = &edac_dev->data->mpr; + int idx; + + edac_dev->data->latest_mpr_page1_idx++; + /* used smart bitwise op instead of %= MPR_CIRCULAR_BUF_LEN */ + /* MPR_CIRCULAR_BUF_LEN is power of 2 */ + idx = (edac_dev->data->latest_mpr_page1_idx &= + (MPR_CIRCULAR_BUF_LEN - 1)); + + memcpy((char *) &edac_dev->data->mpr_page1[idx], + (char *) &mpr->dram_0_page[0], + MAX_DQ * MPR_PAGE_BYTES); +} + +static inline void __attribute__((always_inline)) +update_alert_counters(struct intel_edac_dev_info *edac_dev, int cs) +{ + /* Casting magic + * + * mpr.dram_0_page[0] -> dram[0][0] + * mpr.dram_0_page[1] -> dram[0][1] + * mpr.dram_0_page[2] -> dram[0][2] + * mpr.dram_0_page[3] -> dram[0][3] + * ... + * mpr.dram_1_page[0] -> dram[1][0] + * ... + * mpr.dram_8_page[0] -> dram[8][0] + */ + u8 (*dram)[MPR_PAGE_BYTES] = + (u8 (*)[MPR_PAGE_BYTES]) (&edac_dev->data->mpr.dram_0_page[0]); + int i; + + for (i = 0; i < MAX_DQ; ++i) + inc_alert_counter(edac_dev->data->alerts, cs, i, dram[i][3]); + +} + +static inline int __attribute__((always_inline)) +collect_mpr_dump(struct intel_edac_dev_info *edac_dev, u8 page, int cs) +{ + struct mpr_dump *mpr = &edac_dev->data->mpr; + unsigned long flags; + u32 regval; + int i; + + mpr->mpr_page_id = page; + + for (i = 0; i < MPR_PAGE_BYTES; i++) { + if (ncr_read(edac_dev->sm_region, + (SM_56XX_DENALI_CTL_58 + (0x14 * i)), + 4, &regval)) + goto error_read; + + mpr->dram_0_page[i] = regval & 0xff; + mpr->dram_1_page[i] = ((regval & 0xff00) >> 8); + mpr->dram_2_page[i] = ((regval & 0xff0000) >> 16); + mpr->dram_3_page[i] = ((regval & 0xff000000) >> 24); + + if (ncr_read(edac_dev->sm_region, + (SM_56XX_DENALI_CTL_59 + (0x14 * i)), + 4, &regval)) + goto error_read; + + mpr->dram_4_page[i] = regval & 0xff; + mpr->dram_5_page[i] = ((regval & 0xff00) >> 8); + mpr->dram_6_page[i] = ((regval & 0xff0000) >> 16); + mpr->dram_7_page[i] = ((regval & 0xff000000) >> 24); + + if 
(ncr_read(edac_dev->sm_region, + (SM_56XX_DENALI_CTL_60 + (0x14 * i)), + 4, &regval)) + goto error_read; + + mpr->dram_8_page[i] = regval & 0xff; + + if (edac_dev->data->dram_count == MAX_DQ) { + mpr->dram_9_page[i] = ((regval & 0xff00) >> 8); + mpr->dram_10_page[i] = ((regval & 0xff0000) >> 16); + mpr->dram_11_page[i] = ((regval & 0xff000000) >> 24); + + if (ncr_read(edac_dev->sm_region, + (SM_56XX_DENALI_CTL_60 + + (0x14 * i)), 4, &regval)) + goto error_read; + + mpr->dram_12_page[i] = regval & 0xff; + mpr->dram_13_page[i] = ((regval & 0xff00) >> 8); + mpr->dram_14_page[i] = ((regval & 0xff0000) >> 16); + mpr->dram_15_page[i] = ((regval & 0xff000000) >> 24); + + if (ncr_read(edac_dev->sm_region, + (SM_56XX_DENALI_CTL_61 + + (0x14 * i)), 4, &regval)) + goto error_read; + + mpr->dram_16_page[i] = regval & 0xff; + mpr->dram_17_page[i] = ((regval & 0xff00) >> 8); + } + } + raw_spin_lock_irqsave(&edac_dev->data->mpr_data_lock, flags); + store_mpr_dump(edac_dev, cs); + raw_spin_unlock_irqrestore(&edac_dev->data->mpr_data_lock, flags); + + update_alert_counters(edac_dev, cs); + return 0; + +error_read: + printk_ratelimited("%s: Memory error reading MC mpr page\n", + dev_name(&edac_dev->pdev->dev)); + return 1; +} + +static irqreturn_t +smmon_isr(int interrupt, void *device) +{ + struct intel_edac_dev_info *dev_info = device; + struct sm_56xx_denali_ctl_366 denali_ctl_366; + struct sm_56xx_denali_ctl_367 denali_ctl_367; + struct sm_56xx_denali_ctl_368 denali_ctl_368 = {0}; + struct sm_56xx_denali_ctl_369 denali_ctl_369 = {0, 0, 0}; + + /* + * NOTE: + * ISR function is only reading int_status, and write into int_ack + * registers. 
+ * + * - first handle critical events, which might require restart + * (handle_events) and then to the job outside isr + * - second collect MPR dump if any exists and then trigger new if + * needed - all outside isr, + * - third wake up job outside isr to trigger mpr dump procedure when + * ALERT_N reported (bit [33] is on) + */ + + if (ncr_read(dev_info->sm_region, SM_56XX_DENALI_CTL_367, + 4, (u32 *) &denali_ctl_367)) + goto error_read; + + if (denali_ctl_367.int_status & 0x4) { + + if (ncr_read(dev_info->sm_region, SM_56XX_DENALI_CTL_366, + 4, (u32 *) &denali_ctl_366)) + goto error_read; + + handle_events(dev_info, &denali_ctl_366); + atomic_set(&dev_info->data->event_ready, 1); + wake_up(&dev_info->data->event_wq); + + denali_ctl_368.int_ack = + (denali_ctl_366.int_status & 0xf8ffffff); + + if (dev_info->is_ddr4) { + if (denali_ctl_366.int_status & 0x4000000) { + atomic_set(&dev_info->data->dump_ready, 1); + wake_up(&dev_info->data->dump_wq); + denali_ctl_368.int_ack |= 0x4000000; + } + } + if (ncr_write(dev_info->sm_region, SM_56XX_DENALI_CTL_368, + 4, (u32 *) &denali_ctl_368)) + goto error_write; + } + + if (denali_ctl_367.int_status & 0x2) { + if (dev_info->is_ddr4) { + atomic_inc(&dev_info->data->dump_in_progress); + wake_up(&dev_info->data->dump_wq); + } + denali_ctl_369.int_ack = 0x2; + if (ncr_write(dev_info->sm_region, SM_56XX_DENALI_CTL_369, + 4, (u32 *) &denali_ctl_369)) + goto error_write; + } + + return IRQ_HANDLED; + +error_write: + printk_ratelimited("%s: Error writing interrupt status\n", + dev_name(&dev_info->pdev->dev)); + return IRQ_HANDLED; +error_read: + printk_ratelimited("%s: Error reading interrupt status\n", + dev_name(&dev_info->pdev->dev)); + return IRQ_HANDLED; +} + + +static void intel_sm_alerts_error_check(struct edac_device_ctl_info *edac_dev) +{ + struct intel_edac_dev_info *dev_info = + (struct intel_edac_dev_info *) edac_dev->pvt_info; + struct event_counter (*alerts)[MAX_DQ][MPR_ERRORS] = + dev_info->data->alerts; + struct 
sm_56xx_denali_ctl_57 denali_ctl_57; + int i, j, k, l; + u32 counter; + +start: + /* keep hung up monitor happy 90 sec's */ + if (0 == wait_event_timeout(dev_info->data->dump_wq, + atomic_read(&dev_info->data->dump_in_progress), + msecs_to_jiffies(90*1000))) + goto start; + + /* the only one running workqueue */ + for (i = 0; i < dev_info->data->cs_count; ++i) { + + /* trigger dump */ + if (ncr_read(dev_info->sm_region, + SM_56XX_DENALI_CTL_57, + 4, (u32 *) &denali_ctl_57)) + goto error_read; + + /* bits [3:2] cs number */ + denali_ctl_57.read_mpr = 4*i + 1; + if (ncr_write(dev_info->sm_region, + SM_56XX_DENALI_CTL_57, + 4, (u32 *) &denali_ctl_57)) + goto error_write; + + /* bit [4] trigger dump */ + denali_ctl_57.read_mpr += 16; + if (ncr_write(dev_info->sm_region, + SM_56XX_DENALI_CTL_57, + 4, (u32 *) &denali_ctl_57)) + goto error_write; + /* wait */ + wait_event(dev_info->data->dump_wq, + atomic_read(&dev_info->data->dump_ready)); + + atomic_set(&dev_info->data->dump_ready, 0); + /* collect data */ + collect_mpr_dump(dev_info, 1, i); + } + + /* process all dumps - update counters */ + + mutex_lock(&dev_info->data->edac_sysfs_data_lock); + for (i = 0; i < dev_info->data->cs_count; ++i) { + for (j = 0; j < dev_info->data->dram_count; ++j) { + for (k = 0; k < MPR_ERRORS; ++k) { + /* + * TODO - How can one determine event type? 
+ * recoverable/unrecoverable + */ + counter = atomic_xchg(&alerts[i][j][k].counter, + 0); + for (l = 0; l < counter; ++l) + edac_device_handle_ce(edac_dev, 0, + alerts[i][j][k].edac_block_idx, + edac_dev->ctl_name); + } + } + } + mutex_unlock(&dev_info->data->edac_sysfs_data_lock); + atomic_set(&dev_info->data->dump_in_progress, 0); + goto start; + +error_read: +error_write: + printk_ratelimited("Could not collect MPR dump.\n"); + atomic_set(&dev_info->data->dump_in_progress, 0); + goto start; +} + +static void intel_sm_events_error_check(struct edac_device_ctl_info *edac_dev) +{ + struct intel_edac_dev_info *dev_info = + (struct intel_edac_dev_info *) edac_dev->pvt_info; + struct event_counter *events = dev_info->data->events; + int i, j; + u32 counter; + + while (1) { + if (0 == wait_event_timeout(dev_info->data->event_wq, + atomic_read(&dev_info->data->event_ready), + msecs_to_jiffies(90*1000))) + continue; + + atomic_set(&dev_info->data->event_ready, 0); + + mutex_lock(&dev_info->data->edac_sysfs_data_lock); + for (i = 0; i < NR_EVENTS; ++i) { + counter = atomic_xchg(&events[i].counter, 0); + for (j = 0; j < counter; ++j) { + switch (i) { + /* + * TODO - How can one determine event type? 
+ * recoverable/unrecoverable + */ + case EV_ILLEGAL: + case EV_MULT_ILLEGAL: + case EV_UNCORR_ECC: + case EV_MULT_UNCORR_ECC: + edac_device_handle_ue(edac_dev, 0, i, + edac_dev->ctl_name); + break; + case EV_CORR_ECC: + case EV_MULT_CORR_ECC: + case EV_PORT_ERROR: + case EV_WRAP_ERROR: + case EV_PARITY_ERROR: + edac_device_handle_ce(edac_dev, 0, i, + edac_dev->ctl_name); + break; + default: + printk_ratelimited( + "ERROR EVENT MISSING.\n"); + } + } + } + mutex_unlock(&dev_info->data->edac_sysfs_data_lock); + } +} + +static void axxia_alerts_work(struct work_struct *work) +{ + struct intel_edac_dev_info *dev_info = + container_of(work, struct intel_edac_dev_info, offload_alerts); + + intel_sm_alerts_error_check(dev_info->edac_dev); +} + +static void axxia_events_work(struct work_struct *work) +{ + struct intel_edac_dev_info *dev_info = + container_of(work, struct intel_edac_dev_info, offload_events); + + intel_sm_events_error_check(dev_info->edac_dev); +} + +static int get_active_cs(struct intel_edac_dev_info *dev_info) +{ + struct sm_56xx_denali_ctl_123 denali_ctl_123 = {0}; + int i; + int cs = 0; + + if (ncr_read(dev_info->sm_region, SM_56XX_DENALI_CTL_123, + 4, (u32 *) &denali_ctl_123)) { + pr_err("Could not read active CS number.\n"); + return 0; + } + + for (i = 0; i < MAX_CS; i++) + if (denali_ctl_123.cs_map & (0x1 << i)) + ++cs; + + return cs; +} + +static int get_active_dram(struct intel_edac_dev_info *dev_info) +{ + struct sm_56xx_denali_ctl_117 denali_ctl_117 = {0}; + int dram = 0; + + if (ncr_read(dev_info->sm_region, SM_56XX_DENALI_CTL_117, + 4, (u32 *) &denali_ctl_117)) { + pr_err("Could not read number of lanes.\n"); + } + + if (denali_ctl_117.bank_diff == 0) + dram = MAX_DQ/2; + + if (denali_ctl_117.bank_diff == 1) + dram = MAX_DQ; + + return dram; +} + +static int intel_edac_mc_probe(struct platform_device *pdev) +{ + struct edac_device_instance *instance; + struct edac_device_block *block; + int i, j, k, l; + int count; + struct intel_edac_dev_info 
*dev_info = NULL; + struct resource *io; + struct device_node *np = pdev->dev.of_node; + int irq = -1, rc = 0; + struct sm_56xx_denali_ctl_00 denali_ctl_00; + struct sm_56xx_denali_ctl_370 denali_ctl_370; + struct sm_56xx_denali_ctl_371 denali_ctl_371; + int cs_count = MAX_CS; + int dram_count = MAX_DQ; + + count = atomic64_inc_return(&mc_counter); + if ((count - 1) == MEMORY_CONTROLLERS) + goto err_nodev; + + dev_info = devm_kzalloc(&pdev->dev, sizeof(*dev_info), GFP_KERNEL); + if (!dev_info) + goto err_nomem; + + + dev_info->data = + devm_kzalloc(&pdev->dev, sizeof(*dev_info->data), GFP_KERNEL); + if (!dev_info->data) + goto err_noctlinfo; + + init_waitqueue_head(&dev_info->data->dump_wq); + init_waitqueue_head(&dev_info->data->event_wq); + + raw_spin_lock_init(&dev_info->data->mpr_data_lock); + mutex_init(&dev_info->data->edac_sysfs_data_lock); + + dev_info->ctl_name = kstrdup(np->name, GFP_KERNEL); + dev_info->blk_name = "ECC"; + edac_op_state = EDAC_OPSTATE_POLL; + + dev_info->pdev = pdev; + dev_info->edac_idx = edac_device_alloc_index(); + dev_info->data->irq = 0; + + /* setup all counters */ + for (i = 0; i < NR_EVENTS; ++i) + atomic_set(&dev_info->data->events[i].counter, 0); + + for (j = 0; j < MAX_CS; ++j) { + for (l = 0; l < MAX_DQ; ++l) { + for (k = 0; k < MPR_ERRORS; ++k, ++i) { + atomic_set(&dev_info->data-> + alerts[j][l][k].counter, 0); + } + } + } + + /* set up dump in progress flag */ + atomic_set(&dev_info->data->dump_in_progress, 0); + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) { + dev_err(&pdev->dev, "Unable to get mem resource\n"); + goto err_noctlinfo; + } + dev_info->sm_region = io->start; + dev_info->syscon = + syscon_regmap_lookup_by_phandle(np, "syscon"); + if (IS_ERR(dev_info->syscon)) { + pr_info(FMT, np->name); + dev_info->apb2ser3_region = ioremap(APB2_SER3_PHY_ADDR, + APB2_SER3_PHY_SIZE); + if (!dev_info->apb2ser3_region) { + pr_err("ioremap of apb2ser3 region failed\n"); + goto err_noctlinfo; + } + } + + 
cs_count = get_active_cs(dev_info); + if (cs_count == 0) + goto err_noctlinfo; + + dram_count = get_active_dram(dev_info); + if (dram_count == 0) + goto err_noctlinfo; + + + if (ncr_read(dev_info->sm_region, SM_56XX_DENALI_CTL_00, + 4, (u32 *) &denali_ctl_00)) { + pr_err("Could not read ddr version.\n"); + goto err_noctlinfo; + } + + dev_info->is_ddr4 = (denali_ctl_00.dram_class == 0xa ? 1 : 0); + + if (dev_info->is_ddr4) + pr_info("%s supports mpr dump (DDR4).\n", dev_info->ctl_name); + else { + pr_info("%s doesn't support mpr dump.\n", dev_info->ctl_name); + cs_count = 0; + dram_count = 0; + } + + dev_info->data->cs_count = cs_count; + dev_info->data->dram_count = dram_count; + + dev_info->edac_dev = + edac_device_alloc_ctl_info(0, dev_info->ctl_name, + 1, dev_info->blk_name, + NR_EVENTS + + cs_count * dram_count * MPR_ERRORS, + 0, NULL, 0, dev_info->edac_idx); + + if (!dev_info->edac_dev) { + pr_info("No memory for edac device\n"); + goto err_noctlinfo; + } + + instance = &dev_info->edac_dev->instances[0]; + + /* It just gives more descriptive name. */ + for (i = 0; i < NR_EVENTS; ++i) { + block = &instance->blocks[i]; + snprintf(block->name, + sizeof(block->name), + "%s", block_name[i]); + dev_info->data->events[i].edac_block_idx = i; + } + /* + * NOTE, please notice that 'i' index is + * further used in following loops. This is done + * intentionally. Edac is using index for all instances, + * each instance shall be however named based on correct + * cs, dram, ca/crc. Those might differ between HW versions. + * CS 1-4 + * DRAM 4-16, + * CRC/CA Parity - always 2 events. + */ + if (dev_info->is_ddr4) { + for (j = 0; j < cs_count; ++j) { + for (l = 0; l < dram_count; ++l) { + for (k = 0; k < MPR_ERRORS; ++k, ++i) { + int idx = NR_EVENTS + + MAX_DQ * MPR_ERRORS * j + + MPR_ERRORS * l + k; + + dev_info->data->alerts[j][l][k]. 
+ edac_block_idx = i; + block = &instance->blocks[i]; + snprintf(block->name, + sizeof(block->name), + "%s", block_name[idx]); + } + } + } + } + + dev_info->edac_dev->pvt_info = dev_info; + dev_info->edac_dev->dev = &dev_info->pdev->dev; + dev_info->edac_dev->ctl_name = dev_info->ctl_name; + dev_info->edac_dev->mod_name = INTEL_EDAC_MOD_STR; + dev_info->edac_dev->dev_name = dev_name(&dev_info->pdev->dev); + dev_info->edac_dev->edac_check = NULL; + + if (dev_info->is_ddr4) + axxia_mc_sysfs_attributes(dev_info->edac_dev); + + if (edac_device_add_device(dev_info->edac_dev) != 0) { + pr_info("Unable to add edac device for %s\n", + dev_info->ctl_name); + goto err_nosysfs; + } + + snprintf(&dev_info->data->irq_name[0], IRQ_NAME_LEN, + "%s-mon", dev_info->ctl_name); + + if (dev_info->is_ddr4) + INIT_WORK(&dev_info->offload_alerts, axxia_alerts_work); + + INIT_WORK(&dev_info->offload_events, axxia_events_work); + + if (dev_info->is_ddr4) + schedule_work(&dev_info->offload_alerts); + schedule_work(&dev_info->offload_events); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + pr_err("Could not get irq number.\n"); + goto err_noirq; + } + + /* + * Enable memory controller interrupts. 
+ */ + if (dev_info->is_ddr4) + denali_ctl_370.int_mask = SM_INT_MASK_LOW; + else + denali_ctl_370.int_mask = SM_INT_MASK_LOW | + 0x04000000; + + if (ncr_write(dev_info->sm_region, SM_56XX_DENALI_CTL_370, + 4, (u32 *) &denali_ctl_370)) { + pr_err("Could not write interrupt mask reg (%s - ctl_370).\n", + dev_info->ctl_name); + goto err_noirq; + } + + if (ncr_read(dev_info->sm_region, SM_56XX_DENALI_CTL_371, + 4, (u32 *) &denali_ctl_371)) { + pr_err("Could not read interrupt mask reg (%s - ctl_371).\n", + dev_info->ctl_name); + goto err_noirq; + } + + denali_ctl_371.int_mask = SM_INT_MASK_HIGH; + if (ncr_write(dev_info->sm_region, SM_56XX_DENALI_CTL_371, + 4, (u32 *) &denali_ctl_371)) { + pr_err("Could not write interrupt mask reg (%s - ctl_371).\n", + dev_info->ctl_name); + goto err_noirq; + } + + dev_info->data->irq = irq; + rc = devm_request_irq(&pdev->dev, irq, + smmon_isr, IRQF_ONESHOT, + &dev_info->data->irq_name[0], dev_info); + + if (rc) { + pr_err("Could not register interrupt handler (%s).\n", + dev_info->ctl_name); + + dev_info->data->irq = 0; + + /* Mask all interrupts in controller */ + denali_ctl_370.int_mask = SM_INT_MASK_ALL_LOW; + if (ncr_write(dev_info->sm_region, SM_56XX_DENALI_CTL_370, + 4, (u32 *) &denali_ctl_370)) { + pr_err("Could not mask interrupts (%s - ctl_370).\n", + dev_info->ctl_name); + } + + denali_ctl_371.int_mask = SM_INT_MASK_ALL_HIGH; + if (ncr_write(dev_info->sm_region, SM_56XX_DENALI_CTL_371, + 4, (u32 *) &denali_ctl_371)) { + pr_err("Could not mask interrupts (%s - ctl_371).\n", + dev_info->ctl_name); + } + goto err_noirq; + } + return 0; + +err_noirq: + flush_scheduled_work(); + edac_device_del_device(&dev_info->pdev->dev); + +err_nosysfs: + edac_device_free_ctl_info(dev_info->edac_dev); +err_noctlinfo: + mutex_destroy(&dev_info->data->edac_sysfs_data_lock); + atomic64_dec(&mc_counter); + platform_device_unregister(dev_info->pdev); + return 1; +err_nomem: + atomic64_dec(&mc_counter); + return -ENOMEM; +err_nodev: + 
atomic64_dec(&mc_counter); + return -ENODEV; +} + +static int intel_edac_mc_remove(struct platform_device *pdev) +{ + struct intel_edac_dev_info *dev_info = + (struct intel_edac_dev_info *) &pdev->dev; + + if (dev_info) { + if (dev_info->data->irq > 0) { + disable_irq(dev_info->data->irq); + devm_free_irq(&pdev->dev, + dev_info->data->irq, dev_info); + } + + flush_scheduled_work(); + + if (dev_info->edac_dev != NULL) { + edac_device_del_device(&dev_info->pdev->dev); + edac_device_free_ctl_info(dev_info->edac_dev); + } + + + mutex_destroy(&dev_info->data->edac_sysfs_data_lock); + atomic64_dec(&mc_counter); + } + platform_device_unregister(pdev); + return 0; +} + +static const struct of_device_id intel_edac_smmon_match[] = { + { .compatible = "intel,smmon" }, + { } +}; +MODULE_DEVICE_TABLE(platform, intel_edac_smmon_match); + +static struct platform_driver intel_edac_mc_driver = { + .probe = intel_edac_mc_probe, + .remove = intel_edac_mc_remove, + .driver = { + .name = "intel_edac_smmon", + .of_match_table = intel_edac_smmon_match, + } +}; +module_platform_driver(intel_edac_mc_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Marek Majtyka <marekx.majt...@intel.com>"); +MODULE_AUTHOR("Arun Joshi <arun.jo...@intel.com>"); -- 2.7.4 -- _______________________________________________ linux-yocto mailing list linux-yocto@yoctoproject.org https://lists.yoctoproject.org/listinfo/linux-yocto