On Thu, 03 Oct 2019 12:32:41 +0100,
Talel Shenhar <ta...@amazon.com> wrote:
> 
> The Amazon's Annapurna Labs SoCs include a Point Of Serialization (POS)
> error logging unit that reports an error in case of a write error (e.g. an
> attempt to write to a read-only register).
> This error shall be reported to the EDAC subsystem as an uncorrectable error.
> 
> Signed-off-by: Talel Shenhar <ta...@amazon.com>
> ---
>  MAINTAINERS                |   7 ++
>  drivers/edac/Kconfig       |   6 ++
>  drivers/edac/Makefile      |   1 +
>  drivers/edac/al_pos_edac.c | 173 +++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 187 insertions(+)
>  create mode 100644 drivers/edac/al_pos_edac.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index e7a47b5..f5ce446 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -751,6 +751,13 @@ F:       drivers/tty/serial/altera_jtaguart.c
>  F:   include/linux/altera_uart.h
>  F:   include/linux/altera_jtaguart.h
>  
> +AMAZON ANNAPURNA LABS POS EDAC DRIVER
> +M:   Talel Shenhar <ta...@amazon.com>
> +M:   Talel Shenhar <talelshen...@gmail.com>
> +S:   Maintained
> +F:   Documentation/devicetree/bindings/edac/amazon,al-pos-edac.yaml
> +F:   drivers/edac/al_pos_edac.c
> +
>  AMAZON ANNAPURNA LABS THERMAL MMIO DRIVER
>  M:   Talel Shenhar <ta...@amazon.com>
>  S:   Maintained
> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
> index 200c04c..bb5805f 100644
> --- a/drivers/edac/Kconfig
> +++ b/drivers/edac/Kconfig
> @@ -100,6 +100,12 @@ config EDAC_AMD64_ERROR_INJECTION
>         In addition, there are two control files, inject_read and inject_write,
>         which trigger the DRAM ECC Read and Write respectively.
>  
> +config EDAC_AL_POS
> +     tristate "Amazon's Annapurna Labs POS EDAC driver"
> +     depends on (ARCH_ALPINE || COMPILE_TEST)
> +     help
> +       Include support for the SoC POS EDAC error capability.
> +
>  config EDAC_AMD76X
>       tristate "AMD 76x (760, 762, 768)"
>       depends on PCI && X86_32
> diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
> index 165ca65e..3571936 100644
> --- a/drivers/edac/Makefile
> +++ b/drivers/edac/Makefile
> @@ -22,6 +22,7 @@ obj-$(CONFIG_EDAC_GHES)                     += ghes_edac.o
>  edac_mce_amd-y                               := mce_amd.o
>  obj-$(CONFIG_EDAC_DECODE_MCE)                += edac_mce_amd.o
>  
> +obj-$(CONFIG_EDAC_AL_POS)            += al_pos_edac.o
>  obj-$(CONFIG_EDAC_AMD76X)            += amd76x_edac.o
>  obj-$(CONFIG_EDAC_CPC925)            += cpc925_edac.o
>  obj-$(CONFIG_EDAC_I5000)             += i5000_edac.o
> diff --git a/drivers/edac/al_pos_edac.c b/drivers/edac/al_pos_edac.c
> new file mode 100644
> index 00000000..bd6cd87
> --- /dev/null
> +++ b/drivers/edac/al_pos_edac.c
> @@ -0,0 +1,173 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
> + */
> +#include <linux/bitfield.h>
> +#include <linux/edac.h>
> +#include <linux/of_irq.h>
> +#include "edac_module.h"
> +
> +#define DRV_NAME "al_pos_edac"
> +#define AL_POS_EDAC_MSG_MAX 256
> +
> +/* Registers Offset */
> +#define AL_POS_ERROR_LOG_1   0x0
> +#define AL_POS_ERROR_LOG_0   0x4
> +
> +/* Registers Fields */
> +#define AL_POS_ERROR_LOG_1_VALID     BIT(31)
> +#define AL_POS_ERROR_LOG_1_BRESP     GENMASK(18, 17)
> +#define AL_POS_ERROR_LOG_1_REQUEST_ID        GENMASK(16, 8)
> +#define AL_POS_ERROR_LOG_1_ADDR_HIGH GENMASK(7, 0)
> +
> +#define AL_POS_ERROR_LOG_0_ADDR_LOW  GENMASK(31, 0)
> +
> +struct al_pos_edac {
> +     struct edac_device_ctl_info *edac_dev;
> +     void __iomem *mmio_base;
> +     int irq;
> +};
> +
> +static int al_pos_handle(struct al_pos_edac *al_pos)
> +{
> +     u32 log0, log1;
> +     u64 addr;
> +     u16 request_id;
> +     u8 bresp;
> +     char msg[AL_POS_EDAC_MSG_MAX];
> +
> +     log1 = readl(al_pos->mmio_base + AL_POS_ERROR_LOG_1);

I already commented on the misuse of strict accesses. Unless you can
explain and document *why* you need the extra ordering, please use
relaxed accesses.

> +     if (!FIELD_GET(AL_POS_ERROR_LOG_1_VALID, log1))
> +             return 0;
> +
> +     log0 = readl(al_pos->mmio_base + AL_POS_ERROR_LOG_0);
> +     writel(0, al_pos->mmio_base + AL_POS_ERROR_LOG_1);
> +
> +     addr = FIELD_GET(AL_POS_ERROR_LOG_0_ADDR_LOW, log0);
> +     addr |= (((u64)FIELD_GET(AL_POS_ERROR_LOG_1_ADDR_HIGH, log1)) << 32);
> +     request_id = FIELD_GET(AL_POS_ERROR_LOG_1_REQUEST_ID, log1);
> +     bresp = FIELD_GET(AL_POS_ERROR_LOG_1_BRESP, log1);
> +
> +     snprintf(msg, sizeof(msg),
> +              "addr=0x%llx request_id=0x%x bresp=0x%x\n",
> +              addr, request_id, bresp);
> +
> +     edac_device_handle_ue(al_pos->edac_dev, 0, 0, msg);
> +
> +     return 1;
> +}
> +
> +static void al_pos_edac_check(struct edac_device_ctl_info *edac_dev)
> +{
> +     struct al_pos_edac *al_pos = edac_dev->pvt_info;
> +
> +     al_pos_handle(al_pos);
> +}
> +
> +static irqreturn_t al_pos_irq_handler(int irq, void *info)
> +{
> +     struct platform_device *pdev = info;
> +     struct al_pos_edac *al_pos = platform_get_drvdata(pdev);
> +
> +     if (al_pos_handle(al_pos))
> +             return IRQ_HANDLED;
> +     return IRQ_NONE;
> +}
> +
> +static int al_pos_probe(struct platform_device *pdev)
> +{
> +     struct edac_device_ctl_info *edac_dev;
> +     struct al_pos_edac *al_pos;
> +     int ret;
> +
> +     edac_dev = edac_device_alloc_ctl_info(sizeof(*al_pos), DRV_NAME, 1,
> +                                           DRV_NAME, 1, 0, NULL, 0,
> +                                           edac_device_alloc_index());
> +     if (!edac_dev)
> +             return -ENOMEM;
> +
> +     al_pos = edac_dev->pvt_info;
> +     al_pos->edac_dev = edac_dev;
> +     platform_set_drvdata(pdev, al_pos);
> +
> +     al_pos->mmio_base = devm_platform_ioremap_resource(pdev, 0);
> +     if (IS_ERR(al_pos->mmio_base)) {
> +             dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
> +                     PTR_ERR(al_pos->mmio_base));
> +             return PTR_ERR(al_pos->mmio_base);
> +     }
> +
> +     al_pos->irq = platform_get_irq(pdev, 0);
> +     if (al_pos->irq <= 0)
> +             edac_dev->edac_check = al_pos_edac_check;
> +
> +     edac_dev->dev = &pdev->dev;
> +     edac_dev->mod_name = DRV_NAME;
> +     edac_dev->dev_name = dev_name(&pdev->dev);
> +     edac_dev->ctl_name = "POS";
> +
> +     ret = edac_device_add_device(edac_dev);
> +     if (ret) {
> +             dev_err(&pdev->dev, "Failed to add edac device\n");
> +             goto err_free_edac;
> +     }
> +
> +     if (al_pos->irq > 0) {
> +             ret = devm_request_irq(&pdev->dev,
> +                                    al_pos->irq,
> +                                    al_pos_irq_handler,
> +                                    0,
> +                                    pdev->name,
> +                                    pdev);
> +             if (ret != 0) {
> +                     dev_err(&pdev->dev,
> +                             "failed to register to irq %d (%d)\n",
> +                             al_pos->irq, ret);
> +                     goto err_remove_edac;

Would it be worth continuing without interrupts? After all, the
interrupt seems to be an optional part of the device...

Thanks,

        M.

-- 
Jazz is not dead, it just smells funny.

Reply via email to