Re: [PATCH 11/17] fpga: dfl: afu: add error reporting support.

2019-04-09 Thread Wu Hao
On Tue, Apr 09, 2019 at 03:57:37PM -0500, Alan Tull wrote:
> On Sun, Mar 24, 2019 at 10:24 PM Wu Hao  wrote:
> 
> Hi Hao,
> 
> >
> > Error reporting is one important private feature, it reports error
> > detected on port and accelerated function unit (AFU). It introduces
> > several sysfs interfaces to allow userspace to check and clear
> > errors detected by hardware.
> >
> > Signed-off-by: Xu Yilun 
> > Signed-off-by: Wu Hao 
> > ---
> >  Documentation/ABI/testing/sysfs-platform-dfl-port |  29 +++
> >  drivers/fpga/Makefile |   1 +
> >  drivers/fpga/dfl-afu-error.c  | 225 ++
> >  drivers/fpga/dfl-afu-main.c   |   4 +
> >  drivers/fpga/dfl-afu.h|   4 +
> >  5 files changed, 263 insertions(+)
> >  create mode 100644 drivers/fpga/dfl-afu-error.c
> >
> > diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-port 
> > b/Documentation/ABI/testing/sysfs-platform-dfl-port
> > index f611e47..e6140aa 100644
> > --- a/Documentation/ABI/testing/sysfs-platform-dfl-port
> > +++ b/Documentation/ABI/testing/sysfs-platform-dfl-port
> > @@ -79,3 +79,32 @@ KernelVersion:   5.2
> >  Contact:   Wu Hao 
> >  Description:   Read-only. Read this file to get the status of issued command
> > to userclck_freqcntrcmd.
> > +
> > +What:  /sys/bus/platform/devices/dfl-port.0/errors/errors
> > +Date:  March 2019
> > +KernelVersion: 5.2
> > +Contact:   Wu Hao 
> > +Description:   Read-only. Read this file to get errors detected on port and
> > +   Accelerated Function Unit (AFU).
> > +
> > +What:  /sys/bus/platform/devices/dfl-port.0/errors/first_error
> > +Date:  March 2019
> > +KernelVersion: 5.2
> > +Contact:   Wu Hao 
> > +Description:   Read-only. Read this file to get the first error detected by
> > +   hardware.
> > +
> > +What:  /sys/bus/platform/devices/dfl-port.0/errors/first_malformed_req
> > +Date:  March 2019
> > +KernelVersion: 5.2
> > +Contact:   Wu Hao 
> > +Description:   Read-only. Read this file to get the first malformed request
> > +   captured by hardware.
> > +
> > +What:  /sys/bus/platform/devices/dfl-port.0/errors/clear
> > +Date:  March 2019
> > +KernelVersion: 5.2
> > +Contact:   Wu Hao 
> > +Description:   Write-only. Write error code to this file to clear errors. If
> > +   the input error code doesn't match, it returns -EBUSY error
> > +   code.
> 
> I understand how -EBUSY could be the right error code for when the
> hardware is in a state where the error can't be cleared.  But if the
> input error code doesn't match, shouldn't the code be -EINVAL?  Also
> as noted below, the way this is currently coded, -ETIMEDOUT could get
> returned.

Thanks for the comments, let me try to capture all possible error return
values in doc in the next version to avoid confusion.

> 
> > diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
> > index c0dd4c8..f1f0af7 100644
> > --- a/drivers/fpga/Makefile
> > +++ b/drivers/fpga/Makefile
> > @@ -40,6 +40,7 @@ obj-$(CONFIG_FPGA_DFL_AFU) += dfl-afu.o
> >
> >  dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o
> >  dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o
> > +dfl-afu-objs += dfl-afu-error.o
> >
> >  # Drivers for FPGAs which implement DFL
> >  obj-$(CONFIG_FPGA_DFL_PCI) += dfl-pci.o
> > diff --git a/drivers/fpga/dfl-afu-error.c b/drivers/fpga/dfl-afu-error.c
> > new file mode 100644
> > index 000..b66bd4a
> > --- /dev/null
> > +++ b/drivers/fpga/dfl-afu-error.c
> > @@ -0,0 +1,225 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Driver for FPGA Accelerated Function Unit (AFU) Error Reporting
> > + *
> > + * Copyright 2019 Intel Corporation, Inc.
> > + *
> > + * Authors:
> > + *   Wu Hao 
> > + *   Xiao Guangrong 
> > + *   Joseph Grecco 
> > + *   Enno Luebbers 
> > + *   Tim Whisonant 
> > + *   Ananda Ravuri 
> > + *   Mitchel Henry 
> > + */
> > +
> > +#include 
> > +
> > +#include "dfl-afu.h"
> > +
> > +#define PORT_ERROR_MASK 0x8
> > +#define PORT_ERROR 0x10
> > +#define PORT_FIRST_ERROR   0x18
> > +#define PORT_MALFORMED_REQ0 0x20
> > +#define PORT_MALFORMED_REQ1 0x28
> > +
> > +#define ERROR_MASK GENMASK_ULL(63, 0)
> > +
> > +/* mask or unmask port errors by the error mask register. */
> > +static void __port_err_mask(struct device *dev, bool mask)
> > +{
> > +   void __iomem *base;
> > +
> > +   base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
> > +
> > +   writeq(mask ? ERROR_MASK : 0, base + PORT_ERROR_MASK);
> > +}
> > +
> > +/* clear port errors. */
> > +static int __port_err_clear(struct device *dev, u64 err)
> > +{
> > +   struct platform_device *pdev = to_platform_device(dev);
> > +   void __iomem *base_err, 

Re: [PATCH 11/17] fpga: dfl: afu: add error reporting support.

2019-04-09 Thread Alan Tull
On Sun, Mar 24, 2019 at 10:24 PM Wu Hao  wrote:

Hi Hao,

>
> Error reporting is one important private feature, it reports error
> detected on port and accelerated function unit (AFU). It introduces
> several sysfs interfaces to allow userspace to check and clear
> errors detected by hardware.
>
> Signed-off-by: Xu Yilun 
> Signed-off-by: Wu Hao 
> ---
>  Documentation/ABI/testing/sysfs-platform-dfl-port |  29 +++
>  drivers/fpga/Makefile |   1 +
>  drivers/fpga/dfl-afu-error.c  | 225 ++
>  drivers/fpga/dfl-afu-main.c   |   4 +
>  drivers/fpga/dfl-afu.h|   4 +
>  5 files changed, 263 insertions(+)
>  create mode 100644 drivers/fpga/dfl-afu-error.c
>
> diff --git a/Documentation/ABI/testing/sysfs-platform-dfl-port 
> b/Documentation/ABI/testing/sysfs-platform-dfl-port
> index f611e47..e6140aa 100644
> --- a/Documentation/ABI/testing/sysfs-platform-dfl-port
> +++ b/Documentation/ABI/testing/sysfs-platform-dfl-port
> @@ -79,3 +79,32 @@ KernelVersion:   5.2
>  Contact:   Wu Hao 
>  Description:   Read-only. Read this file to get the status of issued command
> to userclck_freqcntrcmd.
> +
> +What:  /sys/bus/platform/devices/dfl-port.0/errors/errors
> +Date:  March 2019
> +KernelVersion: 5.2
> +Contact:   Wu Hao 
> +Description:   Read-only. Read this file to get errors detected on port and
> +   Accelerated Function Unit (AFU).
> +
> +What:  /sys/bus/platform/devices/dfl-port.0/errors/first_error
> +Date:  March 2019
> +KernelVersion: 5.2
> +Contact:   Wu Hao 
> +Description:   Read-only. Read this file to get the first error detected by
> +   hardware.
> +
> +What:  /sys/bus/platform/devices/dfl-port.0/errors/first_malformed_req
> +Date:  March 2019
> +KernelVersion: 5.2
> +Contact:   Wu Hao 
> +Description:   Read-only. Read this file to get the first malformed request
> +   captured by hardware.
> +
> +What:  /sys/bus/platform/devices/dfl-port.0/errors/clear
> +Date:  March 2019
> +KernelVersion: 5.2
> +Contact:   Wu Hao 
> +Description:   Write-only. Write error code to this file to clear errors. If
> +   the input error code doesn't match, it returns -EBUSY error
> +   code.

I understand how -EBUSY could be the right error code for when the
hardware is in a state where the error can't be cleared.  But if the
input error code doesn't match, shouldn't the code be -EINVAL?  Also
as noted below, the way this is currently coded, -ETIMEDOUT could get
returned.

> diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
> index c0dd4c8..f1f0af7 100644
> --- a/drivers/fpga/Makefile
> +++ b/drivers/fpga/Makefile
> @@ -40,6 +40,7 @@ obj-$(CONFIG_FPGA_DFL_AFU) += dfl-afu.o
>
>  dfl-fme-objs := dfl-fme-main.o dfl-fme-pr.o
>  dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o
> +dfl-afu-objs += dfl-afu-error.o
>
>  # Drivers for FPGAs which implement DFL
>  obj-$(CONFIG_FPGA_DFL_PCI) += dfl-pci.o
> diff --git a/drivers/fpga/dfl-afu-error.c b/drivers/fpga/dfl-afu-error.c
> new file mode 100644
> index 000..b66bd4a
> --- /dev/null
> +++ b/drivers/fpga/dfl-afu-error.c
> @@ -0,0 +1,225 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Driver for FPGA Accelerated Function Unit (AFU) Error Reporting
> + *
> + * Copyright 2019 Intel Corporation, Inc.
> + *
> + * Authors:
> + *   Wu Hao 
> + *   Xiao Guangrong 
> + *   Joseph Grecco 
> + *   Enno Luebbers 
> + *   Tim Whisonant 
> + *   Ananda Ravuri 
> + *   Mitchel Henry 
> + */
> +
> +#include 
> +
> +#include "dfl-afu.h"
> +
> +#define PORT_ERROR_MASK 0x8
> +#define PORT_ERROR 0x10
> +#define PORT_FIRST_ERROR   0x18
> +#define PORT_MALFORMED_REQ0 0x20
> +#define PORT_MALFORMED_REQ1 0x28
> +
> +#define ERROR_MASK GENMASK_ULL(63, 0)
> +
> +/* mask or unmask port errors by the error mask register. */
> +static void __port_err_mask(struct device *dev, bool mask)
> +{
> +   void __iomem *base;
> +
> +   base = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
> +
> +   writeq(mask ? ERROR_MASK : 0, base + PORT_ERROR_MASK);
> +}
> +
> +/* clear port errors. */
> +static int __port_err_clear(struct device *dev, u64 err)
> +{
> +   struct platform_device *pdev = to_platform_device(dev);
> +   void __iomem *base_err, *base_hdr;
> +   int ret;
> +   u64 v;
> +
> +   base_err = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_ERROR);
> +   base_hdr = dfl_get_feature_ioaddr_by_id(dev, PORT_FEATURE_ID_HEADER);
> +
> +   /*
> +* clear Port Errors
> +*
> +* - Check for AP6 State
> +* - Halt Port by keeping Port in reset
> +* - Set PORT Error mask to all 1 to mask errors
> +* - Clear all errors
> +* - Set