Hello James,

Thank you for your feedback!

On 11/25/2016 11:19 AM, James Morse wrote:
Hi Tyler,

On 21/11/16 22:35, Tyler Baicar wrote:
A RAS (Reliability, Availability, Serviceability) controller
may be a separate processor running in parallel with OS
execution, and may generate error records for consumption by
the OS. If the RAS controller produces multiple error records,
then they may be overwritten before the OS has consumed them.

The Generic Hardware Error Source (GHES) v2 structure
introduces the capability for the OS to acknowledge the
consumption of the error record generated by the RAS
controller. A RAS controller supporting GHESv2 shall wait for
the acknowledgment before writing a new error record, thus
eliminating the race condition.
This patch also adds support for parsing GHESv2 sub-tables.
Previously, they would be rejected as an unknown hardware error source.
Yes, I will add that to the text.
Signed-off-by: Jonathan (Zhixiong) Zhang<zjzh...@codeaurora.org>
Nit: the patch author's Sign-off should come first, you either need a 'From:
Jonathan (Zhixiong) Zhang ...' on this patch, or re-order these Signed-off-by's.
I'll reorder them in the next set.
Signed-off-by: Richard Ruigrok<rruig...@codeaurora.org>
Signed-off-by: Tyler Baicar<tbai...@codeaurora.org>
Signed-off-by: Naveen Kaje<nk...@codeaurora.org>
---
  drivers/acpi/apei/ghes.c | 49 +++++++++++++++++++++++++++++++++++++++++++++---
  drivers/acpi/apei/hest.c |  7 +++++--
  include/acpi/ghes.h      |  5 ++++-
  3 files changed, 55 insertions(+), 6 deletions(-)

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 60746ef..b79abc5 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -45,6 +45,7 @@
  #include <linux/aer.h>
  #include <linux/nmi.h>
+#include <acpi/actbl1.h>
  #include <acpi/ghes.h>
  #include <acpi/apei.h>
  #include <asm/tlbflush.h>
@@ -79,6 +80,10 @@
        ((struct acpi_hest_generic_status *)                            \
         ((struct ghes_estatus_node *)(estatus_node) + 1))
+#define HEST_TYPE_GENERIC_V2(ghes) \
+       ((struct acpi_hest_header *)ghes->generic)->type ==       \
+        ACPI_HEST_TYPE_GENERIC_ERROR_V2
+
IS_HEST_TYPE_GENERIC_V2() ? (for the sake of readability)

Will do.
  /*
   * This driver isn't really modular, however for the time being,
   * continuing to use module_param is the easiest way to remain
@@ -248,10 +253,18 @@ static struct ghes *ghes_new(struct acpi_hest_generic 
*generic)
        ghes = kzalloc(sizeof(*ghes), GFP_KERNEL);
        if (!ghes)
                return ERR_PTR(-ENOMEM);
+
        ghes->generic = generic;
+       if (HEST_TYPE_GENERIC_V2(ghes)) {
+               rc = apei_map_generic_address(
+                       &ghes->generic_v2->read_ack_register);
+               if (rc)
+                       goto err_free;
+       }
+
        rc = apei_map_generic_address(&generic->error_status_address);
        if (rc)
-               goto err_free;
+               goto err_unmap_read_ack_addr;
        error_block_length = generic->error_block_length;
        if (error_block_length > GHES_ESTATUS_MAX_SIZE) {
                pr_warning(FW_WARN GHES_PFX
@@ -263,13 +276,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic 
*generic)
        ghes->estatus = kmalloc(error_block_length, GFP_KERNEL);
        if (!ghes->estatus) {
                rc = -ENOMEM;
-               goto err_unmap;
+               goto err_unmap_status_addr;
        }
return ghes; -err_unmap:
+err_unmap_status_addr:
        apei_unmap_generic_address(&generic->error_status_address);
+err_unmap_read_ack_addr:
+       if (HEST_TYPE_GENERIC_V2(ghes))
+               apei_unmap_generic_address(
+                       &ghes->generic_v2->read_ack_register);
  err_free:
        kfree(ghes);
        return ERR_PTR(rc);
@@ -279,6 +296,9 @@ static void ghes_fini(struct ghes *ghes)
  {
        kfree(ghes->estatus);
        apei_unmap_generic_address(&ghes->generic->error_status_address);
+       if (HEST_TYPE_GENERIC_V2(ghes))
+               apei_unmap_generic_address(
+                       &ghes->generic_v2->read_ack_register);
  }
static inline int ghes_severity(int severity)
@@ -648,6 +668,23 @@ static void ghes_estatus_cache_add(
        rcu_read_unlock();
  }
+static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2)
+{
+       int rc;
+       u64 val = 0;
+
+       rc = apei_read(&val, &generic_v2->read_ack_register);
+       if (rc)
+               return rc;
+       val &= generic_v2->read_ack_preserve <<
+               generic_v2->read_ack_register.bit_offset;
+       val |= generic_v2->read_ack_write <<
+               generic_v2->read_ack_register.bit_offset;
Is this bit_offset shifting needed in case the read_ack_register is in the
'system io' (or embedded controller) address space and shares a register with
some other stuff?

The read_ack_{preserve,write} values are u64, so if bit_offset is non-zero the
high order bits get lost, but both ends of this are in the firmware's control.

(I assumed this thing would always be in memory and these fields would never be
used - but I guess that isn't true!)

Yeah, we are not using these values, but they are defined this way in the ACPI 6.1 spec (Table 18-344). read_ack_register is defined as a Generic Address Structure which has this offset defined in
Table 5-26.

I assume it is defined this way to support shared registers, as you mentioned. With this
bit offset, the firmware is able to specify exactly what to write.
+       rc = apei_write(val, &generic_v2->read_ack_register);
+
+       return rc;
+}
+
  static int ghes_proc(struct ghes *ghes)
  {
        int rc;
@@ -660,6 +697,12 @@ static int ghes_proc(struct ghes *ghes)
                        ghes_estatus_cache_add(ghes->generic, ghes->estatus);
        }
        ghes_do_proc(ghes, ghes->estatus);
+
+       if (HEST_TYPE_GENERIC_V2(ghes)) {
+               rc = ghes_ack_error(ghes->generic_v2);
+               if (rc)
+                       return rc;
+       }
  out:
        ghes_clear_estatus(ghes);
        return 0;
diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c
index 792a0d9..ef725a9 100644
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -52,6 +52,7 @@ static const int hest_esrc_len_tab[ACPI_HEST_TYPE_RESERVED] = 
{
        [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer),
        [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct acpi_hest_aer_bridge),
        [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic),
+       [ACPI_HEST_TYPE_GENERIC_ERROR_V2] = sizeof(struct acpi_hest_generic_v2),
  };
static int hest_esrc_len(struct acpi_hest_header *hest_hdr)
@@ -146,7 +147,8 @@ static int __init hest_parse_ghes_count(struct 
acpi_hest_header *hest_hdr, void
  {
        int *count = data;
- if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR)
+       if (hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR ||
+           hest_hdr->type == ACPI_HEST_TYPE_GENERIC_ERROR_V2)
                (*count)++;
        return 0;
  }
@@ -157,7 +159,8 @@ static int __init hest_parse_ghes(struct acpi_hest_header 
*hest_hdr, void *data)
        struct ghes_arr *ghes_arr = data;
        int rc, i;
- if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR)
+       if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR &&
+           hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR_V2)
                return 0;
if (!((struct acpi_hest_generic *)hest_hdr)->enabled)
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 720446c..68f088a 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -13,7 +13,10 @@
  #define GHES_EXITING          0x0002
struct ghes {
-       struct acpi_hest_generic *generic;
+       union {
+               struct acpi_hest_generic *generic;
+               struct acpi_hest_generic_v2 *generic_v2;
+       };
        struct acpi_hest_generic_status *estatus;
        u64 buffer_paddr;
        unsigned long flags;

Looks good to me, for what it's worth:
Reviewed-by: James Morse<james.mo...@arm.com>
Thanks!
Tyler

--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm 
Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to