Re: [PATCH V14 01/10] acpi: apei: read ack upon ghes record consumption
On 4/11/2017 11:15 AM, Borislav Petkov wrote: On Tue, Mar 28, 2017 at 01:30:31PM -0600, Tyler Baicar wrote: A RAS (Reliability, Availability, Serviceability) controller may be a separate processor running in parallel with OS execution, and may generate error records for consumption by the OS. If the RAS controller produces multiple error records, then they may be overwritten before the OS has consumed them. The Generic Hardware Error Source (GHES) v2 structure introduces the capability for the OS to acknowledge the consumption of the error record generated by the RAS controller. A RAS controller supporting GHESv2 shall wait for the acknowledgment before writing a new error record, thus eliminating the race condition. Add support for parsing of GHESv2 sub-tables as well. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse --- drivers/acpi/apei/ghes.c | 49 +--- drivers/acpi/apei/hest.c | 7 +-- include/acpi/ghes.h | 5 - 3 files changed, 55 insertions(+), 6 deletions(-) ... @@ -249,10 +254,18 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); if (!ghes) return ERR_PTR(-ENOMEM); + ghes->generic = generic; + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { + rc = apei_map_generic_address( + &ghes->generic_v2->read_ack_register); Yeah, that linebreak just to keep the 80-cols rule makes the code ugly and hard to read. Please put that mapping and unmapping in wrappers called map_gen_v2(ghes) and unmap_gen_v2(ghes) or so, so that you can call them wherever needed. This should make the flow a bit more understandable what's going on and you won't have to repeat the unmapping lines in ghes_fini(). Hello Boris, Thank you for the feedback. I will make this change in the next version. 
@@ -649,6 +669,23 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } +static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2) +{ + int rc; + u64 val = 0; + + rc = apei_read(&val, &generic_v2->read_ack_register); + if (rc) + return rc; + val &= generic_v2->read_ack_preserve << + generic_v2->read_ack_register.bit_offset; + val |= generic_v2->read_ack_write << + generic_v2->read_ack_register.bit_offset; Yeah, let them stick out, it's more readable this way. Line spacing is helpful too: ... rc = apei_read(&val, &generic_v2->read_ack_register); if (rc) return rc; val &= generic_v2->read_ack_preserve << generic_v2->read_ack_register.bit_offset; val |= generic_v2->read_ack_write << generic_v2->read_ack_register.bit_offset; return apei_write(val, &generic_v2->read_ack_register); } I will make this change in the next version. Thanks, Tyler + rc = apei_write(val, &generic_v2->read_ack_register); + + return rc; +} + static int ghes_proc(struct ghes *ghes) { int rc; -- Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc. Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project. ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH V14 01/10] acpi: apei: read ack upon ghes record consumption
On Tue, Mar 28, 2017 at 01:30:31PM -0600, Tyler Baicar wrote: > A RAS (Reliability, Availability, Serviceability) controller > may be a separate processor running in parallel with OS > execution, and may generate error records for consumption by > the OS. If the RAS controller produces multiple error records, > then they may be overwritten before the OS has consumed them. > > The Generic Hardware Error Source (GHES) v2 structure > introduces the capability for the OS to acknowledge the > consumption of the error record generated by the RAS > controller. A RAS controller supporting GHESv2 shall wait for > the acknowledgment before writing a new error record, thus > eliminating the race condition. > > Add support for parsing of GHESv2 sub-tables as well. > > Signed-off-by: Tyler Baicar > CC: Jonathan (Zhixiong) Zhang > Reviewed-by: James Morse > --- > drivers/acpi/apei/ghes.c | 49 > +--- > drivers/acpi/apei/hest.c | 7 +-- > include/acpi/ghes.h | 5 - > 3 files changed, 55 insertions(+), 6 deletions(-) ... > @@ -249,10 +254,18 @@ static struct ghes *ghes_new(struct acpi_hest_generic > *generic) > ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); > if (!ghes) > return ERR_PTR(-ENOMEM); > + > ghes->generic = generic; > + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { > + rc = apei_map_generic_address( > + &ghes->generic_v2->read_ack_register); Yeah, that linebreak just to keep the 80-cols rule makes the code ugly and hard to read. Please put that mapping and unmapping in wrappers called map_gen_v2(ghes) and unmap_gen_v2(ghes) or so, so that you can call them wherever needed. This should make the flow a bit more understandable what's going on and you won't have to repeat the unmapping lines in ghes_fini(). 
> @@ -649,6 +669,23 @@ static void ghes_estatus_cache_add( > rcu_read_unlock(); > } > > +static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2) > +{ > + int rc; > + u64 val = 0; > + > + rc = apei_read(&val, &generic_v2->read_ack_register); > + if (rc) > + return rc; > + val &= generic_v2->read_ack_preserve << > + generic_v2->read_ack_register.bit_offset; > + val |= generic_v2->read_ack_write << > + generic_v2->read_ack_register.bit_offset; Yeah, let them stick out, it's more readable this way. Line spacing is helpful too: ... rc = apei_read(&val, &generic_v2->read_ack_register); if (rc) return rc; val &= generic_v2->read_ack_preserve << generic_v2->read_ack_register.bit_offset; val |= generic_v2->read_ack_write << generic_v2->read_ack_register.bit_offset; return apei_write(val, &generic_v2->read_ack_register); } > + rc = apei_write(val, &generic_v2->read_ack_register); > + > + return rc; > +} > + > static int ghes_proc(struct ghes *ghes) > { > int rc; -- Regards/Gruss, Boris. Good mailing practices for 400: avoid top-posting and trim the reply. ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PATCH V14 01/10] acpi: apei: read ack upon ghes record consumption
A RAS (Reliability, Availability, Serviceability) controller may be a separate processor running in parallel with OS execution, and may generate error records for consumption by the OS. If the RAS controller produces multiple error records, then they may be overwritten before the OS has consumed them. The Generic Hardware Error Source (GHES) v2 structure introduces the capability for the OS to acknowledge the consumption of the error record generated by the RAS controller. A RAS controller supporting GHESv2 shall wait for the acknowledgment before writing a new error record, thus eliminating the race condition. Add support for parsing of GHESv2 sub-tables as well. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse --- drivers/acpi/apei/ghes.c | 49 +--- drivers/acpi/apei/hest.c | 7 +-- include/acpi/ghes.h | 5 - 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index b192b42..0241e36 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -46,6 +46,7 @@ #include #include +#include #include #include #include @@ -80,6 +81,10 @@ ((struct acpi_hest_generic_status *)\ ((struct ghes_estatus_node *)(estatus_node) + 1)) +#define IS_HEST_TYPE_GENERIC_V2(ghes) \ + ((struct acpi_hest_header *)ghes->generic)->type == \ +ACPI_HEST_TYPE_GENERIC_ERROR_V2 + /* * This driver isn't really modular, however for the time being, * continuing to use module_param is the easiest way to remain @@ -249,10 +254,18 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes = kzalloc(sizeof(*ghes), GFP_KERNEL); if (!ghes) return ERR_PTR(-ENOMEM); + ghes->generic = generic; + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { + rc = apei_map_generic_address( + &ghes->generic_v2->read_ack_register); + if (rc) + goto err_free; + } + rc = apei_map_generic_address(&generic->error_status_address); if (rc) - goto err_free; + goto err_unmap_read_ack_addr; error_block_length = generic->error_block_length; if
(error_block_length > GHES_ESTATUS_MAX_SIZE) { pr_warning(FW_WARN GHES_PFX @@ -264,13 +277,17 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic) ghes->estatus = kmalloc(error_block_length, GFP_KERNEL); if (!ghes->estatus) { rc = -ENOMEM; - goto err_unmap; + goto err_unmap_status_addr; } return ghes; -err_unmap: +err_unmap_status_addr: apei_unmap_generic_address(&generic->error_status_address); +err_unmap_read_ack_addr: + if (IS_HEST_TYPE_GENERIC_V2(ghes)) + apei_unmap_generic_address( + &ghes->generic_v2->read_ack_register); err_free: kfree(ghes); return ERR_PTR(rc); @@ -280,6 +297,9 @@ static void ghes_fini(struct ghes *ghes) { kfree(ghes->estatus); apei_unmap_generic_address(&ghes->generic->error_status_address); + if (IS_HEST_TYPE_GENERIC_V2(ghes)) + apei_unmap_generic_address( + &ghes->generic_v2->read_ack_register); } static inline int ghes_severity(int severity) @@ -649,6 +669,23 @@ static void ghes_estatus_cache_add( rcu_read_unlock(); } +static int ghes_ack_error(struct acpi_hest_generic_v2 *generic_v2) +{ + int rc; + u64 val = 0; + + rc = apei_read(&val, &generic_v2->read_ack_register); + if (rc) + return rc; + val &= generic_v2->read_ack_preserve << + generic_v2->read_ack_register.bit_offset; + val |= generic_v2->read_ack_write << + generic_v2->read_ack_register.bit_offset; + rc = apei_write(val, &generic_v2->read_ack_register); + + return rc; +} + static int ghes_proc(struct ghes *ghes) { int rc; @@ -661,6 +698,12 @@ static int ghes_proc(struct ghes *ghes) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } ghes_do_proc(ghes, ghes->estatus); + + if (IS_HEST_TYPE_GENERIC_V2(ghes)) { + rc = ghes_ack_error(ghes->generic_v2); + if (rc) + return rc; + } out: ghes_clear_estatus(ghes); return rc; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 8f2a98e..456b488 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -52,6 +52,7 @@ [ACPI_HEST_TYPE_AER_ENDPOINT] = sizeof(struct acpi_hest_aer), [ACPI_HEST_TYPE_AER_BRIDGE] = sizeof(struct 
acpi_hest_aer_bridge), [ACPI_HEST_TYPE_GENERIC_ERROR] = sizeof(struct acpi_hest_generic), +