Gavin Shan <[email protected]> writes:

> Use error_abort in acpi_ghes_memory_errors() so that the caller needn't
> explicitly call abort() on errors. With this change, its return value
> isn't needed any more.
>
> Suggested-by: Igor Mammedov <[email protected]>
> Signed-off-by: Gavin Shan <[email protected]>
> ---
>  hw/acpi/ghes-stub.c    |  6 +++---
>  hw/acpi/ghes.c         | 15 ++++-----------
>  include/hw/acpi/ghes.h |  5 +++--
>  target/arm/kvm.c       | 10 +++-------
>  4 files changed, 13 insertions(+), 23 deletions(-)
>
> diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c
> index 4faf573aeb..4ef914ffc5 100644
> --- a/hw/acpi/ghes-stub.c
> +++ b/hw/acpi/ghes-stub.c
> @@ -11,10 +11,10 @@
>  #include "qemu/osdep.h"
>  #include "hw/acpi/ghes.h"
>  
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> -                            uint64_t *addresses, uint32_t num_of_addresses)
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> +                             uint64_t *addresses, uint32_t num_of_addresses,
> +                             Error **errp)
>  {
> -    return -1;
>  }

Before the patch, this function always fails: it returns -1.

Afterwards, it always succeeds: it doesn't set @errp.

Which one is correct?

>  
>  AcpiGhesState *acpi_ghes_get_state(void)
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 055e5d719a..aa469c03f2 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -543,8 +543,9 @@ void ghes_record_cper_errors(AcpiGhesState *ags, const 
> void *cper, size_t len,
>      notifier_list_notify(&acpi_generic_error_notifiers, &source_id);
>  }
>  
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> -                            uint64_t *addresses, uint32_t num_of_addresses)
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> +                             uint64_t *addresses, uint32_t num_of_addresses,
> +                             Error **errp)

qapi/error.h:

 * - Whenever practical, also return a value that indicates success /
 *   failure.  This can make the error checking more concise, and can
 *   avoid useless error object creation and destruction.  Note that
 *   we still have many functions returning void.  We recommend
 *   • bool-valued functions return true on success / false on failure,
 *   • pointer-valued functions return non-null / null pointer, and
 *   • integer-valued functions return non-negative / negative.

>  {
>      /* Memory Error Section Type */
>      const uint8_t guid[] =
> @@ -555,7 +556,6 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t 
> source_id,
>       * Table 17-13 Generic Error Data Entry
>       */
>      QemuUUID fru_id = {};
> -    Error *errp = NULL;
>      int data_length;
>      GArray *block;
>      uint32_t block_status, i;
> @@ -592,16 +592,9 @@ int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t 
> source_id,
>      }
>  
>      /* Report the error */
> -    ghes_record_cper_errors(ags, block->data, block->len, source_id, &errp);
> +    ghes_record_cper_errors(ags, block->data, block->len, source_id, errp);
>  
>      g_array_free(block, true);
> -
> -    if (errp) {
> -        error_report_err(errp);
> -        return -1;
> -    }
> -
> -    return 0;
>  }

The error reporting moves into the caller.

>  
>  AcpiGhesState *acpi_ghes_get_state(void)
> diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h
> index f73908985d..35c7bbbb01 100644
> --- a/include/hw/acpi/ghes.h
> +++ b/include/hw/acpi/ghes.h
> @@ -98,8 +98,9 @@ void acpi_build_hest(AcpiGhesState *ags, GArray *table_data,
>                       const char *oem_id, const char *oem_table_id);
>  void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s,
>                            GArray *hardware_errors);
> -int acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> -                            uint64_t *addresses, uint32_t num_of_addresses);
> +void acpi_ghes_memory_errors(AcpiGhesState *ags, uint16_t source_id,
> +                             uint64_t *addresses, uint32_t num_of_addresses,
> +                             Error **errp);
>  void ghes_record_cper_errors(AcpiGhesState *ags, const void *cper, size_t 
> len,
>                               uint16_t source_id, Error **errp);
>  
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 459ca4a9b0..a889315606 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -2458,13 +2458,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, 
> void *addr)
>              addresses[0] = paddr;
>              if (code == BUS_MCEERR_AR) {
>                  kvm_cpu_synchronize_state(c);
> -                if (!acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> -                                             addresses, 1)) {
> -                    kvm_inject_arm_sea(c);
> -                } else {
> -                    error_report("failed to record the error");
> -                    abort();
> -                }
> +                acpi_ghes_memory_errors(ags, ACPI_HEST_SRC_ID_SYNC,
> +                                        addresses, 1, &error_abort);
> +                kvm_inject_arm_sea(c);

Before the patch, we get two error reports, like this:

    qemu-system-FOO: OSPM does not acknowledge previous error, so can not 
record CPER for current error anymore
    qemu-system-FOO: failed to record the error
    Aborted (core dumped)

Such error cascades should be avoided.

Afterwards, we get one:

    Unexpected error at SOURCE-FILE:LINE-NUMBER:
    qemu-system-FOO: OSPM does not acknowledge previous error, so can not 
record CPER for current error anymore
    Aborted (core dumped)

Are all errors reported by acpi_ghes_memory_errors() programming errors,
i.e. when an error is reported, there's a bug for us to fix?

If not, abort() is wrong before the patch, and &error_abort is wrong
afterwards.

You can use &error_fatal for fatal errors that are not programming
errors.

>              }
>              return;
>          }


Reply via email to