On Tue,  1 Oct 2024 13:42:47 +0200
Mauro Carvalho Chehab <mchehab+hua...@kernel.org> wrote:

> There are two pointers that are needed during error injection:
> 
> 1. The start address of the CPER block to be stored;
> 2. The address of the ack, which needs a reset before next error.
> 
> Calculate them preferrable from the HEST table, as this allows
> checking the source ID, the size of the table and the type of
> HEST error block structures.
> 
> Yet, keep the old code, as this is needed for migration purposes.
> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+hua...@kernel.org>
> ---
>  hw/acpi/ghes.c | 93 ++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 83 insertions(+), 10 deletions(-)
> 
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 2c2cf444edeb..313a6e453af6 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -61,6 +61,23 @@
>   */
>  #define ACPI_GHES_GESB_SIZE                 20
>  
> +/*
> + * Offsets with regards to the start of the HEST table stored at
> + * ags->hest_addr_le, according with the memory layout map at
> + * docs/specs/acpi_hest_ghes.rst.
> + */
> +
> +/* ACPI 6.2: 18.3.2.8 Generic Hardware Error Source version 2
> + * Table 18-382 Generic Hardware Error Source version 2 (GHESv2) Structure
> + */
> +#define HEST_GHES_V2_TABLE_SIZE  92
> +#define GHES_ACK_OFFSET          (64 + GAS_ADDR_OFFSET)
> +
> +/* ACPI 6.2: 18.3.2.7: Generic Hardware Error Source
> + * Table 18-380: 'Error Status Address' field
> + */
> +#define GHES_ERR_ST_ADDR_OFFSET  (20 + GAS_ADDR_OFFSET)
> +
>  /*
>   * Values for error_severity field
>   */
> @@ -218,14 +235,6 @@ static void build_ghes_error_table(GArray 
> *hardware_errors, BIOSLinker *linker,
>  {
>      int i, error_status_block_offset;
>  
> -    /*
> -     * TODO: Current version supports only one source.
> -     * A further patch will drop this check, after adding a proper migration
> -     * code, as, for the code to work, we need to store a bios pointer to the
> -     * HEST table.
> -     */
> -    assert(num_sources == 1);
> -
>      /* Build error_block_address */
>      for (i = 0; i < num_sources; i++) {
>          build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t));
> @@ -425,6 +434,65 @@ static void get_ghes_offsets(uint64_t ghes_addr,
>      *read_ack_register_addr = ghes_addr + sizeof(uint64_t);
>  }
>  
> +static void get_hest_offsets(uint16_t source_id, uint64_t hest_addr,
> +                             uint64_t *cper_addr,
> +                             uint64_t *read_ack_start_addr,
> +                              Error **errp)

cper/read_ack are GHES specific only, aren't they?

perhaps s/get_hest_offsets/get_ghes_source_offsets/


> +{
> +    uint64_t hest_err_block_addr, hest_read_ack_start_addr;
> +    uint64_t err_source_struct, error_block_addr;
> +    uint32_t num_sources, i;
> +
> +    if (!hest_addr) {
> +        return;
> +    }
> +
> +    cpu_physical_memory_read(hest_addr, &num_sources, sizeof(num_sources));
> +
> +    err_source_struct = hest_addr + sizeof(num_sources);
> +
> +    /*
> +     * Currently, HEST Error source navigates only for GHESv2 tables
> +     */
> +
> +    for (i = 0; i < num_sources; i++) {

missing le32_to_cpu(num_sources)

> +        uint64_t addr = err_source_struct;
> +        uint16_t type, src_id;
> +
> +        cpu_physical_memory_read(addr, &type, sizeof(type));

ditto for anything larger than 1 byte that you read from guest memory
(all over the patch)

> +
> +        /* For now, we only know the size of GHESv2 table */
> +        assert(type == ACPI_GHES_SOURCE_GENERIC_ERROR_V2);

Imagine that in qemu-9.3 we add a non-GHES error source, and then try to
migrate such a guest to qemu-9.2. It will explode here.
Of course, we can add some compat property to ged or the machine type to
make sure that the code works the old way in qemu-9.3 for virt-9.2,
at the expense of keeping 9.2 code in 9.3. That adds to the maintenance burden
and is fragile; also, it's only a matter of time before we screw it up and
release a non-migratable/broken QEMU.

So I'd like to avoid piling up compat code/knobs on top of each other
and do it in a way where this src id lookup can gracefully skip
error sources that are not implemented yet.
This way we won't need any compat knobs to deal with in the future.

> +
> +        /* It is GHES. Compare CPER source address */
> +        addr += sizeof(type);
> +        cpu_physical_memory_read(addr, &src_id, sizeof(src_id));
> +
> +        if (src_id == source_id) {
> +            break;
> +        }
> +
> +        err_source_struct += HEST_GHES_V2_TABLE_SIZE;
> +    }
> +    if (i == num_sources) {
> +        error_setg(errp, "HEST: Source %d not found.", source_id);
> +        return;
> +    }
> +
> +    /* Navigate though table address pointers */
> +    hest_err_block_addr = err_source_struct + GHES_ERR_ST_ADDR_OFFSET;
> +    hest_read_ack_start_addr = err_source_struct + GHES_ACK_OFFSET;

s/hest_read_ack_start_addr/hest_read_ack_addr/

> +
> +    cpu_physical_memory_read(hest_err_block_addr, &error_block_addr,
> +                             sizeof(error_block_addr));
> +
> +    cpu_physical_memory_read(error_block_addr, cper_addr,
> +                             sizeof(*cper_addr));
> +
> +    cpu_physical_memory_read(hest_read_ack_start_addr, read_ack_start_addr,
> +                             sizeof(*read_ack_start_addr));
> +}
> +
>  void ghes_record_cper_errors(const void *cper, size_t len,
>                               uint16_t source_id, Error **errp)
>  {
> @@ -445,8 +513,13 @@ void ghes_record_cper_errors(const void *cper, size_t 
> len,
>      }
>      ags = &acpi_ged_state->ghes_state;
>  
> -    get_ghes_offsets(le64_to_cpu(ags->hw_error_le),
> -                     &cper_addr, &read_ack_register_addr);
> +    if (!ags->hest_addr_le) {
> +        get_ghes_offsets(le64_to_cpu(ags->hw_error_le),

should it be named get_hw_error_offsets?

> +                         &cper_addr, &read_ack_register_addr);
> +    } else {
> +        get_hest_offsets(source_id, le64_to_cpu(ags->hest_addr_le),
> +                         &cper_addr, &read_ack_register_addr, errp);
> +    }
>  
>      if (!cper_addr) {
>          error_setg(errp, "can not find Generic Error Status Block");


Reply via email to