On Tue, 1 Oct 2024 13:42:47 +0200 Mauro Carvalho Chehab <mchehab+hua...@kernel.org> wrote:
> There are two pointers that are needed during error injection: > > 1. The start address of the CPER block to be stored; > 2. The address of the ack, which needs a reset before next error. > > Calculate them preferrable from the HEST table, as this allows > checking the source ID, the size of the table and the type of > HEST error block structures. > > Yet, keep the old code, as this is needed for migration purposes. > > Signed-off-by: Mauro Carvalho Chehab <mchehab+hua...@kernel.org> > --- > hw/acpi/ghes.c | 93 ++++++++++++++++++++++++++++++++++++++++++++------ > 1 file changed, 83 insertions(+), 10 deletions(-) > > diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c > index 2c2cf444edeb..313a6e453af6 100644 > --- a/hw/acpi/ghes.c > +++ b/hw/acpi/ghes.c > @@ -61,6 +61,23 @@ > */ > #define ACPI_GHES_GESB_SIZE 20 > > +/* > + * Offsets with regards to the start of the HEST table stored at > + * ags->hest_addr_le, according with the memory layout map at > + * docs/specs/acpi_hest_ghes.rst. > + */ > + > +/* ACPI 6.2: 18.3.2.8 Generic Hardware Error Source version 2 > + * Table 18-382 Generic Hardware Error Source version 2 (GHESv2) Structure > + */ > +#define HEST_GHES_V2_TABLE_SIZE 92 > +#define GHES_ACK_OFFSET (64 + GAS_ADDR_OFFSET) > + > +/* ACPI 6.2: 18.3.2.7: Generic Hardware Error Source > + * Table 18-380: 'Error Status Address' field > + */ > +#define GHES_ERR_ST_ADDR_OFFSET (20 + GAS_ADDR_OFFSET) > + > /* > * Values for error_severity field > */ > @@ -218,14 +235,6 @@ static void build_ghes_error_table(GArray > *hardware_errors, BIOSLinker *linker, > { > int i, error_status_block_offset; > > - /* > - * TODO: Current version supports only one source. > - * A further patch will drop this check, after adding a proper migration > - * code, as, for the code to work, we need to store a bios pointer to the > - * HEST table. 
> - */ > - assert(num_sources == 1); > - > /* Build error_block_address */ > for (i = 0; i < num_sources; i++) { > build_append_int_noprefix(hardware_errors, 0, sizeof(uint64_t)); > @@ -425,6 +434,65 @@ static void get_ghes_offsets(uint64_t ghes_addr, > *read_ack_register_addr = ghes_addr + sizeof(uint64_t); > } > > +static void get_hest_offsets(uint16_t source_id, uint64_t hest_addr, > + uint64_t *cper_addr, > + uint64_t *read_ack_start_addr, > + Error **errp) cper/read_ack are GHES specific only, aren't they? perhaps s/get_hest_offsets/get_ghes_source_offsets/ > +{ > + uint64_t hest_err_block_addr, hest_read_ack_start_addr; > + uint64_t err_source_struct, error_block_addr; > + uint32_t num_sources, i; > + > + if (!hest_addr) { > + return; > + } > + > + cpu_physical_memory_read(hest_addr, &num_sources, sizeof(num_sources)); > + > + err_source_struct = hest_addr + sizeof(num_sources); > + > + /* > + * Currently, HEST Error source navigates only for GHESv2 tables > + */ > + > + for (i = 0; i < num_sources; i++) { missing le32_to_cpu() on num_sources > + uint64_t addr = err_source_struct; > + uint16_t type, src_id; > + > + cpu_physical_memory_read(addr, &type, sizeof(type)); ditto for anything larger than 1 byte that you read from guest memory (all over the patch) > + > + /* For now, we only know the size of GHESv2 table */ > + assert(type == ACPI_GHES_SOURCE_GENERIC_ERROR_V2); Imagine that in qemu-9.3 we add a non-GHES error source, and then try to migrate such a guest to qemu-9.2. It will explode here. Of course, we can add some compat property to ged or the machine type to make sure that the code works the old way in qemu-9.3 for virt-9.2, at the expense of keeping 9.2 code in 9.3. That adds to the maintenance burden and is fragile; it's also only a matter of time before we screw it up and release a non-migratable/broken QEMU. So I'd like to avoid piling up compat code/knobs on top of each other and do it in a way where this src id lookup could gracefully skip error sources that are not implemented yet. 
This way we won't need any compat knobs to deal with in the future. > + > + /* It is GHES. Compare CPER source address */ > + addr += sizeof(type); > + cpu_physical_memory_read(addr, &src_id, sizeof(src_id)); > + > + if (src_id == source_id) { > + break; > + } > + > + err_source_struct += HEST_GHES_V2_TABLE_SIZE; > + } > + if (i == num_sources) { > + error_setg(errp, "HEST: Source %d not found.", source_id); > + return; > + } > + > + /* Navigate though table address pointers */ > + hest_err_block_addr = err_source_struct + GHES_ERR_ST_ADDR_OFFSET; > + hest_read_ack_start_addr = err_source_struct + GHES_ACK_OFFSET; s/hest_read_ack_start_addr/hest_read_ack_addr/ > + > + cpu_physical_memory_read(hest_err_block_addr, &error_block_addr, > + sizeof(error_block_addr)); > + > + cpu_physical_memory_read(error_block_addr, cper_addr, > + sizeof(*cper_addr)); > + > + cpu_physical_memory_read(hest_read_ack_start_addr, read_ack_start_addr, > + sizeof(*read_ack_start_addr)); > +} > + > void ghes_record_cper_errors(const void *cper, size_t len, > uint16_t source_id, Error **errp) > { > @@ -445,8 +513,13 @@ void ghes_record_cper_errors(const void *cper, size_t > len, > } > ags = &acpi_ged_state->ghes_state; > > - get_ghes_offsets(le64_to_cpu(ags->hw_error_le), > - &cper_addr, &read_ack_register_addr); > + if (!ags->hest_addr_le) { > + get_ghes_offsets(le64_to_cpu(ags->hw_error_le), Should it be named get_hw_error_offsets() instead? > + &cper_addr, &read_ack_register_addr); > + } else { > + get_hest_offsets(source_id, le64_to_cpu(ags->hest_addr_le), > + &cper_addr, &read_ack_register_addr, errp); > + } > > if (!cper_addr) { > error_setg(errp, "can not find Generic Error Status Block");