On Wed, 21 Jan 2026 12:25:15 +0100 Mauro Carvalho Chehab <[email protected]> wrote:
> Add a decoder to help debugging injected CPERs. This is more > relevant when we add fuzzy-testing error inject, as the > decoder is helpful to identify what it packages will be sent > via QEMU to the firmware-fist logic. > > By purpose, I opted to keep this completely independent from > the encoders implementation, as this can be used even when > there are no encoders for a certain GGUID type (except for a GGUID? > fuzzy logic test, which is pretty much independent of the > records meaning). > > Signed-off-by: Mauro Carvalho Chehab <[email protected]> A few minor things inline. I checked the specs, so all that stuff is fine including the spec bug you mention (which I'm sure you've already reported :) Jonathan > --- > MAINTAINERS | 1 + > scripts/ghes_decode.py | 1155 ++++++++++++++++++++++++++++++++++++++++ > scripts/qmp_helper.py | 3 + > 3 files changed, 1159 insertions(+) > create mode 100644 scripts/ghes_decode.py > > diff --git a/scripts/ghes_decode.py b/scripts/ghes_decode.py > new file mode 100644 > index 000000000000..6c7fdfe84e3a > --- /dev/null > +++ b/scripts/ghes_decode.py > @@ -0,0 +1,1155 @@ > +#!/usr/bin/env python3 > +# > +# pylint: disable=R0903,R0912,R0913,R0915,R0917,R1713,E1121,C0302,W0613 > +# SPDX-License-Identifier: GPL-2.0-or-later > +# > +# Copyright (C) 2025 Mauro Carvalho Chehab <[email protected]> > + > +""" > +Helper classes to decode a generic error data entry. > + > +By purpose, the logic here is independent of the logic inside qmp_helper > +and other modules. With a different implementation, it is more likely to > +discover bugs at the error injection logic. Also, as this can be used to > +dump errors injected by reproducing an error mesage or for fuzzy error > +injection, it can't rely at the encoding logic inside each module of rely on the encoding logic > +ghes_inject.py. > + > +To make the decoder simple, the decode logic here is at field level, not > +trying to decode bitmaps. 
> +""" > + > +from typing import Optional > +class DecodeProcX86(): > + """ > + Class to decode an x86 Processor Error as defined at > + UEFI 2.1 - N.2.2 Section Descriptor > + """ > + > + # GUID for x86 Processor Error > + guid = "dc3ea0b0-a144-4797-b95b-53fa242b6e1d" > + > + pei_fields = [ > + ("Error Structure Type", 16, "guid"), > + ("Validation Bits", 8, "int"), > + ("Check Information", 8, "int"), > + ("Target Identifier", 8, "int"), > + ("Requestor Identifier", 8, "int"), > + ("Responder Identifier", 8, "int"), > + ("Instruction Pointer", 8, "int") > + ] > + > + def __init__(self, cper: DecodeField): > + self.cper = cper > + > + def decode(self, guid): > + """Decode x86 Processor Error""" > + print("x86 Processor Error") > + > + val = self.cper.decode("Validation Bits", 8, "int") > + try: > + val_bits = int.from_bytes(val, byteorder='little') > + except ValueError, TypeError: > + val_bits = 0 > + > + error_info_num = (val_bits >> 2) & 0x3f # bits 2-7 > + context_info_num = (val_bits >> 8) & 0xff # bits 8-13 > + > + self.cper.decode("Local APIC_ID", 8, "int") > + self.cper.decode("CPUID Info", 48, "int") > + > + for pei in range(0, error_info_num): > + if self.cper.past_end: > + return > + > + print() > + print(f"Processor Error Info {pei}") > + for name, size, ftype in self.pei_fields: > + self.cper.decode(name, size, ftype) > + > + for ctx in range(0, context_info_num): > + if self.cper.past_end: > + return > + > + print() > + print(f"Context {ctx}") > + > + self.cper.decode("Register Context Type", 2, "int") > + > + val = self.cper.decode("Register Array Size", 2, "int") > + try: > + context_size = int(int.from_bytes(val, byteorder='little') / > 8) > + except ValueError, TypeError: > + context_size = 0 > + > + self.cper.decode("MSR Address", 4, "int") > + self.cper.decode("MM Register Address", 8, "int") > + > + for reg in range(0, context_size): > + if self.cper.past_end: > + return > + self.cper.decode(f"Register offset {reg:<3}", 8, "int") As for arm. 
Probably need to sanity check that it's the 8 byte version. And for giggles even on x86 some are 16 bytes ;) (GDTR, IDTR). Meh, I don't care about those 16 byte ones, but not presenting the 4 byte ones as 8 would be good. > + > + @staticmethod > + def decode_list(): > + """ > + Returns a tuple with the GUID and class > + """ > + return [(DecodeProcX86.guid, DecodeProcX86)] > +class DecodeProcArm(): > + """ > + Class to decode an ARM Processor Error as defined at > + UEFI 2.6 - N.2.2 Section Descriptor > + """ > + > + # GUID for ARM Processor Error > + guid = "e19e3d16-bc11-11e4-9caa-c2051d5d46b0" > + > + arm_pei_fields = [ > + ("Version", 1, "int"), > + ("Length", 1, "int"), > + ("valid", 2, "int"), > + ("type", 1, "int"), > + ("multiple-error", 2, "int"), > + ("flags", 1, "int"), > + ("error-info", 8, "int"), > + ("virt-addr", 8, "int"), > + ("phy-addr", 8, "int"), > + ] > + > + def __init__(self, cper: DecodeField): > + self.cper = cper > + > + def decode(self, guid): > + """Decode Processor ARM""" > + > + print("ARM Processor Error") > + > + start = self.cper.pos > + > + self.cper.decode("Valid", 4, "int") > + > + val = self.cper.decode("Error Info num", 2, "int") > + try: > + error_info_num = int.from_bytes(val, byteorder='little') > + except ValueError, TypeError: > + error_info_num = 0 > + > + val = self.cper.decode("Context Info num", 2, "int") > + try: > + context_info_num = int.from_bytes(val, byteorder='little') > + except ValueError, TypeError: > + context_info_num = 0 > + > + val = self.cper.decode("Section Length", 4, "int") > + try: > + section_length = int.from_bytes(val, byteorder='little') > + except ValueError, TypeError: > + section_length = 0 > + > + self.cper.decode("Error affinity level", 1, "int") > + self.cper.decode("Reserved", 3, "int") > + self.cper.decode("MPIDR_EL1", 8, "int") > + self.cper.decode("MIDR_EL1", 8, "int") > + self.cper.decode("Running State", 4, "int") > + self.cper.decode("PSCI State", 4, "int") > + > + for pei in range(0, 
error_info_num): > + if self.cper.past_end: > + return > + > + print() > + print(f"Processor Error Info {pei}") > + for name, size, ftype in self.arm_pei_fields: > + self.cper.decode(name, size, ftype) > + > + for ctx in range(0, context_info_num): > + if self.cper.past_end: > + return > + > + print() > + print(f"Context {ctx}") > + self.cper.decode("Version", 2, "int") > + self.cper.decode("Register Context Type", 2, "int") > + val = self.cper.decode("Register Array Size", 4, "int") > + try: > + context_size = int(int.from_bytes(val, byteorder='little') / > 8) > + except ValueError: > + context_size = 0 > + > + for reg in range(0, context_size): > + if self.cper.past_end: > + return > + self.cper.decode(f"Register {reg:<3}", 8, "int") Maybe check it's not a 32 bit context? Don't decode it if it is but don't try to decode it as 64 bit. Can get that from the Register Context Type. Anything over 4 is fine. > + > + remaining = max(section_length + start - self.cper.pos, 0) > + if remaining: > + print() > + self.cper.decode("Vendor data", remaining, "int") > + > + @staticmethod > + def decode_list(): > + """ > + Returns a tuple with the GUID and class > + """ > + return [(DecodeProcArm.guid, DecodeProcArm)] > +class DecodeDMAVT(): > + """ > + Class to decode a DMA Virtualization Technology Error as defined at > + UEFI 2.2 - N.2.2 Section Descriptor As below. Feels like it should be N2.11.2 Intel VT for Directed I/O specific DMAr Error Section Descriptor. Same for other places this comment exists. > + """ > + > + # GUID for DMA VT Error > + guid = "71761d37-32b2-45cd-a7d0-b0fedd93e8cf" > +class DecodeDMAIOMMU(): > + """ > + Class to decode an IOMMU DMA Error as defined at > + UEFI 2.2 - N.2.2 Section Descriptor Odd reference choice. This stuff is in N2.11.3 IOMMU Specific DMAr Error Section in 2.11. I'm too lazy to find it in the older spec. 
> + """ > + > + # GUID for IOMMU DMA Error Maybe call it the IOMMU Specific DMAr Error > + guid = "036f84e1-7f37-428c-a79e-575fdfaa84ec" > + > + fields = [ > +class DecodeCXLCompEvent(): > + """ > + Class to decode a CXL Component Error as defined at > + UEFI 2.9 - N.2.14. CXL Component Events Section > + > + Currently, the decoder handles only the common fields, displaying > + the CXL Component Event Log field in bytes. > + """ > + > + # GUIDs, as defined at CXL specification 3.2: 8.2.10.2.1 Event Records > + guids = [ > + ("General Media", > "fbcd0a77-c260-417f-85a9-088b1621eba6"), > + ("DRAM", > "601dcbb3-9c06-4eab-b8af-4e9bfb5c9624"), > + ("Memory Module", > "fe927475-dd59-4339-a586-79bab113bc74"), > + ("Memory Sparing", > "e71f3a40-2d29-4092-8a39-4d1c966c7c65"), > + ("Physical Switch", > "77cf9271-9c02-470b-9fe4-bc7b75f2da97"), As per earlier patch review I'm not sure we care about this and the following. I don't think we'll ever see them in CPER records, unless going over something else that encapsulates that format. 
> + ("Virtual Switch", > "40d26425-3396-4c4d-a5da-3d472a63af25"), > + ("MDL Port", > "8dc44363-0c96-4710-b7bf-04bb99534c3f"), "MDL" should be "MLD" (Multi-Logical Device). > + ("Dynamic Capabilities", > "ca95afa7-f183-4018-8c2f-95268e101a2a"), > + ] > + > + fields = [ > + ("Validation Bits", 8, "int"), > + ("Device ID", 12, "int"), > + ("Device Serial Number", 8, "int") > + ] > + > + def __init__(self, cper: DecodeField): > + self.cper = cper > + > + def decode(self, guid): > + """Decode CXL Protocol Error""" > + for name, guid_event in DecodeCXLCompEvent.guids: > + if guid == guid_event: > + print(f"CXL {name} Event Record") > + break > + > + val = self.cper.decode("Length", 4, "int") > + try: > + length = int.from_bytes(val, byteorder='little') > + except ValueError, TypeError: > + length = 0 > + > + for name, size, ftype in self.fields: > + self.cper.decode(name, size, ftype) > + > + length = max(0, length - self.cper.pos) > + > + self.cper.decode("CXL Component Event Log", length, "int", > + show_incomplete=True) > + > + @staticmethod > + def decode_list(): > + """ > + Returns a tuple with the GUID and class > + """ > + > + guid_list = [] > + > + for _, guid in DecodeCXLCompEvent.guids: > + guid_list.append((guid, DecodeCXLCompEvent)) > + > + return guid_list ... > +class DecodeGhesEntry(): > + """ > + Class to decode a GHESv2 element, as defined at: > + ACPI 6.1: 18.3.2.8 Generic Hardware Error Source version 2 > + """ > + > + # Fields present on all CPER records > + common_fields = [ > + # Generic Error Status Block fields > + ("Block Status", 4, "int", None), > + ("Raw Data Offset", 4, "int", "raw_data_offset"), > + ("Raw Data Length", 4, "int", "raw_data_len"), > + ("Data Length", 4, "int", None), > + ("Error Severity", 4, "int", None), > + > + # Generic Error Data Entry > + ("Section Type", 16, "guid", "session_type"), Why session_type? Is the idea that it's the type of decode session we are doing? Feels a bit too much like a typo from section_type. 
> + ("Error Severity", 4, "int", None), > + ("Revision", 2, "int", None), > + ("Validation Bits", 1, "int", None), > + ("Flags", 1, "int", None), > + ("Error Data Length", 4, "int", None), > + ("FRU Id", 16, "guid", None), > + ("FRU Text", 20, "str", None), > + ("Timestamp", 8, "bcd", None), > + ] > + > + def __init__(self, cper_data: bytearray): > + """ > + Initializes a byte array, decoding it, printing results at the > + screen. > + """ ... > + > + # Handle common types > + cper = DecodeField(cper_data) > + > + fields = {} > + for name, size, ftype, var in self.common_fields: > + val = cper.decode(name, size, ftype) > + > + if ftype == "int": > + try: > + val = int.from_bytes(val, byteorder='little') > + except ValueError, TypeError: > + val = 0 > + > + if var is not None: > + fields[var] = val > + > + if fields["raw_data_len"]: > + cper.decode("Raw Data", fields["raw_data_len"], > + "int", pos=fields["raw_data_offset"]) > + > + if not fields["session_type"]: > + return
