On Wed, 21 Jan 2026 12:25:15 +0100
Mauro Carvalho Chehab <[email protected]> wrote:

> Add a decoder to help debugging injected CPERs. This is more
> relevant when we add fuzzy-testing error inject, as the
> decoder is helpful to identify what it packages will be sent
> via QEMU to the firmware-fist logic.
> 
> By purpose, I opted to keep this completely independent from
> the encoders implementation, as this can be used even when
> there are no encoders for a certain GGUID type (except for a
GGUID?

> fuzzy logic test, which is pretty much independent of the
> records meaning).
> 
> Signed-off-by: Mauro Carvalho Chehab <[email protected]>

A few minor things inline.

I checked the specs, so all that stuff is fine including the spec bug
you mention (which I'm sure you've already reported :)

Jonathan

> ---
>  MAINTAINERS            |    1 +
>  scripts/ghes_decode.py | 1155 ++++++++++++++++++++++++++++++++++++++++
>  scripts/qmp_helper.py  |    3 +
>  3 files changed, 1159 insertions(+)
>  create mode 100644 scripts/ghes_decode.py
> 

> diff --git a/scripts/ghes_decode.py b/scripts/ghes_decode.py
> new file mode 100644
> index 000000000000..6c7fdfe84e3a
> --- /dev/null
> +++ b/scripts/ghes_decode.py
> @@ -0,0 +1,1155 @@
> +#!/usr/bin/env python3
> +#
> +# pylint: disable=R0903,R0912,R0913,R0915,R0917,R1713,E1121,C0302,W0613
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +#
> +# Copyright (C) 2025 Mauro Carvalho Chehab <[email protected]>
> +
> +"""
> +Helper classes to decode a generic error data entry.
> +
> +By purpose, the logic here is independent of the logic inside qmp_helper
> +and other modules. With a different implementation, it is more likely to
> +discover bugs at the error injection logic. Also, as this can be used to
> +dump errors injected by reproducing an error mesage or for fuzzy error
> +injection, it can't rely at the encoding logic inside each module of

rely on the encoding logic

> +ghes_inject.py.
> +
> +To make the decoder simple, the decode logic here is at field level, not
> +trying to decode bitmaps.
> +"""
> +
> +from typing import Optional


> +class DecodeProcX86():
> +    """
> +    Class to decode an x86 Processor Error as defined at
> +    UEFI 2.1 - N.2.2 Section Descriptor
> +    """
> +
> +    # GUID for x86 Processor Error
> +    guid = "dc3ea0b0-a144-4797-b95b-53fa242b6e1d"
> +
> +    pei_fields = [
> +        ("Error Structure Type", 16, "guid"),
> +        ("Validation Bits", 8, "int"),
> +        ("Check Information", 8, "int"),
> +        ("Target Identifier", 8, "int"),
> +        ("Requestor Identifier", 8, "int"),
> +        ("Responder Identifier", 8, "int"),
> +        ("Instruction Pointer", 8, "int")
> +    ]
> +
> +    def __init__(self, cper: DecodeField):
> +        self.cper = cper
> +
> +    def decode(self, guid):
> +        """Decode x86 Processor Error"""
> +        print("x86 Processor Error")
> +
> +        val = self.cper.decode("Validation Bits", 8, "int")
> +        try:
> +            val_bits = int.from_bytes(val, byteorder='little')
> +        except ValueError, TypeError:
> +            val_bits = 0
> +
> +        error_info_num = (val_bits >> 2) & 0x3f    # bits 2-7
> +        context_info_num = (val_bits >> 8) & 0xff  # bits 8-13
> +
> +        self.cper.decode("Local APIC_ID", 8, "int")
> +        self.cper.decode("CPUID Info", 48, "int")
> +
> +        for pei in range(0, error_info_num):
> +            if self.cper.past_end:
> +                return
> +
> +            print()
> +            print(f"Processor Error Info {pei}")
> +            for name, size, ftype in self.pei_fields:
> +                self.cper.decode(name, size, ftype)
> +
> +        for ctx in range(0, context_info_num):
> +            if self.cper.past_end:
> +                return
> +
> +            print()
> +            print(f"Context {ctx}")
> +
> +            self.cper.decode("Register Context Type", 2, "int")
> +
> +            val = self.cper.decode("Register Array Size", 2, "int")
> +            try:
> +                context_size = int(int.from_bytes(val, byteorder='little') / 
> 8)
> +            except ValueError, TypeError:
> +                context_size = 0
> +
> +            self.cper.decode("MSR Address", 4, "int")
> +            self.cper.decode("MM Register Address", 8, "int")
> +
> +            for reg in range(0, context_size):
> +                if self.cper.past_end:
> +                    return
> +                self.cper.decode(f"Register offset {reg:<3}", 8, "int")

Same as for Arm: this probably needs a sanity check that it's the 8 byte version.
And for giggles, even on x86 some are 16 bytes ;)
GDTR, IDTR

Meh, I don't care about those 16 byte ones, but not presenting the 4 byte
ones as 8 would be good.



> +
> +    @staticmethod
> +    def decode_list():
> +        """
> +        Returns a tuple with the GUID and class
> +        """
> +        return [(DecodeProcX86.guid, DecodeProcX86)]

> +class DecodeProcArm():
> +    """
> +    Class to decode an ARM Processor Error as defined at
> +    UEFI 2.6 - N.2.2 Section Descriptor
> +    """
> +
> +    # GUID for ARM Processor Error
> +    guid = "e19e3d16-bc11-11e4-9caa-c2051d5d46b0"
> +
> +    arm_pei_fields = [
> +        ("Version",              1, "int"),
> +        ("Length",               1, "int"),
> +        ("valid",                2, "int"),
> +        ("type",                 1, "int"),
> +        ("multiple-error",       2, "int"),
> +        ("flags",                1, "int"),
> +        ("error-info",           8, "int"),
> +        ("virt-addr",            8, "int"),
> +        ("phy-addr",             8, "int"),
> +    ]
> +
> +    def __init__(self, cper: DecodeField):
> +        self.cper = cper
> +
> +    def decode(self, guid):
> +        """Decode Processor ARM"""
> +
> +        print("ARM Processor Error")
> +
> +        start = self.cper.pos
> +
> +        self.cper.decode("Valid", 4, "int")
> +
> +        val = self.cper.decode("Error Info num", 2, "int")
> +        try:
> +            error_info_num = int.from_bytes(val, byteorder='little')
> +        except ValueError, TypeError:
> +            error_info_num = 0
> +
> +        val = self.cper.decode("Context Info num", 2, "int")
> +        try:
> +            context_info_num = int.from_bytes(val, byteorder='little')
> +        except ValueError, TypeError:
> +            context_info_num = 0
> +
> +        val = self.cper.decode("Section Length", 4, "int")
> +        try:
> +            section_length = int.from_bytes(val, byteorder='little')
> +        except ValueError, TypeError:
> +            section_length = 0
> +
> +        self.cper.decode("Error affinity level", 1, "int")
> +        self.cper.decode("Reserved", 3, "int")
> +        self.cper.decode("MPIDR_EL1", 8, "int")
> +        self.cper.decode("MIDR_EL1", 8, "int")
> +        self.cper.decode("Running State", 4, "int")
> +        self.cper.decode("PSCI State", 4, "int")
> +
> +        for pei in range(0, error_info_num):
> +            if self.cper.past_end:
> +                return
> +
> +            print()
> +            print(f"Processor Error Info {pei}")
> +            for name, size, ftype in self.arm_pei_fields:
> +                self.cper.decode(name, size, ftype)
> +
> +        for ctx in range(0, context_info_num):
> +            if self.cper.past_end:
> +                return
> +
> +            print()
> +            print(f"Context {ctx}")
> +            self.cper.decode("Version", 2, "int")
> +            self.cper.decode("Register Context Type", 2, "int")
> +            val = self.cper.decode("Register Array Size", 4, "int")
> +            try:
> +                context_size = int(int.from_bytes(val, byteorder='little') / 
> 8)
> +            except ValueError:
> +                context_size = 0
> +
> +            for reg in range(0, context_size):
> +                if self.cper.past_end:
> +                    return
> +                self.cper.decode(f"Register {reg:<3}", 8, "int")

Maybe check it's not a 32 bit context?  There's no need to decode it if
it is, but don't try to decode it as 64 bit.  You can get that from the
Register Context Type.  Anything over 4 is fine.


> +
> +        remaining = max(section_length + start - self.cper.pos, 0)
> +        if remaining:
> +            print()
> +            self.cper.decode("Vendor data", remaining, "int")
> +
> +    @staticmethod
> +    def decode_list():
> +        """
> +        Returns a tuple with the GUID and class
> +        """
> +        return [(DecodeProcArm.guid, DecodeProcArm)]

> +class DecodeDMAVT():
> +    """
> +    Class to decode a DMA Virtualization Technology Error as defined at
> +    UEFI 2.2 - N.2.2 Section Descriptor
As below. Feels like it should be 

N2.11.2 Intel VT for Directed I/O specific DMAr Error Section Descriptor.
Same for other places this comment exists.


> +    """
> +
> +    # GUID for DMA VT Error
> +    guid = "71761d37-32b2-45cd-a7d0-b0fedd93e8cf"

> +class DecodeDMAIOMMU():
> +    """
> +    Class to decode an IOMMU DMA Error as defined at
> +    UEFI 2.2 - N.2.2 Section Descriptor

Odd reference choice.
This stuff is in N2.11.3 IOMMU Specific DMAr Error Section
in the UEFI 2.11 spec. I'm too lazy to find it in the older spec.

> +    """
> +
> +    # GUID for IOMMU DMA Error

Maybe call it the IOMMU Specific DMAr Error

> +    guid = "036f84e1-7f37-428c-a79e-575fdfaa84ec"
> +
> +    fields = [

> +class DecodeCXLCompEvent():
> +    """
> +    Class to decode a CXL Component Error as defined at
> +    UEFI 2.9 - N.2.14. CXL Component Events Section
> +
> +    Currently, the decoder handles only the common fields, displaying
> +    the CXL Component Event Log field in bytes.
> +    """
> +
> +    # GUIDs, as defined at CXL specification 3.2: 8.2.10.2.1 Event Records
> +    guids = [
> +        ("General Media",              
> "fbcd0a77-c260-417f-85a9-088b1621eba6"),
> +        ("DRAM",                       
> "601dcbb3-9c06-4eab-b8af-4e9bfb5c9624"),
> +        ("Memory Module",              
> "fe927475-dd59-4339-a586-79bab113bc74"),
> +        ("Memory Sparing",             
> "e71f3a40-2d29-4092-8a39-4d1c966c7c65"),
> +        ("Physical Switch",            
> "77cf9271-9c02-470b-9fe4-bc7b75f2da97"),

As per earlier patch review I'm not sure we care about this entry and the
following ones. I don't think we'll ever see them in CPER records, unless
going over something else that encapsulates that format.

> +        ("Virtual Switch",             
> "40d26425-3396-4c4d-a5da-3d472a63af25"),
> +        ("MDL Port",                   
> "8dc44363-0c96-4710-b7bf-04bb99534c3f"),
MLD -> Multi-Logical Device

> +        ("Dynamic Capabilities",       
> "ca95afa7-f183-4018-8c2f-95268e101a2a"),
> +    ]
> +
> +    fields = [
> +        ("Validation Bits", 8, "int"),
> +        ("Device ID", 12, "int"),
> +        ("Device Serial Number", 8, "int")
> +    ]
> +
> +    def __init__(self, cper: DecodeField):
> +        self.cper = cper
> +
> +    def decode(self, guid):
> +        """Decode CXL Protocol Error"""
> +        for name, guid_event in DecodeCXLCompEvent.guids:
> +            if guid == guid_event:
> +                print(f"CXL {name} Event Record")
> +                break
> +
> +        val = self.cper.decode("Length", 4, "int")
> +        try:
> +            length = int.from_bytes(val, byteorder='little')
> +        except ValueError, TypeError:
> +            length = 0
> +
> +        for name, size, ftype in self.fields:
> +            self.cper.decode(name, size, ftype)
> +
> +        length = max(0, length - self.cper.pos)
> +
> +        self.cper.decode("CXL Component Event Log", length, "int",
> +                         show_incomplete=True)
> +
> +    @staticmethod
> +    def decode_list():
> +        """
> +        Returns a tuple with the GUID and class
> +        """
> +
> +        guid_list = []
> +
> +        for _, guid in DecodeCXLCompEvent.guids:
> +            guid_list.append((guid, DecodeCXLCompEvent))
> +
> +        return guid_list


...

> +class DecodeGhesEntry():
> +    """
> +    Class to decode a GHESv2 element, as defined at:
> +    ACPI 6.1: 18.3.2.8 Generic Hardware Error Source version 2
> +    """
> +
> +    # Fields present on all CPER records
> +    common_fields = [
> +        # Generic Error Status Block fields
> +        ("Block Status",           4, "int", None),
> +        ("Raw Data Offset",        4, "int", "raw_data_offset"),
> +        ("Raw Data Length",        4, "int", "raw_data_len"),
> +        ("Data Length",            4, "int", None),
> +        ("Error Severity",         4, "int", None),
> +
> +        # Generic Error Data Entry
> +        ("Section Type",          16, "guid", "session_type"),

Why session_type? Is the idea that it's the type of decode session we are
doing? Feels a bit too much like a typo for section_type.


> +        ("Error Severity",         4, "int", None),
> +        ("Revision",               2, "int", None),
> +        ("Validation Bits",        1, "int", None),
> +        ("Flags",                  1, "int", None),
> +        ("Error Data Length",      4, "int", None),
> +        ("FRU Id",                16, "guid", None),
> +        ("FRU Text",              20, "str", None),
> +        ("Timestamp",              8, "bcd", None),
> +    ]
> +
> +    def __init__(self, cper_data: bytearray):
> +        """
> +        Initializes a byte array, decoding it, printing results at the
> +        screen.
> +        """

...

> +
> +        # Handle common types
> +        cper = DecodeField(cper_data)
> +
> +        fields = {}
> +        for name, size, ftype, var in self.common_fields:
> +            val = cper.decode(name, size, ftype)
> +
> +            if ftype == "int":
> +                try:
> +                    val = int.from_bytes(val, byteorder='little')
> +                except ValueError, TypeError:
> +                    val = 0
> +
> +            if var is not None:
> +                fields[var] = val
> +
> +        if fields["raw_data_len"]:
> +            cper.decode("Raw Data", fields["raw_data_len"],
> +                        "int", pos=fields["raw_data_offset"])
> +
> +        if not fields["session_type"]:
> +            return


Reply via email to