From: John Allen <john.al...@amd.com> For the most part, AMD hosts can use the same MCE injection code as Intel, but there are instances where the qemu implementation is Intel specific. First, MCE delivery works differently on AMD and does not support broadcast. Second, kvm_mce_inject generates MCEs that include a number of Intel specific status bits. Modify kvm_mce_inject to properly generate MCEs on AMD platforms.
Reported-by: William Roche <william.ro...@oracle.com> Signed-off-by: John Allen <john.al...@amd.com> Message-ID: <20240603193622.47156-2-john.al...@amd.com> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> --- target/i386/cpu.h | 2 ++ target/i386/helper.c | 4 ++++ target/i386/kvm/kvm.c | 39 +++++++++++++++++++++++++++++++-------- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 29d799adfd6..e6d5d1b483c 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -377,6 +377,8 @@ typedef enum X86Seg { #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ +#define MCI_STATUS_DEFERRED (1ULL<<44) /* Deferred error */ +#define MCI_STATUS_POISON (1ULL<<43) /* Poisoned data consumed */ /* MISC register defines */ #define MCM_ADDR_SEGOFF 0 /* segment offset */ diff --git a/target/i386/helper.c b/target/i386/helper.c index f9d1381f90b..01a268a30bb 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -91,6 +91,10 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env) int family = 0; int model = 0; + if (IS_AMD_CPU(env)) { + return 0; + } + cpu_x86_version(env, &family, &model); if ((family == 6 && model >= 14) || family > 6) { return 1; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index b5635209812..55a9e8a70cf 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -638,17 +638,40 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) { CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; - uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | - MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; - uint64_t mcg_status = MCG_STATUS_MCIP; + uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_MISCV | + MCI_STATUS_ADDRV; + uint64_t mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV; int flags = 0; - if (code == BUS_MCEERR_AR) { - status |= MCI_STATUS_AR | 0x134; - mcg_status |= MCG_STATUS_RIPV | MCG_STATUS_EIPV; + if (!IS_AMD_CPU(env)) { + status |= MCI_STATUS_S | MCI_STATUS_UC; + if (code == BUS_MCEERR_AR) { + status |= MCI_STATUS_AR | 0x134; + mcg_status |= MCG_STATUS_EIPV; + } else { + status |= 0xc0; + } } else { - status |= 0xc0; - mcg_status |= MCG_STATUS_RIPV; + if (code == BUS_MCEERR_AR) { + status |= MCI_STATUS_UC | MCI_STATUS_POISON; + mcg_status |= MCG_STATUS_EIPV; + } else { + /* Setting the POISON bit for deferred errors indicates to the + * guest kernel that the address provided by the MCE is valid + * and usable which will ensure that the guest kernel will send + * a SIGBUS_AO signal to the guest process. This allows for + * more desirable behavior in the case that the guest process + * with poisoned memory has set the MCE_KILL_EARLY prctl flag + * which indicates that the process would prefer to handle or + * shutdown due to the poisoned memory condition before the + * memory has been accessed. + * + * While the POISON bit would not be set in a deferred error + * sent from hardware, the bit is not meaningful for deferred + * errors and can be reused in this scenario. + */ + status |= MCI_STATUS_DEFERRED | MCI_STATUS_POISON; + } } flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0; -- 2.45.1