This patch includes migration support for machine check handling. Especially this patch blocks VM migration requests until the machine check error handling is complete as (i) these errors are specific to the source hardware and is irrelevant on the target hardware, (ii) these errors cause data corruption and should be handled before migration.
Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com> --- hw/ppc/spapr.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ hw/ppc/spapr_events.c | 14 ++++++++++++++ hw/ppc/spapr_rtas.c | 2 ++ include/hw/ppc/spapr.h | 2 ++ 4 files changed, 62 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 1c0908e..f6262f0 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -46,6 +46,7 @@ #include "migration/qemu-file-types.h" #include "migration/global_state.h" #include "migration/register.h" +#include "migration/blocker.h" #include "mmu-hash64.h" #include "mmu-book3s-v3.h" #include "cpu-models.h" @@ -1829,6 +1830,8 @@ static void spapr_machine_reset(MachineState *machine) /* Signal all vCPUs waiting on this condition */ qemu_cond_broadcast(&spapr->mc_delivery_cond); + + migrate_del_blocker(spapr->fwnmi_migration_blocker); } static void spapr_create_nvram(SpaprMachineState *spapr) @@ -2119,6 +2122,42 @@ static const VMStateDescription vmstate_spapr_dtb = { }, }; +static bool spapr_fwnmi_needed(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + return spapr->guest_machine_check_addr != -1; +} + +static int spapr_fwnmi_post_load(void *opaque, int version_id) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) { + + if (kvmppc_has_cap_ppc_fwnmi()) { + return 0; + } + + return kvmppc_set_fwnmi(); + } + + return 0; +} + +static const VMStateDescription vmstate_spapr_machine_check = { + .name = "spapr_machine_check", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_fwnmi_needed, + .post_load = spapr_fwnmi_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT64(guest_machine_check_addr, SpaprMachineState), + VMSTATE_INT32(mc_status, SpaprMachineState), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_spapr = { .name = "spapr", .version_id = 3, @@ -2152,6 +2191,7 @@ static const VMStateDescription vmstate_spapr = { &vmstate_spapr_dtb, &vmstate_spapr_cap_large_decr, &vmstate_spapr_cap_ccf_assist, + &vmstate_spapr_machine_check, NULL } }; @@ -2948,6 +2988,10 @@ static void spapr_machine_init(MachineState *machine) exit(1); } + /* Create the error string for live migration blocker */ + error_setg(&spapr->fwnmi_migration_blocker, + "Live migration not supported during machine check handling"); + /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */ spapr_fwnmi_register(); } diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index 8ebb85e..a8ce9bd 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -43,6 +43,7 @@ #include "qemu/main-loop.h" #include "hw/ppc/spapr_ovec.h" #include <libfdt.h> +#include "migration/blocker.h" #define RTAS_LOG_VERSION_MASK 0xff000000 #define RTAS_LOG_VERSION_6 0x06000000 @@ -854,6 +855,19 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) { SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + int ret; + Error *local_err = NULL; + + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, &local_err); + if (ret < 0) { + /* + * We don't want to abort and let the migration to continue. In a + * rare case, the machine check handler will run on the target + * hardware. Though this is not preferable, it is better than aborting + * the migration or killing the VM. + */ + warn_report_err(local_err); + } while (spapr->mc_status != -1) { /* diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c index d892583..b682cc2 100644 --- a/hw/ppc/spapr_rtas.c +++ b/hw/ppc/spapr_rtas.c @@ -50,6 +50,7 @@ #include "hw/ppc/fdt.h" #include "target/ppc/mmu-hash64.h" #include "target/ppc/mmu-book3s-v3.h" +#include "migration/blocker.h" static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr, uint32_t token, uint32_t nargs, @@ -438,6 +439,7 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu, */ spapr->mc_status = -1; qemu_cond_signal(&spapr->mc_delivery_cond); + migrate_del_blocker(spapr->fwnmi_migration_blocker); rtas_st(rets, 0, RTAS_OUT_SUCCESS); } } diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index dada821..ea7625e 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -217,6 +217,8 @@ struct SpaprMachineState { unsigned gpu_numa_id; SpaprTpmProxy *tpm_proxy; + + Error *fwnmi_migration_blocker; }; #define H_SUCCESS 0