Commit fb802acdc8b ("ppc/spapr: Fix RTAS stopped state"), introduced in QEMU 10.0, changed the way the pseries machine marks a cpu as quiesced. Previously, the cpu->halted value from QEMU common cpu code was (incorrectly) used. With that fix, the env->quiesced variable is used instead, which improves on the original situation, but also causes a side effect after migration:
env->quiesced is set at reset and never migrated, so the destination QEMU keeps its vcpus in the reset-time stopped state even when they were already running on the source; this causes the destination QEMU to stop delivering interrupts and hang the machine. To fix the issue from this point on, start migrating the env->quiesced value. When migrating to QEMU versions < 10.0, sending the new element on the stream would cause migration to be aborted, so add the appropriate compatibility property ("rtas-stopped-state") to omit the new subsection. Independently of this patch, all migrations from QEMU versions < 10.0 would result in a hang, since the older QEMU never migrates env->quiesced. This is bad because it leaves machines already running on the old QEMU without a migration path into newer versions. As a workaround, use a few heuristics to infer the new value of env->quiesced based on cpu->halted and on the LPCR and PSSCR bits that are usually set/cleared along with quiesced. Note that this was tested with -cpu power9 and -machine ic-mode=xive due to another bug affecting migration of XICS guests. Tested both forward and backward migration and savevm/loadvm from 9.2 and 10.0. Also tested loadvm of a savevm image that contains a mix of cpus, both halted and not halted. 
Reported-by: Fabian Vogt <[email protected]> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3079 Fixes: fb802acdc8b ("ppc/spapr: Fix RTAS stopped state") Signed-off-by: Fabiano Rosas <[email protected]> --- hw/ppc/spapr.c | 6 +++++ target/ppc/cpu.h | 1 + target/ppc/cpu_init.c | 7 +++++ target/ppc/machine.c | 63 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 99b843ba2f..9dde61a667 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4776,8 +4776,14 @@ DEFINE_SPAPR_MACHINE(10, 1); */ static void spapr_machine_10_0_class_options(MachineClass *mc) { + static GlobalProperty spapr_compat_10_0[] = { + { TYPE_POWERPC_CPU, "rtas-stopped-state", "false" }, + }; + spapr_machine_10_1_class_options(mc); compat_props_add(mc->compat_props, hw_compat_10_0, hw_compat_10_0_len); + compat_props_add(mc->compat_props, spapr_compat_10_0, + G_N_ELEMENTS(spapr_compat_10_0)); } DEFINE_SPAPR_MACHINE(10, 0); diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 787020f6f9..bbd661e96c 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -1529,6 +1529,7 @@ struct ArchCPU { void *machine_data; int32_t node_id; /* NUMA node this CPU belongs to */ PPCHash64Options *hash64_opts; + bool rtas_stopped_state; /* Those resources are used only during code translation */ /* opcode handlers */ diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 86ead740ee..8dac1cd812 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -55,6 +55,11 @@ /* #define PPC_DEBUG_SPR */ /* #define USE_APPLE_GDB */ +static const Property powerpc_cpu_properties[] = { + DEFINE_PROP_BOOL("rtas-stopped-state", PowerPCCPU, + rtas_stopped_state, true), +}; + static inline void vscr_init(CPUPPCState *env, uint32_t val) { /* Altivec always uses round-to-nearest */ @@ -7529,6 +7534,8 @@ static void ppc_cpu_class_init(ObjectClass *oc, const void *data) &pcc->parent_unrealize); pcc->pvr_match = ppc_pvr_match_default; + 
device_class_set_props(dc, powerpc_cpu_properties); + resettable_class_set_parent_phases(rc, NULL, ppc_cpu_reset_hold, NULL, &pcc->parent_phases); diff --git a/target/ppc/machine.c b/target/ppc/machine.c index d72e5ecb94..ba63a7debb 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -6,6 +6,7 @@ #include "mmu-hash64.h" #include "migration/cpu.h" #include "qapi/error.h" +#include "qemu/error-report.h" #include "kvm_ppc.h" #include "power8-pmu.h" #include "system/replay.h" @@ -257,6 +258,45 @@ static int cpu_post_load(void *opaque, int version_id) ppc_store_sdr1(env, env->spr[SPR_SDR1]); } + if (!cpu->rtas_stopped_state) { + /* + * The source QEMU doesn't have fb802acdc8 and still uses halt + + * PM bits in LPCR to implement RTAS stopped state. The new (this) + * QEMU will have put the secondary vcpus in stopped state, + * waiting for the start-cpu RTAS call. That call will never come + * if the source cpus were already running. Try to infer the cpus + * state and set env->quiesced accordingly. + * + * env->quiesced = true ==> the cpu is waiting to start + * env->quiesced = false ==> the cpu is running (unless halted) + */ + + /* + * Halted _could_ mean quiesced, but it could also be cede, + * confer_self, power management, etc. + */ + if (CPU(cpu)->halted) { + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + /* + * Both the PSSCR_EC bit and LPCR PM bits set at cpu reset + * and rtas_stop and cleared at rtas_start, it's a good + * heuristic. + */ + if ((env->spr[SPR_PSSCR] & PSSCR_EC) && + (env->spr[SPR_LPCR] & pcc->lpcr_pm)) { + env->quiesced = true; + } else { + env->quiesced = false; + } + } else { + /* + * Old QEMU sets halted during rtas_stop_self. Not halted, + * therefore definitely not quiesced. 
+ */ + env->quiesced = false; + } + } + post_load_update_msr(env); if (tcg_enabled()) { @@ -649,6 +689,28 @@ static const VMStateDescription vmstate_reservation = { } }; +static bool rtas_stopped_needed(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + return cpu->rtas_stopped_state; +} + +static const VMStateDescription vmstate_rtas_stopped = { + .name = "cpu/rtas_stopped", + .version_id = 1, + .minimum_version_id = 1, + .needed = rtas_stopped_needed, + .fields = (const VMStateField[]) { + /* + * "RTAS stopped" state, independent of halted state. For QEMU + * < 10.0, this is taken from cpu->halted at cpu_post_load() + */ + VMSTATE_BOOL(env.quiesced, PowerPCCPU), + VMSTATE_END_OF_LIST() + } +}; + #ifdef TARGET_PPC64 static bool bhrb_needed(void *opaque) { @@ -715,6 +777,7 @@ const VMStateDescription vmstate_ppc_cpu = { &vmstate_tlbmas, &vmstate_compat, &vmstate_reservation, + &vmstate_rtas_stopped, NULL } }; -- 2.51.0
