Re: [PATCH 4/5] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails
Greg Kurz's on March 18, 2020 2:57 am: > On Tue, 17 Mar 2020 15:02:14 +1000 > Nicholas Piggin wrote: > >> Try to be tolerant of errors if the machine check had been recovered >> by the host. >> >> Signed-off-by: Nicholas Piggin >> --- > > Same comment as previous patch on multi-line error strings and > warn_report() in the !recovered case. > >> hw/ppc/spapr_events.c | 25 ++--- >> 1 file changed, 18 insertions(+), 7 deletions(-) >> >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c >> index d35151eeb0..3f524cb0ca 100644 >> --- a/hw/ppc/spapr_events.c >> +++ b/hw/ppc/spapr_events.c >> @@ -807,13 +807,20 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, >> bool recovered) >> /* get rtas addr from fdt */ >> rtas_addr = spapr_get_rtas_addr(); >> if (!rtas_addr) { >> -warn_report("FWNMI: Unable to deliver machine check to guest: " >> -"rtas_addr not found."); >> -qemu_system_guest_panicked(NULL); >> +if (!recovered) { >> +warn_report("FWNMI: Unable to deliver machine check to guest: " >> +"rtas_addr not found."); >> +qemu_system_guest_panicked(NULL); >> +} else { >> +warn_report("FWNMI: Unable to deliver machine check to guest: " >> +"rtas_addr not found. Machine check recovered."); >> +} >> g_free(ext_elog); >> return; >> } >> >> +spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; >> + > > I don't understand this change. If we bail out without delivering the interrupt, we can't take the interlock otherwise the guest can never release it. Thanks, Nick
Re: [PATCH 4/5] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails
On Tue, 17 Mar 2020 15:02:14 +1000 Nicholas Piggin wrote: > Try to be tolerant of errors if the machine check had been recovered > by the host. > > Signed-off-by: Nicholas Piggin > --- Same comment as previous patch on multi-line error strings and warn_report() in the !recovered case. > hw/ppc/spapr_events.c | 25 ++--- > 1 file changed, 18 insertions(+), 7 deletions(-) > > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index d35151eeb0..3f524cb0ca 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -807,13 +807,20 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, > bool recovered) > /* get rtas addr from fdt */ > rtas_addr = spapr_get_rtas_addr(); > if (!rtas_addr) { > -warn_report("FWNMI: Unable to deliver machine check to guest: " > -"rtas_addr not found."); > -qemu_system_guest_panicked(NULL); > +if (!recovered) { > +warn_report("FWNMI: Unable to deliver machine check to guest: " > +"rtas_addr not found."); > +qemu_system_guest_panicked(NULL); > +} else { > +warn_report("FWNMI: Unable to deliver machine check to guest: " > +"rtas_addr not found. Machine check recovered."); > +} > g_free(ext_elog); > return; > } > > +spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; > + I don't understand this change. > stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, > env->gpr[3]); > cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + > @@ -850,9 +857,14 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) > * that CPU called "ibm,nmi-interlock") > */ > if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) { > -warn_report("FWNMI: Unable to deliver machine check to guest: " > -"nested machine check."); > -qemu_system_guest_panicked(NULL); > +if (!recovered) { > +warn_report("FWNMI: Unable to deliver machine check to > guest: " > +"nested machine check."); > +qemu_system_guest_panicked(NULL); > +} else { > +warn_report("FWNMI: Unable to deliver machine check to > guest: " > +"nested machine check. Machine check > recovered."); > +} > return; > } > qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond); > @@ -880,7 +892,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) > warn_report("Received a fwnmi while migration was in progress"); > } > > -spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; > spapr_mce_dispatch_elog(cpu, recovered); > } >
[PATCH 4/5] ppc/spapr: Don't kill the guest if a recovered FWNMI machine check delivery fails
Try to be tolerant of errors if the machine check had been recovered by the host. Signed-off-by: Nicholas Piggin --- hw/ppc/spapr_events.c | 25 ++--- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c index d35151eeb0..3f524cb0ca 100644 --- a/hw/ppc/spapr_events.c +++ b/hw/ppc/spapr_events.c @@ -807,13 +807,20 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered) /* get rtas addr from fdt */ rtas_addr = spapr_get_rtas_addr(); if (!rtas_addr) { -warn_report("FWNMI: Unable to deliver machine check to guest: " -"rtas_addr not found."); -qemu_system_guest_panicked(NULL); +if (!recovered) { +warn_report("FWNMI: Unable to deliver machine check to guest: " +"rtas_addr not found."); +qemu_system_guest_panicked(NULL); +} else { +warn_report("FWNMI: Unable to deliver machine check to guest: " +"rtas_addr not found. Machine check recovered."); +} g_free(ext_elog); return; } +spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, env->gpr[3]); cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + @@ -850,9 +857,14 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) * that CPU called "ibm,nmi-interlock") */ if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) { -warn_report("FWNMI: Unable to deliver machine check to guest: " -"nested machine check."); -qemu_system_guest_panicked(NULL); +if (!recovered) { +warn_report("FWNMI: Unable to deliver machine check to guest: " +"nested machine check."); +qemu_system_guest_panicked(NULL); +} else { +warn_report("FWNMI: Unable to deliver machine check to guest: " +"nested machine check. Machine check recovered."); +} return; } qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond); @@ -880,7 +892,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) warn_report("Received a fwnmi while migration was in progress"); } -spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; spapr_mce_dispatch_elog(cpu, recovered); } -- 2.23.0