On Wed, Jun 12, 2019 at 02:51:13PM +0530, Aravinda Prasad wrote: > Memory errors such as bit flips that cannot be corrected > by hardware are passed on to the kernel for handling. > If the memory address in error belongs to a guest, then > the guest kernel is responsible for taking suitable action. > Patch [1] enhances KVM to exit the guest with the exit reason > set to KVM_EXIT_NMI in such cases. This patch handles > the KVM_EXIT_NMI exit. > > [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html > (e20bbd3d and related commits) > > Signed-off-by: Aravinda Prasad <aravi...@linux.vnet.ibm.com>
Reviewed-by: David Gibson <da...@gibson.dropbear.id.au> > --- > hw/ppc/spapr.c | 8 ++++++++ > hw/ppc/spapr_events.c | 23 +++++++++++++++++++++++ > include/hw/ppc/spapr.h | 10 ++++++++++ > target/ppc/kvm.c | 14 ++++++++++++++ > target/ppc/kvm_ppc.h | 2 ++ > target/ppc/trace-events | 1 + > 6 files changed, 58 insertions(+) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index 2ef86aa..6cc2c3b 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -1806,6 +1806,12 @@ static void spapr_machine_reset(void) > first_ppc_cpu->env.gpr[5] = 0; > > spapr->cas_reboot = false; > + > + spapr->mc_status = -1; > + spapr->guest_machine_check_addr = -1; > + > + /* Signal all vCPUs waiting on this condition */ > + qemu_cond_broadcast(&spapr->mc_delivery_cond); > } > > static void spapr_create_nvram(SpaprMachineState *spapr) > @@ -3070,6 +3076,8 @@ static void spapr_machine_init(MachineState *machine) > > kvmppc_spapr_enable_inkernel_multitce(); > } > + > + qemu_cond_init(&spapr->mc_delivery_cond); > } > > static int spapr_kvm_type(MachineState *machine, const char *vm_type) > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index ae0f093..a0c66d7 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -620,6 +620,29 @@ void > spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, > RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); > } > > +void spapr_mce_req_event(PowerPCCPU *cpu) > +{ > + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); > + > + while (spapr->mc_status != -1) { > + /* > + * Check whether the same CPU got machine check error > + * while still handling the mc error (i.e., before > + * that CPU called "ibm,nmi-interlock") > + */ > + if (spapr->mc_status == cpu->vcpu_id) { > + qemu_system_guest_panicked(NULL); > + return; > + } > + qemu_cond_wait_iothread(&spapr->mc_delivery_cond); > + /* Meanwhile if the system is reset, then just return */ > + if (spapr->guest_machine_check_addr == -1) { > + return; > + } > + } > 
+ spapr->mc_status = cpu->vcpu_id; > +} > + > static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, > uint32_t token, uint32_t nargs, > target_ulong args, > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index f891f8f..f34c79f 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -190,6 +190,15 @@ struct SpaprMachineState { > * occurs during the unplug process. */ > QTAILQ_HEAD(, SpaprDimmState) pending_dimm_unplugs; > > + /* State related to "ibm,nmi-register" and "ibm,nmi-interlock" calls */ > + target_ulong guest_machine_check_addr; > + /* > + * mc_status is set to -1 if mc is not in progress, else is set to the > CPU > + * handling the mc. > + */ > + int mc_status; > + QemuCond mc_delivery_cond; > + > /*< public >*/ > char *kvm_type; > char *host_model; > @@ -789,6 +798,7 @@ void spapr_clear_pending_events(SpaprMachineState *spapr); > int spapr_max_server_number(SpaprMachineState *spapr); > void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, > uint64_t pte0, uint64_t pte1); > +void spapr_mce_req_event(PowerPCCPU *cpu); > > /* DRC callbacks. 
*/ > void spapr_core_release(DeviceState *dev); > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c > index afef4cd..99f33fe 100644 > --- a/target/ppc/kvm.c > +++ b/target/ppc/kvm.c > @@ -1763,6 +1763,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run > *run) > ret = 0; > break; > > + case KVM_EXIT_NMI: > + trace_kvm_handle_nmi_exception(); > + ret = kvm_handle_nmi(cpu, run); > + break; > + > default: > fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); > ret = -1; > @@ -2863,6 +2868,15 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) > return data & 0xffff; > } > > +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run) > +{ > + cpu_synchronize_state(CPU(cpu)); > + > + spapr_mce_req_event(cpu); > + > + return 0; > +} > + > int kvmppc_enable_hwrng(void) > { > if (!kvm_enabled() || !kvm_check_extension(kvm_state, > KVM_CAP_PPC_HWRNG)) { > diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h > index 880cee9..3d9f0b4 100644 > --- a/target/ppc/kvm_ppc.h > +++ b/target/ppc/kvm_ppc.h > @@ -83,6 +83,8 @@ bool kvmppc_hpt_needs_host_contiguous_pages(void); > void kvm_check_mmu(PowerPCCPU *cpu, Error **errp); > void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online); > > +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run); > + > #else > > static inline uint32_t kvmppc_get_tbfreq(void) > diff --git a/target/ppc/trace-events b/target/ppc/trace-events > index 3dc6740..6d15aa9 100644 > --- a/target/ppc/trace-events > +++ b/target/ppc/trace-events > @@ -28,3 +28,4 @@ kvm_handle_papr_hcall(void) "handle PAPR hypercall" > kvm_handle_epr(void) "handle epr" > kvm_handle_watchdog_expiry(void) "handle watchdog expiry" > kvm_handle_debug_exception(void) "handle debug exception" > +kvm_handle_nmi_exception(void) "handle NMI exception" > -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature