Re: [PATCH v4 19/21] KVM: arm64: Handle RAS SErrors from EL2 on guest exit
On Thu, Oct 19, 2017 at 03:58:05PM +0100, James Morse wrote: > We expect to have firmware-first handling of RAS SErrors, with errors > notified via an APEI method. For systems without firmware-first, add > some minimal handling to KVM. > > There are two ways KVM can take an SError due to a guest, either may be a > RAS error: we exit the guest due to an SError routed to EL2 by HCR_EL2.AMO, > or we take an SError from EL2 when we unmask PSTATE.A from __guest_exit. > > The current SError from EL2 code unmasks SError and tries to fence any > pending SError into a single instruction window. It then leaves SError > unmasked. > > With the v8.2 RAS Extensions we may take an SError for a 'corrected' > error, but KVM is only able to handle SError from EL2 if they occur > during this single instruction window... > > The RAS Extensions give us a new instruction to synchronise and > consume SErrors. The RAS Extensions document (ARM DDI0587), > '2.4.1 ESB and Unrecoverable errors' describes ESB as synchronising > SError interrupts generated by 'instructions, translation table walks, > hardware updates to the translation tables, and instruction fetches on > the same PE'. This makes ESB equivalent to KVMs existing > 'dsb, mrs-daifclr, isb' sequence. > > Use the alternatives to synchronise and consume any SError using ESB > instead of unmasking and taking the SError. Set ARM_EXIT_WITH_SERROR_BIT > in the exit_code so that we can restart the vcpu if it turns out this > SError has no impact on the vcpu. 
> > Signed-off-by: James Morse Reviewed-by: Christoffer Dall > > --- > Changes since v3: > * Moved that nop out of the firing line > > arch/arm64/include/asm/kvm_emulate.h | 5 + > arch/arm64/include/asm/kvm_host.h| 1 + > arch/arm64/kernel/asm-offsets.c | 1 + > arch/arm64/kvm/handle_exit.c | 10 +- > arch/arm64/kvm/hyp/entry.S | 13 + > 5 files changed, 29 insertions(+), 1 deletion(-) > > diff --git a/arch/arm64/include/asm/kvm_emulate.h > b/arch/arm64/include/asm/kvm_emulate.h > index 8a7a838eb17a..8274d16df3cd 100644 > --- a/arch/arm64/include/asm/kvm_emulate.h > +++ b/arch/arm64/include/asm/kvm_emulate.h > @@ -173,6 +173,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const > struct kvm_vcpu *vcpu) > return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; > } > > +static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) > +{ > + return vcpu->arch.fault.disr_el1; > +} > + > static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) > { > return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; > diff --git a/arch/arm64/include/asm/kvm_host.h > b/arch/arm64/include/asm/kvm_host.h > index 97438cc3a9ad..cf5d78ba14b5 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -89,6 +89,7 @@ struct kvm_vcpu_fault_info { > u32 esr_el2;/* Hyp Syndrom Register */ > u64 far_el2;/* Hyp Fault Address Register */ > u64 hpfar_el2; /* Hyp IPA Fault Address Register */ > + u64 disr_el1; /* Deferred [SError] Status Register */ > }; > > /* > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c > index 71bf088f1e4b..121889c49542 100644 > --- a/arch/arm64/kernel/asm-offsets.c > +++ b/arch/arm64/kernel/asm-offsets.c > @@ -130,6 +130,7 @@ int main(void) >BLANK(); > #ifdef CONFIG_KVM_ARM_HOST >DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); > + DEFINE(VCPU_FAULT_DISR,offsetof(struct kvm_vcpu, arch.fault.disr_el1)); >DEFINE(CPU_GP_REGS,offsetof(struct kvm_cpu_context, > gp_regs)); 
>DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); >DEFINE(CPU_FP_REGS,offsetof(struct kvm_regs, fp_regs)); > diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c > index 345fdbba6c2e..e1e6cfe7d4d9 100644 > --- a/arch/arm64/kvm/handle_exit.c > +++ b/arch/arm64/kvm/handle_exit.c > @@ -23,6 +23,7 @@ > #include > > #include > +#include > #include > #include > #include > @@ -208,7 +209,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run > *run, > *vcpu_pc(vcpu) -= adj; > } > > - kvm_inject_vabt(vcpu); > + if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) { > + u64 disr = kvm_vcpu_get_disr(vcpu); > + > + kvm_handle_guest_serror(vcpu, disr_to_esr(disr)); > + } else { > + kvm_inject_vabt(vcpu); > + } > + > return 1; > } > > diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S > index 12ee62d6d410..024c7afc78f8 100644 > --- a/arch/arm64/kvm/hyp/entry.S > +++ b/arch/arm64/kvm/hyp/entry.S > @@ -124,6 +124,17 @@ ENTRY(__guest_exit) > // Now restore the host regs > restore_callee_saved_regs x2 > > +alte
Re: [PATCH v4 19/21] KVM: arm64: Handle RAS SErrors from EL2 on guest exit
On Thu, Oct 19 2017 at 4:58:05 pm BST, James Morse wrote: > We expect to have firmware-first handling of RAS SErrors, with errors > notified via an APEI method. For systems without firmware-first, add > some minimal handling to KVM. > > There are two ways KVM can take an SError due to a guest, either may be a > RAS error: we exit the guest due to an SError routed to EL2 by HCR_EL2.AMO, > or we take an SError from EL2 when we unmask PSTATE.A from __guest_exit. > > The current SError from EL2 code unmasks SError and tries to fence any > pending SError into a single instruction window. It then leaves SError > unmasked. > > With the v8.2 RAS Extensions we may take an SError for a 'corrected' > error, but KVM is only able to handle SError from EL2 if they occur > during this single instruction window... > > The RAS Extensions give us a new instruction to synchronise and > consume SErrors. The RAS Extensions document (ARM DDI0587), > '2.4.1 ESB and Unrecoverable errors' describes ESB as synchronising > SError interrupts generated by 'instructions, translation table walks, > hardware updates to the translation tables, and instruction fetches on > the same PE'. This makes ESB equivalent to KVMs existing > 'dsb, mrs-daifclr, isb' sequence. > > Use the alternatives to synchronise and consume any SError using ESB > instead of unmasking and taking the SError. Set ARM_EXIT_WITH_SERROR_BIT > in the exit_code so that we can restart the vcpu if it turns out this > SError has no impact on the vcpu. > > Signed-off-by: James Morse Reviewed-by: Marc Zyngier M. -- Jazz is not dead. It just smells funny. ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v4 19/21] KVM: arm64: Handle RAS SErrors from EL2 on guest exit
Hi gengdongjiu, On 27/10/17 07:26, gengdongjiu wrote: > On 2017/10/19 22:58, James Morse wrote: >> +alternative_if ARM64_HAS_RAS_EXTN >> +// If we have the RAS extensions we can consume a pending error >> +// without an unmask-SError and isb. >> +esb >> +mrs_s x2, SYS_DISR_EL1 > I do not think you can get the right value when esb produce a SError. when > SError happen, it will take to EL3 firmware immediately. so the disr_el1 will > not record > the error and value is 0. This depends on SCR_EL3.EA, which the normal-world can't know about. Your system sets SCR_EL3.EA, and takes the SError to EL3. It's now up to firmware to notify the normal world via some firmware-first mechanism. What does KVM do? SCR_EL3.EA makes DISR_EL1 RAZ/WI, so yes, it reads 0 here, notes there is no SError pending, and it continues on its merry way. Firmware is left to pick up the pieces and notify the normal world about the error. What if SCR_EL3.EA is clear? Now SCTLR_EL2.IESB's ErrorSynchronizationBarrier causes any RAS error the CPU has deferred to become a pending SError. But SError is masked because we took an exception. Running the ESB-instruction consumes any pending SError and writes its ESR into DISR_EL1. What does KVM do? Reads the value and sets the ARM_EXIT_WITH_SERROR_BIT if there was an error pending. >> +str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)] >> +cbz x2, 1f > why will jump to 1, if there is not SError, also "ret"? jump to 1: to avoid the cost of writing zero back to DISR_EL1 if it's already zero and skip setting the ARM_EXIT_WITH_SERROR_BIT, as there was no SError. ret: because this is what happens at the end of the vaxorcism code. We need to run that as with the ARMv8.2 RAS Extensions we have a better way of consuming SError from the CPU without taking them as an exception. >> +msr_s SYS_DISR_EL1, xzr >> +orr x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT) >> +1: ret >> +alternative_else James ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Re: [PATCH v4 19/21] KVM: arm64: Handle RAS SErrors from EL2 on guest exit
On 2017/10/19 22:58, James Morse wrote: > +alternative_if ARM64_HAS_RAS_EXTN > + // If we have the RAS extensions we can consume a pending error > + // without an unmask-SError and isb. > + esb > + mrs_s x2, SYS_DISR_EL1 I do not think you can get the right value when esb produce a SError. when SError happen, it will take to EL3 firmware immediately. so the disr_el1 will not record the error and value is 0. > + str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)] > + cbz x2, 1f why will jump to 1, if there is not SError, also "ret"? > + msr_s SYS_DISR_EL1, xzr > + orr x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT) > +1: ret > +alternative_else ___ kvmarm mailing list kvmarm@lists.cs.columbia.edu https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
[PATCH v4 19/21] KVM: arm64: Handle RAS SErrors from EL2 on guest exit
We expect to have firmware-first handling of RAS SErrors, with errors notified via an APEI method. For systems without firmware-first, add some minimal handling to KVM. There are two ways KVM can take an SError due to a guest, either may be a RAS error: we exit the guest due to an SError routed to EL2 by HCR_EL2.AMO, or we take an SError from EL2 when we unmask PSTATE.A from __guest_exit. The current SError from EL2 code unmasks SError and tries to fence any pending SError into a single instruction window. It then leaves SError unmasked. With the v8.2 RAS Extensions we may take an SError for a 'corrected' error, but KVM is only able to handle SError from EL2 if they occur during this single instruction window... The RAS Extensions give us a new instruction to synchronise and consume SErrors. The RAS Extensions document (ARM DDI0587), '2.4.1 ESB and Unrecoverable errors' describes ESB as synchronising SError interrupts generated by 'instructions, translation table walks, hardware updates to the translation tables, and instruction fetches on the same PE'. This makes ESB equivalent to KVMs existing 'dsb, mrs-daifclr, isb' sequence. Use the alternatives to synchronise and consume any SError using ESB instead of unmasking and taking the SError. Set ARM_EXIT_WITH_SERROR_BIT in the exit_code so that we can restart the vcpu if it turns out this SError has no impact on the vcpu. 
Signed-off-by: James Morse --- Changes since v3: * Moved that nop out of the firing line arch/arm64/include/asm/kvm_emulate.h | 5 + arch/arm64/include/asm/kvm_host.h| 1 + arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/handle_exit.c | 10 +- arch/arm64/kvm/hyp/entry.S | 13 + 5 files changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 8a7a838eb17a..8274d16df3cd 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -173,6 +173,11 @@ static inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; } +static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fault.disr_el1; +} + static inline u32 kvm_vcpu_hvc_get_imm(const struct kvm_vcpu *vcpu) { return kvm_vcpu_get_hsr(vcpu) & ESR_ELx_xVC_IMM_MASK; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 97438cc3a9ad..cf5d78ba14b5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -89,6 +89,7 @@ struct kvm_vcpu_fault_info { u32 esr_el2;/* Hyp Syndrom Register */ u64 far_el2;/* Hyp Fault Address Register */ u64 hpfar_el2; /* Hyp IPA Fault Address Register */ + u64 disr_el1; /* Deferred [SError] Status Register */ }; /* diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 71bf088f1e4b..121889c49542 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -130,6 +130,7 @@ int main(void) BLANK(); #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); + DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); diff --git 
a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 345fdbba6c2e..e1e6cfe7d4d9 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -208,7 +209,14 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, *vcpu_pc(vcpu) -= adj; } - kvm_inject_vabt(vcpu); + if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) { + u64 disr = kvm_vcpu_get_disr(vcpu); + + kvm_handle_guest_serror(vcpu, disr_to_esr(disr)); + } else { + kvm_inject_vabt(vcpu); + } + return 1; } diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 12ee62d6d410..024c7afc78f8 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -124,6 +124,17 @@ ENTRY(__guest_exit) // Now restore the host regs restore_callee_saved_regs x2 +alternative_if ARM64_HAS_RAS_EXTN + // If we have the RAS extensions we can consume a pending error + // without an unmask-SError and isb. + esb + mrs_s x2, SYS_DISR_EL1 + str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)] + cbz x2, 1f + msr_s SYS_DISR_EL1, xzr + o