Due to the way ARMv8.4-NV suppresses traps when accessing EL2
system registers, we can't track when the guest changes its
HCR_EL2.TGE setting. This means we always trap EL1 TLBIs,
even if they don't affect any guest.

This obviously has a huge impact on performance, as each trapped
TLBI is handled as a full exit, and a normal VHE host issues
thousands of TLBIs when booting (and quite a few more when running
userspace).

A cheap way to reduce the overhead is to handle the limited
case of {E2H,TGE}=={1,1} as a guest fixup, as we already have
the right mmu configuration in place. Just execute the decoded
instruction right away and return to the guest.

Signed-off-by: Marc Zyngier <m...@kernel.org>
---
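
For anyone who wants to poke at the gating logic in isolation, the fast
path boils down to: rebuild the sysreg encoding from the ISS of an
ESR_ELx_EC_SYS64 trap, then short-circuit only when the vcpu is in vEL2,
its virtual HCR_EL2 has {E2H,TGE} == {1,1}, and the trapped instruction
is an EL1 TLBI. Below is a rough standalone sketch of that gate; the
field offsets follow my reading of the Arm ARM and the kernel's
esr.h/sysreg.h, and the helpers (iss_to_sysreg(), fast_path_applies())
are made-up names for the example rather than anything taken from this
series.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HCR_E2H		(1ULL << 34)	/* HCR_EL2.E2H */
#define HCR_TGE		(1ULL << 27)	/* HCR_EL2.TGE */

#define TLBI_Op0	1		/* TLBI system instructions: Op0 == 1 */
#define TLBI_Op1_EL1	0		/* EL1 variants: Op1 == 0 */

/*
 * Rebuild a sys_reg(op0, op1, crn, crm, op2) encoding from the ISS of
 * an ESR_ELx_EC_SYS64 trap (same packing as the kernel's sysreg.h,
 * assuming the usual ISS field offsets).
 */
static uint32_t iss_to_sysreg(uint32_t iss)
{
	uint32_t op0 = (iss >> 20) & 0x3;
	uint32_t op2 = (iss >> 17) & 0x7;
	uint32_t op1 = (iss >> 14) & 0x7;
	uint32_t crn = (iss >> 10) & 0xf;
	uint32_t crm = (iss >>  1) & 0xf;

	return (op0 << 14) | (op1 << 11) | (crn << 7) | (crm << 3) | op2;
}

/*
 * The gate: only take the fast path when the vcpu is in vEL2, its
 * virtual HCR_EL2 has {E2H,TGE} == {1,1}, and the trapped instruction
 * is an EL1 TLBI.
 */
static bool fast_path_applies(bool vcpu_in_vel2, uint64_t vhcr, uint32_t iss)
{
	uint32_t instr = iss_to_sysreg(iss);

	if (!vcpu_in_vel2)
		return false;
	if ((vhcr & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE))
		return false;

	return ((instr >> 14) & 0x3) == TLBI_Op0 &&
	       ((instr >> 11) & 0x7) == TLBI_Op1_EL1;
}

int main(void)
{
	/* ISS for TLBI VMALLE1IS: Op0=1, Op1=0, CRn=8, CRm=3, Op2=0 */
	uint32_t iss = (1u << 20) | (8u << 10) | (3u << 1);

	printf("%d\n", fast_path_applies(true, HCR_E2H | HCR_TGE, iss));
	printf("%d\n", fast_path_applies(true, HCR_E2H, iss));
	return 0;
}

Built with a stock C compiler, the first call reports the fast path as
applicable and the second does not, since TGE is clear in the virtual
HCR_EL2.
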
 arch/arm64/kvm/hyp/vhe/switch.c | 36 +++++++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/vhe/tlb.c    |  6 ++++--
 arch/arm64/kvm/sys_regs.c       | 25 ++++++++---------------
 3 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index c90aed418f73..0b9bc36340a5 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -168,6 +168,39 @@ void deactivate_traps_vhe_put(void)
        __deactivate_traps_common();
 }
 
+static bool __hyp_handle_tlbi_el1(struct kvm_vcpu *vcpu)
+{
+       u32 instr;
+       u64 val;
+
+       /*
+        * Ideally, we would never trap on EL1 TLB invalidations when the
+        * guest's HCR_EL2.{E2H,TGE} == {1,1}. But "thanks" to ARMv8.4, we
+        * don't trap writes to HCR_EL2, meaning that we can't track
+        * changes to the virtual TGE bit. So we leave HCR_EL2.TTLB set on
+        * the host. Oopsie...
+        *
+        * In order to speed up EL1 TLBIs from the vEL2 guest when TGE is
+        * set, try and handle these invalidations as quickly as possible,
+        * without fully exiting (unless this needs forwarding).
+        */
+       if (kvm_vcpu_trap_get_class(vcpu) != ESR_ELx_EC_SYS64 ||
+           !vcpu_mode_el2(vcpu) ||
+           (__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE))
+               return false;
+
+       instr = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+       if (sys_reg_Op0(instr) != TLBI_Op0 ||
+           sys_reg_Op1(instr) != TLBI_Op1_EL1)
+               return false;
+
+       val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+       __kvm_tlb_el1_instr(NULL, val, instr);
+       __kvm_skip_instr(vcpu);
+
+       return true;
+}
+
 static bool __hyp_handle_eret(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
@@ -261,6 +294,9 @@ static bool fixup_guest_exit_vhe(struct kvm_vcpu *vcpu, u64 *exit_code,
        if (*exit_code == ARM_EXCEPTION_TRAP) {
                if (__hyp_handle_eret(vcpu))
                        return true;
+
+               if (__hyp_handle_tlbi_el1(vcpu))
+                       return true;
        }
 
        return fixup_guest_exit(vcpu, exit_code);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 52fda1d61308..6dd7f224e0f3 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -200,7 +200,8 @@ void __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding)
        dsb(ishst);
 
        /* Switch to requested VMID */
-       __tlb_switch_to_guest(mmu, &cxt);
+       if (mmu)
+               __tlb_switch_to_guest(mmu, &cxt);
 
        /*
         * Execute the same instruction as the guest hypervisor did,
@@ -239,5 +240,6 @@ void __kvm_tlb_el1_instr(struct kvm_s2_mmu *mmu, u64 val, u64 sys_encoding)
        dsb(ish);
        isb();
 
-       __tlb_switch_to_host(&cxt);
+       if (mmu)
+               __tlb_switch_to_host(&cxt);
 }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c7b004982ea9..cf03281f06a3 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2684,6 +2684,8 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
                            const struct sys_reg_desc *r)
 {
        u32 sys_encoding = sys_insn(p->Op0, p->Op1, p->CRn, p->CRm, p->Op2);
+       u64 virtual_vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
+       struct kvm_s2_mmu *mmu;
 
        /*
         * If we're here, this is because we've trapped on a EL1 TLBI
@@ -2702,24 +2704,13 @@ static bool handle_tlbi_el1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
        mutex_lock(&vcpu->kvm->lock);
 
-       if ((__vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE)) {
-               u64 virtual_vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
-               struct kvm_s2_mmu *mmu;
-
-               mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, HCR_VM);
-               if (mmu)
-                       __kvm_tlb_el1_instr(mmu, p->regval, sys_encoding);
+       mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, HCR_VM);
+       if (mmu)
+               __kvm_tlb_el1_instr(mmu, p->regval, sys_encoding);
 
-               mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, 0);
-               if (mmu)
-                       __kvm_tlb_el1_instr(mmu, p->regval, sys_encoding);
-       } else {
-               /*
-                * ARMv8.4-NV allows the guest to change TGE behind
-                * our back, so we always trap EL1 TLBIs from vEL2...
-                */
-               __kvm_tlb_el1_instr(&vcpu->kvm->arch.mmu, p->regval, sys_encoding);
-       }
+       mmu = lookup_s2_mmu(vcpu->kvm, virtual_vttbr, 0);
+       if (mmu)
+               __kvm_tlb_el1_instr(mmu, p->regval, sys_encoding);
 
        mutex_unlock(&vcpu->kvm->lock);
 
-- 
2.29.2
