This patch is part of a series which builds KVM's non-VHE hyp code separately
from VHE and the rest of the kernel.

switch.c implements the context-switching (world-switch) code for KVM, large parts
of which are shared between VHE and nVHE. The common routines are moved to a new
header, switch.h; VHE-specific code is left in switch.c and nVHE-specific code is
moved to a new nvhe/switch.c.

Previously, the run-loop entry point had a different symbol name under VHE
(kvm_vcpu_run_vhe) and nVHE (__kvm_vcpu_run_nvhe). Both are now unified under a
single __kvm_vcpu_run symbol, and the caller in arm.c is simplified accordingly.
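
For reference, the caller in kvm_arch_vcpu_ioctl_run() changes as follows
(excerpted from the arm.c hunk below; kvm_call_hyp_ret() is expected to resolve
to a direct function call under VHE and to an HVC into the nVHE hyp image
otherwise):

        /* before: pick the entry point by mode */
        if (has_vhe())
                ret = kvm_vcpu_run_vhe(vcpu);
        else
                ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);

        /* after: a single symbol; kvm_call_hyp_ret() handles the VHE/nVHE split */
        ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);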

Signed-off-by: David Brazdil <dbraz...@google.com>
---
 arch/arm64/include/asm/kvm_asm.h             |   4 +-
 arch/arm64/include/asm/kvm_host_hypercalls.h |   4 +-
 arch/arm64/include/asm/kvm_hyp.h             |   5 +
 arch/arm64/kernel/image-vars.h               |  25 +-
 arch/arm64/kvm/arm.c                         |   6 +-
 arch/arm64/kvm/hyp/hyp-entry.S               |   2 +
 arch/arm64/kvm/hyp/nvhe/Makefile             |   2 +-
 arch/arm64/kvm/hyp/nvhe/switch.c             | 271 ++++++++
 arch/arm64/kvm/hyp/switch.c                  | 688 +------------------
 arch/arm64/kvm/hyp/switch.h                  | 441 ++++++++++++
 arch/arm64/kvm/hyp/sysreg-sr.c               |   4 +-
 11 files changed, 764 insertions(+), 688 deletions(-)
 create mode 100644 arch/arm64/kvm/hyp/nvhe/switch.c
 create mode 100644 arch/arm64/kvm/hyp/switch.h

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index cdaf3df8085d..0cb229b9e148 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -86,9 +86,7 @@ extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
 
 extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high);
 
-extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu);
-
-extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu);
+extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
 extern u64 __vgic_v3_get_ich_vtr_el2(void);
 extern u64 __vgic_v3_read_vmcr(void);
diff --git a/arch/arm64/include/asm/kvm_host_hypercalls.h b/arch/arm64/include/asm/kvm_host_hypercalls.h
index af8ad505d816..cc45930fdc76 100644
--- a/arch/arm64/include/asm/kvm_host_hypercalls.h
+++ b/arch/arm64/include/asm/kvm_host_hypercalls.h
@@ -22,8 +22,8 @@ __KVM_HOST_HCALL(__kvm_tlb_flush_local_vmid)
 #define __KVM_HOST_HCALL_TABLE_IDX___kvm_flush_vm_context      4
 __KVM_HOST_HCALL(__kvm_flush_vm_context)
 
-#define __KVM_HOST_HCALL_TABLE_IDX___kvm_vcpu_run_nvhe         5
-__KVM_HOST_HCALL(__kvm_vcpu_run_nvhe)
+#define __KVM_HOST_HCALL_TABLE_IDX___kvm_vcpu_run              5
+__KVM_HOST_HCALL(__kvm_vcpu_run)
 
 #define __KVM_HOST_HCALL_TABLE_IDX___kvm_tlb_flush_vmid                6
 __KVM_HOST_HCALL(__kvm_tlb_flush_vmid)
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index fe57f60f06a8..b5895040c16a 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -82,11 +82,16 @@ void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
 
+#ifndef __HYPERVISOR__
 void activate_traps_vhe_load(struct kvm_vcpu *vcpu);
 void deactivate_traps_vhe_put(void);
+#endif
 
 u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt);
+
+#ifdef __HYPERVISOR__
 void __noreturn __hyp_do_panic(unsigned long, ...);
+#endif
 
 /*
  * Must be called from hyp code running at EL2 with an updated VTTBR
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index c0a0ec238854..e2282a4304c5 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -61,18 +61,34 @@ __efistub__ctype            = _ctype;
  * memory mappings.
  */
 
+__hyp_text___debug_switch_to_guest = __debug_switch_to_guest;
+__hyp_text___debug_switch_to_host = __debug_switch_to_host;
+__hyp_text___fpsimd_restore_state = __fpsimd_restore_state;
+__hyp_text___fpsimd_save_state = __fpsimd_save_state;
+__hyp_text___guest_enter = __guest_enter;
 __hyp_text___guest_exit = __guest_exit;
 __hyp_text___icache_flags = __icache_flags;
 __hyp_text___kvm_enable_ssbs = __kvm_enable_ssbs;
 __hyp_text___kvm_get_mdcr_el2 = __kvm_get_mdcr_el2;
 __hyp_text___kvm_handle_stub_hvc = __kvm_handle_stub_hvc;
 __hyp_text___kvm_timer_set_cntvoff = __kvm_timer_set_cntvoff;
-__hyp_text___kvm_vcpu_run_nvhe = __kvm_vcpu_run_nvhe;
+__hyp_text___sysreg32_restore_state = __sysreg32_restore_state;
+__hyp_text___sysreg32_save_state = __sysreg32_save_state;
+__hyp_text___sysreg_restore_state_nvhe = __sysreg_restore_state_nvhe;
+__hyp_text___sysreg_save_state_nvhe = __sysreg_save_state_nvhe;
+__hyp_text___timer_disable_traps = __timer_disable_traps;
+__hyp_text___timer_enable_traps = __timer_enable_traps;
+__hyp_text___vgic_v2_perform_cpuif_access = __vgic_v2_perform_cpuif_access;
+__hyp_text___vgic_v3_activate_traps = __vgic_v3_activate_traps;
+__hyp_text___vgic_v3_deactivate_traps = __vgic_v3_deactivate_traps;
 __hyp_text___vgic_v3_get_ich_vtr_el2 = __vgic_v3_get_ich_vtr_el2;
 __hyp_text___vgic_v3_init_lrs = __vgic_v3_init_lrs;
+__hyp_text___vgic_v3_perform_cpuif_access = __vgic_v3_perform_cpuif_access;
 __hyp_text___vgic_v3_read_vmcr = __vgic_v3_read_vmcr;
 __hyp_text___vgic_v3_restore_aprs = __vgic_v3_restore_aprs;
+__hyp_text___vgic_v3_restore_state = __vgic_v3_restore_state;
 __hyp_text___vgic_v3_save_aprs = __vgic_v3_save_aprs;
+__hyp_text___vgic_v3_save_state = __vgic_v3_save_state;
 __hyp_text___vgic_v3_write_vmcr = __vgic_v3_write_vmcr;
 __hyp_text_abort_guest_exit_end = abort_guest_exit_end;
 __hyp_text_abort_guest_exit_start = abort_guest_exit_start;
@@ -81,13 +97,18 @@ __hyp_text_arm64_enable_wa2_handling = arm64_enable_wa2_handling;
 __hyp_text_arm64_ssbd_callback_required = arm64_ssbd_callback_required;
 __hyp_text_cpu_hwcap_keys = cpu_hwcap_keys;
 __hyp_text_cpu_hwcaps = cpu_hwcaps;
-__hyp_text_hyp_panic = hyp_panic;
 __hyp_text_kimage_voffset = kimage_voffset;
 __hyp_text_kvm_host_data = kvm_host_data;
 __hyp_text_kvm_patch_vector_branch = kvm_patch_vector_branch;
+__hyp_text_kvm_skip_instr32 = kvm_skip_instr32;
 __hyp_text_kvm_update_va_mask = kvm_update_va_mask;
+__hyp_text_kvm_vgic_global_state = kvm_vgic_global_state;
 __hyp_text_panic = panic;
 __hyp_text_physvirt_offset = physvirt_offset;
+__hyp_text_sve_load_state = sve_load_state;
+__hyp_text_sve_save_state = sve_save_state;
+__hyp_text_vgic_v2_cpuif_trap = vgic_v2_cpuif_trap;
+__hyp_text_vgic_v3_cpuif_trap = vgic_v3_cpuif_trap;
 
 #endif /* CONFIG_KVM */
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index c958bb37b769..dea249dc82b3 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -749,11 +749,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
                trace_kvm_entry(*vcpu_pc(vcpu));
                guest_enter_irqoff();
 
-               if (has_vhe()) {
-                       ret = kvm_vcpu_run_vhe(vcpu);
-               } else {
-                       ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu);
-               }
+               ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu);
 
                vcpu->mode = OUTSIDE_GUEST_MODE;
                vcpu->stat.exits++;
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 7e5f386c5c2d..04ea0b83b728 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -221,6 +221,7 @@ el2_error:
        eret
        sb
 
+#ifdef __HYPERVISOR__
 SYM_FUNC_START(__hyp_do_panic)
        mov     lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
                      PSR_MODE_EL1h)
@@ -230,6 +231,7 @@ SYM_FUNC_START(__hyp_do_panic)
        eret
        sb
 SYM_FUNC_END(__hyp_do_panic)
+#endif
 
 SYM_CODE_START(__hyp_panic)
        get_host_ctxt x0, x1
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 0da836cb5580..82fbd0b76501 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -7,7 +7,7 @@ asflags-y := -D__HYPERVISOR__
 ccflags-y := -D__HYPERVISOR__ -fno-stack-protector -DDISABLE_BRANCH_PROFILING \
             $(DISABLE_STACKLEAK_PLUGIN)
 
-obj-y := tlb.o host_hypercall.o ../hyp-entry.o
+obj-y := switch.o tlb.o host_hypercall.o ../hyp-entry.o
 
 obj-y := $(patsubst %.o,%.hyp.o,$(obj-y))
 extra-y := $(patsubst %.hyp.o,%.hyp.tmp.o,$(obj-y))
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
new file mode 100644
index 000000000000..4294beed3dc1
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyng...@arm.com>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+#include "../switch.h"
+
+static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+{
+       u64 val;
+
+       ___activate_traps(vcpu);
+       __activate_traps_common(vcpu);
+
+       val = CPTR_EL2_DEFAULT;
+       val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM;
+       if (!update_fp_enabled(vcpu)) {
+               val |= CPTR_EL2_TFP;
+               __activate_traps_fpsimd32(vcpu);
+       }
+
+       write_sysreg(val, cptr_el2);
+
+       if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+               struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
+
+               isb();
+               /*
+                * At this stage, and thanks to the above isb(), S2 is
+                * configured and enabled. We can now restore the guest's S1
+                * configuration: SCTLR, and only then TCR.
+                */
+               write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],     SYS_SCTLR);
+               isb();
+               write_sysreg_el1(ctxt->sys_regs[TCR_EL1],       SYS_TCR);
+       }
+}
+
+static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
+{
+       u64 mdcr_el2;
+
+       ___deactivate_traps(vcpu);
+
+       mdcr_el2 = read_sysreg(mdcr_el2);
+
+       if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
+               u64 val;
+
+               /*
+                * Set the TCR and SCTLR registers in the exact opposite
+                * sequence as __activate_traps (first prevent walks,
+                * then force the MMU on). A generous sprinkling of isb()
+                * ensure that things happen in this exact order.
+                */
+               val = read_sysreg_el1(SYS_TCR);
+               write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
+               isb();
+               val = read_sysreg_el1(SYS_SCTLR);
+               write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
+               isb();
+       }
+
+       __deactivate_traps_common();
+
+       mdcr_el2 &= MDCR_EL2_HPMN_MASK;
+       mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
+
+       write_sysreg(mdcr_el2, mdcr_el2);
+       write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
+       write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
+}
+
+static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
+{
+       write_sysreg(0, vttbr_el2);
+}
+
+/* Save VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
+{
+       if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+               __vgic_v3_save_state(vcpu);
+               __vgic_v3_deactivate_traps(vcpu);
+       }
+}
+
+/* Restore VGICv3 state on non-VHE systems */
+static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
+{
+       if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
+               __vgic_v3_activate_traps(vcpu);
+               __vgic_v3_restore_state(vcpu);
+       }
+}
+
+/**
+ * Disable host events, enable guest events
+ */
+static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
+{
+       struct kvm_host_data *host;
+       struct kvm_pmu_events *pmu;
+
+       host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+       pmu = &host->pmu_events;
+
+       if (pmu->events_host)
+               write_sysreg(pmu->events_host, pmcntenclr_el0);
+
+       if (pmu->events_guest)
+               write_sysreg(pmu->events_guest, pmcntenset_el0);
+
+       return (pmu->events_host || pmu->events_guest);
+}
+
+/**
+ * Disable guest events, enable host events
+ */
+static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
+{
+       struct kvm_host_data *host;
+       struct kvm_pmu_events *pmu;
+
+       host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
+       pmu = &host->pmu_events;
+
+       if (pmu->events_guest)
+               write_sysreg(pmu->events_guest, pmcntenclr_el0);
+
+       if (pmu->events_host)
+               write_sysreg(pmu->events_host, pmcntenset_el0);
+}
+
+/* Switch to the guest for legacy non-VHE systems */
+int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpu_context *host_ctxt;
+       struct kvm_cpu_context *guest_ctxt;
+       bool pmu_switch_needed;
+       u64 exit_code;
+
+       /*
+        * Having IRQs masked via PMR when entering the guest means the GIC
+        * will not signal the CPU of interrupts of lower priority, and the
+        * only way to get out will be via guest exceptions.
+        * Naturally, we want to avoid this.
+        */
+       if (system_uses_irq_prio_masking()) {
+               gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
+               pmr_sync();
+       }
+
+       vcpu = kern_hyp_va(vcpu);
+
+       host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
+       host_ctxt->__hyp_running_vcpu = vcpu;
+       guest_ctxt = &vcpu->arch.ctxt;
+
+       pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
+
+       __sysreg_save_state_nvhe(host_ctxt);
+
+       /*
+        * We must restore the 32-bit state before the sysregs, thanks
+        * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
+        *
+        * Also, and in order to be able to deal with erratum #1319537 (A57)
+        * and #1319367 (A72), we must ensure that all VM-related sysreg are
+        * restored before we enable S2 translation.
+        */
+       __sysreg32_restore_state(vcpu);
+       __sysreg_restore_state_nvhe(guest_ctxt);
+
+       __activate_vm(kern_hyp_va(vcpu->kvm));
+       __activate_traps(vcpu);
+
+       __hyp_vgic_restore_state(vcpu);
+       __timer_enable_traps(vcpu);
+
+       __debug_switch_to_guest(vcpu);
+
+       __set_guest_arch_workaround_state(vcpu);
+
+       do {
+               /* Jump in the fire! */
+               exit_code = __guest_enter(vcpu, host_ctxt);
+
+               /* And we're baaack! */
+       } while (fixup_guest_exit(vcpu, &exit_code));
+
+       __set_host_arch_workaround_state(vcpu);
+
+       __sysreg_save_state_nvhe(guest_ctxt);
+       __sysreg32_save_state(vcpu);
+       __timer_disable_traps(vcpu);
+       __hyp_vgic_save_state(vcpu);
+
+       __deactivate_traps(vcpu);
+       __deactivate_vm(vcpu);
+
+       __sysreg_restore_state_nvhe(host_ctxt);
+
+       if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
+               __fpsimd_save_fpexc32(vcpu);
+
+       /*
+        * This must come after restoring the host sysregs, since a non-VHE
+        * system may enable SPE here and make use of the TTBRs.
+        */
+       __debug_switch_to_host(vcpu);
+
+       if (pmu_switch_needed)
+               __pmu_switch_to_host(host_ctxt);
+
+       /* Returning to host will clear PSR.I, remask PMR if needed */
+       if (system_uses_irq_prio_masking())
+               gic_write_pmr(GIC_PRIO_IRQOFF);
+
+       return exit_code;
+}
+
+void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+{
+       u64 spsr = read_sysreg_el2(SYS_SPSR);
+       u64 elr = read_sysreg_el2(SYS_ELR);
+       u64 par = read_sysreg(par_el1);
+       struct kvm_vcpu *vcpu = host_ctxt->__hyp_running_vcpu;
+       unsigned long str_va;
+
+       if (read_sysreg(vttbr_el2)) {
+               __timer_disable_traps(vcpu);
+               __deactivate_traps(vcpu);
+               __deactivate_vm(vcpu);
+               __sysreg_restore_state_nvhe(host_ctxt);
+       }
+
+       /*
+        * Force the panic string to be loaded from the literal pool,
+        * making sure it is a kernel address and not a PC-relative
+        * reference.
+        */
+       asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string));
+
+       __hyp_do_panic(str_va,
+                      spsr, elr,
+                      read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
+                      read_sysreg(hpfar_el2), par, vcpu);
+       unreachable();
+}
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 7a7c08029d81..1d03c5bf0b18 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -24,76 +24,14 @@
 #include <asm/processor.h>
 #include <asm/thread_info.h>
 
-/* Check whether the FP regs were dirtied while in the host-side run loop: */
-static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
-{
-       /*
-        * When the system doesn't support FP/SIMD, we cannot rely on
-        * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
-        * abort on the very first access to FP and thus we should never
-        * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
-        * trap the accesses.
-        */
-       if (!system_supports_fpsimd() ||
-           vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
-               vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
-                                     KVM_ARM64_FP_HOST);
-
-       return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
-}
-
-/* Save the 32-bit only FPSIMD system register state */
-static void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
-{
-       if (!vcpu_el1_is_32bit(vcpu))
-               return;
-
-       vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
-}
-
-static void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
-{
-       /*
-        * We are about to set CPTR_EL2.TFP to trap all floating point
-        * register accesses to EL2, however, the ARM ARM clearly states that
-        * traps are only taken to EL2 if the operation would not otherwise
-        * trap to EL1.  Therefore, always make sure that for 32-bit guests,
-        * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
-        * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
-        * it will cause an exception.
-        */
-       if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
-               write_sysreg(1 << 30, fpexc32_el2);
-               isb();
-       }
-}
-
-static void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
-{
-       /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
-       write_sysreg(1 << 15, hstr_el2);
-
-       /*
-        * Make sure we trap PMU access from EL0 to EL2. Also sanitize
-        * PMSELR_EL0 to make sure it never contains the cycle
-        * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
-        * EL1 instead of being trapped to EL2.
-        */
-       write_sysreg(0, pmselr_el0);
-       write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
-       write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
-}
-
-static void __hyp_text __deactivate_traps_common(void)
-{
-       write_sysreg(0, hstr_el2);
-       write_sysreg(0, pmuserenr_el0);
-}
+#include "switch.h"
 
-static void activate_traps_vhe(struct kvm_vcpu *vcpu)
+static void __activate_traps(struct kvm_vcpu *vcpu)
 {
        u64 val;
 
+       ___activate_traps(vcpu);
+
        val = read_sysreg(cpacr_el1);
        val |= CPACR_EL1_TTA;
        val &= ~CPACR_EL1_ZEN;
@@ -121,59 +59,14 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
 
        write_sysreg(kvm_get_hyp_vector(), vbar_el1);
 }
-NOKPROBE_SYMBOL(activate_traps_vhe);
-
-static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
-{
-       u64 val;
-
-       __activate_traps_common(vcpu);
-
-       val = CPTR_EL2_DEFAULT;
-       val |= CPTR_EL2_TTA | CPTR_EL2_TZ | CPTR_EL2_TAM;
-       if (!update_fp_enabled(vcpu)) {
-               val |= CPTR_EL2_TFP;
-               __activate_traps_fpsimd32(vcpu);
-       }
-
-       write_sysreg(val, cptr_el2);
-
-       if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
-               struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt;
-
-               isb();
-               /*
-                * At this stage, and thanks to the above isb(), S2 is
-                * configured and enabled. We can now restore the guest's S1
-                * configuration: SCTLR, and only then TCR.
-                */
-               write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1],     SYS_SCTLR);
-               isb();
-               write_sysreg_el1(ctxt->sys_regs[TCR_EL1],       SYS_TCR);
-       }
-}
+NOKPROBE_SYMBOL(__activate_traps);
 
-static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
+static void __deactivate_traps(struct kvm_vcpu *vcpu)
 {
-       u64 hcr = vcpu->arch.hcr_el2;
-
-       if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
-               hcr |= HCR_TVM;
-
-       write_sysreg(hcr, hcr_el2);
-
-       if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
-               write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+       extern char vectors[];  /* kernel exception vectors */
 
-       if (has_vhe())
-               activate_traps_vhe(vcpu);
-       else
-               __activate_traps_nvhe(vcpu);
-}
+       ___deactivate_traps(vcpu);
 
-static void deactivate_traps_vhe(void)
-{
-       extern char vectors[];  /* kernel exception vectors */
        write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2);
 
        /*
@@ -186,57 +79,7 @@ static void deactivate_traps_vhe(void)
        write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1);
        write_sysreg(vectors, vbar_el1);
 }
-NOKPROBE_SYMBOL(deactivate_traps_vhe);
-
-static void __hyp_text __deactivate_traps_nvhe(void)
-{
-       u64 mdcr_el2 = read_sysreg(mdcr_el2);
-
-       if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) {
-               u64 val;
-
-               /*
-                * Set the TCR and SCTLR registers in the exact opposite
-                * sequence as __activate_traps_nvhe (first prevent walks,
-                * then force the MMU on). A generous sprinkling of isb()
-                * ensure that things happen in this exact order.
-                */
-               val = read_sysreg_el1(SYS_TCR);
-               write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR);
-               isb();
-               val = read_sysreg_el1(SYS_SCTLR);
-               write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR);
-               isb();
-       }
-
-       __deactivate_traps_common();
-
-       mdcr_el2 &= MDCR_EL2_HPMN_MASK;
-       mdcr_el2 |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT;
-
-       write_sysreg(mdcr_el2, mdcr_el2);
-       write_sysreg(HCR_HOST_NVHE_FLAGS, hcr_el2);
-       write_sysreg(CPTR_EL2_DEFAULT, cptr_el2);
-}
-
-static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu)
-{
-       /*
-        * If we pended a virtual abort, preserve it until it gets
-        * cleared. See D1.14.3 (Virtual Interrupts) for details, but
-        * the crucial bit is "On taking a vSError interrupt,
-        * HCR_EL2.VSE is cleared to 0."
-        */
-       if (vcpu->arch.hcr_el2 & HCR_VSE) {
-               vcpu->arch.hcr_el2 &= ~HCR_VSE;
-               vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
-       }
-
-       if (has_vhe())
-               deactivate_traps_vhe();
-       else
-               __deactivate_traps_nvhe();
-}
+NOKPROBE_SYMBOL(__deactivate_traps);
 
 void activate_traps_vhe_load(struct kvm_vcpu *vcpu)
 {
@@ -256,385 +99,6 @@ void deactivate_traps_vhe_put(void)
        __deactivate_traps_common();
 }
 
-static void __hyp_text __activate_vm(struct kvm *kvm)
-{
-       __load_guest_stage2(kvm);
-}
-
-static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu)
-{
-       write_sysreg(0, vttbr_el2);
-}
-
-/* Save VGICv3 state on non-VHE systems */
-static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu)
-{
-       if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
-               __vgic_v3_save_state(vcpu);
-               __vgic_v3_deactivate_traps(vcpu);
-       }
-}
-
-/* Restore VGICv3 state on non_VEH systems */
-static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
-{
-       if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) {
-               __vgic_v3_activate_traps(vcpu);
-               __vgic_v3_restore_state(vcpu);
-       }
-}
-
-static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
-{
-       u64 par, tmp;
-
-       /*
-        * Resolve the IPA the hard way using the guest VA.
-        *
-        * Stage-1 translation already validated the memory access
-        * rights. As such, we can use the EL1 translation regime, and
-        * don't have to distinguish between EL0 and EL1 access.
-        *
-        * We do need to save/restore PAR_EL1 though, as we haven't
-        * saved the guest context yet, and we may return early...
-        */
-       par = read_sysreg(par_el1);
-       asm volatile("at s1e1r, %0" : : "r" (far));
-       isb();
-
-       tmp = read_sysreg(par_el1);
-       write_sysreg(par, par_el1);
-
-       if (unlikely(tmp & SYS_PAR_EL1_F))
-               return false; /* Translation failed, back to guest */
-
-       /* Convert PAR to HPFAR format */
-       *hpfar = PAR_TO_HPFAR(tmp);
-       return true;
-}
-
-static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
-{
-       u8 ec;
-       u64 esr;
-       u64 hpfar, far;
-
-       esr = vcpu->arch.fault.esr_el2;
-       ec = ESR_ELx_EC(esr);
-
-       if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
-               return true;
-
-       far = read_sysreg_el2(SYS_FAR);
-
-       /*
-        * The HPFAR can be invalid if the stage 2 fault did not
-        * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
-        * bit is clear) and one of the two following cases are true:
-        *   1. The fault was due to a permission fault
-        *   2. The processor carries errata 834220
-        *
-        * Therefore, for all non S1PTW faults where we either have a
-        * permission fault or the errata workaround is enabled, we
-        * resolve the IPA using the AT instruction.
-        */
-       if (!(esr & ESR_ELx_S1PTW) &&
-           (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
-            (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
-               if (!__translate_far_to_hpfar(far, &hpfar))
-                       return false;
-       } else {
-               hpfar = read_sysreg(hpfar_el2);
-       }
-
-       vcpu->arch.fault.far_el2 = far;
-       vcpu->arch.fault.hpfar_el2 = hpfar;
-       return true;
-}
-
-/* Check for an FPSIMD/SVE trap and handle as appropriate */
-static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
-{
-       bool vhe, sve_guest, sve_host;
-       u8 hsr_ec;
-
-       if (!system_supports_fpsimd())
-               return false;
-
-       if (system_supports_sve()) {
-               sve_guest = vcpu_has_sve(vcpu);
-               sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
-               vhe = true;
-       } else {
-               sve_guest = false;
-               sve_host = false;
-               vhe = has_vhe();
-       }
-
-       hsr_ec = kvm_vcpu_trap_get_class(vcpu);
-       if (hsr_ec != ESR_ELx_EC_FP_ASIMD &&
-           hsr_ec != ESR_ELx_EC_SVE)
-               return false;
-
-       /* Don't handle SVE traps for non-SVE vcpus here: */
-       if (!sve_guest)
-               if (hsr_ec != ESR_ELx_EC_FP_ASIMD)
-                       return false;
-
-       /* Valid trap.  Switch the context: */
-
-       if (vhe) {
-               u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
-
-               if (sve_guest)
-                       reg |= CPACR_EL1_ZEN;
-
-               write_sysreg(reg, cpacr_el1);
-       } else {
-               write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
-                            cptr_el2);
-       }
-
-       isb();
-
-       if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
-               /*
-                * In the SVE case, VHE is assumed: it is enforced by
-                * Kconfig and kvm_arch_init().
-                */
-               if (sve_host) {
-                       struct thread_struct *thread = container_of(
-                               vcpu->arch.host_fpsimd_state,
-                               struct thread_struct, uw.fpsimd_state);
-
-                       sve_save_state(sve_pffr(thread),
-                                      &vcpu->arch.host_fpsimd_state->fpsr);
-               } else {
-                       __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
-               }
-
-               vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
-       }
-
-       if (sve_guest) {
-               sve_load_state(vcpu_sve_pffr(vcpu),
-                              &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
-                              sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
-               write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
-       } else {
-               __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
-       }
-
-       /* Skip restoring fpexc32 for AArch64 guests */
-       if (!(read_sysreg(hcr_el2) & HCR_RW))
-               write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
-                            fpexc32_el2);
-
-       vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
-
-       return true;
-}
-
-static bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
-{
-       u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu));
-       int rt = kvm_vcpu_sys_get_rt(vcpu);
-       u64 val = vcpu_get_reg(vcpu, rt);
-
-       /*
-        * The normal sysreg handling code expects to see the traps,
-        * let's not do anything here.
-        */
-       if (vcpu->arch.hcr_el2 & HCR_TVM)
-               return false;
-
-       switch (sysreg) {
-       case SYS_SCTLR_EL1:
-               write_sysreg_el1(val, SYS_SCTLR);
-               break;
-       case SYS_TTBR0_EL1:
-               write_sysreg_el1(val, SYS_TTBR0);
-               break;
-       case SYS_TTBR1_EL1:
-               write_sysreg_el1(val, SYS_TTBR1);
-               break;
-       case SYS_TCR_EL1:
-               write_sysreg_el1(val, SYS_TCR);
-               break;
-       case SYS_ESR_EL1:
-               write_sysreg_el1(val, SYS_ESR);
-               break;
-       case SYS_FAR_EL1:
-               write_sysreg_el1(val, SYS_FAR);
-               break;
-       case SYS_AFSR0_EL1:
-               write_sysreg_el1(val, SYS_AFSR0);
-               break;
-       case SYS_AFSR1_EL1:
-               write_sysreg_el1(val, SYS_AFSR1);
-               break;
-       case SYS_MAIR_EL1:
-               write_sysreg_el1(val, SYS_MAIR);
-               break;
-       case SYS_AMAIR_EL1:
-               write_sysreg_el1(val, SYS_AMAIR);
-               break;
-       case SYS_CONTEXTIDR_EL1:
-               write_sysreg_el1(val, SYS_CONTEXTIDR);
-               break;
-       default:
-               return false;
-       }
-
-       __kvm_skip_instr(vcpu);
-       return true;
-}
-
-/*
- * Return true when we were able to fixup the guest exit and should return to
- * the guest, false when we should restore the host state and return to the
- * main run loop.
- */
-static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
-       if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
-               vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
-
-       /*
-        * We're using the raw exception code in order to only process
-        * the trap if no SError is pending. We will come back to the
-        * same PC once the SError has been injected, and replay the
-        * trapping instruction.
-        */
-       if (*exit_code != ARM_EXCEPTION_TRAP)
-               goto exit;
-
-       if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
-           kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
-           handle_tx2_tvm(vcpu))
-               return true;
-
-       /*
-        * We trap the first access to the FP/SIMD to save the host context
-        * and restore the guest context lazily.
-        * If FP/SIMD is not implemented, handle the trap and inject an
-        * undefined instruction exception to the guest.
-        * Similarly for trapped SVE accesses.
-        */
-       if (__hyp_handle_fpsimd(vcpu))
-               return true;
-
-       if (!__populate_fault_info(vcpu))
-               return true;
-
-       if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
-               bool valid;
-
-               valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
-                       kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
-                       kvm_vcpu_dabt_isvalid(vcpu) &&
-                       !kvm_vcpu_dabt_isextabt(vcpu) &&
-                       !kvm_vcpu_dabt_iss1tw(vcpu);
-
-               if (valid) {
-                       int ret = __vgic_v2_perform_cpuif_access(vcpu);
-
-                       if (ret == 1)
-                               return true;
-
-                       /* Promote an illegal access to an SError.*/
-                       if (ret == -1)
-                               *exit_code = ARM_EXCEPTION_EL1_SERROR;
-
-                       goto exit;
-               }
-       }
-
-       if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
-           (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
-            kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
-               int ret = __vgic_v3_perform_cpuif_access(vcpu);
-
-               if (ret == 1)
-                       return true;
-       }
-
-exit:
-       /* Return to the host kernel and handle the exit */
-       return false;
-}
-
-static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
-{
-       if (!cpus_have_final_cap(ARM64_SSBD))
-               return false;
-
-       return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
-}
-
-static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_ARM64_SSBD
-       /*
-        * The host runs with the workaround always present. If the
-        * guest wants it disabled, so be it...
-        */
-       if (__needs_ssbd_off(vcpu) &&
-           __hyp_this_cpu_read(arm64_ssbd_callback_required))
-               arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
-#endif
-}
-
-static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_ARM64_SSBD
-       /*
-        * If the guest has disabled the workaround, bring it back on.
-        */
-       if (__needs_ssbd_off(vcpu) &&
-           __hyp_this_cpu_read(arm64_ssbd_callback_required))
-               arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
-#endif
-}
-
-/**
- * Disable host events, enable guest events
- */
-static bool __hyp_text __pmu_switch_to_guest(struct kvm_cpu_context *host_ctxt)
-{
-       struct kvm_host_data *host;
-       struct kvm_pmu_events *pmu;
-
-       host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
-       pmu = &host->pmu_events;
-
-       if (pmu->events_host)
-               write_sysreg(pmu->events_host, pmcntenclr_el0);
-
-       if (pmu->events_guest)
-               write_sysreg(pmu->events_guest, pmcntenset_el0);
-
-       return (pmu->events_host || pmu->events_guest);
-}
-
-/**
- * Disable guest events, enable host events
- */
-static void __hyp_text __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt)
-{
-       struct kvm_host_data *host;
-       struct kvm_pmu_events *pmu;
-
-       host = container_of(host_ctxt, struct kvm_host_data, host_ctxt);
-       pmu = &host->pmu_events;
-
-       if (pmu->events_guest)
-               write_sysreg(pmu->events_guest, pmcntenclr_el0);
-
-       if (pmu->events_host)
-               write_sysreg(pmu->events_host, pmcntenset_el0);
-}
-
 /* Switch to the guest for VHE systems running in EL2 */
 static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 {
@@ -691,7 +155,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
 }
 NOKPROBE_SYMBOL(__kvm_vcpu_run_vhe);
 
-int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
+int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
 {
        int ret;
 
@@ -726,126 +190,8 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
        return ret;
 }
 
-/* Switch to the guest for legacy non-VHE systems */
-int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpu_context *host_ctxt;
-       struct kvm_cpu_context *guest_ctxt;
-       bool pmu_switch_needed;
-       u64 exit_code;
-
-       /*
-        * Having IRQs masked via PMR when entering the guest means the GIC
-        * will not signal the CPU of interrupts of lower priority, and the
-        * only way to get out will be via guest exceptions.
-        * Naturally, we want to avoid this.
-        */
-       if (system_uses_irq_prio_masking()) {
-               gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-               pmr_sync();
-       }
-
-       vcpu = kern_hyp_va(vcpu);
-
-       host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context);
-       host_ctxt->__hyp_running_vcpu = vcpu;
-       guest_ctxt = &vcpu->arch.ctxt;
-
-       pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
-
-       __sysreg_save_state_nvhe(host_ctxt);
-
-       /*
-        * We must restore the 32-bit state before the sysregs, thanks
-        * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72).
-        *
-        * Also, and in order to be able to deal with erratum #1319537 (A57)
-        * and #1319367 (A72), we must ensure that all VM-related sysreg are
-        * restored before we enable S2 translation.
-        */
-       __sysreg32_restore_state(vcpu);
-       __sysreg_restore_state_nvhe(guest_ctxt);
-
-       __activate_vm(kern_hyp_va(vcpu->kvm));
-       __activate_traps(vcpu);
-
-       __hyp_vgic_restore_state(vcpu);
-       __timer_enable_traps(vcpu);
-
-       __debug_switch_to_guest(vcpu);
-
-       __set_guest_arch_workaround_state(vcpu);
-
-       do {
-               /* Jump in the fire! */
-               exit_code = __guest_enter(vcpu, host_ctxt);
-
-               /* And we're baaack! */
-       } while (fixup_guest_exit(vcpu, &exit_code));
-
-       __set_host_arch_workaround_state(vcpu);
-
-       __sysreg_save_state_nvhe(guest_ctxt);
-       __sysreg32_save_state(vcpu);
-       __timer_disable_traps(vcpu);
-       __hyp_vgic_save_state(vcpu);
-
-       __deactivate_traps(vcpu);
-       __deactivate_vm(vcpu);
-
-       __sysreg_restore_state_nvhe(host_ctxt);
-
-       if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
-               __fpsimd_save_fpexc32(vcpu);
-
-       /*
-        * This must come after restoring the host sysregs, since a non-VHE
-        * system may enable SPE here and make use of the TTBRs.
-        */
-       __debug_switch_to_host(vcpu);
-
-       if (pmu_switch_needed)
-               __pmu_switch_to_host(host_ctxt);
-
-       /* Returning to host will clear PSR.I, remask PMR if needed */
-       if (system_uses_irq_prio_masking())
-               gic_write_pmr(GIC_PRIO_IRQOFF);
-
-       return exit_code;
-}
-
-static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
-
-static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
-                                            struct kvm_cpu_context *__host_ctxt)
-{
-       struct kvm_vcpu *vcpu;
-       unsigned long str_va;
-
-       vcpu = __host_ctxt->__hyp_running_vcpu;
-
-       if (read_sysreg(vttbr_el2)) {
-               __timer_disable_traps(vcpu);
-               __deactivate_traps(vcpu);
-               __deactivate_vm(vcpu);
-               __sysreg_restore_state_nvhe(__host_ctxt);
-       }
-
-       /*
-        * Force the panic string to be loaded from the literal pool,
-        * making sure it is a kernel address and not a PC-relative
-        * reference.
-        */
-       asm volatile("ldr %0, =%1" : "=r" (str_va) : "S" (__hyp_panic_string));
-
-       __hyp_do_panic(str_va,
-                      spsr, elr,
-                      read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
-                      read_sysreg(hpfar_el2), par, vcpu);
-}
-
-static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
-                                struct kvm_cpu_context *host_ctxt)
+static void __hyp_call_panic(u64 spsr, u64 elr, u64 par,
+                            struct kvm_cpu_context *host_ctxt)
 {
        struct kvm_vcpu *vcpu;
        vcpu = host_ctxt->__hyp_running_vcpu;
@@ -858,18 +204,14 @@ static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
              read_sysreg_el2(SYS_ESR),   read_sysreg_el2(SYS_FAR),
              read_sysreg(hpfar_el2), par, vcpu);
 }
-NOKPROBE_SYMBOL(__hyp_call_panic_vhe);
+NOKPROBE_SYMBOL(__hyp_call_panic);
 
-void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
+void __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
 {
        u64 spsr = read_sysreg_el2(SYS_SPSR);
        u64 elr = read_sysreg_el2(SYS_ELR);
        u64 par = read_sysreg(par_el1);
 
-       if (!has_vhe())
-               __hyp_call_panic_nvhe(spsr, elr, par, host_ctxt);
-       else
-               __hyp_call_panic_vhe(spsr, elr, par, host_ctxt);
-
+       __hyp_call_panic(spsr, elr, par, host_ctxt);
        unreachable();
 }
diff --git a/arch/arm64/kvm/hyp/switch.h b/arch/arm64/kvm/hyp/switch.h
new file mode 100644
index 000000000000..00ecb0111e79
--- /dev/null
+++ b/arch/arm64/kvm/hyp/switch.h
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyng...@arm.com>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <uapi/linux/psci.h>
+
+#include <kvm/arm_psci.h>
+
+#include <asm/barrier.h>
+#include <asm/cpufeature.h>
+#include <asm/kprobes.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
+#include <asm/fpsimd.h>
+#include <asm/debug-monitors.h>
+#include <asm/processor.h>
+#include <asm/thread_info.h>
+
+static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n";
+
+/* Check whether the FP regs were dirtied while in the host-side run loop: */
+static inline bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu)
+{
+       /*
+        * When the system doesn't support FP/SIMD, we cannot rely on
+        * the _TIF_FOREIGN_FPSTATE flag. However, we always inject an
+        * abort on the very first access to FP and thus we should never
+        * see KVM_ARM64_FP_ENABLED. For added safety, make sure we always
+        * trap the accesses.
+        */
+       if (!system_supports_fpsimd() ||
+           vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE)
+               vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+                                     KVM_ARM64_FP_HOST);
+
+       return !!(vcpu->arch.flags & KVM_ARM64_FP_ENABLED);
+}
+
+/* Save the 32-bit only FPSIMD system register state */
+static inline void __hyp_text __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
+{
+       if (!vcpu_el1_is_32bit(vcpu))
+               return;
+
+       vcpu->arch.ctxt.sys_regs[FPEXC32_EL2] = read_sysreg(fpexc32_el2);
+}
+
+static inline void __hyp_text __activate_traps_fpsimd32(struct kvm_vcpu *vcpu)
+{
+       /*
+        * We are about to set CPTR_EL2.TFP to trap all floating point
+        * register accesses to EL2, however, the ARM ARM clearly states that
+        * traps are only taken to EL2 if the operation would not otherwise
+        * trap to EL1.  Therefore, always make sure that for 32-bit guests,
+        * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+        * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to
+        * it will cause an exception.
+        */
+       if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) {
+               write_sysreg(1 << 30, fpexc32_el2);
+               isb();
+       }
+}
+
+static inline void __hyp_text __activate_traps_common(struct kvm_vcpu *vcpu)
+{
+       /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */
+       write_sysreg(1 << 15, hstr_el2);
+
+       /*
+        * Make sure we trap PMU access from EL0 to EL2. Also sanitize
+        * PMSELR_EL0 to make sure it never contains the cycle
+        * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
+        * EL1 instead of being trapped to EL2.
+        */
+       write_sysreg(0, pmselr_el0);
+       write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+       write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
+}
+
+static inline void __hyp_text __deactivate_traps_common(void)
+{
+       write_sysreg(0, hstr_el2);
+       write_sysreg(0, pmuserenr_el0);
+}
+
+static inline void __hyp_text ___activate_traps(struct kvm_vcpu *vcpu)
+{
+       u64 hcr = vcpu->arch.hcr_el2;
+
+       if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
+               hcr |= HCR_TVM;
+
+       write_sysreg(hcr, hcr_el2);
+
+       if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
+               write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
+}
+
+static inline void __hyp_text ___deactivate_traps(struct kvm_vcpu *vcpu)
+{
+       /*
+        * If we pended a virtual abort, preserve it until it gets
+        * cleared. See D1.14.3 (Virtual Interrupts) for details, but
+        * the crucial bit is "On taking a vSError interrupt,
+        * HCR_EL2.VSE is cleared to 0."
+        */
+       if (vcpu->arch.hcr_el2 & HCR_VSE) {
+               vcpu->arch.hcr_el2 &= ~HCR_VSE;
+               vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
+       }
+}
+
+static inline void __hyp_text __activate_vm(struct kvm *kvm)
+{
+       __load_guest_stage2(kvm);
+}
+
+static inline bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
+{
+       u64 par, tmp;
+
+       /*
+        * Resolve the IPA the hard way using the guest VA.
+        *
+        * Stage-1 translation already validated the memory access
+        * rights. As such, we can use the EL1 translation regime, and
+        * don't have to distinguish between EL0 and EL1 access.
+        *
+        * We do need to save/restore PAR_EL1 though, as we haven't
+        * saved the guest context yet, and we may return early...
+        */
+       par = read_sysreg(par_el1);
+       asm volatile("at s1e1r, %0" : : "r" (far));
+       isb();
+
+       tmp = read_sysreg(par_el1);
+       write_sysreg(par, par_el1);
+
+       if (unlikely(tmp & SYS_PAR_EL1_F))
+               return false; /* Translation failed, back to guest */
+
+       /* Convert PAR to HPFAR format */
+       *hpfar = PAR_TO_HPFAR(tmp);
+       return true;
+}
+
+static inline bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
+{
+       u8 ec;
+       u64 esr;
+       u64 hpfar, far;
+
+       esr = vcpu->arch.fault.esr_el2;
+       ec = ESR_ELx_EC(esr);
+
+       if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
+               return true;
+
+       far = read_sysreg_el2(SYS_FAR);
+
+       /*
+        * The HPFAR can be invalid if the stage 2 fault did not
+        * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
+        * bit is clear) and one of the two following cases are true:
+        *   1. The fault was due to a permission fault
+        *   2. The processor carries errata 834220
+        *
+        * Therefore, for all non S1PTW faults where we either have a
+        * permission fault or the errata workaround is enabled, we
+        * resolve the IPA using the AT instruction.
+        */
+       if (!(esr & ESR_ELx_S1PTW) &&
+           (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
+            (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
+               if (!__translate_far_to_hpfar(far, &hpfar))
+                       return false;
+       } else {
+               hpfar = read_sysreg(hpfar_el2);
+       }
+
+       vcpu->arch.fault.far_el2 = far;
+       vcpu->arch.fault.hpfar_el2 = hpfar;
+       return true;
+}
+
+/* Check for an FPSIMD/SVE trap and handle as appropriate */
+static inline bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
+{
+       bool vhe, sve_guest, sve_host;
+       u8 hsr_ec;
+
+       if (!system_supports_fpsimd())
+               return false;
+
+       if (system_supports_sve()) {
+               sve_guest = vcpu_has_sve(vcpu);
+               sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
+               vhe = true;
+       } else {
+               sve_guest = false;
+               sve_host = false;
+               vhe = has_vhe();
+       }
+
+       hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+       if (hsr_ec != ESR_ELx_EC_FP_ASIMD &&
+           hsr_ec != ESR_ELx_EC_SVE)
+               return false;
+
+       /* Don't handle SVE traps for non-SVE vcpus here: */
+       if (!sve_guest)
+               if (hsr_ec != ESR_ELx_EC_FP_ASIMD)
+                       return false;
+
+       /* Valid trap.  Switch the context: */
+
+       if (vhe) {
+               u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
+
+               if (sve_guest)
+                       reg |= CPACR_EL1_ZEN;
+
+               write_sysreg(reg, cpacr_el1);
+       } else {
+               write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
+                            cptr_el2);
+       }
+
+       isb();
+
+       if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
+               /*
+                * In the SVE case, VHE is assumed: it is enforced by
+                * Kconfig and kvm_arch_init().
+                */
+               if (sve_host) {
+                       struct thread_struct *thread = container_of(
+                               vcpu->arch.host_fpsimd_state,
+                               struct thread_struct, uw.fpsimd_state);
+
+                       sve_save_state(sve_pffr(thread),
+                                      &vcpu->arch.host_fpsimd_state->fpsr);
+               } else {
+                       __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+               }
+
+               vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
+       }
+
+       if (sve_guest) {
+               sve_load_state(vcpu_sve_pffr(vcpu),
+                              &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
+                              sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
+               write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
+       } else {
+               __fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+       }
+
+       /* Skip restoring fpexc32 for AArch64 guests */
+       if (!(read_sysreg(hcr_el2) & HCR_RW))
+               write_sysreg(vcpu->arch.ctxt.sys_regs[FPEXC32_EL2],
+                            fpexc32_el2);
+
+       vcpu->arch.flags |= KVM_ARM64_FP_ENABLED;
+
+       return true;
+}
+
+static inline bool __hyp_text handle_tx2_tvm(struct kvm_vcpu *vcpu)
+{
+       u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_hsr(vcpu));
+       int rt = kvm_vcpu_sys_get_rt(vcpu);
+       u64 val = vcpu_get_reg(vcpu, rt);
+
+       /*
+        * The normal sysreg handling code expects to see the traps,
+        * let's not do anything here.
+        */
+       if (vcpu->arch.hcr_el2 & HCR_TVM)
+               return false;
+
+       switch (sysreg) {
+       case SYS_SCTLR_EL1:
+               write_sysreg_el1(val, SYS_SCTLR);
+               break;
+       case SYS_TTBR0_EL1:
+               write_sysreg_el1(val, SYS_TTBR0);
+               break;
+       case SYS_TTBR1_EL1:
+               write_sysreg_el1(val, SYS_TTBR1);
+               break;
+       case SYS_TCR_EL1:
+               write_sysreg_el1(val, SYS_TCR);
+               break;
+       case SYS_ESR_EL1:
+               write_sysreg_el1(val, SYS_ESR);
+               break;
+       case SYS_FAR_EL1:
+               write_sysreg_el1(val, SYS_FAR);
+               break;
+       case SYS_AFSR0_EL1:
+               write_sysreg_el1(val, SYS_AFSR0);
+               break;
+       case SYS_AFSR1_EL1:
+               write_sysreg_el1(val, SYS_AFSR1);
+               break;
+       case SYS_MAIR_EL1:
+               write_sysreg_el1(val, SYS_MAIR);
+               break;
+       case SYS_AMAIR_EL1:
+               write_sysreg_el1(val, SYS_AMAIR);
+               break;
+       case SYS_CONTEXTIDR_EL1:
+               write_sysreg_el1(val, SYS_CONTEXTIDR);
+               break;
+       default:
+               return false;
+       }
+
+       __kvm_skip_instr(vcpu);
+       return true;
+}
+
+/*
+ * Return true when we were able to fixup the guest exit and should return to
+ * the guest, false when we should restore the host state and return to the
+ * main run loop.
+ */
+static inline bool __hyp_text
+fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+       if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
+               vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
+
+       /*
+        * We're using the raw exception code in order to only process
+        * the trap if no SError is pending. We will come back to the
+        * same PC once the SError has been injected, and replay the
+        * trapping instruction.
+        */
+       if (*exit_code != ARM_EXCEPTION_TRAP)
+               goto exit;
+
+       if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) &&
+           kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 &&
+           handle_tx2_tvm(vcpu))
+               return true;
+
+       /*
+        * We trap the first access to the FP/SIMD to save the host context
+        * and restore the guest context lazily.
+        * If FP/SIMD is not implemented, handle the trap and inject an
+        * undefined instruction exception to the guest.
+        * Similarly for trapped SVE accesses.
+        */
+       if (__hyp_handle_fpsimd(vcpu))
+               return true;
+
+       if (!__populate_fault_info(vcpu))
+               return true;
+
+       if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
+               bool valid;
+
+               valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW &&
+                       kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
+                       kvm_vcpu_dabt_isvalid(vcpu) &&
+                       !kvm_vcpu_dabt_isextabt(vcpu) &&
+                       !kvm_vcpu_dabt_iss1tw(vcpu);
+
+               if (valid) {
+                       int ret = __vgic_v2_perform_cpuif_access(vcpu);
+
+                       if (ret == 1)
+                               return true;
+
+                       /* Promote an illegal access to an SError.*/
+                       if (ret == -1)
+                               *exit_code = ARM_EXCEPTION_EL1_SERROR;
+
+                       goto exit;
+               }
+       }
+
+       if (static_branch_unlikely(&vgic_v3_cpuif_trap) &&
+           (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 ||
+            kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) {
+               int ret = __vgic_v3_perform_cpuif_access(vcpu);
+
+               if (ret == 1)
+                       return true;
+       }
+
+exit:
+       /* Return to the host kernel and handle the exit */
+       return false;
+}
+
+static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu)
+{
+       if (!cpus_have_final_cap(ARM64_SSBD))
+               return false;
+
+       return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);
+}
+
+static inline void __hyp_text
+__set_guest_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+       /*
+        * The host runs with the workaround always present. If the
+        * guest wants it disabled, so be it...
+        */
+       if (__needs_ssbd_off(vcpu) &&
+           __hyp_this_cpu_read(arm64_ssbd_callback_required))
+               arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL);
+#endif
+}
+
+static inline void __hyp_text
+__set_host_arch_workaround_state(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ARM64_SSBD
+       /*
+        * If the guest has disabled the workaround, bring it back on.
+        */
+       if (__needs_ssbd_off(vcpu) &&
+           __hyp_this_cpu_read(arm64_ssbd_callback_required))
+               arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL);
+#endif
+}
diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c
index 75b1925763f1..7a261ace2405 100644
--- a/arch/arm64/kvm/hyp/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/sysreg-sr.c
@@ -125,7 +125,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
                /*
                 * Must only be done for guest registers, hence the context
                 * test. We're coming from the host, so SCTLR.M is already
-                * set. Pairs with __activate_traps_nvhe().
+                * set. Pairs with nVHE's __activate_traps().
                 */
                write_sysreg_el1((ctxt->sys_regs[TCR_EL1] |
                                  TCR_EPD1_MASK | TCR_EPD0_MASK),
@@ -153,7 +153,7 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
            ctxt->__hyp_running_vcpu) {
                /*
                 * Must only be done for host registers, hence the context
-                * test. Pairs with __deactivate_traps_nvhe().
+                * test. Pairs with nVHE's __deactivate_traps().
                 */
                isb();
                /*
-- 
2.26.1
