Register the tlb_remote_flush callback for VMX when the Hyper-V nested guest mapping flush capability is detected. The interface helps reduce the overhead of flushing EPT tables across vCPUs for a nested VM. The traditional way is to send IPIs to all affected vCPUs and execute INVEPT on each of them, which triggers several VM exits for IPI and INVEPT emulation. Hyper-V provides a hypercall that performs the flush for all vCPUs at once; use that hypercall when all EPT table pointers of a single VM are the same.
Signed-off-by: Lan Tianyu <tianyu....@microsoft.com> --- Change since v2: Make ept_pointers_match as tristate "check", "match" and "mismatch". Set "check" in vmx_set_cr3(), check all ept table pointers in hv_remote_flush_tlb() and call hypercall when all ept pointers are same. Change since v1: Replace identical_ept_pointer with ept_pointers_match and check kvm_x86_ops->tlb_remote_flush in check_ept_pointer(). --- arch/x86/kvm/vmx.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1689f433f3a0..601ee37937a9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -188,12 +188,21 @@ module_param(ple_window_max, uint, 0444); extern const ulong vmx_return; +enum ept_pointers_status { + EPT_POINTERS_CHECK = 0, + EPT_POINTERS_MATCH = 1, + EPT_POINTERS_MISMATCH = 2 +}; + struct kvm_vmx { struct kvm kvm; unsigned int tss_addr; bool ept_identity_pagetable_done; gpa_t ept_identity_map_addr; + + enum ept_pointers_status ept_pointers_match; + spinlock_t ept_pointer_lock; }; #define NR_AUTOLOAD_MSRS 8 @@ -853,6 +862,7 @@ struct vcpu_vmx { */ u64 msr_ia32_feature_control; u64 msr_ia32_feature_control_valid_bits; + u64 ept_pointer; }; enum segment_cache_field { @@ -4774,6 +4784,48 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid, } } +/* check_ept_pointer() should be under protection of ept_pointer_lock. 
*/ +static void check_ept_pointer(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + u64 tmp_eptp = INVALID_PAGE; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!VALID_PAGE(tmp_eptp)) { + tmp_eptp = to_vmx(vcpu)->ept_pointer; + } else if (tmp_eptp != to_vmx(vcpu)->ept_pointer) { + to_kvm_vmx(kvm)->ept_pointers_match + = EPT_POINTERS_MISMATCH; + return; + } + } + + to_kvm_vmx(kvm)->ept_pointers_match = EPT_POINTERS_MATCH; +} + +static int hv_remote_flush_tlb(struct kvm *kvm) +{ + int ret; + + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); + + if (to_kvm_vmx(kvm)->ept_pointers_match == EPT_POINTERS_CHECK) + check_ept_pointer(kvm); + + if (to_kvm_vmx(kvm)->ept_pointers_match != EPT_POINTERS_MATCH) { + ret = -EFAULT; + goto out; + } + + ret = hyperv_flush_guest_mapping( + to_vmx(kvm_get_vcpu(kvm, 0))->ept_pointer); + +out: + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); + return ret; +} + static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); @@ -4960,6 +5012,7 @@ static u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { + struct kvm *kvm = vcpu->kvm; unsigned long guest_cr3; u64 eptp; @@ -4967,11 +5020,20 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (enable_ept) { eptp = construct_eptp(vcpu, cr3); vmcs_write64(EPT_POINTER, eptp); + + if (kvm_x86_ops->tlb_remote_flush) { + spin_lock(&to_kvm_vmx(kvm)->ept_pointer_lock); + to_vmx(vcpu)->ept_pointer = eptp; + to_kvm_vmx(kvm)->ept_pointers_match + = EPT_POINTERS_CHECK; + spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); + } + if (enable_unrestricted_guest || is_paging(vcpu) || is_guest_mode(vcpu)) guest_cr3 = kvm_read_cr3(vcpu); else - guest_cr3 = to_kvm_vmx(vcpu->kvm)->ept_identity_map_addr; + guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; ept_load_pdptrs(vcpu); } @@ -7538,6 +7600,12 @@ static __init int hardware_setup(void) if 
(enable_ept && !cpu_has_vmx_ept_2m_page()) kvm_disable_largepages(); +#if IS_ENABLED(CONFIG_HYPERV) + if (ms_hyperv.nested_features & HV_X64_NESTED_GUEST_MAPPING_FLUSH + && enable_ept) + kvm_x86_ops->tlb_remote_flush = hv_remote_flush_tlb; +#endif + if (!cpu_has_vmx_ple()) { ple_gap = 0; ple_window = 0; @@ -10383,6 +10451,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) static int vmx_vm_init(struct kvm *kvm) { + spin_lock_init(&to_kvm_vmx(kvm)->ept_pointer_lock); + if (!ple_gap) kvm->arch.pause_in_guest = true; return 0; -- 2.14.3