On 10.07.2017 22:49, Bandan Das wrote:
> When L2 uses vmfunc, L0 utilizes the associated vmexit to
> emulate a switching of the ept pointer by reloading the
> guest MMU.
> 
> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
> Signed-off-by: Bandan Das <b...@redhat.com>
> ---
>  arch/x86/include/asm/vmx.h |  6 +++++
>  arch/x86/kvm/vmx.c         | 58 +++++++++++++++++++++++++++++++++++++++++++---
>  2 files changed, 61 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index da5375e..5f63a2e 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -115,6 +115,10 @@
>  #define VMX_MISC_SAVE_EFER_LMA                       0x00000020
>  #define VMX_MISC_ACTIVITY_HLT                        0x00000040
>  
> +/* VMFUNC functions */
> +#define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
> +#define VMFUNC_EPTP_ENTRIES  512
> +
>  static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
>  {
>       return vmx_basic & GENMASK_ULL(30, 0);
> @@ -200,6 +204,8 @@ enum vmcs_field {
>       EOI_EXIT_BITMAP2_HIGH           = 0x00002021,
>       EOI_EXIT_BITMAP3                = 0x00002022,
>       EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
> +     EPTP_LIST_ADDRESS               = 0x00002024,
> +     EPTP_LIST_ADDRESS_HIGH          = 0x00002025,
>       VMREAD_BITMAP                   = 0x00002026,
>       VMWRITE_BITMAP                  = 0x00002028,
>       XSS_EXIT_BITMAP                 = 0x0000202C,
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index fe8f5fc..0a969fb 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -246,6 +246,7 @@ struct __packed vmcs12 {
>       u64 eoi_exit_bitmap1;
>       u64 eoi_exit_bitmap2;
>       u64 eoi_exit_bitmap3;
> +     u64 eptp_list_address;
>       u64 xss_exit_bitmap;
>       u64 guest_physical_address;
>       u64 vmcs_link_pointer;
> @@ -771,6 +772,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
>       FIELD64(EOI_EXIT_BITMAP1, eoi_exit_bitmap1),
>       FIELD64(EOI_EXIT_BITMAP2, eoi_exit_bitmap2),
>       FIELD64(EOI_EXIT_BITMAP3, eoi_exit_bitmap3),
> +     FIELD64(EPTP_LIST_ADDRESS, eptp_list_address),
>       FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
>       FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
>       FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
> @@ -1402,6 +1404,13 @@ static inline bool nested_cpu_has_vmfunc(struct vmcs12 *vmcs12)
>       return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_VMFUNC);
>  }
>  
> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
> +{
> +     return nested_cpu_has_vmfunc(vmcs12) &&
> +             (vmcs12->vm_function_control &

I wonder if it makes sense to rename vm_function_control to
- vmfunc_control
- vmfunc_controls (so it matches nested_vmx_vmfunc_controls)
- vmfunc_ctrl

> +              VMX_VMFUNC_EPTP_SWITCHING);
> +}
> +
>  static inline bool is_nmi(u32 intr_info)
>  {
>       return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>       if (cpu_has_vmx_vmfunc()) {
>               vmx->nested.nested_vmx_secondary_ctls_high |=
>                       SECONDARY_EXEC_ENABLE_VMFUNC;
> -             vmx->nested.nested_vmx_vmfunc_controls = 0;
> +             /*
> +              * Advertise EPTP switching unconditionally
> +              * since we emulate it
> +              */
> +             vmx->nested.nested_vmx_vmfunc_controls =
> +                     VMX_VMFUNC_EPTP_SWITCHING;
>     }
>  
>       /*
> @@ -7772,6 +7786,9 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
>       struct vcpu_vmx *vmx = to_vmx(vcpu);
>       struct vmcs12 *vmcs12;
>       u32 function = vcpu->arch.regs[VCPU_REGS_RAX];
> +     u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
> +     struct page *page = NULL;
> +     u64 *l1_eptp_list, address;
>  
>       /*
>        * VMFUNC is only supported for nested guests, but we always enable the
> @@ -7784,11 +7801,46 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
>       }
>  
>       vmcs12 = get_vmcs12(vcpu);
> -     if ((vmcs12->vm_function_control & (1 << function)) == 0)
> +     if (((vmcs12->vm_function_control & (1 << function)) == 0) ||
> +         WARN_ON_ONCE(function))

"... instruction causes a VM exit if the bit at position EAX is 0 in the
VM-function controls (the selected VM function is
not enabled)."

So the L2 guest can trigger this WARN_ON_ONCE, no? I think we should then
drop it completely.
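
I.e. just keep the plain check (untested sketch, same fail path as in your
patch):

if (!(vmcs12->vm_function_control & (1 << function)))
        goto fail;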

> +             goto fail;
> +
> +     if (!nested_cpu_has_ept(vmcs12) ||
> +         !nested_cpu_has_eptp_switching(vmcs12))
> +             goto fail;
> +
> +     if (!vmcs12->eptp_list_address || index >= VMFUNC_EPTP_ENTRIES)
> +             goto fail;

I can find the definition of a vmexit for the case index >=
VMFUNC_EPTP_ENTRIES in the SDM, but not for !vmcs12->eptp_list_address.

Can you give me a hint?

> +
> +     page = nested_get_page(vcpu, vmcs12->eptp_list_address);
> +     if (!page)
>               goto fail;
> -     WARN_ONCE(1, "VMCS12 VM function control should have been zero");
> +
> +     l1_eptp_list = kmap(page);
> +     address = l1_eptp_list[index];
> +     if (!address)
> +             goto fail;

Can you move that check down to the other address checks? (or rework it if
that makes sense, see the sketch right below and my other comments)
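
Something like this maybe (just a sketch, folding the !address check into
the existing ones; note that an all-zero entry that happens to match the
current ept_pointer would then be a nop instead of failing):

if (vmcs12->ept_pointer != address) {
        if (!address ||
            address >> cpuid_maxphyaddr(vcpu) ||
            !IS_ALIGNED(address, 4096))
                goto fail;
        kvm_mmu_unload(vcpu);
        vmcs12->ept_pointer = address;
        kvm_mmu_reload(vcpu);
}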

> +     /*
> +      * If the (L2) guest does a vmfunc to the currently
> +      * active ept pointer, we don't have to do anything else
> +      */
> +     if (vmcs12->ept_pointer != address) {
> +             if (address >> cpuid_maxphyaddr(vcpu) ||
> +                 !IS_ALIGNED(address, 4096))

Couldn't the pfn still be invalid and make kvm_mmu_reload() fail?
(triggering a KVM_REQ_TRIPLE_FAULT)

> +                     goto fail;
> +             kvm_mmu_unload(vcpu);
> +             vmcs12->ept_pointer = address;
> +             kvm_mmu_reload(vcpu);

I was thinking about something like this:

u64 old = vmcs12->ept_pointer;

kvm_mmu_unload(vcpu);
vmcs12->ept_pointer = address;
if (kvm_mmu_reload(vcpu)) {
        /* pointer invalid, restore previous state */
        kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
        vmcs12->ept_pointer = old;
        kvm_mmu_reload(vcpu);
        goto fail;
}

Then you can inherit the checks from mmu_check_root().
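
From memory (please double-check), mmu_check_root() is roughly

static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
{
        int ret = 0;

        if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) {
                kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
                ret = 1;
        }

        return ret;
}

so an invalid root gfn would otherwise just end up as a triple fault for
the guest.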


I wonder why I can't spot checks for cpuid_maxphyaddr() /
IS_ALIGNED(address, 4096) for ordinary use of vmcs12->ept_pointer. The
checks should be identical.
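
Maybe both places could share a small common helper (name made up, just a
sketch):

static bool nested_ept_address_valid(struct kvm_vcpu *vcpu, u64 address)
{
        /* sketch: same limits as for the EPTP switching case above */
        return !(address >> cpuid_maxphyaddr(vcpu)) &&
               IS_ALIGNED(address, 4096);
}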


> +             kunmap(page);
> +             nested_release_page_clean(page);

Shouldn't the kunmap + nested_release_page_clean go outside the if clause?
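
I.e. move them right before the return, so the "EPTP unchanged" case doesn't
leak the kmap and the page reference:

        kvm_mmu_reload(vcpu);
}
kunmap(page);
nested_release_page_clean(page);
return kvm_skip_emulated_instruction(vcpu);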

> +     }
> +     return kvm_skip_emulated_instruction(vcpu);
>  
>  fail:
> +     if (page) {
> +             kunmap(page);
> +             nested_release_page_clean(page);
> +     }
>       nested_vmx_vmexit(vcpu, vmx->exit_reason,
>                         vmcs_read32(VM_EXIT_INTR_INFO),
>                         vmcs_readl(EXIT_QUALIFICATION));
> 

David and mmu code are not yet best friends. So sorry if I am missing
something.

-- 

Thanks,

David
