Hi David,

David Hildenbrand <da...@redhat.com> writes:

>> +static inline bool nested_cpu_has_eptp_switching(struct vmcs12 *vmcs12)
>> +{
>> +    return nested_cpu_has_vmfunc(vmcs12) &&
>> +            (vmcs12->vm_function_control &
>> +             VMX_VMFUNC_EPTP_SWITCHING);
>> +}
>> +
>>  static inline bool is_nmi(u32 intr_info)
>>  {
>>      return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
>> @@ -2791,7 +2800,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
>>      if (cpu_has_vmx_vmfunc()) {
>>              vmx->nested.nested_vmx_secondary_ctls_high |=
>>                      SECONDARY_EXEC_ENABLE_VMFUNC;
>> -            vmx->nested.nested_vmx_vmfunc_controls = 0;
>> +            /*
>> +             * Advertise EPTP switching unconditionally
>> +             * since we emulate it
>> +             */
>> +            vmx->nested.nested_vmx_vmfunc_controls =
>> +                    VMX_VMFUNC_EPTP_SWITCHING;
>
> Should this only be advertised, if enable_ept is set (if the guest also
> sees/can use SECONDARY_EXEC_ENABLE_EPT)?

This represents the function control MSR, which on the hardware is
a read-only value. The checks for enable_ept and such are done elsewhere.
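Roughly (condensing the hunks below, not verbatim code): the advertised
MSR value is static, while the EPT requirements are enforced both at
vm-entry time and when the VMFUNC exit is handled:

	/* check_vmentry_prereqs(): reject control bits we don't advertise */
	if (vmcs12->vm_function_control &
	    ~vmx->nested.nested_vmx_vmfunc_controls)
		return VMXERR_ENTRY_INVALID_CONTROL_FIELD;

	/* nested_vmx_eptp_switching(): refuse the switch unless vmcs12 has EPT */
	if (!nested_cpu_has_eptp_switching(vmcs12) ||
	    !nested_cpu_has_ept(vmcs12))
		return 1;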

>>      }
>>  
>>      /*
>> @@ -7767,6 +7781,85 @@ static int handle_preemption_timer(struct kvm_vcpu *vcpu)
>>      return 1;
>>  }
>>  
>> +static bool check_ept_address_valid(struct kvm_vcpu *vcpu, u64 address)
>
> check_..._valid -> valid_ept_address() ?

I think either of the names is fine and I would prefer not
to respin unless you feel really strongly about it :) 

>
>> +{
>> +    struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +    u64 mask = VMX_EPT_RWX_MASK;
>> +    int maxphyaddr = cpuid_maxphyaddr(vcpu);
>> +    struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
>> +
>> +    /* Check for execute_only validity */
>> +    if ((address & mask) == VMX_EPT_EXECUTABLE_MASK) {
>> +            if (!(vmx->nested.nested_vmx_ept_caps &
>> +                  VMX_EPT_EXECUTE_ONLY_BIT))
>> +                    return false;
>> +    }
>> +
>> +    /* Bits 5:3 must be 3 */
>> +    if (((address >> VMX_EPT_GAW_EPTP_SHIFT) & 0x7) != VMX_EPT_DEFAULT_GAW)
>> +            return false;
>> +
>> +    /* Reserved bits should not be set */
>> +    if (address >> maxphyaddr || ((address >> 7) & 0x1f))
>> +            return false;
>> +
>> +    /* AD, if set, should be supported */
>> +    if ((address & VMX_EPT_AD_ENABLE_BIT)) {
>> +            if (!enable_ept_ad_bits)
>> +                    return false;
>> +            mmu->ept_ad = true;
>> +    } else
>> +            mmu->ept_ad = false;
>
> I wouldn't expect a "check" function to modify the mmu. Can you move
> modifying the mmu outside of this function (leaving the
> enable_ept_ad_bits check in place)? (and maybe even set mmu->ept_ad
> _after_ the kvm_mmu_unload(vcpu)?, just when setting vmcs12->ept_pointer?)
>

Well, the correct thing to do is to have a wrapper around it in mmu.c
rather than poking the mmu directly here, and also to call this function
before the nested mmu is initialized. I am working on a separate patch for
this, btw. Setting mmu->ept_ad after kvm_mmu_unload() seems unnecessary to
me, since it is only being set once everything else has succeeded, and
kvm_mmu_unload() isn't affected by this flag if I understand correctly.

>> +
>> +    return true;
>> +}
>> +
>> +static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
>> +                                 struct vmcs12 *vmcs12)
>> +{
>> +    u32 index = vcpu->arch.regs[VCPU_REGS_RCX];
>> +    u64 *l1_eptp_list, address;
>> +    struct page *page;
>> +
>> +    if (!nested_cpu_has_eptp_switching(vmcs12) ||
>> +        !nested_cpu_has_ept(vmcs12))
>> +            return 1;
>> +
>> +    if (index >= VMFUNC_EPTP_ENTRIES)
>> +            return 1;
>> +
>> +    page = nested_get_page(vcpu, vmcs12->eptp_list_address);
>> +    if (!page)
>> +            return 1;
>> +
>> +    l1_eptp_list = kmap(page);
>> +    address = l1_eptp_list[index];
>> +
>> +    /*
>> +     * If the (L2) guest does a vmfunc to the currently
>> +     * active ept pointer, we don't have to do anything else
>> +     */
>> +    if (vmcs12->ept_pointer != address) {
>> +            if (!check_ept_address_valid(vcpu, address)) {
>> +                    kunmap(page);
>> +                    nested_release_page_clean(page);
>> +                    return 1;
>> +            }
>> +            kvm_mmu_unload(vcpu);
>> +            vmcs12->ept_pointer = address;
>> +            /*
>> +             * TODO: Check what's the correct approach in case
>> +             * mmu reload fails. Currently, we just let the next
>> +             * reload potentially fail
>> +             */
>> +            kvm_mmu_reload(vcpu);
>
> So, what actually happens if this generates a triple fault? I guess we
> will kill the (nested) hypervisor?

Yes. Not sure what the right thing to do is, though...

Bandan

>> +    }
>> +
>> +    kunmap(page);
>> +    nested_release_page_clean(page);
>> +    return 0;
>> +}
>> +
>>  static int handle_vmfunc(struct kvm_vcpu *vcpu)
>>  {
>>      struct vcpu_vmx *vmx = to_vmx(vcpu);
>> @@ -7786,7 +7879,16 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
>>      vmcs12 = get_vmcs12(vcpu);
>>      if ((vmcs12->vm_function_control & (1 << function)) == 0)
>>              goto fail;
>> -    WARN_ONCE(1, "VMCS12 VM function control should have been zero");
>> +
>> +    switch (function) {
>> +    case 0:
>> +            if (nested_vmx_eptp_switching(vcpu, vmcs12))
>> +                    goto fail;
>> +            break;
>> +    default:
>> +            goto fail;
>> +    }
>> +    return kvm_skip_emulated_instruction(vcpu);
>>  
>>  fail:
>>      nested_vmx_vmexit(vcpu, vmx->exit_reason,
>> @@ -10354,10 +10456,20 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>>                              vmx->nested.nested_vmx_entry_ctls_high))
>>              return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>>  
>> -    if (nested_cpu_has_vmfunc(vmcs12) &&
>> -        (vmcs12->vm_function_control &
>> -         ~vmx->nested.nested_vmx_vmfunc_controls))
>> -            return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>> +    if (nested_cpu_has_vmfunc(vmcs12)) {
>> +            if (vmcs12->vm_function_control &
>> +                ~vmx->nested.nested_vmx_vmfunc_controls)
>> +                    return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>> +
>> +            if (nested_cpu_has_eptp_switching(vmcs12)) {
>> +                    if (!nested_cpu_has_ept(vmcs12) ||
>> +                        (vmcs12->eptp_list_address >>
>> +                         cpuid_maxphyaddr(vcpu)) ||
>> +                        !IS_ALIGNED(vmcs12->eptp_list_address, 4096))
>> +                            return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>> +            }
>> +    }
>> +
>>  
>>      if (vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu))
>>              return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>> 
