On Sun, Jun 13, 2010 at 03:29:44PM +0300, Nadav Har'El wrote:
> This patch contains code to prepare the VMCS which can be used to actually
> run the L2 guest, vmcs02. prepare_vmcs02 appropriately merges the information
> in shadow_vmcs that L1 built for L2 (vmcs12), and that in the VMCS that we
> built for L1 (vmcs01).
> 
> VMREAD/WRITE can only access one VMCS at a time (the "current" VMCS), which
> makes it difficult for us to read from vmcs01 while writing to vmcs12. This
> is why we first make a copy of vmcs01 in memory (l1_shadow_vmcs) and then
> read that memory copy while writing to vmcs12.
> 
Did you mean vmcs02 instead of vmcs12 in the above paragraph?

> Signed-off-by: Nadav Har'El <n...@il.ibm.com>
> ---
> --- .before/arch/x86/kvm/vmx.c        2010-06-13 15:01:29.000000000 +0300
> +++ .after/arch/x86/kvm/vmx.c 2010-06-13 15:01:29.000000000 +0300
> @@ -849,6 +849,36 @@ static inline bool report_flexpriority(v
>       return flexpriority_enabled;
>  }
>  
> +static inline bool nested_cpu_has_vmx_tpr_shadow(struct kvm_vcpu *vcpu)
> +{
> +     return cpu_has_vmx_tpr_shadow() &&
> +             get_shadow_vmcs(vcpu)->cpu_based_vm_exec_control &
> +             CPU_BASED_TPR_SHADOW;
> +}
> +
> +static inline bool nested_cpu_has_secondary_exec_ctrls(struct kvm_vcpu *vcpu)
> +{
> +     return cpu_has_secondary_exec_ctrls() &&
> +             get_shadow_vmcs(vcpu)->cpu_based_vm_exec_control &
> +             CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
> +}
> +
> +static inline bool nested_vm_need_virtualize_apic_accesses(struct kvm_vcpu
> +                                                        *vcpu)
> +{
> +     return nested_cpu_has_secondary_exec_ctrls(vcpu) &&
> +             (get_shadow_vmcs(vcpu)->secondary_vm_exec_control &
> +             SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
> +}
> +
> +static inline bool nested_cpu_has_vmx_ept(struct kvm_vcpu *vcpu)
> +{
> +     return nested_cpu_has_secondary_exec_ctrls(vcpu) &&
> +             (get_shadow_vmcs(vcpu)->secondary_vm_exec_control &
> +             SECONDARY_EXEC_ENABLE_EPT);
> +}
> +
> +
>  static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
>  {
>       int i;
> @@ -1292,6 +1322,39 @@ static void vmx_load_host_state(struct v
>       preempt_enable();
>  }
>  
> +int load_vmcs_host_state(struct shadow_vmcs *src)
> +{
> +     vmcs_write16(HOST_ES_SELECTOR, src->host_es_selector);
> +     vmcs_write16(HOST_CS_SELECTOR, src->host_cs_selector);
> +     vmcs_write16(HOST_SS_SELECTOR, src->host_ss_selector);
> +     vmcs_write16(HOST_DS_SELECTOR, src->host_ds_selector);
> +     vmcs_write16(HOST_FS_SELECTOR, src->host_fs_selector);
> +     vmcs_write16(HOST_GS_SELECTOR, src->host_gs_selector);
> +     vmcs_write16(HOST_TR_SELECTOR, src->host_tr_selector);
> +
> +     vmcs_write64(TSC_OFFSET, src->tsc_offset);
> +
> +     if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT)
> +             vmcs_write64(HOST_IA32_PAT, src->host_ia32_pat);
> +
> +     vmcs_write32(HOST_IA32_SYSENTER_CS, src->host_ia32_sysenter_cs);
> +
> +     vmcs_writel(HOST_CR0, src->host_cr0);
> +     vmcs_writel(HOST_CR3, src->host_cr3);
> +     vmcs_writel(HOST_CR4, src->host_cr4);
> +     vmcs_writel(HOST_FS_BASE, src->host_fs_base);
> +     vmcs_writel(HOST_GS_BASE, src->host_gs_base);
> +     vmcs_writel(HOST_TR_BASE, src->host_tr_base);
> +     vmcs_writel(HOST_GDTR_BASE, src->host_gdtr_base);
> +     vmcs_writel(HOST_IDTR_BASE, src->host_idtr_base);
> +     vmcs_writel(HOST_RSP, src->host_rsp);
> +     vmcs_writel(HOST_RIP, src->host_rip);
> +     vmcs_writel(HOST_IA32_SYSENTER_ESP, src->host_ia32_sysenter_esp);
> +     vmcs_writel(HOST_IA32_SYSENTER_EIP, src->host_ia32_sysenter_eip);
> +
> +     return 0;
> +}
> +
>  /*
>   * Switches to specified vcpu, until a matching vcpu_put(), but assumes
>   * vcpu mutex is already taken.
> @@ -1922,6 +1985,71 @@ static void vmclear_local_vcpus(void)
>               __vcpu_clear(vmx);
>  }
>  
> +int load_vmcs_common(struct shadow_vmcs *src)
> +{
> +     vmcs_write16(GUEST_ES_SELECTOR, src->guest_es_selector);
> +     vmcs_write16(GUEST_CS_SELECTOR, src->guest_cs_selector);
> +     vmcs_write16(GUEST_SS_SELECTOR, src->guest_ss_selector);
> +     vmcs_write16(GUEST_DS_SELECTOR, src->guest_ds_selector);
> +     vmcs_write16(GUEST_FS_SELECTOR, src->guest_fs_selector);
> +     vmcs_write16(GUEST_GS_SELECTOR, src->guest_gs_selector);
> +     vmcs_write16(GUEST_LDTR_SELECTOR, src->guest_ldtr_selector);
> +     vmcs_write16(GUEST_TR_SELECTOR, src->guest_tr_selector);
> +
> +     vmcs_write64(GUEST_IA32_DEBUGCTL, src->guest_ia32_debugctl);
> +
> +     if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
> +             vmcs_write64(GUEST_IA32_PAT, src->guest_ia32_pat);
> +
> +     vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, src->vm_entry_intr_info_field);
> +     vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
> +                  src->vm_entry_exception_error_code);
> +     vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, src->vm_entry_instruction_len);
> +
> +     vmcs_write32(GUEST_ES_LIMIT, src->guest_es_limit);
> +     vmcs_write32(GUEST_CS_LIMIT, src->guest_cs_limit);
> +     vmcs_write32(GUEST_SS_LIMIT, src->guest_ss_limit);
> +     vmcs_write32(GUEST_DS_LIMIT, src->guest_ds_limit);
> +     vmcs_write32(GUEST_FS_LIMIT, src->guest_fs_limit);
> +     vmcs_write32(GUEST_GS_LIMIT, src->guest_gs_limit);
> +     vmcs_write32(GUEST_LDTR_LIMIT, src->guest_ldtr_limit);
> +     vmcs_write32(GUEST_TR_LIMIT, src->guest_tr_limit);
> +     vmcs_write32(GUEST_GDTR_LIMIT, src->guest_gdtr_limit);
> +     vmcs_write32(GUEST_IDTR_LIMIT, src->guest_idtr_limit);
> +     vmcs_write32(GUEST_ES_AR_BYTES, src->guest_es_ar_bytes);
> +     vmcs_write32(GUEST_CS_AR_BYTES, src->guest_cs_ar_bytes);
> +     vmcs_write32(GUEST_SS_AR_BYTES, src->guest_ss_ar_bytes);
> +     vmcs_write32(GUEST_DS_AR_BYTES, src->guest_ds_ar_bytes);
> +     vmcs_write32(GUEST_FS_AR_BYTES, src->guest_fs_ar_bytes);
> +     vmcs_write32(GUEST_GS_AR_BYTES, src->guest_gs_ar_bytes);
> +     vmcs_write32(GUEST_LDTR_AR_BYTES, src->guest_ldtr_ar_bytes);
> +     vmcs_write32(GUEST_TR_AR_BYTES, src->guest_tr_ar_bytes);
> +     vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
> +                  src->guest_interruptibility_info);
> +     vmcs_write32(GUEST_ACTIVITY_STATE, src->guest_activity_state);
> +     vmcs_write32(GUEST_SYSENTER_CS, src->guest_sysenter_cs);
> +
> +     vmcs_writel(GUEST_ES_BASE, src->guest_es_base);
> +     vmcs_writel(GUEST_CS_BASE, src->guest_cs_base);
> +     vmcs_writel(GUEST_SS_BASE, src->guest_ss_base);
> +     vmcs_writel(GUEST_DS_BASE, src->guest_ds_base);
> +     vmcs_writel(GUEST_FS_BASE, src->guest_fs_base);
> +     vmcs_writel(GUEST_GS_BASE, src->guest_gs_base);
> +     vmcs_writel(GUEST_LDTR_BASE, src->guest_ldtr_base);
> +     vmcs_writel(GUEST_TR_BASE, src->guest_tr_base);
> +     vmcs_writel(GUEST_GDTR_BASE, src->guest_gdtr_base);
> +     vmcs_writel(GUEST_IDTR_BASE, src->guest_idtr_base);
> +     vmcs_writel(GUEST_DR7, src->guest_dr7);
> +     vmcs_writel(GUEST_RSP, src->guest_rsp);
> +     vmcs_writel(GUEST_RIP, src->guest_rip);
> +     vmcs_writel(GUEST_RFLAGS, src->guest_rflags);
> +     vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
> +                 src->guest_pending_dbg_exceptions);
> +     vmcs_writel(GUEST_SYSENTER_ESP, src->guest_sysenter_esp);
> +     vmcs_writel(GUEST_SYSENTER_EIP, src->guest_sysenter_eip);
> +
> +     return 0;
> +}
>  
>  /* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot()
>   * tricks.
> @@ -5363,6 +5491,281 @@ static void vmx_set_supported_cpuid(u32 
>  {
>  }
>  
> +/* Make a copy of the current VMCS to ordinary memory. This is needed because
> + * in VMX you cannot read and write to two VMCS at the same time, so when we
> + * want to do this (in prepare_vmcs_02, which needs to read from vmcs01 while
> + * preparing vmcs02), we need to first save a copy of one VMCS's fields in
> + * memory, and then use that copy.
> + */
> +void save_vmcs(struct shadow_vmcs *dst)
> +{
> +     dst->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
> +     dst->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
> +     dst->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
> +     dst->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
> +     dst->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
> +     dst->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
> +     dst->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
> +     dst->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
> +     dst->host_es_selector = vmcs_read16(HOST_ES_SELECTOR);
> +     dst->host_cs_selector = vmcs_read16(HOST_CS_SELECTOR);
> +     dst->host_ss_selector = vmcs_read16(HOST_SS_SELECTOR);
> +     dst->host_ds_selector = vmcs_read16(HOST_DS_SELECTOR);
> +     dst->host_fs_selector = vmcs_read16(HOST_FS_SELECTOR);
> +     dst->host_gs_selector = vmcs_read16(HOST_GS_SELECTOR);
> +     dst->host_tr_selector = vmcs_read16(HOST_TR_SELECTOR);
> +     dst->io_bitmap_a = vmcs_read64(IO_BITMAP_A);
> +     dst->io_bitmap_b = vmcs_read64(IO_BITMAP_B);
> +     if (cpu_has_vmx_msr_bitmap())
> +             dst->msr_bitmap = vmcs_read64(MSR_BITMAP);
> +     dst->tsc_offset = vmcs_read64(TSC_OFFSET);
> +     dst->virtual_apic_page_addr = vmcs_read64(VIRTUAL_APIC_PAGE_ADDR);
> +     dst->apic_access_addr = vmcs_read64(APIC_ACCESS_ADDR);
> +     if (enable_ept)
> +             dst->ept_pointer = vmcs_read64(EPT_POINTER);
> +     dst->guest_physical_address = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
> +     dst->vmcs_link_pointer = vmcs_read64(VMCS_LINK_POINTER);
> +     dst->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
> +     if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
> +             dst->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT);
> +     if (enable_ept) {
> +             dst->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
> +             dst->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
> +             dst->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
> +             dst->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
> +     }
> +     dst->pin_based_vm_exec_control = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL);
> +     dst->cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> +     dst->exception_bitmap = vmcs_read32(EXCEPTION_BITMAP);
> +     dst->page_fault_error_code_mask =
> +             vmcs_read32(PAGE_FAULT_ERROR_CODE_MASK);
> +     dst->page_fault_error_code_match =
> +             vmcs_read32(PAGE_FAULT_ERROR_CODE_MATCH);
> +     dst->cr3_target_count = vmcs_read32(CR3_TARGET_COUNT);
> +     dst->vm_exit_controls = vmcs_read32(VM_EXIT_CONTROLS);
> +     dst->vm_entry_controls = vmcs_read32(VM_ENTRY_CONTROLS);
> +     dst->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
> +     dst->vm_entry_exception_error_code =
> +             vmcs_read32(VM_ENTRY_EXCEPTION_ERROR_CODE);
> +     dst->vm_entry_instruction_len = vmcs_read32(VM_ENTRY_INSTRUCTION_LEN);
> +     dst->tpr_threshold = vmcs_read32(TPR_THRESHOLD);
> +     dst->secondary_vm_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
> +     if (enable_vpid && dst->secondary_vm_exec_control &
> +         SECONDARY_EXEC_ENABLE_VPID)
> +             dst->virtual_processor_id = vmcs_read16(VIRTUAL_PROCESSOR_ID);
> +     dst->vm_instruction_error = vmcs_read32(VM_INSTRUCTION_ERROR);
> +     dst->vm_exit_reason  = vmcs_read32(VM_EXIT_REASON);
> +     dst->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
> +     dst->vm_exit_intr_error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
> +     dst->idt_vectoring_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
> +     dst->idt_vectoring_error_code = vmcs_read32(IDT_VECTORING_ERROR_CODE);
> +     dst->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
> +     dst->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
> +     dst->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
> +     dst->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
> +     dst->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
> +     dst->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
> +     dst->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
> +     dst->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
> +     dst->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
> +     dst->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
> +     dst->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
> +     dst->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
> +     dst->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
> +     dst->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
> +     dst->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
> +     dst->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
> +     dst->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
> +     dst->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
> +     dst->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
> +     dst->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
> +     dst->guest_interruptibility_info =
> +             vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
> +     dst->guest_activity_state = vmcs_read32(GUEST_ACTIVITY_STATE);
> +     dst->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
> +     dst->host_ia32_sysenter_cs = vmcs_read32(HOST_IA32_SYSENTER_CS);
> +     dst->cr0_guest_host_mask = vmcs_readl(CR0_GUEST_HOST_MASK);
> +     dst->cr4_guest_host_mask = vmcs_readl(CR4_GUEST_HOST_MASK);
> +     dst->cr0_read_shadow = vmcs_readl(CR0_READ_SHADOW);
> +     dst->cr4_read_shadow = vmcs_readl(CR4_READ_SHADOW);
> +     dst->cr3_target_value0 = vmcs_readl(CR3_TARGET_VALUE0);
> +     dst->cr3_target_value1 = vmcs_readl(CR3_TARGET_VALUE1);
> +     dst->cr3_target_value2 = vmcs_readl(CR3_TARGET_VALUE2);
> +     dst->cr3_target_value3 = vmcs_readl(CR3_TARGET_VALUE3);
> +     dst->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
> +     dst->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
> +     dst->guest_cr0 = vmcs_readl(GUEST_CR0);
> +     dst->guest_cr3 = vmcs_readl(GUEST_CR3);
> +     dst->guest_cr4 = vmcs_readl(GUEST_CR4);
> +     dst->guest_es_base = vmcs_readl(GUEST_ES_BASE);
> +     dst->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
> +     dst->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
> +     dst->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
> +     dst->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
> +     dst->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
> +     dst->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
> +     dst->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
> +     dst->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
> +     dst->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
> +     dst->guest_dr7 = vmcs_readl(GUEST_DR7);
> +     dst->guest_rsp = vmcs_readl(GUEST_RSP);
> +     dst->guest_rip = vmcs_readl(GUEST_RIP);
> +     dst->guest_rflags = vmcs_readl(GUEST_RFLAGS);
> +     dst->guest_pending_dbg_exceptions =
> +             vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
> +     dst->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
> +     dst->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
> +     dst->host_cr0 = vmcs_readl(HOST_CR0);
> +     dst->host_cr3 = vmcs_readl(HOST_CR3);
> +     dst->host_cr4 = vmcs_readl(HOST_CR4);
> +     dst->host_fs_base = vmcs_readl(HOST_FS_BASE);
> +     dst->host_gs_base = vmcs_readl(HOST_GS_BASE);
> +     dst->host_tr_base = vmcs_readl(HOST_TR_BASE);
> +     dst->host_gdtr_base = vmcs_readl(HOST_GDTR_BASE);
> +     dst->host_idtr_base = vmcs_readl(HOST_IDTR_BASE);
> +     dst->host_ia32_sysenter_esp = vmcs_readl(HOST_IA32_SYSENTER_ESP);
> +     dst->host_ia32_sysenter_eip = vmcs_readl(HOST_IA32_SYSENTER_EIP);
> +     dst->host_rsp = vmcs_readl(HOST_RSP);
> +     dst->host_rip = vmcs_readl(HOST_RIP);
> +     if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT)
> +             dst->host_ia32_pat = vmcs_read64(HOST_IA32_PAT);
> +}
> +
> +/* prepare_vmcs_02 is called when the L1 guest hypervisor runs its nested
> + * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
> + * with L0's wishes for its guest (vmcs01), so we can run the L2 guest in a
> + * way that is appropriate both to L1's requests and to our own needs.
> + */
> +int prepare_vmcs_02(struct kvm_vcpu *vcpu,
> +     struct shadow_vmcs *vmcs12, struct shadow_vmcs *vmcs01)
> +{
> +     u32 exec_control;
> +
> +     load_vmcs_common(vmcs12);
> +
> +     vmcs_write64(VMCS_LINK_POINTER, vmcs12->vmcs_link_pointer);
> +     vmcs_write64(IO_BITMAP_A, vmcs01->io_bitmap_a);
> +     vmcs_write64(IO_BITMAP_B, vmcs01->io_bitmap_b);
> +     if (cpu_has_vmx_msr_bitmap())
> +             vmcs_write64(MSR_BITMAP, vmcs01->msr_bitmap);
> +
> +     if (vmcs12->vm_entry_msr_load_count > 0 ||
> +                     vmcs12->vm_exit_msr_load_count > 0 ||
> +                     vmcs12->vm_exit_msr_store_count > 0) {
> +             printk(KERN_WARNING
> +                     "%s: VMCS MSR_{LOAD,STORE} unsupported\n", __func__);
> +     }
> +
> +     if (nested_cpu_has_vmx_tpr_shadow(vcpu)) {
> +             struct page *page =
> +                     nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
> +             if (!page)
> +                     return 1;
> +             vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, page_to_phys(page));
> +             kvm_release_page_clean(page);
> +     }
> +
> +     if (nested_vm_need_virtualize_apic_accesses(vcpu)) {
> +             struct page *page =
> +                     nested_get_page(vcpu, vmcs12->apic_access_addr);
> +             if (!page)
> +                     return 1;
> +             vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page));
> +             kvm_release_page_clean(page);
> +     }
> +
> +     vmcs_write32(PIN_BASED_VM_EXEC_CONTROL,
> +                  (vmcs01->pin_based_vm_exec_control |
> +                   vmcs12->pin_based_vm_exec_control));
> +     vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK,
> +                  (vmcs01->page_fault_error_code_mask &
> +                   vmcs12->page_fault_error_code_mask));
> +     vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH,
> +                  (vmcs01->page_fault_error_code_match &
> +                   vmcs12->page_fault_error_code_match));
> +
> +     if (cpu_has_secondary_exec_ctrls()) {
> +             u32 exec_control = vmcs01->secondary_vm_exec_control;
> +             if (nested_cpu_has_secondary_exec_ctrls(vcpu)) {
> +                     exec_control |= vmcs12->secondary_vm_exec_control;
> +                     if (!vm_need_virtualize_apic_accesses(vcpu->kvm) ||
> +                         !nested_vm_need_virtualize_apic_accesses(vcpu))
> +                             exec_control &=
> +                             ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
> +             }
> +             vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
> +     }
> +
> +     load_vmcs_host_state(vmcs01);
> +
> +     if (vm_need_tpr_shadow(vcpu->kvm) &&
> +         nested_cpu_has_vmx_tpr_shadow(vcpu))
> +             vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
> +
> +     if (enable_ept) {
> +             if (!nested_cpu_has_vmx_ept(vcpu)) {
> +                     vmcs_write64(EPT_POINTER, vmcs01->ept_pointer);
> +                     vmcs_write64(GUEST_PDPTR0, vmcs01->guest_pdptr0);
> +                     vmcs_write64(GUEST_PDPTR1, vmcs01->guest_pdptr1);
> +                     vmcs_write64(GUEST_PDPTR2, vmcs01->guest_pdptr2);
> +                     vmcs_write64(GUEST_PDPTR3, vmcs01->guest_pdptr3);
> +             }
> +     }
> +
> +     exec_control = vmcs01->cpu_based_vm_exec_control;
> +     exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
> +     exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
> +     exec_control &= ~CPU_BASED_TPR_SHADOW;
> +     exec_control |= vmcs12->cpu_based_vm_exec_control;
> +     if (!vm_need_tpr_shadow(vcpu->kvm) ||
> +         vmcs12->virtual_apic_page_addr == 0) {
> +             exec_control &= ~CPU_BASED_TPR_SHADOW;
> +#ifdef CONFIG_X86_64
> +             exec_control |= CPU_BASED_CR8_STORE_EXITING |
> +                     CPU_BASED_CR8_LOAD_EXITING;
> +#endif
> +     } else if (exec_control & CPU_BASED_TPR_SHADOW) {
> +#ifdef CONFIG_X86_64
> +             exec_control &= ~CPU_BASED_CR8_STORE_EXITING;
> +             exec_control &= ~CPU_BASED_CR8_LOAD_EXITING;
> +#endif
> +     }
> +     vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
> +
> +     /* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
> +      * bitwise-or of what L1 wants to trap for L2, and what we want to
> +      * trap. However, vmx_fpu_activate/deactivate may have happened after
> +      * we saved vmcs01, so we shouldn't trust its TS and NM_VECTOR bits
> +      * and need to base them again on fpu_active. Note that CR0.TS also
> +      * needs updating - we do this after this function returns (in
> +      * nested_vmx_run).
> +      */
> +     vmcs_write32(EXCEPTION_BITMAP,
> +                  ((vmcs01->exception_bitmap&~(1u<<NM_VECTOR)) |
> +                   (vcpu->fpu_active ? 0 : (1u<<NM_VECTOR)) |
> +                   vmcs12->exception_bitmap));
> +     vmcs_writel(CR0_GUEST_HOST_MASK, vmcs12->cr0_guest_host_mask |
> +                     (vcpu->fpu_active ? 0 : X86_CR0_TS));
> +     vcpu->arch.cr0_guest_owned_bits = ~(vmcs12->cr0_guest_host_mask |
> +                     (vcpu->fpu_active ? 0 : X86_CR0_TS));
> +
> +     vmcs_write32(VM_EXIT_CONTROLS,
> +                  (vmcs01->vm_exit_controls &
> +                     (~(VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT)))
> +                    | vmcs12->vm_exit_controls);
> +
> +     vmcs_write32(VM_ENTRY_CONTROLS,
> +                  (vmcs01->vm_entry_controls &
> +                     (~(VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_IA32E_MODE)))
> +                   | vmcs12->vm_entry_controls);
> +
> +     vmcs_writel(CR4_GUEST_HOST_MASK,
> +                 (vmcs01->cr4_guest_host_mask  &
> +                  vmcs12->cr4_guest_host_mask));
> +
> +     return 0;
> +}
> +
>  static struct kvm_x86_ops vmx_x86_ops = {
>       .cpu_has_kvm_support = cpu_has_kvm_support,
>       .disabled_by_bios = vmx_disabled_by_bios,
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
                        Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to