Re: [PATCH 1/5] Nested VMX patch 1 implements vmon and vmoff
Avi Kivity <a...@redhat.com> wrote on 20/10/2009 06:00:50:

> From: Avi Kivity <a...@redhat.com>
> To: Orit Wasserman/Haifa/i...@ibmil
> Cc: kvm@vger.kernel.org, Ben-Ami Yassour1/Haifa/i...@ibmil,
>     Abel Gordon/Haifa/i...@ibmil, Muli Ben-Yehuda/Haifa/i...@ibmil,
>     aligu...@us.ibm.com, md...@us.ibm.com
> Date: 20/10/2009 06:02
> Subject: Re: [PATCH 1/5] Nested VMX patch 1 implements vmon and vmoff
>
> On 10/15/2009 11:41 PM, or...@il.ibm.com wrote:
> >  /*
> > + * Handles msr read for nested virtualization
> > + */
> > +static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
> > +			      u64 *pdata)
> > +{
> > +	u64 vmx_msr = 0;
> > +
> > +	switch (msr_index) {
> > +	case MSR_IA32_FEATURE_CONTROL:
> > +		*pdata = 0;
> > +		break;
> > +	case MSR_IA32_VMX_BASIC:
> > +		*pdata = 0;
> > +		rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
> > +		*pdata = (vmx_msr & 0x00cf);
> > +		break;
>
> This (and the rest of the msrs) must be controllable from userspace.
> Otherwise a live migration from a newer host to an older host would
> break.

OK.

> >  /*
> > + * Writes msr value for nested virtualization
> > + * Returns 0 on success, non-0 otherwise.
> > + */
> > +static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
> > +{
> > +	switch (msr_index) {
> > +	case MSR_IA32_FEATURE_CONTROL:
> > +		if ((data & (FEATURE_CONTROL_LOCKED |
> > +			     FEATURE_CONTROL_VMXON_ENABLED))
> > +		    != (FEATURE_CONTROL_LOCKED |
> > +			FEATURE_CONTROL_VMXON_ENABLED))
> > +			return 1;
> > +		break;
> > +	default:
> > +		return 1;
> > +	}
> > +
> > +	return 0;
> > +}
>
> Need to export this msr to userspace for live migration.  See
> msrs_to_save[].

OK.

> > +/*
> > + * Check to see if vcpu can execute vmx command
> > + * Inject the corresponding exception
> > + */
> > +static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvm_segment cs;
> > +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +	struct kvm_msr_entry *msr;
> > +
> > +	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
> > +
> > +	if (!vmx->nested.vmxon) {
> > +		printk(KERN_DEBUG "%s: vmx not on\n", __func__);
>
> pr_debug

I will change it.

> > +		kvm_queue_exception(vcpu, UD_VECTOR);
> > +		return 0;
> > +	}
> > +
> > +	msr = find_msr_entry(vmx, MSR_EFER);
> > +
> > +	if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
> > +	    ((msr->data & EFER_LMA) && !cs.l)) {
>
> is_long_mode()

I will change it.

> >  static int handle_vmx_insn(struct kvm_vcpu *vcpu)
> >  {
> >  	kvm_queue_exception(vcpu, UD_VECTOR);
> >  	return 1;
> >  }
> > +static int handle_vmoff(struct kvm_vcpu *vcpu)
> > +{
> > +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +
> > +	if (!nested_vmx_check_permission(vcpu))
> > +		return 1;
> > +
> > +	vmx->nested.vmxon = 0;
> > +
> > +	skip_emulated_instruction(vcpu);
> > +	return 1;
> > +}
> > +
> > +static int handle_vmon(struct kvm_vcpu *vcpu)
> > +{
> > +	struct kvm_segment cs;
> > +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> > +
> > +	if (!nested) {
> > +		printk(KERN_DEBUG "%s: nested vmx not enabled\n", __func__);
> > +		kvm_queue_exception(vcpu, UD_VECTOR);
> > +		return 1;
> > +	}
> > +
> > +	vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
> > +
> > +	if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
> > +	    !(vcpu->arch.cr0 & X86_CR0_PE) ||
> > +	    (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
> > +		kvm_queue_exception(vcpu, UD_VECTOR);
> > +		printk(KERN_INFO "%s invalid register state\n", __func__);
> > +		return 1;
> > +	}
> > +#ifdef CONFIG_X86_64
> > +	if (((find_msr_entry(to_vmx(vcpu),
> > +			     MSR_EFER)->data & EFER_LMA) && !cs.l)) {
>
> is_long_mode(), and you can avoid the #ifdef.

I will change it.

> VMXON is supposed to block INIT, please add that (in a separate patch).

I will add it.

> --
> I have a truly marvellous patch that fixes the bug which this
> signature is too narrow to contain.
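For concreteness, a minimal sketch of what the msrs_to_save[] export could look like in arch/x86/kvm/x86.c; the placement and the exact list of VMX capability MSRs beyond MSR_IA32_FEATURE_CONTROL are assumptions for illustration, not the eventual patch:

	/* Sketch (assumed follow-up, not this patch): MSRs listed in
	 * msrs_to_save[] are reported to userspace by KVM_GET_MSR_INDEX_LIST,
	 * so a VMM can save them with KVM_GET_MSRS on the source host and
	 * restore them with KVM_SET_MSRS on the target, keeping the
	 * nested-VMX feature set stable across a live migration. */
	static u32 msrs_to_save[] = {
		/* ... existing entries (MSR_IA32_SYSENTER_CS, ...) ... */
		MSR_IA32_FEATURE_CONTROL,
		MSR_IA32_VMX_BASIC,
		MSR_IA32_VMX_PINBASED_CTLS,
		MSR_IA32_VMX_PROCBASED_CTLS,
		MSR_IA32_VMX_EXIT_CTLS,
		MSR_IA32_VMX_ENTRY_CTLS,
	};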
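Likewise, a minimal sketch of the permission check rewritten around is_long_mode() (available in arch/x86/kvm/x86.h in this era of the tree), as the review suggests; the body after the mode test is assumed from the truncated snippet above:

	static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
	{
		struct kvm_segment cs;
		struct vcpu_vmx *vmx = to_vmx(vcpu);

		if (!vmx->nested.vmxon) {
			pr_debug("%s: vmx not on\n", __func__);
			kvm_queue_exception(vcpu, UD_VECTOR);
			return 0;
		}

		vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);

		/* #UD in vm86 mode, or in compatibility mode under a
		 * long-mode guest; is_long_mode() replaces the open-coded
		 * EFER_LMA test, so find_msr_entry() and the #ifdef in
		 * handle_vmon() are no longer needed. */
		if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
		    (is_long_mode(vcpu) && !cs.l)) {
			kvm_queue_exception(vcpu, UD_VECTOR);
			return 0;
		}

		return 1;
	}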
[PATCH 1/5] Nested VMX patch 1 implements vmon and vmoff
From: Orit Wasserman <or...@il.ibm.com>

---
 arch/x86/kvm/svm.c |    3 -
 arch/x86/kvm/vmx.c |  217 +++-
 arch/x86/kvm/x86.c |    6 +-
 arch/x86/kvm/x86.h |    2 +
 4 files changed, 222 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2df9b45..3c1f22a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -124,9 +124,6 @@ static int npt = 1;
 module_param(npt, int, S_IRUGO);
 
-static int nested = 1;
-module_param(nested, int, S_IRUGO);
-
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 78101dd..71bd91a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -67,6 +67,11 @@ struct vmcs {
 	char data[0];
 };
 
+struct nested_vmx {
+	/* Has the level1 guest done vmxon? */
+	bool vmxon;
+};
+
 struct vcpu_vmx {
 	struct kvm_vcpu vcpu;
 	struct list_head local_vcpus_link;
@@ -114,6 +119,9 @@ struct vcpu_vmx {
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
 	u32 exit_reason;
+
+	/* Nested vmx */
+	struct nested_vmx nested;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -967,6 +975,95 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
 }
 
 /*
+ * Handles msr read for nested virtualization
+ */
+static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
+			      u64 *pdata)
+{
+	u64 vmx_msr = 0;
+
+	switch (msr_index) {
+	case MSR_IA32_FEATURE_CONTROL:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_BASIC:
+		*pdata = 0;
+		rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
+		*pdata = (vmx_msr & 0x00cf);
+		break;
+	case MSR_IA32_VMX_PINBASED_CTLS:
+		rdmsrl(MSR_IA32_VMX_PINBASED_CTLS, vmx_msr);
+		*pdata = (PIN_BASED_EXT_INTR_MASK & vmcs_config.pin_based_exec_ctrl) |
+			 (PIN_BASED_NMI_EXITING & vmcs_config.pin_based_exec_ctrl) |
+			 (PIN_BASED_VIRTUAL_NMIS & vmcs_config.pin_based_exec_ctrl);
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS:
+	{
+		u32 vmx_msr_high, vmx_msr_low;
+		u32 control = CPU_BASED_HLT_EXITING |
+#ifdef CONFIG_X86_64
+			CPU_BASED_CR8_LOAD_EXITING |
+			CPU_BASED_CR8_STORE_EXITING |
+#endif
+			CPU_BASED_CR3_LOAD_EXITING |
+			CPU_BASED_CR3_STORE_EXITING |
+			CPU_BASED_USE_IO_BITMAPS |
+			CPU_BASED_MOV_DR_EXITING |
+			CPU_BASED_USE_TSC_OFFSETING |
+			CPU_BASED_INVLPG_EXITING |
+			CPU_BASED_TPR_SHADOW |
+			CPU_BASED_USE_MSR_BITMAPS |
+			CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+
+		rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+
+		control &= vmx_msr_high; /* bit == 0 in high word == must be zero */
+		control |= vmx_msr_low;  /* bit == 1 in low word  == must be one  */
+
+		*pdata = (CPU_BASED_HLT_EXITING & control) |
+#ifdef CONFIG_X86_64
+			 (CPU_BASED_CR8_LOAD_EXITING & control) |
+			 (CPU_BASED_CR8_STORE_EXITING & control) |
+#endif
+			 (CPU_BASED_CR3_LOAD_EXITING & control) |
+			 (CPU_BASED_CR3_STORE_EXITING & control) |
+			 (CPU_BASED_USE_IO_BITMAPS & control) |
+			 (CPU_BASED_MOV_DR_EXITING & control) |
+			 (CPU_BASED_USE_TSC_OFFSETING & control) |
+			 (CPU_BASED_INVLPG_EXITING & control);
+
+		if (cpu_has_secondary_exec_ctrls())
+			*pdata |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+
+		if (vm_need_tpr_shadow(vcpu->kvm))
+			*pdata |= CPU_BASED_TPR_SHADOW;
+		break;
+	}
+	case MSR_IA32_VMX_EXIT_CTLS:
+		*pdata = 0;
+#ifdef CONFIG_X86_64
+		*pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
+#endif
+		break;
+	case MSR_IA32_VMX_ENTRY_CTLS:
+		*pdata = 0;
+		break;
+	case MSR_IA32_VMX_PROCBASED_CTLS2:
+		*pdata = 0;
+		if (vm_need_virtualize_apic_accesses(vcpu->kvm))
+			*pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		break;
+	case MSR_IA32_VMX_EPT_VPID_CAP:
+		*pdata = 0;
+		break;
+	default:
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
  */
@@ -1005,6 +1102,9 @@ static int
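For reference, a userspace-side sketch of how a VMM such as qemu-kvm could read one of these MSRs through KVM_GET_MSRS once it is exported, so its value can travel in the migration stream. vcpu_fd is an assumed, already-open vcpu file descriptor, and the MSR index define is only needed because linux/kvm.h does not provide MSR numbers:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	#define MSR_IA32_FEATURE_CONTROL 0x0000003a	/* architectural MSR index */

	/* Read the guest's IA32_FEATURE_CONTROL so it can be saved on the
	 * source host and written back with KVM_SET_MSRS on the target.
	 * KVM_GET_MSRS returns the number of MSRs successfully read. */
	static int read_feature_control(int vcpu_fd, unsigned long long *val)
	{
		struct {
			struct kvm_msrs info;
			struct kvm_msr_entry entries[1];
		} msr_data = {
			.info.nmsrs = 1,
			.entries[0].index = MSR_IA32_FEATURE_CONTROL,
		};

		if (ioctl(vcpu_fd, KVM_GET_MSRS, &msr_data) != 1)
			return -1;

		*val = msr_data.entries[0].data;
		return 0;
	}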