On 15/06/19 03:14, Liran Alon wrote: >> @@ -1368,6 +1369,13 @@ int kvm_arch_init_vcpu(CPUState *cs) >> if (has_xsave) { >> env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave)); >> } >> + >> + nested_state_len = kvm_max_nested_state_length(); >> + if (nested_state_len > 0) { >> + assert(nested_state_len >= offsetof(struct kvm_nested_state, data)); >> + env->nested_state = g_malloc0(nested_state_len); > > Paolo, why have you removed setting “env->nested_state->size = > max_nested_state_len;”?
Because I confused the "nested_state_len == 0" check in kvm_put_nested_state with "env->nested_state->size == 0". > In addition, in my next patch-series I also added the following code here > which is required: > > + if (IS_INTEL_CPU(env)) { > + struct kvm_vmx_nested_state_hdr *vmx_hdr = > + &env->nested_state->hdr.vmx_hdr; > + > + vmx_hdr->vmxon_pa = -1ull; > + vmx_hdr->vmcs12_pa = -1ull; > + } Looks good. >> + } >> + >> cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE); > > Note: In my next patch-series I have also added a new kvm_arch_destroy_vcpu() > method which is called from kvm_destroy_vcpu(). > Similar to how kvm_arch_init_vcpu() is called from kvm_init_vcpu(). > I use it to free both cpu->kvm_msr_buf and env->nested_state. Looks good too. >> >> if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) { >> @@ -3125,6 +3133,41 @@ static int kvm_get_debugregs(X86CPU *cpu) >> return 0; >> } >> >> +static int kvm_put_nested_state(X86CPU *cpu) >> +{ >> + CPUX86State *env = &cpu->env; >> + uint32_t nested_state_len = kvm_max_nested_state_length(); >> + >> + if (nested_state_len == 0) { >> + return 0; >> + } >> + >> + assert(env->nested_state->size <= nested_state_len); >> + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, >> env->nested_state); >> +} >> + >> +static int kvm_get_nested_state(X86CPU *cpu) >> +{ >> + CPUX86State *env = &cpu->env; >> + uint32_t nested_state_len = kvm_max_nested_state_length(); >> + >> + if (nested_state_len == 0) { >> + return 0; >> + } >> + >> + /* >> + * It is possible that migration restored a smaller size into >> + * nested_state->size than what our kernel supports. >> + * We preserve migration origin nested_state->size for >> + * the call to KVM_SET_NESTED_STATE but wish that our next call >> + * to KVM_GET_NESTED_STATE will use the maximum size supported by >> + * the kernel we're running on. 
>> + */ >> + env->nested_state->size = nested_state_len; >> + >> + return kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, >> env->nested_state); >> +} >> + >> int kvm_arch_put_registers(CPUState *cpu, int level) >> { >> X86CPU *x86_cpu = X86_CPU(cpu); >> @@ -3132,6 +3175,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level) >> >> assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); >> >> + ret = kvm_put_nested_state(x86_cpu); >> + if (ret < 0) { >> + return ret; >> + } >> + >> if (level >= KVM_PUT_RESET_STATE) { >> ret = kvm_put_msr_feature_control(x86_cpu); >> if (ret < 0) { >> @@ -3247,6 +3295,10 @@ int kvm_arch_get_registers(CPUState *cs) >> if (ret < 0) { >> goto out; >> } >> + ret = kvm_get_nested_state(cpu); >> + if (ret < 0) { >> + goto out; >> + } >> ret = 0; >> out: >> cpu_sync_bndcs_hflags(&cpu->env); >> diff --git a/target/i386/machine.c b/target/i386/machine.c >> index 41460be54b..45dbae6054 100644 >> --- a/target/i386/machine.c >> +++ b/target/i386/machine.c >> @@ -246,6 +246,15 @@ static int cpu_pre_save(void *opaque) >> env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK); >> } >> >> +#ifdef CONFIG_KVM >> + /* Verify we have nested virtualization state from kernel if required */ >> + if (is_nested_virt_enabled(env) && !env->nested_state) { >> + error_report("Guest enabled nested virtualization but kernel " >> + "do not support saving nested state"); >> + return -EINVAL; >> + } >> +#endif >> + >> return 0; >> } >> >> @@ -909,6 +918,176 @@ static const VMStateDescription vmstate_tsc_khz = { >> } >> }; >> >> +#ifdef CONFIG_KVM >> +static bool vmx_vmcs12_needed(void *opaque) >> +{ >> + struct kvm_nested_state *nested_state = opaque; >> + return (nested_state->size > offsetof(struct kvm_nested_state, >> + vmx.data[0].vmcs12)); > > Do you prefer this compared to checking explicitly? i.e. 
by: > return (nested_state->vmx.vmcs12_pa != -1ull); I think I do; it guarantees that we don't serialize gibberish from vmx.data[0] and it's consistent with the vmx_shadow_vmcs12_needed check. >> +static bool nested_state_needed(void *opaque) >> +{ >> + X86CPU *cpu = opaque; >> + CPUX86State *env = &cpu->env; >> + >> + return (is_vmx_enabled(env) && >> vmx_nested_state_needed(env->nested_state)) || >> + (is_svm_enabled(env) && >> svm_nested_state_needed(env->nested_state)); >> +} > > As I specified in an earlier email in this patch-series, this is not entirely > accurate. > In case vCPU is running L2 and entered SMM, then is_vmx_enabled() will return > false because CR4 is set to 0 on entering SMM. > I consider deeming nested_state needed in case hflags specifies guest is in > SMM mode. Any objection? See my other answer; let's fix it in patch 7 instead. Paolo