Re: [PATCH 5/5] KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case

2018-06-15 Thread Vitaly Kuznetsov
Liran Alon  writes:

> - vkuzn...@redhat.com wrote:
>
>> When Enlightened VMCS is in use by L1 hypervisor we can avoid
>> vmwriting
>> VMCS fields which did not change.
>> 
>> Our first goal is to achieve minimal impact on traditional VMCS case
>> so
>> we're not wrapping each vmwrite() with an if-changed checker. We also
>> can't
>> utilize static keys as Enlightened VMCS usage is per-guest.
>> 
>> This patch implements the simpliest solution: checking fields in
>> groups.
>> We skip single vmwrite() statements as doing the check will cost us
>> something even in non-evmcs case and the win is tiny. Unfortunately,
>> this
>> makes prepare_vmcs02_full{,_full}() code Enlightened VMCS-dependent
>> (and
>> a bit ugly).
>> 
>> Signed-off-by: Vitaly Kuznetsov 
>> ---
>>  arch/x86/kvm/vmx.c | 143
>> ++---
>>  1 file changed, 82 insertions(+), 61 deletions(-)
>> 
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 6802ba91468c..9a7d76c5c92b 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -11619,50 +11619,79 @@ static int nested_vmx_load_cr3(struct
>> kvm_vcpu *vcpu, unsigned long cr3, bool ne
>>  return 0;
>>  }
>>  
>> +/*
>> + * Check if L1 hypervisor changed the particular field in
>> Enlightened
>> + * VMCS and avoid redundant vmwrite if it didn't. Can only be used
>> when
>> + * the value we're about to write is unchanged vmcs12->field.
>> + */
>> +#define evmcs_needs_write(vmx, clean_field)
>> ((vmx)->nested.dirty_vmcs12 ||\
>> +!(vmx->nested.hv_evmcs->hv_clean_fields &\
>> +  HV_VMX_ENLIGHTENED_CLEAN_FIELD_##clean_field))
>
> Why declare this is a macro instead of an static inline small function?
> Just to shorten the name of the clean-field constant?
>

To be completely honest I forgot why I used define but I think yes, it
was because HV_VMX_ENLIGHTENED_CLEAN_FIELD_* constants are very long.

>> +
>>  static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12
>> *vmcs12)
>>  {
>>  struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
>> +
>> +if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
>> +vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
>> +vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
>> +vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
>> +vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
>> +vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
>> +vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
>> +vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
>> +vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
>> +vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
>> +vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
>> +vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
>> +vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
>> +vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
>> +vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
>> +vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
>> +vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
>> +vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
>> +vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
>> +vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
>> +vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
>> +vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
>> +vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
>> +vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
>> +vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
>> +vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
>> +vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
>> +vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
>> +vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
>> +vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
>> +vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
>> +vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
>> +}
>> +
>> +if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP1)) {
>> +vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
>> +vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
>> +vmcs12->guest_pending_dbg_exceptions);
>> +vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
>> +vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
>> +
>> +if (vmx_mpx_supported())
>> +vmcs_write64(GUEST_BNDCFGS, vmcs12

Re: [PATCH 5/5] KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case

2018-06-14 Thread Liran Alon


- vkuzn...@redhat.com wrote:

> When Enlightened VMCS is in use by L1 hypervisor we can avoid
> vmwriting
> VMCS fields which did not change.
> 
> Our first goal is to achieve minimal impact on traditional VMCS case
> so
> we're not wrapping each vmwrite() with an if-changed checker. We also
> can't
> utilize static keys as Enlightened VMCS usage is per-guest.
> 
> This patch implements the simpliest solution: checking fields in
> groups.
> We skip single vmwrite() statements as doing the check will cost us
> something even in non-evmcs case and the win is tiny. Unfortunately,
> this
> makes prepare_vmcs02_full{,_full}() code Enlightened VMCS-dependent
> (and
> a bit ugly).
> 
> Signed-off-by: Vitaly Kuznetsov 
> ---
>  arch/x86/kvm/vmx.c | 143
> ++---
>  1 file changed, 82 insertions(+), 61 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6802ba91468c..9a7d76c5c92b 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -11619,50 +11619,79 @@ static int nested_vmx_load_cr3(struct
> kvm_vcpu *vcpu, unsigned long cr3, bool ne
>   return 0;
>  }
>  
> +/*
> + * Check if L1 hypervisor changed the particular field in
> Enlightened
> + * VMCS and avoid redundant vmwrite if it didn't. Can only be used
> when
> + * the value we're about to write is unchanged vmcs12->field.
> + */
> +#define evmcs_needs_write(vmx, clean_field)
> ((vmx)->nested.dirty_vmcs12 ||\
> + !(vmx->nested.hv_evmcs->hv_clean_fields &\
> +   HV_VMX_ENLIGHTENED_CLEAN_FIELD_##clean_field))

Why declare this is a macro instead of an static inline small function?
Just to shorten the name of the clean-field constant?

> +
>  static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12
> *vmcs12)
>  {
>   struct vcpu_vmx *vmx = to_vmx(vcpu);
> + struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
> +
> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
> + vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> + vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
> + vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
> + vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
> + vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
> + vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
> + vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
> + vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
> + vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
> + vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
> + vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
> + vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
> + vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
> + vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
> + vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
> + vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
> + vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
> + vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
> + vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
> + vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
> + vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
> + vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
> + vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
> + vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
> + vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
> + vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
> + vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
> + vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
> + vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
> + vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
> + vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
> + }
> +
> + if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP1)) {
> + vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
> + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
> + vmcs12->guest_pending_dbg_exceptions);
> + vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
> + vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
> +
> + if (vmx_mpx_supported())
> + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
>  
> - vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
> - vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
> - vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selec

[PATCH 5/5] KVM: nVMX: optimize prepare_vmcs02{,_full} for Enlightened VMCS case

2018-06-14 Thread Vitaly Kuznetsov
When Enlightened VMCS is in use by L1 hypervisor we can avoid vmwriting
VMCS fields which did not change.

Our first goal is to achieve minimal impact on traditional VMCS case so
we're not wrapping each vmwrite() with an if-changed checker. We also can't
utilize static keys as Enlightened VMCS usage is per-guest.

This patch implements the simpliest solution: checking fields in groups.
We skip single vmwrite() statements as doing the check will cost us
something even in non-evmcs case and the win is tiny. Unfortunately, this
makes prepare_vmcs02_full{,_full}() code Enlightened VMCS-dependent (and
a bit ugly).

Signed-off-by: Vitaly Kuznetsov 
---
 arch/x86/kvm/vmx.c | 143 ++---
 1 file changed, 82 insertions(+), 61 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6802ba91468c..9a7d76c5c92b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -11619,50 +11619,79 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, 
unsigned long cr3, bool ne
return 0;
 }
 
+/*
+ * Check if L1 hypervisor changed the particular field in Enlightened
+ * VMCS and avoid redundant vmwrite if it didn't. Can only be used when
+ * the value we're about to write is unchanged vmcs12->field.
+ */
+#define evmcs_needs_write(vmx, clean_field) ((vmx)->nested.dirty_vmcs12 ||\
+   !(vmx->nested.hv_evmcs->hv_clean_fields &\
+ HV_VMX_ENLIGHTENED_CLEAN_FIELD_##clean_field))
+
 static void prepare_vmcs02_full(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
+   struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
+
+   if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP2)) {
+   vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
+   vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
+   vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
+   vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
+   vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
+   vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
+   vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
+   vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
+   vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
+   vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
+   vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
+   vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
+   vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
+   vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
+   vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
+   vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
+   vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
+   vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
+   vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
+   vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
+   vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
+   vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
+   vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
+   vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
+   vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
+   vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
+   vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
+   vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
+   vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
+   vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
+   vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
+   }
+
+   if (!hv_evmcs || evmcs_needs_write(vmx, GUEST_GRP1)) {
+   vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
+   vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
+   vmcs12->guest_pending_dbg_exceptions);
+   vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
+   vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
+
+   if (vmx_mpx_supported())
+   vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
 
-   vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
-   vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
-   vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
-   vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
-   vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
-   vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
-   vmcs_write16(GUEST_TR_SELECTOR, vmcs12->g