Re: [PATCH 4/4] VMX: x86: Only reset MMU when necessary

2010-05-11 Thread Marcelo Tosatti
On Tue, May 11, 2010 at 01:30:07PM +0800, Sheng Yang wrote:
> Only modifying some bits of CR0/CR4 needs a paging mode switch.
> 
> Add update_rsvd_bits_mask() to handle EFER.NX updates for the reserved bits mask.
> 
> Signed-off-by: Sheng Yang 
> ---
>  arch/x86/include/asm/kvm_host.h |1 +
>  arch/x86/kvm/mmu.c  |   17 ++---
>  arch/x86/kvm/x86.c  |   14 --
>  3 files changed, 27 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index ed48904..c8c8a03 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -553,6 +553,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
>  void kvm_mmu_zap_all(struct kvm *kvm);
>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
>  void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
> +void update_rsvd_bits_mask(struct kvm_vcpu *vcpu);
>  
>  int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
>  
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 5412185..98abdcf 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2335,6 +2335,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
>   }
>  }
>  
> +void update_rsvd_bits_mask(struct kvm_vcpu *vcpu)
> +{
> +	if (!is_paging(vcpu))
> +		return;
> +	if (is_long_mode(vcpu))
> +		reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
> +	else if (is_pae(vcpu))
> +		reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
> +	else
> +		reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
> +}
> +EXPORT_SYMBOL_GPL(update_rsvd_bits_mask);
> +
>  static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
>  {
>   struct kvm_mmu *context = &vcpu->arch.mmu;
> @@ -2400,18 +2413,16 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
>  		context->gva_to_gpa = nonpaging_gva_to_gpa;
>  		context->root_level = 0;
>  	} else if (is_long_mode(vcpu)) {
> -		reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
>  		context->gva_to_gpa = paging64_gva_to_gpa;
>  		context->root_level = PT64_ROOT_LEVEL;
>  	} else if (is_pae(vcpu)) {
> -		reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
>  		context->gva_to_gpa = paging64_gva_to_gpa;
>  		context->root_level = PT32E_ROOT_LEVEL;
>  	} else {
> -		reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
>  		context->gva_to_gpa = paging32_gva_to_gpa;
>  		context->root_level = PT32_ROOT_LEVEL;
>  	}
> +	update_rsvd_bits_mask(vcpu);
>  
>   return 0;
>  }
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b59fc67..1c76e08 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -416,6 +416,9 @@ out:
>  
>  static int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>  {
> +	unsigned long old_cr0 = kvm_read_cr0(vcpu);
> +	unsigned long update_bits = X86_CR0_PG | X86_CR0_PE;

If PAE paging would be in use following an execution of MOV to CR0 or
MOV to CR4 (see Section 4.1.1) and the instruction is modifying any of
CR0.CD, CR0.NW, CR0.PG, CR4.PAE, CR4.PGE, or CR4.PSE; then the PDPTEs
are loaded from the address in CR3.

If the PDPTEs changed, the MMU must be reloaded.
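
A minimal sketch of what this implies for __kvm_set_cr0(), assuming
CD/NW are simply added to the set of bits that forces a reload (this
mirrors the revised mask that appears later in the thread):

	/* CR0 bits whose modification can trigger a PDPTE reload under
	 * PAE paging (SDM 4.4.1), so flipping any of them must also
	 * reset the MMU context. */
	unsigned long update_bits = X86_CR0_PG | X86_CR0_PE |
				    X86_CR0_CD | X86_CR0_NW;

	if ((cr0 ^ old_cr0) & update_bits)
		kvm_mmu_reset_context(vcpu);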



Re: [PATCH 4/4] VMX: x86: Only reset MMU when necessary

2010-05-11 Thread Sheng Yang
On Wednesday 12 May 2010 03:36:26 Marcelo Tosatti wrote:
> On Tue, May 11, 2010 at 01:30:07PM +0800, Sheng Yang wrote:
> > Only modifying some bits of CR0/CR4 needs a paging mode switch.
> > 
> > Add update_rsvd_bits_mask() to handle EFER.NX updates for the
> > reserved bits mask.
> > 
> > Signed-off-by: Sheng Yang 
> > ---
> > [...]
> > 
> > @@ -416,6 +416,9 @@ out:
> >  static int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> >  {
> > +	unsigned long old_cr0 = kvm_read_cr0(vcpu);
> > +	unsigned long update_bits = X86_CR0_PG | X86_CR0_PE;
> 
> If PAE paging would be in use following an execution of MOV to CR0 or
> MOV to CR4 (see Section 4.1.1) and the instruction is modifying any of
> CR0.CD, CR0.NW, CR0.PG, CR4.PAE, CR4.PGE, or CR4.PSE; then the PDPTEs
> are loaded from the address in CR3.
> 
> If the PDPTEs changed, the MMU must be reloaded.

Yes... Would add CR0.CD and CR0.NW here.

--
regards
Yang, Sheng


Re: [PATCH 4/4] VMX: x86: Only reset MMU when necessary

2010-05-11 Thread Avi Kivity

On 05/12/2010 05:09 AM, Sheng Yang wrote:

> Only modifying some bits of CR0/CR4 needs a paging mode switch.
> 
> Add update_rsvd_bits_mask() to handle EFER.NX updates for the
> reserved bits mask.


Can you please repost the whole series?  Due to a problem with my 
mailbox I don't have the patches either in my inbox or on k...@.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.



Re: [PATCH 4/4] VMX: x86: Only reset MMU when necessary

2010-05-11 Thread Avi Kivity

On 05/12/2010 09:33 AM, Sheng Yang wrote:

> Only modifying some bits of CR0/CR4 needs a paging mode switch.
> 
> Add update_rsvd_bits_mask() to handle EFER.NX updates for the
> reserved bits mask.


> @@ -2335,6 +2335,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
>  	}
>  }
> 
> +void update_rsvd_bits_mask(struct kvm_vcpu *vcpu)
> +{
> +	if (!is_paging(vcpu))
> +		return;
> +	if (is_long_mode(vcpu))
> +		reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
> +	else if (is_pae(vcpu))
> +		reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
> +	else
> +		reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
> +}
> +EXPORT_SYMBOL_GPL(update_rsvd_bits_mask);


Needs a kvm_ prefix if made a global symbol.  But isn't nx switching 
rare enough so we can reload the mmu completely?
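
A sketch of that alternative in set_efer(), assuming a full reset on an
NX flip is acceptable (kvm_mmu_reset_context() is the existing reset
path used by __kvm_set_cr0()):

	/* EFER.NX toggles are rare, so rebuild the whole MMU context
	 * instead of exporting a new symbol just for the reserved-bits
	 * update. */
	if ((efer ^ old_efer) & EFER_NX)
		kvm_mmu_reset_context(vcpu);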



> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b59fc67..971a295 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -416,6 +416,10 @@ out:
> 
>  static int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>  {
> +	unsigned long old_cr0 = kvm_read_cr0(vcpu);
> +	unsigned long update_bits = X86_CR0_PG | X86_CR0_PE |
> +				    X86_CR0_CD | X86_CR0_NW;


PE doesn't affect paging; CD and NW don't either?

What about WP?


> +
>  	cr0 |= X86_CR0_ET;
> 
>  #ifdef CONFIG_X86_64
> @@ -449,7 +453,8 @@ static int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> 
>  	kvm_x86_ops->set_cr0(vcpu, cr0);
> 
> -	kvm_mmu_reset_context(vcpu);
> +	if ((cr0 ^ old_cr0) & update_bits)
> +		kvm_mmu_reset_context(vcpu);
>  	return 0;
>  }


> @@ -692,6 +698,8 @@ static u32 emulated_msrs[] = {
> 
>  static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
>  {
> +	u64 old_efer = vcpu->arch.efer;
> +
>  	if (efer & efer_reserved_bits)
>  		return 1;
> 
> @@ -722,6 +730,9 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
> 
>  	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
> 
> +	if ((efer ^ old_efer) & EFER_NX)
> +		update_rsvd_bits_mask(vcpu);
> +
>  	return 0;
>  }


I think it's fine to reset the entire mmu context here; most guests 
won't toggle nx all the time.  But it needs to be in patch 3, otherwise 
we have a regression between patches 3 and 4.


--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.



Re: [PATCH 4/4] VMX: x86: Only reset MMU when necessary

2010-05-12 Thread Sheng Yang
On Wednesday 12 May 2010 14:59:14 Avi Kivity wrote:
> On 05/12/2010 09:33 AM, Sheng Yang wrote:
> > Only modifying some bits of CR0/CR4 needs a paging mode switch.
> > 
> > Add update_rsvd_bits_mask() to handle EFER.NX updates for the
> > reserved bits mask.
> > 
> > 
> > @@ -2335,6 +2335,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
> >  	}
> >  }
> > 
> > +void update_rsvd_bits_mask(struct kvm_vcpu *vcpu)
> > +{
> > +	if (!is_paging(vcpu))
> > +		return;
> > +	if (is_long_mode(vcpu))
> > +		reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
> > +	else if (is_pae(vcpu))
> > +		reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
> > +	else
> > +		reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);
> > +}
> > +EXPORT_SYMBOL_GPL(update_rsvd_bits_mask);
> 
> Needs a kvm_ prefix if made a global symbol.  But isn't nx switching
> rare enough so we can reload the mmu completely?

Yes, it should be rare enough. I am OK with keeping it, though "reload MMU" 
seems a little misleading here.
 
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index b59fc67..971a295 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > 
> > @@ -416,6 +416,10 @@ out:
> >  static int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> >  {
> > +	unsigned long old_cr0 = kvm_read_cr0(vcpu);
> > +	unsigned long update_bits = X86_CR0_PG | X86_CR0_PE |
> > +				    X86_CR0_CD | X86_CR0_NW;
> 
> PE doesn't affect paging, CD, NW don't either?

Yes, PE can't affect paging alone.

Marcelo has commented on CD/NW: if they change we need to reload the 
PDPTRs, and then we need to reload the MMU.
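
A hypothetical standalone check, only to illustrate that reasoning (the
posted patch folds CD/NW into update_bits instead):

	/* Under PAE paging, a CR0 write that flips CD or NW reloads the
	 * PDPTEs from CR3 (SDM 4.4.1), so the cached roots go stale and
	 * the MMU context must be rebuilt. */
	if (is_pae(vcpu) && ((cr0 ^ old_cr0) & (X86_CR0_CD | X86_CR0_NW)))
		kvm_mmu_reset_context(vcpu);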
> 
> What about WP?

How would WP affect it?

> > +
> >  	cr0 |= X86_CR0_ET;
> > 
> >  #ifdef CONFIG_X86_64
> > @@ -449,7 +453,8 @@ static int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> > 
> >  	kvm_x86_ops->set_cr0(vcpu, cr0);
> > 
> > -	kvm_mmu_reset_context(vcpu);
> > +	if ((cr0 ^ old_cr0) & update_bits)
> > +		kvm_mmu_reset_context(vcpu);
> >  	return 0;
> >  }
> > 
> > @@ -692,6 +698,8 @@ static u32 emulated_msrs[] = {
> >  static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
> >  {
> > +	u64 old_efer = vcpu->arch.efer;
> > +
> >  	if (efer & efer_reserved_bits)
> >  		return 1;
> > 
> > @@ -722,6 +730,9 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
> > 
> >  	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
> > 
> > +	if ((efer ^ old_efer) & EFER_NX)
> > +		update_rsvd_bits_mask(vcpu);
> > +
> >  	return 0;
> >  }
> 
> I think it's fine to reset the entire mmu context here, most guests
> won't toggle nx all the time.  But it needs to be in patch 3, otherwise
> we have a regression between 3 and 4.

OK. I would drop patch 3 and keep the MMU reset if you like...
--
regards
Yang, Sheng


Re: [PATCH 4/4] VMX: x86: Only reset MMU when necessary

2010-05-12 Thread Avi Kivity

On 05/12/2010 10:31 AM, Sheng Yang wrote:



> > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > > index b59fc67..971a295 100644
> > > --- a/arch/x86/kvm/x86.c
> > > +++ b/arch/x86/kvm/x86.c
> > > @@ -416,6 +416,10 @@ out:
> > >  static int __kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
> > >  {
> > > +	unsigned long old_cr0 = kvm_read_cr0(vcpu);
> > > +	unsigned long update_bits = X86_CR0_PG | X86_CR0_PE |
> > > +				    X86_CR0_CD | X86_CR0_NW;
> > 
> > PE doesn't affect paging; CD and NW don't either?
> 
> Yes, PE can't affect paging alone.
> 
> Marcelo has commented on CD/NW: if they change we need to reload the
> PDPTRs, and then we need to reload the MMU.

Ah, correct.


> > What about WP?
> 
> How would WP affect it?


If cr0.wp=0 then we can have a pte with gpte.rw=0 but spte.rw=1 (since 
the guest always runs with cr0.wp=1).  So we need to reload the mmu to 
switch page tables.


This won't work now; I'll post a patch adding cr0.wp to sp->role.  But 
please add cr0.wp to the set of bits requiring reload so we won't have a 
regression.
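
A sketch of that request, assuming WP simply joins the mask in
__kvm_set_cr0() (hypothetical until the sp->role patch lands):

	/* Treat CR0.WP like the other paging-relevant bits: a WP flip
	 * forces kvm_mmu_reset_context() so writable sptes built under
	 * cr0.wp=0 are thrown away. */
	unsigned long update_bits = X86_CR0_PG | X86_CR0_WP | X86_CR0_PE |
				    X86_CR0_CD | X86_CR0_NW;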



> > > @@ -722,6 +730,9 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer)
> > > 
> > >  	vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled;
> > > 
> > > +	if ((efer ^ old_efer) & EFER_NX)
> > > +		update_rsvd_bits_mask(vcpu);
> > > +
> > >  	return 0;
> > >  }
> > 
> > I think it's fine to reset the entire mmu context here; most guests
> > won't toggle nx all the time.  But it needs to be in patch 3, otherwise
> > we have a regression between patches 3 and 4.
> 
> OK. I would drop patch 3 and keep the MMU reset if you like...


Yes please.

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.
