Re: RFC: Add reserved bits check

2009-03-31 Thread Avi Kivity

Dong, Eddie wrote:

+	case PT64_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);


I added a test for this, and it noticed that the pte masks were missing 
nx.  I fixed that up.  I also added code to shadow into different pages 
when EFER.NXE changes, so that we can handle the transition without 
flushing all shadow pages (and also run vcpus with mismatched EFER.NX).
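
(A minimal sketch of one way to shadow into different pages: fold EFER.NX
into the shadow page role, so pages built under one NX setting are never
reused under the other.  The "nxe" field name below is illustrative, not
necessarily what was committed.)

	union kvm_mmu_page_role {
		unsigned word;
		struct {
			unsigned glevels:4;
			unsigned level:4;
			unsigned quadrant:2;
			unsigned access:3;
			/* ... remaining role bits ... */
			unsigned nxe:1;	/* EFER.NX when the page was shadowed */
		};
	};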




RE: RFC: Add reserved bits check

2009-03-30 Thread Dong, Eddie
 
 Just noticed that walk_addr() too can be called from tdp context, so
 need to make sure rsvd_bits_mask is initialized in init_kvm_tdp_mmu()
 as well.

Yes, fixed.
Thx, eddie


commit b282565503a78e75af643de42fe7bf495e2213ec
Author: root r...@eddie-wb.localdomain
Date:   Mon Mar 30 16:57:39 2009 +0800

Emulate #PF error code of reserved bits violation.

Signed-off-by: Eddie Dong eddie.d...@intel.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..4fe2742 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,7 @@ struct kvm_mmu {
union kvm_mmu_page_role base_role;
 
u64 *pae_root;
+   u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
@@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef060ec..2eab758 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,11 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
 
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
shadow_trap_nonpresent_pte = trap_pte;
@@ -2155,6 +2161,14 @@ static void paging_free(struct kvm_vcpu *vcpu)
nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int bit7;
+
+	bit7 = (gpte >> 7) & 1;
+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2163,6 +2177,54 @@ static void paging_free(struct kvm_vcpu *vcpu)
 #include "paging_tmpl.h"
 #undef PTTYPE
 
+void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+	switch (level) {
+	case PT32_ROOT_LEVEL:
+		/* no rsvd bits for 2 level 4K page table entries */
+		context->rsvd_bits_mask[0][1] = 0;
+		context->rsvd_bits_mask[0][0] = 0;
+		if (is_cpuid_PSE36())
+			/* 36bits PSE 4MB page */
+			context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+		else
+			/* 32 bits PSE 4MB page */
+			context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+		context->rsvd_bits_mask[1][0] = 0;
+		break;
+	case PT32E_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62);	/* PDE */
+		context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62);	/* PTE */
+		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 62) |
+			rsvd_bits(13, 20);		/* large page */
+		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		break;
+	case PT64_ROOT_LEVEL:
+		context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+		context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+		context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+		context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+		context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+			rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
+		context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+		break;
+	}
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
@@ -2183,6 +2245,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+   reset_rsvds_bits_mask(vcpu, 

Re: RFC: Add reserved bits check

2009-03-30 Thread Avi Kivity

Dong, Eddie wrote:

Just noticed that walk_addr() too can be called from tdp context, so
need to make sure rsvd_bits_mask is initialized in init_kvm_tdp_mmu()
as well.



Yes, fixed.

  
Applied, thanks.  I also added unit tests for bit 51 of the pte and pde 
in the mmu tests.
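
(Roughly what such a test does -- the helper names here are hypothetical,
not the actual kvm-unit-tests API: plant bit 51 in an entry, touch the
mapping, and expect a #PF whose error code carries PFERR_RSVD_MASK.)

	static void test_rsvd_bit_51(void *va)
	{
		u64 *pte = lookup_pte(va);	/* hypothetical helper */

		*pte |= 1ull << 51;		/* reserved above MAXPHYADDR */
		invlpg(va);
		/* expect P=1 | RSVD=1 in the error code */
		expect_page_fault(va, PFERR_PRESENT_MASK | PFERR_RSVD_MASK);
	}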




Re: RFC: Add reserved bits check

2009-03-29 Thread Avi Kivity

Dong, Eddie wrote:
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int ps = 0;
+
+	if (level == PT_DIRECTORY_LEVEL)
+		ps = !!(gpte & PT_PAGE_SIZE_MASK);
  


No need for this.  If you set rsvd_bits_mask[1][0] == 
rsvd_bits_mask[0][0], then you get the same behaviour.  The first index 
is not the page size, it's just bit 7.


You'll need to fill all the indexes for bit 7 == 1, but it's worth it, 
with the 1GB pages patch.
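
(For 64-bit paging the filled table then reads like this, using the values
from the patch that was eventually applied:)

	/*
	 * rsvd_bits_mask[bit7][level-1]:
	 *
	 *   [0][1]  PDE, bit 7 clear (4K):  maxphyaddr..51, plus bits 8:7
	 *   [1][1]  PDE, bit 7 set (2MB):   maxphyaddr..51, plus bits 20:13
	 *   [0][0]  PTE, bit 7 clear:       maxphyaddr..51 only
	 *   [1][0]  PTE, bit 7 set:         bit 7 is PAT here, same as [0][0]
	 */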



+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[ps][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
 
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+	if (best)
+		return best->eax & 0xff;
+	return 32;
+}
+
  


Best to return 36 if the cpu doesn't support cpuid 80000008 but does 
support pae.
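
(A minimal sketch of the suggested fallback; whether the final code tests
PAE exactly this way is an assumption:)

	int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
	{
		struct kvm_cpuid_entry2 *best;

		best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
		if (best)
			return best->eax & 0xff;	/* CPUID.80000008H:EAX[7:0] */
		if (is_pae(vcpu))	/* assumed helper for the PAE case */
			return 36;
		return 32;
	}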





RE: RFC: Add reserved bits check

2009-03-29 Thread Dong, Eddie
 +static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
 +{
 +	int ps = 0;
 +
 +	if (level == PT_DIRECTORY_LEVEL)
 +		ps = !!(gpte & PT_PAGE_SIZE_MASK);
 
 
 No need for this.  If you set rsvd_bits_mask[1][0] ==
 rsvd_bits_mask[0][0], then you get the same behaviour.  The first
 index is not the page size, it's just bit 7.

Sure, fixed.

 
 You'll need to fill all the indexes for bit 7 == 1, but it's worth it,
 with the 1GB pages patch.
 
 +	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[ps][level-1]) != 0;
 +}
 +
  #define PTTYPE 64
 #include "paging_tmpl.h"
  #undef PTTYPE
 
 +int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
 +{
 +	struct kvm_cpuid_entry2 *best;
 +
 +	best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
 +	if (best)
 +		return best->eax & 0xff;
 +	return 32;
 +}
 +
 
 
 Best to return 36 if the cpu doesn't support cpuid 80000008 but does
 support pae.

Mmm, I noticed conflicting information in the SDM, but you are right :)

One more modification, after double checking with our internal architect: 
the RSVD bit of the error code is not updated if P=0.
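
(In walk_addr() terms that means the present check has to come first, so a
not-present entry faults with P=0 and never reports RSVD; from the repost:)

	if (!is_present_pte(pte))
		goto not_present;	/* P=0: RSVD never set */

	if (rsvd_fault)
		goto access_error;	/* P=1 | RSVD in the error code */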

Thanks and reposted.
Eddie




Emulate #PF error code of reserved bits violation.

Signed-off-by: Eddie Dong eddie.d...@intel.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..4fe2742 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,7 @@ struct kvm_mmu {
union kvm_mmu_page_role base_role;
 
u64 *pae_root;
+   u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
@@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef060ec..0a6f109 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,11 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
 
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
shadow_trap_nonpresent_pte = trap_pte;
@@ -2155,6 +2161,14 @@ static void paging_free(struct kvm_vcpu *vcpu)
nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int bit7;
+
+	bit7 = (gpte >> 7) & 1;
+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2183,6 +2197,25 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+	context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
+	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2190,6 +2223,16 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
+	/* no rsvd bits for 2 level 4K page table entries */
+	context->rsvd_bits_mask[0][1] = 0;
+	context->rsvd_bits_mask[0][0] = 0;
+	if (is_cpuid_PSE36())
+		/* 36bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+	else
+		/* 32 bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
+	context->rsvd_bits_mask[1][0] = 0;
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2205,6 +2248,22 @@ static int paging32_init_context(struct 

Re: RFC: Add reserved bits check

2009-03-29 Thread Avi Kivity

Dong, Eddie wrote:

@@ -2183,6 +2197,25 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)

 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3];
+	context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2];
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
+	context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
  


Just noticed that walk_addr() too can be called from tdp context, so 
need to make sure rsvd_bits_mask is initialized in init_kvm_tdp_mmu() as 
well.
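
(The fix factors the mask setup into reset_rsvds_bits_mask() -- see the
applied patch at the top of this thread -- and calls it from the tdp init
path; the exact placement inside init_kvm_tdp_mmu() is a sketch:)

	/* in init_kvm_tdp_mmu(), matching the paging mode the guest runs in */
	if (is_long_mode(vcpu))
		reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL);
	else if (is_pae(vcpu))
		reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL);
	else
		reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL);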




Re: RFC: Add reserved bits check

2009-03-27 Thread Avi Kivity

Dong, Eddie wrote:
 




Current KVM doesn't check reserved bits of guest page tables, while it may 
use reserved bits to bypass guest #PF in VMX.

 


This patch adds this check, leaving the shadow pte un-constructed if guest 
RSVD=1.

  




--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,8 @@ struct kvm_mmu {
  union kvm_mmu_page_role base_role;
 
  u64 *pae_root;

+ u64 rsvd_bits_mask[4];
+ u64 large_page_rsvd_mask;
 }; 


Make large_page_rsvd_mask an array too, in preparation for 1GB pages?

Perhaps u64 rsvd_bits_mask[2][4].  First index is bit 7 of the pte, 
second index is the level.  Makes for faster run time too.
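
(The run-time win: the walker indexes the table directly instead of
branching on the page-size case, as the later version of the patch does:)

	static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
	{
		int bit7 = (gpte >> 7) & 1;

		return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level - 1]) != 0;
	}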



 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,13 @@ static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
+extern struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(
+ struct kvm_vcpu *vcpu, u32 function, u32 index);
  


This needs to be in a header file, so we don't get random breakage when 
the signature changes.


 
+static int is_rsvd_bits_set(struct kvm_vcpu *vcpu, unsigned long pte, int level)
  


u64 pte... (and bool for return type)

s/pte/gpte/ to make it clear.


@@ -2184,6 +2215,18 @@ static int paging64_init_context_common(struct kvm_vcpu 
*vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)

 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	context->rsvd_bits_mask[3] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[2] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[1] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0] = rsvd_bits(maxphyaddr, 51);
+	context->large_page_rsvd_mask =	/* 2MB PDE */
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
  return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
  


Isn't bit 63 reserved if NX is disabled?
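
(The reposted patch handles exactly this, treating bit 63 as reserved
whenever EFER.NX is off:)

	u64 exb_bit_rsvd = 0;

	if (!is_nx(vcpu))
		exb_bit_rsvd = rsvd_bits(63, 63);	/* bit 63 reserved without NX */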

 
@@ -2206,6 +2258,18 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)

 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	/* 3 levels */
+	context->rsvd_bits_mask[2] = rsvd_bits(maxphyaddr, 63) |
+		rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
  


Will never be used, PDPTEs are loaded by set_cr3(), not walk_addr().

 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h

index 7314c09..844efe9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
  gfn_t table_gfn;
  unsigned index, pt_access, pte_access;
  gpa_t pte_gpa;
+ int rsvd_fault;
 
 	pgprintk("%s: addr %lx\n", __func__, addr);

 walk:
@@ -153,10 +154,13 @@ walk:
 				 walker->level - 1, table_gfn);
 
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
 
+		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
  


Does a not present pte set PFERR_RSVD?

 
 		if (!is_present_pte(pte))
 			goto not_present;
 
+		if (rsvd_fault)
+			goto access_error;
+
 		if (write_fault && !is_writeble_pte(pte))
 			if (user_fault || is_write_protection(vcpu))
 				goto access_error;
@@ -233,6 +237,8 @@ err:
 		walker->error_code |= PFERR_USER_MASK;
 	if (fetch_fault)
 		walker->error_code |= PFERR_FETCH_MASK;
+	if (rsvd_fault)
+		walker->error_code |= PFERR_RSVD_MASK;
  return 0;
 }
 



  





RE: RFC: Add reserved bits check

2009-03-27 Thread Dong, Eddie
 +	context->rsvd_bits_mask[0] = rsvd_bits(maxphyaddr, 51);
 +	context->large_page_rsvd_mask =	/* 2MB PDE */
 +		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
 	return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
 
 Isn't bit 63 reserved if NX is disabled?

Sure.

 
 
 @@ -2206,6 +2258,18 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
  static int paging32E_init_context(struct kvm_vcpu *vcpu)
  {
 +	struct kvm_mmu *context = &vcpu->arch.mmu;
 +	int maxphyaddr = cpuid_maxphyaddr(vcpu);
 +
 +	/* 3 levels */
 +	context->rsvd_bits_mask[2] = rsvd_bits(maxphyaddr, 63) |
 +		rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
 
 
 Will never be used, PDPTEs are loaded by set_cr3(), not walk_addr().
 

I see. Then how about replacing the CR3_PAE_RESERVED_BITS check at cr3 load 
with rsvd_bits_mask[2]? It seems the current code is lacking enough reserved 
bits checks too.

 @@ -153,10 +154,13 @@ walk:
 				 walker->level - 1, table_gfn);
 
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
 +		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
 
 
 Does a not present pte set PFERR_RSVD?

Yes, though most commercial OSes don't use it.
I plan to post a follow-up patch to fix the potential RSVD_fault error code 
mismatch when bypass_guest_pf=1.


RE: RFC: Add reserved bits check

2009-03-27 Thread Dong, Eddie

 Will never be used, PDPTEs are loaded by set_cr3(), not walk_addr().
 
 
 I see. Then how about replacing the CR3_PAE_RESERVED_BITS check at cr3
 load with rsvd_bits_mask[2]? It seems the current code is lacking enough
 reserved bits checks too.
 

Typo, I mean this:

--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -233,7 +233,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
goto out;
}
 	for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-		if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) {
+		if ((pdpte[i] & 1) &&
+		    (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) {
 			ret = 0;
 			goto out;
 		}


Re: RFC: Add reserved bits check

2009-03-27 Thread Avi Kivity

Dong, Eddie wrote:

Will never be use, PDPTEs are loaded by set_cr3(), not walk_addr().




I see. Then how about replacing the CR3_PAE_RESERVED_BITS check at cr3 load 
with rsvd_bits_mask[2]? It seems the current code is lacking enough reserved 
bits checks too.

  


Need to make sure rsvd_bits_mask[] is maintained on ept and npt, then.



RE: RFC: Add reserved bits check

2009-03-27 Thread Dong, Eddie
 
 Need to make sure rsvd_bits_mask[] is maintained on ept and npt, then.

Sure, that will be in the next patch; posting the current modified one here.

Thx, eddie



Current KVM doesn't check reserved bits of guest page table entries, but uses 
reserved bits to bypass guest #PF in VMX.

 

This patch adds a reserved bits check, leaving the shadow pte un-constructed 
if guest RSVD=1.


commit dd1d697edf42953d407c10f4d38c650aafd3d3d5
Author: root r...@eddie-wb.localdomain
Date:   Fri Mar 27 23:35:27 2009 +0800

Emulate #PF error code of reserved bits violation.

Signed-off-by: Eddie Dong eddie.d...@intel.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..4fe2742 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,7 @@ struct kvm_mmu {
union kvm_mmu_page_role base_role;
 
u64 *pae_root;
+   u64 rsvd_bits_mask[2][4];
 };
 
 struct kvm_vcpu_arch {
@@ -791,5 +792,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ef060ec..35af90a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,11 @@ static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
 
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
+
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
shadow_trap_nonpresent_pte = trap_pte;
@@ -2155,6 +2161,15 @@ static void paging_free(struct kvm_vcpu *vcpu)
nonpaging_free(vcpu);
 }
 
+static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
+{
+	int ps = 0;
+
+	if (level == PT_DIRECTORY_LEVEL)
+		ps = !!(gpte & PT_PAGE_SIZE_MASK);
+	return (gpte & vcpu->arch.mmu.rsvd_bits_mask[ps][level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2183,6 +2198,22 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][2] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0][0] = rsvd_bits(maxphyaddr, 51);
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2190,6 +2221,15 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
+	/* no rsvd bits for 2 level 4K page table entries */
+	context->rsvd_bits_mask[0][0] = 0;
+	context->rsvd_bits_mask[0][1] = 0;
+	if (is_cpuid_PSE36())
+		/* 36bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
+	else
+		/* 32 bits PSE 4MB page */
+		context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2205,6 +2245,21 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+	u64 exb_bit_rsvd = 0;
+
+	if (!is_nx(vcpu))
+		exb_bit_rsvd = rsvd_bits(63, 63);
+
+	context->rsvd_bits_mask[0][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 62);	/* PDE */
+	context->rsvd_bits_mask[0][0] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 62);	/* PTE */
+	context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
+		rsvd_bits(maxphyaddr, 62) |
+		rsvd_bits(13, 20);	/* large page */
+
return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h 

RFC: Add reserved bits check

2009-03-26 Thread Dong, Eddie
 



Current KVM doesn't check reserved bits of guest page tables, while it may 
use reserved bits to bypass guest #PF in VMX.

 

This patch adds this check, leaving the shadow pte un-constructed if guest 
RSVD=1.

 

Comments?

Thx, eddie

 

 

 

 

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55fd4c5..9370ff0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -261,6 +261,8 @@ struct kvm_mmu {
  union kvm_mmu_page_role base_role;
 
  u64 *pae_root;
+ u64 rsvd_bits_mask[4];
+ u64 large_page_rsvd_mask;
 };
 
 struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 31ba3cb..7f55c4a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -127,6 +127,7 @@ module_param(oos_shadow, bool, 0644);
 #define PFERR_PRESENT_MASK (1U << 0)
 #define PFERR_WRITE_MASK (1U << 1)
 #define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
 #define PFERR_FETCH_MASK (1U << 4)
 
 #define PT_DIRECTORY_LEVEL 2
@@ -179,6 +180,13 @@ static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
 static u64 __read_mostly shadow_mt_mask;
+extern struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(
+ struct kvm_vcpu *vcpu, u32 function, u32 index);
+
+static inline u64 rsvd_bits(int s, int e)
+{
+	return ((1ULL << (e - s + 1)) - 1) << s;
+}
 
 void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
 {
@@ -251,6 +259,18 @@ static int is_rmap_pte(u64 pte)
  return is_shadow_present_pte(pte);
 }
 
+static int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+	u32 function = 0x80000008;
+	struct kvm_cpuid_entry2 *best;
+
+	best = kvm_find_cpuid_entry(vcpu, function, 0);
+	if (best) {
+		return best->eax & 0xff;
+	}
+	return 40;
+}
+
 static pfn_t spte_to_pfn(u64 pte)
 {
 	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -2156,6 +2176,17 @@ static void paging_free(struct kvm_vcpu *vcpu)
  nonpaging_free(vcpu);
 }
 
+static int is_rsvd_bits_set(struct kvm_vcpu *vcpu, unsigned long pte, int level)
+{
+	if (level == PT_DIRECTORY_LEVEL && (pte & PT_PAGE_SIZE_MASK)) {
+		/* large page */
+		return (pte & vcpu->arch.mmu.large_page_rsvd_mask) != 0;
+	}
+	else
+		/* 4K page */
+		return (pte & vcpu->arch.mmu.rsvd_bits_mask[level-1]) != 0;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -2184,6 +2215,18 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
 
 static int paging64_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	context->rsvd_bits_mask[3] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[2] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[1] =
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8);
+	context->rsvd_bits_mask[0] = rsvd_bits(maxphyaddr, 51);
+	context->large_page_rsvd_mask =	/* 2MB PDE */
+		rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20);
  return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
 }
 
@@ -2191,6 +2234,15 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
+	/* no rsvd bits for 2 level 4K page table entries */
+	context->rsvd_bits_mask[0] = 0;
+	context->rsvd_bits_mask[1] = 0;
+	if (is_cpuid_PSE36())
+		/* 36bits PSE 4MB page */
+		context->large_page_rsvd_mask = rsvd_bits(17, 21);
+	else
+		/* 32 bits PSE 4MB page */
+		context->large_page_rsvd_mask = rsvd_bits(13, 21);
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
 	context->gva_to_gpa = paging32_gva_to_gpa;
@@ -2206,6 +2258,18 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
 
 static int paging32E_init_context(struct kvm_vcpu *vcpu)
 {
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+	int maxphyaddr = cpuid_maxphyaddr(vcpu);
+
+	/* 3 levels */
+	context->rsvd_bits_mask[2] = rsvd_bits(maxphyaddr, 63) |
+		rsvd_bits(7, 8) | rsvd_bits(1, 2);	/* PDPTE */
+	context->rsvd_bits_mask[1] = rsvd_bits(maxphyaddr, 63);	/* PDE */
+	context->rsvd_bits_mask[0] =	/* PTE */
+		rsvd_bits(maxphyaddr, 63) | rsvd_bits(7, 8) | rsvd_bits(1, 2);
+	context->large_page_rsvd_mask =	/* 2M page */
+		rsvd_bits(maxphyaddr, 63) | rsvd_bits(13, 20);
+
  return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 7314c09..844efe9 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
  gfn_t table_gfn;
  unsigned index, pt_access, pte_access;
  gpa_t pte_gpa;
+ int rsvd_fault;
 
 	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
@@ -153,10 +154,13 @@ walk:
 				 walker->level - 1, table_gfn);
 
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
+		rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
 
 		if (!is_present_pte(pte))
 			goto not_present;
 
+		if (rsvd_fault)
+			goto access_error;