Re: [PATCH v3 2/9] KVM: MMU: abstract spte write-protect

2012-04-20 Thread Xiao Guangrong
On 04/21/2012 12:18 PM, Marcelo Tosatti wrote:

> On Sat, Apr 21, 2012 at 11:24:54AM +0800, Xiao Guangrong wrote:
>> On 04/21/2012 05:33 AM, Marcelo Tosatti wrote:
>>
>>
  static bool
  __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
  {
 @@ -1050,24 +1078,13 @@ __rmap_write_protect(struct kvm *kvm, unsigned 
 long *rmapp, int level)

for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
BUG_ON(!(*sptep & PT_PRESENT_MASK));
 -  rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, 
 *sptep);
 -
 -  if (!is_writable_pte(*sptep)) {
 -  sptep = rmap_get_next(&iter);
 -  continue;
 -  }
 -
 -  if (level == PT_PAGE_TABLE_LEVEL) {
 -  mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
 -  sptep = rmap_get_next(&iter);
 -  } else {
 -  BUG_ON(!is_large_pte(*sptep));
 -  drop_spte(kvm, sptep);
 -  --kvm->stat.lpages;
>>>
>>> It is preferable to remove all large sptes including read-only ones, the
>>
>>
>> It can cause page faults even if read memory on these large sptse.
>>
>> Actually, Avi suggested that make large writable spte to be readonly
>> (not dropped) on this path.
> 
> See commits e49146dce8c3dc6f4485c1904b6587855f393e71,
> 38187c830cab84daecb41169948467f1f19317e3 for issues
> with large read-only sptes.
> 


Yes, we need check the code carefully when change writable spte to be
read-only, let us discuss it in the separate patchset later. :)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/9] KVM: MMU: abstract spte write-protect

2012-04-20 Thread Marcelo Tosatti
On Sat, Apr 21, 2012 at 11:24:54AM +0800, Xiao Guangrong wrote:
> On 04/21/2012 05:33 AM, Marcelo Tosatti wrote:
> 
> 
> >>  static bool
> >>  __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
> >>  {
> >> @@ -1050,24 +1078,13 @@ __rmap_write_protect(struct kvm *kvm, unsigned 
> >> long *rmapp, int level)
> >>
> >>for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
> >>BUG_ON(!(*sptep & PT_PRESENT_MASK));
> >> -  rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, 
> >> *sptep);
> >> -
> >> -  if (!is_writable_pte(*sptep)) {
> >> -  sptep = rmap_get_next(&iter);
> >> -  continue;
> >> -  }
> >> -
> >> -  if (level == PT_PAGE_TABLE_LEVEL) {
> >> -  mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
> >> -  sptep = rmap_get_next(&iter);
> >> -  } else {
> >> -  BUG_ON(!is_large_pte(*sptep));
> >> -  drop_spte(kvm, sptep);
> >> -  --kvm->stat.lpages;
> > 
> > It is preferable to remove all large sptes including read-only ones, the
> 
> 
> It can cause page faults even if read memory on these large sptse.
> 
> Actually, Avi suggested that make large writable spte to be readonly
> (not dropped) on this path.

See commits e49146dce8c3dc6f4485c1904b6587855f393e71,
38187c830cab84daecb41169948467f1f19317e3 for issues
with large read-only sptes.

> > current behaviour, then to verify that no read->write transition can
> > occur in fault paths (fault paths which are increasing in number).
> 
> 
> Yes, the small spte also has issue (find a write-protected spte in
> fault paths). Later, the second part of this patchset will introduce
> rmap.WRITE_PROTECTED bit, then we can do the fast check before calling
> fast page fault.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/9] KVM: MMU: abstract spte write-protect

2012-04-20 Thread Xiao Guangrong
On 04/21/2012 09:10 AM, Takuya Yoshikawa wrote:

> On Fri, 20 Apr 2012 18:33:19 -0300
> Marcelo Tosatti  wrote:
> 
>> It is preferable to remove all large sptes including read-only ones, the
>> current behaviour, then to verify that no read->write transition can
>> occur in fault paths (fault paths which are increasing in number).
>>
> 
> I think we should use separate function than spte_write_protect() for
> the large case.


I will introduce a function to handle large sptes when i implement the
idea of making writable spte to be read-only.

But, keep it in this patchset first.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/9] KVM: MMU: abstract spte write-protect

2012-04-20 Thread Xiao Guangrong
On 04/21/2012 05:33 AM, Marcelo Tosatti wrote:


>>  static bool
>>  __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
>>  {
>> @@ -1050,24 +1078,13 @@ __rmap_write_protect(struct kvm *kvm, unsigned long 
>> *rmapp, int level)
>>
>>  for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
>>  BUG_ON(!(*sptep & PT_PRESENT_MASK));
>> -rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, 
>> *sptep);
>> -
>> -if (!is_writable_pte(*sptep)) {
>> -sptep = rmap_get_next(&iter);
>> -continue;
>> -}
>> -
>> -if (level == PT_PAGE_TABLE_LEVEL) {
>> -mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
>> -sptep = rmap_get_next(&iter);
>> -} else {
>> -BUG_ON(!is_large_pte(*sptep));
>> -drop_spte(kvm, sptep);
>> ---kvm->stat.lpages;
> 
> It is preferable to remove all large sptes including read-only ones, the


It can cause page faults even if read memory on these large sptse.

Actually, Avi suggested that make large writable spte to be readonly
(not dropped) on this path.

> current behaviour, then to verify that no read->write transition can
> occur in fault paths (fault paths which are increasing in number).


Yes, the small spte also has issue (find a write-protected spte in
fault paths). Later, the second part of this patchset will introduce
rmap.WRITE_PROTECTED bit, then we can do the fast check before calling
fast page fault.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/9] KVM: MMU: abstract spte write-protect

2012-04-20 Thread Takuya Yoshikawa
On Fri, 20 Apr 2012 18:33:19 -0300
Marcelo Tosatti  wrote:

> It is preferable to remove all large sptes including read-only ones, the
> current behaviour, then to verify that no read->write transition can
> occur in fault paths (fault paths which are increasing in number).
> 

I think we should use separate function than spte_write_protect() for
the large case.

Takuya
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/9] KVM: MMU: abstract spte write-protect

2012-04-20 Thread Marcelo Tosatti
On Fri, Apr 20, 2012 at 04:17:47PM +0800, Xiao Guangrong wrote:
> Introduce a common function to abstract spte write-protect to
> cleanup the code
> 
> Signed-off-by: Xiao Guangrong 
> ---
>  arch/x86/kvm/mmu.c |   60 ++-
>  1 files changed, 35 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 4a3cc18..e70ff38 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1041,6 +1041,34 @@ static void drop_spte(struct kvm *kvm, u64 *sptep)
>   rmap_remove(kvm, sptep);
>  }
> 
> +/* Return true if the spte is dropped. */
> +static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool large,
> +bool *flush)
> +{
> + u64 spte = *sptep;
> +
> + if (!is_writable_pte(spte))
> + return false;
> +
> + *flush |= true;
> +
> + if (large) {
> + pgprintk("rmap_write_protect(large): spte %p %llx\n",
> +  spte, *spte);
> + BUG_ON(!is_large_pte(spte));
> +
> + drop_spte(kvm, sptep);
> + --kvm->stat.lpages;
> + return true;
> + }
> +
> + rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
> + spte = spte & ~PT_WRITABLE_MASK;
> + mmu_spte_update(sptep, spte);
> +
> + return false;
> +}
> +
>  static bool
>  __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
>  {
> @@ -1050,24 +1078,13 @@ __rmap_write_protect(struct kvm *kvm, unsigned long 
> *rmapp, int level)
> 
>   for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
>   BUG_ON(!(*sptep & PT_PRESENT_MASK));
> - rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, 
> *sptep);
> -
> - if (!is_writable_pte(*sptep)) {
> - sptep = rmap_get_next(&iter);
> - continue;
> - }
> -
> - if (level == PT_PAGE_TABLE_LEVEL) {
> - mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
> - sptep = rmap_get_next(&iter);
> - } else {
> - BUG_ON(!is_large_pte(*sptep));
> - drop_spte(kvm, sptep);
> - --kvm->stat.lpages;

It is preferable to remove all large sptes including read-only ones, the
current behaviour, then to verify that no read->write transition can
occur in fault paths (fault paths which are increasing in number).

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/9] KVM: MMU: abstract spte write-protect

2012-04-20 Thread Xiao Guangrong
Introduce a common function to abstract spte write-protect to
cleanup the code

Signed-off-by: Xiao Guangrong 
---
 arch/x86/kvm/mmu.c |   60 ++-
 1 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4a3cc18..e70ff38 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1041,6 +1041,34 @@ static void drop_spte(struct kvm *kvm, u64 *sptep)
rmap_remove(kvm, sptep);
 }

+/* Return true if the spte is dropped. */
+static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool large,
+  bool *flush)
+{
+   u64 spte = *sptep;
+
+   if (!is_writable_pte(spte))
+   return false;
+
+   *flush |= true;
+
+   if (large) {
+   pgprintk("rmap_write_protect(large): spte %p %llx\n",
+spte, *spte);
+   BUG_ON(!is_large_pte(spte));
+
+   drop_spte(kvm, sptep);
+   --kvm->stat.lpages;
+   return true;
+   }
+
+   rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
+   spte = spte & ~PT_WRITABLE_MASK;
+   mmu_spte_update(sptep, spte);
+
+   return false;
+}
+
 static bool
 __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, int level)
 {
@@ -1050,24 +1078,13 @@ __rmap_write_protect(struct kvm *kvm, unsigned long 
*rmapp, int level)

for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
BUG_ON(!(*sptep & PT_PRESENT_MASK));
-   rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, 
*sptep);
-
-   if (!is_writable_pte(*sptep)) {
-   sptep = rmap_get_next(&iter);
-   continue;
-   }
-
-   if (level == PT_PAGE_TABLE_LEVEL) {
-   mmu_spte_update(sptep, *sptep & ~PT_WRITABLE_MASK);
-   sptep = rmap_get_next(&iter);
-   } else {
-   BUG_ON(!is_large_pte(*sptep));
-   drop_spte(kvm, sptep);
-   --kvm->stat.lpages;
+   if (spte_write_protect(kvm, sptep, level > PT_PAGE_TABLE_LEVEL,
+ &write_protected)) {
sptep = rmap_get_first(*rmapp, &iter);
+   continue;
}

-   write_protected = true;
+   sptep = rmap_get_next(&iter);
}

return write_protected;
@@ -3902,6 +3919,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
 {
struct kvm_mmu_page *sp;
+   bool flush = false;

list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) {
int i;
@@ -3916,16 +3934,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
int slot)
  !is_last_spte(pt[i], sp->role.level))
continue;

-   if (is_large_pte(pt[i])) {
-   drop_spte(kvm, &pt[i]);
-   --kvm->stat.lpages;
-   continue;
-   }
-
-   /* avoid RMW */
-   if (is_writable_pte(pt[i]))
-   mmu_spte_update(&pt[i],
-   pt[i] & ~PT_WRITABLE_MASK);
+   spte_write_protect(kvm, &pt[i],
+  is_large_pte(pt[i]), &flush);
}
}
kvm_flush_remote_tlbs(kvm);
-- 
1.7.7.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html