Re: [patch 10/10] KVM: MMU: speed up mmu_unsync_walk

2008-09-22 Thread Avi Kivity

Marcelo Tosatti wrote:
>> Also, it may make sense to replace it with an array of u16s.
>
> Why?

They'll be usually empty or near-empty, no? In which case the array is
faster and smaller.
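
(Purely illustrative sketch, not from the patch or the thread: one way the
u16 idea could look, with a hypothetical cap and helper name, and the
overflow fallback only noted in a comment.)

/* Sketch only: keep the indexes of unsynced children as a short array
 * of u16 spt slots instead of a 512-bit bitmap.  The cap and the
 * helper are made-up names for illustration. */
#define KVM_MAX_UNSYNC_CHILDREN 16

struct kvm_unsync_children {
        u16 nr;                                 /* entries in use */
        u16 index[KVM_MAX_UNSYNC_CHILDREN];     /* spt slots with unsynced children */
};

static void unsync_children_add(struct kvm_unsync_children *uc, u16 idx)
{
        if (uc->nr < KVM_MAX_UNSYNC_CHILDREN)
                uc->index[uc->nr++] = idx;
        /* on overflow a real implementation would have to fall back to
         * scanning all 512 sptes (or to the bitmap) */
}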


But I don't think the difference is measurable. So scratch that remark.

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



Re: [patch 10/10] KVM: MMU: speed up mmu_unsync_walk

2008-09-20 Thread Marcelo Tosatti
On Fri, Sep 19, 2008 at 06:26:56PM -0700, Avi Kivity wrote:
>> --- kvm.orig/include/asm-x86/kvm_host.h
>> +++ kvm/include/asm-x86/kvm_host.h
>> @@ -201,6 +201,7 @@ struct kvm_mmu_page {
>>                  u64 *parent_pte;           /* !multimapped */
>>                  struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
>>          };
>> +        DECLARE_BITMAP(unsync_child_bitmap, 512);
>>  };
>>
>
> Later, we can throw this bitmap out to a separate object.

Yeah.

> Also, it may make sense to replace it with an array of u16s.

Why?


Re: [patch 10/10] KVM: MMU: speed up mmu_unsync_walk

2008-09-19 Thread Avi Kivity

Marcelo Tosatti wrote:

Cache the unsynced children information in a per-page bitmap.

 static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
                                     struct kvm_mmu_page *sp)
 {
@@ -946,33 +978,57 @@ static void nonpaging_invlpg(struct kvm_
 static int mmu_unsync_walk(struct kvm_mmu_page *parent, mmu_unsync_fn fn,
                            void *priv)
 {
-        int i, ret;
-        struct kvm_mmu_page *sp = parent;
+        int ret, level, i;
+        u64 ent;
+        struct kvm_mmu_page *sp, *child;
+        struct walk {
+                struct kvm_mmu_page *sp;
+                int pos;
+        } walk[PT64_ROOT_LEVEL];
 
-        while (parent->unsync_children) {
-                for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-                        u64 ent = sp->spt[i];
+        WARN_ON(parent->role.level == PT_PAGE_TABLE_LEVEL);
+
+        if (!parent->unsync_children)
+                return 0;
+
+        memset(&walk, 0, sizeof(walk));
+        level = parent->role.level;
+        walk[level-1].sp = parent;
+
+        do {
+                sp = walk[level-1].sp;
+                i = find_next_bit(sp->unsync_child_bitmap, 512, walk[level-1].pos);
+                if (i < 512) {
+                        walk[level-1].pos = i+1;
+                        ent = sp->spt[i];
 
                         if (is_shadow_present_pte(ent)) {
-                                struct kvm_mmu_page *child;
                                 child = page_header(ent & PT64_BASE_ADDR_MASK);
 
                                 if (child->unsync_children) {
-                                        sp = child;
-                                        break;
+                                        --level;
+                                        walk[level-1].sp = child;
+                                        walk[level-1].pos = 0;
+                                        continue;
                                 }
                                 if (child->unsync) {
                                         ret = fn(child, priv);
+                                        __clear_bit(i, sp->unsync_child_bitmap);
                                         if (ret)
                                                 return ret;
                                 }
                         }
+                        __clear_bit(i, sp->unsync_child_bitmap);
+                } else {
+                        ++level;
+                        if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) {
+                                sp->unsync_children = 0;
+                                if (level-1 < PT64_ROOT_LEVEL)
+                                        walk[level-1].pos = 0;
+                        }
                 }
-                if (i == PT64_ENT_PER_PAGE) {
-                        sp->unsync_children = 0;
-                        sp = parent;
-                }
-        }
+        } while (level <= parent->role.level);
+
         return 0;
 }

--- kvm.orig/include/asm-x86/kvm_host.h
+++ kvm/include/asm-x86/kvm_host.h
@@ -201,6 +201,7 @@ struct kvm_mmu_page {
                 u64 *parent_pte;           /* !multimapped */
                 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
         };
+        DECLARE_BITMAP(unsync_child_bitmap, 512);
 };

Later, we can throw this bitmap out to a separate object.  Also, it may 
make sense to replace it with an array of u16s.
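
(Illustration only, not part of the thread: "a separate object" could mean
allocating the bitmap on demand and keeping just a pointer in struct
kvm_mmu_page, so shadow pages with no unsynced children do not carry the
extra 64 bytes.  The type and helper names below are hypothetical, and they
assume the unsync_child_bitmap field were changed to a pointer.)

struct kvm_unsync_bitmap {
        DECLARE_BITMAP(bits, 512);
};

/* Allocate the tracking object lazily, the first time a child goes unsync. */
static struct kvm_unsync_bitmap *get_unsync_bitmap(struct kvm_mmu_page *sp)
{
        if (!sp->unsync_child_bitmap)
                sp->unsync_child_bitmap = kzalloc(sizeof(*sp->unsync_child_bitmap),
                                                  GFP_ATOMIC);
        return sp->unsync_child_bitmap;
}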


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



[patch 10/10] KVM: MMU: speed up mmu_unsync_walk

2008-09-18 Thread Marcelo Tosatti
Cache the unsynced children information in a per-page bitmap.


Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -924,6 +924,38 @@ static void mmu_parent_walk(struct kvm_v
         } while (level > start_level-1);
 }
 
+static void kvm_mmu_update_unsync_bitmap(u64 *spte)
+{
+        unsigned int index;
+        struct kvm_mmu_page *sp = page_header(__pa(spte));
+
+        index = spte - sp->spt;
+        __set_bit(index, sp->unsync_child_bitmap);
+        sp->unsync_children = 1;
+}
+
+static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
+{
+        struct kvm_pte_chain *pte_chain;
+        struct hlist_node *node;
+        int i;
+
+        if (!sp->parent_pte)
+                return;
+
+        if (!sp->multimapped) {
+                kvm_mmu_update_unsync_bitmap(sp->parent_pte);
+                return;
+        }
+
+        hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
+                for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
+                        if (!pte_chain->parent_ptes[i])
+                                break;
+                        kvm_mmu_update_unsync_bitmap(pte_chain->parent_ptes[i]);
+                }
+}
+
 static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu,
                                     struct kvm_mmu_page *sp)
 {
@@ -946,33 +978,57 @@ static void nonpaging_invlpg(struct kvm_
 static int mmu_unsync_walk(struct kvm_mmu_page *parent, mmu_unsync_fn fn,
                            void *priv)
 {
-        int i, ret;
-        struct kvm_mmu_page *sp = parent;
+        int ret, level, i;
+        u64 ent;
+        struct kvm_mmu_page *sp, *child;
+        struct walk {
+                struct kvm_mmu_page *sp;
+                int pos;
+        } walk[PT64_ROOT_LEVEL];
 
-        while (parent->unsync_children) {
-                for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
-                        u64 ent = sp->spt[i];
+        WARN_ON(parent->role.level == PT_PAGE_TABLE_LEVEL);
+
+        if (!parent->unsync_children)
+                return 0;
+
+        memset(&walk, 0, sizeof(walk));
+        level = parent->role.level;
+        walk[level-1].sp = parent;
+
+        do {
+                sp = walk[level-1].sp;
+                i = find_next_bit(sp->unsync_child_bitmap, 512, walk[level-1].pos);
+                if (i < 512) {
+                        walk[level-1].pos = i+1;
+                        ent = sp->spt[i];
 
                         if (is_shadow_present_pte(ent)) {
-                                struct kvm_mmu_page *child;
                                 child = page_header(ent & PT64_BASE_ADDR_MASK);
 
                                 if (child->unsync_children) {
-                                        sp = child;
-                                        break;
+                                        --level;
+                                        walk[level-1].sp = child;
+                                        walk[level-1].pos = 0;
+                                        continue;
                                 }
                                 if (child->unsync) {
                                         ret = fn(child, priv);
+                                        __clear_bit(i, sp->unsync_child_bitmap);
                                         if (ret)
                                                 return ret;
                                 }
                         }
+                        __clear_bit(i, sp->unsync_child_bitmap);
+                } else {
+                        ++level;
+                        if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) {
+                                sp->unsync_children = 0;
+                                if (level-1 < PT64_ROOT_LEVEL)
+                                        walk[level-1].pos = 0;
+                        }
                 }
-                if (i == PT64_ENT_PER_PAGE) {
-                        sp->unsync_children = 0;
-                        sp = parent;
-                }
-        }
+        } while (level <= parent->role.level);
+
         return 0;
 }
 
@@ -1037,6 +1093,13 @@ static void mmu_sync_children(struct kvm
                 cond_resched_lock(&vcpu->kvm->mmu_lock);
 }
 
+static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+{
+        sp->unsync_children = 1;
+        kvm_mmu_update_parents_unsync(sp);
+        return 1;
+}
+
 static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                                              gfn_t gfn,
                                              gva_t gaddr,
@@ -1075,10 +1138,11 @@ static struct kvm_mmu_page *kvm_mmu_get_
                 if (sp->role.word != role.word)
                         continue;
 
-                if (sp->unsync_children)
-                        vcpu->arch.mmu.need_root_sync = 1;
-
                 mmu_page_add_parent_pte(vcpu, sp, parent_pte);
+