[PATCH 1/2] kvm/e500v2: Remove shadow tlb

2010-09-08 Thread Liu Yu
It is unnecessary to keep a shadow tlb.
First, the shadow tlb keeps fixed values in the shadow, which makes things inflexible.
Second, removing the shadow tlb can save a lot of memory.

This patch removes the shadow tlb and calculates the shadow tlb entry value
before we write it to hardware.

Also we use a new struct tlbe_ref to track the relation
between guest tlb entries and pages.

Signed-off-by: Liu Yu yu@freescale.com
---
 arch/powerpc/include/asm/kvm_e500.h |7 +-
 arch/powerpc/kvm/e500_tlb.c |  287 +--
 2 files changed, 108 insertions(+), 186 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_e500.h 
b/arch/powerpc/include/asm/kvm_e500.h
index 7fea26f..cb785f9 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -29,13 +29,18 @@ struct tlbe{
u32 mas7;
 };
 
+struct tlbe_ref {
+   struct page *page;
+   struct tlbe *gtlbe;
+};
+
 struct kvmppc_vcpu_e500 {
/* Unmodified copy of the guest's TLB. */
struct tlbe *guest_tlb[E500_TLB_NUM];
/* TLB that's actually used when the guest is running. */
struct tlbe *shadow_tlb[E500_TLB_NUM];
/* Pages which are referenced in the shadow TLB. */
-   struct page **shadow_pages[E500_TLB_NUM];
+   struct tlbe_ref *shadow_refs[E500_TLB_NUM];
 
unsigned int guest_tlb_size[E500_TLB_NUM];
unsigned int shadow_tlb_size[E500_TLB_NUM];
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index f11ca0f..0b657af 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
+ * Copyright (C) 2008, 2010 Freescale Semiconductor, Inc. All rights reserved.
  *
  * Author: Yu Liu, yu@freescale.com
  *
@@ -48,17 +48,6 @@ void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
tlbe-mas3, tlbe-mas7);
}
}
-
-   for (tlbsel = 0; tlbsel  2; tlbsel++) {
-   printk(Shadow TLB%d:\n, tlbsel);
-   for (i = 0; i  vcpu_e500-shadow_tlb_size[tlbsel]; i++) {
-   tlbe = vcpu_e500-shadow_tlb[tlbsel][i];
-   if (tlbe-mas1  MAS1_VALID)
-   printk( S[%d][%3d] |  %08X | %08X | %08X | 
%08X |\n,
-   tlbsel, i, tlbe-mas1, tlbe-mas2,
-   tlbe-mas3, tlbe-mas7);
-   }
-   }
 }
 
 static inline unsigned int tlb0_get_next_victim(
@@ -121,10 +110,8 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
 }
 
 static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-   int tlbsel, int esel)
+   int tlbsel, int esel, struct tlbe *stlbe)
 {
-   struct tlbe *stlbe = vcpu_e500-shadow_tlb[tlbsel][esel];
-
local_irq_disable();
if (tlbsel == 0) {
__write_host_tlbe(stlbe);
@@ -139,28 +126,12 @@ static inline void write_host_tlbe(struct 
kvmppc_vcpu_e500 *vcpu_e500,
mtspr(SPRN_MAS0, mas0);
}
local_irq_enable();
+   trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe-mas1, stlbe-mas2,
+   stlbe-mas3, stlbe-mas7);
 }
 
 void kvmppc_e500_tlb_load(struct kvm_vcpu *vcpu, int cpu)
 {
-   struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
-   int i;
-   unsigned register mas0;
-
-   /* Load all valid TLB1 entries to reduce guest tlb miss fault */
-   local_irq_disable();
-   mas0 = mfspr(SPRN_MAS0);
-   for (i = 0; i  tlb1_max_shadow_size(); i++) {
-   struct tlbe *stlbe = vcpu_e500-shadow_tlb[1][i];
-
-   if (get_tlb_v(stlbe)) {
-   mtspr(SPRN_MAS0, MAS0_TLBSEL(1)
-   | MAS0_ESEL(to_htlb1_esel(i)));
-   __write_host_tlbe(stlbe);
-   }
-   }
-   mtspr(SPRN_MAS0, mas0);
-   local_irq_enable();
 }
 
 void kvmppc_e500_tlb_put(struct kvm_vcpu *vcpu)
@@ -202,16 +173,19 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 
*vcpu_e500,
 }
 
 static void kvmppc_e500_shadow_release(struct kvmppc_vcpu_e500 *vcpu_e500,
-   int tlbsel, int esel)
+   int stlbsel, int sesel)
 {
-   struct tlbe *stlbe = vcpu_e500-shadow_tlb[tlbsel][esel];
-   struct page *page = vcpu_e500-shadow_pages[tlbsel][esel];
+   struct tlbe_ref *ref;
+   struct page *page;
+
+   ref = vcpu_e500-shadow_refs[stlbsel][sesel];
+   page = ref-page;
 
if (page) {
-   vcpu_e500-shadow_pages[tlbsel][esel] = NULL;
+   ref-page = NULL;
 
-   if (get_tlb_v(stlbe)) {
-   if (tlbe_is_writable(stlbe))
+   if (get_tlb_v(ref-gtlbe)) {
+   if (tlbe_is_writable(ref-gtlbe))
kvm_release_page_dirty(page);
else

[PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0

2010-09-08 Thread Liu Yu
Currently, guest TLB1 is mapped to host TLB1.
As the host kernel only provides 4K non-contiguous pages,
we have to break large guest mappings into 4K shadow mappings.
These 4K shadow mappings are then mapped into host TLB1 on the fly.
As host TLB1 only has 13 free entries, there are serious tlb misses.

Since e500v2 has a large number of TLB0 entries,
it should help to map those 4K shadow mappings to host TLB0.
To achieve this, we need to unlink guest tlb and host tlb,
so that guest TLB1 mappings can route to any host TLB0 entries freely.

Pages/mappings are considered to be of the same kind as host tlb entries.
This patch removes the link between pages and guest tlb entries to do the unlink,
and keeps host_tlb0_ref in each vcpu to track pages.
Then it is easy to map guest TLB1 to host TLB0.

In a guest ramdisk boot test (the guest mainly uses TLB1),
with this patch the tlb miss count goes down by 90%.

Signed-off-by: Liu Yu yu@freescale.com
---
 arch/powerpc/include/asm/kvm_e500.h |7 +-
 arch/powerpc/kvm/e500.c |4 +
 arch/powerpc/kvm/e500_tlb.c |  280 ---
 arch/powerpc/kvm/e500_tlb.h |1 +
 4 files changed, 104 insertions(+), 188 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_e500.h 
b/arch/powerpc/include/asm/kvm_e500.h
index cb785f9..16c0ed0 100644
--- a/arch/powerpc/include/asm/kvm_e500.h
+++ b/arch/powerpc/include/asm/kvm_e500.h
@@ -37,13 +37,10 @@ struct tlbe_ref {
 struct kvmppc_vcpu_e500 {
/* Unmodified copy of the guest's TLB. */
struct tlbe *guest_tlb[E500_TLB_NUM];
-   /* TLB that's actually used when the guest is running. */
-   struct tlbe *shadow_tlb[E500_TLB_NUM];
-   /* Pages which are referenced in the shadow TLB. */
-   struct tlbe_ref *shadow_refs[E500_TLB_NUM];
+   /* Pages which are referenced in host TLB. */
+   struct tlbe_ref *host_tlb0_ref;
 
unsigned int guest_tlb_size[E500_TLB_NUM];
-   unsigned int shadow_tlb_size[E500_TLB_NUM];
unsigned int guest_tlb_nv[E500_TLB_NUM];
 
u32 host_pid[E500_PID_NUM];
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index e8a00b0..14af6d7 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -146,6 +146,10 @@ static int __init kvmppc_e500_init(void)
if (r)
return r;
 
+   r = kvmppc_e500_mmu_init();
+   if (r)
+   return r;
+
/* copy extra E500 exception handlers */
ivor[0] = mfspr(SPRN_IVOR32);
ivor[1] = mfspr(SPRN_IVOR33);
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 0b657af..a6c2320 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -25,9 +25,15 @@
 #include e500_tlb.h
 #include trace.h
 
-#define to_htlb1_esel(esel) (tlb1_entry_num - (esel) - 1)
+static unsigned int host_tlb0_entry_num;
+static unsigned int host_tlb0_assoc;
+static unsigned int host_tlb0_assoc_bit;
 
-static unsigned int tlb1_entry_num;
+static inline unsigned int get_tlb0_entry_offset(u32 eaddr, u32 esel)
+{
+   return ((eaddr  0x7F000)  (12 - host_tlb0_assoc_bit) |
+   (esel  (host_tlb0_assoc - 1)));
+}
 
 void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
 {
@@ -62,11 +68,6 @@ static inline unsigned int tlb0_get_next_victim(
return victim;
 }
 
-static inline unsigned int tlb1_max_shadow_size(void)
-{
-   return tlb1_entry_num - tlbcam_index;
-}
-
 static inline int tlbe_is_writable(struct tlbe *tlbe)
 {
return tlbe-mas3  (MAS3_SW|MAS3_UW);
@@ -100,7 +101,7 @@ static inline u32 e500_shadow_mas2_attrib(u32 mas2, int 
usermode)
 /*
  * writing shadow tlb entry to host TLB
  */
-static inline void __write_host_tlbe(struct tlbe *stlbe)
+static inline void __host_tlbe_write(struct tlbe *stlbe)
 {
mtspr(SPRN_MAS1, stlbe-mas1);
mtspr(SPRN_MAS2, stlbe-mas2);
@@ -109,25 +110,22 @@ static inline void __write_host_tlbe(struct tlbe *stlbe)
__asm__ __volatile__ (tlbwe\n : : );
 }
 
-static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
-   int tlbsel, int esel, struct tlbe *stlbe)
+static inline u32 host_tlb0_write(struct kvmppc_vcpu_e500 *vcpu_e500,
+   u32 gvaddr, struct tlbe *stlbe)
 {
-   local_irq_disable();
-   if (tlbsel == 0) {
-   __write_host_tlbe(stlbe);
-   } else {
-   unsigned register mas0;
+   unsigned register mas0;
 
-   mas0 = mfspr(SPRN_MAS0);
+   local_irq_disable();
 
-   mtspr(SPRN_MAS0, MAS0_TLBSEL(1) | 
MAS0_ESEL(to_htlb1_esel(esel)));
-   __write_host_tlbe(stlbe);
+   mas0 = mfspr(SPRN_MAS0);
+   __host_tlbe_write(stlbe);
 
-   mtspr(SPRN_MAS0, mas0);
-   }
local_irq_enable();
-   trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe-mas1, stlbe-mas2,
+
+   trace_kvm_stlb_write(mas0, stlbe-mas1, stlbe-mas2,
stlbe-mas3, stlbe-mas7);
+
+  

[PATCH 0/2] kvm/e500v2: MMU optimization

2010-09-08 Thread Liu Yu
The patchset aims at mapping guest TLB1 to host TLB0.
And it includes:
[PATCH 1/2] kvm/e500v2: Remove shadow tlb
[PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0

The reason we need patch 1 is that it makes things simple and flexible.
Applying only patch 1 also keeps kvm working.

--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] kvm/e500v2: Remove shadow tlb

2010-09-08 Thread Hollis Blanchard

On 09/08/2010 02:40 AM, Liu Yu wrote:

It is unnecessary to keep a shadow tlb.
First, the shadow tlb keeps fixed values in the shadow, which makes things inflexible.
Second, removing the shadow tlb can save a lot of memory.

This patch removes the shadow tlb and calculates the shadow tlb entry value
before we write it to hardware.

Also we use a new struct tlbe_ref to track the relation
between guest tlb entries and pages.


Did you look at the performance impact?

Back in the day, we did essentially the same thing on 440. However, 
rather than discard the whole TLB when context switching away from the 
host (to be demand-faulted when the guest is resumed), we found a 
noticeable performance improvement by preserving a shadow TLB across 
context switches. We only use it in the vcpu_put/vcpu_load path.


Of course, our TLB was much smaller (64 entries), so the use model may 
not be the same at all (e.g. it takes longer to restore a full guest TLB 
working set, but maybe it's not really possible to use all 1024 TLB0 
entries in one timeslice anyways).


--
Hollis Blanchard
Mentor Graphics, Embedded Systems Division
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] kvm/e500v2: MMU optimization

2010-09-08 Thread Hollis Blanchard

On 09/08/2010 02:40 AM, Liu Yu wrote:

The patchset aims at mapping guest TLB1 to host TLB0.
And it includes:
[PATCH 1/2] kvm/e500v2: Remove shadow tlb
[PATCH 2/2] kvm/e500v2: mapping guest TLB1 to host TLB0

The reason we need patch 1 is that it makes things simple and flexible.
Applying only patch 1 also keeps kvm working.


I've always thought the best long-term optimization on these cores is 
to share in the host PID allocation (i.e. __init_new_context()). This 
way, the TID in guest mappings would not overlap the TID in host 
mappings, and guest mappings could be demand-faulted rather than swapped 
wholesale. To do that, you would need to track the host PID in KVM data 
structures, I guess in the tlbe_ref structure.


--
Hollis Blanchard
Mentor Graphics, Embedded Systems Division
--
To unsubscribe from this list: send the line unsubscribe kvm-ppc in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html