Subject: [patch] KVM: cr3 cache support
From: Ingo Molnar <[EMAIL PROTECTED]>

This enables a KVM-aware Linux guest to make use of the VMX CPU's cr3 cache
feature. The result is cheaper context switches and faster TLB flushes.
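
The guest/host interface is a small table of (guest_cr3, host_cr3) pairs in
the shared para_state page: vmx_set_cr3() fills an entry and programs the
same host_cr3 into the matching VMCS CR3-target slot, so a later guest cr3
load that hits the table does not cause a VM exit. The structure lives in
the paravirt header, which is not part of this diff; roughly (field names
follow the uses below, exact types and layout are an assumption):

struct kvm_cr3_cache_entry {
        u64 guest_cr3;          /* cr3 value as the guest wrote it */
        u64 host_cr3;           /* matching shadow pagetable root (hpa) */
};

struct kvm_cr3_cache {
        /* KVM_CR3_CACHE_SIZE is defined next to this, not in this diff: */
        struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
        u32 max_idx;            /* nr of usable entries, set by the host */
};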

Signed-off-by: Ingo Molnar <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h         |   17 ++-
 drivers/kvm/kvm_main.c    |    2 
 drivers/kvm/mmu.c         |  123 +++++++++++++++++--------
 drivers/kvm/paging_tmpl.h |    2 
 drivers/kvm/vmx.c         |  224 ++++++++++++++++++++++++++++++++++++++--------
 drivers/kvm/vmx.h         |    1 
 6 files changed, 283 insertions(+), 86 deletions(-)
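
Guest side, for reference (not in this patch): on a context switch the guest
looks its new cr3 up in the shared table and, on a hit, loads the cached
host_cr3 directly. That value is also a VMCS CR3-target value, so the write
does not exit; kvm_cr3_cache_sync() below rediscovers the switch on the next
natural exit and updates vcpu->cr3. A hypothetical sketch of such a guest
helper (illustrative only, not the actual guest code):

static void kvm_para_write_cr3(struct kvm_cr3_cache *cache,
                               unsigned long new_cr3)
{
        unsigned int i;

        for (i = 0; i < cache->max_idx; i++) {
                if (cache->entry[i].guest_cr3 == new_cr3) {
                        /* Hit: host_cr3 is a CR3-target value, no exit: */
                        native_write_cr3((unsigned long)cache->entry[i].host_cr3);
                        return;
                }
        }
        /* Miss: ordinary cr3 write, takes the usual VM exit: */
        native_write_cr3(new_cr3);
}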

Index: linux/drivers/kvm/kvm.h
===================================================================
--- linux.orig/drivers/kvm/kvm.h
+++ linux/drivers/kvm/kvm.h
@@ -52,8 +52,8 @@
 
 #define KVM_MAX_VCPUS 1
 #define KVM_MEMORY_SLOTS 4
-#define KVM_NUM_MMU_PAGES 256
-#define KVM_MIN_FREE_MMU_PAGES 5
+#define KVM_NUM_MMU_PAGES 1024
+#define KVM_MIN_FREE_MMU_PAGES 10
 #define KVM_REFILL_PAGES 25
 
 #define FX_IMAGE_SIZE 512
@@ -166,7 +166,7 @@ struct kvm_mmu {
        int root_level;
        int shadow_root_level;
 
-       u64 *pae_root;
+       u64 *pae_root[KVM_CR3_CACHE_SIZE];
 };
 
 #define KVM_NR_MEM_OBJS 20
@@ -240,6 +240,9 @@ struct kvm_vcpu {
        unsigned long cr3;
        struct kvm_vcpu_para_state *para_state;
        hpa_t vm_syscall_hpa;
+       unsigned int cr3_cache_idx;
+       unsigned int cr3_cache_limit;
+       gpa_t guest_cr3_gpa[KVM_CR3_CACHE_SIZE];
        unsigned long cr4;
        unsigned long cr8;
        u64 pdptrs[4]; /* pae */
@@ -400,6 +403,8 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu)
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
 
+void kvm_cr3_cache_clear(struct kvm_vcpu *vcpu);
+
 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
@@ -445,9 +450,9 @@ int emulator_set_dr(struct x86_emulate_c
                    unsigned long value);
 
 void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
-void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr0);
-void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr0);
-void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr0);
+void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
+void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
 void lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
 
 int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
Index: linux/drivers/kvm/kvm_main.c
===================================================================
--- linux.orig/drivers/kvm/kvm_main.c
+++ linux/drivers/kvm/kvm_main.c
@@ -447,7 +447,7 @@ EXPORT_SYMBOL_GPL(set_cr4);
 void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
        if (is_long_mode(vcpu)) {
-               if ( cr3 & CR3_L_MODE_RESEVED_BITS) {
+               if (cr3 & CR3_L_MODE_RESEVED_BITS) {
                        printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n");
                        inject_gp(vcpu);
                        return;
Index: linux/drivers/kvm/mmu.c
===================================================================
--- linux.orig/drivers/kvm/mmu.c
+++ linux/drivers/kvm/mmu.c
@@ -782,7 +782,7 @@ static int nonpaging_map(struct kvm_vcpu
 
 static void mmu_free_roots(struct kvm_vcpu *vcpu)
 {
-       int i;
+       int i, j;
        struct kvm_mmu_page *page;
 
 #ifdef CONFIG_X86_64
@@ -796,21 +796,40 @@ static void mmu_free_roots(struct kvm_vc
                return;
        }
 #endif
-       for (i = 0; i < 4; ++i) {
-               hpa_t root = vcpu->mmu.pae_root[i];
+       /*
+        * Skip to the next cr3 cache entry and free it (if it's occupied):
+        */
+       vcpu->cr3_cache_idx++;
+       if (unlikely(vcpu->cr3_cache_idx >= vcpu->cr3_cache_limit))
+               vcpu->cr3_cache_idx = 0;
 
-               ASSERT(VALID_PAGE(root));
-               root &= PT64_BASE_ADDR_MASK;
-               page = page_header(root);
-               --page->root_count;
-               vcpu->mmu.pae_root[i] = INVALID_PAGE;
+       j = vcpu->cr3_cache_idx;
+       /*
+        * Clear the guest-visible entry:
+        */
+       if (vcpu->para_state) {
+               vcpu->para_state->cr3_cache.entry[j].guest_cr3 = 0;
+               vcpu->para_state->cr3_cache.entry[j].host_cr3 = 0;
+       }
+       ASSERT(vcpu->mmu.pae_root[j]);
+       if (VALID_PAGE(vcpu->mmu.pae_root[j][0])) {
+               vcpu->guest_cr3_gpa[j] = INVALID_PAGE;
+               for (i = 0; i < 4; ++i) {
+                       hpa_t root = vcpu->mmu.pae_root[j][i];
+
+                       ASSERT(VALID_PAGE(root));
+                       root &= PT64_BASE_ADDR_MASK;
+                       page = page_header(root);
+                       --page->root_count;
+                       vcpu->mmu.pae_root[j][i] = INVALID_PAGE;
+               }
        }
        vcpu->mmu.root_hpa = INVALID_PAGE;
 }
 
 static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 {
-       int i;
+       int i, j;
        gfn_t root_gfn;
        struct kvm_mmu_page *page;
 
@@ -829,8 +848,10 @@ static void mmu_alloc_roots(struct kvm_v
                return;
        }
 #endif
+
+       j = vcpu->cr3_cache_idx;
        for (i = 0; i < 4; ++i) {
-               hpa_t root = vcpu->mmu.pae_root[i];
+               hpa_t root = vcpu->mmu.pae_root[j][i];
 
                ASSERT(!VALID_PAGE(root));
                if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
@@ -842,9 +863,14 @@ static void mmu_alloc_roots(struct kvm_v
                                        NULL);
                root = page->page_hpa;
                ++page->root_count;
-               vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
+               vcpu->mmu.pae_root[j][i] = root | PT_PRESENT_MASK;
        }
-       vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
+       vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root[j]);
+       /*
+        * Store the guest-side address too: we need it when the guest
+        * exits, to rediscover which cr3 it switched to:
+        */
+       vcpu->guest_cr3_gpa[j] = vcpu->cr3;
 }
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -877,7 +903,13 @@ static int nonpaging_page_fault(struct k
 
 static void nonpaging_free(struct kvm_vcpu *vcpu)
 {
-       mmu_free_roots(vcpu);
+       int j;
+
+       /*
+        * This will cycle through all existing roots and free them:
+        */
+       for (j = 0; j < KVM_CR3_CACHE_SIZE; j++)
+               mmu_free_roots(vcpu);
 }
 
 static int nonpaging_init_context(struct kvm_vcpu *vcpu)
@@ -896,20 +928,17 @@ static int nonpaging_init_context(struct
        return 0;
 }
 
-static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
-{
-       ++kvm_stat.tlb_flush;
-       kvm_arch_ops->tlb_flush(vcpu);
-}
-
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
        pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
+
        mmu_free_roots(vcpu);
        if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
                kvm_mmu_free_some_pages(vcpu);
        mmu_alloc_roots(vcpu);
-       kvm_mmu_flush_tlb(vcpu);
+       /*
+        * Setting the cr3 will flush the TLB:
+        */
        kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
 }
 
@@ -1194,6 +1223,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_free_some_page
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu_page *page;
+       int j;
 
        while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
                page = container_of(vcpu->kvm->active_mmu_pages.next,
@@ -1207,13 +1237,17 @@ static void free_mmu_pages(struct kvm_vc
                __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
                page->page_hpa = INVALID_PAGE;
        }
-       free_page((unsigned long)vcpu->mmu.pae_root);
+       for (j = 0; j < KVM_CR3_CACHE_SIZE; j++) {
+               ASSERT(vcpu->mmu.pae_root[j]);
+               free_page((unsigned long)vcpu->mmu.pae_root[j]);
+               vcpu->mmu.pae_root[j] = NULL;
+       }
 }
 
 static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
 {
        struct page *page;
-       int i;
+       int i, j;
 
        ASSERT(vcpu);
 
@@ -1230,17 +1264,22 @@ static int alloc_mmu_pages(struct kvm_vc
                ++vcpu->kvm->n_free_mmu_pages;
        }
 
-       /*
-        * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
-        * Therefore we need to allocate shadow page tables in the first
-        * 4GB of memory, which happens to fit the DMA32 zone.
-        */
-       page = alloc_page(GFP_KERNEL | __GFP_DMA32);
-       if (!page)
-               goto error_1;
-       vcpu->mmu.pae_root = page_address(page);
-       for (i = 0; i < 4; ++i)
-               vcpu->mmu.pae_root[i] = INVALID_PAGE;
+       for (j = 0; j < KVM_CR3_CACHE_SIZE; j++) {
+               /*
+                * When emulating 32-bit mode, cr3 is only 32 bits even on
+                * x86_64. Therefore we need to allocate shadow page tables
+                * in the first 4GB of memory, which happens to fit the DMA32
+                * zone:
+                */
+               page = alloc_page(GFP_KERNEL | __GFP_DMA32);
+               if (!page)
+                       goto error_1;
+
+               ASSERT(!vcpu->mmu.pae_root[j]);
+               vcpu->mmu.pae_root[j] = page_address(page);
+               for (i = 0; i < 4; ++i)
+                       vcpu->mmu.pae_root[j][i] = INVALID_PAGE;
+       }
 
        return 0;
 
@@ -1344,15 +1383,19 @@ static void audit_mappings(struct kvm_vc
 {
-       int i;
+       int i, j;
 
-       if (vcpu->mmu.root_level == 4)
+       if (vcpu->mmu.root_level == 4) {
                audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
-       else
-               for (i = 0; i < 4; ++i)
-                       if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
+               return;
+       }
+
+       for (j = 0; j < KVM_CR3_CACHE_SIZE; j++) {
+               for (i = 0; i < 4; ++i) {
+                       if (vcpu->mmu.pae_root[j][i] & PT_PRESENT_MASK) {
                                audit_mappings_page(vcpu,
-                                                   vcpu->mmu.pae_root[i],
-                                                   i << 30,
-                                                   2);
+                                       vcpu->mmu.pae_root[j][i], i << 30, 2);
+                       }
+               }
+       }
 }
 
 static int count_rmaps(struct kvm_vcpu *vcpu)
Index: linux/drivers/kvm/paging_tmpl.h
===================================================================
--- linux.orig/drivers/kvm/paging_tmpl.h
+++ linux/drivers/kvm/paging_tmpl.h
@@ -197,7 +197,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu
        shadow_addr = vcpu->mmu.root_hpa;
        level = vcpu->mmu.shadow_root_level;
        if (level == PT32E_ROOT_LEVEL) {
-               shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
+               shadow_addr = vcpu->mmu.pae_root[vcpu->cr3_cache_idx][(addr >> 30) & 3];
                shadow_addr &= PT64_BASE_ADDR_MASK;
                --level;
        }
Index: linux/drivers/kvm/vmx.c
===================================================================
--- linux.orig/drivers/kvm/vmx.c
+++ linux/drivers/kvm/vmx.c
@@ -794,9 +794,54 @@ static void vmx_set_cr0_no_modeswitch(st
        vcpu->cr0 = cr0;
 }
 
-static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+static void print_area_in_hex(void *area, int size)
 {
-       vmcs_writel(GUEST_CR3, cr3);
+       unsigned char *data = area;
+       int i;
+
+       for (i = 0; i < size; i++, data++) {
+               if (!(i & 15))
+                       printk("\n%p:", data);
+               printk(" %02x", *data);
+       }
+       printk("\n");
+}
+
+/*
+ * Clear the guest side of the cr3 cache:
+ */
+void kvm_cr3_cache_clear(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cr3_cache *cache;
+
+       if (!vcpu->para_state)
+               return;
+       cache = &vcpu->para_state->cr3_cache;
+       memset(cache->entry, 0, sizeof(cache->entry));
+}
+
+static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3_hpa)
+{
+       struct kvm_cr3_cache *cache;
+       int idx;
+
+       vmcs_writel(GUEST_CR3, cr3_hpa);
+       if (!vcpu->para_state)
+               return;
+
+       WARN_ON(vmcs_readl(GUEST_CR3) != vcpu->mmu.root_hpa);
+
+       idx = vcpu->cr3_cache_idx;
+       cache = &vcpu->para_state->cr3_cache;
+
+       /* NOTE: remove these checks once hostile guests are a concern: */
+       WARN_ON(cache->entry[idx].guest_cr3);
+       WARN_ON(cache->entry[idx].host_cr3);
+
+       cache->entry[idx].guest_cr3 = vcpu->cr3;
+       cache->entry[idx].host_cr3 = cr3_hpa;
+
+       vmcs_writel(CR3_TARGET_VALUE0 + idx*2, cr3_hpa);
 }
 
 static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@@ -973,6 +1018,42 @@ static void seg_setup(int seg)
 }
 
 /*
+ * Set up the hardware cr3 cache ("CR3 target values"):
+ */
+static int vmcs_setup_cr3_cache(struct kvm_vcpu *vcpu)
+{
+       unsigned int cr3_target_values, i;
+       u64 msr_val;
+
+       rdmsrl(MSR_IA32_VMX_MISC, msr_val);
+
+       printk("MSR_IA32_VMX_MISC: %016Lx\n", msr_val);
+
+       /*
+        * Bits 24:16: number of supported "CR3 target values" (9 bits):
+        */
+       cr3_target_values = (msr_val >> 16) & ((1 << 9) - 1);
+       printk(" cr3 target values: %d\n", cr3_target_values);
+       if (cr3_target_values > KVM_CR3_CACHE_SIZE) {
+               printk("KVM: limiting cr3 cache size from %d to %d\n",
+                       cr3_target_values, KVM_CR3_CACHE_SIZE);
+               cr3_target_values = KVM_CR3_CACHE_SIZE;
+       }
+
+       vcpu->cr3_cache_idx = 0;
+       vcpu->cr3_cache_limit = cr3_target_values;
+       /*
+        * Initialize. TODO: set this to guest physical memory.
+        */
+       for (i = 0; i < cr3_target_values; i++)
+               vmcs_writel(CR3_TARGET_VALUE0 + i*2, -1UL);
+
+       vmcs_write32(CR3_TARGET_COUNT, cr3_target_values);
+
+       return 0;
+}
+
+/*
  * Sets up the vmcs for emulated real mode.
  */
 static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -1076,7 +1157,10 @@ static int vmx_vcpu_setup(struct kvm_vcp
        vmcs_write32(EXCEPTION_BITMAP, 1 << PF_VECTOR);
        vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
        vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
-       vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
+
+       ret = vmcs_setup_cr3_cache(vcpu);
+       if (ret < 0)
+               goto out;
 
        vmcs_writel(HOST_CR0, read_cr0());  /* 22.2.3 */
        vmcs_writel(HOST_CR4, read_cr4());  /* 22.2.3, 22.2.5 */
@@ -1328,6 +1412,7 @@ static int handle_exception(struct kvm_v
                cr2 = vmcs_readl(EXIT_QUALIFICATION);
 
                spin_lock(&vcpu->kvm->lock);
+               kvm_cr3_cache_clear(vcpu);
                r = kvm_mmu_page_fault(vcpu, cr2, error_code);
                if (r < 0) {
                        spin_unlock(&vcpu->kvm->lock);
@@ -1499,6 +1584,7 @@ int vcpu_register_para(struct kvm_vcpu *
                goto err_skip;
        }
 
+       para_state->cr3_cache.max_idx = vcpu->cr3_cache_limit;
        printk("KVM: para guest successfully registered.\n");
        vcpu->para_state = para_state;
        vcpu->vm_syscall_hpa = vm_syscall_hpa;
@@ -1694,6 +1780,13 @@ static int handle_halt(struct kvm_vcpu *
        return 0;
 }
 
+static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       kvm_run->exit_reason = KVM_EXIT_DEBUG;
+//     printk("got vmcall at RIP %08lx\n", vmcs_readl(GUEST_RIP));
+       vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP)+3);
+       return 1;
+}
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -1711,6 +1804,7 @@ static int (*kvm_vmx_exit_handlers[])(st
        [EXIT_REASON_MSR_WRITE]               = handle_wrmsr,
        [EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
        [EXIT_REASON_HLT]                     = handle_halt,
+       [EXIT_REASON_VMCALL]                  = handle_vmcall,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -1755,48 +1849,43 @@ static int dm_request_for_irq_injection(
                (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
 }
 
-static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static void kvm_cr3_cache_sync(struct kvm_vcpu *vcpu)
 {
-       u8 fail;
-       u16 fs_sel, gs_sel, ldt_sel;
-       int fs_gs_ldt_reload_needed;
-       int r;
+       void *guest_cr3_hva;
+       hpa_t guest_cr3_hpa;
+       u64 *root;
+       int j;
+
+       if (!vcpu->para_state)
+               return;
+
+       guest_cr3_hpa = vmcs_readl(GUEST_CR3);
 
-again:
        /*
-        * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
-        * allow segment selectors with cpl > 0 or ti == 1.
+        * Are they in sync already?
         */
-       fs_sel = read_fs();
-       gs_sel = read_gs();
-       ldt_sel = read_ldt();
-       fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel;
-       if (!fs_gs_ldt_reload_needed) {
-               vmcs_write16(HOST_FS_SELECTOR, fs_sel);
-               vmcs_write16(HOST_GS_SELECTOR, gs_sel);
-       } else {
-               vmcs_write16(HOST_FS_SELECTOR, 0);
-               vmcs_write16(HOST_GS_SELECTOR, 0);
-       }
-
-#ifdef CONFIG_X86_64
-       vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
-       vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
-#else
-       vmcs_writel(HOST_FS_BASE, segment_base(fs_sel));
-       vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
-#endif
+       if (guest_cr3_hpa == vcpu->mmu.root_hpa)
+               return;
 
-       do_interrupt_requests(vcpu, kvm_run);
+       guest_cr3_hva = __va(guest_cr3_hpa);
 
-       if (vcpu->guest_debug.enabled)
-               kvm_guest_debug_pre(vcpu);
+       for (j = 0; j < vcpu->cr3_cache_limit; j++) {
+               root = vcpu->mmu.pae_root[j];
+               WARN_ON(!root);
+               if (root != guest_cr3_hva)
+                       continue;
 
-       fx_save(vcpu->host_fx_image);
-       fx_restore(vcpu->guest_fx_image);
+               vcpu->cr3 = vcpu->guest_cr3_gpa[j];
+               vcpu->cr3_cache_idx = j;
+               vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root[j]);
+               break;
+       }
+       WARN_ON(j == vcpu->cr3_cache_limit);
+}
 
-       save_msrs(vcpu->host_msrs, vcpu->nmsrs);
-       load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+static int __vmx_vcpu_run(struct kvm_vcpu *vcpu)
+{
+       u8 fail;
 
        asm (
                /* Store host registers */
@@ -1917,6 +2006,64 @@ again:
                [cr2]"i"(offsetof(struct kvm_vcpu, cr2))
              : "cc", "memory" );
 
+       return fail;
+}
+
+static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+       u8 fail;
+       u16 fs_sel, gs_sel, ldt_sel;
+       int fs_gs_ldt_reload_needed;
+       int r;
+
+again:
+       /*
+        * Set host fs and gs selectors.  Unfortunately, 22.2.3 does not
+        * allow segment selectors with cpl > 0 or ti == 1.
+        */
+       fs_sel = read_fs();
+       gs_sel = read_gs();
+       ldt_sel = read_ldt();
+       fs_gs_ldt_reload_needed = (fs_sel & 7) | (gs_sel & 7) | ldt_sel;
+       if (!fs_gs_ldt_reload_needed) {
+               vmcs_write16(HOST_FS_SELECTOR, fs_sel);
+               vmcs_write16(HOST_GS_SELECTOR, gs_sel);
+       } else {
+               vmcs_write16(HOST_FS_SELECTOR, 0);
+               vmcs_write16(HOST_GS_SELECTOR, 0);
+       }
+
+#ifdef CONFIG_X86_64
+       vmcs_writel(HOST_FS_BASE, read_msr(MSR_FS_BASE));
+       vmcs_writel(HOST_GS_BASE, read_msr(MSR_GS_BASE));
+#else
+       vmcs_writel(HOST_FS_BASE, segment_base(fs_sel));
+       vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
+#endif
+
+       do_interrupt_requests(vcpu, kvm_run);
+
+       if (vcpu->guest_debug.enabled)
+               kvm_guest_debug_pre(vcpu);
+
+       fx_save(vcpu->host_fx_image);
+       fx_restore(vcpu->guest_fx_image);
+
+       save_msrs(vcpu->host_msrs, vcpu->nmsrs);
+       load_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
+
+       WARN_ON(vmcs_readl(GUEST_CR3) != vcpu->mmu.root_hpa);
+
+       fail = __vmx_vcpu_run(vcpu);
+
+       /*
+        * Figure out whether vcpu->cr3 needs updating because
+        * the guest made use of the cr3 cache:
+        */
+       kvm_cr3_cache_sync(vcpu);
+
+       WARN_ON(vmcs_readl(GUEST_CR3) != vcpu->mmu.root_hpa);
+
        ++kvm_stat.exits;
 
        save_msrs(vcpu->guest_msrs, NR_BAD_MSRS);
@@ -1987,6 +2134,7 @@ again:
 static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
 {
        vmcs_writel(GUEST_CR3, vmcs_readl(GUEST_CR3));
+       kvm_cr3_cache_clear(vcpu);
 }
 
 static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
@@ -2016,7 +2164,7 @@ static void vmx_inject_page_fault(struct
                     INTR_TYPE_EXCEPTION |
                     INTR_INFO_DELIEVER_CODE_MASK |
                     INTR_INFO_VALID_MASK);
-
+       kvm_cr3_cache_clear(vcpu);
 }
 
 static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
Index: linux/drivers/kvm/vmx.h
===================================================================
--- linux.orig/drivers/kvm/vmx.h
+++ linux/drivers/kvm/vmx.h
@@ -292,5 +292,6 @@ enum vmcs_field {
 #define MSR_IA32_VMX_PROCBASED_CTLS            0x482
 #define MSR_IA32_VMX_EXIT_CTLS         0x483
 #define MSR_IA32_VMX_ENTRY_CTLS                0x484
+#define MSR_IA32_VMX_MISC              0x485
 
 #endif
