From ac4dd1782b9f0f51e0c366a1b8db4515d6828df8 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng.yang@intel.com>
Date: Tue, 23 Oct 2007 12:34:42 +0800
Subject: [PATCH] Enable memory mapped TPR shadow(FlexPriority)

This patch based on CR8/TPR patch before, and enable the TPR
shadow(FlexPriority) for 32bit Windows. Since TPR is accessed
very frequently by 32bit Windows, especially SMP guest, with
FlexPriority enabled, we saw significant performance gain.

Signed-off-by: Sheng Yang <sheng.yang@intel.com>
---
 drivers/kvm/kvm.h         |    8 +++-
 drivers/kvm/kvm_main.c    |   35 +++++++++++----
 drivers/kvm/vmx.c         |  105 +++++++++++++++++++++++++++++++++++++++++---
 drivers/kvm/vmx.h         |    3 +
 drivers/kvm/x86_emulate.c |   11 +++++
 drivers/kvm/x86_emulate.h |    4 ++
 6 files changed, 147 insertions(+), 19 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 08b5b21..0751f8e 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -379,6 +379,7 @@ struct kvm {
 	struct kvm_pic *vpic;
 	struct kvm_ioapic *vioapic;
 	int round_robin_prev_vcpu;
+	struct page *apic_access_page;
 };
 
 static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
@@ -503,6 +504,11 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
+int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+				  struct
+				  kvm_userspace_memory_region *mem,
+				  int user_alloc);
+
 hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa);
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
@@ -535,7 +541,7 @@ enum emulation_result {
 };
 
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
-			unsigned long cr2, u16 error_code, int no_decode);
+			unsigned long cr2, u16 error_code, int cmd_type);
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 6f7b31e..afcd84b 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -643,10 +643,10 @@ EXPORT_SYMBOL_GPL(fx_init);
  *
  * Discontiguous memory is allowed, mostly for framebuffers.
  */
-static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
-					  struct
-					  kvm_userspace_memory_region *mem,
-					  int user_alloc)
+int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+				  struct
+				  kvm_userspace_memory_region *mem,
+				  int user_alloc)
 {
 	int r;
 	gfn_t base_gfn;
@@ -776,6 +776,7 @@ out_unlock:
 out:
 	return r;
 }
+EXPORT_SYMBOL_GPL(kvm_vm_ioctl_set_memory_region);
 
 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 					  u32 kvm_nr_mmu_pages)
@@ -1252,14 +1253,21 @@ static int emulator_read_emulated(unsigned long addr,
 		memcpy(val, vcpu->mmio_data, bytes);
 		vcpu->mmio_read_completed = 0;
 		return X86EMUL_CONTINUE;
-	} else if (emulator_read_std(addr, val, bytes, vcpu)
-		   == X86EMUL_CONTINUE)
-		return X86EMUL_CONTINUE;
+	}
 
 	gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+
+	/* For APIC access vmexit */
+	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+		goto mmio;
+
+	if (emulator_read_std(addr, val, bytes, vcpu)
+			== X86EMUL_CONTINUE)
+		return X86EMUL_CONTINUE;
 	if (gpa == UNMAPPED_GVA)
 		return X86EMUL_PROPAGATE_FAULT;
 
+mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
@@ -1297,6 +1305,10 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 	struct kvm_io_device *mmio_dev;
 	gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
+	/* For APIC access vmexit */
+	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
+		goto mmio;
+
 	if (gpa == UNMAPPED_GVA) {
 		kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
 		return X86EMUL_PROPAGATE_FAULT;
@@ -1305,6 +1317,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 	if (emulator_write_phys(vcpu, gpa, val, bytes))
 		return X86EMUL_CONTINUE;
 
+mmio:
 	/*
 	 * Is this MMIO handled locally?
 	 */
@@ -1435,7 +1448,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 			struct kvm_run *run,
 			unsigned long cr2,
 			u16 error_code,
-			int no_decode)
+			int cmd_type)
 {
 	int r;
 
@@ -1444,8 +1457,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 
 	vcpu->mmio_is_write = 0;
 	vcpu->pio.string = 0;
+	vcpu->emulate_ctxt.cmd_type = cmd_type;
 
-	if (!no_decode) {
+	if ((cmd_type & EMULCMD_NO_DECODE) == 0) {
 		int cs_db, cs_l;
 		kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
@@ -2262,7 +2276,8 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		vcpu->mmio_read_completed = 1;
 		vcpu->mmio_needed = 0;
 		r = emulate_instruction(vcpu, kvm_run,
-					vcpu->mmio_fault_cr2, 0, 1);
+					vcpu->mmio_fault_cr2, 0,
+					EMULCMD_NO_DECODE);
 		if (r == EMULATE_DO_MMIO) {
 			/*
 			 * Read-modify-write.  Back to userspace.
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 97814e4..d8155ee 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -86,6 +86,7 @@ static struct vmcs_config {
 	u32 revision_id;
 	u32 pin_based_exec_ctrl;
 	u32 cpu_based_exec_ctrl;
+	u32 cpu_based_2nd_exec_ctrl;
 	u32 vmexit_ctrl;
 	u32 vmentry_ctrl;
 } vmcs_config;
@@ -179,6 +180,29 @@ static inline int vm_need_tpr_shadow(struct kvm *kvm)
 	return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)));
 }
 
+static inline int cpu_has_secondary_exec_ctrls(void)
+{
+	return (vmcs_config.cpu_based_exec_ctrl &
+		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+}
+
+static inline int vm_need_secondary_exec_ctrls(struct kvm *kvm)
+{
+	return ((cpu_has_secondary_exec_ctrls()) && (irqchip_in_kernel(kvm)));
+}
+
+static inline int cpu_has_vmx_virtualize_apic_accesses(void)
+{
+	return (vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+}
+
+static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
+{
+	return ((cpu_has_vmx_virtualize_apic_accesses()) &&
+		(irqchip_in_kernel(kvm)));
+}
+
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -916,6 +940,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	u32 min, opt;
 	u32 _pin_based_exec_control = 0;
 	u32 _cpu_based_exec_control = 0;
+	u32 _cpu_based_2nd_exec_control = 0;
 	u32 _vmexit_control = 0;
 	u32 _vmentry_control = 0;
 
@@ -933,11 +958,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_USE_IO_BITMAPS |
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING;
-#ifdef CONFIG_X86_64
-	opt = CPU_BASED_TPR_SHADOW;
-#else
-	opt = 0;
-#endif
+	opt = CPU_BASED_TPR_SHADOW |
+	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
 				&_cpu_based_exec_control) < 0)
 		return -EIO;
@@ -946,6 +968,18 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 		_cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
 					   ~CPU_BASED_CR8_STORE_EXITING;
 #endif
+	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
+		min = 0;
+		opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
+					&_cpu_based_2nd_exec_control) < 0)
+			return -EIO;
+	}
+#ifndef CONFIG_X86_64
+	if (!(_cpu_based_2nd_exec_control &
+				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+		_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
+#endif
 
 	min = 0;
 #ifdef CONFIG_X86_64
@@ -983,6 +1017,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 
 	vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
 	vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
+	vmcs_conf->cpu_based_2nd_exec_ctrl = _cpu_based_2nd_exec_control;
 	vmcs_conf->vmexit_ctrl         = _vmexit_control;
 	vmcs_conf->vmentry_ctrl        = _vmentry_control;
 
@@ -1421,6 +1456,26 @@ static void seg_setup(int seg)
 	vmcs_write32(sf->ar_bytes, 0x93);
 }
 
+static int alloc_apic_access_page(struct kvm *kvm)
+{
+	struct kvm_userspace_memory_region kvm_userspace_mem;
+	int r;
+
+	r = -EFAULT;
+	/* Top memslot for apic access page */
+	if (kvm->nmemslots == KVM_MEMORY_SLOTS)
+		return r;
+	kvm_userspace_mem.slot = kvm->nmemslots + 1;
+	kvm_userspace_mem.flags = 0;
+	kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL;
+	kvm_userspace_mem.memory_size = PAGE_SIZE;
+	r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem, 0);
+	if (r)
+		return r;
+	kvm->apic_access_page = gfn_to_page(kvm, 0xfee00);
+	return 0;
+}
+
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -1452,8 +1507,14 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 				CPU_BASED_CR8_LOAD_EXITING;
 #endif
 	}
+	if (!vm_need_secondary_exec_ctrls(vmx->vcpu.kvm))
+		exec_control &= ~CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
 
+	if (vm_need_secondary_exec_ctrls(vmx->vcpu.kvm))
+		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+			     vmcs_config.cpu_based_2nd_exec_ctrl);
+
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
 	vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
 	vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
@@ -1522,6 +1583,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
 
+	if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
+		if (alloc_apic_access_page(vmx->vcpu.kvm) != 0)
+			return -ENOMEM;
+
 	return 0;
 }
 
@@ -1610,13 +1675,15 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);  /* 22.2.1 */
 
-#ifdef CONFIG_X86_64
 	vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, 0);
 	if (vm_need_tpr_shadow(vmx->vcpu.kvm))
 		vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
 			     page_to_phys(vmx->vcpu.apic->regs_page));
 	vmcs_write32(TPR_THRESHOLD, 0);
-#endif
+
+	if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
+		vmcs_write64(APIC_ACCESS_ADDR,
+			     page_to_phys(vmx->vcpu.kvm->apic_access_page));
 
 	vmx->vcpu.cr0 = 0x60000010;
 	vmx_set_cr0(&vmx->vcpu, vmx->vcpu.cr0); /* enter rmode */
@@ -2098,6 +2165,27 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	u64 exit_qualification;
+	enum emulation_result er;
+	unsigned long offset;
+
+	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+	offset = exit_qualification & 0xffful;
+
+	er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULCMD_DECODE_ADDR);
+
+	if (er !=  EMULATE_DONE) {
+		printk(KERN_ERR
+		       "Fail to handle apic access vmexit! Offset is 0x%lx\n",
+		       offset);
+		return -ENOTSUPP;
+	}
+	return 1;
+}
+
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -2117,7 +2205,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
 	[EXIT_REASON_HLT]                     = handle_halt,
 	[EXIT_REASON_VMCALL]                  = handle_vmcall,
-	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold
+	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
+	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 };
 
 static const int kvm_vmx_max_exit_handlers =
diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h
index 270d477..11d7341 100644
--- a/drivers/kvm/vmx.h
+++ b/drivers/kvm/vmx.h
@@ -89,6 +89,8 @@ enum vmcs_field {
 	TSC_OFFSET_HIGH                 = 0x00002011,
 	VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
 	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+	APIC_ACCESS_ADDR		= 0x00002014,
+	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
 	VMCS_LINK_POINTER               = 0x00002800,
 	VMCS_LINK_POINTER_HIGH          = 0x00002801,
 	GUEST_IA32_DEBUGCTL             = 0x00002802,
@@ -214,6 +216,7 @@ enum vmcs_field {
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
 
 /*
  * Interruption-information format
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index f858c01..d24166e 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -845,6 +845,11 @@ modrm_done:
 			c->src.type = OP_REG;
 			break;
 		}
+		if (((ctxt->cmd_type & EMULCMD_DECODE_ADDR) != 0) &&
+		    (c->modrm_ea == 0)) {
+			ctxt->cr2 = insn_fetch(u32, c->src.bytes, c->eip);
+			c->eip -= c->src.bytes;
+		}
 		c->src.type = OP_MEM;
 		break;
 	case SrcImm:
@@ -906,6 +911,12 @@ modrm_done:
 		}
 		break;
 	case DstMem:
+		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+		if (((ctxt->cmd_type & EMULCMD_DECODE_ADDR) != 0) &&
+		    (c->modrm_ea == 0)) {
+			ctxt->cr2 = insn_fetch(u32, c->dst.bytes, c->eip);
+			c->eip -= c->dst.bytes;
+		}
 		/*
 		 * For instructions with a ModR/M byte, switch to register
 		 * access if Mod = 3.
diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h
index f03b128..5185916 100644
--- a/drivers/kvm/x86_emulate.h
+++ b/drivers/kvm/x86_emulate.h
@@ -153,6 +153,10 @@ struct x86_emulate_ctxt {
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
 	int mode;
 
+#define EMULCMD_NO_DECODE   (1 << 0)
+#define EMULCMD_DECODE_ADDR (1 << 1)
+	int cmd_type;
+
 	unsigned long cs_base;
 	unsigned long ds_base;
 	unsigned long es_base;
-- 
1.5.2

