These patches enable memory mapped TPR shadow (FlexPriority).

Since the TPR is accessed very frequently by 32-bit Windows guests, especially
SMP guests, we saw a significant performance gain with FlexPriority enabled.

The issue is that FlexPriority needs to add a memory slot to the VM to make
the TPR shadow work with the APIC access page.

We don't like the idea of adding a memory slot, but there is no better choice
for now. Our proposal is to add a p2m table to KVM, but that still seems to be
a long way off.

BTW: I didn't use the offset (or other info) provided by the CPU when handling
the APIC access vmexit. Instead, I used a bit in cmd_type (which also carries
no_decode) to tell the emulator to decode the memory operand by itself when
necessary. That's because I only get the guest physical address when
handling an APIC access vmexit, but the emulator needs a guest virtual address
to fit its flow. I have tried several approaches, and the current solution
seems to be the most appropriate one.

--
>From 3e83b579d0e9368f0f8223c24eac9898b9623aa2 Mon Sep 17 00:00:00 2001
From: Sheng Yang <[EMAIL PROTECTED]>
Date: Fri, 14 Sep 2007 09:51:54 +0800
Subject: [PATCH] Add a slot for apic access usage, not elegant but no
choice

---
 user/kvmctl.c |   13 +++++++++++++
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/user/kvmctl.c b/user/kvmctl.c
index f358dc1..7e75945 100644
--- a/user/kvmctl.c
+++ b/user/kvmctl.c
@@ -248,6 +248,7 @@ int kvm_create(kvm_context_t kvm, unsigned long
phys_mem_bytes, void **vm_mem)
        unsigned long dosmem = 0xa0000;
        unsigned long exmem = 0xc0000;
        unsigned long pcimem = 0xf0000000;
+       unsigned long apicmem= 0xfee00000;
        unsigned long memory = (phys_mem_bytes + PAGE_SIZE - 1) &
PAGE_MASK;
        int fd = kvm->fd;
        int zfd;
@@ -267,6 +268,11 @@ int kvm_create(kvm_context_t kvm, unsigned long
phys_mem_bytes, void **vm_mem)
                .memory_size = memory < pcimem ? 0 : memory - pcimem,
                .guest_phys_addr = 0x100000000,
        };
+       struct kvm_memory_region apic_memory = {
+               .slot = 5,
+               .memory_size = PAGE_SIZE,
+               .guest_phys_addr = apicmem,
+       };
 
        if (memory >= pcimem)
                extended_memory.memory_size = pcimem - exmem;
@@ -302,9 +308,16 @@ int kvm_create(kvm_context_t kvm, unsigned long
phys_mem_bytes, void **vm_mem)
                }
        }
 
+       r = ioctl(fd, KVM_SET_MEMORY_REGION, &apic_memory);
+       if (r == -1) {
+               fprintf(stderr, "kvm_create_memory_region: %m\n");
+               return -1;
+       }
+
        kvm_memory_region_save_params(kvm, &low_memory);
        kvm_memory_region_save_params(kvm, &extended_memory);
        kvm_memory_region_save_params(kvm, &above_4g_memory);
+       kvm_memory_region_save_params(kvm, &apic_memory);
 
        *vm_mem = mmap(NULL, memory, PROT_READ|PROT_WRITE, MAP_SHARED,
fd, 0);
        if (*vm_mem == MAP_FAILED) {
-- 
1.5.2

>From 5b814299e3fb0912b1337749d42e3ef33b2615e7 Mon Sep 17 00:00:00 2001
From: Sheng Yang <[EMAIL PROTECTED]>
Date: Mon, 24 Sep 2007 16:10:40 +0800
Subject: [PATCH] Enable memory-mapped TPR shadow feature

---
 drivers/kvm/irq.h         |    2 +
 drivers/kvm/kvm.h         |    2 +-
 drivers/kvm/kvm_main.c    |   23 ++++++++++---
 drivers/kvm/lapic.c       |    3 ++
 drivers/kvm/vmx.c         |   76
+++++++++++++++++++++++++++++++++++++++++---
 drivers/kvm/vmx.h         |    3 ++
 drivers/kvm/x86_emulate.c |   14 +++++++-
 drivers/kvm/x86_emulate.h |    4 ++
 8 files changed, 112 insertions(+), 15 deletions(-)

diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index 11fc014..afbfa0c 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -118,6 +118,8 @@ struct kvm_lapic {
        struct kvm_vcpu *vcpu;
        struct page *regs_page;
        void *regs;
+       struct page *apic_access_page;
+       hpa_t apic_access_hpa;
 };
 
 #ifdef DEBUG
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 051cdbe..bb8534a 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -565,7 +565,7 @@ enum emulation_result {
 };
 
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                       unsigned long cr2, u16 error_code, int
no_decode);
+                       unsigned long cr2, u16 error_code, int
cmd_type);
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char
*context);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long
address);
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long
address);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index cecdb1b..0ebae4c 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1080,14 +1080,19 @@ static int emulator_read_emulated(unsigned long
addr,
                memcpy(val, vcpu->mmio_data, bytes);
                vcpu->mmio_read_completed = 0;
                return X86EMUL_CONTINUE;
-       } else if (emulator_read_std(addr, val, bytes, vcpu)
-                  == X86EMUL_CONTINUE)
-               return X86EMUL_CONTINUE;
+       }
 
        gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
+       if ((gpa & PAGE_MASK) == 0xfee00000)
+               goto mmio;
+
+       if (emulator_read_std(addr, val, bytes, vcpu)
+                       == X86EMUL_CONTINUE)
+               return X86EMUL_CONTINUE;
        if (gpa == UNMAPPED_GVA)
                return X86EMUL_PROPAGATE_FAULT;
 
+mmio:
        /*
         * Is this MMIO handled locally?
         */
@@ -1132,6 +1137,9 @@ static int
emulator_write_emulated_onepage(unsigned long addr,
        struct kvm_io_device *mmio_dev;
        gpa_t                 gpa = vcpu->mmu.gva_to_gpa(vcpu, addr);
 
+       if ((gpa & PAGE_MASK) == 0xfee00000)
+               goto mmio;
+
        if (gpa == UNMAPPED_GVA) {
                kvm_x86_ops->inject_page_fault(vcpu, addr, 2);
                return X86EMUL_PROPAGATE_FAULT;
@@ -1140,6 +1148,7 @@ static int
emulator_write_emulated_onepage(unsigned long addr,
        if (emulator_write_phys(vcpu, gpa, val, bytes))
                return X86EMUL_CONTINUE;
 
+mmio:
        /*
         * Is this MMIO handled locally?
         */
@@ -1270,7 +1279,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
                        struct kvm_run *run,
                        unsigned long cr2,
                        u16 error_code,
-                       int no_decode)
+                       int cmd_type)
 {
        int r = 0;
 
@@ -1279,8 +1288,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
 
        vcpu->mmio_is_write = 0;
        vcpu->pio.string = 0;
+       vcpu->emulate_ctxt.cmd_type = cmd_type;
 
-       if (!no_decode) {
+       if ((cmd_type & EMULCMD_NO_DECODE) == 0) {
                int cs_db, cs_l;
                kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
@@ -2073,7 +2083,8 @@ static int kvm_vcpu_ioctl_run(struct kvm_vcpu
*vcpu, struct kvm_run *kvm_run)
                vcpu->mmio_read_completed = 1;
                vcpu->mmio_needed = 0;
                r = emulate_instruction(vcpu, kvm_run,
-                                       vcpu->mmio_fault_cr2, 0, 1);
+                                       vcpu->mmio_fault_cr2, 0,
+                                       EMULCMD_NO_DECODE);
                if (r == EMULATE_DO_MMIO) {
                        /*
                         * Read-modify-write.  Back to userspace.
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index ddf9f20..b59dcda 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -952,6 +952,9 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
        memset(apic->regs, 0, PAGE_SIZE);
        apic->vcpu = vcpu;
 
+       apic->apic_access_page = vcpu->kvm->memslots[5].phys_mem[0];
+       apic->apic_access_hpa = page_to_phys(apic->apic_access_page);
+
        hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS);
        apic->timer.dev.function = apic_timer_fn;
        apic->base_address = APIC_DEFAULT_PHYS_BASE;
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 6f1ad90..a7fe87c 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -85,6 +85,7 @@ static struct vmcs_config {
        u32 revision_id;
        u32 pin_based_exec_ctrl;
        u32 cpu_based_exec_ctrl;
+       u32 cpu_based_2nd_exec_ctrl;
        u32 vmexit_ctrl;
        u32 vmentry_ctrl;
 } vmcs_config;
@@ -178,6 +179,29 @@ static inline int vm_need_tpr_shadow(struct kvm
*kvm)
        return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)));
 }
 
+static inline int cpu_has_secondary_exec_ctrls(void)
+{
+       return (vmcs_config.cpu_based_exec_ctrl & \
+               CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+}
+
+static inline int vm_need_secondary_exec_ctrls(struct kvm *kvm)
+{
+       return ((cpu_has_secondary_exec_ctrls()) &&
(irqchip_in_kernel(kvm)));
+}
+
+static inline int cpu_has_vmx_virtualize_apic_accesses(void)
+{
+       return (vmcs_config.cpu_based_2nd_exec_ctrl & \
+               SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+}
+
+static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
+{
+       return ((cpu_has_vmx_virtualize_apic_accesses()) && \
+               (irqchip_in_kernel(kvm)));
+}
+
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
        int i;
@@ -915,6 +939,7 @@ static __init int setup_vmcs_config(struct
vmcs_config *vmcs_conf)
        u32 min, opt;
        u32 _pin_based_exec_control = 0;
        u32 _cpu_based_exec_control = 0;
+       u32 _cpu_based_2nd_exec_control = 0;
        u32 _vmexit_control = 0;
        u32 _vmentry_control = 0;
 
@@ -932,11 +957,8 @@ static __init int setup_vmcs_config(struct
vmcs_config *vmcs_conf)
              CPU_BASED_USE_IO_BITMAPS |
              CPU_BASED_MOV_DR_EXITING |
              CPU_BASED_USE_TSC_OFFSETING;
-#ifdef CONFIG_X86_64
-       opt = CPU_BASED_TPR_SHADOW;
-#else
-       opt = 0;
-#endif
+       opt = CPU_BASED_TPR_SHADOW |
+             CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
        if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
                                &_cpu_based_exec_control) < 0)
                return -EIO;
@@ -945,6 +967,18 @@ static __init int setup_vmcs_config(struct
vmcs_config *vmcs_conf)
                _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
                                           ~CPU_BASED_CR8_STORE_EXITING;
 #endif
+       if (_cpu_based_exec_control &
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
+               min = 0;
+               opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+               if (adjust_vmx_controls(min, opt,
MSR_IA32_VMX_PROCBASED_CTLS2,
+                                       &_cpu_based_2nd_exec_control) <
0)
+                       return -EIO;
+       }
+#ifndef CONFIG_X86_64
+       if (!(_cpu_based_2nd_exec_control &
+
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+               _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
+#endif
 
        min = 0;
 #ifdef CONFIG_X86_64
@@ -982,6 +1016,7 @@ static __init int setup_vmcs_config(struct
vmcs_config *vmcs_conf)
 
        vmcs_conf->pin_based_exec_ctrl = _pin_based_exec_control;
        vmcs_conf->cpu_based_exec_ctrl = _cpu_based_exec_control;
+       vmcs_conf->cpu_based_2nd_exec_ctrl =
_cpu_based_2nd_exec_control;
        vmcs_conf->vmexit_ctrl         = _vmexit_control;
        vmcs_conf->vmentry_ctrl        = _vmentry_control;
 
@@ -1532,8 +1567,14 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                                CPU_BASED_CR8_LOAD_EXITING;
 #endif
        }
+       if (!vm_need_secondary_exec_ctrls(vmx->vcpu.kvm))
+               exec_control &= ~CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
        vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
 
+       if (vm_need_secondary_exec_ctrls(vmx->vcpu.kvm))
+               vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
+                            vmcs_config.cpu_based_2nd_exec_ctrl);
+
        vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
        vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
        vmcs_write32(CR3_TARGET_COUNT, 0);           /* 22.2.1 */
@@ -1610,6 +1651,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
                             page_to_phys(vmx->vcpu.apic->regs_page));
        vmcs_write32(TPR_THRESHOLD, 0);
 #endif
+       if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
+               vmcs_write64(APIC_ACCESS_ADDR,
+                            vmx->vcpu.apic->apic_access_hpa);
 
        vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
        vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
@@ -2100,6 +2144,25 @@ static int handle_vmcall(struct kvm_vcpu *vcpu,
struct kvm_run *kvm_run)
        return 1;
 }
 
+static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run
*kvm_run)
+{
+       u64 exit_qualification;
+       enum emulation_result er;
+       unsigned long offset;
+
+       exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
+       offset = exit_qualification & 0xffful;
+
+       er = emulate_instruction(vcpu, kvm_run, 0, 0,
EMULCMD_DECODE_ADDR);
+
+       if (er !=  EMULATE_DONE) {
+               BUG();
+               return 0;
+       }
+       return 1;
+}
+
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest
execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate
what needs
@@ -2119,7 +2182,8 @@ static int (*kvm_vmx_exit_handlers[])(struct
kvm_vcpu *vcpu,
        [EXIT_REASON_PENDING_INTERRUPT]       = handle_interrupt_window,
        [EXIT_REASON_HLT]                     = handle_halt,
        [EXIT_REASON_VMCALL]                  = handle_vmcall,
-       [EXIT_REASON_TPR_BELOW_THRESHOLD]     =
handle_tpr_below_threshold
+       [EXIT_REASON_TPR_BELOW_THRESHOLD]     =
handle_tpr_below_threshold,
+       [EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 };
 
 static const int kvm_vmx_max_exit_handlers =
diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h
index fd4e146..07cf1b5 100644
--- a/drivers/kvm/vmx.h
+++ b/drivers/kvm/vmx.h
@@ -89,6 +89,8 @@ enum vmcs_field {
        TSC_OFFSET_HIGH                 = 0x00002011,
        VIRTUAL_APIC_PAGE_ADDR          = 0x00002012,
        VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+       APIC_ACCESS_ADDR                = 0x00002014,
+       APIC_ACCESS_ADDR_HIGH           = 0x00002015,
        VMCS_LINK_POINTER               = 0x00002800,
        VMCS_LINK_POINTER_HIGH          = 0x00002801,
        GUEST_IA32_DEBUGCTL             = 0x00002802,
@@ -214,6 +216,7 @@ enum vmcs_field {
 #define EXIT_REASON_MSR_WRITE           32
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
 
 /*
  * Interruption-information format
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index f294a49..d04e4c6 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -820,13 +820,17 @@ done_prefixes:
                c->src.bytes = 4;
                goto srcmem_common;
        case SrcMem:
-               c->src.bytes = (c->d & ByteOp) ? 1 :
-                                                          c->op_bytes;
+               c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                /* Don't fetch the address for invlpg: it could be
unmapped. */
                if (c->twobyte && c->b == 0x01
                                    && c->modrm_reg == 7)
                        break;
              srcmem_common:
+               if (((ctxt->cmd_type & EMULCMD_DECODE_ADDR) != 0) &&
+                   (c->modrm_ea == 0)) {
+                       ctxt->cr2 = insn_fetch(u32, c->src.bytes,
c->eip);
+                       c->eip -= c->src.bytes;
+               }
                c->src.type = OP_MEM;
                break;
        case SrcImm:
@@ -888,6 +892,12 @@ done_prefixes:
                }
                break;
        case DstMem:
+               c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+               if (((ctxt->cmd_type & EMULCMD_DECODE_ADDR) != 0) &&
+                   (c->modrm_ea == 0)) {
+                       ctxt->cr2 = insn_fetch(u32, c->dst.bytes,
c->eip);
+                       c->eip -= c->dst.bytes;
+               }
                c->dst.type = OP_MEM;
                break;
        }
diff --git a/drivers/kvm/x86_emulate.h b/drivers/kvm/x86_emulate.h
index 28acad4..26dc6b0 100644
--- a/drivers/kvm/x86_emulate.h
+++ b/drivers/kvm/x86_emulate.h
@@ -153,6 +153,10 @@ struct x86_emulate_ctxt {
        /* Emulated execution mode, represented by an X86EMUL_MODE
value. */
        int mode;
 
+#define EMULCMD_NO_DECODE   (1 << 0)
+#define EMULCMD_DECODE_ADDR (1 << 1)
+       int cmd_type;
+
        unsigned long cs_base;
        unsigned long ds_base;
        unsigned long es_base;
-- 
1.5.2


Thanks
Yang, Sheng

Attachment: 0001-Add-a-slot-for-apic-access-usage-not-elegant-but-no.patch
Description: 0001-Add-a-slot-for-apic-access-usage-not-elegant-but-no.patch

Attachment: 0001-Enable-memory-mapped-TPR-shadow-feature.patch
Description: 0001-Enable-memory-mapped-TPR-shadow-feature.patch

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to