Author: neel
Date: Wed Dec 25 06:46:31 2013
New Revision: 259863
URL: http://svnweb.freebsd.org/changeset/base/259863

Log:
  vlapic code restructuring to make it easy to support hardware-assist for APIC
  emulation.
  
  The vlapic initialization and cleanup are done via processor-specific vmm_ops.
  This will allow the VT-x/SVM modules to layer any hardware-assist for APIC
  emulation or virtual interrupt delivery on top of the vlapic device model.
  
  Add a parameter to 'vcpu_notify_event()' to distinguish between vlapic
  interrupts versus other events (e.g. NMI). This provides an opportunity to
  use hardware-assists like Posted Interrupts (VT-x) or doorbell MSR (SVM)
  to deliver an interrupt to a guest without causing a VM-exit.
  
  Get rid of lapic_pending_intr() and lapic_intr_accepted() and use the
  vlapic_xxx() counterparts directly.
  
  Associate an 'Apic Page' with each vcpu and reference it from the 'vlapic'.
  The 'Apic Page' is intended to be referenced from the Intel VMCS as the
  'virtual APIC page' or from the AMD VMCB as the 'vAPIC backing page'.

Added:
  head/sys/amd64/vmm/io/vlapic_priv.h   (contents, props changed)
Modified:
  head/sys/amd64/include/vmm.h
  head/sys/amd64/vmm/amd/amdv.c
  head/sys/amd64/vmm/intel/vmx.c
  head/sys/amd64/vmm/intel/vmx.h
  head/sys/amd64/vmm/io/vlapic.c
  head/sys/amd64/vmm/io/vlapic.h
  head/sys/amd64/vmm/vmm.c
  head/sys/amd64/vmm/vmm_lapic.c
  head/sys/amd64/vmm/vmm_lapic.h

Modified: head/sys/amd64/include/vmm.h
==============================================================================
--- head/sys/amd64/include/vmm.h        Wed Dec 25 06:09:31 2013        
(r259862)
+++ head/sys/amd64/include/vmm.h        Wed Dec 25 06:46:31 2013        
(r259863)
@@ -69,6 +69,8 @@ typedef int   (*vmi_get_cap_t)(void *vmi, 
 typedef int    (*vmi_set_cap_t)(void *vmi, int vcpu, int num, int val);
 typedef struct vmspace * (*vmi_vmspace_alloc)(vm_offset_t min, vm_offset_t max);
 typedef void   (*vmi_vmspace_free)(struct vmspace *vmspace);
+typedef struct vlapic * (*vmi_vlapic_init)(void *vmi, int vcpu);
+typedef void   (*vmi_vlapic_cleanup)(void *vmi, struct vlapic *vlapic);
 
 struct vmm_ops {
        vmm_init_func_t         init;           /* module wide initialization */
@@ -87,6 +89,8 @@ struct vmm_ops {
        vmi_set_cap_t           vmsetcap;
        vmi_vmspace_alloc       vmspace_alloc;
        vmi_vmspace_free        vmspace_free;
+       vmi_vlapic_init         vlapic_init;
+       vmi_vlapic_cleanup      vlapic_cleanup;
 };
 
 extern struct vmm_ops vmm_ops_intel;
@@ -159,7 +163,7 @@ vcpu_is_running(struct vm *vm, int vcpu,
 }
 
 void *vcpu_stats(struct vm *vm, int vcpu);
-void vcpu_notify_event(struct vm *vm, int vcpuid);
+void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
 struct vmspace *vm_get_vmspace(struct vm *vm);
 int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
 int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);

Modified: head/sys/amd64/vmm/amd/amdv.c
==============================================================================
--- head/sys/amd64/vmm/amd/amdv.c       Wed Dec 25 06:09:31 2013        
(r259862)
+++ head/sys/amd64/vmm/amd/amdv.c       Wed Dec 25 06:46:31 2013        
(r259863)
@@ -155,6 +155,20 @@ amdv_vmspace_free(struct vmspace *vmspac
        return;
 }
 
+static struct vlapic *
+amdv_vlapic_init(void *arg, int vcpuid)
+{
+       /* Placeholder: not implemented, so any call panics. */
+       panic("amdv_vlapic_init: not implemented");
+}
+
+static void
+amdv_vlapic_cleanup(void *arg, struct vlapic *vlapic)
+{
+       /* Placeholder: not implemented, so any call panics. */
+       panic("amdv_vlapic_cleanup: not implemented");
+}
+
 struct vmm_ops vmm_ops_amd = {
        amdv_init,
        amdv_cleanup,
@@ -171,6 +185,8 @@ struct vmm_ops vmm_ops_amd = {
        amdv_setcap,
        amdv_vmspace_alloc,
        amdv_vmspace_free,
+       amdv_vlapic_init,
+       amdv_vlapic_cleanup,
 };
 
 static int

Modified: head/sys/amd64/vmm/intel/vmx.c
==============================================================================
--- head/sys/amd64/vmm/intel/vmx.c      Wed Dec 25 06:09:31 2013        
(r259862)
+++ head/sys/amd64/vmm/intel/vmx.c      Wed Dec 25 06:46:31 2013        
(r259863)
@@ -50,10 +50,11 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/vmm.h>
 #include "vmm_host.h"
-#include "vmm_lapic.h"
 #include "vmm_msr.h"
 #include "vmm_ktr.h"
 #include "vmm_stat.h"
+#include "vlapic.h"
+#include "vlapic_priv.h"
 
 #include "vmx_msr.h"
 #include "ept.h"
@@ -112,7 +113,8 @@ __FBSDID("$FreeBSD$");
 #define        HANDLED         1
 #define        UNHANDLED       0
 
-MALLOC_DEFINE(M_VMX, "vmx", "vmx");
+static MALLOC_DEFINE(M_VMX, "vmx", "vmx");
+static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
 
 SYSCTL_DECL(_hw_vmm);
 SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW, NULL, NULL);
@@ -1033,7 +1035,7 @@ nmiblocked:
 }
 
 static void
-vmx_inject_interrupts(struct vmx *vmx, int vcpu)
+vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
 {
        int vector;
        uint64_t info, rflags, interruptibility;
@@ -1059,7 +1061,7 @@ vmx_inject_interrupts(struct vmx *vmx, i
                return;
 
        /* Ask the local apic for a vector to inject */
-       vector = lapic_pending_intr(vmx->vm, vcpu);
+       vector = vlapic_pending_intr(vlapic);
        if (vector < 0)
                return;
 
@@ -1081,7 +1083,7 @@ vmx_inject_interrupts(struct vmx *vmx, i
        vmcs_write(VMCS_ENTRY_INTR_INFO, info);
 
        /* Update the Local APIC ISR */
-       lapic_intr_accepted(vmx->vm, vcpu, vector);
+       vlapic_intr_accepted(vlapic, vector);
 
        VCPU_CTR1(vmx->vm, vcpu, "Injecting hwintr at vector %d", vector);
 
@@ -1459,11 +1461,13 @@ vmx_run(void *arg, int vcpu, register_t 
        struct vmxctx *vmxctx;
        struct vmcs *vmcs;
        struct vm_exit *vmexit;
+       struct vlapic *vlapic;
 
        vmx = arg;
        vmcs = &vmx->vmcs[vcpu];
        vmxctx = &vmx->ctx[vcpu];
        vmxctx->launched = 0;
+       vlapic = vm_lapic(vmx->vm, vcpu);
 
        astpending = 0;
        vmexit = vm_exitinfo(vmx->vm, vcpu);
@@ -1491,7 +1495,7 @@ vmx_run(void *arg, int vcpu, register_t 
        vmx_set_pcpu_defaults(vmx, vcpu);
 
        do {
-               vmx_inject_interrupts(vmx, vcpu);
+               vmx_inject_interrupts(vmx, vcpu, vlapic);
                vmx_run_trace(vmx, vcpu);
                rc = vmx_setjmp(vmxctx);
 #ifdef SETJMP_TRACE
@@ -1963,6 +1967,32 @@ vmx_setcap(void *arg, int vcpu, int type
         return (retval);
 }
 
+static struct vlapic *
+vmx_vlapic_init(void *arg, int vcpuid)
+{
+       struct vmx *vmx;
+       struct vlapic *vlapic;
+       
+       vmx = arg;
+       /* Allocate a zeroed vlapic and bind it to this vcpu's apic page. */
+       vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
+       vlapic->vm = vmx->vm;
+       vlapic->vcpuid = vcpuid;
+       vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid];
+       /* vm, vcpuid and apic_page are set first: vlapic_init() asserts them. */
+       vlapic_init(vlapic);
+
+       return (vlapic);
+}
+
+static void
+vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic)
+{
+       /* Run the device-model cleanup first, then release the M_VLAPIC memory. */
+       vlapic_cleanup(vlapic);
+       free(vlapic, M_VLAPIC);
+}
+
 struct vmm_ops vmm_ops_intel = {
        vmx_init,
        vmx_cleanup,
@@ -1979,4 +2009,6 @@ struct vmm_ops vmm_ops_intel = {
        vmx_setcap,
        ept_vmspace_alloc,
        ept_vmspace_free,
+       vmx_vlapic_init,
+       vmx_vlapic_cleanup,
 };

Modified: head/sys/amd64/vmm/intel/vmx.h
==============================================================================
--- head/sys/amd64/vmm/intel/vmx.h      Wed Dec 25 06:09:31 2013        
(r259862)
+++ head/sys/amd64/vmm/intel/vmx.h      Wed Dec 25 06:46:31 2013        
(r259863)
@@ -92,9 +92,15 @@ struct vmxstate {
        uint16_t vpid;
 };
 
+struct apic_page {
+       uint32_t reg[PAGE_SIZE / 4];    /* the page as an array of 32-bit words */
+};
+CTASSERT(sizeof(struct apic_page) == PAGE_SIZE);       /* exactly one page */
+
 /* virtual machine softc */
 struct vmx {
        struct vmcs     vmcs[VM_MAXCPU];        /* one vmcs per virtual cpu */
+       struct apic_page apic_page[VM_MAXCPU];  /* one apic page per vcpu */
        char            msr_bitmap[PAGE_SIZE];
        struct msr_entry guest_msrs[VM_MAXCPU][GUEST_MSR_MAX_ENTRIES];
        struct vmxctx   ctx[VM_MAXCPU];

Modified: head/sys/amd64/vmm/io/vlapic.c
==============================================================================
--- head/sys/amd64/vmm/io/vlapic.c      Wed Dec 25 06:09:31 2013        
(r259862)
+++ head/sys/amd64/vmm/io/vlapic.c      Wed Dec 25 06:46:31 2013        
(r259863)
@@ -37,16 +37,21 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/smp.h>
 
-#include <machine/clock.h>
 #include <x86/specialreg.h>
 #include <x86/apicreg.h>
 
+#include <machine/clock.h>
+#include <machine/smp.h>
+
 #include <machine/vmm.h>
 
-#include "vmm_stat.h"
+#include "vmm_ipi.h"
 #include "vmm_lapic.h"
 #include "vmm_ktr.h"
+#include "vmm_stat.h"
+
 #include "vlapic.h"
+#include "vlapic_priv.h"
 #include "vioapic.h"
 
 #define        VLAPIC_CTR0(vlapic, format)                                     
\
@@ -60,7 +65,7 @@ __FBSDID("$FreeBSD$");
 
 #define        VLAPIC_CTR_IRR(vlapic, msg)                                     
\
 do {                                                                   \
-       uint32_t *irrptr = &(vlapic)->apic.irr0;                        \
+       uint32_t *irrptr = &(vlapic)->apic_page->irr0;                  \
        irrptr[0] = irrptr[0];  /* silence compiler */                  \
        VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]);      \
        VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]);      \
@@ -74,7 +79,7 @@ do {                                                          
        \
 
 #define        VLAPIC_CTR_ISR(vlapic, msg)                                     
\
 do {                                                                   \
-       uint32_t *isrptr = &(vlapic)->apic.isr0;                        \
+       uint32_t *isrptr = &(vlapic)->apic_page->isr0;                  \
        isrptr[0] = isrptr[0];  /* silence compiler */                  \
        VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]);      \
        VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]);      \
@@ -86,8 +91,6 @@ do {                                                          
        \
        VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]);      \
 } while (0)
 
-static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic");
-
 #define        PRIO(x)                 ((x) >> 4)
 
 #define VLAPIC_VERSION         (16)
@@ -95,40 +98,6 @@ static MALLOC_DEFINE(M_VLAPIC, "vlapic",
 
 #define        x2apic(vlapic)  (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0)
 
-enum boot_state {
-       BS_INIT,
-       BS_SIPI,
-       BS_RUNNING
-};
-
-struct vlapic {
-       struct vm               *vm;
-       int                     vcpuid;
-
-       struct LAPIC            apic;
-
-       uint32_t                esr_pending;
-       int                     esr_firing;
-
-       struct callout  callout;        /* vlapic timer */
-       struct bintime  timer_fire_bt;  /* callout expiry time */
-       struct bintime  timer_freq_bt;  /* timer frequency */
-       struct bintime  timer_period_bt; /* timer period */
-       struct mtx      timer_mtx;
-
-       /*
-        * The 'isrvec_stk' is a stack of vectors injected by the local apic.
-        * A vector is popped from the stack when the processor does an EOI.
-        * The vector on the top of the stack is used to compute the
-        * Processor Priority in conjunction with the TPR.
-        */
-       uint8_t                  isrvec_stk[ISRVEC_STK_SIZE];
-       int                      isrvec_stk_top;
-
-       uint64_t                msr_apicbase;
-       enum boot_state         boot_state;
-};
-
 /*
  * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
  * vlapic_callout_handler() and vcpu accesses to the following registers:
@@ -163,7 +132,7 @@ vlapic_get_ldr(struct vlapic *vlapic)
        int apicid;
        uint32_t ldr;
 
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
        if (x2apic(vlapic)) {
                apicid = vlapic_get_id(vlapic);
                ldr = 1 << (apicid & 0xf);
@@ -178,7 +147,7 @@ vlapic_get_dfr(struct vlapic *vlapic)
 {
        struct LAPIC *lapic;
 
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
        if (x2apic(vlapic))
                return (0);
        else
@@ -196,7 +165,7 @@ vlapic_set_dfr(struct vlapic *vlapic, ui
                return;
        }
 
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
        dfr = (lapic->dfr & APIC_DFR_RESERVED) | (data & APIC_DFR_MODEL_MASK);
        if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT)
                VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model");
@@ -219,7 +188,7 @@ vlapic_set_ldr(struct vlapic *vlapic, ui
                return;
        }
 
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
        lapic->ldr = data & ~APIC_LDR_RESERVED;
        VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr);
 }
@@ -277,7 +246,7 @@ vlapic_get_ccr(struct vlapic *vlapic)
        uint32_t ccr;
        
        ccr = 0;
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
 
        VLAPIC_TIMER_LOCK(vlapic);
        if (callout_active(&vlapic->callout)) {
@@ -307,7 +276,7 @@ vlapic_set_dcr(struct vlapic *vlapic, ui
        struct LAPIC *lapic;
        int divisor;
        
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
        VLAPIC_TIMER_LOCK(vlapic);
 
        lapic->dcr_timer = dcr;
@@ -330,7 +299,9 @@ vlapic_set_dcr(struct vlapic *vlapic, ui
 static void
 vlapic_update_errors(struct vlapic *vlapic)
 {
-       struct LAPIC    *lapic = &vlapic->apic;
+       struct LAPIC *lapic;
+       
+       lapic = vlapic->apic_page;
        lapic->esr = vlapic->esr_pending;
        vlapic->esr_pending = 0;
 }
@@ -340,7 +311,7 @@ vlapic_reset(struct vlapic *vlapic)
 {
        struct LAPIC *lapic;
        
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
        bzero(lapic, sizeof(struct LAPIC));
 
        lapic->version = VLAPIC_VERSION;
@@ -360,7 +331,7 @@ vlapic_reset(struct vlapic *vlapic)
 void
 vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
 {
-       struct LAPIC    *lapic = &vlapic->apic;
+       struct LAPIC    *lapic = vlapic->apic_page;
        uint32_t        *irrptr, *tmrptr, mask;
        int             idx;
 
@@ -401,7 +372,7 @@ vlapic_set_intr_ready(struct vlapic *vla
 static __inline uint32_t *
 vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
 {
-       struct LAPIC    *lapic = &vlapic->apic;
+       struct LAPIC    *lapic = vlapic->apic_page;
        int              i;
 
        switch (offset) {
@@ -428,7 +399,7 @@ vlapic_set_lvt(struct vlapic *vlapic, ui
        uint32_t *lvtptr, mask;
        struct LAPIC *lapic;
        
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
        lvtptr = vlapic_get_lvtptr(vlapic, offset);     
 
        if (offset == APIC_OFFSET_TIMER_LVT)
@@ -475,7 +446,7 @@ vlapic_fire_lvt(struct vlapic *vlapic, u
                        return (0);
                }
                vlapic_set_intr_ready(vlapic, vec, false);
-               vcpu_notify_event(vlapic->vm, vlapic->vcpuid);
+               vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true);
                break;
        case APIC_LVT_DM_NMI:
                vm_inject_nmi(vlapic->vm, vlapic->vcpuid);
@@ -494,7 +465,7 @@ dump_isrvec_stk(struct vlapic *vlapic)
        int i;
        uint32_t *isrptr;
 
-       isrptr = &vlapic->apic.isr0;
+       isrptr = &vlapic->apic_page->isr0;
        for (i = 0; i < 8; i++)
                printf("ISR%d 0x%08x\n", i, isrptr[i * 4]);
 
@@ -519,7 +490,7 @@ vlapic_update_ppr(struct vlapic *vlapic)
         * bits is set in the ISRx registers.
         */
        isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top];
-       tpr = vlapic->apic.tpr;
+       tpr = vlapic->apic_page->tpr;
 
 #if 1
        {
@@ -548,7 +519,7 @@ vlapic_update_ppr(struct vlapic *vlapic)
                 * corresponding entry on the isrvec stack.
                 */
                i = 1;
-               isrptr = &vlapic->apic.isr0;
+               isrptr = &vlapic->apic_page->isr0;
                for (vector = 0; vector < 256; vector++) {
                        idx = (vector / 32) * 4;
                        if (isrptr[idx] & (1 << (vector % 32))) {
@@ -568,14 +539,14 @@ vlapic_update_ppr(struct vlapic *vlapic)
        else
                ppr = isrvec & 0xf0;
 
-       vlapic->apic.ppr = ppr;
+       vlapic->apic_page->ppr = ppr;
        VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr);
 }
 
 static void
 vlapic_process_eoi(struct vlapic *vlapic)
 {
-       struct LAPIC    *lapic = &vlapic->apic;
+       struct LAPIC    *lapic = vlapic->apic_page;
        uint32_t        *isrptr, *tmrptr;
        int             i, idx, bitpos, vector;
 
@@ -735,7 +706,7 @@ vlapic_callout_handler(void *arg)
 
        callout_deactivate(&vlapic->callout);
 
-       KASSERT(vlapic->apic.icr_timer != 0, ("vlapic timer is disabled"));
+       KASSERT(vlapic->apic_page->icr_timer != 0, ("timer is disabled"));
 
        vlapic_fire_timer(vlapic);
 
@@ -789,7 +760,7 @@ vlapic_set_icr_timer(struct vlapic *vlap
 
        VLAPIC_TIMER_LOCK(vlapic);
 
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
        lapic->icr_timer = icr_timer;
 
        vlapic->timer_period_bt = vlapic->timer_freq_bt;
@@ -1021,7 +992,7 @@ lapic_process_icr(struct vlapic *vlapic,
 int
 vlapic_pending_intr(struct vlapic *vlapic)
 {
-       struct LAPIC    *lapic = &vlapic->apic;
+       struct LAPIC    *lapic = vlapic->apic_page;
        int              idx, i, bitpos, vector;
        uint32_t        *irrptr, val;
 
@@ -1050,7 +1021,7 @@ vlapic_pending_intr(struct vlapic *vlapi
 void
 vlapic_intr_accepted(struct vlapic *vlapic, int vector)
 {
-       struct LAPIC    *lapic = &vlapic->apic;
+       struct LAPIC    *lapic = vlapic->apic_page;
        uint32_t        *irrptr, *isrptr;
        int             idx, stk_top;
 
@@ -1087,7 +1058,7 @@ lapic_set_svr(struct vlapic *vlapic, uin
        struct LAPIC *lapic;
        uint32_t old, changed;
 
-       lapic = &vlapic->apic;
+       lapic = vlapic->apic_page;
        old = lapic->svr;
        changed = old ^ new;
        if ((changed & APIC_SVR_ENABLE) != 0) {
@@ -1115,7 +1086,7 @@ lapic_set_svr(struct vlapic *vlapic, uin
 int
 vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu)
 {
-       struct LAPIC    *lapic = &vlapic->apic;
+       struct LAPIC    *lapic = vlapic->apic_page;
        uint32_t        *reg;
        int              i;
 
@@ -1182,13 +1153,13 @@ vlapic_read(struct vlapic *vlapic, uint6
                case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
                        *data = vlapic_get_lvt(vlapic, offset); 
                        break;
-               case APIC_OFFSET_ICR:
+               case APIC_OFFSET_TIMER_ICR:
                        *data = lapic->icr_timer;
                        break;
-               case APIC_OFFSET_CCR:
+               case APIC_OFFSET_TIMER_CCR:
                        *data = vlapic_get_ccr(vlapic);
                        break;
-               case APIC_OFFSET_DCR:
+               case APIC_OFFSET_TIMER_DCR:
                        *data = lapic->dcr_timer;
                        break;
                case APIC_OFFSET_RRR:
@@ -1204,7 +1175,7 @@ done:
 int
 vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu)
 {
-       struct LAPIC    *lapic = &vlapic->apic;
+       struct LAPIC    *lapic = vlapic->apic_page;
        int             retval;
 
        VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data);
@@ -1252,11 +1223,11 @@ vlapic_write(struct vlapic *vlapic, uint
                case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
                        vlapic_set_lvt(vlapic, offset, data);
                        break;
-               case APIC_OFFSET_ICR:
+               case APIC_OFFSET_TIMER_ICR:
                        vlapic_set_icr_timer(vlapic, data);
                        break;
 
-               case APIC_OFFSET_DCR:
+               case APIC_OFFSET_TIMER_DCR:
                        vlapic_set_dcr(vlapic, data);
                        break;
 
@@ -1270,7 +1241,7 @@ vlapic_write(struct vlapic *vlapic, uint
                case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7:
                case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7:
                case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
-               case APIC_OFFSET_CCR:
+               case APIC_OFFSET_TIMER_CCR:
                default:
                        // Read only.
                        break;
@@ -1279,14 +1250,14 @@ vlapic_write(struct vlapic *vlapic, uint
        return (retval);
 }
 
-struct vlapic *
-vlapic_init(struct vm *vm, int vcpuid)
+void
+vlapic_init(struct vlapic *vlapic)
 {
-       struct vlapic           *vlapic;
-
-       vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO);
-       vlapic->vm = vm;
-       vlapic->vcpuid = vcpuid;
+       KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
+       KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU,
+           ("vlapic_init: vcpuid is not initialized"));
+       KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
+           "initialized"));
 
        /*
         * If the vlapic is configured in x2apic mode then it will be
@@ -1300,12 +1271,10 @@ vlapic_init(struct vm *vm, int vcpuid)
 
        vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
 
-       if (vcpuid == 0)
+       if (vlapic->vcpuid == 0)
                vlapic->msr_apicbase |= APICBASE_BSP;
 
        vlapic_reset(vlapic);
-
-       return (vlapic);
 }
 
 void
@@ -1313,7 +1282,6 @@ vlapic_cleanup(struct vlapic *vlapic)
 {
 
        callout_drain(&vlapic->callout);
-       free(vlapic, M_VLAPIC);
 }
 
 uint64_t
@@ -1378,10 +1346,25 @@ vlapic_deliver_intr(struct vm *vm, bool 
        }
 }
 
+void
+vlapic_post_intr(struct vlapic *vlapic, int hostcpu)
+{
+       /*
+        * Post an interrupt to the vcpu currently running on 'hostcpu'.
+        *
+        * Hardware features such as Posted Interrupts (Intel) or the
+        * Doorbell MSR (AMD AVIC) can do this without causing a VM exit.
+        *
+        * This implementation uses neither of those features: it simply
+        * sends the 'vmm_ipinum' IPI to 'hostcpu'; 'vlapic' is unused.
+        */
+       ipi_cpu(hostcpu, vmm_ipinum);
+}
+
 bool
 vlapic_enabled(struct vlapic *vlapic)
 {
-       struct LAPIC *lapic = &vlapic->apic;
+       struct LAPIC *lapic = vlapic->apic_page;
 
        if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 &&
            (lapic->svr & APIC_SVR_ENABLE) != 0)

Modified: head/sys/amd64/vmm/io/vlapic.h
==============================================================================
--- head/sys/amd64/vmm/io/vlapic.h      Wed Dec 25 06:09:31 2013        
(r259862)
+++ head/sys/amd64/vmm/io/vlapic.h      Wed Dec 25 06:46:31 2013        
(r259863)
@@ -30,73 +30,33 @@
 #define        _VLAPIC_H_
 
 struct vm;
- 
-/*
- * Map of APIC Registers:       Offset  Description                            
Access
- */
-#define APIC_OFFSET_ID                 0x20    // Local APIC ID                
        R/W
-#define APIC_OFFSET_VER        0x30    // Local APIC Version                   
R
-#define APIC_OFFSET_TPR        0x80    // Task Priority Register               
R/W
-#define APIC_OFFSET_APR        0x90    // Arbitration Priority Register        
R
-#define APIC_OFFSET_PPR        0xA0    // Processor Priority Register          
R
-#define APIC_OFFSET_EOI        0xB0    // EOI Register                         
W
-#define APIC_OFFSET_RRR        0xC0    // Remote read                          
R
-#define APIC_OFFSET_LDR        0xD0    // Logical Destination                  
R/W
-#define APIC_OFFSET_DFR        0xE0    // Destination Format Register          
0..27 R;  28..31 R/W
-#define APIC_OFFSET_SVR        0xF0    // Spurious Interrupt Vector Reg.       
0..3  R;  4..9   R/W
-#define APIC_OFFSET_ISR0       0x100   // ISR  000-031                         
R
-#define APIC_OFFSET_ISR1       0x110   // ISR  032-063                         
R
-#define APIC_OFFSET_ISR2       0x120   // ISR  064-095                         
R
-#define APIC_OFFSET_ISR3       0x130   // ISR  095-128                         
R
-#define APIC_OFFSET_ISR4       0x140   // ISR  128-159                         
R
-#define APIC_OFFSET_ISR5       0x150   // ISR  160-191                         
R
-#define APIC_OFFSET_ISR6       0x160   // ISR  192-223                         
R
-#define APIC_OFFSET_ISR7       0x170   // ISR  224-255                         
R
-#define APIC_OFFSET_TMR0       0x180   // TMR  000-031                         
R
-#define APIC_OFFSET_TMR1       0x190   // TMR  032-063                         
R
-#define APIC_OFFSET_TMR2       0x1A0   // TMR  064-095                         
R
-#define APIC_OFFSET_TMR3       0x1B0   // TMR  095-128                         
R
-#define APIC_OFFSET_TMR4       0x1C0   // TMR  128-159                         
R
-#define APIC_OFFSET_TMR5       0x1D0   // TMR  160-191                         
R
-#define APIC_OFFSET_TMR6       0x1E0   // TMR  192-223                         
R
-#define APIC_OFFSET_TMR7       0x1F0   // TMR  224-255                         
R
-#define APIC_OFFSET_IRR0       0x200   // IRR  000-031                         
R
-#define APIC_OFFSET_IRR1       0x210   // IRR  032-063                         
R
-#define APIC_OFFSET_IRR2       0x220   // IRR  064-095                         
R
-#define APIC_OFFSET_IRR3       0x230   // IRR  095-128                         
R
-#define APIC_OFFSET_IRR4       0x240   // IRR  128-159                         
R
-#define APIC_OFFSET_IRR5       0x250   // IRR  160-191                         
R
-#define APIC_OFFSET_IRR6       0x260   // IRR  192-223                         
R
-#define APIC_OFFSET_IRR7       0x270   // IRR  224-255                         
R
-#define APIC_OFFSET_ESR                0x280   // Error Status Register        
        R
-#define APIC_OFFSET_CMCI_LVT   0x2F0   // Local Vector Table (CMCI)            
R/W
-#define APIC_OFFSET_ICR_LOW    0x300   // Interrupt Command Reg. (0-31)        
R/W
-#define APIC_OFFSET_ICR_HI     0x310   // Interrupt Command Reg. (32-63)       
R/W
-#define APIC_OFFSET_TIMER_LVT  0x320   // Local Vector Table (Timer)           
R/W
-#define APIC_OFFSET_THERM_LVT  0x330   // Local Vector Table (Thermal)         
R/W (PIV+)
-#define APIC_OFFSET_PERF_LVT   0x340   // Local Vector Table (Performance)     
R/W (P6+)
-#define APIC_OFFSET_LINT0_LVT  0x350   // Local Vector Table (LINT0)           
R/W
-#define APIC_OFFSET_LINT1_LVT  0x360   // Local Vector Table (LINT1)           
R/W
-#define APIC_OFFSET_ERROR_LVT  0x370   // Local Vector Table (ERROR)           
R/W
-#define APIC_OFFSET_ICR        0x380   // Initial Count Reg. for Timer         
R/W
-#define APIC_OFFSET_CCR        0x390   // Current Count of Timer               
R
-#define APIC_OFFSET_DCR        0x3E0   // Timer Divide Configuration Reg.      
R/W
-
-/*
- * 16 priority levels with at most one vector injected per level.
- */
-#define        ISRVEC_STK_SIZE         (16 + 1)
-
 enum x2apic_state;
 
-struct vlapic *vlapic_init(struct vm *vm, int vcpuid);
-void vlapic_cleanup(struct vlapic *vlapic);
 int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data,
     bool *retu);
 int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data,
     bool *retu);
+
+/*
+ * Returns a vector between 32 and 255 if an interrupt is pending in the
+ * IRR that can be delivered based on the current state of ISR and TPR.
+ *
+ * Note that the vector does not automatically transition to the ISR as a
+ * result of calling this function.
+ *
+ * Returns -1 if there is no eligible vector that can be delivered to the
+ * guest at this time.
+ */
 int vlapic_pending_intr(struct vlapic *vlapic);
+
+/*
+ * Transition 'vector' from IRR to ISR. This function is called with the
+ * vector returned by 'vlapic_pending_intr()' when the guest is able to
+ * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that
+ * block interrupt delivery).
+ */
 void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
+
 void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level);
 void vlapic_set_error(struct vlapic *vlapic, uint32_t mask);
 void vlapic_fire_cmci(struct vlapic *vlapic);
@@ -109,4 +69,5 @@ bool vlapic_enabled(struct vlapic *vlapi
 
 void vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys,
     int delmode, int vec);
+void vlapic_post_intr(struct vlapic *vlapic, int hostcpu);
 #endif /* _VLAPIC_H_ */

Added: head/sys/amd64/vmm/io/vlapic_priv.h
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/amd64/vmm/io/vlapic_priv.h Wed Dec 25 06:46:31 2013        
(r259863)
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 2013 Neel Natu <n...@freebsd.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VLAPIC_PRIV_H_
+#define        _VLAPIC_PRIV_H_
+
+/*
+ * APIC Register:              Offset     Description
+ */
+#define APIC_OFFSET_ID         0x20    /* Local APIC ID                */
+#define APIC_OFFSET_VER                0x30    /* Local APIC Version           */
+#define APIC_OFFSET_TPR                0x80    /* Task Priority Register       */
+#define APIC_OFFSET_APR                0x90    /* Arbitration Priority         */
+#define APIC_OFFSET_PPR                0xA0    /* Processor Priority Register  */
+#define APIC_OFFSET_EOI                0xB0    /* EOI Register                 */
+#define APIC_OFFSET_RRR                0xC0    /* Remote read                  */
+#define APIC_OFFSET_LDR                0xD0    /* Logical Destination          */
+#define APIC_OFFSET_DFR                0xE0    /* Destination Format Register  */
+#define APIC_OFFSET_SVR                0xF0    /* Spurious Vector Register     */
+#define APIC_OFFSET_ISR0       0x100   /* In Service Register          */
+#define APIC_OFFSET_ISR1       0x110
+#define APIC_OFFSET_ISR2       0x120
+#define APIC_OFFSET_ISR3       0x130
+#define APIC_OFFSET_ISR4       0x140
+#define APIC_OFFSET_ISR5       0x150
+#define APIC_OFFSET_ISR6       0x160
+#define APIC_OFFSET_ISR7       0x170
+#define APIC_OFFSET_TMR0       0x180   /* Trigger Mode Register        */
+#define APIC_OFFSET_TMR1       0x190
+#define APIC_OFFSET_TMR2       0x1A0
+#define APIC_OFFSET_TMR3       0x1B0
+#define APIC_OFFSET_TMR4       0x1C0
+#define APIC_OFFSET_TMR5       0x1D0
+#define APIC_OFFSET_TMR6       0x1E0
+#define APIC_OFFSET_TMR7       0x1F0
+#define APIC_OFFSET_IRR0       0x200   /* Interrupt Request Register   */
+#define APIC_OFFSET_IRR1       0x210
+#define APIC_OFFSET_IRR2       0x220
+#define APIC_OFFSET_IRR3       0x230
+#define APIC_OFFSET_IRR4       0x240
+#define APIC_OFFSET_IRR5       0x250
+#define APIC_OFFSET_IRR6       0x260
+#define APIC_OFFSET_IRR7       0x270
+#define APIC_OFFSET_ESR                0x280   /* Error Status Register        */
+#define APIC_OFFSET_CMCI_LVT   0x2F0   /* Local Vector Table (CMCI)    */
+#define APIC_OFFSET_ICR_LOW    0x300   /* Interrupt Command Register   */
+#define APIC_OFFSET_ICR_HI     0x310
+#define APIC_OFFSET_TIMER_LVT  0x320   /* Local Vector Table (Timer)   */
+#define APIC_OFFSET_THERM_LVT  0x330   /* Local Vector Table (Thermal) */
+#define APIC_OFFSET_PERF_LVT   0x340   /* Local Vector Table (PMC)     */
+#define APIC_OFFSET_LINT0_LVT  0x350   /* Local Vector Table (LINT0)   */
+#define APIC_OFFSET_LINT1_LVT  0x360   /* Local Vector Table (LINT1)   */
+#define APIC_OFFSET_ERROR_LVT  0x370   /* Local Vector Table (ERROR)   */
+#define APIC_OFFSET_TIMER_ICR  0x380   /* Timer's Initial Count        */
+#define APIC_OFFSET_TIMER_CCR  0x390   /* Timer's Current Count        */
+#define APIC_OFFSET_TIMER_DCR  0x3E0   /* Timer's Divide Configuration */
+
+enum boot_state {
+       BS_INIT,
+       BS_SIPI,
+       BS_RUNNING
+};
+
+/*
+ * 16 priority levels with at most one vector injected per level.
+ */
+#define        ISRVEC_STK_SIZE         (16 + 1)
+
+struct vlapic {
+       struct vm               *vm;
+       int                     vcpuid;
+       struct LAPIC            *apic_page;
+
+       uint32_t                esr_pending;
+       int                     esr_firing;
+
+       struct callout  callout;        /* vlapic timer */
+       struct bintime  timer_fire_bt;  /* callout expiry time */
+       struct bintime  timer_freq_bt;  /* timer frequency */
+       struct bintime  timer_period_bt; /* timer period */
+       struct mtx      timer_mtx;
+
+       /*
+        * The 'isrvec_stk' is a stack of vectors injected by the local apic.
+        * A vector is popped from the stack when the processor does an EOI.
+        * The vector on the top of the stack is used to compute the
+        * Processor Priority in conjunction with the TPR.
+        */
+       uint8_t                  isrvec_stk[ISRVEC_STK_SIZE];
+       int                      isrvec_stk_top;
+
+       uint64_t                msr_apicbase;
+       enum boot_state         boot_state;
+};
+
+void vlapic_init(struct vlapic *vlapic);
+void vlapic_cleanup(struct vlapic *vlapic);
+
+#endif /* _VLAPIC_PRIV_H_ */

Modified: head/sys/amd64/vmm/vmm.c
==============================================================================
--- head/sys/amd64/vmm/vmm.c    Wed Dec 25 06:09:31 2013        (r259862)
+++ head/sys/amd64/vmm/vmm.c    Wed Dec 25 06:46:31 2013        (r259863)
@@ -156,6 +156,10 @@ static struct vmm_ops *ops;
        (ops != NULL ? (*ops->vmgetcap)(vmi, vcpu, num, retval) : ENXIO)
 #define        VMSETCAP(vmi, vcpu, num, val)           \
        (ops != NULL ? (*ops->vmsetcap)(vmi, vcpu, num, val) : ENXIO)
+#define        VLAPIC_INIT(vmi, vcpu)                  \
+       (ops != NULL ? (*ops->vlapic_init)(vmi, vcpu) : NULL)
+#define        VLAPIC_CLEANUP(vmi, vlapic)             \
+       (ops != NULL ? (*ops->vlapic_cleanup)(vmi, vlapic) : NULL)
 
 #define        fpu_start_emulating()   load_cr0(rcr0() | CR0_TS)
 #define        fpu_stop_emulating()    clts()
@@ -167,9 +171,11 @@ CTASSERT(VMM_MSR_NUM <= 64);       /* msr_mask
 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
 
 static void
-vcpu_cleanup(struct vcpu *vcpu)
+vcpu_cleanup(struct vm *vm, int i)
 {
-       vlapic_cleanup(vcpu->vlapic);
+       struct vcpu *vcpu = &vm->vcpu[i];
+
+       VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
        vmm_stat_free(vcpu->stats);     
        fpu_save_area_free(vcpu->guestfpu);
 }
@@ -184,7 +190,7 @@ vcpu_init(struct vm *vm, uint32_t vcpu_i
        vcpu_lock_init(vcpu);
        vcpu->hostcpu = NOCPU;
        vcpu->vcpuid = vcpu_id;
-       vcpu->vlapic = vlapic_init(vm, vcpu_id);
+       vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
        vm_set_x2apic_state(vm, vcpu_id, X2APIC_ENABLED);
        vcpu->guestfpu = fpu_save_area_alloc();
        fpu_save_area_reset(vcpu->guestfpu);
@@ -360,7 +366,7 @@ vm_destroy(struct vm *vm)
        vm->num_mem_segs = 0;
 
        for (i = 0; i < VM_MAXCPU; i++)
-               vcpu_cleanup(&vm->vcpu[i]);
+               vcpu_cleanup(vm, i);
 
        VMSPACE_FREE(vm->vmspace);
 
@@ -1127,7 +1133,7 @@ vm_inject_nmi(struct vm *vm, int vcpuid)
        vcpu = &vm->vcpu[vcpuid];
 
        vcpu->nmi_pending = 1;
-       vcpu_notify_event(vm, vcpuid);
+       vcpu_notify_event(vm, vcpuid, false);
        return (0);
 }
 
@@ -1356,7 +1362,7 @@ vm_set_x2apic_state(struct vm *vm, int v
  *   to the host_cpu to cause the vcpu to trap into the hypervisor.
  */
 void
-vcpu_notify_event(struct vm *vm, int vcpuid)
+vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr)
 {
        int hostcpu;
        struct vcpu *vcpu;
@@ -1371,8 +1377,12 @@ vcpu_notify_event(struct vm *vm, int vcp
        } else {
                if (vcpu->state != VCPU_RUNNING)
                        panic("invalid vcpu state %d", vcpu->state);
-               if (hostcpu != curcpu)
-                       ipi_cpu(hostcpu, vmm_ipinum);
+               if (hostcpu != curcpu) {
+                       if (lapic_intr)
+                               vlapic_post_intr(vcpu->vlapic, hostcpu);
+                       else
+                               ipi_cpu(hostcpu, vmm_ipinum);
+               }
        }
        vcpu_unlock(vcpu);
 }

Modified: head/sys/amd64/vmm/vmm_lapic.c
==============================================================================
--- head/sys/amd64/vmm/vmm_lapic.c      Wed Dec 25 06:09:31 2013        (r259862)
+++ head/sys/amd64/vmm/vmm_lapic.c      Wed Dec 25 06:46:31 2013        (r259863)
@@ -51,26 +51,6 @@ __FBSDID("$FreeBSD$");
 #define        MSI_X86_ADDR_LOG        0x00000004      /* Destination Mode */
 
 int
-lapic_pending_intr(struct vm *vm, int cpu)
-{
-       struct vlapic *vlapic;
-
-       vlapic = vm_lapic(vm, cpu);
-
-       return (vlapic_pending_intr(vlapic));
-}
-
-void
-lapic_intr_accepted(struct vm *vm, int cpu, int vector)
-{
-       struct vlapic *vlapic;
-
-       vlapic = vm_lapic(vm, cpu);
-
-       vlapic_intr_accepted(vlapic, vector);
-}
-
-int
 lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
 {
        struct vlapic *vlapic;
@@ -83,9 +63,7 @@ lapic_set_intr(struct vm *vm, int cpu, i
 
        vlapic = vm_lapic(vm, cpu);
        vlapic_set_intr_ready(vlapic, vector, level);
-
-       vcpu_notify_event(vm, cpu);
-
+       vcpu_notify_event(vm, cpu, true);
        return (0);
 }
 

Modified: head/sys/amd64/vmm/vmm_lapic.h
==============================================================================
--- head/sys/amd64/vmm/vmm_lapic.h      Wed Dec 25 06:09:31 2013        (r259862)
+++ head/sys/amd64/vmm/vmm_lapic.h      Wed Dec 25 06:46:31 2013        (r259863)
@@ -43,26 +43,6 @@ int  lapic_mmio_write(void *vm, int cpu, 
                         uint64_t wval, int size, void *arg);
 
 /*
- * Returns a vector between 32 and 255 if an interrupt is pending in the
- * IRR that can be delivered based on the current state of ISR and TPR.
- *
- * Note that the vector does not automatically transition to the ISR as a
- * result of calling this function.
- *
- * Returns -1 if there is no eligible vector that can be delivered to the
- * guest at this time.
- */
-int    lapic_pending_intr(struct vm *vm, int cpu);
-
-/*
- * Transition 'vector' from IRR to ISR. This function is called with the
- * vector returned by 'lapic_pending_intr()' when the guest is able to
- * accept this interrupt (i.e. RFLAGS.IF = 1 and no conditions exist that
- * block interrupt delivery).
- */
-void   lapic_intr_accepted(struct vm *vm, int cpu, int vector);
-
-/*
  * Signals to the LAPIC that an interrupt at 'vector' needs to be generated
  * to the 'cpu', the state is recorded in IRR.
  */
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to