Hi,

In the summer, I posted some paravirt patches for amd64. In response to the
comments I received then, I have created some infrastructure to binary-patch
kernel code during boot. To get some feedback, I am posting the whole
paravirt & code patching diff here. Also, KVM users may be interested in
trying it (hi Antoine ;).

For the codepatching part, the most interesting files are codepatch.c and 
codepatch.h.  In copy.S and cpu.c, I convert the code patching already 
done for SMAP to the new infrastructure (if someone has a machine with 
SMAP support, testing would be nice).

The basic approach is to create macros that surround a to-be-patched piece
of code and store its pointer, length, and a tag in a dedicated section.
There are then functions that patch all code pieces carrying a given tag.
Does this part of the diff look sensible?
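
To make that concrete, here is roughly what a patch site and the boot-time
call that rewrites it look like. The macro and function names are the ones
from the diff below; the snippet itself is only an illustration, not part
of the patch:

	/* in assembly: a patchable lapic EOI write, tagged CPTAG_EOI */
	CODEPATCH_START
	movl	$0,_C_LABEL(local_apic)+LAPIC_EOI
	CODEPATCH_END(CPTAG_EOI)

	/* at boot, in C: redirect every CPTAG_EOI site to hv_msr_eoi() */
	codepatch_call(CPTAG_EOI, &hv_msr_eoi);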

The rest of the diff is KVM paravirtualization, which in part also uses
the code patching infrastructure. The paravirtualization stuff seems to
work fine and gives a nice speed-up, but it still needs plenty of cleanup
before it can be committed.

There are some paravirtualization bits that are not specific to KVM. For 
example, I introduce a running_on_hypervisor() function and skip some 
delays during shutdown if it returns true. I think this should work for 
most (all?) hypervisors, though it still needs some detection logic. 
Making it depend on the CPUIDECX_HYPERV bit should catch the most common 
hypervisors. Is this something we could commit already in order to see if 
it causes any problems?
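
For illustration, the check could be as simple as this (just a sketch, not
part of the diff; the diff's current version checks the KVM/Hyper-V CPUID
bases instead). It assumes the CPUIDECX_HYPERV define added below and the
existing cpu_ecxfeature variable holding the CPUID.1 ECX feature bits:

	int
	running_on_hypervisor(void)
	{
		/* CPUID.1:ECX bit 31 is set when running under a hypervisor */
		return (cpu_ecxfeature & CPUIDECX_HYPERV) != 0;
	}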

Another question: Most of the infrastructure and the pointers to the code 
that needs patching are only used at boot. Is there already a way to put 
that into a dedicated section that is freed and reused after boot?

Cheers,
Stefan

=======================

 amd64/codepatch.c     |  183 ++++++++++++++++++++++++++++++++++++++++++++++++++
 amd64/copy.S          |   40 ++++------
 amd64/cpu.c           |   63 ++++-------------
 amd64/genassym.cf     |    3 
 amd64/identcpu.c      |    1 
 amd64/lapic.c         |   64 +++++++++++++++--
 amd64/machdep.c       |    9 +-
 amd64/mainbus.c       |    8 ++
 amd64/paravirt.c      |  161 +++++++++++++++++++++++++++++++++++++++++++
 amd64/vector.S        |  139 +++++++++++++++++++++++++++++++++++++
 conf/GENERIC          |    2 
 conf/files.amd64      |    6 +
 include/codepatch.h   |   53 ++++++++++++++
 include/cpu.h         |    4 +
 include/cpuvar.h      |   12 +++
 include/i82093reg.h   |   26 ++++++-
 include/i82489var.h   |   13 +++
 include/paravirtvar.h |   72 +++++++++++++++++++
 include/specialreg.h  |    1 
 19 files changed, 776 insertions(+), 84 deletions(-)

=======================

diff --git a/sys/arch/amd64/amd64/codepatch.c b/sys/arch/amd64/amd64/codepatch.c
new file mode 100644
index 0000000..3f5eba0
--- /dev/null
+++ b/sys/arch/amd64/amd64/codepatch.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2014 Stefan Fritsch <s...@sfritsch.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <machine/paravirtvar.h>
+#include <machine/codepatch.h>
+#include <uvm/uvm_extern.h>
+
+#if 1
+#define DBGPRINT(fmt, args...) printf("%s: " fmt "\n", __func__, ## args)
+#else
+#define DBGPRINT(fmt, args...) do {} while (0)
+#endif
+
+struct codepatch {
+       uint32_t offset;
+       uint16_t len;
+       uint16_t tag;
+};
+
+extern struct codepatch codepatch_begin;
+extern struct codepatch codepatch_end;
+
+static const int nop_len_min = 2;
+static const int nop_len_max = 9;
+static const unsigned char nops[][9] = {
+       { 0x66, 0x90 },
+       { 0x0F, 0x1F, 0x00 },
+       { 0x0F, 0x1F, 0x40, 0x00 },
+       { 0x0F, 0x1F, 0x44, 0x00, 0x00 },
+       { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
+       { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 },
+       { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
+       { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+};
+
+void
+codepatch_fill_nop(void *caddr, uint16_t len)
+{
+       unsigned char *addr = caddr;
+       uint16_t nop_len;
+
+       while (len > 0) {
+               if (len < nop_len_min) {
+                       panic("%s: can't patch NOP with len 1 at %p",
+                           __func__, caddr);
+               }
+               if (len <= nop_len_max)
+                       nop_len = len;
+               else if (len == nop_len_max + 1)
+                       nop_len = nop_len_max - 1;
+               else
+                       nop_len = nop_len_max;
+               memcpy(addr, nops[nop_len-nop_len_min], nop_len);
+               addr += nop_len;
+               len -= nop_len;
+       }
+}
+
+/*
+ * Create writeable aliases of memory we need
+ * to write to as kernel is mapped read-only
+ */
+void *codepatch_maprw(vaddr_t *nva, vaddr_t dest)
+{
+       paddr_t kva = trunc_page((paddr_t)dest);
+       paddr_t po = (paddr_t)dest & PAGE_MASK;
+       paddr_t pa1, pa2;
+
+       if (*nva == 0)
+               *nva = (vaddr_t)km_alloc(2 * PAGE_SIZE, &kv_any, &kp_none,
+                                       &kd_waitok);
+
+       pmap_extract(pmap_kernel(), kva, &pa1);
+       pmap_extract(pmap_kernel(), kva + PAGE_SIZE, &pa2);
+       pmap_kenter_pa(*nva, pa1, PROT_READ | PROT_WRITE);
+       pmap_kenter_pa(*nva + PAGE_SIZE, pa2, PROT_READ | PROT_WRITE);
+       pmap_update(pmap_kernel());
+
+       return (void *)(*nva + po);
+}
+
+void codepatch_unmaprw(vaddr_t nva)
+{
+       if (nva != 0)
+               km_free((void *)nva, 2 * PAGE_SIZE, &kv_any, &kp_none);
+}
+
+/* Patch with NOPs */
+void
+codepatch_nop(uint16_t tag)
+{
+       struct codepatch *patch;
+       unsigned char *rwaddr;
+       vaddr_t addr, rwmap = 0;
+       int i = 0;
+
+       DBGPRINT("patching tag %u", tag);
+
+       for (patch = &codepatch_begin; patch < &codepatch_end; patch++) {
+               if (patch->tag != tag)
+                       continue;
+               addr = KERNBASE + patch->offset;
+               rwaddr = codepatch_maprw(&rwmap, addr);
+               codepatch_fill_nop(rwaddr, patch->len);
+               i++;
+       }
+       codepatch_unmaprw(rwmap);
+       DBGPRINT("patched %d places", i);
+}
+
+/* Patch with alternative code */
+void
+codepatch_replace(uint16_t tag, void *code, size_t len)
+{
+       struct codepatch *patch;
+       unsigned char *rwaddr;
+       vaddr_t addr, rwmap = 0;
+       int i = 0;
+
+       DBGPRINT("patching tag %u with %p", tag, code);
+
+       for (patch = &codepatch_begin; patch < &codepatch_end; patch++) {
+               if (patch->tag != tag)
+                       continue;
+               addr = KERNBASE + patch->offset;
+
+               if (len > patch->len) {
+                       panic("%s: can't replace len %u with %zu at %#lx",
+                           __func__, patch->len, len, addr);
+               }
+               rwaddr = codepatch_maprw(&rwmap, addr);
+               memcpy(rwaddr, code, len);
+               codepatch_fill_nop(rwaddr + len, patch->len - len);
+               i++;
+       }
+       codepatch_unmaprw(rwmap);
+       DBGPRINT("patched %d places", i);
+}
+
+/* Patch with calls to func */
+void
+codepatch_call(uint16_t tag, void *func)
+{
+       struct codepatch *patch;
+       unsigned char *rwaddr;
+       int32_t offset;
+       int i = 0;
+       vaddr_t addr, rwmap = 0;
+
+       DBGPRINT("patching tag %u with call %p", tag, func);
+
+       for (patch = &codepatch_begin; patch < &codepatch_end; patch++) {
+               if (patch->tag != tag)
+                       continue;
+               addr = KERNBASE + patch->offset;
+               if (patch->len < 5)
+                       panic("%s: can't replace len %u with call at %#lx",
+                           __func__, patch->len, addr);
+
+               offset = (vaddr_t)func - (addr + 5);
+               rwaddr = codepatch_maprw(&rwmap, addr);
+               rwaddr[0] = 0xe8; /* call near */
+               memcpy(rwaddr + 1, &offset, sizeof(offset));
+               codepatch_fill_nop(rwaddr + 5, patch->len - 5);
+               i++;
+       }
+       codepatch_unmaprw(rwmap);
+       DBGPRINT("patched %d places", i);
+}
diff --git a/sys/arch/amd64/amd64/copy.S b/sys/arch/amd64/amd64/copy.S
index 32e18fc..05923ee 100644
--- a/sys/arch/amd64/amd64/copy.S
+++ b/sys/arch/amd64/amd64/copy.S
@@ -42,6 +42,7 @@
 #include <sys/syscall.h>
 
 #include <machine/asm.h>
+#include <machine/codepatch.h>
 
 /*
  * As stac/clac SMAP instructions are 3 bytes, we want the fastest
@@ -53,6 +54,12 @@
  * on all family 0x6 and 0xf processors (ie 686+)
  */
 #define SMAP_NOP       .byte 0x0f, 0x1f, 0x00
+#define SMAP_STAC      CODEPATCH_START                 ;\
+                       SMAP_NOP                        ;\
+                       CODEPATCH_END(CPTAG_STAC)
+#define SMAP_CLAC      CODEPATCH_START                 ;\
+                       SMAP_NOP                        ;\
+                       CODEPATCH_END(CPTAG_CLAC)
 
 /*
  * Copy routines from and to userland, plus a few more. See the
@@ -110,7 +117,6 @@ ENTRY(kcopy)
        xorq    %rax,%rax
        ret
 
-.globl _C_LABEL(_copyout_stac), _C_LABEL(_copyout_clac)
 ENTRY(copyout)
        pushq   $0
 
@@ -127,9 +133,7 @@ ENTRY(copyout)
        movq    CPUVAR(CURPCB),%rdx
        leaq    _C_LABEL(copy_fault)(%rip),%r11
        movq    %r11,PCB_ONFAULT(%rdx)
-_C_LABEL(_copyout_stac):
-       SMAP_NOP
-
+       SMAP_STAC
        cld
        movq    %rax,%rcx
        shrq    $3,%rcx
@@ -139,22 +143,17 @@ _C_LABEL(_copyout_stac):
        andb    $7,%cl
        rep
        movsb
-
-_C_LABEL(_copyout_clac):
-       SMAP_NOP
+       SMAP_CLAC
        popq    PCB_ONFAULT(%rdx)
        xorl    %eax,%eax
        ret
 
-.globl _C_LABEL(_copyin_stac), _C_LABEL(_copyin_clac)
 ENTRY(copyin)
        movq    CPUVAR(CURPCB),%rax
        pushq   $0
        leaq    _C_LABEL(copy_fault)(%rip),%r11
        movq    %r11,PCB_ONFAULT(%rax)
-_C_LABEL(_copyin_stac):
-       SMAP_NOP
-
+       SMAP_STAC
        xchgq   %rdi,%rsi
        movq    %rdx,%rax
 
@@ -176,8 +175,7 @@ _C_LABEL(_copyin_stac):
        rep
        movsb
 
-_C_LABEL(_copyin_clac):
-       SMAP_NOP
+       SMAP_CLAC
        movq    CPUVAR(CURPCB),%rdx
        popq    PCB_ONFAULT(%rdx)
        xorl    %eax,%eax
@@ -186,15 +184,12 @@ _C_LABEL(_copyin_clac):
 NENTRY(copy_efault)
        movq    $EFAULT,%rax
 
-.globl _C_LABEL(_copy_fault_clac)
 NENTRY(copy_fault)
-_C_LABEL(_copy_fault_clac):
-       SMAP_NOP
+       SMAP_CLAC
        movq    CPUVAR(CURPCB),%rdx
        popq    PCB_ONFAULT(%rdx)
        ret
 
-.globl _C_LABEL(_copyoutstr_stac)
 ENTRY(copyoutstr)
        xchgq   %rdi,%rsi
        movq    %rdx,%r8
@@ -203,8 +198,7 @@ ENTRY(copyoutstr)
 5:     movq    CPUVAR(CURPCB),%rax
        leaq    _C_LABEL(copystr_fault)(%rip),%r11
        movq    %r11,PCB_ONFAULT(%rax)
-_C_LABEL(_copyoutstr_stac):
-       SMAP_NOP
+       SMAP_STAC
        /*
         * Get min(%rdx, VM_MAXUSER_ADDRESS-%rdi).
         */
@@ -238,7 +232,6 @@ _C_LABEL(_copyoutstr_stac):
        movq    $ENAMETOOLONG,%rax
        jmp     copystr_return
 
-.globl _C_LABEL(_copyinstr_stac)
 ENTRY(copyinstr)
        xchgq   %rdi,%rsi
        movq    %rdx,%r8
@@ -247,8 +240,7 @@ ENTRY(copyinstr)
        movq    CPUVAR(CURPCB),%rcx
        leaq    _C_LABEL(copystr_fault)(%rip),%r11
        movq    %r11,PCB_ONFAULT(%rcx)
-_C_LABEL(_copyinstr_stac):
-       SMAP_NOP
+       SMAP_STAC
 
        /*
         * Get min(%rdx, VM_MAXUSER_ADDRESS-%rsi).
@@ -286,11 +278,9 @@ _C_LABEL(_copyinstr_stac):
 ENTRY(copystr_efault)
        movl    $EFAULT,%eax
 
-.globl _C_LABEL(_copystr_fault_clac)
 ENTRY(copystr_fault)
 copystr_return:
-_C_LABEL(_copystr_fault_clac):
-       SMAP_NOP
+       SMAP_CLAC
        /* Set *lencopied and return %eax. */
        movq    CPUVAR(CURPCB),%rcx
        movq    $0,PCB_ONFAULT(%rcx)
diff --git a/sys/arch/amd64/amd64/cpu.c b/sys/arch/amd64/amd64/cpu.c
index 2d7619f..868c897 100644
--- a/sys/arch/amd64/amd64/cpu.c
+++ b/sys/arch/amd64/amd64/cpu.c
@@ -77,12 +77,14 @@
 
 #include <uvm/uvm_extern.h>
 
+#include <machine/codepatch.h>
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
 #include <machine/cpuvar.h>
 #include <machine/pmap.h>
 #include <machine/vmparam.h>
 #include <machine/mpbiosvar.h>
+#include <machine/paravirtvar.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #include <machine/segments.h>
@@ -120,65 +122,23 @@ struct cpu_softc {
 #ifndef SMALL_KERNEL
 void   replacesmap(void);
 
-extern long _copyout_stac;
-extern long _copyout_clac;
-extern long _copyin_stac;
-extern long _copyin_clac;
-extern long _copy_fault_clac;
-extern long _copyoutstr_stac;
-extern long _copyinstr_stac;
-extern long _copystr_fault_clac;
 extern long _stac;
 extern long _clac;
 
-static const struct {
-       void *daddr;
-       void *saddr;
-} ireplace[] = {
-       { &_copyout_stac, &_stac },
-       { &_copyout_clac, &_clac },
-       { &_copyin_stac, &_stac },
-       { &_copyin_clac, &_clac },
-       { &_copy_fault_clac, &_clac },
-       { &_copyoutstr_stac, &_stac },
-       { &_copyinstr_stac, &_stac },
-       { &_copystr_fault_clac, &_clac },
-};
-
 void
 replacesmap(void)
 {
        static int replacedone = 0;
-       int i, s;
-       vaddr_t nva;
+       int s;
 
        if (replacedone)
                return;
        replacedone = 1;
 
        s = splhigh();
-       /*
-        * Create writeable aliases of memory we need
-        * to write to as kernel is mapped read-only
-        */
-       nva = (vaddr_t)km_alloc(2 * PAGE_SIZE, &kv_any, &kp_none, &kd_waitok);
-
-       for (i = 0; i < nitems(ireplace); i++) {
-               paddr_t kva = trunc_page((paddr_t)ireplace[i].daddr);
-               paddr_t po = (paddr_t)ireplace[i].daddr & PAGE_MASK;
-               paddr_t pa1, pa2;
 
-               pmap_extract(pmap_kernel(), kva, &pa1);
-               pmap_extract(pmap_kernel(), kva + PAGE_SIZE, &pa2);
-               pmap_kenter_pa(nva, pa1, PROT_READ | PROT_WRITE);
-               pmap_kenter_pa(nva + PAGE_SIZE, pa2, PROT_READ | PROT_WRITE);
-               pmap_update(pmap_kernel());
-
-               /* replace 3 byte nops with stac/clac instructions */
-               bcopy(ireplace[i].saddr, (void *)(nva + po), 3);
-       }
-
-       km_free((void *)nva, 2 * PAGE_SIZE, &kv_any, &kp_none);
+       codepatch_replace(CPTAG_STAC, &_stac, 3);
+       codepatch_replace(CPTAG_CLAC, &_clac, 3);
        
        splx(s);
 }
@@ -573,6 +533,19 @@ cpu_init(struct cpu_info *ci)
        ci->ci_flags |= CPUF_RUNNING;
        tlbflushg();
 #endif
+#if NPARAVIRT > 0
+       if (kvm_pv_eoi_enabled) {
+               paddr_t pa;
+               ci->ci_kvm_pv_eoi = 0;
+               if (pmap_extract(pmap_kernel(), (vaddr_t)&ci->ci_kvm_pv_eoi, &pa) &&
+                   ((uint64_t)pa & 0x3) == 0) {
+                       wrmsr(MSR_KVM_EOI_EN, (1 | (uint64_t)pa) );
+               } else {
+                       printf("could not get phys addr for MSR_KVM_EOI_EN, disabling pv_eoi\n");
+                       kvm_pv_eoi_enabled = 0;
+               }
+       }
+#endif
 }
 
 
diff --git a/sys/arch/amd64/amd64/genassym.cf b/sys/arch/amd64/amd64/genassym.cf
index e13a477..b92094d 100644
--- a/sys/arch/amd64/amd64/genassym.cf
+++ b/sys/arch/amd64/amd64/genassym.cf
@@ -114,6 +114,9 @@ member      CPU_INFO_MUTEX_LEVEL    ci_mutex_level
 endif
 member CPU_INFO_GDT            ci_gdt
 member CPU_INFO_TSS            ci_tss
+# XXX if NPARAVIRT > 0
+member CPU_INFO_KVM_PV_EOI     ci_kvm_pv_eoi
+# XXX endif
 
 struct intrsource
 member is_recurse
diff --git a/sys/arch/amd64/amd64/identcpu.c b/sys/arch/amd64/amd64/identcpu.c
index ed46079..c62bd6f 100644
--- a/sys/arch/amd64/amd64/identcpu.c
+++ b/sys/arch/amd64/amd64/identcpu.c
@@ -129,6 +129,7 @@ const struct {
        { CPUIDECX_AVX,         "AVX" },
        { CPUIDECX_F16C,        "F16C" },
        { CPUIDECX_RDRAND,      "RDRAND" },
+       { CPUIDECX_HYPERV,      "Hypervisor" },
 }, cpu_ecpuid_ecxfeatures[] = {
        { CPUIDECX_LAHF,        "LAHF" },
        { CPUIDECX_CMPLEG,      "CMPLEG" },
diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c
index 0488c1f..b82c497 100644
--- a/sys/arch/amd64/amd64/lapic.c
+++ b/sys/arch/amd64/amd64/lapic.c
@@ -44,6 +44,7 @@
 #include <machine/pmap.h>
 #include <machine/vmparam.h>
 #include <machine/mpbiosvar.h>
+#include <machine/paravirtvar.h>
 #include <machine/pcb.h>
 #include <machine/specialreg.h>
 #include <machine/segments.h>
@@ -234,14 +235,27 @@ lapic_boot_init(paddr_t lapic_base)
        lapic_map(lapic_base);
 
 #ifdef MULTIPROCESSOR
+#if NPARAVIRT > 0
+       if (kvm_pv_eoi_enabled) {
+               idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
+               idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb_kvm_pv_eoi);
+               idt_allocmap[LAPIC_IPI_INVLPG] = 1;
+               idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg_kvm_pv_eoi);
+               idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
+               idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange_kvm_pv_eoi);
+       }
+       else
+#endif
+       {
+               idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
+               idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb);
+               idt_allocmap[LAPIC_IPI_INVLPG] = 1;
+               idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg);
+               idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
+               idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange);
+       }
        idt_allocmap[LAPIC_IPI_VECTOR] = 1;
        idt_vec_set(LAPIC_IPI_VECTOR, Xintr_lapic_ipi);
-       idt_allocmap[LAPIC_IPI_INVLTLB] = 1;
-       idt_vec_set(LAPIC_IPI_INVLTLB, Xipi_invltlb);
-       idt_allocmap[LAPIC_IPI_INVLPG] = 1;
-       idt_vec_set(LAPIC_IPI_INVLPG, Xipi_invlpg);
-       idt_allocmap[LAPIC_IPI_INVLRANGE] = 1;
-       idt_vec_set(LAPIC_IPI_INVLRANGE, Xipi_invlrange);
 #endif
        idt_allocmap[LAPIC_SPURIOUS_VECTOR] = 1;
        idt_vec_set(LAPIC_SPURIOUS_VECTOR, Xintrspurious);
@@ -501,8 +515,12 @@ x86_ipi_init(int target)
        return 0;
 }
 
+#if NPARAVIRT > 0
 int
+default_x86_ipi(int vec, int target, int dl)
+#else
 x86_ipi(int vec, int target, int dl)
+#endif
 {
        int s;
 
@@ -522,6 +540,40 @@ x86_ipi(int vec, int target, int dl)
 
        return 0;
 }
+
+#if NPARAVIRT > 0
+int
+kvm_x86_ipi(int vec, int target, int dl)
+{
+       uint64_t data = target << LAPIC_ID_SHIFT;
+       data <<= 32;
+       data |= (target & LAPIC_DEST_MASK) | vec | dl | LAPIC_LVL_ASSERT;
+
+       /*
+        * Under KVM with in-kernel lapic, i82489_icr_wait() is not necessary.
+        * Omitting it saves several vm exits.
+        * XXX Check what happens without in-kernel lapic
+        * XXX Check what happens on AMD
+        */
+
+       /*
+        * Using the MSR causes only one vm exit as opposed to two exits when
+        * writing the two halves of the ICR register.
+        *
+        * Also, MSRs are cheaper than MMIO writes on CPUs lacking the APIC
+        * virtualization feature.
+        *
+        * XXX detect MSR support
+        */
+
+       wrmsr(MSR_HV_X64_ICR, data);
+
+       return 0;
+}
+
+int (*x86_ipi_func)(int, int, int) = default_x86_ipi;
+#endif /* NPARAVIRT */
+
 #endif /* MULTIPROCESSOR */
 
 
diff --git a/sys/arch/amd64/amd64/machdep.c b/sys/arch/amd64/amd64/machdep.c
index 98edbd6..46c5c60 100644
--- a/sys/arch/amd64/amd64/machdep.c
+++ b/sys/arch/amd64/amd64/machdep.c
@@ -101,6 +101,7 @@
 
 #include <machine/cpu.h>
 #include <machine/cpufunc.h>
+#include <machine/paravirtvar.h>
 #include <machine/pio.h>
 #include <machine/psl.h>
 #include <machine/reg.h>
@@ -749,7 +750,8 @@ boot(int howto)
        }
        if_downall();
 
-       delay(4*1000000);       /* XXX */
+       if (!running_on_hypervisor())
+               delay(4*1000000);       /* XXX */
 
        uvm_shutdown();
        splhigh();
@@ -771,7 +773,8 @@ haltsys:
                extern int acpi_enabled;
 
                if (acpi_enabled) {
-                       delay(500000);
+                       if (!running_on_hypervisor())
+                               delay(500000);
                        if ((howto & RB_POWERDOWN) != 0)
                                acpi_powerdown();
                }
@@ -785,7 +788,7 @@ haltsys:
        }
 
        printf("rebooting...\n");
-       if (cpureset_delay > 0)
+       if (cpureset_delay > 0 && !running_on_hypervisor())
                delay(cpureset_delay * 1000);
        cpu_reset();
        for (;;) ;
diff --git a/sys/arch/amd64/amd64/mainbus.c b/sys/arch/amd64/amd64/mainbus.c
index 2742ca0..7a4def3 100644
--- a/sys/arch/amd64/amd64/mainbus.c
+++ b/sys/arch/amd64/amd64/mainbus.c
@@ -49,6 +49,7 @@
 #include "bios.h"
 #include "mpbios.h"
 #include "vmt.h"
+#include "paravirt.h"
 
 #include <machine/cpuvar.h>
 #include <machine/i82093var.h>
@@ -151,6 +152,13 @@ mainbus_attach(struct device *parent, struct device *self, void *aux)
 
        printf("\n");
 
+#if NPARAVIRT > 0
+       {
+               mba.mba_bios.ba_name = "paravirt";
+               config_found(self, &mba.mba_bios, mainbus_print);
+       }
+#endif
+
 #if NBIOS > 0
        {
                mba.mba_bios.ba_name = "bios";
diff --git a/sys/arch/amd64/amd64/paravirt.c b/sys/arch/amd64/amd64/paravirt.c
new file mode 100644
index 0000000..d4199fa
--- /dev/null
+++ b/sys/arch/amd64/amd64/paravirt.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2014 Stefan Fritsch <s...@sfritsch.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <machine/paravirtvar.h>
+#include <machine/codepatch.h>
+#include <machine/cpu.h>
+#include <machine/specialreg.h>
+#include <machine/biosvar.h>
+#include <machine/cpuvar.h>
+
+char           hypervisor_signature[13];
+uint32_t       kvm_features, hyperv_features;
+uint32_t       kvm_cpuid_base = 0;
+uint32_t       hyperv_cpuid_base = 0;
+int            kvm_pv_eoi_enabled = 0;
+static int     hyperv_apic_msr_enabled = 0;
+
+struct paravirt_softc {
+       struct  device sc_dev;
+};
+
+int paravirt_probe(struct device *, void *, void *);
+void paravirt_attach(struct device *, struct device *, void *);
+int paravirt_guess(void);
+
+struct cfattach paravirt_ca = {
+       sizeof(struct paravirt_softc),
+       paravirt_probe,
+       paravirt_attach,
+       NULL,
+       NULL
+};
+
+struct cfdriver paravirt_cd = {
+       NULL, "paravirt", DV_DULL
+};
+
+int
+paravirt_probe(struct device *parent, void *match, void *aux)
+{
+       struct bios_attach_args *bia = aux;
+       if (paravirt_cd.cd_ndevs || strcmp(bia->ba_name, paravirt_cd.cd_name))
+               return 0;
+       return 1;
+}
+
+int
+kvm_has_feature(int feature)
+{
+       return (kvm_features & (1UL << feature));
+}
+
+int
+hyperv_has_feature(int feature)
+{
+       return (hyperv_features & (1UL << feature));
+}
+
+
+int
+running_on_hypervisor()
+{
+       return (kvm_cpuid_base != 0 || hyperv_cpuid_base != 0);
+}
+
+struct pvpatch {
+       uint32_t offset;
+       uint16_t len;
+       uint16_t id;
+};
+
+#define DISABLE_KVM_PV_EOI     0x1
+#define FORCE_KVM_PV_EOI       0x2
+#define DISABLE_HV_APIC_MSR    0x4
+#define FORCE_HV_APIC_MSR      0x8
+
+void
+paravirt_attach(struct device *parent, struct device *self, void *aux)
+{
+       struct paravirt_softc *sc = (struct paravirt_softc *)self;
+       uint32_t flags = sc->sc_dev.dv_cfdata->cf_flags;
+       uint32_t regs[4];
+       uint32_t base;
+       // struct cpu_info *ci = curcpu();
+
+       for (base = CPUID_HYPERVISOR_SIGNATURE_START;
+           base < CPUID_HYPERVISOR_SIGNATURE_END;
+           base += CPUID_HYPERVISOR_SIGNATURE_STEP) {
+               CPUID(base, regs[0], regs[1], regs[2], regs[3]);
+               if (memcmp(&regs[1], "KVMKVMKVM\0\0\0", 12) == 0) {
+                       kvm_cpuid_base = base;
+                       CPUID(base + CPUID_OFFSET_KVM_FEATURES, regs[0], regs[1], regs[2], regs[3]);
+                       kvm_features = regs[0];
+                       printf(" KVM[%#x]", kvm_features);
+               } else if (memcmp(&regs[1], "Microsoft Hv", 12) == 0) {
+                       hyperv_cpuid_base = base;
+                       if (regs[0] >= base + CPUID_OFFSET_HYPERV_FEATURE_ID) {
+                               CPUID(base + CPUID_OFFSET_HYPERV_FEATURE_ID,
+                                   regs[0], regs[1], regs[2], regs[3]);
+                               hyperv_features = regs[0];
+                       }
+                       printf(" Hyper-V[%#x]", hyperv_features);
+               }
+       }
+
+       /*
+        * Unfortunately, userspace (qemu) is responsible for exposing the
+        * Hyper-V CPUID pages and qemu does not do this unless a special
+        * command line parameter is given. Therefore it is a possible and very
+        * common configuration that the kernel (kvm) supports the APIC access
+        * MSRs but the CPUID bits are missing. To work around this, we check
+        * for KVM_FEATURE_ASYNC_PF which was introduced in Linux 2.6.37 which
+        * is later than 2.6.34 where support for the APIC access MSRs first
+        * appeared.
+        */
+       if ((flags&FORCE_HV_APIC_MSR)
+           || hyperv_has_feature(HYPERV_FEATURE_APIC_MSRS)
+           || kvm_has_feature(KVM_FEATURE_ASYNC_PF))
+               hyperv_apic_msr_enabled = 1;
+       if (flags&DISABLE_HV_APIC_MSR)
+               hyperv_apic_msr_enabled = 0;
+       if (hyperv_apic_msr_enabled)
+               printf(" HV:APIC_MSR");
+
+       if ((flags&FORCE_KVM_PV_EOI) || kvm_has_feature(KVM_FEATURE_PV_EOI))
+               kvm_pv_eoi_enabled = 1;
+       if (flags&DISABLE_KVM_PV_EOI)
+               kvm_pv_eoi_enabled = 0;
+       if (kvm_pv_eoi_enabled)
+               printf(" KVM:PV_EOI");
+
+       printf("\n");
+
+       if (!kvm_pv_eoi_enabled) {
+               // XXX this must also be done if paravirt is disabled
+               codepatch_nop(CPTAG_KVM_PV_EOI);
+       }
+
+       if (hyperv_apic_msr_enabled) {
+               extern void *hv_msr_eoi;
+#ifdef MULTIPROCESSOR
+               x86_ipi_func = kvm_x86_ipi;
+#endif
+               codepatch_call(CPTAG_EOI, &hv_msr_eoi);
+       }
+}
diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index 2e23826..6f41a6f 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -77,6 +77,8 @@
 #include <machine/trap.h>
 #include <machine/intr.h>
 #include <machine/psl.h>
+#include <machine/paravirtvar.h>
+#include <machine/codepatch.h>
 
 #include "ioapic.h"
 #include "lapic.h"
@@ -313,6 +315,34 @@ calltrap:
 #define        XINTR(name,num)         _Xintr_##name##num
 #endif
 
+       .section .codepatch,"a"
+       .align  8
+       .globl _C_LABEL(codepatch_begin)
+_C_LABEL(codepatch_begin):
+        .previous
+
+       .section .codepatchend,"a"
+       .globl _C_LABEL(codepatch_end)
+_C_LABEL(codepatch_end):
+        .previous
+
+#if NPARAVIRT > 0
+       .globl _C_LABEL(hv_msr_eoi)
+_C_LABEL(hv_msr_eoi):
+       pushq   %rax
+       pushq   %rcx
+       pushq   %rdx
+       mov     $MSR_HV_X64_EOI,%ecx
+       mov     $0,%eax
+       mov     $0,%edx
+       wrmsr
+       popq    %rdx
+       popq    %rcx
+       popq    %rax
+       ret
+
+#endif
+
 #if NLAPIC > 0
 #ifdef MULTIPROCESSOR
 IDTVEC(recurse_lapic_ipi)
@@ -325,7 +355,16 @@ IDTVEC(intr_lapic_ipi)
        pushq   $0              
        pushq   $T_ASTFLT
        INTRENTRY               
+#if NPARAVIRT > 0
+       CODEPATCH_START
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       jc      3f
+       CODEPATCH_END(CPTAG_KVM_PV_EOI)
+#endif
+       CODEPATCH_START
        movl    $0,_C_LABEL(local_apic)+LAPIC_EOI
+       CODEPATCH_END(CPTAG_EOI)
+3:
        movl    CPUVAR(ILEVEL),%ebx
        cmpl    $IPL_IPI,%ebx
        jae     2f
@@ -347,7 +386,7 @@ IDTVEC(resume_lapic_ipi)
 IDTVEC(ipi_invltlb)
        pushq   %rax
 
-       ioapic_asm_ack()
+       ioapic_asm_ack_no_swapgs()
 
        movq    %cr3, %rax
        movq    %rax, %cr3
@@ -361,7 +400,7 @@ IDTVEC(ipi_invltlb)
 IDTVEC(ipi_invlpg)
        pushq   %rax
 
-       ioapic_asm_ack()
+       ioapic_asm_ack_no_swapgs()
 
        movq    tlb_shoot_addr1, %rax
        invlpg  (%rax)
@@ -376,7 +415,7 @@ IDTVEC(ipi_invlrange)
        pushq   %rax
        pushq   %rdx
 
-       ioapic_asm_ack()
+       ioapic_asm_ack_no_swapgs()
 
        movq    tlb_shoot_addr1, %rax
        movq    tlb_shoot_addr2, %rdx
@@ -392,6 +431,91 @@ IDTVEC(ipi_invlrange)
        popq    %rax
        iretq
 
+IDTVEC(ipi_invltlb_kvm_pv_eoi)
+        testq   $SEL_UPL,8(%rsp)
+        je      1f
+       swapgs
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       swapgs
+       jmp 2f
+1:
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+2:
+       jc      3f
+       CODEPATCH_START
+       movl    $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)
+       CODEPATCH_END(CPTAG_EOI)
+3:
+       pushq   %rax
+
+       movq    %cr3, %rax
+       movq    %rax, %cr3
+
+       lock
+       decq    tlb_shoot_wait
+
+       popq    %rax
+       iretq
+
+IDTVEC(ipi_invlpg_kvm_pv_eoi)
+        testq   $SEL_UPL,8(%rsp)
+        je      1f
+       swapgs
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       swapgs
+       jmp 2f
+1:
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+2:
+       jc      3f
+       CODEPATCH_START
+       movl    $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)
+       CODEPATCH_END(CPTAG_EOI)
+3:
+       pushq   %rax
+
+       movq    tlb_shoot_addr1, %rax
+       invlpg  (%rax)
+
+       lock
+       decq    tlb_shoot_wait
+
+       popq    %rax
+       iretq
+
+IDTVEC(ipi_invlrange_kvm_pv_eoi)
+        testq   $SEL_UPL,8(%rsp)
+        je      1f
+       swapgs
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       swapgs
+       jmp 2f
+1:
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+2:
+       jc      3f
+       CODEPATCH_START
+       movl    $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)
+       CODEPATCH_END(CPTAG_EOI)
+3:
+       pushq   %rax
+       pushq   %rdx
+
+       movq    tlb_shoot_addr1, %rax
+       movq    tlb_shoot_addr2, %rdx
+4:     invlpg  (%rax)
+       addq    $PAGE_SIZE, %rax
+       cmpq    %rdx, %rax
+       jb      4b
+
+       lock
+       decq    tlb_shoot_wait
+
+       popq    %rdx
+       popq    %rax
+       iretq
+
+
 #endif /* MULTIPROCESSOR */
        
        /*
@@ -407,7 +531,16 @@ IDTVEC(intr_lapic_ltimer)
        pushq   $0              
        pushq   $T_ASTFLT
        INTRENTRY               
+#if NPARAVIRT > 0
+       CODEPATCH_START
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)
+       jc      3f
+       CODEPATCH_END(CPTAG_KVM_PV_EOI)
+#endif
+       CODEPATCH_START
        movl    $0,_C_LABEL(local_apic)+LAPIC_EOI
+       CODEPATCH_END(CPTAG_EOI)
+3:
        movl    CPUVAR(ILEVEL),%ebx
        cmpl    $IPL_CLOCK,%ebx
        jae     2f
diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
index e1c463c..0ec4eb0 100644
--- a/sys/arch/amd64/conf/GENERIC
+++ b/sys/arch/amd64/conf/GENERIC
@@ -39,6 +39,8 @@ isa0  at amdpcib?
 isa0   at tcpcib?
 pci*   at mainbus0
 
+paravirt0 at mainbus0
+
 acpi0          at bios0
 acpitimer*     at acpi?
 acpihpet*      at acpi?
diff --git a/sys/arch/amd64/conf/files.amd64 b/sys/arch/amd64/conf/files.amd64
index 103b653..a73d5d8 100644
--- a/sys/arch/amd64/conf/files.amd64
+++ b/sys/arch/amd64/conf/files.amd64
@@ -80,6 +80,12 @@ device       mainbus: isabus, pcibus, mainbus
 attach mainbus at root
 file   arch/amd64/amd64/mainbus.c              mainbus
 
+device paravirt
+attach paravirt at mainbus
+file   arch/amd64/amd64/paravirt.c             paravirt needs-flag
+
+file   arch/amd64/amd64/codepatch.c
+
 device bios {}
 attach bios at mainbus
 file   arch/amd64/amd64/bios.c                 bios needs-flag
diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h
new file mode 100644
index 0000000..9463219
--- /dev/null
+++ b/sys/arch/amd64/include/codepatch.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2014 Stefan Fritsch <s...@sfritsch.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _MACHINE_CODEPATCH_H_
+#define _MACHINE_CODEPATCH_H_
+
+#include <machine/param.h>
+
+#ifndef _LOCORE
+
+void *codepatch_maprw(vaddr_t *nva, vaddr_t dest);
+void codepatch_unmaprw(vaddr_t nva);
+void codepatch_fill_nop(void *caddr, uint16_t len);
+void codepatch_nop(uint16_t tag);
+void codepatch_replace(uint16_t tag, void *code, size_t len);
+void codepatch_call(uint16_t tag, void *func);
+
+#endif /* !_LOCORE */
+
+/*
+ * Mark the start of some code snippet to be patched.
+ */
+#define        CODEPATCH_START 998:
+/*
+ * Mark the end of some code to be patched, and assign the given tag.
+ */
+#define        CODEPATCH_END(tag)                       \
+       999:                                     \
+       .section .codepatch, "a"                ;\
+       .int (998b - KERNBASE)                  ;\
+       .short (999b - 998b)                    ;\
+       .short tag                              ;\
+       .previous
+
+#define CPTAG_EOI              1
+#define CPTAG_KVM_PV_EOI       2
+#define CPTAG_STAC             3
+#define CPTAG_CLAC             4
+
+#endif /* _MACHINE_CODEPATCH_H_ */
diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index bd654f1..29aee57 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -46,6 +46,7 @@
 #include <machine/segments.h>
 #include <machine/cacheinfo.h>
 #include <machine/intrdefs.h>
+// XXX #include "paravirt.h"
 
 #ifdef MULTIPROCESSOR
 #include <machine/i82489reg.h>
@@ -137,6 +138,9 @@ struct cpu_info {
 
        struct ksensordev       ci_sensordev;
        struct ksensor          ci_sensor;
+// XXX #if NPARAVIRT > 0
+       u_int32_t               ci_kvm_pv_eoi;
+// XXX #endif
 #ifdef GPROF
        struct gmonparam        *ci_gmon;
 #endif
diff --git a/sys/arch/amd64/include/cpuvar.h b/sys/arch/amd64/include/cpuvar.h
index 8a75da5..5f19e27 100644
--- a/sys/arch/amd64/include/cpuvar.h
+++ b/sys/arch/amd64/include/cpuvar.h
@@ -64,6 +64,8 @@
  * SUCH DAMAGE.
  */
 
+#include "paravirt.h"
+
 struct cpu_functions {
        int (*start)(struct cpu_info *);
        int (*stop)(struct cpu_info *);
@@ -87,7 +89,17 @@ struct cpu_attach_args {
 
 #ifdef _KERNEL
 
+#if NPARAVIRT > 0
+extern int (*x86_ipi_func)(int,int,int);
+int kvm_x86_ipi(int vec, int target, int dl);
+int default_x86_ipi(int vec, int target, int dl);
+static inline int x86_ipi(int vec, int target, int dl) {
+       return x86_ipi_func(vec, target, dl);
+}
+#else
 int x86_ipi(int,int,int);
+#endif
+
 void x86_self_ipi(int);
 int x86_ipi_init(int);
 
diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h
index e4ab947..452af3d 100644
--- a/sys/arch/amd64/include/i82093reg.h
+++ b/sys/arch/amd64/include/i82093reg.h
@@ -112,8 +112,30 @@
 
 #ifdef _KERNEL
 
-#define ioapic_asm_ack(num) \
-       movl    $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)
+#include <machine/paravirtvar.h>
+#include <machine/codepatch.h>
+#define ioapic_asm_ack_no_swapgs(num)                          \
+       CODEPATCH_START                                         ;\
+       movl    $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip)       ;\
+       CODEPATCH_END(CPTAG_EOI)
+
+#if NPARAVIRT > 0
+/*
+ * This is only usable if swapgs has already been called (e.g. by INTRENTRY).
+ */
+#define ioapic_asm_ack(num)                                    \
+       CODEPATCH_START                                         ;\
+       btr     $KVM_PV_EOI_BIT,CPUVAR(KVM_PV_EOI)              ;\
+       jc      78f                                             ;\
+       CODEPATCH_END(CPTAG_KVM_PV_EOI)                         ;\
+       ioapic_asm_ack_no_swapgs(num)                           ;\
+       78:
+
+#else
+
+#define ioapic_asm_ack(num)             ioapic_asm_ack_no_swapgs(num)
+
+#endif
 
 #ifdef MULTIPROCESSOR
 
diff --git a/sys/arch/amd64/include/i82489var.h b/sys/arch/amd64/include/i82489var.h
index dd50af5..f926c43 100644
--- a/sys/arch/amd64/include/i82489var.h
+++ b/sys/arch/amd64/include/i82489var.h
@@ -33,6 +33,8 @@
 #ifndef _MACHINE_I82489VAR_H_
 #define _MACHINE_I82489VAR_H_
 
+#include "paravirt.h"
+
 /*
  * Software definitions belonging to Local APIC driver.
  */
@@ -73,6 +75,9 @@ extern void Xintrspurious(void);
  * Vector used for inter-processor interrupts.
  */
 extern void Xintr_lapic_ipi(void);
+#if NPARAVIRT > 0
+extern void Xintr_lapic_ipi_kvm_pv_eoi(void);
+#endif
 extern void Xrecurse_lapic_ipi(void);
 extern void Xresume_lapic_ipi(void);
 #define LAPIC_IPI_VECTOR                       0xe0
@@ -88,12 +93,20 @@ extern void Xresume_lapic_ipi(void);
 extern void Xipi_invltlb(void);
 extern void Xipi_invlpg(void);
 extern void Xipi_invlrange(void);
+#if NPARAVIRT > 0
+extern void Xipi_invltlb_kvm_pv_eoi(void);
+extern void Xipi_invlpg_kvm_pv_eoi(void);
+extern void Xipi_invlrange_kvm_pv_eoi(void);
+#endif
 
 /*
  * Vector used for local apic timer interrupts.
  */
 
 extern void Xintr_lapic_ltimer(void);
+#if NPARAVIRT > 0
+extern void Xintr_lapic_ltimer_kvm_pv_eoi(void);
+#endif
 extern void Xresume_lapic_ltimer(void);
 extern void Xrecurse_lapic_ltimer(void);
 #define LAPIC_TIMER_VECTOR             0xc0
diff --git a/sys/arch/amd64/include/paravirtvar.h b/sys/arch/amd64/include/paravirtvar.h
new file mode 100644
index 0000000..49306ba
--- /dev/null
+++ b/sys/arch/amd64/include/paravirtvar.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2014 Stefan Fritsch <s...@sfritsch.de>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _MACHINE_PARAVIRT_H_
+#define _MACHINE_PARAVIRT_H_
+
+#include "paravirt.h"
+
+#define        CPUID_HYPERVISOR_SIGNATURE_START        0x40000000
+#define        CPUID_HYPERVISOR_SIGNATURE_END          0x40010000
+#define        CPUID_HYPERVISOR_SIGNATURE_STEP         0x100
+
+#define        CPUID_OFFSET_KVM_FEATURES               0x1
+
+/*
+ * We don't really support Hyper-V but KVM offers one feature
+ * from Hyper-V that we use (the APIC access MSRs).
+ */
+
+#define CPUID_OFFSET_HYPERV_INTERFACE          0x1
+#define CPUID_OFFSET_HYPERV_SYSTEM_ID          0x2
+#define CPUID_OFFSET_HYPERV_FEATURE_ID         0x3
+
+#define HYPERV_FEATURE_APIC_MSRS               4
+
+#define        KVM_FEATURE_CLOCKSOURCE                 0       /* deprecated */
+#define        KVM_FEATURE_NOP_IO_DELAY                1
+#define        KVM_FEATURE_MMU_OP                      2       /* deprecated */
+#define        KVM_FEATURE_CLOCKSOURCE2                3
+#define        KVM_FEATURE_ASYNC_PF                    4
+#define        KVM_FEATURE_STEAL_TIME                  5
+#define        KVM_FEATURE_PV_EOI                      6
+#define        KVM_FEATURE_PV_UNHALT                   7
+#define        KVM_FEATURE_CLOCKSOURCE_STABLE_BIT      24
+
+#define        MSR_KVM_EOI_EN                          0x4b564d04
+
+#define        MSR_HV_X64_EOI                          0x40000070
+#define        MSR_HV_X64_ICR                          0x40000071
+#define        MSR_HV_X64_TPR                          0x40000072
+
+#define KVM_PV_EOI_BIT                         0
+
+#ifndef _LOCORE
+
+extern int kvm_pv_eoi_enabled;
+int kvm_has_feature(int feature);
+int hyperv_has_feature(int feature);
+
+#if NPARAVIRT > 0
+int running_on_hypervisor(void);
+#else
+#define running_on_hypervisor()                0
+#endif /* NPARAVIRT */
+
+void codepatch_info(void);
+#endif /* !_LOCORE */
+
+#endif /* _MACHINE_PARAVIRT_H_ */
diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h
index 735b7cf..6947973 100644
--- a/sys/arch/amd64/include/specialreg.h
+++ b/sys/arch/amd64/include/specialreg.h
@@ -158,6 +158,7 @@
 #define        CPUIDECX_AVX    0x10000000      /* Advanced Vector Extensions */
 #define        CPUIDECX_F16C   0x20000000      /* 16bit fp conversion  */
 #define        CPUIDECX_RDRAND 0x40000000      /* RDRAND instruction  */
+#define        CPUIDECX_HYPERV 0x80000000      /* Hypervisor present */
 
 /*
  * "Structured Extended Feature Flags Parameters" (CPUID function 0x7, leaf 0)
