On Tue, Oct 28, 2008 at 03:13:34PM -0500, Anthony Liguori wrote: > This patch adds very basic KVM support. KVM is a kernel module for Linux that > allows userspace programs to make use of hardware virtualization support. It > current supports x86 hardware virtualization using Intel VT-x or AMD-V. It > also supports IA64 VT-i, PPC 440, and S390. > > This patch only implements the bare minimum support to get a guest booting. > It > has very little impact the rest of QEMU and attempts to integrate nicely with > the rest of QEMU. > > Even though this implementation is basic, it is significantly faster than TCG. > Booting and shutting down a Linux guest: > > w/TCG: 1:32.36 elapsed 84% CPU > > w/KVM: 0:31.14 elapsed 59% CPU > > Right now, KVM is disabled by default and must be explicitly enabled with > -enable-kvm. We can enable it by default later when we have had better > testing. > > Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> > > diff --git a/KVM_TODO b/KVM_TODO > new file mode 100644 > index 0000000..9529049 > --- /dev/null > +++ b/KVM_TODO > @@ -0,0 +1,9 @@ > +1) Add hooks for load/save of register state > + o Fixes gdbstub, save/restore, and vmport > +2) Add VGA optimization > +3) Add IO thread > +4) Add guest SMP support > +5) Add TPR optimization > +6) Add support for in-kernel APIC > +7) Add support for in-kernel PIT > +8) Merge in additional changes in kvm-userspace tree > diff --git a/Makefile.target b/Makefile.target > index e2edf9d..903d66d 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -183,6 +183,9 @@ CFLAGS+=-I/opt/SUNWspro/prod/include/cc > endif > endif > > +kvm.o: CFLAGS+=$(KVM_CFLAGS) > +kvm-all.o: CFLAGS+=$(KVM_CFLAGS) > + > all: $(PROGS) > > ######################################################### > @@ -475,6 +478,9 @@ ifndef CONFIG_USER_ONLY > > OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o > net-checksum.o > OBJS+=fw_cfg.o aio.o buffered_file.o migration.o migration-tcp.o > +ifdef CONFIG_KVM > +OBJS+=kvm.o 
kvm-all.o > +endif > ifdef CONFIG_WIN32 > OBJS+=block-raw-win32.o > else > diff --git a/configure b/configure > index aefa69b..7aed99d 100755 > --- a/configure > +++ b/configure > @@ -113,6 +113,7 @@ aio="yes" > nptl="yes" > mixemu="no" > bluez="yes" > +kvm="yes" > > # OS specific > targetos=`uname -s` > @@ -300,6 +301,8 @@ for opt do > ;; > --disable-bluez) bluez="no" > ;; > + --disable-kvm) kvm="no" > + ;; > --enable-profiler) profiler="yes" > ;; > --enable-cocoa) > @@ -439,6 +442,7 @@ echo " --disable-brlapi disable BrlAPI" > echo " --disable-vnc-tls disable TLS encryption for VNC server" > echo " --disable-curses disable curses output" > echo " --disable-bluez disable bluez stack connectivity" > +echo " --disable-kvm disable KVM acceleration support" > echo " --disable-nptl disable usermode NPTL support" > echo " --enable-system enable all system emulation targets" > echo " --disable-system disable all system emulation targets" > @@ -933,6 +937,30 @@ EOF > fi > > ########################################## > +# kvm probe > +if test "$kvm" = "yes" ; then > + cat > $TMPC <<EOF > +#include <linux/kvm.h> > +#if !defined(KVM_API_VERSION) || \ > + KVM_API_VERSION < 12 || \ > + KVM_API_VERSION > 12 || \ > + !defined(KVM_CAP_USER_MEMORY) || \ > + !defined(KVM_CAP_SET_TSS_ADDR) > +#error Invalid KVM version > +#endif > +int main(void) { return 0; } > +EOF > + # FIXME make this configurable > + kvm_cflags=-I/lib/modules/`uname -r`/build/include > + if $cc $ARCH_CFLAGS -o $TMPE ${OS_CFLAGS} $kvm_cflags $TMPC \ > + 2>/dev/null ; then > + : > + else > + kvm="no" > + fi > +fi > + > +########################################## > # AIO probe > if test "$aio" = "yes" ; then > aio=no > @@ -1018,6 +1046,7 @@ echo "uname -r $uname_release" > echo "NPTL support $nptl" > echo "vde support $vde" > echo "AIO support $aio" > +echo "KVM support $kvm" > > if test $sdl_too_old = "yes"; then > echo "-> Your SDL version is too old - please upgrade to have SDL support" > @@ -1388,6 +1417,15 @@ 
interp_prefix1=`echo "$interp_prefix" | sed > "s/%M/$target_cpu/g"` > echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h > gdb_xml_files="" > > +# FIXME allow i386 to build on x86_64 and vice versa > +if test "$kvm" = "yes" -a "$target_cpu" != "$cpu" ; then > + kvm="no" > +fi > +# Disable KVM for linux-user > +if test "$kvm" = "yes" -a "$target_softmmu" = "no" ; then > + kvm="no" > +fi > + > case "$target_cpu" in > i386) > echo "TARGET_ARCH=i386" >> $config_mak > @@ -1397,6 +1435,11 @@ case "$target_cpu" in > then > echo "#define USE_KQEMU 1" >> $config_h > fi > + if test "$kvm" = "yes" ; then > + echo "CONFIG_KVM=yes" >> $config_mak > + echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak > + echo "#define CONFIG_KVM" >> $config_h > + fi > gcc3minver=`$cc --version 2> /dev/null| fgrep "(GCC) 3." | awk '{ print > $3 }' | cut -f2 -d.` > if test -n "$gcc3minver" && test $gcc3minver -gt 3 > then > @@ -1414,6 +1457,11 @@ case "$target_cpu" in > then > echo "#define USE_KQEMU 1" >> $config_h > fi > + if test "$kvm" = "yes" ; then > + echo "CONFIG_KVM=yes" >> $config_mak > + echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak > + echo "#define CONFIG_KVM 1" >> $config_h > + fi > ;; > alpha) > echo "TARGET_ARCH=alpha" >> $config_mak > diff --git a/cpu-defs.h b/cpu-defs.h > index 5dcac74..46d4487 100644 > --- a/cpu-defs.h > +++ b/cpu-defs.h > @@ -142,6 +142,9 @@ typedef struct icount_decr_u16 { > } icount_decr_u16; > #endif > > +struct kvm_run; > +struct KVMState; > + > #define CPU_TEMP_BUF_NLONGS 128 > #define CPU_COMMON \ > struct TranslationBlock *current_tb; /* currently executing TB */ \ > @@ -199,6 +202,9 @@ typedef struct icount_decr_u16 { > /* user data */ \ > void *opaque; \ > \ > - const char *cpu_model_str; > + const char *cpu_model_str; \ > + struct KVMState *kvm_state; \ > + struct kvm_run *kvm_run; \ > + int kvm_fd; > > #endif > diff --git a/cpu-exec.c b/cpu-exec.c > index 6d4dcdd..04b3021 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -22,6 +22,7 @@ > 
#include "exec.h" > #include "disas.h" > #include "tcg.h" > +#include "kvm.h" > > #if !defined(CONFIG_SOFTMMU) > #undef EAX > @@ -361,6 +362,19 @@ int cpu_exec(CPUState *env1) > } > #endif > > + if (kvm_enabled()) { > + int ret; > + ret = kvm_cpu_exec(env); > + if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) { > + env->interrupt_request &= ~CPU_INTERRUPT_EXIT; > + env->exception_index = EXCP_INTERRUPT; > + cpu_loop_exit(); > + } else if (env->halted) { > + cpu_loop_exit(); > + } else > + longjmp(env->jmp_env, 1); > + } > + > next_tb = 0; /* force lookup of first TB */ > for(;;) { > interrupt_request = env->interrupt_request; > diff --git a/exec.c b/exec.c > index f1fcec8..2623ac6 100644 > --- a/exec.c > +++ b/exec.c > @@ -39,6 +39,7 @@ > #include "tcg.h" > #include "hw/hw.h" > #include "osdep.h" > +#include "kvm.h" > #if defined(CONFIG_USER_ONLY) > #include <qemu.h> > #endif > @@ -2211,6 +2212,9 @@ void cpu_register_physical_memory(target_phys_addr_t > start_addr, > kqemu_set_phys_mem(start_addr, size, phys_offset); > } > #endif > + if (kvm_enabled()) > + kvm_set_phys_mem(start_addr, size, phys_offset); > + > size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK; > end_addr = start_addr + (target_phys_addr_t)size; > for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) { > diff --git a/hw/acpi.c b/hw/acpi.c > index 45963d3..66a5faa 100644 > --- a/hw/acpi.c > +++ b/hw/acpi.c > @@ -23,6 +23,7 @@ > #include "sysemu.h" > #include "i2c.h" > #include "smbus.h" > +#include "kvm.h" > > //#define DEBUG > > @@ -501,6 +502,12 @@ i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t > smb_io_base, > > register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, s); > > + if (kvm_enabled()) { > + /* Mark SMM as already inited to prevent SMM from running. KVM does > not > + * support SMM mode. */ > + pci_conf[0x5B] = 0x02; > + } > + > /* XXX: which specification is used ? The i82731AB has different > mappings */ > pci_conf[0x5f] = (parallel_hds[0] != NULL ? 
0x80 : 0) | 0x10; > diff --git a/kvm-all.c b/kvm-all.c > new file mode 100644 > index 0000000..4379071 > --- /dev/null > +++ b/kvm-all.c > @@ -0,0 +1,377 @@ > +/* > + * QEMU KVM support > + * > + * Copyright IBM, Corp. 2008 > + * > + * Authors: > + * Anthony Liguori <[EMAIL PROTECTED]> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + */ > + > +#include <sys/types.h> > +#include <sys/ioctl.h> > +#include <sys/mman.h> > + > +#include <linux/kvm.h> > + > +#include "qemu-common.h" > +#include "sysemu.h" > +#include "kvm.h" > + > +//#define DEBUG_KVM > + > +#ifdef DEBUG_KVM > +#define dprintf(fmt, ...) \ > + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) > +#else > +#define dprintf(fmt, ...) \ > + do { } while (0) > +#endif > + > +typedef struct kvm_userspace_memory_region KVMSlot;
Actually, I don't think it is a good idea. We may want to keep internal-only data tied to the slot, such as the slot's dirty bitmap if we do per-slot dirty tracking. Of course there may be other ways to do it, but this is the cleaner and more adequate approach since we're going through a fresh start. > + > +int kvm_allowed = 0; > + > +struct KVMState > +{ > + KVMSlot slots[32]; > + int fd; > + int vmfd; > +}; > + > +static KVMState *kvm_state; > + > +static KVMSlot *kvm_alloc_slot(KVMState *s) > +{ > + int i; > + > + for (i = 0; i < ARRAY_SIZE(s->slots); i++) { > + if (s->slots[i].memory_size == 0) > + return &s->slots[i]; > + } > + > + return NULL; > +} > + > +static KVMSlot *kvm_lookup_slot(KVMState *s, target_phys_addr_t start_addr) > +{ > + int i; > + > + for (i = 0; i < ARRAY_SIZE(s->slots); i++) { > + KVMSlot *mem = &s->slots[i]; > + > + if (start_addr >= mem->guest_phys_addr && > + start_addr < (mem->guest_phys_addr + mem->memory_size)) > + return mem; > + } > + > + return NULL; > +} > + > +int kvm_init_vcpu(CPUState *env) > +{ > + KVMState *s = kvm_state; > + long mmap_size; > + int ret; > + > + dprintf("kvm_init_vcpu\n"); > + > + ret = kvm_vm_ioctl(s, KVM_CREATE_VCPU, > + (void *)(unsigned long)env->cpu_index); > + if (ret < 0) { > + dprintf("kvm_create_vcpu failed\n"); > + goto err; > + } > + > + env->kvm_fd = ret; > + env->kvm_state = s; > + > + mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); > + if (mmap_size < 0) { > + dprintf("KVM_GET_VCPU_MMAP_SIZE failed\n"); > + goto err; > + } > + > + env->kvm_run = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, > + env->kvm_fd, 0); > + if (env->kvm_run == MAP_FAILED) { > + ret = -errno; > + dprintf("mmap'ing vcpu state failed\n"); > + goto err; > + } > + > + ret = kvm_arch_init_vcpu(env); > + > +err: > + return ret; > +} > + > +int kvm_init(int smp_cpus) > +{ > + KVMState *s; > + int ret; > + int i; > + > + if (smp_cpus > 1) > + return -EINVAL; > + > + s = qemu_mallocz(sizeof(KVMState)); > + if (s == NULL) > 
+ return -ENOMEM; > + > + for (i = 0; i < ARRAY_SIZE(s->slots); i++) > + s->slots[i].slot = i; > + > + s->vmfd = -1; > + s->fd = open("/dev/kvm", O_RDWR); > + if (s->fd == -1) { > + fprintf(stderr, "Could not access KVM kernel module: %m\n"); > + ret = -errno; > + goto err; > + } > + > + ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0); > + if (ret < KVM_API_VERSION) { > + if (ret > 0) > + ret = -EINVAL; > + fprintf(stderr, "kvm version too old\n"); > + goto err; > + } > + > + if (ret > KVM_API_VERSION) { > + ret = -EINVAL; > + fprintf(stderr, "kvm version not supported\n"); > + goto err; > + } > + > + s->vmfd = kvm_ioctl(s, KVM_CREATE_VM, 0); > + if (s->vmfd < 0) > + goto err; > + > + /* initially, KVM allocated its own memory and we had to jump through > + * hooks to make phys_ram_base point to this. Modern versions of KVM > + * just use a user allocated buffer so we can use phys_ram_base > + * unmodified. Make sure we have a sufficiently modern version of KVM. > + */ > + ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY); > + if (ret <= 0) { > + if (ret == 0) > + ret = -EINVAL; > + fprintf(stderr, "kvm does not support KVM_CAP_USER_MEMORY\n"); > + goto err; > + } > + > + ret = kvm_arch_init(s, smp_cpus); > + if (ret < 0) > + goto err; > + > + kvm_state = s; > + > + return 0; > + > +err: > + if (s) { > + if (s->vmfd != -1) > + close(s->vmfd); > + if (s->fd != -1) > + close(s->fd); > + } > + qemu_free(s); > + > + return ret; > +} > + > +static int kvm_handle_io(CPUState *env, uint16_t port, void *data, > + int direction, int size, uint32_t count) > +{ > + int i; > + uint8_t *ptr = data; > + > + for (i = 0; i < count; i++) { > + if (direction == KVM_EXIT_IO_IN) { > + switch (size) { > + case 1: > + stb_p(ptr, cpu_inb(env, port)); > + break; > + case 2: > + stw_p(ptr, cpu_inw(env, port)); > + break; > + case 4: > + stl_p(ptr, cpu_inl(env, port)); > + break; > + } > + } else { > + switch (size) { > + case 1: > + cpu_outb(env, port, ldub_p(ptr)); > + 
break; > + case 2: > + cpu_outw(env, port, lduw_p(ptr)); > + break; > + case 4: > + cpu_outl(env, port, ldl_p(ptr)); > + break; > + } > + } > + > + ptr += size; > + } > + > + return 1; > +} > + > +int kvm_cpu_exec(CPUState *env) > +{ > + struct kvm_run *run = env->kvm_run; > + int ret; > + > + dprintf("kvm_cpu_exec()\n"); > + > + do { > + kvm_arch_pre_run(env, run); > + > + if ((env->interrupt_request & CPU_INTERRUPT_EXIT)) { > + dprintf("interrupt exit requested\n"); > + ret = 0; > + break; > + } > + > + dprintf("setting tpr\n"); > + run->cr8 = cpu_get_apic_tpr(env); > + > + ret = kvm_vcpu_ioctl(env, KVM_RUN, 0); > + kvm_arch_post_run(env, run); > + > + if (ret == -EINTR || ret == -EAGAIN) { > + dprintf("io window exit\n"); > + ret = 0; > + break; > + } > + > + if (ret < 0) { > + dprintf("kvm run failed %s\n", strerror(-ret)); > + abort(); > + } > + > + ret = 0; /* exit loop */ > + switch (run->exit_reason) { > + case KVM_EXIT_IO: > + dprintf("handle_io\n"); > + ret = kvm_handle_io(env, run->io.port, > + (uint8_t *)run + run->io.data_offset, > + run->io.direction, > + run->io.size, > + run->io.count); > + break; > + case KVM_EXIT_MMIO: > + dprintf("handle_mmio\n"); > + cpu_physical_memory_rw(run->mmio.phys_addr, > + run->mmio.data, > + run->mmio.len, > + run->mmio.is_write); > + ret = 1; > + break; > + case KVM_EXIT_IRQ_WINDOW_OPEN: > + dprintf("irq_window_open\n"); > + break; > + case KVM_EXIT_SHUTDOWN: > + dprintf("shutdown\n"); > + qemu_system_reset_request(); > + ret = 1; > + break; > + case KVM_EXIT_UNKNOWN: > + dprintf("kvm_exit_unknown\n"); > + break; > + case KVM_EXIT_FAIL_ENTRY: > + dprintf("kvm_exit_fail_entry\n"); > + break; > + case KVM_EXIT_EXCEPTION: > + dprintf("kvm_exit_exception\n"); > + break; > + case KVM_EXIT_DEBUG: > + dprintf("kvm_exit_debug\n"); > + break; > + default: > + dprintf("kvm_arch_handle_exit\n"); > + ret = kvm_arch_handle_exit(env, run); > + break; > + } > + } while (ret > 0); > + > + return ret; > +} > + > +void 
kvm_set_phys_mem(target_phys_addr_t start_addr, > + ram_addr_t size, > + ram_addr_t phys_offset) > +{ > + KVMState *s = kvm_state; > + ram_addr_t flags = phys_offset & ~TARGET_PAGE_MASK; > + KVMSlot *mem; > + > + /* KVM does not support read-only slots */ > + phys_offset &= ~IO_MEM_ROM; > + > + mem = kvm_lookup_slot(s, start_addr); > + if (mem) { > + if (flags == IO_MEM_UNASSIGNED) { > + mem->memory_size = 0; > + mem->guest_phys_addr = start_addr; > + mem->userspace_addr = 0; > + mem->flags = 0; > + > + kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem); > + } else if (start_addr >= mem->guest_phys_addr && > + (start_addr + size) <= (mem->guest_phys_addr + > mem->memory_size)) > + return; > + } > + > + /* KVM does not need to know about this memory */ > + if (flags >= IO_MEM_UNASSIGNED) > + return; > + > + mem = kvm_alloc_slot(s); > + mem->memory_size = size; > + mem->guest_phys_addr = start_addr; > + mem->userspace_addr = (unsigned long)(phys_ram_base + phys_offset); > + mem->flags = 0; > + > + kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem); > + /* FIXME deal with errors */ > +} > + > +int kvm_ioctl(KVMState *s, int type, void *data) > +{ > + int ret; > + > + ret = ioctl(s->fd, type, data); > + if (ret == -1) > + ret = -errno; > + > + return ret; > +} > + > +int kvm_vm_ioctl(KVMState *s, int type, void *data) > +{ > + int ret; > + > + ret = ioctl(s->vmfd, type, data); > + if (ret == -1) > + ret = -errno; > + > + return ret; > +} > + > +int kvm_vcpu_ioctl(CPUState *env, int type, void *data) > +{ > + int ret; > + > + ret = ioctl(env->kvm_fd, type, data); > + if (ret == -1) > + ret = -errno; > + > + return ret; > +} > diff --git a/kvm.h b/kvm.h > new file mode 100644 > index 0000000..37102b4 > --- /dev/null > +++ b/kvm.h > @@ -0,0 +1,68 @@ > +/* > + * QEMU KVM support > + * > + * Copyright IBM, Corp. 2008 > + * > + * Authors: > + * Anthony Liguori <[EMAIL PROTECTED]> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. 
See > + * the COPYING file in the top-level directory. > + * > + */ > + > +#ifndef QEMU_KVM_H > +#define QEMU_KVM_H > + > +#include "config.h" > + > +#ifdef CONFIG_KVM > +extern int kvm_allowed; > + > +#define kvm_enabled() (kvm_allowed) > +#else > +#define kvm_enabled() (0) > +#endif > + > +struct kvm_run; > + > +/* external API */ > + > +int kvm_init(int smp_cpus); > + > +int kvm_init_vcpu(CPUState *env); > + > +int kvm_cpu_exec(CPUState *env); > + > +void kvm_set_phys_mem(target_phys_addr_t start_addr, > + ram_addr_t size, > + ram_addr_t phys_offset); > + > +/* internal API */ > + > +struct KVMState; > +typedef struct KVMState KVMState; > + > +int kvm_ioctl(KVMState *s, int type, void *data); > + > +int kvm_vm_ioctl(KVMState *s, int type, void *data); > + > +int kvm_vcpu_ioctl(CPUState *env, int type, void *data); > + > +/* Arch specific hooks */ > + > +int kvm_arch_post_run(CPUState *env, struct kvm_run *run); > + > +int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run); > + > +int kvm_arch_pre_run(CPUState *env, struct kvm_run *run); > + > +int kvm_arch_get_registers(CPUState *env); > + > +int kvm_arch_put_registers(CPUState *env); > + > +int kvm_arch_init(KVMState *s, int smp_cpus); > + > +int kvm_arch_init_vcpu(CPUState *env); > + > +#endif > diff --git a/monitor.c b/monitor.c > index f0a0bc3..dc90a2b 100644 > --- a/monitor.c > +++ b/monitor.c > @@ -37,6 +37,7 @@ > #include <dirent.h> > #include "qemu-timer.h" > #include "migration.h" > +#include "kvm.h" > > //#define DEBUG > //#define DEBUG_COMPLETION > @@ -1263,6 +1264,19 @@ static void do_info_kqemu(void) > #endif > } > > +static void do_info_kvm(void) > +{ > +#ifdef CONFIG_KVM > + term_printf("kvm support: "); > + if (kvm_enabled()) > + term_printf("enabled\n"); > + else > + term_printf("disabled\n"); > +#else > + term_printf("kvm support: not compiled\n"); > +#endif > +} > + > #ifdef CONFIG_PROFILER > > int64_t kqemu_time; > @@ -1495,6 +1509,8 @@ static const term_cmd_t info_cmds[] = { > "", 
"show dynamic compiler info", }, > { "kqemu", "", do_info_kqemu, > "", "show kqemu information", }, > + { "kvm", "", do_info_kvm, > + "", "show kvm information", }, > { "usb", "", usb_info, > "", "show guest USB devices", }, > { "usbhost", "", usb_host_info, > diff --git a/target-i386/cpu.h b/target-i386/cpu.h > index 263a477..167bae2 100644 > --- a/target-i386/cpu.h > +++ b/target-i386/cpu.h > @@ -587,6 +587,8 @@ typedef struct CPUX86State { > target_ulong kernelgsbase; > #endif > > + uint64_t tsc; > + > uint64_t pat; > > /* exception/interrupt handling */ > @@ -617,6 +619,10 @@ typedef struct CPUX86State { > int kqemu_enabled; > int last_io_time; > #endif > + > + /* For KVM */ > + uint64_t interrupt_bitmap[256 / 64]; > + > /* in order to simplify APIC support, we leave this pointer to the > user */ > struct APICState *apic_state; > diff --git a/target-i386/helper.c b/target-i386/helper.c > index 905ae9b..e550f74 100644 > --- a/target-i386/helper.c > +++ b/target-i386/helper.c > @@ -29,6 +29,7 @@ > #include "exec-all.h" > #include "svm.h" > #include "qemu-common.h" > +#include "kvm.h" > > //#define DEBUG_MMU > > @@ -115,6 +116,8 @@ CPUX86State *cpu_x86_init(const char *cpu_model) > #ifdef USE_KQEMU > kqemu_init(env); > #endif > + if (kvm_enabled()) > + kvm_init_vcpu(env); > return env; > } > > @@ -1288,6 +1291,40 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState > *env, target_ulong addr) > } > #endif /* !CONFIG_USER_ONLY */ > > +#if defined(CONFIG_KVM) > +static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx, > + uint32_t *ecx, uint32_t *edx) > +{ > + uint32_t vec[4]; > + > +#ifdef __x86_64__ > + asm volatile("cpuid" > + : "=a"(vec[0]), "=b"(vec[1]), > + "=c"(vec[2]), "=d"(vec[3]) > + : "0"(function) : "cc"); > +#else > + asm volatile("pusha \n\t" > + "cpuid \n\t" > + "mov %%eax, 0(%1) \n\t" > + "mov %%ebx, 4(%1) \n\t" > + "mov %%ecx, 8(%1) \n\t" > + "mov %%edx, 12(%1) \n\t" > + "popa" > + : : "a"(function), "S"(vec) > + : "memory", "cc"); > 
+#endif > + > + if (eax) > + *eax = vec[0]; > + if (ebx) > + *ebx = vec[1]; > + if (ecx) > + *ecx = vec[2]; > + if (edx) > + *edx = vec[3]; > +} > +#endif > + > void cpu_x86_cpuid(CPUX86State *env, uint32_t index, > uint32_t *eax, uint32_t *ebx, > uint32_t *ecx, uint32_t *edx) > @@ -1307,12 +1344,23 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, > *ebx = env->cpuid_vendor1; > *edx = env->cpuid_vendor2; > *ecx = env->cpuid_vendor3; > + > + /* sysenter isn't supported on compatibility mode on AMD. and > syscall > + * isn't supported in compatibility mode on Intel. so advertise the > + * actuall cpu, and say goodbye to migration between different > vendors > + * is you use compatibility mode. */ > + if (kvm_enabled()) > + host_cpuid(0, NULL, ebx, ecx, edx); > break; > case 1: > *eax = env->cpuid_version; > *ebx = (env->cpuid_apic_id << 24) | 8 << 8; /* CLFLUSH size in quad > words, Linux wants it. */ > *ecx = env->cpuid_ext_features; > *edx = env->cpuid_features; > + > + /* "Hypervisor present" bit required for Microsoft SVVP */ > + if (kvm_enabled()) > + *ecx |= (1 << 31); > break; > case 2: > /* cache info: needed for Pentium Pro compatibility */ > @@ -1390,6 +1438,31 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, > *ebx = 0; > *ecx = env->cpuid_ext3_features; > *edx = env->cpuid_ext2_features; > + > + if (kvm_enabled()) { > + uint32_t h_eax, h_edx; > + > + host_cpuid(0x80000001, &h_eax, NULL, NULL, &h_edx); > + > + /* disable CPU features that the host does not support */ > + > + /* long mode */ > + if ((h_edx & 0x20000000) == 0 /* || !lm_capable_kernel */) > + *edx &= ~0x20000000; > + /* syscall */ > + if ((h_edx & 0x00000800) == 0) > + *edx &= ~0x00000800; > + /* nx */ > + if ((h_edx & 0x00100000) == 0) > + *edx &= ~0x00100000; > + > + /* disable CPU features that KVM cannot support */ > + > + /* svm */ > + *ecx &= ~4UL; > + /* 3dnow */ > + *edx = ~0xc0000000; > + } > break; > case 0x80000002: > case 0x80000003: > diff --git a/target-i386/kvm.c 
b/target-i386/kvm.c > new file mode 100644 > index 0000000..ff372af > --- /dev/null > +++ b/target-i386/kvm.c > @@ -0,0 +1,635 @@ > +/* > + * QEMU KVM support > + * > + * Copyright (C) 2006-2008 Qumranet Technologies > + * Copyright IBM, Corp. 2008 > + * > + * Authors: > + * Anthony Liguori <[EMAIL PROTECTED]> > + * > + * This work is licensed under the terms of the GNU GPL, version 2. See > + * the COPYING file in the top-level directory. > + * > + */ > + > +#include <sys/types.h> > +#include <sys/ioctl.h> > +#include <sys/mman.h> > + > +#include <linux/kvm.h> > + > +#include "qemu-common.h" > +#include "sysemu.h" > +#include "kvm.h" > +#include "cpu.h" > + > +//#define DEBUG_KVM > + > +#ifdef DEBUG_KVM > +#define dprintf(fmt, ...) \ > + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) > +#else > +#define dprintf(fmt, ...) \ > + do { } while (0) > +#endif > + > +int kvm_arch_init_vcpu(CPUState *env) > +{ > + struct { > + struct kvm_cpuid cpuid; > + struct kvm_cpuid_entry entries[100]; > + } __attribute__((packed)) cpuid_data; > + int limit, i, cpuid_i; > + uint32_t eax, ebx, ecx, edx; > + > + cpuid_i = 0; > + > + cpu_x86_cpuid(env, 0, &eax, &ebx, &ecx, &edx); > + limit = eax; > + > + for (i = 0; i < limit; i++) { > + struct kvm_cpuid_entry *c = &cpuid_data.entries[cpuid_i++]; > + > + cpu_x86_cpuid(env, i, &eax, &ebx, &ecx, &edx); > + c->function = i; > + c->eax = eax; > + c->ebx = ebx; > + c->ecx = ecx; > + c->edx = edx; > + } > + > + cpu_x86_cpuid(env, 0x80000000, &eax, &ebx, &ecx, &edx); > + limit = eax; > + > + for (i = 0x80000000; i < limit; i++) { > + struct kvm_cpuid_entry *c = &cpuid_data.entries[cpuid_i++]; > + > + cpu_x86_cpuid(env, i, &eax, &ebx, &ecx, &edx); > + c->function = i; > + c->eax = eax; > + c->ebx = ebx; > + c->ecx = ecx; > + c->edx = edx; > + } > + > + cpuid_data.cpuid.nent = cpuid_i; > + > + return kvm_vcpu_ioctl(env, KVM_SET_CPUID, &cpuid_data); > +} > + > +static int kvm_has_msr_star(CPUState *env) > +{ > + static int has_msr_star; > 
+ int ret; > + > + /* first time */ > + if (has_msr_star == 0) { > + struct kvm_msr_list msr_list, *kvm_msr_list; > + > + has_msr_star = -1; > + > + /* Obtain MSR list from KVM. These are the MSRs that we must > + * save/restore */ > + ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list); > + if (ret < 0) > + return 0; > + > + msr_list.nmsrs = 0; > + kvm_msr_list = qemu_mallocz(sizeof(msr_list) + > + msr_list.nmsrs * > sizeof(msr_list.indices[0])); > + if (kvm_msr_list == NULL) > + return 0; > + > + ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, > kvm_msr_list); > + if (ret >= 0) { > + int i; > + > + for (i = 0; i < kvm_msr_list->nmsrs; i++) { > + if (kvm_msr_list->indices[i] == MSR_STAR) { > + has_msr_star = 1; > + break; > + } > + } > + } > + > + free(kvm_msr_list); > + } > + > + if (has_msr_star == 1) > + return 1; > + return 0; > +} > + > +int kvm_arch_init(KVMState *s, int smp_cpus) > +{ > + int ret; > + > + /* create vm86 tss. KVM uses vm86 mode to emulate 16-bit code > + * directly. In order to use vm86 mode, a TSS is needed. Since this > + * must be part of guest physical memory, we need to allocate it. Older > + * versions of KVM just assumed that it would be at the end of physical > + * memory but that doesn't work with more than 4GB of memory. We simply > + * refuse to work with those older versions of KVM. */ > + ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, (void *)KVM_CAP_SET_TSS_ADDR); > + if (ret <= 0) { > + fprintf(stderr, "kvm does not support KVM_CAP_SET_TSS_ADDR\n"); > + return ret; > + } > + > + /* this address is 3 pages before the bios, and the bios should present > + * as unavaible memory. FIXME, need to ensure the e820 map deals with > + * this? 
> + */ > + return kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, (void *)0xfffbd000); > +} > + > +static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs) > +{ > + lhs->selector = rhs->selector; > + lhs->base = rhs->base; > + lhs->limit = rhs->limit; > + lhs->type = 3; > + lhs->present = 1; > + lhs->dpl = 3; > + lhs->db = 0; > + lhs->s = 1; > + lhs->l = 0; > + lhs->g = 0; > + lhs->avl = 0; > + lhs->unusable = 0; > +} > + > +static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs) > +{ > + unsigned flags = rhs->flags; > + lhs->selector = rhs->selector; > + lhs->base = rhs->base; > + lhs->limit = rhs->limit; > + lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; > + lhs->present = (flags & DESC_P_MASK) != 0; > + lhs->dpl = rhs->selector & 3; > + lhs->db = (flags >> DESC_B_SHIFT) & 1; > + lhs->s = (flags & DESC_S_MASK) != 0; > + lhs->l = (flags >> DESC_L_SHIFT) & 1; > + lhs->g = (flags & DESC_G_MASK) != 0; > + lhs->avl = (flags & DESC_AVL_MASK) != 0; > + lhs->unusable = 0; > +} > + > +static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) > +{ > + lhs->selector = rhs->selector; > + lhs->base = rhs->base; > + lhs->limit = rhs->limit; > + lhs->flags = > + (rhs->type << DESC_TYPE_SHIFT) > + | (rhs->present * DESC_P_MASK) > + | (rhs->dpl << DESC_DPL_SHIFT) > + | (rhs->db << DESC_B_SHIFT) > + | (rhs->s * DESC_S_MASK) > + | (rhs->l << DESC_L_SHIFT) > + | (rhs->g * DESC_G_MASK) > + | (rhs->avl * DESC_AVL_MASK); > +} > + > +static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) > +{ > + if (set) > + *kvm_reg = *qemu_reg; > + else > + *qemu_reg = *kvm_reg; > +} > + > +static int kvm_getput_regs(CPUState *env, int set) > +{ > + struct kvm_regs regs; > + int ret = 0; > + > + if (!set) { > + ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, ®s); > + if (ret < 0) > + return ret; > + } > + > + kvm_getput_reg(®s.rax, &env->regs[R_EAX], set); > + kvm_getput_reg(®s.rbx, &env->regs[R_EBX], set); > + kvm_getput_reg(®s.rcx, &env->regs[R_ECX], set); 
> + kvm_getput_reg(®s.rdx, &env->regs[R_EDX], set); > + kvm_getput_reg(®s.rsi, &env->regs[R_ESI], set); > + kvm_getput_reg(®s.rdi, &env->regs[R_EDI], set); > + kvm_getput_reg(®s.rsp, &env->regs[R_ESP], set); > + kvm_getput_reg(®s.rbp, &env->regs[R_EBP], set); > +#ifdef TARGET_X86_64 > + kvm_getput_reg(®s.r8, &env->regs[8], set); > + kvm_getput_reg(®s.r9, &env->regs[9], set); > + kvm_getput_reg(®s.r10, &env->regs[10], set); > + kvm_getput_reg(®s.r11, &env->regs[11], set); > + kvm_getput_reg(®s.r12, &env->regs[12], set); > + kvm_getput_reg(®s.r13, &env->regs[13], set); > + kvm_getput_reg(®s.r14, &env->regs[14], set); > + kvm_getput_reg(®s.r15, &env->regs[15], set); > +#endif > + > + kvm_getput_reg(®s.rflags, &env->eflags, set); > + kvm_getput_reg(®s.rip, &env->eip, set); > + > + if (set) > + ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, ®s); > + > + return ret; > +} > + > +static int kvm_put_fpu(CPUState *env) > +{ > + struct kvm_fpu fpu; > + int i; > + > + memset(&fpu, 0, sizeof fpu); > + fpu.fsw = env->fpus & ~(7 << 11); > + fpu.fsw |= (env->fpstt & 7) << 11; > + fpu.fcw = env->fpuc; > + for (i = 0; i < 8; ++i) > + fpu.ftwx |= (!env->fptags[i]) << i; > + memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs); > + memcpy(fpu.xmm, env->xmm_regs, sizeof env->xmm_regs); > + fpu.mxcsr = env->mxcsr; > + > + return kvm_vcpu_ioctl(env, KVM_SET_FPU, &fpu); > +} > + > +static int kvm_put_sregs(CPUState *env) > +{ > + struct kvm_sregs sregs; > + > + memcpy(sregs.interrupt_bitmap, > + env->interrupt_bitmap, > + sizeof(sregs.interrupt_bitmap)); > + > + if ((env->eflags & VM_MASK)) { > + set_v8086_seg(&sregs.cs, &env->segs[R_CS]); > + set_v8086_seg(&sregs.ds, &env->segs[R_DS]); > + set_v8086_seg(&sregs.es, &env->segs[R_ES]); > + set_v8086_seg(&sregs.fs, &env->segs[R_FS]); > + set_v8086_seg(&sregs.gs, &env->segs[R_GS]); > + set_v8086_seg(&sregs.ss, &env->segs[R_SS]); > + } else { > + set_seg(&sregs.cs, &env->segs[R_CS]); > + set_seg(&sregs.ds, &env->segs[R_DS]); > + set_seg(&sregs.es, 
&env->segs[R_ES]); > + set_seg(&sregs.fs, &env->segs[R_FS]); > + set_seg(&sregs.gs, &env->segs[R_GS]); > + set_seg(&sregs.ss, &env->segs[R_SS]); > + > + if (env->cr[0] & CR0_PE_MASK) { > + /* force ss cpl to cs cpl */ > + sregs.ss.selector = (sregs.ss.selector & ~3) | > + (sregs.cs.selector & 3); > + sregs.ss.dpl = sregs.ss.selector & 3; > + } > + } > + > + set_seg(&sregs.tr, &env->tr); > + set_seg(&sregs.ldt, &env->ldt); > + > + sregs.idt.limit = env->idt.limit; > + sregs.idt.base = env->idt.base; > + sregs.gdt.limit = env->gdt.limit; > + sregs.gdt.base = env->gdt.base; > + > + sregs.cr0 = env->cr[0]; > + sregs.cr2 = env->cr[2]; > + sregs.cr3 = env->cr[3]; > + sregs.cr4 = env->cr[4]; > + > + sregs.cr8 = cpu_get_apic_tpr(env); > + sregs.apic_base = cpu_get_apic_base(env); > + > + sregs.efer = env->efer; > + > + return kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs); > +} > + > +static void kvm_msr_entry_set(struct kvm_msr_entry *entry, > + uint32_t index, uint64_t value) > +{ > + entry->index = index; > + entry->data = value; > +} > + > +static int kvm_put_msrs(CPUState *env) > +{ > + struct { > + struct kvm_msrs info; > + struct kvm_msr_entry entries[100]; > + } msr_data; > + struct kvm_msr_entry *msrs = msr_data.entries; > + int n = 0; > + > + kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); > + kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); > + kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); > + if (kvm_has_msr_star(env)) > + kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); > + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); > +#ifdef TARGET_X86_64 > + /* FIXME if lm capable */ > + kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); > + kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase); > + kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); > + kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); > +#endif > + msr_data.info.nmsrs = n; > + > + return 
kvm_vcpu_ioctl(env, KVM_SET_MSRS, &msr_data); > + > +} > + > + > +static int kvm_get_fpu(CPUState *env) > +{ > + struct kvm_fpu fpu; > + int i, ret; > + > + ret = kvm_vcpu_ioctl(env, KVM_GET_FPU, &fpu); > + if (ret < 0) > + return ret; > + > + env->fpstt = (fpu.fsw >> 11) & 7; > + env->fpus = fpu.fsw; > + env->fpuc = fpu.fcw; > + for (i = 0; i < 8; ++i) > + env->fptags[i] = !((fpu.ftwx >> i) & 1); > + memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs); > + memcpy(env->xmm_regs, fpu.xmm, sizeof env->xmm_regs); > + env->mxcsr = fpu.mxcsr; > + > + return 0; > +} > + > +static int kvm_get_sregs(CPUState *env) > +{ > + struct kvm_sregs sregs; > + uint32_t hflags; > + int ret; > + > + ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs); > + if (ret < 0) > + return ret; > + > + memcpy(env->interrupt_bitmap, > + sregs.interrupt_bitmap, > + sizeof(sregs.interrupt_bitmap)); > + > + get_seg(&env->segs[R_CS], &sregs.cs); > + get_seg(&env->segs[R_DS], &sregs.ds); > + get_seg(&env->segs[R_ES], &sregs.es); > + get_seg(&env->segs[R_FS], &sregs.fs); > + get_seg(&env->segs[R_GS], &sregs.gs); > + get_seg(&env->segs[R_SS], &sregs.ss); > + > + get_seg(&env->tr, &sregs.tr); > + get_seg(&env->ldt, &sregs.ldt); > + > + env->idt.limit = sregs.idt.limit; > + env->idt.base = sregs.idt.base; > + env->gdt.limit = sregs.gdt.limit; > + env->gdt.base = sregs.gdt.base; > + > + env->cr[0] = sregs.cr0; > + env->cr[2] = sregs.cr2; > + env->cr[3] = sregs.cr3; > + env->cr[4] = sregs.cr4; > + > + cpu_set_apic_base(env, sregs.apic_base); > + > + env->efer = sregs.efer; > + //cpu_set_apic_tpr(env, sregs.cr8); > + > +#define HFLAG_COPY_MASK ~( \ > + HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \ > + HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \ > + HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \ > + HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK) > + > + > + > + hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; > + hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - 
CR0_PE_SHIFT); > + hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) & > + (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK); > + hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK)); > + hflags |= (env->cr[4] & CR4_OSFXSR_MASK) << > + (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT); > + > + if (env->efer & MSR_EFER_LMA) { > + hflags |= HF_LMA_MASK; > + } > + > + if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) { > + hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK; > + } else { > + hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >> > + (DESC_B_SHIFT - HF_CS32_SHIFT); > + hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >> > + (DESC_B_SHIFT - HF_SS32_SHIFT); > + if (!(env->cr[0] & CR0_PE_MASK) || > + (env->eflags & VM_MASK) || > + !(hflags & HF_CS32_MASK)) { > + hflags |= HF_ADDSEG_MASK; > + } else { > + hflags |= ((env->segs[R_DS].base | > + env->segs[R_ES].base | > + env->segs[R_SS].base) != 0) << > + HF_ADDSEG_SHIFT; > + } > + } > + env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags; > + env->cc_src = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); > + env->df = 1 - (2 * ((env->eflags >> 10) & 1)); > + env->cc_op = CC_OP_EFLAGS; > + env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); > + > + return 0; > +} > + > +static int kvm_get_msrs(CPUState *env) > +{ > + struct { > + struct kvm_msrs info; > + struct kvm_msr_entry entries[100]; > + } msr_data; > + struct kvm_msr_entry *msrs = msr_data.entries; > + int ret, i, n; > + > + n = 0; > + msrs[n++].index = MSR_IA32_SYSENTER_CS; > + msrs[n++].index = MSR_IA32_SYSENTER_ESP; > + msrs[n++].index = MSR_IA32_SYSENTER_EIP; > + if (kvm_has_msr_star(env)) > + msrs[n++].index = MSR_STAR; > + msrs[n++].index = MSR_IA32_TSC; > +#ifdef TARGET_X86_64 > + /* FIXME lm_capable_kernel */ > + msrs[n++].index = MSR_CSTAR; > + msrs[n++].index = MSR_KERNELGSBASE; > + msrs[n++].index = MSR_FMASK; > + msrs[n++].index = MSR_LSTAR; > +#endif > + msr_data.info.nmsrs = n; > + ret = 
kvm_vcpu_ioctl(env, KVM_GET_MSRS, &msr_data); > + if (ret < 0) > + return ret; > + > + for (i = 0; i < ret; i++) { > + switch (msrs[i].index) { > + case MSR_IA32_SYSENTER_CS: > + env->sysenter_cs = msrs[i].data; > + break; > + case MSR_IA32_SYSENTER_ESP: > + env->sysenter_esp = msrs[i].data; > + break; > + case MSR_IA32_SYSENTER_EIP: > + env->sysenter_eip = msrs[i].data; > + break; > + case MSR_STAR: > + env->star = msrs[i].data; > + break; > +#ifdef TARGET_X86_64 > + case MSR_CSTAR: > + env->cstar = msrs[i].data; > + break; > + case MSR_KERNELGSBASE: > + env->kernelgsbase = msrs[i].data; > + break; > + case MSR_FMASK: > + env->fmask = msrs[i].data; > + break; > + case MSR_LSTAR: > + env->lstar = msrs[i].data; > + break; > +#endif > + case MSR_IA32_TSC: > + env->tsc = msrs[i].data; > + break; > + } > + } > + > + return 0; > +} > + > +int kvm_arch_put_registers(CPUState *env) > +{ > + int ret; > + > + ret = kvm_getput_regs(env, 1); > + if (ret < 0) > + return ret; > + > + ret = kvm_put_fpu(env); > + if (ret < 0) > + return ret; > + > + ret = kvm_put_sregs(env); > + if (ret < 0) > + return ret; > + > + ret = kvm_put_msrs(env); > + if (ret < 0) > + return ret; > + > + return 0; > +} > + > +int kvm_arch_get_registers(CPUState *env) > +{ > + int ret; > + > + ret = kvm_getput_regs(env, 0); > + if (ret < 0) > + return ret; > + > + ret = kvm_get_fpu(env); > + if (ret < 0) > + return ret; > + > + ret = kvm_get_sregs(env); > + if (ret < 0) > + return ret; > + > + ret = kvm_get_msrs(env); > + if (ret < 0) > + return ret; > + > + return 0; > +} > + > +int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) > +{ > + /* Try to inject an interrupt if the guest can accept it */ > + if (run->ready_for_interrupt_injection && > + (env->interrupt_request & CPU_INTERRUPT_HARD) && > + (env->eflags & IF_MASK)) { > + int irq; > + > + env->interrupt_request &= ~CPU_INTERRUPT_HARD; > + irq = cpu_get_pic_interrupt(env); > + if (irq >= 0) { > + struct kvm_interrupt intr; > + intr.irq = irq; 
> + /* FIXME: errors */ > + dprintf("injected interrupt %d\n", irq); > + kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr); > + } > + } > + > + /* If we have an interrupt but the guest is not ready to receive an > + * interrupt, request an interrupt window exit. This will > + * cause a return to userspace as soon as the guest is ready to > + * receive interrupts. */ > + if ((env->interrupt_request & CPU_INTERRUPT_HARD)) > + run->request_interrupt_window = 1; > + else > + run->request_interrupt_window = 0; > + > + return 0; > +} > + > +int kvm_arch_post_run(CPUState *env, struct kvm_run *run) > +{ > + if (run->if_flag) > + env->eflags |= IF_MASK; > + else > + env->eflags &= ~IF_MASK; > + > + cpu_set_apic_tpr(env, run->cr8); > + cpu_set_apic_base(env, run->apic_base); > + > + return 0; > +} > + > +static int kvm_handle_halt(CPUState *env) > +{ > + if (!((env->interrupt_request & CPU_INTERRUPT_HARD) && > + (env->eflags & IF_MASK)) && > + !(env->interrupt_request & CPU_INTERRUPT_NMI)) { > + env->halted = 1; > + env->exception_index = EXCP_HLT; > + return 0; > + } > + > + return 1; > +} > + > +int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run) > +{ > + int ret = 0; > + > + switch (run->exit_reason) { > + case KVM_EXIT_HLT: > + dprintf("handle_hlt\n"); > + ret = kvm_handle_halt(env); > + break; > + } > + > + return ret; > +} > diff --git a/vl.c b/vl.c > index 74ae652..ecda8d5 100644 > --- a/vl.c > +++ b/vl.c > @@ -39,6 +39,7 @@ > #include "block.h" > #include "audio/audio.h" > #include "migration.h" > +#include "kvm.h" > > #include <unistd.h> > #include <fcntl.h> > @@ -8258,6 +8259,9 @@ static void help(int exitcode) > "-kernel-kqemu enable KQEMU full virtualization (default is > user mode only)\n" > "-no-kqemu disable KQEMU kernel module usage\n" > #endif > +#ifdef CONFIG_KVM > + "-enable-kvm enable KVM full virtualization support\n" > +#endif > #ifdef TARGET_I386 > "-no-acpi disable ACPI\n" > #endif > @@ -8363,6 +8367,7 @@ enum { > QEMU_OPTION_pidfile, > 
QEMU_OPTION_no_kqemu, > QEMU_OPTION_kernel_kqemu, > + QEMU_OPTION_enable_kvm, > QEMU_OPTION_win2k_hack, > QEMU_OPTION_usb, > QEMU_OPTION_usbdevice, > @@ -8449,6 +8454,9 @@ static const QEMUOption qemu_options[] = { > { "no-kqemu", 0, QEMU_OPTION_no_kqemu }, > { "kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu }, > #endif > +#ifdef CONFIG_KVM > + { "enable-kvm", 0, QEMU_OPTION_enable_kvm }, > +#endif > #if defined(TARGET_PPC) || defined(TARGET_SPARC) > { "g", 1, QEMU_OPTION_g }, > #endif > @@ -9271,6 +9279,14 @@ int main(int argc, char **argv) > kqemu_allowed = 2; > break; > #endif > +#ifdef CONFIG_KVM > + case QEMU_OPTION_enable_kvm: > + kvm_allowed = 1; > +#ifdef USE_KQEMU > + kqemu_allowed = 0; > +#endif > + break; > +#endif > case QEMU_OPTION_usb: > usb_enabled = 1; > break; > @@ -9405,6 +9421,14 @@ int main(int argc, char **argv) > } > } > > +#if defined(CONFIG_KVM) && defined(USE_KQEMU) > + if (kvm_allowed && kqemu_allowed) { > + fprintf(stderr, > + "You can not enable both KVM and kqemu at the same time\n"); > + exit(1); > + } > +#endif > + > if (smp_cpus > machine->max_cpus) { > fprintf(stderr, "Number of SMP cpus requested (%d), exceeds max cpus > " > "supported by machine `%s' (%d)\n", smp_cpus, machine->name, > @@ -9710,6 +9734,16 @@ int main(int argc, char **argv) > } > } > > + if (kvm_enabled()) { > + int ret; > + > + ret = kvm_init(smp_cpus); > + if (ret < 0) { > + fprintf(stderr, "failed to initialize KVM\n"); > + exit(1); > + } > + } > + > machine->init(ram_size, vga_ram_size, boot_devices, ds, > kernel_filename, kernel_cmdline, initrd_filename, > cpu_model); > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html