[COMMIT master] Do not compile qemu-kvm.c and qemu-kvm-x86.c
From: Glauber Costa glom...@redhat.com Instead, include them from upstream files Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/Makefile.target b/Makefile.target index e0edd27..df1f32b 100644 --- a/Makefile.target +++ b/Makefile.target @@ -160,7 +160,6 @@ ifeq ($(ARCH),sparc64) CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc endif -libobj-$(CONFIG_KVM) += qemu-kvm.o ifdef CONFIG_SOFTFLOAT libobj-y += fpu/softfloat.o else @@ -171,13 +170,13 @@ libobj-y += op_helper.o helper.o ifeq ($(TARGET_ARCH), i386) libobj-y += helper.o -libobj-$(CONFIG_KVM) += qemu-kvm-x86.o kvm-tpr-opt.o +libobj-$(CONFIG_KVM) += kvm-tpr-opt.o libobj-$(CONFIG_KVM) += qemu-kvm-helper.o endif ifeq ($(TARGET_ARCH), x86_64) libobj-y += helper.o -libobj-$(CONFIG_KVM) += qemu-kvm-x86.o kvm-tpr-opt.o +libobj-$(CONFIG_KVM) += kvm-tpr-opt.o libobj-$(CONFIG_KVM) += qemu-kvm-helper.o endif diff --git a/kvm-all.c b/kvm-all.c index 4c2fdf5..e42b1f6 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1029,3 +1029,5 @@ void kvm_remove_all_breakpoints(CPUState *current_env) } #endif /* !KVM_CAP_SET_GUEST_DEBUG */ #endif + +#include qemu-kvm.c diff --git a/target-i386/kvm.c b/target-i386/kvm.c index b7eb096..cfa5b80 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -964,3 +964,5 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) } #endif /* KVM_CAP_SET_GUEST_DEBUG */ #endif + +#include qemu-kvm-x86.c -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] replace USE_KVM with CONFIG_KVM
From: Glauber Costa glom...@redhat.com Make things less confusing; we have KVM_UPSTREAM to differentiate between the two versions anyway. kvm-all.c and kvm.c get compiled now, but are protected with KVM_UPSTREAM too, so no function in there becomes visible in the final binary. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/Makefile.target b/Makefile.target index e3189a1..e0edd27 100644 --- a/Makefile.target +++ b/Makefile.target @@ -160,9 +160,7 @@ ifeq ($(ARCH),sparc64) CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc endif -ifeq ($(USE_KVM), 1) -libobj-y += qemu-kvm.o -endif +libobj-$(CONFIG_KVM) += qemu-kvm.o ifdef CONFIG_SOFTFLOAT libobj-y += fpu/softfloat.o else @@ -173,18 +171,14 @@ libobj-y += op_helper.o helper.o ifeq ($(TARGET_ARCH), i386) libobj-y += helper.o -ifeq ($(USE_KVM), 1) -libobj-y += qemu-kvm-x86.o kvm-tpr-opt.o -libobj-y += qemu-kvm-helper.o -endif +libobj-$(CONFIG_KVM) += qemu-kvm-x86.o kvm-tpr-opt.o +libobj-$(CONFIG_KVM) += qemu-kvm-helper.o endif ifeq ($(TARGET_ARCH), x86_64) libobj-y += helper.o -ifeq ($(USE_KVM), 1) -libobj-y += qemu-kvm-x86.o kvm-tpr-opt.o -libobj-y += qemu-kvm-helper.o -endif +libobj-$(CONFIG_KVM) += qemu-kvm-x86.o kvm-tpr-opt.o +libobj-$(CONFIG_KVM) += qemu-kvm-helper.o endif libobj-y += op_helper.o @@ -203,9 +197,7 @@ endif ifeq ($(TARGET_BASE_ARCH), ia64) libobj-y += op_helper.o firmware.o -ifeq ($(USE_KVM), 1) -libobj-y += qemu-kvm-ia64.o -endif +libobj-$(CONFIG_KVM) += qemu-kvm-ia64.o endif ifeq ($(TARGET_BASE_ARCH), cris) diff --git a/configure b/configure index f8b80f2..9b744c5 100755 --- a/configure +++ b/configure @@ -2121,8 +2121,8 @@ disable_cpu_emulation() { configure_kvm() { if test $kvm = yes -a $target_softmmu = yes -a \ \( $cpu = i386 -o $cpu = x86_64 -o $cpu = ia64 -o $cpu = powerpc \); then -echo #define USE_KVM 1 $config_h -echo USE_KVM=1 $config_mak +echo #define CONFIG_KVM 1 $config_h +echo CONFIG_KVM=y $config_mak echo KVM_CFLAGS=$kvm_cflags $config_mak 
if test $kvm_cap_pit = yes ; then echo USE_KVM_PIT=1 $config_mak @@ -2159,9 +2159,9 @@ case $target_arch2 in echo #define CONFIG_KQEMU 1 $config_h fi if test $target_kvm = yes ; then - echo USE_KVM=yes $config_mak + echo CONFIG_KVM=y $config_mak echo KVM_CFLAGS=$kvm_cflags $config_mak - echo #define USE_KVM 1 $config_h + echo #define CONFIG_KVM 1 $config_h fi if test $xen = yes -a $target_softmmu = yes; then @@ -2183,9 +2183,9 @@ case $target_arch2 in fi if [ use_upstream_kvm = yes ]; then if test $target_kvm = yes ; then - echo USE_KVM=yes $config_mak + echo CONFIG_KVM=y $config_mak echo KVM_CFLAGS=$kvm_cflags $config_mak - echo #define USE_KVM 1 $config_h + echo #define CONFIG_KVM 1 $config_h fi fi if test $xen = yes -a $target_softmmu = yes @@ -2281,7 +2281,7 @@ case $target_arch2 in if test $target_kvm = yes ; then echo CONFIG_KVM=y $config_mak echo KVM_CFLAGS=$kvm_cflags $config_mak - echo #define USE_KVM 1 $config_h + echo #define CONFIG_KVM 1 $config_h fi fi gdb_xml_files=power-core.xml power-fpu.xml power-altivec.xml power-spe.xml diff --git a/hw/acpi.c b/hw/acpi.c index 074e09f..7de9cb7 100644 --- a/hw/acpi.c +++ b/hw/acpi.c @@ -775,7 +775,7 @@ static void disable_processor(struct gpe_regs *g, int cpu) } #if defined(TARGET_I386) || defined(TARGET_X86_64) -#ifdef USE_KVM +#ifdef CONFIG_KVM static CPUState *qemu_kvm_cpu_env(int index) { CPUState *penv; @@ -798,7 +798,7 @@ void qemu_system_cpu_hot_add(int cpu, int state) CPUState *env; if (state -#ifdef USE_KVM +#ifdef CONFIG_KVM (!qemu_kvm_cpu_env(cpu)) #endif ) { diff --git a/hw/msix.c b/hw/msix.c index 5f77dc9..b5dfa0b 100644 --- a/hw/msix.c +++ b/hw/msix.c @@ -63,7 +63,7 @@ /* Flag for interrupt controller to declare MSI-X support */ int msix_supported; -#ifdef USE_KVM +#ifdef CONFIG_KVM /* KVM specific MSIX helpers */ static void kvm_msix_free(PCIDevice *dev) { diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 6b82232..bda2397 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -15,7 +15,7 @@ 
#include net.h #include qemu-timer.h #include virtio-net.h -#ifdef USE_KVM +#ifdef CONFIG_KVM #include qemu-kvm.h #endif @@ -344,7 +344,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) qemu_flush_queued_packets(n-vc); -#ifdef USE_KVM +#ifdef CONFIG_KVM /* We now have RX buffers, signal to the IO thread to break out of the select to re-poll the tap file descriptor */ if (kvm_enabled()) diff --git a/kvm-all.c b/kvm-all.c index 8567ac9..4c2fdf5 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -26,6 +26,7 @@ #include gdbstub.h #include kvm.h +#ifdef KVM_UPSTREAM /* KVM uses PAGE_SIZE in it's definition of COALESCED_MMIO_MAX */ #define
[COMMIT master] duplicate KVMState
From: Glauber Costa glom...@redhat.com In this patch, we duplicate most of KVMState in our files. This should be removed later, when they are 100 % equal. Meanwhile, we fold our kvm_context_t structure inside it. To make transition smooth, we still keep a global variable kvm_context pointing to its position inside the global KVMState. This way we don't need to hurry about changing all callers. kvm_init() and kvm_finalize are changed, though, since they have now to deal with the creation/destruction of a global KVMState Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index 43e7b4c..45f5abe 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -42,6 +42,9 @@ int kvm_irqchip = 1; int kvm_pit = 1; int kvm_pit_reinject = 1; int kvm_nested = 0; + + +static KVMState *kvm_state; kvm_context_t kvm_context; pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER; @@ -416,16 +419,16 @@ int kvm_dirty_pages_log_reset(kvm_context_t kvm) } -kvm_context_t kvm_init(void *opaque) +int kvm_init(int smp_cpus) { int fd; - kvm_context_t kvm; int r, gsi_count; + fd = open(/dev/kvm, O_RDWR); if (fd == -1) { perror(open /dev/kvm); - return NULL; + return -1; } r = ioctl(fd, KVM_GET_API_VERSION, 0); if (r == -1) { @@ -446,35 +449,39 @@ kvm_context_t kvm_init(void *opaque) } kvm_abi = r; kvm_page_size = getpagesize(); - kvm = qemu_mallocz(sizeof(*kvm)); - kvm-fd = fd; - kvm-vm_fd = -1; - kvm-opaque = opaque; - kvm-dirty_pages_log_all = 0; - kvm-no_irqchip_creation = 0; - kvm-no_pit_creation = 0; + kvm_state = qemu_mallocz(sizeof(*kvm_state)); +kvm_context = kvm_state-kvm_context; - gsi_count = kvm_get_gsi_count(kvm); + kvm_context-fd = fd; + kvm_context-vm_fd = -1; + kvm_context-opaque = cpu_single_env; + kvm_context-dirty_pages_log_all = 0; + kvm_context-no_irqchip_creation = 0; + kvm_context-no_pit_creation = 0; + + gsi_count = kvm_get_gsi_count(kvm_context); if (gsi_count 0) { int gsi_bits, i; /* Round up so we 
can search ints using ffs */ gsi_bits = ALIGN(gsi_count, 32); - kvm-used_gsi_bitmap = qemu_mallocz(gsi_bits / 8); - kvm-max_gsi = gsi_bits; + kvm_context-used_gsi_bitmap = qemu_mallocz(gsi_bits / 8); + kvm_context-max_gsi = gsi_bits; /* Mark any over-allocated bits as already in use */ for (i = gsi_count; i gsi_bits; i++) - set_gsi(kvm, i); + set_gsi(kvm_context, i); } - return kvm; +pthread_mutex_lock(qemu_mutex); + return 0; + out_close: close(fd); - return NULL; + return -1; } -void kvm_finalize(kvm_context_t kvm) +static void kvm_finalize(KVMState *s) { /* FIXME if (kvm-vcpu_fd[0] != -1) @@ -482,8 +489,8 @@ void kvm_finalize(kvm_context_t kvm) if (kvm-vm_fd != -1) close(kvm-vm_fd); */ - close(kvm-fd); - free(kvm); + close(s-kvm_context.fd); + free(s); } void kvm_disable_irqchip_creation(kvm_context_t kvm) @@ -2217,18 +2224,6 @@ int kvm_main_loop(void) return 0; } -int kvm_qemu_init() -{ -/* Try to initialize kvm */ -kvm_context = kvm_init(cpu_single_env); -if (!kvm_context) { - return -1; -} -pthread_mutex_lock(qemu_mutex); - -return 0; -} - #ifdef TARGET_I386 static int destroy_region_works = 0; #endif @@ -2252,12 +2247,12 @@ int kvm_qemu_create_context(void) kvm_disable_pit_creation(kvm_context); } if (kvm_create(kvm_context, 0, NULL) 0) { - kvm_finalize(kvm_context); + kvm_finalize(kvm_state); return -1; } r = kvm_arch_qemu_create_context(); if(r 0) - kvm_finalize(kvm_context); + kvm_finalize(kvm_state); if (kvm_pit !kvm_pit_reinject) { if (kvm_reinject_control(kvm_context, 0)) { fprintf(stderr, failure to disable in-kernel PIT reinjection\n); diff --git a/qemu-kvm.h b/qemu-kvm.h index 20993f6..5d2d54c 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -128,18 +128,7 @@ int kvm_set_msrs(kvm_vcpu_context_t, struct kvm_msr_entry *msrs, int n); * \param opaque Not used * \return NULL on failure */ -kvm_context_t kvm_init(void *opaque); - -/*! 
- * \brief Cleanup the KVM context - * - * Should always be called when closing down KVM.\n - * Exception: If kvm_init() fails, this function should not be called, as the - * context would be invalid - * - * \param kvm Pointer to the kvm_context that is to be freed - */ -void kvm_finalize(kvm_context_t kvm); +int kvm_init(int smp_cpus); /*! * \brief Disable the in-kernel IRQCHIP creation @@ -1166,4
[COMMIT master] provide env-kvm_fd
From: Glauber Costa glom...@redhat.com qemu upstream puts kvm information on env. Do that too, since it will allow us to use CPUState in cpu-specific functions, instead of kvm-specific types. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index 45f5abe..6897e3c 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -503,11 +503,12 @@ void kvm_disable_pit_creation(kvm_context_t kvm) kvm-no_pit_creation = 1; } -kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id) +kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id) { long mmap_size; int r; kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context)); +kvm_context_t kvm = kvm_context; vcpu_ctx-kvm = kvm; vcpu_ctx-id = id; @@ -518,6 +519,10 @@ kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id) goto err; } vcpu_ctx-fd = r; + +env-kvm_fd = r; +env-kvm_state = kvm_state; + mmap_size = ioctl(kvm-fd, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size == -1) { fprintf(stderr, get vcpu mmap size: %m\n); @@ -2013,7 +2018,7 @@ static void *ap_main_loop(void *_env) env-thread_id = kvm_get_thread_id(); sigfillset(signals); sigprocmask(SIG_BLOCK, signals, NULL); -env-kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(kvm_context, env-cpu_index); +env-kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env-cpu_index); #ifdef USE_KVM_DEVICE_ASSIGNMENT /* do ioperm for io ports of assigned devices */ diff --git a/qemu-kvm.h b/qemu-kvm.h index 5d2d54c..f43 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -181,7 +181,7 @@ void kvm_create_irqchip(kvm_context_t kvm); * \param slot vcpu number ( 0) * \return 0 on success, -errno on failure */ -kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id); +kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id); /*! 
* \brief Start the VCPU -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] use kvm_upstream sw_breakpoints structure
From: Glauber Costa glom...@redhat.com Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index b531ca4..3bbb9d2 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -1519,7 +1519,7 @@ int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info) break; } } -} else if (kvm_find_sw_breakpoint(arch_info-pc)) +} else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info-pc)) handle = 1; if (!handle) @@ -1542,7 +1542,7 @@ void kvm_arch_update_guest_debug(CPUState *env, struct kvm_guest_debug *dbg) }; int n; -if (!TAILQ_EMPTY(kvm_sw_breakpoints)) +if (kvm_sw_breakpoints_active(env)) dbg-control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; if (nb_hw_breakpoint 0) { diff --git a/qemu-kvm.c b/qemu-kvm.c index 6897e3c..b0661b6 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -459,6 +459,10 @@ int kvm_init(int smp_cpus) kvm_context-no_irqchip_creation = 0; kvm_context-no_pit_creation = 0; +#ifdef KVM_CAP_SET_GUEST_DEBUG +TAILQ_INIT(kvm_state-kvm_sw_breakpoints); +#endif + gsi_count = kvm_get_gsi_count(kvm_context); if (gsi_count 0) { int gsi_bits, i; @@ -2439,14 +2443,13 @@ int kvm_qemu_init_env(CPUState *cenv) } #ifdef KVM_CAP_SET_GUEST_DEBUG -struct kvm_sw_breakpoint_head kvm_sw_breakpoints = -TAILQ_HEAD_INITIALIZER(kvm_sw_breakpoints); -struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc) +struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, + target_ulong pc) { struct kvm_sw_breakpoint *bp; -TAILQ_FOREACH(bp, kvm_sw_breakpoints, entry) { +TAILQ_FOREACH(bp, env-kvm_state-kvm_sw_breakpoints, entry) { if (bp-pc == pc) return bp; } @@ -2481,6 +2484,11 @@ int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap) return data.err; } +int kvm_sw_breakpoints_active(CPUState *env) +{ +return !TAILQ_EMPTY(env-kvm_state-kvm_sw_breakpoints); +} + int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, target_ulong len, int type) { @@ -2489,7 +2497,7 
@@ int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, int err; if (type == GDB_BREAKPOINT_SW) { - bp = kvm_find_sw_breakpoint(addr); + bp = kvm_find_sw_breakpoint(current_env, addr); if (bp) { bp-use_count++; return 0; @@ -2507,7 +2515,8 @@ int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, return err; } - TAILQ_INSERT_HEAD(kvm_sw_breakpoints, bp, entry); +TAILQ_INSERT_HEAD(current_env-kvm_state-kvm_sw_breakpoints, + bp, entry); } else { err = kvm_arch_insert_hw_breakpoint(addr, len, type); if (err) @@ -2530,7 +2539,7 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr, int err; if (type == GDB_BREAKPOINT_SW) { - bp = kvm_find_sw_breakpoint(addr); + bp = kvm_find_sw_breakpoint(current_env, addr); if (!bp) return -ENOENT; @@ -2543,7 +2552,7 @@ int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr, if (err) return err; - TAILQ_REMOVE(kvm_sw_breakpoints, bp, entry); + TAILQ_REMOVE(current_env-kvm_state-kvm_sw_breakpoints, bp, entry); qemu_free(bp); } else { err = kvm_arch_remove_hw_breakpoint(addr, len, type); @@ -2564,7 +2573,7 @@ void kvm_remove_all_breakpoints(CPUState *current_env) struct kvm_sw_breakpoint *bp, *next; CPUState *env; -TAILQ_FOREACH_SAFE(bp, kvm_sw_breakpoints, entry, next) { +TAILQ_FOREACH_SAFE(bp, current_env-kvm_state-kvm_sw_breakpoints, entry, next) { if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) { /* Try harder to find a CPU that currently sees the breakpoint. 
*/ for (env = first_cpu; env != NULL; env = env-next_cpu) { diff --git a/qemu-kvm.h b/qemu-kvm.h index f43..d5291a3 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -1004,12 +1004,12 @@ struct kvm_sw_breakpoint { int use_count; TAILQ_ENTRY(kvm_sw_breakpoint) entry; }; -TAILQ_HEAD(kvm_sw_breakpoint_head, kvm_sw_breakpoint); -extern struct kvm_sw_breakpoint_head kvm_sw_breakpoints; +TAILQ_HEAD(kvm_sw_breakpoint_head, kvm_sw_breakpoint); int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info); -struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc); +int kvm_sw_breakpoints_active(CPUState *env); +struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, target_ulong pc); int kvm_arch_insert_sw_breakpoint(CPUState *current_env, struct kvm_sw_breakpoint *bp); int kvm_arch_remove_sw_breakpoint(CPUState *current_env, @@ -1174,6 +1174,9 @@
[COMMIT master] qemu-kvm: x86: fix memleak if ioctl fails
From: Amit Shah amit.s...@redhat.com Fix a memleak when the KVM_SET_CPUID2 ioctl fails. Free the memory that we allocate to store cpuids. Reported-by: Mark McLoughlin mar...@redhat.com Signed-off-by: Amit Shah amit.s...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 3bbb9d2..350f272 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -535,7 +535,7 @@ int kvm_setup_cpuid2(kvm_vcpu_context_t vcpu, int nent, r = ioctl(vcpu-fd, KVM_SET_CPUID2, cpuid); if (r == -1) { fprintf(stderr, kvm_setup_cpuid2: %m\n); - return -errno; + r = -errno; } free(cpuid); return r; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Set the iothread's eventfd/pipe descriptors to non-blocking
From: Dor Laor dl...@redhat.com This fixes a migration issue when the destination is loaded. If the migration socket is full, we get EAGAIN for the write. The set_fd_handler2 defers the write for later on. The function tries to wake up the iothread by qemu_kvm_notify_work. Since this happens in a loop, multiple times, the pipe that emulates eventfd becomes full and we get a deadlock. Mark McLoughlin suggested removing the spurious wake-up of the migration code when we get EAGAIN and waiting for the socket to become writeable. (+1) Nevertheless, the pipe descriptors shouldn't be blocking, and the reader can also read several chunks at a time. Signed-off-by: Dor Laor d...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index 355adf4..3c892e6 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -2106,14 +2106,17 @@ void qemu_kvm_notify_work(void) if (len == -1 errno == EINTR) continue; - if (len = 0) +/* In case we have a pipe, there is not reason to insist writing + * 8 bytes + */ + if (len == -1 errno == EAGAIN) break; +if (len = 0) +break; + offset += len; } - -if (offset != 8) - fprintf(stderr, failed to notify io thread\n); } /* If we have signalfd, we mask out the signals we want to handle and then @@ -2152,20 +2155,18 @@ static void sigfd_handler(void *opaque) static void io_thread_wakeup(void *opaque) { int fd = (unsigned long)opaque; -char buffer[8]; -size_t offset = 0; +char buffer[4096]; -while (offset 8) { +/* Drain the pipe/(eventfd) */ +while (1) { ssize_t len; - len = read(fd, buffer + offset, 8 - offset); + len = read(fd, buffer, sizeof(buffer)); if (len == -1 errno == EINTR) continue; if (len = 0) break; - - offset += len; } } @@ -2183,6 +2184,9 @@ int kvm_main_loop(void) return -errno; } +fcntl(fds[0], F_SETFL, O_NONBLOCK); +fcntl(fds[1], F_SETFL, O_NONBLOCK); + qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL, (void *)(unsigned long)fds[0]); -- To unsubscribe from this list: send the line unsubscribe 
kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] replace qemu_kvm_cpu_env
From: Glauber Costa glom...@redhat.com We now have an upstream qemu function that does exactly that, but in a kvm-independent way. Use it. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/hw/acpi.c b/hw/acpi.c index 7de9cb7..d23abd1 100644 --- a/hw/acpi.c +++ b/hw/acpi.c @@ -775,33 +775,11 @@ static void disable_processor(struct gpe_regs *g, int cpu) } #if defined(TARGET_I386) || defined(TARGET_X86_64) -#ifdef CONFIG_KVM -static CPUState *qemu_kvm_cpu_env(int index) -{ -CPUState *penv; - -penv = first_cpu; - -while (penv) { -if (penv-cpu_index == index) -return penv; -penv = (CPUState *)penv-next_cpu; -} - -return NULL; -} -#endif - - void qemu_system_cpu_hot_add(int cpu, int state) { CPUState *env; -if (state -#ifdef CONFIG_KVM - (!qemu_kvm_cpu_env(cpu)) -#endif -) { +if (state !qemu_get_cpu(cpu)) { env = pc_new_cpu(model); if (!env) { fprintf(stderr, cpu %d creation failed\n, cpu); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] kvm: allow qemu to set EPT identity mapping address
From: Sheng Yang sh...@linux.intel.com If we use a larger BIOS image than the current 256KB, we would need to move the reserved TSS and EPT identity mapping pages. Currently TSS supports this, but EPT does not. (change from v1, use parameter address instead of value for ioctl) Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h index af6d592..ff1025d 100644 --- a/kvm/include/linux/kvm.h +++ b/kvm/include/linux/kvm.h @@ -468,6 +468,7 @@ struct kvm_trace_rec { #endif #define KVM_CAP_PIT2 33 #define KVM_CAP_PIT_STATE2 35 +#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 #ifdef KVM_CAP_IRQ_ROUTING @@ -529,6 +530,7 @@ struct kvm_x86_mce { #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) +#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) /* * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns * a vcpu fd. 
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index df40aae..d2c8abe 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -73,6 +73,47 @@ static int kvm_init_tss(kvm_context_t kvm) return 0; } +static int kvm_set_identity_map_addr(kvm_context_t kvm, unsigned long addr) +{ +#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR + int r; + + r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); + if (r 0) { + r = ioctl(kvm-vm_fd, KVM_SET_IDENTITY_MAP_ADDR, addr); + if (r == -1) { + fprintf(stderr, kvm_set_identity_map_addr: %m\n); + return -errno; + } + return 0; + } +#endif + return -ENOSYS; +} + +static int kvm_init_identity_map_page(kvm_context_t kvm) +{ +#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR + int r; + + r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); + if (r 0) { + /* +* this address is 4 pages before the bios, and the bios should present +* as unavaible memory +*/ + r = kvm_set_identity_map_addr(kvm, 0xfffbc000); + if (r 0) { + fprintf(stderr, kvm_init_identity_map_page: + unable to set identity mapping addr\n); + return r; + } + + } +#endif + return 0; +} + static int kvm_create_pit(kvm_context_t kvm) { #ifdef KVM_CAP_PIT @@ -104,6 +145,10 @@ int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes, if (r 0) return r; + r = kvm_init_identity_map_page(kvm); + if (r 0) + return r; + r = kvm_create_pit(kvm); if (r 0) return r; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] reuse upstream breakpoint code
From: Glauber Costa glom...@redhat.com Drop KVM_UPSTREAM around functions we intend to reuse. This allow us to share code in kvm-all.c, that is equal in qemu-kvm.c Signed-off-by: Glauber Costa glom...@redhat.com CC: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm-all.c b/kvm-all.c index e42b1f6..67908a7 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -873,6 +873,8 @@ void kvm_setup_guest_memory(void *start, size_t size) } } +#endif /* KVM_UPSTREAM */ + #ifdef KVM_CAP_SET_GUEST_DEBUG struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, target_ulong pc) @@ -891,6 +893,7 @@ int kvm_sw_breakpoints_active(CPUState *env) return !TAILQ_EMPTY(env-kvm_state-kvm_sw_breakpoints); } +#ifdef KVM_UPSTREAM int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap) { struct kvm_guest_debug dbg; @@ -904,6 +907,7 @@ int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap) return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, dbg); } +#endif int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, target_ulong len, int type) @@ -1028,6 +1032,5 @@ void kvm_remove_all_breakpoints(CPUState *current_env) { } #endif /* !KVM_CAP_SET_GUEST_DEBUG */ -#endif #include qemu-kvm.c diff --git a/kvm.h b/kvm.h index e9a43e2..0191752 100644 --- a/kvm.h +++ b/kvm.h @@ -16,6 +16,7 @@ #include config.h #include sys-queue.h +#include qemu-kvm.h #ifdef KVM_UPSTREAM diff --git a/qemu-kvm.c b/qemu-kvm.c index b0661b6..355adf4 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -2444,18 +2444,6 @@ int kvm_qemu_init_env(CPUState *cenv) #ifdef KVM_CAP_SET_GUEST_DEBUG -struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, - target_ulong pc) -{ -struct kvm_sw_breakpoint *bp; - -TAILQ_FOREACH(bp, env-kvm_state-kvm_sw_breakpoints, entry) { - if (bp-pc == pc) - return bp; -} -return NULL; -} - struct kvm_set_guest_debug_data { struct kvm_guest_debug dbg; int err; @@ -2484,133 +2472,7 @@ int kvm_update_guest_debug(CPUState 
*env, unsigned long reinject_trap) return data.err; } -int kvm_sw_breakpoints_active(CPUState *env) -{ -return !TAILQ_EMPTY(env-kvm_state-kvm_sw_breakpoints); -} - -int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr, - target_ulong len, int type) -{ -struct kvm_sw_breakpoint *bp; -CPUState *env; -int err; - -if (type == GDB_BREAKPOINT_SW) { - bp = kvm_find_sw_breakpoint(current_env, addr); - if (bp) { - bp-use_count++; - return 0; - } - - bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint)); - if (!bp) - return -ENOMEM; - - bp-pc = addr; - bp-use_count = 1; - err = kvm_arch_insert_sw_breakpoint(current_env, bp); - if (err) { - free(bp); - return err; - } - -TAILQ_INSERT_HEAD(current_env-kvm_state-kvm_sw_breakpoints, - bp, entry); -} else { - err = kvm_arch_insert_hw_breakpoint(addr, len, type); - if (err) - return err; -} - -for (env = first_cpu; env != NULL; env = env-next_cpu) { - err = kvm_update_guest_debug(env, 0); - if (err) - return err; -} -return 0; -} - -int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr, - target_ulong len, int type) -{ -struct kvm_sw_breakpoint *bp; -CPUState *env; -int err; - -if (type == GDB_BREAKPOINT_SW) { - bp = kvm_find_sw_breakpoint(current_env, addr); - if (!bp) - return -ENOENT; - - if (bp-use_count 1) { - bp-use_count--; - return 0; - } - - err = kvm_arch_remove_sw_breakpoint(current_env, bp); - if (err) - return err; - - TAILQ_REMOVE(current_env-kvm_state-kvm_sw_breakpoints, bp, entry); - qemu_free(bp); -} else { - err = kvm_arch_remove_hw_breakpoint(addr, len, type); - if (err) - return err; -} - -for (env = first_cpu; env != NULL; env = env-next_cpu) { - err = kvm_update_guest_debug(env, 0); - if (err) - return err; -} -return 0; -} - -void kvm_remove_all_breakpoints(CPUState *current_env) -{ -struct kvm_sw_breakpoint *bp, *next; -CPUState *env; - -TAILQ_FOREACH_SAFE(bp, current_env-kvm_state-kvm_sw_breakpoints, entry, next) { -if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) { -/* Try 
harder to find a CPU that currently sees the breakpoint. */ -for (env = first_cpu; env != NULL; env = env-next_cpu) { -if (kvm_arch_remove_sw_breakpoint(env, bp) == 0) -break; -} -} -} -
[COMMIT master] virtio-net: replace custom io thread notify with qemu one
From: Glauber Costa glom...@redhat.com Replace qemu_kvm_notify_work() with qemu_notify_event(), which ends up calling it anyway. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/hw/virtio-net.c b/hw/virtio-net.c index bda2397..75c9695 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -344,12 +344,9 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) qemu_flush_queued_packets(n-vc); -#ifdef CONFIG_KVM /* We now have RX buffers, signal to the IO thread to break out of the select to re-poll the tap file descriptor */ -if (kvm_enabled()) -qemu_kvm_notify_work(); -#endif +qemu_notify_event(); } static int do_virtio_net_can_receive(VirtIONet *n, int bufsize) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] fix segfault with -no-kvm
From: Glauber Costa glom...@redhat.com Our PIT implementation calls qemu_kvm_pit_in_kernel() without first checking kvm_enabled(), as every other caller does. With -no-kvm this makes it dereference the kvm_context pointer, which will be NULL. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/hw/i8254.c b/hw/i8254.c index fd0bdfe..34a716c 100644 --- a/hw/i8254.c +++ b/hw/i8254.c @@ -478,7 +478,7 @@ void hpet_disable_pit(void) { PITChannelState *s = &pit_state.channels[0]; -if (qemu_kvm_pit_in_kernel()) { +if (kvm_enabled() && qemu_kvm_pit_in_kernel()) { if (qemu_kvm_has_pit_state2()) { kvm_hpet_disable_kpit(); } else { @@ -502,7 +502,7 @@ void hpet_enable_pit(void) PITState *pit = &pit_state; PITChannelState *s = &pit->channels[0]; -if (qemu_kvm_pit_in_kernel()) { +if (kvm_enabled() && qemu_kvm_pit_in_kernel()) { if (qemu_kvm_has_pit_state2()) { kvm_hpet_enable_kpit(); } else { -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Add MCE simulation support to qemu/kvm
From: Huang Ying ying.hu...@intel.com KVM ioctls are used to initialize MCE simulation and inject MCE. The real MCE simulation is implemented in Linux kernel. The Kernel part has been merged. ChangeLog: v7: - Re-based on qemu-kvm.git/next branch v6: - Re-based on latest qemu-kvm.git v5: - Re-based on latest qemu-kvm.git v3: - Re-based on qemu/tcg MCE support patch v2: - Use new kernel MCE capability exportion interface. Signed-off-by: Huang Ying ying.hu...@intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h index 790601d..af6d592 100644 --- a/kvm/include/linux/kvm.h +++ b/kvm/include/linux/kvm.h @@ -463,6 +463,9 @@ struct kvm_trace_rec { #define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 +#ifdef __KVM_HAVE_MCE +#define KVM_CAP_MCE 31 +#endif #define KVM_CAP_PIT2 33 #define KVM_CAP_PIT_STATE2 35 @@ -504,6 +507,19 @@ struct kvm_irq_routing { #endif +#ifdef KVM_CAP_MCE +/* x86 MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; +#endif + /* * ioctls for VM fds */ @@ -592,6 +608,10 @@ struct kvm_irq_routing { #define KVM_NMI _IO(KVMIO, 0x9a) /* Available with KVM_CAP_SET_GUEST_DEBUG */ #define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) +/* MCE for x86 */ +#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) +#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) +#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) /* * Deprecated interfaces diff --git a/kvm/include/x86/asm/kvm.h b/kvm/include/x86/asm/kvm.h index 0c6bf8a..411063c 100644 --- a/kvm/include/x86/asm/kvm.h +++ b/kvm/include/x86/asm/kvm.h @@ -57,6 +57,7 @@ #define __KVM_HAVE_USER_NMI #define __KVM_HAVE_GUEST_DEBUG #define __KVM_HAVE_MSIX +#define __KVM_HAVE_MCE /* Architectural interrupt line count. 
*/ #define KVM_NR_INTERRUPTS 256 diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 350f272..df40aae 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -432,6 +432,39 @@ int kvm_set_msrs(kvm_vcpu_context_t vcpu, struct kvm_msr_entry *msrs, int n) return r; } +int kvm_get_mce_cap_supported(kvm_context_t kvm, uint64_t *mce_cap, + int *max_banks) +{ +#ifdef KVM_CAP_MCE +int r; + +r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE); +if (r 0) { +*max_banks = r; +return ioctl(kvm-fd, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap); +} +#endif +return -ENOSYS; +} + +int kvm_setup_mce(kvm_vcpu_context_t vcpu, uint64_t *mcg_cap) +{ +#ifdef KVM_CAP_MCE +return ioctl(vcpu-fd, KVM_X86_SETUP_MCE, mcg_cap); +#else +return -ENOSYS; +#endif +} + +int kvm_set_mce(kvm_vcpu_context_t vcpu, struct kvm_x86_mce *m) +{ +#ifdef KVM_CAP_MCE +return ioctl(vcpu-fd, KVM_X86_SET_MCE, m); +#else +return -ENOSYS; +#endif +} + static void print_seg(FILE *file, const char *name, struct kvm_segment *seg) { fprintf(stderr, @@ -1285,6 +1318,28 @@ int kvm_arch_qemu_init_env(CPUState *cenv) kvm_setup_cpuid2(cenv-kvm_cpu_state.vcpu_ctx, cpuid_nent, cpuid_ent); +#ifdef KVM_CAP_MCE +if (((cenv-cpuid_version 8)0xF) = 6 + (cenv-cpuid_features(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA) + kvm_check_extension(kvm_context, KVM_CAP_MCE) 0) { +uint64_t mcg_cap; +int banks; + +if (kvm_get_mce_cap_supported(kvm_context, mcg_cap, banks)) +perror(kvm_get_mce_cap_supported FAILED); +else { +if (banks MCE_BANKS_DEF) +banks = MCE_BANKS_DEF; +mcg_cap = MCE_CAP_DEF; +mcg_cap |= banks; +if (kvm_setup_mce(cenv-kvm_cpu_state.vcpu_ctx, mcg_cap)) +perror(kvm_setup_mce FAILED); +else +cenv-mcg_cap = mcg_cap; +} +} +#endif + return 0; } diff --git a/qemu-kvm.c b/qemu-kvm.c index 3c892e6..c13ecba 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -2703,3 +2703,43 @@ int kvm_set_boot_cpu_id(uint32_t id) { return kvm_set_boot_vcpu_id(kvm_context, id); } + +#ifdef TARGET_I386 +#ifdef KVM_CAP_MCE +struct kvm_x86_mce_data +{ +CPUState *env; 
+struct kvm_x86_mce *mce; +}; + +static void kvm_do_inject_x86_mce(void *_data) +{ +struct kvm_x86_mce_data *data = _data; +int r; + +r = kvm_set_mce(data-env-kvm_cpu_state.vcpu_ctx, data-mce); +if (r 0) +perror(kvm_set_mce FAILED); +} +#endif + +void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, +uint64_t mcg_status, uint64_t addr, uint64_t misc) +{ +#ifdef KVM_CAP_MCE +struct kvm_x86_mce mce = { +.bank = bank, +.status = status, +.mcg_status = mcg_status, +.addr = addr,
[COMMIT master] remove kvm types from handle_unhandled
From: Glauber Costa glom...@redhat.com I'm in an ongoing process of not using kvm-specific types in function declarations. handle_unhandled() is the first victim. Since we don't really use this data, but just the reason, remove them entirely. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index c13ecba..2484bd9 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -176,8 +176,7 @@ int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t *data, int len) return 0; } -static int handle_unhandled(kvm_context_t kvm, kvm_vcpu_context_t vcpu, -uint64_t reason) +static int handle_unhandled(uint64_t reason) { fprintf(stderr, kvm: unhandled exit %PRIx64\n, reason); return -EINVAL; @@ -1085,12 +1084,10 @@ again: if (1) { switch (run-exit_reason) { case KVM_EXIT_UNKNOWN: - r = handle_unhandled(kvm, vcpu, - run-hw.hardware_exit_reason); + r = handle_unhandled(run-hw.hardware_exit_reason); break; case KVM_EXIT_FAIL_ENTRY: - r = handle_unhandled(kvm, vcpu, - run-fail_entry.hardware_entry_failure_reason); + r = handle_unhandled(run-fail_entry.hardware_entry_failure_reason); break; case KVM_EXIT_EXCEPTION: fprintf(stderr, exception %d (%x)\n, -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] remove env-exit_request usage from qemu-kvm.c
From: Glauber Costa glom...@redhat.com Today I found a very catastrophic regression: I cannot run my mission critical servers running RHL7.1 anymore. This is a total disaster. Fortunately, I was able to isolate the commit that caused it: commit bb598da496c040d42dde564bd8ace181be52293e Author: Glauber Costa glom...@redhat.com Date: Mon Jul 6 16:12:52 2009 -0400 This guy is certainly stupid, and deserves punishment. It means I'll be writing code using emacs for the next week. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index cb85dbc..edd400e 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -986,8 +986,6 @@ int pre_kvm_run(kvm_context_t kvm, CPUState *env) { kvm_arch_pre_kvm_run(kvm-opaque, env); -if (env-exit_request) -return 1; pthread_mutex_unlock(qemu_mutex); return 0; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] remove created from kvm_state
From: Glauber Costa glom...@redhat.com Again, CPUState has it, and it means exactly that. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/cpu-defs.h b/cpu-defs.h index fce366f..ce9f96a 100644 --- a/cpu-defs.h +++ b/cpu-defs.h @@ -142,7 +142,6 @@ struct qemu_work_item; struct KVMCPUState { pthread_t thread; int signalled; -int created; void *vcpu_ctx; struct qemu_work_item *queued_work_first, *queued_work_last; }; diff --git a/qemu-kvm.c b/qemu-kvm.c index fd28b39..cb85dbc 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1663,12 +1663,12 @@ void kvm_update_interrupt_request(CPUState *env) int signal = 0; if (env) { -if (!current_env || !current_env-kvm_cpu_state.created) +if (!current_env || !current_env-created) signal = 1; /* * Testing for created here is really redundant */ -if (current_env current_env-kvm_cpu_state.created +if (current_env current_env-created env != current_env !env-kvm_cpu_state.signalled) signal = 1; @@ -1948,7 +1948,7 @@ static void *ap_main_loop(void *_env) /* signal VCPU creation */ pthread_mutex_lock(qemu_mutex); -current_env-kvm_cpu_state.created = 1; +current_env-created = 1; pthread_cond_signal(qemu_vcpu_cond); /* and wait for machine initialization */ @@ -1964,13 +1964,13 @@ void kvm_init_vcpu(CPUState *env) { pthread_create(env-kvm_cpu_state.thread, NULL, ap_main_loop, env); -while (env-kvm_cpu_state.created == 0) +while (env-created == 0) qemu_cond_wait(qemu_vcpu_cond); } int kvm_vcpu_inited(CPUState *env) { -return env-kvm_cpu_state.created; +return env-created; } #ifdef TARGET_I386 -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] remove kvm_in* functions
From: Glauber Costa glom...@redhat.com We can use plain qemu's here, and save a couple of lines/complexity. I'm leaving outb for later, because the SMM thing makes it a little bit less trivial. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index e200dea..dce9d4e 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -97,24 +97,6 @@ static int kvm_debug(void *opaque, void *data, } #endif -static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data) -{ -*data = cpu_inb(0, addr); -return 0; -} - -static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data) -{ -*data = cpu_inw(0, addr); -return 0; -} - -static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data) -{ -*data = cpu_inl(0, addr); -return 0; -} - #define PM_IO_BASE 0xb000 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data) @@ -839,15 +821,16 @@ static int handle_io(kvm_vcpu_context_t vcpu) for (i = 0; i run-io.count; ++i) { switch (run-io.direction) { case KVM_EXIT_IO_IN: + r = 0; switch (run-io.size) { case 1: - r = kvm_inb(kvm-opaque, addr, p); + *(uint8_t *)p = cpu_inb(kvm-opaque, addr); break; case 2: - r = kvm_inw(kvm-opaque, addr, p); + *(uint16_t *)p = cpu_inw(kvm-opaque, addr); break; case 4: - r = kvm_inl(kvm-opaque, addr, p); + *(uint32_t *)p = cpu_inl(kvm-opaque, addr); break; default: fprintf(stderr, bad I/O size %d\n, run-io.size); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] remove kvm_specific kvm_out* functions
From: Glauber Costa glom...@redhat.com As example of what was already done with inb. This is a little bit more tricky, because of SMM, but those bits are handled directly in apic anyway. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index 9d550d3..5e7dc0a 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -97,55 +97,6 @@ static int kvm_debug(void *opaque, void *data, } #endif -#define PM_IO_BASE 0xb000 - -static int kvm_outb(void *opaque, uint16_t addr, uint8_t data) -{ -if (addr == 0xb2) { - switch (data) { - case 0: { - cpu_outb(0, 0xb3, 0); - break; - } - case 0xf0: { - unsigned x; - - /* enable acpi */ - x = cpu_inw(0, PM_IO_BASE + 4); - x = ~1; - cpu_outw(0, PM_IO_BASE + 4, x); - break; - } - case 0xf1: { - unsigned x; - - /* enable acpi */ - x = cpu_inw(0, PM_IO_BASE + 4); - x |= 1; - cpu_outw(0, PM_IO_BASE + 4, x); - break; - } - default: - break; - } - return 0; -} -cpu_outb(0, addr, data); -return 0; -} - -static int kvm_outw(void *opaque, uint16_t addr, uint16_t data) -{ -cpu_outw(0, addr, data); -return 0; -} - -static int kvm_outl(void *opaque, uint16_t addr, uint32_t data) -{ -cpu_outl(0, addr, data); -return 0; -} - int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len) { cpu_physical_memory_rw(addr, data, len, 0); @@ -816,14 +767,12 @@ static int handle_io(kvm_vcpu_context_t vcpu) struct kvm_run *run = vcpu-run; kvm_context_t kvm = vcpu-kvm; uint16_t addr = run-io.port; - int r; int i; void *p = (void *)run + run-io.data_offset; for (i = 0; i run-io.count; ++i) { switch (run-io.direction) { case KVM_EXIT_IO_IN: - r = 0; switch (run-io.size) { case 1: *(uint8_t *)p = cpu_inb(kvm-opaque, addr); @@ -842,16 +791,13 @@ static int handle_io(kvm_vcpu_context_t vcpu) case KVM_EXIT_IO_OUT: switch (run-io.size) { case 1: - r = kvm_outb(kvm-opaque, addr, -*(uint8_t *)p); +cpu_outb(kvm-opaque, addr, *(uint8_t *)p); break; case 2: - r = kvm_outw(kvm-opaque, addr, 
-*(uint16_t *)p); + cpu_outw(kvm-opaque, addr, *(uint16_t *)p); break; case 4: - r = kvm_outl(kvm-opaque, addr, -*(uint32_t *)p); + cpu_outl(kvm-opaque, addr, *(uint32_t *)p); break; default: fprintf(stderr, bad I/O size %d\n, run-io.size); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] reuse kvm_ioctl
From: Glauber Costa glom...@redhat.com Start using kvm_ioctl's code. For type safety, delete fd from kvm_context entirely, so the compiler can play along with us helping to detect errors I might have made. Signed-off-by: Glauber Costa glom...@redhat.com Also, we were slightly different from qemu upstream in handling error code from ioctl, since we were always testing for -1, while kvm_vm_ioctl returns -errno. We already did this in most of the call sites, so this patch has the big advantage of simplifying call sites. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm-all.c b/kvm-all.c index 9373d99..0ec6475 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -793,6 +793,7 @@ void kvm_set_phys_mem(target_phys_addr_t start_addr, } } +#endif int kvm_ioctl(KVMState *s, int type, ...) { int ret; @@ -809,7 +810,6 @@ int kvm_ioctl(KVMState *s, int type, ...) return ret; } -#endif int kvm_vm_ioctl(KVMState *s, int type, ...) { diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 58ec1f2..428e831 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -38,7 +38,7 @@ int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr) #ifdef KVM_CAP_SET_TSS_ADDR int r; - r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); + r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); if (r 0) { r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr); if (r 0) { @@ -56,7 +56,7 @@ static int kvm_init_tss(kvm_context_t kvm) #ifdef KVM_CAP_SET_TSS_ADDR int r; - r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); + r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); if (r 0) { /* * this address is 3 pages before the bios, and the bios should present @@ -78,7 +78,7 @@ static int kvm_set_identity_map_addr(kvm_context_t kvm, unsigned long addr) #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR int r; - r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); + r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); if (r 0) { r 
= kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, addr); if (r == -1) { @@ -96,7 +96,7 @@ static int kvm_init_identity_map_page(kvm_context_t kvm) #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR int r; - r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); + r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); if (r 0) { /* * this address is 4 pages before the bios, and the bios should present @@ -121,7 +121,7 @@ static int kvm_create_pit(kvm_context_t kvm) kvm-pit_in_kernel = 0; if (!kvm-no_pit_creation) { - r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT); + r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_PIT); if (r 0) { r = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT); if (r = 0) @@ -401,11 +401,11 @@ void kvm_show_code(kvm_vcpu_context_t vcpu) struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm) { struct kvm_msr_list sizer, *msrs; - int r, e; + int r; sizer.nmsrs = 0; - r = ioctl(kvm-fd, KVM_GET_MSR_INDEX_LIST, sizer); - if (r == -1 errno != E2BIG) + r = kvm_ioctl(kvm_state, KVM_GET_MSR_INDEX_LIST, sizer); + if (r 0 r != -E2BIG) return NULL; /* Old kernel modules had a bug and could write beyond the provided memory. Allocate at least a safe amount of 1K. 
*/ @@ -413,11 +413,10 @@ struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm) sizer.nmsrs * sizeof(*msrs-indices))); msrs-nmsrs = sizer.nmsrs; - r = ioctl(kvm-fd, KVM_GET_MSR_INDEX_LIST, msrs); - if (r == -1) { - e = errno; + r = kvm_ioctl(kvm_state, KVM_GET_MSR_INDEX_LIST, msrs); + if (r 0) { free(msrs); - errno = e; + errno = r; return NULL; } return msrs; @@ -458,10 +457,10 @@ int kvm_get_mce_cap_supported(kvm_context_t kvm, uint64_t *mce_cap, #ifdef KVM_CAP_MCE int r; -r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE); +r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MCE); if (r 0) { *max_banks = r; -return ioctl(kvm-fd, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap); +return kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap); } #endif return -ENOSYS; @@ -599,7 +598,7 @@ int kvm_set_shadow_pages(kvm_context_t kvm, unsigned int nrshadow_pages) #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL int r; - r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, + r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
[COMMIT master] check extension
From: Glauber Costa glom...@redhat.com use upstream check_extension code Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 88c3baf..75db546 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -639,7 +639,7 @@ static int assign_device(AssignedDevInfo *adev) /* We always enable the IOMMU if present * (or when not disabled on the command line) */ -r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); +r = kvm_check_extension(kvm_state, KVM_CAP_IOMMU); if (r !adev-disable_iommu) assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; #endif diff --git a/kvm-all.c b/kvm-all.c index 0ec6475..b4b5a35 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -383,6 +383,7 @@ int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) return ret; } +#endif int kvm_check_extension(KVMState *s, unsigned int extension) { int ret; @@ -394,6 +395,7 @@ int kvm_check_extension(KVMState *s, unsigned int extension) return ret; } +#ifdef KVM_UPSTREAM int kvm_init(int smp_cpus) { diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 428e831..e4ae582 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -348,7 +348,7 @@ int kvm_has_pit_state2(kvm_context_t kvm) int r = 0; #ifdef KVM_CAP_PIT_STATE2 - r = kvm_check_extension(kvm, KVM_CAP_PIT_STATE2); + r = kvm_check_extension(kvm_state, KVM_CAP_PIT_STATE2); #endif return r; } @@ -702,7 +702,7 @@ uint32_t kvm_get_supported_cpuid(kvm_context_t kvm, uint32_t function, int reg) uint32_t ret = 0; uint32_t cpuid_1_edx; - if (!kvm_check_extension(kvm, KVM_CAP_EXT_CPUID)) { + if (!kvm_check_extension(kvm_state, KVM_CAP_EXT_CPUID)) { return -1U; } @@ -1234,7 +1234,7 @@ static int get_para_features(kvm_context_t kvm_context) int i, features = 0; for (i = 0; i ARRAY_SIZE(para_features)-1; i++) { - if (kvm_check_extension(kvm_context, para_features[i].cap)) + if (kvm_check_extension(kvm_state, para_features[i].cap)) features |= 
(1 para_features[i].feature); } diff --git a/qemu-kvm.c b/qemu-kvm.c index 98cfee0..e200dea 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -589,16 +589,6 @@ static int kvm_create_default_phys_mem(kvm_context_t kvm, return -1; } -int kvm_check_extension(kvm_context_t kvm, int ext) -{ - int ret; - - ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, ext); - if (ret 0) - return ret; - return 0; -} - void kvm_create_irqchip(kvm_context_t kvm) { int r; @@ -1345,7 +1335,7 @@ int kvm_has_gsi_routing(kvm_context_t kvm) int r = 0; #ifdef KVM_CAP_IRQ_ROUTING -r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING); +r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING); #endif return r; } @@ -1353,7 +1343,7 @@ int kvm_has_gsi_routing(kvm_context_t kvm) int kvm_get_gsi_count(kvm_context_t kvm) { #ifdef KVM_CAP_IRQ_ROUTING - return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING); + return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING); #else return -EINVAL; #endif @@ -1606,7 +1596,7 @@ int kvm_irqfd(kvm_context_t kvm, int gsi, int flags) int r; int fd; - if (!kvm_check_extension(kvm, KVM_CAP_IRQFD)) + if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD)) return -ENOENT; fd = eventfd(0, 0); @@ -2381,7 +2371,7 @@ int kvm_setup_guest_memory(void *area, unsigned long size) int kvm_qemu_check_extension(int ext) { -return kvm_check_extension(kvm_context, ext); +return kvm_check_extension(kvm_state, ext); } int kvm_qemu_init_env(CPUState *cenv) diff --git a/qemu-kvm.h b/qemu-kvm.h index 8c9b72f..ec35f29 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -167,7 +167,6 @@ int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **phys_mem); int kvm_create_vm(kvm_context_t kvm); -int kvm_check_extension(kvm_context_t kvm, int ext); void kvm_create_irqchip(kvm_context_t kvm); /*! 
@@ -1198,5 +1197,6 @@ extern KVMState *kvm_state; int kvm_ioctl(KVMState *s, int type, ...); int kvm_vm_ioctl(KVMState *s, int type, ...); +int kvm_check_extension(KVMState *s, unsigned int ext); #endif -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] embed kvm_create_context into kvm_init
From: Glauber Costa glom...@redhat.com There is no reason why kvm_create_context is placed outside kvm_init(). After we call kvm_init(), no extra initialization step should be necessary. This patch folds kvm_create_context into it, simplifying vl.c code. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index edd400e..4d85993 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -400,6 +400,8 @@ int kvm_dirty_pages_log_reset(kvm_context_t kvm) } +static int kvm_create_context(void); + int kvm_init(int smp_cpus) { int fd; @@ -459,7 +461,7 @@ int kvm_init(int smp_cpus) } pthread_mutex_lock(qemu_mutex); - return 0; +return kvm_create_context(); out_close: close(fd); @@ -2163,7 +2165,7 @@ int kvm_arch_init_irq_routing(void) } #endif -int kvm_qemu_create_context(void) +static int kvm_create_context() { int r; diff --git a/qemu-kvm.h b/qemu-kvm.h index 6a9be12..b186c9d 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -943,7 +943,6 @@ struct kvm_pit_state { }; int kvm_main_loop(void); int kvm_qemu_init(void); -int kvm_qemu_create_context(void); int kvm_init_ap(void); int kvm_vcpu_inited(CPUState *env); void kvm_load_registers(CPUState *env); diff --git a/vl.c b/vl.c index 939da1f..c09d801 100644 --- a/vl.c +++ b/vl.c @@ -5830,13 +5830,6 @@ int main(int argc, char **argv, char **envp) if (ram_size == 0) ram_size = DEFAULT_RAM_SIZE * 1024 * 1024; -if (kvm_enabled()) { - if (kvm_qemu_create_context() 0) { - fprintf(stderr, Could not create KVM context\n); - exit(1); - } -} - #ifdef CONFIG_KQEMU /* FIXME: This is a nasty hack because kqemu can't cope with dynamic guest ram allocation. It needs to go away. */ -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] reuse env stop and stopped states
From: Glauber Costa glom...@redhat.com qemu CPUState already provides stop and stopped states. And they mean exactly that. There is no need for us to provide our own. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/cpu-defs.h b/cpu-defs.h index 7570096..fce366f 100644 --- a/cpu-defs.h +++ b/cpu-defs.h @@ -142,8 +142,6 @@ struct qemu_work_item; struct KVMCPUState { pthread_t thread; int signalled; -int stop; -int stopped; int created; void *vcpu_ctx; struct qemu_work_item *queued_work_first, *queued_work_last; diff --git a/qemu-kvm.c b/qemu-kvm.c index dce9d4e..fd28b39 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -91,7 +91,7 @@ static int kvm_debug(void *opaque, void *data, if (handle) { kvm_debug_cpu_requested = env; - env-kvm_cpu_state.stopped = 1; + env-stopped = 1; } return handle; } @@ -963,7 +963,7 @@ int handle_halt(kvm_vcpu_context_t vcpu) int handle_shutdown(kvm_context_t kvm, CPUState *env) { /* stop the current vcpu from going back to guest mode */ -env-kvm_cpu_state.stopped = 1; +env-stopped = 1; qemu_system_reset_request(); return 1; @@ -1748,7 +1748,7 @@ int kvm_cpu_exec(CPUState *env) static int is_cpu_stopped(CPUState *env) { -return !vm_running || env-kvm_cpu_state.stopped; +return !vm_running || env-stopped; } static void flush_queued_work(CPUState *env) @@ -1794,9 +1794,9 @@ static void kvm_main_loop_wait(CPUState *env, int timeout) cpu_single_env = env; flush_queued_work(env); -if (env-kvm_cpu_state.stop) { - env-kvm_cpu_state.stop = 0; - env-kvm_cpu_state.stopped = 1; +if (env-stop) { + env-stop = 0; + env-stopped = 1; pthread_cond_signal(qemu_pause_cond); } @@ -1808,7 +1808,7 @@ static int all_threads_paused(void) CPUState *penv = first_cpu; while (penv) { -if (penv-kvm_cpu_state.stop) +if (penv-stop) return 0; penv = (CPUState *)penv-next_cpu; } @@ -1822,11 +1822,11 @@ static void pause_all_threads(void) while (penv) { if (penv != cpu_single_env) { -penv-kvm_cpu_state.stop = 1; 
+penv-stop = 1; pthread_kill(penv-kvm_cpu_state.thread, SIG_IPI); } else { -penv-kvm_cpu_state.stop = 0; -penv-kvm_cpu_state.stopped = 1; +penv-stop = 0; +penv-stopped = 1; cpu_exit(penv); } penv = (CPUState *)penv-next_cpu; @@ -1843,8 +1843,8 @@ static void resume_all_threads(void) assert(!cpu_single_env); while (penv) { -penv-kvm_cpu_state.stop = 0; -penv-kvm_cpu_state.stopped = 0; +penv-stop = 0; +penv-stopped = 0; pthread_kill(penv-kvm_cpu_state.thread, SIG_IPI); penv = (CPUState *)penv-next_cpu; } @@ -2609,12 +2609,6 @@ int kvm_log_stop(target_phys_addr_t phys_addr, target_phys_addr_t len) return 0; } -void qemu_kvm_cpu_stop(CPUState *env) -{ -if (kvm_enabled()) -env-kvm_cpu_state.stopped = 1; -} - int kvm_set_boot_cpu_id(uint32_t id) { return kvm_set_boot_vcpu_id(kvm_context, id); diff --git a/vl.c b/vl.c index b3df596..6ef7690 100644 --- a/vl.c +++ b/vl.c @@ -3553,7 +3553,7 @@ void qemu_system_reset_request(void) reset_requested = 1; } if (cpu_single_env) { -qemu_kvm_cpu_stop(cpu_single_env); +cpu_single_env-stopped = 1; } qemu_notify_event(); } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] qemu-kvm: routing table update thinko fix
From: Michael S. Tsirkin m...@redhat.com When updating irq routing entries, we should memcpy the new entry over the old one. Current code gets it wrong, and only works because it's uncommon for guests to change tables. Signed-off-by: Michael S. Tsirkin m...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index 4d85993..9d550d3 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1480,7 +1480,7 @@ int kvm_update_routing_entry(kvm_context_t kvm, case KVM_IRQ_ROUTING_IRQCHIP: if (e-u.irqchip.irqchip == entry-u.irqchip.irqchip e-u.irqchip.pin == entry-u.irqchip.pin) { -memcpy(e-u.irqchip, entry-u.irqchip, sizeof e-u.irqchip); +memcpy(e-u.irqchip, newentry-u.irqchip, sizeof e-u.irqchip); return 0; } break; @@ -1488,7 +1488,7 @@ int kvm_update_routing_entry(kvm_context_t kvm, if (e-u.msi.address_lo == entry-u.msi.address_lo e-u.msi.address_hi == entry-u.msi.address_hi e-u.msi.data == entry-u.msi.data) { -memcpy(e-u.msi, entry-u.msi, sizeof e-u.msi); +memcpy(e-u.msi, newentry-u.msi, sizeof e-u.msi); return 0; } break; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] hpet_reset: make it similar to upstream
From: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/hw/hpet.c b/hw/hpet.c index c1837ac..2bb2057 100644 --- a/hw/hpet.c +++ b/hw/hpet.c @@ -560,15 +560,13 @@ static void hpet_reset(void *opaque) { s-capability = 0x8086a201ULL; s-capability |= ((HPET_CLK_PERIOD) 32); s-config = 0ULL; -if (count 0) { +if (count 0) /* we don't enable pit when hpet_reset is first called (by hpet_init) * because hpet is taking over for pit here. On subsequent invocations, * hpet_reset is called due to system reset. At this point control must * be returned to pit until SW reenables hpet. */ hpet_enable_pit(); -dprintf(qemu: hpet enabled pit\n); -} count = 1; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] remove custom qemu_select, use upstream hooks
From: Marcelo Tosatti mtosa...@redhat.com Use upstream hooks around select() to lock/unlock global mutex. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.h b/qemu-kvm.h index ec35f29..6a9be12 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -1117,13 +1117,13 @@ static inline void kvm_load_tsc(CPUState *env) {} void kvm_mutex_unlock(void); void kvm_mutex_lock(void); -static inline void kvm_sleep_begin(void) +static inline void qemu_mutex_unlock_iothread(void) { if (kvm_enabled()) kvm_mutex_unlock(); } -static inline void kvm_sleep_end(void) +static inline void qemu_mutex_lock_iothread(void) { if (kvm_enabled()) kvm_mutex_lock(); diff --git a/vl.c b/vl.c index 6ef7690..0ee2908 100644 --- a/vl.c +++ b/vl.c @@ -289,24 +289,6 @@ static QEMUTimer *nographic_timer; uint8_t qemu_uuid[16]; -static int qemu_select(int max_fd, fd_set *rfds, fd_set *wfds, fd_set *xfds, - struct timeval *tv) -{ -int ret; - -/* KVM holds a mutex while QEMU code is running, we need hooks to - release the mutex whenever QEMU code sleeps. */ - -kvm_sleep_begin(); - -ret = select(max_fd, rfds, wfds, xfds, tv); - -kvm_sleep_end(); - -return ret; -} - - /***/ /* x86 ISA bus support */ @@ -3714,8 +3696,10 @@ void qemu_notify_event(void) } } +#ifdef KVM_UPSTREAM #define qemu_mutex_lock_iothread() do { } while (0) #define qemu_mutex_unlock_iothread() do { } while (0) +#endif void vm_stop(int reason) { @@ -4126,7 +4110,9 @@ void main_loop_wait(int timeout) slirp_select_fill(nfds, rfds, wfds, xfds); -ret = qemu_select(nfds + 1, rfds, wfds, xfds, tv); +qemu_mutex_unlock_iothread(); +ret = select(nfds + 1, rfds, wfds, xfds, tv); +qemu_mutex_lock_iothread(); if (ret 0) { IOHandlerRecord **pioh; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] fix mce check extension
From: Glauber Costa glom...@redhat.com Because a patch got into Marcelo's tree before mine did, I forgot to convert one user of kvm_check_extension. Here's the fix for it. Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index e4ae582..492dbc5 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -1338,7 +1338,7 @@ int kvm_arch_qemu_init_env(CPUState *cenv) #ifdef KVM_CAP_MCE if (((cenv-cpuid_version 8)0xF) = 6 (cenv-cpuid_features(CPUID_MCE|CPUID_MCA)) == (CPUID_MCE|CPUID_MCA) - kvm_check_extension(kvm_context, KVM_CAP_MCE) 0) { + kvm_check_extension(kvm_state, KVM_CAP_MCE) 0) { uint64_t mcg_cap; int banks; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] virtio_net_handle_rx: match upstream comment
From: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 75c9695..ce8e6cb 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -345,7 +345,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) qemu_flush_queued_packets(n-vc); /* We now have RX buffers, signal to the IO thread to break out of the - select to re-poll the tap file descriptor */ + * select to re-poll the tap file descriptor */ qemu_notify_event(); } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] fold second pass of kvm initialization
From: Glauber Costa glom...@t60.(none) There is no reason why kvm_init_ap() and friends are placed outside kvm_init(). After we call kvm_init(), no extra initialization step should be necessary. There are now no references to KVM_UPSTREAM outside of kvm*.c files Signed-off-by: Glauber Costa glom...@t60.(none) Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index 5e7dc0a..32dce4a 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -2111,6 +2111,8 @@ int kvm_arch_init_irq_routing(void) } #endif +extern int no_hpet; + static int kvm_create_context() { int r; @@ -2143,6 +2145,24 @@ static int kvm_create_context() return r; } +kvm_init_ap(); +if (kvm_irqchip) { +if (!qemu_kvm_has_gsi_routing()) { +irq0override = 0; +#ifdef TARGET_I386 +/* if kernel can't do irq routing, interrupt source + * override 0-2 can not be set up as required by hpet, + * so disable hpet. + */ +no_hpet=1; +} else if (!qemu_kvm_has_pit_state2()) { +no_hpet=1; +} +#else +} +#endif +} + return 0; } diff --git a/vl.c b/vl.c index 8ea0771..74a1c60 100644 --- a/vl.c +++ b/vl.c @@ -5998,28 +5998,6 @@ int main(int argc, char **argv, char **envp) module_call_init(MODULE_INIT_DEVICE); -if (kvm_enabled()) { -kvm_init_ap(); -#ifdef CONFIG_KVM -if (kvm_irqchip) { -if (!qemu_kvm_has_gsi_routing()) { -irq0override = 0; -#ifdef TARGET_I386 -/* if kernel can't do irq routing, interrupt source - * override 0-2 can not be set up as required by hpet, - * so disable hpet. - */ -no_hpet=1; -} else if (!qemu_kvm_has_pit_state2()) { -no_hpet=1; -} -#else -} -#endif -} -#endif -} - machine-init(ram_size, boot_devices, kernel_filename, kernel_cmdline, initrd_filename, cpu_model); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] vl.c: match upstream drive_init loop comment
From: Marcelo Tosatti mtosa...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/vl.c b/vl.c index 0ee2908..939da1f 100644 --- a/vl.c +++ b/vl.c @@ -5870,10 +5870,8 @@ int main(int argc, char **argv, char **envp) if (nb_drives_opt MAX_DRIVES) drive_add(NULL, SD_ALIAS); -/* open the virtual block devices - * note that migration with device - * hot add/remove is broken. - */ +/* open the virtual block devices */ + for(i = 0; i nb_drives_opt; i++) if (drive_init(drives_opt[i], snapshot, machine) == -1) exit(1); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] reuse kvm_vm_ioctl
From: Glauber Costa glom...@redhat.com Start using kvm_vm_ioctl's code. For type safety, delete vm_fd from kvm_context entirely, so the compiler can play along with us helping to detect errors I might have made. Also, we were slightly different from qemu upstream in handling error code from ioctl, since we were always testing for -1, while kvm_vm_ioctl returns -errno. We already did this in most of the call sites, so this patch has the big advantage of simplifying call sites. Diffstat says: 4 files changed, 58 insertions(+), 134 deletions(-) Signed-off-by: Glauber Costa glom...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/kvm-all.c b/kvm-all.c index 67908a7..9373d99 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -809,6 +809,7 @@ int kvm_ioctl(KVMState *s, int type, ...) return ret; } +#endif int kvm_vm_ioctl(KVMState *s, int type, ...) { @@ -827,6 +828,7 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) return ret; } +#ifdef KVM_UPSTREAM int kvm_vcpu_ioctl(CPUState *env, int type, ...) 
{ int ret; diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index d2c8abe..58ec1f2 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -40,10 +40,10 @@ int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr) r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); if (r 0) { - r = ioctl(kvm-vm_fd, KVM_SET_TSS_ADDR, addr); - if (r == -1) { + r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr); + if (r 0) { fprintf(stderr, kvm_set_tss_addr: %m\n); - return -errno; + return r; } return 0; } @@ -80,7 +80,7 @@ static int kvm_set_identity_map_addr(kvm_context_t kvm, unsigned long addr) r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); if (r 0) { - r = ioctl(kvm-vm_fd, KVM_SET_IDENTITY_MAP_ADDR, addr); + r = kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, addr); if (r == -1) { fprintf(stderr, kvm_set_identity_map_addr: %m\n); return -errno; @@ -123,7 +123,7 @@ static int kvm_create_pit(kvm_context_t kvm) if (!kvm-no_pit_creation) { r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT); if (r 0) { - r = ioctl(kvm-vm_fd, KVM_CREATE_PIT); + r = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT); if (r = 0) kvm-pit_in_kernel = 1; else { @@ -256,7 +256,6 @@ int kvm_create_memory_alias(kvm_context_t kvm, .memory_size = len, .target_phys_addr = target_phys, }; - int fd = kvm-vm_fd; int r; int slot; @@ -267,7 +266,7 @@ int kvm_create_memory_alias(kvm_context_t kvm, return -EBUSY; alias.slot = slot; - r = ioctl(fd, KVM_SET_MEMORY_ALIAS, alias); + r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ALIAS, alias); if (r == -1) return -errno; @@ -314,55 +313,31 @@ int kvm_set_lapic(kvm_vcpu_context_t vcpu, struct kvm_lapic_state *s) int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s) { - int r; if (!kvm-pit_in_kernel) return 0; - r = ioctl(kvm-vm_fd, KVM_GET_PIT, s); - if (r == -1) { - r = -errno; - perror(kvm_get_pit); - } - return r; + return kvm_vm_ioctl(kvm_state, KVM_GET_PIT, s); } int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s) { - int r; if 
(!kvm-pit_in_kernel) return 0; - r = ioctl(kvm-vm_fd, KVM_SET_PIT, s); - if (r == -1) { - r = -errno; - perror(kvm_set_pit); - } - return r; + return kvm_vm_ioctl(kvm_state, KVM_SET_PIT, s); } #ifdef KVM_CAP_PIT_STATE2 int kvm_get_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2) { - int r; if (!kvm-pit_in_kernel) return 0; - r = ioctl(kvm-vm_fd, KVM_GET_PIT2, ps2); - if (r == -1) { - r = -errno; - perror(kvm_get_pit2); - } - return r; + return kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, ps2); } int kvm_set_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2) { - int r; if (!kvm-pit_in_kernel) return 0; - r = ioctl(kvm-vm_fd, KVM_SET_PIT2, ps2); - if (r == -1) { - r = -errno; - perror(kvm_set_pit2); - } - return r; + return kvm_vm_ioctl(kvm_state, KVM_SET_PIT2, ps2); } #endif @@ -627,10 +602,10 @@ int kvm_set_shadow_pages(kvm_context_t kvm, unsigned int nrshadow_pages) r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_MMU_SHADOW_CACHE_CONTROL); if
[COMMIT master] kvm-kmod: Fix including of arch/*/kvm/trace.h
From: Jan Kiszka jan.kis...@web.de When building against a split kernel, we have to add its source path to the include path. Otherwise arch/*/kvm/trace.h cannot be found. Signed-off-by: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/Makefile b/Makefile index ad08c45..4c813a6 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,7 @@ all:: prerequisite $(MAKE) -C $(KERNELDIR) M=`pwd` \ LINUXINCLUDE=-I`pwd`/include -Iinclude \ $(if $(KERNELSOURCEDIR),\ - -Iinclude2 -I$(KERNELSOURCEDIR)/include -I$(KERNELSOURCEDIR)/arch/${ARCH_DIR}/include, \ + -Iinclude2 -I$(KERNELSOURCEDIR)/include -I$(KERNELSOURCEDIR)/arch/${ARCH_DIR}/include -I$(KERNELSOURCEDIR), \ -Iarch/${ARCH_DIR}/include) -I`pwd`/include-compat \ -include include/linux/autoconf.h \ -include `pwd`/$(ARCH_DIR)/external-module-compat.h $(module_defines) \ -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: silence lapic kernel messages that can be triggered by a guest
From: Gleb Natapov g...@redhat.com Some Linux versions (f8) try to read EOI register that is write only. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e2e2849..6c3cd2c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -594,14 +594,14 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len, static const u64 rmask = 0x43ff01ffe70cULL; if ((alignment + len) 4) { - printk(KERN_ERR KVM_APIC_READ: alignment error %x %d\n, - offset, len); + apic_debug(KVM_APIC_READ: alignment error %x %d\n, + offset, len); return 1; } if (offset 0x3f0 || !(rmask (1ULL (offset 4 { - printk(KERN_ERR KVM_APIC_READ: read reserved register %x\n, - offset); + apic_debug(KVM_APIC_READ: read reserved register %x\n, + offset); return 1; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Discard unnecessary kvm_mmu_flush_tlb() in kvm_mmu_load()
From: Sheng Yang sh...@linux.intel.com set_cr3() should already cover the TLB flushing. Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7162651..5812812 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2349,8 +2349,8 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) spin_unlock(vcpu-kvm-mmu_lock); if (r) goto out; + /* set_cr3() should ensure TLB has been flushed */ kvm_x86_ops-set_cr3(vcpu, vcpu-arch.mmu.root_hpa); - kvm_mmu_flush_tlb(vcpu); out: return r; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: s390: remove unused structs
From: Gleb Natapov g...@redhat.com They are not used by common code without defines which s390 does not have. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h index 0b2f829..3dfcaeb 100644 --- a/arch/s390/include/asm/kvm.h +++ b/arch/s390/include/asm/kvm.h @@ -15,15 +15,6 @@ */ #include linux/types.h -/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ -struct kvm_pic_state { - /* no PIC for s390 */ -}; - -struct kvm_ioapic_state { - /* no IOAPIC for s390 */ -}; - /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* general purpose regs for s390 */ -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: fix kvm_apic_accept_irq tracepoint dest mode parameter
From: Gleb Natapov g...@redhat.com Switch dm parameter to u16 and use __print_symbolic. Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 134bc63..0d480e7 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -323,12 +323,12 @@ TRACE_EVENT(kvm_apic_ipi, ); TRACE_EVENT(kvm_apic_accept_irq, - TP_PROTO(__u32 apicid, __u8 dm, __u8 tm, __u8 vec, bool coalesced), + TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced), TP_ARGS(apicid, dm, tm, vec, coalesced), TP_STRUCT__entry( __field(__u32, apicid ) - __field(__u8, dm ) + __field(__u16, dm ) __field(__u8, tm ) __field(__u8, vec ) __field(bool, coalesced ) @@ -344,7 +344,7 @@ TRACE_EVENT(kvm_apic_accept_irq, TP_printk(apicid %x vec %u (%s|%s)%s, __entry-apicid, __entry-vec, - (__entry-dm == APIC_DM_LOWEST) ? lowest:fixed, + __print_symbolic((__entry-dm 8 0x7), kvm_deliver_mode), __entry-tm ? level : edge, __entry-coalesced ? (coalesced) : ) ); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: s390: fix wait_queue handling
From: Christian Borntraeger borntrae...@de.ibm.com There are two waitqueues in kvm for wait handling: vcpu->wq for virt/kvm/kvm_main.c and vcpu->arch.local_int.wq for the s390 specific wait code. The wait handling in kvm_s390_handle_wait was broken by using different wait_queues for add_wait_queue and remove_wait_queue. There are two options to fix the problem: o move all the s390 specific code to vcpu->wq and remove vcpu->arch.local_int.wq o move all the s390 specific code to vcpu->arch.local_int.wq This patch chooses the 2nd variant for two reasons: o s390 does not use kvm_vcpu_block but implements its own enabled wait handling. Having a separate wait_queue makes it clear that our wait mechanism is different o the patch is much smaller Report-by: Julia Lawall ju...@diku.dk Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5f2e144..2c2f983 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -380,7 +380,7 @@ no_timer: } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); - remove_wait_queue(&vcpu->wq, &wait); + remove_wait_queue(&vcpu->arch.local_int.wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); spin_unlock(&vcpu->arch.local_int.float_int->lock); hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86: use kvm_get_gdt() and kvm_read_ldt()
From: Akinobu Mita akinobu.m...@gmail.com Use kvm_get_gdt() and kvm_read_ldt() to reduce inline assembly code. Cc: Avi Kivity a...@redhat.com Cc: k...@vger.kernel.org Signed-off-by: Akinobu Mita akinobu.m...@gmail.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index fc14bdf..18085d3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -291,7 +291,7 @@ static void svm_hardware_enable(void *garbage) struct svm_cpu_data *svm_data; uint64_t efer; - struct desc_ptr gdt_descr; + struct descriptor_table gdt_descr; struct desc_struct *gdt; int me = raw_smp_processor_id(); @@ -311,8 +311,8 @@ static void svm_hardware_enable(void *garbage) svm_data-max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; svm_data-next_asid = svm_data-max_asid + 1; - asm volatile (sgdt %0 : =m(gdt_descr)); - gdt = (struct desc_struct *)gdt_descr.address; + kvm_get_gdt(gdt_descr); + gdt = (struct desc_struct *)gdt_descr.base; svm_data-tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); rdmsrl(MSR_EFER, efer); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ebd684..18ce27f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -132,13 +132,12 @@ unsigned long segment_base(u16 selector) if (selector == 0) return 0; - asm(sgdt %0 : =m(gdt)); + kvm_get_gdt(gdt); table_base = gdt.base; if (selector 4) { /* from ldt */ - u16 ldt_selector; + u16 ldt_selector = kvm_read_ldt(); - asm(sldt %0 : =g(ldt_selector)); table_base = segment_base(ldt_selector); } d = (struct desc_struct *)(table_base + (selector ~7)); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86: use get_desc_base() and get_desc_limit()
From: Akinobu Mita akinobu.m...@gmail.com Use get_desc_base() and get_desc_limit() to get the base address and limit in desc_struct. Cc: Avi Kivity a...@redhat.com Cc: k...@vger.kernel.org Signed-off-by: Akinobu Mita akinobu.m...@gmail.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 48567fa..0ebd684 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -142,8 +142,7 @@ unsigned long segment_base(u16 selector) table_base = segment_base(ldt_selector); } d = (struct desc_struct *)(table_base + (selector ~7)); - v = d-base0 | ((unsigned long)d-base1 16) | - ((unsigned long)d-base2 24); + v = get_desc_base(d); #ifdef CONFIG_X86_64 if (d-s == 0 (d-type == 2 || d-type == 9 || d-type == 11)) v |= ((unsigned long)((struct ldttss_desc64 *)d)-base3) 32; @@ -3943,11 +3942,8 @@ static void kvm_set_segment(struct kvm_vcpu *vcpu, static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, struct kvm_segment *kvm_desct) { - kvm_desct-base = seg_desc-base0; - kvm_desct-base |= seg_desc-base1 16; - kvm_desct-base |= seg_desc-base2 24; - kvm_desct-limit = seg_desc-limit0; - kvm_desct-limit |= seg_desc-limit 16; + kvm_desct-base = get_desc_base(seg_desc); + kvm_desct-limit = get_desc_limit(seg_desc); if (seg_desc-g) { kvm_desct-limit = 12; kvm_desct-limit |= 0xfff; @@ -4026,11 +4022,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, struct desc_struct *seg_desc) { - u32 base_addr; - - base_addr = seg_desc-base0; - base_addr |= (seg_desc-base1 16); - base_addr |= (seg_desc-base2 24); + u32 base_addr = get_desc_base(seg_desc); return vcpu-arch.mmu.gva_to_gpa(vcpu, base_addr); } @@ -4319,7 +4311,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) } } - if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit 16) 0x67) { + if (!nseg_desc.p || get_desc_limit(nseg_desc) 0x67) { 
kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector 0xfffc); return 1; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: PIT: Unregister ack notifier callback when freeing
From: Gleb Natapov g...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 137e548..472653c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -672,6 +672,8 @@ void kvm_free_pit(struct kvm *kvm) if (kvm-arch.vpit) { kvm_unregister_irq_mask_notifier(kvm, 0, kvm-arch.vpit-mask_notifier); + kvm_unregister_irq_ack_notifier(kvm, + kvm-arch.vpit-pit_state.irq_ack_notifier); mutex_lock(kvm-arch.vpit-pit_state.lock); timer = kvm-arch.vpit-pit_state.pit_timer.timer; hrtimer_cancel(timer); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: VMX: Introduce KVM_SET_IDENTITY_MAP_ADDR ioctl
From: Sheng Yang sh...@linux.intel.com Now KVM allow guest to modify guest's physical address of EPT's identity mapping page. (change from v1, discard unnecessary check, change ioctl to accept parameter address rather than value) Signed-off-by: Sheng Yang sh...@linux.intel.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 08732d7..e210b21 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -411,6 +411,7 @@ struct kvm_arch{ struct page *ept_identity_pagetable; bool ept_identity_pagetable_done; + gpa_t ept_identity_map_addr; unsigned long irq_sources_bitmap; unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3a75db3..4ffc4c3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1719,7 +1719,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) eptp = construct_eptp(cr3); vmcs_write64(EPT_POINTER, eptp); guest_cr3 = is_paging(vcpu) ? 
vcpu-arch.cr3 : - VMX_EPT_IDENTITY_PAGETABLE_ADDR; + vcpu-kvm-arch.ept_identity_map_addr; } vmx_flush_tlb(vcpu); @@ -2122,7 +2122,7 @@ static int init_rmode_identity_map(struct kvm *kvm) if (likely(kvm-arch.ept_identity_pagetable_done)) return 1; ret = 0; - identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR PAGE_SHIFT; + identity_map_pfn = kvm-arch.ept_identity_map_addr PAGE_SHIFT; r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE); if (r 0) goto out; @@ -2191,7 +2191,8 @@ static int alloc_identity_pagetable(struct kvm *kvm) goto out; kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT; kvm_userspace_mem.flags = 0; - kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR; + kvm_userspace_mem.guest_phys_addr = + kvm-arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; r = __kvm_set_memory_region(kvm, kvm_userspace_mem, 0); if (r) @@ -3814,9 +3815,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (alloc_apic_access_page(kvm) != 0) goto free_vmcs; - if (enable_ept) + if (enable_ept) { + if (!kvm-arch.ept_identity_map_addr) + kvm-arch.ept_identity_map_addr = + VMX_EPT_IDENTITY_PAGETABLE_ADDR; if (alloc_identity_pagetable(kvm) != 0) goto free_vmcs; + } return vmx-vcpu; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 18ce27f..2539e9a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1206,6 +1206,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IOEVENTFD: case KVM_CAP_PIT2: case KVM_CAP_PIT_STATE2: + case KVM_CAP_SET_IDENTITY_MAP_ADDR: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1906,6 +1907,13 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) return ret; } +static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm, + u64 ident_addr) +{ + kvm-arch.ept_identity_map_addr = ident_addr; + return 0; +} + static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, u32 kvm_nr_mmu_pages) { @@ -2169,6 +2177,17 @@ long 
kvm_arch_vm_ioctl(struct file *filp, if (r 0) goto out; break; + case KVM_SET_IDENTITY_MAP_ADDR: { + u64 ident_addr; + + r = -EFAULT; + if (copy_from_user(ident_addr, argp, sizeof ident_addr)) + goto out; + r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr); + if (r 0) + goto out; + break; + } case KVM_SET_MEMORY_REGION: { struct kvm_memory_region kvm_mem; struct kvm_userspace_memory_region kvm_userspace_mem; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 230a91a..f8f8900 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -435,6 +435,7 @@ struct kvm_ioeventfd { #define KVM_CAP_PIT_STATE2 35 #endif #define KVM_CAP_IOEVENTFD 36 +#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 #ifdef KVM_CAP_IRQ_ROUTING @@ -512,6 +513,7 @@ struct kvm_irqfd { #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\ struct kvm_userspace_memory_region) #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) +#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) /* Device model IOC */
[COMMIT master] KVM: MMU: handle n_free_mmu_pages > n_alloc_mmu_pages in kvm_mmu_change_mmu_pages
From: Marcelo Tosatti mtosa...@redhat.com kvm_mmu_change_mmu_pages mishandles the case where n_alloc_mmu_pages is smaller than n_free_mmu_pages, by not checking if the result of the subtraction is negative. It's a valid condition which can happen if a large number of pages has been recently freed. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 53c1d2c..9abea8e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1418,24 +1418,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) */ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) { + int used_pages; + + used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages; + used_pages = max(0, used_pages); + /* * If we set the number of mmu pages to be smaller be than the * number of actived pages , we must to free some mmu pages before we * change the value */ - if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) > - kvm_nr_mmu_pages) { - int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages - - kvm->arch.n_free_mmu_pages; - - while (n_used_mmu_pages > kvm_nr_mmu_pages) { + if (used_pages > kvm_nr_mmu_pages) { + while (used_pages > kvm_nr_mmu_pages) { struct kvm_mmu_page *page; page = container_of(kvm->arch.active_mmu_pages.prev, struct kvm_mmu_page, link); kvm_mmu_zap_page(kvm, page); - n_used_mmu_pages--; + used_pages--; } kvm->arch.n_free_mmu_pages = 0; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: SVM: force new asid on vcpu migration
From: Marcelo Tosatti mtosa...@redhat.com If a migrated vcpu matches the asid_generation value of the target pcpu, there will be no TLB flush via TLB_CONTROL_FLUSH_ALL_ASID. The check for vcpu.cpu in pre_svm_run is meaningless since svm_vcpu_load already updated it on schedule in. Such vcpu will VMRUN with stale TLB entries. Based on original patch from Joerg Roedel (http://patchwork.kernel.org/patch/10021/) Signed-off-by: Marcelo Tosatti mtosa...@redhat.com Acked-by: Joerg Roedel joerg.roe...@amd.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 18085d3..b720b02 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -739,6 +739,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) svm-vmcb-control.tsc_offset += delta; vcpu-cpu = cpu; kvm_migrate_timers(vcpu); + svm-asid_generation = 0; } for (i = 0; i NR_HOST_SAVE_USER_MSRS; i++) @@ -1071,7 +1072,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) svm-vmcb-control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; } - svm-vcpu.cpu = svm_data-cpu; svm-asid_generation = svm_data-asid_generation; svm-vmcb-control.asid = svm_data-next_asid++; } @@ -2320,8 +2320,8 @@ static void pre_svm_run(struct vcpu_svm *svm) struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); svm-vmcb-control.tlb_ctl = TLB_CONTROL_DO_NOTHING; - if (svm-vcpu.cpu != cpu || - svm-asid_generation != svm_data-asid_generation) + /* FIXME: handle wraparound of asid_generation */ + if (svm-asid_generation != svm_data-asid_generation) new_asid(svm, svm_data); } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Drop obsolete cpu_get/put in make_all_cpus_request
From: Jan Kiszka jan.kis...@siemens.com spin_lock disables preemption, so we can simply read the current cpu. Signed-off-by: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7cd1c10..98e4ec8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -741,8 +741,8 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) if (alloc_cpumask_var(cpus, GFP_ATOMIC)) cpumask_clear(cpus); - me = get_cpu(); spin_lock(kvm-requests_lock); + me = smp_processor_id(); kvm_for_each_vcpu(i, vcpu, kvm) { if (test_and_set_bit(req, vcpu-requests)) continue; @@ -757,7 +757,6 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) else called = false; spin_unlock(kvm-requests_lock); - put_cpu(); free_cpumask_var(cpus); return called; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: VMX: Avoid to return ENOTSUPP to userland
From: Jan Kiszka jan.kis...@web.de Choose some allowed error values for the cases VMX returned ENOTSUPP so far as these values could be returned by the KVM_RUN IOCTL. Signed-off-by: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a42d604..857b7ce 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3133,7 +3133,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) printk(KERN_ERR Fail to handle apic access vmexit! Offset is 0x%lx\n, offset); - return -ENOTSUPP; + return -ENOEXEC; } return 1; } @@ -3202,7 +3202,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (exit_qualification (1 6)) { printk(KERN_ERR EPT: GPA exceeds GAW!\n); - return -ENOTSUPP; + return -EINVAL; } gla_validity = (exit_qualification 7) 0x3; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Fix build with --disable-kvm due to recent upstream compatibility work
From: Avi Kivity a...@redhat.com We now need kvm_context_t, kvm_allowed, and a few functions exposed even with kvm disabled. Signed-off-by: Avi Kivity a...@redhat.com diff --git a/qemu-kvm.h b/qemu-kvm.h index b186c9d..eb48ff8 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -936,8 +936,19 @@ uint32_t kvm_get_supported_cpuid(kvm_context_t kvm, uint32_t function, int reg); #else /* !CONFIG_KVM */ +typedef struct kvm_context *kvm_context_t; +typedef struct kvm_vcpu_context *kvm_vcpu_context_t; + struct kvm_pit_state { }; +static inline int kvm_init(int smp_cpus) { return 0; } +static inline void kvm_inject_x86_mce( +CPUState *cenv, int bank,uint64_t status, +uint64_t mcg_status, uint64_t addr, uint64_t misc) { } + + +extern int kvm_allowed; + #endif /* !CONFIG_KVM */ @@ -1167,6 +1178,8 @@ static inline int kvm_set_migration_log(int enable) return kvm_physical_memory_set_dirty_tracking(enable); } +#ifdef CONFIG_KVM + typedef struct KVMSlot { target_phys_addr_t start_addr; @@ -1199,3 +1212,5 @@ int kvm_vm_ioctl(KVMState *s, int type, ...); int kvm_check_extension(KVMState *s, unsigned int ext); #endif + +#endif -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: NMI Injection to Guest
On Sat, Jul 25, 2009 at 10:46:39PM +0200, Jiaqing Du wrote: Hi list, I'm trying to extend OProfile to support guest profiling. One step of my work is to push an NMI to the guest(s) when a performance counter overflows. Please correct me if the following is not correct: counter overflow --> NMI to host --> VM exit --> int $2 to handle NMI on host --> ... --> VM entry --> NMI to guest Correct except the last step (--> NMI to guest). Host NMI is not propagated to guests. On the path between VM-exit and VM-entry, I want to push an NMI to the guest. I tried to put the following code on the path, but never succeeded. Various weird things happened, such as KVM hangs, guest kernel oops, and host hangs. I tried both code with Linux 2.6.30 and version 88. if (vmx_nmi_allowed()) { vmx_inject_nmi(); } Any suggestions? Where is the right place to push an NMI and what are the necessary checks? Call kvm_inject_nmi(vcpu). And don't forget to vcpu_load(vcpu) before doing it. See kvm_vcpu_ioctl_nmi(). -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Host latency peaks due to kvm-intel
Do not meddle in the internals of kernels, for they are subtle and quick to panic. Also the kvm code. Are you sure that the processor supports the KVM extension? I know of a lot of Intel architectures where KVM is not supported. Especially the HW_CHECK_SUM. I am not sure, but this seems to be a problem. Also there is no dependency check with the KVM on Linux. What I mean by this is that KVM installs without problem on an architecture that does not support the extension. So compiling KVM alone does not mean it works on an architecture. -- -- Sujit K M blog(http://kmsujit.blogspot.com/) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Very high memory usage with KVM
On 07/25/2009 08:43 PM, Daniel Bareiro wrote: Hi all! I have an installation with Ubuntu Hardy Heron server amd64 with KVM-62 from Ubuntu repositories installed on an HP Proliant DL380 G5 with two Xeon E5405 quadcore processors and 16 GiB of RAM which has six VMs with the following configuration of memory: Hostname | RAM ===+=== Ganimedes |2 GiB Os |1 GiB Aprender |2 GiB Aps0 |2 GiB Aps2 |4 GiB Ratatoskr |4 GiB ===+=== TOTAL | 15 GiB Initially the host was created with a swap partition of 1 GiB (more 1 GiB than was free for use of host) but this amount with the time remained short and I had to add a LV of 7 GiB to be used with swap, being now a total of 8 GiB of swap of which at this moment I have only a 9% free. Is 'normal' this use of memory? r...@ss02:~# ps -e --sort -rss -Ho user,start_time,pid,pcpu,pmem,rss,size,vsz,args USER START PID %CPU %MEM RSSSZVSZ COMMAND [...] root Jul06 27471 52.3 24.4 4023232 4292200 4350296 kvmratatoskr root Jul24 9955 137 23.8 3923620 4308592 4350308 kvmaps2 root Jul06 8751 5.8 8.3 1368228 2171808 2229888 kvmaps0 root Jul07 8565 2.7 5.2 862844 2204704 2246416 kvmaprender root Apr22 7842 0.6 3.6 600072 2172056 2230136 kvmganimedes root Jul01 7944 0.6 2.0 334860 1119916 1177996 kvmos r...@ss02:~# free total used free sharedbuffers cached Mem: 16463388 16377844 85544 0 894216 66328 -/+ buffers/cache: 154173001046088 Swap: 83199487621916 698032 Updating to KVM-84 or superior can improve this situation? What is the storage configuration? Are you using qcow2? What are the image logical and physical sizes? What is the host kernel (uname -a)? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM bug
On 07/23/2009 11:59 PM, Saksena, Abhishek wrote: Hi I am trying to boot a patched version of Boch's BIOS on KVM. It works fine with Qemu with -no-kvm option. However I get following with KVM unhandled vm exit: 0x8021 vcpu_id 0 ds 88f9 (00088f9b/ p 1 dpl 3 db 0 s 1 type 3 l 0 g 0 avl 0) When running on Intel we emulate real mode using vm86 mode. This mode requires ds.base == ds.selector << 4, which isn't the case here. You can modify the code to satisfy this requirement, or you can try the emulate_invalid_guest_state=1 module parameter (which will likely fail since it is not completely implemented). -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Write bit in Shadow Page Table Entry
On 07/23/2009 11:49 PM, Lynda Yang wrote: Hi, As I understand, the KVM code (kvm-86) may clear the write bit when setting a shadow page table entry so that it can detect when an entry needs to be marked dirty later. However, it also plays with the write bit depending on whether the shadow page is allowed to be unsynched or not. I'm not quite clear on the latter, so if anyone can provide some insights it would be very much appreciated. Or perhaps even more helpful...if it is possible to provide a clear picture of how KVM generally plays with an entry's write bit. Let's see. 1. If the spte was derived from a guest pte, then the writeable bit reflects the guest permissions. If not (nested paging, real mode), the writeable bit is 1. 2. When we log dirty pages, writeable bits for the memory we are interested in are cleared. 3. If the spte points at a shadow page, the writeable bit is cleared to 0 so that we are informed of updates to page tables. 4. Under certain conditions[1], we allow a shadowed guest page table to be writeable. This happens on a write fault to a guest page table. [1] The conditions are: the page is the lowest-level mapping, and there are no other uses of the page as a paging element. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2
Hi Avi, the last push broke --disable-kvm. Further build-regression will be send directly to kvm@vger.kernel.org, if no one complains. Best Regards, Daniel -- Forwarded Message -- Subject: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2 Date: Sunday 26 July 2009 From: qemu-...@buildbot.b1-systems.de To: bere...@b1-systems.de, gol...@b1-systems.de The Buildbot has detected a new failure of disable_kvm_i386_centos_5_2 on qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu- kvm/builders/disable_kvm_i386_centos_5_2/builds/9 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: avi-kvmbot1 Build Reason: Build Source Stamp: [branch master] HEAD Blamelist: Amit Shah amit.s...@redhat.com,Dor Laor dl...@redhat.com,Glauber Costa glom...@redhat.com,Glauber Costa glom...@t60.(none),Huang Ying ying.hu...@intel.com,Marcelo Tosatti mtosa...@redhat.com,Michael S. Tsirkin m...@redhat.com,Sheng Yang sh...@linux.intel.com BUILD FAILED: failed compile sincerely, -The Buildbot --- -- Daniel GollubGeschaeftsfuehrer: Ralph Dehner FOSS Developer Unternehmenssitz: Vohburg B1 Systems GmbH Amtsgericht: Ingolstadt Mobil: +49-(0)-160 47 73 970 Handelsregister: HRB 3537 EMail: gol...@b1-systems.de http://www.b1-systems.de Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg http://pgpkeys.pca.dfn.de/pks/lookup?op=getsearch=0xED14B95C2F8CA78D The Buildbot has detected a new failure of disable_kvm_i386_centos_5_2 on qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_i386_centos_5_2/builds/9 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: avi-kvmbot1 Build Reason: Build Source Stamp: [branch master] HEAD Blamelist: Amit Shah amit.s...@redhat.com,Dor Laor dl...@redhat.com,Glauber Costa glom...@redhat.com,Glauber Costa glom...@t60.(none),Huang Ying ying.hu...@intel.com,Marcelo Tosatti mtosa...@redhat.com,Michael S. 
Tsirkin m...@redhat.com,Sheng Yang sh...@linux.intel.com BUILD FAILED: failed compile sincerely, -The Buildbot signature.asc Description: This is a digitally signed message part.
Re: Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2
On 07/26/2009 02:51 PM, Daniel Gollub wrote: Hi Avi, the last push broke --disable-kvm. Can you set up testing for the 'next' branch? It will catch issues much earlier. Further build-regression will be send directly to kvm@vger.kernel.org, if no one complains. I will complain if it isn't! Thanks for setting up buildbot, it's nice to have feedback a few minutes after pushing. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2
Hi Avi, On Sunday 26 July 2009 02:01:18 pm Avi Kivity wrote: [...] Can you set up testing for the 'next' branch? It will catch issues much earlier. Actually, I thought it was. But for some reason it is not getting notified by the git hook. Could you ping me on IRC before you push something (only) to next-branch, so I can have a look at the buildbot logs ... Further build-regression will be send directly to kvm@vger.kernel.org, if no one complains. I will complain if it isn't! [...] Ok, cool. It's set. Best Regards, Daniel -- Daniel GollubGeschaeftsfuehrer: Ralph Dehner FOSS Developer Unternehmenssitz: Vohburg B1 Systems GmbH Amtsgericht: Ingolstadt Mobil: +49-(0)-160 47 73 970 Handelsregister: HRB 3537 EMail: gol...@b1-systems.de http://www.b1-systems.de Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg http://pgpkeys.pca.dfn.de/pks/lookup?op=getsearch=0xED14B95C2F8CA78D signature.asc Description: This is a digitally signed message part.
Re: Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2
On 07/26/2009 02:51 PM, Daniel Gollub wrote: Hi Avi, the last push broke --disable-kvm. Further build-regression will be send directly to kvm@vger.kernel.org, if no one complains. I see that a bunch of slaves are not connected. Can you check what went wrong? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2
On Sunday 26 July 2009 02:19:02 pm Avi Kivity wrote: Further build-regression will be send directly to kvm@vger.kernel.org, if no one complains. I see that a bunch of slaves are not connected. Can you check what went wrong? We had to move our buildslaves to a different box. Currently only your buildslave is active. I hope to get the others back online within the next few hours. Will later request on the list for more buildslave instances. Best Regards, Daniel -- Daniel GollubGeschaeftsfuehrer: Ralph Dehner FOSS Developer Unternehmenssitz: Vohburg B1 Systems GmbH Amtsgericht: Ingolstadt Mobil: +49-(0)-160 47 73 970 Handelsregister: HRB 3537 EMail: gol...@b1-systems.de http://www.b1-systems.de Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg http://pgpkeys.pca.dfn.de/pks/lookup?op=getsearch=0xED14B95C2F8CA78D signature.asc Description: This is a digitally signed message part.
[PATCHv3 0/2] virtio: find_vqs/del_vqs fixes
Here's a patch series to fix known regressions in virtio_pci, by refactoring code along the lines suggested by Rusty. This is on top of patch "virtio: fix memory leak on device removal" that has been applied by Rusty. This supersedes patches: [PATCHv3] virtio: recover from vector assignment failure [PATCHv2] virtio: fix double free_irq on device removal Michael S. Tsirkin (2): virtio: delete vq from list virtio: refactor find_vqs drivers/virtio/virtio_pci.c | 218 --- 1 files changed, 124 insertions(+), 94 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv3 1/2] virtio: delete vq from list
This makes delete vq the reverse of find vq. This is required to make it possible to retry find_vqs after a failure, otherwise the list gets corrupted. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- drivers/virtio/virtio_pci.c |6 +- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 7e21389..2eaf1fb 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -464,7 +464,11 @@ static void vp_del_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq-vdev); struct virtio_pci_vq_info *info = vq-priv; - unsigned long size; + unsigned long flags, size; + +spin_lock_irqsave(vp_dev-lock, flags); +list_del(info-node); +spin_unlock_irqrestore(vp_dev-lock, flags); iowrite16(info-queue_index, vp_dev-ioaddr + VIRTIO_PCI_QUEUE_SEL); -- 1.6.2.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv3 2/2] virtio: refactor find_vqs
This refactors find_vqs, making it more readable and robust, and fixing two regressions from 2.6.30: - double free_irq causing BUG_ON on device removal - probe failure when vq can't be assigned to msi-x vector (reported on old host kernels) An older version of this patch was tested by Amit Shah. Reported-by: Amit Shah amit.s...@redhat.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- drivers/virtio/virtio_pci.c | 212 --- 1 files changed, 119 insertions(+), 93 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 2eaf1fb..3ad47da 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -52,8 +52,10 @@ struct virtio_pci_device char (*msix_names)[256]; /* Number of available vectors */ unsigned msix_vectors; - /* Vectors allocated */ + /* Vectors allocated, excluding per-vq vectors if any */ unsigned msix_used_vectors; + /* Whether we have vector per vq */ + bool per_vq_vectors; }; /* Constants for MSI-X */ @@ -278,27 +280,24 @@ static void vp_free_vectors(struct virtio_device *vdev) vp_dev-msix_entries = NULL; } -static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, - int *options, int noptions) -{ - int i; - for (i = 0; i noptions; ++i) - if (!pci_enable_msix(dev, entries, options[i])) - return options[i]; - return -EBUSY; -} - -static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) +static int vp_request_vectors(struct virtio_device *vdev, int nvectors, + bool per_vq_vectors) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(vp_dev-vdev.dev); unsigned i, v; int err = -ENOMEM; - /* We want at most one vector per queue and one for config changes. -* Fallback to separate vectors for config and a shared for queues. -* Finally fall back to regular interrupts. 
*/ - int options[] = { max_vqs + 1, 2 }; - int nvectors = max(options[0], options[1]); + + if (!nvectors) { + /* Can't allocate MSI-X vectors, use regular interrupt */ + vp_dev-msix_vectors = 0; + err = request_irq(vp_dev-pci_dev-irq, vp_interrupt, + IRQF_SHARED, name, vp_dev); + if (err) + return err; + vp_dev-intx_enabled = 1; + return 0; + } vp_dev-msix_entries = kmalloc(nvectors * sizeof *vp_dev-msix_entries, GFP_KERNEL); @@ -312,41 +311,34 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) for (i = 0; i nvectors; ++i) vp_dev-msix_entries[i].entry = i; - err = vp_enable_msix(vp_dev-pci_dev, vp_dev-msix_entries, -options, ARRAY_SIZE(options)); - if (err 0) { - /* Can't allocate enough MSI-X vectors, use regular interrupt */ - vp_dev-msix_vectors = 0; - err = request_irq(vp_dev-pci_dev-irq, vp_interrupt, - IRQF_SHARED, name, vp_dev); - if (err) - goto error; - vp_dev-intx_enabled = 1; - } else { - vp_dev-msix_vectors = err; - vp_dev-msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev-msix_used_vectors; - snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names, -%s-config, name); - err = request_irq(vp_dev-msix_entries[v].vector, - vp_config_changed, 0, vp_dev-msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev-msix_used_vectors; + err = pci_enable_msix(vp_dev-pci_dev, vp_dev-msix_entries, nvectors); + if (err 0) + err = -ENOSPC; + if (err) + goto error; + vp_dev-msix_vectors = nvectors; + vp_dev-msix_enabled = 1; + + /* Set the vector used for configuration */ + v = vp_dev-msix_used_vectors; + snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names, +%s-config, name); + err = request_irq(vp_dev-msix_entries[v].vector, + vp_config_changed, 0, vp_dev-msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev-msix_used_vectors; - iowrite16(v, vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - /* Verify we had enough resources to assign the vector */ - v = ioread16(vp_dev-ioaddr + 
VIRTIO_MSI_CONFIG_VECTOR); - if (v ==
Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state
On 07/24/2009 10:00 AM, Jan Kiszka wrote: Marcelo Tosatti wrote: On Wed, Jul 22, 2009 at 11:53:26PM +0200, Jan Kiszka wrote: Release and re-acquire preemption and IRQ lock in the same order as vcpu_enter_guest does. This should happen in vcpu_enter_guest, before it decides to disable preemption/irqs (so you consolidate the control there). Maybe, maybe not. handle_invalid_guest_state is an alternative way of executing guest code, and it currently shares the setup and tear-down with vmx_vcpu_run. If it has to share parts that actually require preemption and IRQ lock, then moving makes not much sense. Can anyone comment on what the requirements for handle_invalid_guest_state are? Like you said, it's an alternative to vmx entry/exit, so it shares the same requirements. It must run with interrupts and preemption enabled, but any code that normally runs in the entry critical section (like interrupt injection) must continue to run in a critical section. I would suggest to merge this fix first and then decide about and potentially merge a refactoring patch. btw, what does it fix? a debug warning? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] fix serious regression
On 07/22/2009 02:57 AM, Glauber Costa wrote: Today I found a very catastrophic regression: I cannot run my mission critical servers running RHL7.1 anymore. This is a total disaster. Fortunately, I was able to isolate the commit that caused it: commit bb598da496c040d42dde564bd8ace181be52293e Author: Glauber Costa glom...@redhat.com Date: Mon Jul 6 16:12:52 2009 -0400 This guy is certainly stupid, and deserves punishment. It means I'll be writing code using emacs for the next week. Marcelo, please apply While I appreciate the humour, it means I have to totally rewrite the changelog plus ask a question. Please submit patches with the most boring changelogs you can and reserve the funnies for the --- section where they belong. diff --git a/qemu-kvm.c b/qemu-kvm.c index e200dea..393c5cc 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1003,8 +1003,6 @@ int pre_kvm_run(kvm_context_t kvm, CPUState *env) { kvm_arch_pre_kvm_run(kvm->opaque, env); -if (env->exit_request) -return 1; pthread_mutex_unlock(&qemu_mutex); return 0; } Can you explain the failure mode? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: fix ack not being delivered when msi present
On Sun, Jul 26, 2009 at 05:10:01PM +0300, Michael S. Tsirkin wrote: kvm_notify_acked_irq does not check irq type, so that it sometimes interprets msi vector as irq. As a result, ack notifiers are not called, which typically hangs the guest. The fix is to track and check irq type. Looks good to me. Signed-off-by: Michael S. Tsirkin m...@redhat.com Acked-by: Gleb Natapov g...@redhat.com --- Avi, since this bug was introduced in 2.6.30 already, I think we need the fix in 2.6.30.x as well as 2.6.31. include/linux/kvm_host.h |1 + virt/kvm/irq_comm.c |4 +++- 2 files changed, 4 insertions(+), 1 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f244f11..f814512 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -119,6 +119,7 @@ struct kvm_memory_slot { struct kvm_kernel_irq_routing_entry { u32 gsi; + u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int level); union { diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 100c267..001663f 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -171,7 +171,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) trace_kvm_ack_irq(irqchip, pin); list_for_each_entry(e, &kvm->irq_routing, link) - if (e->irqchip.irqchip == irqchip && + if (e->type == KVM_IRQ_ROUTING_IRQCHIP && + e->irqchip.irqchip == irqchip && e->irqchip.pin == pin) { gsi = e->gsi; break; @@ -288,6 +289,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, int delta; e->gsi = ue->gsi; + e->type = ue->type; switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: delta = 0; -- 1.6.2.5 -- Gleb. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Host latency peaks due to kvm-intel
Avi Kivity wrote: On 07/25/2009 12:55 PM, Jan Kiszka wrote: Avi Kivity wrote: On 07/24/2009 12:41 PM, Jan Kiszka wrote: I vaguely recall that someone promised to add a feature reporting facility for all those nice things, modern VM-extensions may or may not support (something like or even an extension of /proc/cpuinfo). What is the state of this plan? Would be specifically interesting for Intel CPUs as there seem to be many of them out there with restrictions for special use cases - like real-time. Newer kernels do report some vmx features (like flexpriority) in /proc/cpuinfo but not all. Ah, nice. Then we just need this? From: Jan Kiszka jan.kis...@siemens.com Subject: [PATCH] x86: Report VMX feature vwbinvd Not all VMX-capable CPUs support guest exits on wbinvd execution. If this is not supported, the instruction will run natively on behalf of the guest. This can cause multi-millisecond latencies to the host which is very problematic in real-time scenarios. Report the wbinvd trapping feature along with other VMX feature flags, calling it 'vwbinvd' ('virtual wbinvd'). What about AMD cpus that can always trap wbinvd? do we set the bit or do we trust the user to know that it isn't needed on AMD (I suppose the latter)? I also think that the feature flags should remain vendor-specific. This should go in via tip.git, it isn't really kvm related (except that kvm should start reading these caps one day instead of querying the hardware directly). OK, will go that way. Probably I will also add some flags for AMD's NPT, Intel's EPT and the new unrestricted guest mode at this chance. Jan signature.asc Description: OpenPGP digital signature
Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state
Avi Kivity wrote: On 07/24/2009 10:00 AM, Jan Kiszka wrote: Marcelo Tosatti wrote: On Wed, Jul 22, 2009 at 11:53:26PM +0200, Jan Kiszka wrote: Release and re-acquire preemption and IRQ lock in the same order as vcpu_enter_guest does. This should happen in vcpu_enter_guest, before it decides to disable preemption/irqs (so you consolidate the control there). Maybe, maybe not. handle_invalid_guest_state is an alternative way of executing guest code, and it currently shares the setup and tear-down with vmx_vcpu_run. If it has to share parts that actually require preemption and IRQ lock, then moving makes not much sense. Can anyone comment on what the requirements for handle_invalid_guest_state are? Like you said, it's an alternative to vmx entry/exit, so it shares the same requirements. It must run with interrupts and preemption enabled, but any code that normally runs in the entry critical section (like interrupt injection) must continue to run in a critical section. I would suggest to merge this fix first and then decide about and potentially merge a refactoring patch. btw, what does it fix? a debug warning? I haven't seen anything in the wild, and I don't think it would raise a warning. All it should cause is a potential delay of some pending reschedule as preempt_enable will not fire under local_irq_disable. Jan signature.asc Description: OpenPGP digital signature
Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state
On 07/26/2009 05:23 PM, Jan Kiszka wrote: btw, what does it fix? a debug warning? I haven't seen anything in the wild, and I don't think it would raise a warning. All it should cause is a potential delay of some pending reschedule as preempt_enable will not fire under local_irq_disable. Ah, okay, then it is a real fix. Preempt-correctness is important. (but won't local_irq_enable() reschedule?) -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
buildbot failure in qemu-kvm on default_x86_64_debian_5_0
The Buildbot has detected a new failure of default_x86_64_debian_5_0 on qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_debian_5_0/builds/8 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_1 Build Reason: The web-page 'force build' button was pressed by 'Daniel Gollub': test: new debian5 buildslave Build Source Stamp: HEAD Blamelist: BUILD FAILED: failed git sincerely, -The Buildbot -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Host latency peaks due to kvm-intel
Avi Kivity wrote: On 07/24/2009 12:41 PM, Jan Kiszka wrote: Jan (who is now patching his guest to avoid wbinvd where possible) Is there ever a case where it is required? What about under a hypervisor (i.e. check the hypervisor enabled bit). Reminds me of the discussion in '07 when I first stumbled over this :) : Yes, the bochs bios could safely skip the wbinvd in qemu mode. But that won't save us from Linux and - far more problematic - Windows or any binary-only guest which think they have to issue it. One may then close one's eyes, fire up the guest and then start the time-critical host application in the hope that the guest remains calm as long as it's up and running. But, well... Jan signature.asc Description: OpenPGP digital signature
Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state
Avi Kivity wrote: On 07/26/2009 05:23 PM, Jan Kiszka wrote: btw, what does it fix? a debug warning? I haven't seen anything in the wild, and I don't think it would raise a warning. All it should cause is a potential delay of some pending reschedule as preempt_enable will not fire under local_irq_disable. Ah, okay, then it is a real fix. Preempt-correctness is important. (but won't local_irq_enable() reschedule?) The last time I checked it was essentially a plain 'sti'. Jan signature.asc Description: OpenPGP digital signature
Re: Host latency peaks due to kvm-intel
On 07/26/2009 05:34 PM, Jan Kiszka wrote: Avi Kivity wrote: On 07/24/2009 12:41 PM, Jan Kiszka wrote: Jan (who is now patching his guest to avoid wbinvd where possible) Is there ever a case where it is required? What about under a hypervisor (i.e. check the hypervisor enabled bit). Reminds me of the discussion in '07 when I first stumbled over this :) : Yes, the bochs bios could safely skip the wbinvd in qemu mode. But that won't safe us from Linux and - far more problematic - Windows or any binary-only guest which think they have to issue it. One may the close eyes, fire up the guest and then start the time-critical host application in the hope that the guest remains calm as long as it's up and running. But, well... Given that it's now '09, how critical is the problem? Don't most cpus have vwbinvd now? If so, the real-time management application can simply refuse to run on such an old processor. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state
On 07/26/2009 05:38 PM, Jan Kiszka wrote: Avi Kivity wrote: On 07/26/2009 05:23 PM, Jan Kiszka wrote: btw, what does it fix? a debug warning? I haven't seen anything in the wild, and I don't think it would raise a warning. All it should cause is a potential delay of some pending reschedule as preempt_enable will not fire under local_irq_disable. Ah, okay, then it is a real fix. Preempt-correctness is important. (but won't local_irq_enable() reschedule?) The last time I checked it was essentially a plain 'sti'. Presumably there's a reschedule interrupt queued; I think if you set the reschedule bit you have to IPI the cpu running the task. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Host latency peaks due to kvm-intel
Avi Kivity wrote: On 07/26/2009 05:34 PM, Jan Kiszka wrote: Avi Kivity wrote: On 07/24/2009 12:41 PM, Jan Kiszka wrote: Jan (who is now patching his guest to avoid wbinvd where possible) Is there ever a case where it is required? What about under a hypervisor (i.e. check the hypervisor enabled bit). Reminds me of the discussion in '07 when I first stumbled over this :) : Yes, the bochs bios could safely skip the wbinvd in qemu mode. But that won't safe us from Linux and - far more problematic - Windows or any binary-only guest which think they have to issue it. One may the close eyes, fire up the guest and then start the time-critical host application in the hope that the guest remains calm as long as it's up and running. But, well... Given that it's now '09, how critical is the problem? Don't most cpus have vwbinvd now? Sadly, in (embedded) industry you have to live with old hardware for quite a long time. And I would have to throw my only 2-years-old notebook from the table to have a more decent portable test environment. If so, the real-time management application can simply refuse to run on such an old processor. At least one could go and collect the cpuinfo from some box that suffers from high latencies. Normally, you go through extensive testing anyway, also checking for issues like crazy SMI BIOS code that runs for eternities. Jan signature.asc Description: OpenPGP digital signature
Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state
Avi Kivity wrote: On 07/26/2009 05:38 PM, Jan Kiszka wrote: Avi Kivity wrote: On 07/26/2009 05:23 PM, Jan Kiszka wrote: btw, what does it fix? a debug warning? I haven't seen anything in the wild, and I don't think it would raise a warning. All it should cause is a potential delay of some pending reschedule as preempt_enable will not fire under local_irq_disable. Ah, okay, then it is a real fix. Preempt-correctness is important. (but won't local_irq_enable() reschedule?) The last time I checked it was essentially a plain 'sti'. Presumably there's a reschedule interrupt queued; I think if you set the reschedule bit you have to IPI the cpu running the task. Yeah. But as we preempt_disable first, that one might have been processed already. Jan signature.asc Description: OpenPGP digital signature
Re: Very high memory usage with KVM
Hi Avi. On Sunday, 26 July 2009 14:31:57 +0300, Avi Kivity wrote: I have an installation with Ubuntu Hardy Heron server amd64 with KVM-62 from Ubuntu repositories installed on an HP Proliant DL380 G5 with two Xeon E5405 quadcore processors and 16 GiB of RAM which has six VMs with the following configuration of memory: Hostname | RAM ===+=== Ganimedes |2 GiB Os |1 GiB Aprender |2 GiB Aps0 |2 GiB Aps2 |4 GiB Ratatoskr |4 GiB ===+=== TOTAL | 15 GiB Initially the host was created with a swap partition of 1 GiB (more 1 GiB than was free for use of host) but this amount with the time remained short and I had to add a LV of 7 GiB to be used with swap, being now a total of 8 GiB of swap of which at this moment I have only a 9% free. Is 'normal' this use of memory? r...@ss02:~# ps -e --sort -rss -Ho user,start_time,pid,pcpu,pmem,rss,size,vsz,args USER START PID %CPU %MEM RSSSZVSZ COMMAND [...] root Jul06 27471 52.3 24.4 4023232 4292200 4350296 kvmratatoskr root Jul24 9955 137 23.8 3923620 4308592 4350308 kvmaps2 root Jul06 8751 5.8 8.3 1368228 2171808 2229888 kvmaps0 root Jul07 8565 2.7 5.2 862844 2204704 2246416 kvmaprender root Apr22 7842 0.6 3.6 600072 2172056 2230136 kvmganimedes root Jul01 7944 0.6 2.0 334860 1119916 1177996 kvmos r...@ss02:~# free total used free sharedbuffers cached Mem: 16463388 16377844 85544 0 894216 66328 -/+ buffers/cache: 154173001046088 Swap: 83199487621916 698032 Updating to KVM-84 or superior can improve this situation? What is the storage configuration? Are you using qcow2? The host machine has 8 x 300 GiB SAS disk in RAID 5 by hardware (7 disks with 1 spare) on 1 logicaldrive. 
The partitioning scheme of host is the following one: r...@ss02:~# fdisk -l /dev/cciss/c0d0 Disk /dev/cciss/c0d0: 1799.7 GB, 1799797127168 bytes 255 heads, 63 sectors/track, 218812 cylinders Units = cylinders of 16065 * 512 = 8225280 bytes Disk identifier: 0x000af3c3 Device Boot Start End Blocks Id System /dev/cciss/c0d0p1 1 122 979933+ 82 Linux swap / Solaris /dev/cciss/c0d0p2 * 1231338 9767520 83 Linux /dev/cciss/c0d0p31339 218812 1746859905 8e Linux LVM I'm not using qcow2 files. The /dev/cciss/c0d0p3 partition is a physical volume that maintains the logical volumes that are used for VM's disks: r...@ss02:~# pvs PVVG Fmt Attr PSize PFree /dev/cciss/c0d0p3 vm lvm2 a- 1,63T 1,13T What are the image logical and physical sizes? The disks for the VMs have these sizes: aprender-raiz vm -wi-ao 8,00G aprender-space vm -wi-ao 20,00G aps0-raiz vm -wi-ao 7,00G aps0-space vm -wi-ao 10,00G aps2-cache vm -wi-ao 20,00G aps2-index vm -wi-ao 10,00G aps2-raiz vm -wi-ao 7,00G aps2-space vm -wi-ao 10,00G ganimedes-raiz vm -wi-ao 5,00G ganimedes-space vm -wi-ao 10,00G os-disk vm -wi-ao 6,00G os-mailbox vm -wi-ao 150,00G os-spacevm -wi-ao 10,00G ratatoskr-raiz vm -wi-ao 8,00G ratatoskr-space vm -wi-ao 200,00G With respect to the internal partitioning scheme for each one of the VMs, it is the following one: * Aprender: aprender:~# fdisk -l /dev/hda Disk /dev/hda: 8589 MB, 8589934592 bytes 255 heads, 63 sectors/track, 1044 cylinders Units = cylinders of 16065 * 512 = 8225280 bytes Device Boot Start End Blocks Id System /dev/hda1 1 486 3903763+ 82 Linux swap / Solaris /dev/hda2 4871044 4482135 83 Linux aprender:~# aprender:~# aprender:~# fdisk -l /dev/hdb Disk /dev/hdb: 21.4 GB, 21474836480 bytes 255 heads, 63 sectors/track, 2610 cylinders Units = cylinders of 16065 * 512 = 8225280 bytes Device Boot Start End Blocks Id System /dev/hdb1 1261020964793+ 83 Linux * Aps0: [r...@aps:~] $fdisk -l /dev/hda Disco /dev/hda: 7516 MB, 7516192768 bytes 255 heads, 63 sectors/track, 913 cylinders 
Units = cilindros of 16065 * 512 = 8225280 bytes Disk identifier: 0x00039c2a Disposit. InicioComienzo Fin Bloques Id Sistema /dev/hda1 1 365 2931831 82 Linux swap / Solaris /dev/hda2 366 913 4401810 83 Linux [r...@aps:~] $ [r...@aps:~] $fdisk -l /dev/hdb Disco /dev/hdb: 10.7 GB, 10737418240 bytes 255 heads, 63 sectors/track, 1305 cylinders Units = cilindros of 16065 * 512 = 8225280 bytes Disk identifier: 0x00087cc1 Disposit. InicioComienzo Fin Bloques Id Sistema /dev/hdb1 1
Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state
On 07/26/2009 05:55 PM, Jan Kiszka wrote: Presumably there's a reschedule interrupt queued; I think if you set the reschedule bit you have to IPI the cpu running the task. Yeah. But as we preempt_disable first, that one might have been processed already. Ah, yes. Thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Very high memory usage with KVM
On 07/26/2009 05:56 PM, Daniel Bareiro wrote: What is the storage configuration? Are you using qcow2? I'm not using qcow2 files. The /dev/cciss/c0d0p3 partition is a physical volume that maintains the logical volumes that are used for VM's disks: In this case there should be no excessive memory usage. qcow2 could use extra memory, especially on older qemu-kvm versions (or images created with older qemu-img versions). What is the host kernel (uname -a)? r...@ss02:~# uname -a Linux ss02 2.6.24-19-server #1 SMP Wed Aug 20 18:43:06 UTC 2008 x86_64 GNU/Linux kvm memory management with pre 2.6.27 host kernels is pretty weak. Using a newer host kernel (and newer kvm) may solve this problem. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: buildbot failure in qemu-kvm on default_x86_64_debian_5_0
On 07/26/2009 05:26 PM, qemu-...@buildbot.b1-systems.de wrote: The Buildbot has detected a new failure of default_x86_64_debian_5_0 on qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_debian_5_0/builds/8 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_1 Build Reason: The web-page 'force build' button was pressed by 'Daniel Gollub': test: new debian5 buildslave Build Source Stamp: HEAD Blamelist: BUILD FAILED: failed git Upon execvpe git-init ['git-init'] in environment id 19800080 :Traceback (most recent call last): File /usr/lib/python2.5/site-packages/twisted/internet/process.py, line 394, in _fork executable, args, environment) File /usr/lib/python2.5/site-packages/twisted/internet/process.py, line 440, in _execChild os.execvpe(executable, args, environment) File /usr/lib/python2.5/os.py, line 363, in execvpe _execvpe(file, args, env) File /usr/lib/python2.5/os.py, line 390, in _execvpe func(fullname, *argrest) OSError: [Errno 2] No such file or directory program finished with exit code 1 Either git is not installed, or a new version of git is installed which no longer has git-init (instead it has 'git init'). -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: buildbot failure in qemu-kvm on default_x86_64_debian_5_0
On Sunday 26 July 2009 05:14:37 pm Avi Kivity wrote: Either git is not installed, or a new version of git is installed which no longer has git-init (instead it has 'git init'). It's already fixed - sorry about the noise. I forgot to install git-core. Check build #9: http://buildbot.b1-systems.de/qemu-kvm/admin/builders/default_x86_64_debian_5_0/builds/9 Best Regards, Daniel -- Daniel GollubGeschaeftsfuehrer: Ralph Dehner FOSS Developer Unternehmenssitz: Vohburg B1 Systems GmbH Amtsgericht: Ingolstadt Mobil: +49-(0)-160 47 73 970 Handelsregister: HRB 3537 EMail: gol...@b1-systems.de http://www.b1-systems.de Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg http://pgpkeys.pca.dfn.de/pks/lookup?op=getsearch=0xED14B95C2F8CA78D -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: buildbot failure in qemu-kvm on default_x86_64_debian_5_0
On Sunday 26 July 2009 05:13:14 pm Daniel Gollub wrote: Check build #9: http://buildbot.b1-systems.de/qemu-kvm/admin/builders/default_x86_64_debian _5_0/builds/9 Or try this (without admin ;)): http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_debian_5_0/builds/9 Best Regards, Daniel -- Daniel GollubGeschaeftsfuehrer: Ralph Dehner FOSS Developer Unternehmenssitz: Vohburg B1 Systems GmbH Amtsgericht: Ingolstadt Mobil: +49-(0)-160 47 73 970 Handelsregister: HRB 3537 EMail: gol...@b1-systems.de http://www.b1-systems.de Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg http://pgpkeys.pca.dfn.de/pks/lookup?op=getsearch=0xED14B95C2F8CA78D -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/6] kvm/x86/svm: force new asid on vcpu migration
On 03/05/2009 02:12 PM, Joerg Roedel wrote: Signed-off-by: Joerg Roedel <joerg.roe...@amd.com> --- arch/x86/kvm/svm.c | 3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1821c20..0e66bca 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -180,7 +180,7 @@ static inline void kvm_write_cr2(unsigned long val) static inline void force_new_asid(struct kvm_vcpu *vcpu) { - to_svm(vcpu)->asid_generation--; + to_svm(vcpu)->asid_generation = 0; } static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) @@ -716,6 +716,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) svm->vmcb->control.tsc_offset += delta; vcpu->cpu = cpu; kvm_migrate_timers(vcpu); + force_new_asid(vcpu); } for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) Does this remove the need for 6eaa802c (KVM: SVM: fix random segfaults with NPT enabled)? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv3 1/2] virtio: delete vq from list
On Sun, Jul 26, 2009 at 03:48:01PM +0300, Michael S. Tsirkin wrote: This makes delete vq the reverse of find vq. This is required to make it possible to retry find_vqs after a failure, otherwise the list gets corrupted. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- drivers/virtio/virtio_pci.c |6 +- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 7e21389..2eaf1fb 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -464,7 +464,11 @@ static void vp_del_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq-vdev); struct virtio_pci_vq_info *info = vq-priv; - unsigned long size; + unsigned long flags, size; + +spin_lock_irqsave(vp_dev-lock, flags); +list_del(info-node); +spin_unlock_irqrestore(vp_dev-lock, flags); Grr, whitespace damage. Not sure how this got in, resending a corrected patch. Sorry about the churn. iowrite16(info-queue_index, vp_dev-ioaddr + VIRTIO_PCI_QUEUE_SEL); -- 1.6.2.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv4 0/2] virtio: find_vqs/del_vqs fixes
Here's a patch series to fix known regressions in virtio_pci, by refactoring code along the lines suggested by Rusty. Changes since v3: whitespace fixed in PATCH 1/2. This is on top of patch "virtio: fix memory leak on device removal" that has been applied by Rusty. This supersedes patches: [PATCHv3] virtio: recover from vector assignment failure [PATCHv2] virtio: fix double free_irq on device removal Michael S. Tsirkin (2): virtio: make del_vq delete vq from list virtio: refactor find_vqs drivers/virtio/virtio_pci.c | 218 --- 1 files changed, 124 insertions(+), 94 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv4 1/2] virtio: make del_vq delete vq from list
This makes delete vq the reverse of find vq. This is required to make it possible to retry find_vqs after a failure, otherwise the list gets corrupted. Signed-off-by: Michael S. Tsirkin <m...@redhat.com> --- drivers/virtio/virtio_pci.c | 6 +- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 7e21389..4c74c72 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -464,7 +464,11 @@ static void vp_del_vq(struct virtqueue *vq) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_vq_info *info = vq->priv; - unsigned long size; + unsigned long flags, size; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_del(&info->node); + spin_unlock_irqrestore(&vp_dev->lock, flags); iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); -- 1.6.2.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv4 2/2] virtio: refactor find_vqs
This refactors find_vqs, making it more readable and robust, and fixing two regressions from 2.6.30: - double free_irq causing BUG_ON on device removal - probe failure when vq can't be assigned to msi-x vector (reported on old host kernels) An older version of this patch was tested by Amit Shah. Reported-by: Amit Shah amit.s...@redhat.com Signed-off-by: Michael S. Tsirkin m...@redhat.com --- drivers/virtio/virtio_pci.c | 212 --- 1 files changed, 119 insertions(+), 93 deletions(-) diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c index 4c74c72..c17b830 100644 --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -52,8 +52,10 @@ struct virtio_pci_device char (*msix_names)[256]; /* Number of available vectors */ unsigned msix_vectors; - /* Vectors allocated */ + /* Vectors allocated, excluding per-vq vectors if any */ unsigned msix_used_vectors; + /* Whether we have vector per vq */ + bool per_vq_vectors; }; /* Constants for MSI-X */ @@ -278,27 +280,24 @@ static void vp_free_vectors(struct virtio_device *vdev) vp_dev-msix_entries = NULL; } -static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries, - int *options, int noptions) -{ - int i; - for (i = 0; i noptions; ++i) - if (!pci_enable_msix(dev, entries, options[i])) - return options[i]; - return -EBUSY; -} - -static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) +static int vp_request_vectors(struct virtio_device *vdev, int nvectors, + bool per_vq_vectors) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(vp_dev-vdev.dev); unsigned i, v; int err = -ENOMEM; - /* We want at most one vector per queue and one for config changes. -* Fallback to separate vectors for config and a shared for queues. -* Finally fall back to regular interrupts. 
*/ - int options[] = { max_vqs + 1, 2 }; - int nvectors = max(options[0], options[1]); + + if (!nvectors) { + /* Can't allocate MSI-X vectors, use regular interrupt */ + vp_dev-msix_vectors = 0; + err = request_irq(vp_dev-pci_dev-irq, vp_interrupt, + IRQF_SHARED, name, vp_dev); + if (err) + return err; + vp_dev-intx_enabled = 1; + return 0; + } vp_dev-msix_entries = kmalloc(nvectors * sizeof *vp_dev-msix_entries, GFP_KERNEL); @@ -312,41 +311,34 @@ static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs) for (i = 0; i nvectors; ++i) vp_dev-msix_entries[i].entry = i; - err = vp_enable_msix(vp_dev-pci_dev, vp_dev-msix_entries, -options, ARRAY_SIZE(options)); - if (err 0) { - /* Can't allocate enough MSI-X vectors, use regular interrupt */ - vp_dev-msix_vectors = 0; - err = request_irq(vp_dev-pci_dev-irq, vp_interrupt, - IRQF_SHARED, name, vp_dev); - if (err) - goto error; - vp_dev-intx_enabled = 1; - } else { - vp_dev-msix_vectors = err; - vp_dev-msix_enabled = 1; - - /* Set the vector used for configuration */ - v = vp_dev-msix_used_vectors; - snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names, -%s-config, name); - err = request_irq(vp_dev-msix_entries[v].vector, - vp_config_changed, 0, vp_dev-msix_names[v], - vp_dev); - if (err) - goto error; - ++vp_dev-msix_used_vectors; + err = pci_enable_msix(vp_dev-pci_dev, vp_dev-msix_entries, nvectors); + if (err 0) + err = -ENOSPC; + if (err) + goto error; + vp_dev-msix_vectors = nvectors; + vp_dev-msix_enabled = 1; + + /* Set the vector used for configuration */ + v = vp_dev-msix_used_vectors; + snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names, +%s-config, name); + err = request_irq(vp_dev-msix_entries[v].vector, + vp_config_changed, 0, vp_dev-msix_names[v], + vp_dev); + if (err) + goto error; + ++vp_dev-msix_used_vectors; - iowrite16(v, vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR); - /* Verify we had enough resources to assign the vector */ - v = ioread16(vp_dev-ioaddr + 
VIRTIO_MSI_CONFIG_VECTOR); - if (v ==
Re: Very high memory usage with KVM
Avi On Sunday, 26 July 2009 18:11:27 +0300, Avi Kivity wrote: What is the storage configuration? Are you using qcow2? I'm not using qcow2 files. The /dev/cciss/c0d0p3 partition is a physical volume that maintains the logical volumes that are used for VM's disks: In this case there should be no excessive memory usage. qcow2 could use extra memory, especially on older qemu-kvm versions (or images created with older qemu-img versions). What is the host kernel (uname -a)? r...@ss02:~# uname -a Linux ss02 2.6.24-19-server #1 SMP Wed Aug 20 18:43:06 UTC 2008 x86_64 GNU/Linux kvm memory management with pre 2.6.27 host kernels is pretty weak. Using a newer host kernel (and newer kvm) may solve this problem. Initially I am going to see how upgrading to KVM-84 from the Hardy Heron backports improves the situation, since at the moment the last kernel available for Hardy is the one I have mentioned. I would like to know if the newest versions of KVM published on the official site of the project solve a bug recently reported in Ubuntu Launchpad [1]. Also, I was observing errors of type 'to swapper Not tainted' or 'java Not tainted' in the VM (aps2, with a high rate of I/O), which I mentioned in a previous message sent to the list [2]. I would like to know if you could tell me whether this can be due to a KVM bug that would be solved in a later version. Thanks for such a quick reply. Regards, Daniel [1] https://bugs.launchpad.net/ubuntu/+source/kvm/+bug/359447 [2] http://thread.gmane.org/gmane.comp.emulators.kvm.devel/37631 -- Fingerprint: BFB3 08D6 B4D1 31B2 72B9 29CE 6696 BF1B 14E6 1D37 Powered by Debian GNU/Linux Squeeze - Linux user #188.598 signature.asc Description: Digital signature
Re: OpenSolaris boot failure with KVM and VirtualBox
On 07/16/2009 10:30 AM, Sid Boyce wrote: I first tried using kernel 2.6.31-rc1 on openSUSE 11.2 Milestone1 on a 4P box. All other VM's, Windows and Linux work, currently running openSUSE 11.2 Milestone 3 with 2.6.31-rc3. Verified the .iso is good. Error message = Booting 'OpenSolaris 2009.06' kernel$ /platform/i86pc/kernel/$ISADIR/unix loading '/platform/i86pc/kernel/$ISADIR/unix' ... cpu: 'AuthenticAMD' family 16 model 4 step 2 ... [BIOS accepted mixed-mode target setting!] [Multiboot-kernel, loadaddr=0xbffe38, text-and-data=0x1b1ff0, bss=0x0, entry=0xc0] 'platform/i86pc/kernel/amd64/unix' is loaded module$ /boot/$ISADIR/x86.microroot loading '/boot/$ISADIR/x86.microroot' ... Error 15: File not found Press any key to continue ... Regards Sid. Please copy kvm@vger.kernel.org on kvm issues. Is this a regression from previous kernel versions? What userspace are you using? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Very high memory usage with KVM
On 07/26/2009 06:50 PM, Daniel Bareiro wrote: kvm memory management with pre 2.6.27 host kernels is pretty weak. Using a newer host kernel (and newer kvm) may solve this problem. Initially I am going to see how it improves the situation upgrading to KVM-84 of backports of Hardy Heron, since at the moment last kernel available for Hardy is the one I has commented. I would like to know if the newest versions of KVM published in the official site of the project solve a bug recently reported in Ubuntu Launchpad [1]. Also I was observing errors of type 'to swapper Not tainted' or 'java Not tainted' in the VM (aps2, with a high rate of I/O) and that I've commented in a previous message sent to the list [2]. I would want to know if you could indicate to me if this can be due to KVM bug that would be solved in a later version. My guess is that it is due to poor swapping with pre-2.6.27 hosts. 15 GB used out of 16GB total is just 6% reserve, which may be a bit too low. With a 2.6.27 host some small amount of memory would be swapped out, before that you'd see thrashing. Another way to check is to drop one guest (or reduce total memory needed by 1GB) and see if you get the same results or if things improve. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv0 RFC] kvm: irqfd support for level interrupts
Here's an untested patch with partial support for level triggered interrupts in irqfd. What this patch has: support for clearing interrupt on ack. What this patch does not have: support for signalling an eventfd on ack so that userspace can take action and e.g. reenable interrupt. Gleb, Marcelo, I'd like your input on the approach taken wrt locking. Does it look sane? Avi, how's the interface? I intend to also add an eventfd probably in the padding in the irqfd struct. Signed-off-by: Michael S. Tsirkin <m...@redhat.com> --- diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 230a91a..8bf16af 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -488,6 +488,7 @@ struct kvm_x86_mce { #endif #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) +#define KVM_IRQFD_FLAG_LEVEL (1 << 1) struct kvm_irqfd { __u32 fd; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 99017e8..fcbf5b5 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -45,12 +45,14 @@ struct _irqfd { struct kvm *kvm; struct eventfd_ctx *eventfd; int gsi; + int is_level; struct list_head list; poll_table pt; wait_queue_head_t *wqh; wait_queue_t wait; struct work_struct inject; struct work_struct shutdown; + struct kvm_irq_ack_notifier kian; }; static struct workqueue_struct *irqfd_cleanup_wq; @@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work) mutex_lock(&kvm->irq_lock); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); - kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); + if (!irqfd->is_level) + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); mutex_unlock(&kvm->irq_lock); } +static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian) +{ + kvm_set_irq(kian->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian->gsi, 0); +} /* * Race-free decouple logic (ordering is critical) */ @@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work) */ flush_work(&irqfd->inject); + if (irqfd->is_level) + kvm_unregister_irq_ack_notifier(&irqfd->kian); + /* * It is now safe to release the object's 
resources */ @@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, } static int -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) +kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level) { struct _irqfd *irqfd; struct file *file = NULL; @@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) irqfd->kvm = kvm; irqfd->gsi = gsi; + irqfd->is_level = is_level; INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); @@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) irqfd->eventfd = eventfd; + if (is_level) { + irqfd->kian.gsi = gsi; + irqfd->kian.irq_acked = irqfd_irq_acked; + kvm_register_irq_ack_notifier(&irqfd->kian); + } + /* * Install our own custom wake-up handling so we are notified via * a callback whenever someone signals the underlying eventfd @@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) { + if (flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL)) + return -EINVAL; + if (flags & KVM_IRQFD_FLAG_DEASSIGN) return kvm_irqfd_deassign(kvm, fd, gsi); - return kvm_irqfd_assign(kvm, fd, gsi); + return kvm_irqfd_assign(kvm, fd, gsi, !!(flags & KVM_IRQFD_FLAG_LEVEL)); } /* -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: OpenSolaris boot failure with KVM and VirtualBox
On 26.07.2009, Avi Kivity wrote: On 07/16/2009 10:30 AM, Sid Boyce wrote: I first tried using kernel 2.6.31-rc1 on openSUSE 11.2 Milestone1 on a 4P box. All other VM's, Windows and Linux work, currently running openSUSE 11.2 Milestone 3 with 2.6.31-rc3. Verified the .iso is good. Opensuse Milestones are testing versions in at best alpha state. Error 15: File not found [] Virtualbox works properly running Opensolaris on my machines, with any 2.6.31-rc kernel released, including rc4 and git versions (opensuse 11.1 based). -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC] pci: expose function reset capability in sysfs
Some devices allow an individual function to be reset without affecting other functions in the same device: that's what pci_reset_function does. For devices that have this support, expose reset attribute in sysfs. This is useful e.g. for virtualization, where a qemu userspace process wants to reset the device when the guest is started/reset, to emulate machine reboot as closely as possible. Signed-off-by: Michael S. Tsirkin <m...@redhat.com> --- Jesse, all, could you please comment on whether the following approach looks sane? Compile-tested only at this point. I'm also not sure whether the CAP_SYS_ADMIN check is necessary: maybe 400 permissions on the sysfs file are sufficient? drivers/pci/pci-sysfs.c | 37 + drivers/pci/pci.c | 16 drivers/pci/pci.h | 1 + include/linux/kvm_host.h | 1 + virt/kvm/irq_comm.c | 4 +++- 5 files changed, 58 insertions(+), 1 deletions(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 85ebd02..92805e8 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -916,6 +916,28 @@ int __attribute__ ((weak)) pcibios_add_platform_entries(struct pci_dev *dev) return 0; } +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t count) +{ + struct pci_dev *pdev = to_pci_dev(dev); + unsigned long val; + ssize_t result = strict_strtoul(buf, 0, &val); + + if (result < 0) + return result; + + /* this can crash the machine when done on the wrong device */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (val != 1) + return -EINVAL; + return pci_reset_function(pdev); +} + +static struct device_attribute reset_attr = __ATTR(reset, 0200, NULL, reset_store); + static int pci_create_capabilities_sysfs(struct pci_dev *dev) { int retval; @@ -943,7 +965,21 @@ static int pci_create_capabilities_sysfs(struct pci_dev *dev) /* Active State Power Management */ pcie_aspm_create_sysfs_dev_files(dev); + if (!pci_probe_reset_function(dev)) { + retval = device_create_file(&dev->dev, &reset_attr); 
+ if (retval) + goto error; + } return 0; + +error: + pcie_aspm_remove_sysfs_dev_files(dev); + if (dev->vpd && dev->vpd->attr) { + sysfs_remove_bin_file(&dev->dev.kobj, dev->vpd->attr); + kfree(dev->vpd->attr); + } + + return retval; } int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) @@ -1037,6 +1073,7 @@ static void pci_remove_capabilities_sysfs(struct pci_dev *dev) } pcie_aspm_remove_sysfs_dev_files(dev); + device_remove_file(&dev->dev, &reset_attr); } /** diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index dbd0f94..f6d1c6c 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2260,6 +2260,22 @@ int __pci_reset_function(struct pci_dev *dev) EXPORT_SYMBOL_GPL(__pci_reset_function); /** + * pci_probe_reset_function - check whether the device can be safely reset + * @dev: PCI device to reset + * + * Some devices allow an individual function to be reset without affecting + * other functions in the same device. The PCI device must be responsive + * to PCI config space in order to use this function. + * + * Returns 0 if the device function can be reset or negative if the + * device doesn't support resetting a single function. + */ +int pci_probe_reset_function(struct pci_dev *dev) +{ + return pci_dev_reset(dev, 1); +} + +/** * pci_reset_function - quiesce and reset a PCI device function * @dev: PCI device to reset * diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f73bcbe..60a3811 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -16,6 +16,7 @@ extern void pci_cleanup_rom(struct pci_dev *dev); extern int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma); #endif +int pci_probe_reset_function(struct pci_dev *dev); /** * struct pci_platform_pm_ops - Firmware PM callbacks -- 1.6.2.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: OpenSolaris boot failure with KVM and VirtualBox
On 26/07/09 17:13, Avi Kivity wrote: On 07/16/2009 10:30 AM, Sid Boyce wrote: I first tried using kernel 2.6.31-rc1 on openSUSE 11.2 Milestone1 on a 4P box. All other VM's, Windows and Linux work, currently running openSUSE 11.2 Milestone 3 with 2.6.31-rc3. Verified the .iso is good. Error message = Booting 'OpenSolaris 2009.06' kernel$ /platform/i86pc/kernel/$ISADIR/unix loading '/platform/i86pc/kernel/$ISADIR/unix' ... cpu: 'AuthenticAMD' family 16 model 4 step 2 ... [BIOS accepted mixed-mode target setting!] [Multiboot-kernel, loadaddr=0xbffe38, text-and-data=0x1b1ff0, bss=0x0, entry=0xc0] 'platform/i86pc/kernel/amd64/unix' is loaded module$ /boot/$ISADIR/x86.microroot loading '/boot/$ISADIR/x86.microroot' ... Error 15: File not found Press any key to continue ... Regards Sid. Please copy kvm@vger.kernel.org on kvm issues. Is this a regression from previous kernel versions? What userspace are you using? Currently running 2.6.31-rc4 with the original kqemu-1.4.0pre1 on openSUSE 11.2 Milestone 4 and it boots OK - a 200G disk image used. qemu-system-x86_64 -cdrom /ISO/osol-0906-ai-x86.iso -boot d /osol0906.qcow2 -smp 4 -m 500M Regards Sid. -- Sid Boyce ... Hamradio License G3VBV, Licensed Private Pilot Emeritus IBM/Amdahl Mainframes and Sun/Fujitsu Servers Tech Support Specialist, Cricket Coach Microsoft Windows Free Zone - Linux used for all Computing Tasks -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: OpenSolaris boot failure with KVM and VirtualBox
On 07/26/2009 09:14 PM, Sid Boyce wrote: Is this a regression from previous kernel versions? What userspace are you using? Currently running 2.6.31-rc4 with the original kqemu-1.4.0pre1 on openSUSE 11.2 Milestone 4 and it boots OK - a 200G disk image used. qemu-system-x86_64 -cdrom /ISO/osol-0906-ai-x86.iso -boot d /osol0906.qcow2 -smp 4 -m 500M Wait, are you using kqemu or kvm? -- Do not meddle in the internals of kernels, for they are subtle and quick to panic. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/9] change order of kvm_init call.
Glauber Costa wrote: The goal is to get rid of the call to kvm_init. But those things are subtle, and often break. So do it in a separate patch, to help finding potential issues in future bisections. Found such an issue: This patch triggers a segfault if no kvm modules are loaded and you start qemu without -no-kvm. Please have a look. Jan Signed-off-by: Glauber Costa <glom...@redhat.com> --- vl.c | 18 +- 1 files changed, 9 insertions(+), 9 deletions(-) diff --git a/vl.c b/vl.c index f4e4d0f..86a6d70 100644 --- a/vl.c +++ b/vl.c @@ -5748,15 +5748,6 @@ int main(int argc, char **argv, char **envp) signal(SIGTTIN, SIG_IGN); } -#ifdef CONFIG_KVM -if (kvm_enabled()) { - if (kvm_init(smp_cpus) < 0) { - fprintf(stderr, "Could not initialize KVM, will disable KVM support\n"); - exit(1); - } -} -#endif - if (pid_file && qemu_create_pidfile(pid_file) != 0) { if (daemonize) { uint8_t status = 1; @@ -5956,6 +5947,15 @@ int main(int argc, char **argv, char **envp) } #endif +#ifdef CONFIG_KVM +if (kvm_enabled()) { + if (kvm_init(smp_cpus) < 0) { + fprintf(stderr, "Could not initialize KVM, will disable KVM support\n"); + exit(1); + } +} +#endif + if (monitor_device) { monitor_hd = qemu_chr_open("monitor", monitor_device, NULL); if (!monitor_hd) { signature.asc Description: OpenPGP digital signature
Re: Host latency peaks due to kvm-intel
Jan Kiszka wrote: Avi Kivity wrote: On 07/24/2009 12:41 PM, Jan Kiszka wrote: I vaguely recall that someone promised to add a feature reporting facility for all those nice things, modern VM-extensions may or may not support (something like or even an extension of /proc/cpuinfo). What is the state of this plan? Would be specifically interesting for Intel CPUs as there seem to be many of them out there with restrictions for special use cases - like real-time. Newer kernels do report some vmx features (like flexpriority) in /proc/cpuinfo but not all. Ah, nice. Then we just need this? Fine with me. Acked-by: H. Peter Anvin h...@zytor.com However, I guess the real question if we shouldn't export ALL VMX features in a consistent way instead? -hpa -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: NMI Injection to Guest
Hi Gleb, Thanks for your reply. 2009/7/26 Gleb Natapov <g...@redhat.com>: On Sat, Jul 25, 2009 at 10:46:39PM +0200, Jiaqing Du wrote: Hi list, I'm trying to extend OProfile to support guest profiling. One step of my work is to push an NMI to the guest(s) when a performance counter overflows. Please correct me if the following is not correct: counter overflow --> NMI to host --> VM exit --> int $2 to handle NMI on host --> ... --> VM entry --> NMI to guest Correct except the last step (--> NMI to guest). Host nmi is not propagated to guests. Yes. I need to add some code to propagate host NMI to guests. On the path between VM-exit and VM-entry, I want to push an NMI to the guest. I tried to put the following code on the path, but never succeeded. Various weird things happened, such as KVM hangs, guest kernel oops, and host hangs. I tried both code with Linux 2.6.30 and version 88. if (vmx_nmi_allowed()) { vmx_inject_nmi(); } Any suggestions? Where is the right place to push an NMI and what are the necessary checks? Call kvm_inject_nmi(vcpu). And don't forget to vcpu_load(vcpu) before doing it. See kvm_vcpu_ioctl_nmi(). Based on the code with Linux 2.6.30, what kvm_inject_nmi(vcpu) does is just set vcpu->arch.nmi_pending to 1. kvm_vcpu_ioctl_nmi() puts vcpu_load() before the setting and vcpu_put() after it. I need to push host NMI to guests between a VM-exit and a VM-entry after that. The VM-exit is due to an NMI caused by performance counter overflow. The following code with vcpu_enter_guest(), which is surrounded by a vcpu_load() and vcpu_put(), checks this vcpu->arch.nmi_pending and other related flags to decide whether an NMI should be pushed to guests. if (vcpu->arch.exception.pending) __queue_exception(vcpu); else if (irqchip_in_kernel(vcpu->kvm)) kvm_x86_ops->inject_pending_irq(vcpu); else kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); What I did is given below: 3097 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3098 { ... ... 
3156 if (kvm_vm_exit_on_cnt_overflow) { 3157 vcpu->arch.nmi_pending = 1; 3158 } 3159 3160 if (vcpu->arch.exception.pending) 3161 __queue_exception(vcpu); 3162 else if (irqchip_in_kernel(vcpu->kvm)) 3163 kvm_x86_ops->inject_pending_irq(vcpu); 3164 else 3165 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); ... 3236 } In vcpu_enter_guest(), before this part of code is reached, vcpu->arch.nmi_pending is set to 1 if the VM-exit is due to performance counter overflow. Still, no NMIs are seen by the guests. I also tried to put this vcpu->arch.nmi_pending = 1; somewhere else on the path between a VM-exit and VM-entry, it does not seem to work either. Only vmx_inject_nmi() manages to push NMIs to guests, but without the right sanity checks, it causes various weird host and guest behaviors. To inject NMIs on the path between a VM-exit and VM-entry, what's to try next? -- Gleb. Thanks, Jiaqing -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: NMI Injection to Guest
Hi all, What about using vmx_inject_nmi(vcpu) to inject the NMIs into the guest, when we are sure about the vcpu on which the NMI is to be injected? Nipun On Mon, Jul 27, 2009 at 12:55 AM, Jiaqing Du jiaq...@gmail.com wrote: Hi Gleb, Thanks for your reply. 2009/7/26 Gleb Natapov g...@redhat.com: On Sat, Jul 25, 2009 at 10:46:39PM +0200, Jiaqing Du wrote: Hi list, I'm trying to extend OProfile to support guest profiling. One step of my work is to push an NMI to the guest(s) when a performance counter overflows. Please correct me if the following is not correct: counter overflow --> NMI to host --> VM exit --> int $2 to handle NMI on host --> ... --> VM entry --> NMI to guest Correct except the last step (--> NMI to guest). Host nmi is not propagated to guests. Yes. I need to add some code to propagate host NMI to guests. On the path between VM-exit and VM-entry, I want to push an NMI to the guest. I tried to put the following code on the path, but never succeeded. Various weird things happened, such as KVM hangs, guest kernel oops, and host hangs. I tried both code with Linux 2.6.30 and version 88. if (vmx_nmi_allowed()) { vmx_inject_nmi(); } Any suggestions? Where is the right place to push an NMI and what are the necessary checks? Call kvm_inject_nmi(vcpu). And don't forget to vcpu_load(vcpu) before doing it. See kvm_vcpu_ioctl_nmi(). Based on the code with Linux 2.6.30, what kvm_inject_nmi(vcpu) does is just set vcpu->arch.nmi_pending to 1. kvm_vcpu_ioctl_nmi() puts vcpu_load() before the setting and vcpu_put() after it. I need to push host NMI to guests between a VM-exit and a VM-entry after that. The VM-exit is due to an NMI caused by performance counter overflow. The following code within vcpu_enter_guest(), which is surrounded by a vcpu_load() and vcpu_put(), checks this vcpu->arch.nmi_pending and other related flags to decide whether an NMI should be pushed to guests. 
if (vcpu->arch.exception.pending) __queue_exception(vcpu); else if (irqchip_in_kernel(vcpu->kvm)) kvm_x86_ops->inject_pending_irq(vcpu); else kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); What I did is given below: 3097 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 3098 { ... ... 3156 if (kvm_vm_exit_on_cnt_overflow) { 3157 vcpu->arch.nmi_pending = 1; 3158 } 3159 3160 if (vcpu->arch.exception.pending) 3161 __queue_exception(vcpu); 3162 else if (irqchip_in_kernel(vcpu->kvm)) 3163 kvm_x86_ops->inject_pending_irq(vcpu); 3164 else 3165 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); ... 3236 } In vcpu_enter_guest(), before this part of code is reached, vcpu->arch.nmi_pending is set to 1 if the VM-exit is due to performance counter overflow. Still, no NMIs are seen by the guests. I also tried to put this vcpu->arch.nmi_pending = 1; somewhere else on the path between a VM-exit and VM-entry, it does not seem to work either. Only vmx_inject_nmi() manages to push NMIs to guests, but without the right sanity checks, it causes various weird host and guest behaviors. To inject NMIs on the path between a VM-exit and VM-entry, what's to try next? -- Gleb. Thanks, Jiaqing -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: OpenSolaris boot failure with KVM and VirtualBox
On 26/07/09 19:18, Avi Kivity wrote: On 07/26/2009 09:14 PM, Sid Boyce wrote: Is this a regression from previous kernel versions? What userspace are you using? Currently running 2.6.31-rc4 with the original kqemu-1.4.0pre1 on openSUSE 11.2 Milestone 4 and it boots OK - a 200G disk image is used. qemu-system-x86_64 -cdrom /ISO/osol-0906-ai-x86.iso -boot d /osol0906.qcow2 -smp 4 -m 500M Wait, are you using kqemu or kvm? # l /dev/kvm crw-rw----+ 1 root root 10, 232 2009-07-24 20:26 /dev/kvm # lsmod|grep kvm kvm_amd 41908 0 kvm 180488 1 kvm_amd From long ago I read that kvm needed kqemu, so I have always built the module, but I see here it's not used. # lsmod|grep qemu kqemu 173496 0 # rpm -qf /usr/bin/qemu-system-x86_64 qemu-0.10.1-2.21 Regards Sid. -- Sid Boyce ... Hamradio License G3VBV, Licensed Private Pilot Emeritus IBM/Amdahl Mainframes and Sun/Fujitsu Servers Tech Support Specialist, Cricket Coach Microsoft Windows Free Zone - Linux used for all Computing Tasks -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3] Allow larger BIOS image
From: Jordan Justen jljus...@gmail.com These changes are similar to my patches sent July 16, except they now are based on Yang Sheng's recent changes to enable a new ioctl for controlling the EPT identity mapping page location. -Jordan Jordan Justen (3): Update BIOS INT15-E820 to allow a larger BIOS image Move TSS pages to allow a larger BIOS image Move EPT identity mapping pages to allow a larger BIOS image kvm/bios/rombios.c | 8 ++++---- qemu-kvm-x86.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] Update BIOS INT15-E820 to allow a larger BIOS image
The bios will now reserve more memory via the E820 functions. Note that the standard KVM BIOS will most likely not make use of this expanded BIOS region. This change will synchronize the BIOS INT15-E820 reservations to match other changes that will allow alternate BIOS images to be larger in size. Previously the BIOS reserved: 0xfffbc000-0xfffbcfff - 4KB - EPT identity mapping pages 0xfffbd000-0xfffbffff - 12KB - TSS pages 0xfffc0000-0xffffffff - 256KB - Max bios.bin (usually top 128KB is used) Now the BIOS will reserve: 0xfeffc000-0xfeffcfff - 4KB - EPT identity mapping pages 0xfeffd000-0xfeffffff - 12KB - TSS Pages 0xff000000-0xffffffff - 16MB - Max bios.bin Signed-off-by: Jordan Justen jordan.l.jus...@intel.com --- kvm/bios/rombios.c | 8 ++++---- 1 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kvm/bios/rombios.c b/kvm/bios/rombios.c index 6186199..2d0c153 100644 --- a/kvm/bios/rombios.c +++ b/kvm/bios/rombios.c @@ -4596,14 +4596,14 @@ ASM_END case 5: /* 4 pages before the bios, 3 pages for vmx tss pages, * the other page for EPT real mode pagetable */ -set_e820_range(ES, regs.u.r16.di, 0xfffbc000L, - 0xfffc0000L, 0, 0, 2); +set_e820_range(ES, regs.u.r16.di, 0xfeffc000L, + 0xff000000L, 0, 0, 2); regs.u.r32.ebx = 6; break; case 6: -/* 256KB BIOS area at the end of 4 GB */ +/* 16MB BIOS area at the end of 4 GB */ set_e820_range(ES, regs.u.r16.di, - 0xfffc0000L, 0x00000000L ,0, 0, 2); + 0xff000000L, 0x00000000L ,0, 0, 2); if (extra_highbits_memory_size || extra_lowbits_memory_size) regs.u.r32.ebx = 7; else -- 1.6.0.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/3] Move TSS pages to allow a larger BIOS image
Move from: 0xfffbd000-0xfffbffff to: 0xfeffd000-0xfeffffff This step is required to free up the 0xff000000-0xffffffff (16MB) range for use with bios.bin. This change depends upon a change to kvm/bios/rombios.c so the bios INT15-E820 function will properly reserve the new location. Signed-off-by: Jordan Justen jordan.l.jus...@intel.com --- qemu-kvm-x86.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 492dbc5..0b47b57 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -62,7 +62,7 @@ static int kvm_init_tss(kvm_context_t kvm) * this address is 3 pages before the bios, and the bios should present * as unavaible memory */ - r = kvm_set_tss_addr(kvm, 0xfffbd000); + r = kvm_set_tss_addr(kvm, 0xfeffd000); if (r < 0) { fprintf(stderr, "kvm_init_tss: unable to set tss addr\n"); return r; -- 1.6.0.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] Move EPT identity mapping pages to allow a larger BIOS image
Move from: 0xfffbc000-0xfffbcfff to: 0xfeffc000-0xfeffcfff This step is required to free up the 0xff000000-0xffffffff (16MB) range for use with bios.bin. The KVM kernel change depends upon a change to kvm/bios/rombios.c so the bios INT15-E820 function will properly reserve the new location. Signed-off-by: Jordan Justen jordan.l.jus...@intel.com --- qemu-kvm-x86.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 0b47b57..65ba470 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -102,7 +102,7 @@ static int kvm_init_identity_map_page(kvm_context_t kvm) * this address is 4 pages before the bios, and the bios should present * as unavaible memory */ - r = kvm_set_identity_map_addr(kvm, 0xfffbc000); + r = kvm_set_identity_map_addr(kvm, 0xfeffc000); if (r < 0) { fprintf(stderr, "kvm_init_identity_map_page: unable to set identity mapping addr\n"); -- 1.6.0.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Host latency peaks due to kvm-intel
On Monday 27 July 2009 03:16:27 H. Peter Anvin wrote: Jan Kiszka wrote: Avi Kivity wrote: On 07/24/2009 12:41 PM, Jan Kiszka wrote: I vaguely recall that someone promised to add a feature reporting facility for all those nice things, modern VM-extensions may or may not support (something like or even an extension of /proc/cpuinfo). What is the state of this plan? Would be specifically interesting for Intel CPUs as there seem to be many of them out there with restrictions for special use cases - like real-time. Newer kernels do report some vmx features (like flexpriority) in /proc/cpuinfo but not all. Ah, nice. Then we just need this? Fine with me. Acked-by: H. Peter Anvin h...@zytor.com However, I guess the real question is whether we shouldn't export ALL VMX features in a consistent way instead? When I added feature reporting to cpuinfo, I only put the highlight features there, otherwise the VMX feature list would be at least as long as the CPU one. I also suggested a separate field for virtualization features, but some concerns about userspace tools were raised. Since we indeed have quite a lot of features, and will get more, would it be better to export part of the struct vmcs_config entries (that is, pin_based_exec_ctrl, cpu_based_exec_ctrl, and cpu_based_2nd_exec_ctrl) through /sys/module/kvm_intel/? Putting every feature into cpuinfo does not seem necessary for such a big list. -- regards Yang, Sheng -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html