[COMMIT master] Do not compile qemu-kvm.c and qemu-kvm-x86.c

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Instead, include them from upstream files

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/Makefile.target b/Makefile.target
index e0edd27..df1f32b 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -160,7 +160,6 @@ ifeq ($(ARCH),sparc64)
 CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc
 endif
 
-libobj-$(CONFIG_KVM) += qemu-kvm.o
 ifdef CONFIG_SOFTFLOAT
 libobj-y += fpu/softfloat.o
 else
@@ -171,13 +170,13 @@ libobj-y += op_helper.o helper.o
 
 ifeq ($(TARGET_ARCH), i386)
 libobj-y += helper.o
-libobj-$(CONFIG_KVM) += qemu-kvm-x86.o kvm-tpr-opt.o
+libobj-$(CONFIG_KVM) += kvm-tpr-opt.o
 libobj-$(CONFIG_KVM) += qemu-kvm-helper.o
 endif
 
 ifeq ($(TARGET_ARCH), x86_64)
 libobj-y += helper.o
-libobj-$(CONFIG_KVM) += qemu-kvm-x86.o kvm-tpr-opt.o
+libobj-$(CONFIG_KVM) += kvm-tpr-opt.o
 libobj-$(CONFIG_KVM) += qemu-kvm-helper.o
 endif
 
diff --git a/kvm-all.c b/kvm-all.c
index 4c2fdf5..e42b1f6 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1029,3 +1029,5 @@ void kvm_remove_all_breakpoints(CPUState *current_env)
 }
 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
 #endif
+
+#include qemu-kvm.c
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index b7eb096..cfa5b80 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -964,3 +964,5 @@ void kvm_arch_update_guest_debug(CPUState *env, struct 
kvm_guest_debug *dbg)
 }
 #endif /* KVM_CAP_SET_GUEST_DEBUG */
 #endif
+
+#include qemu-kvm-x86.c
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] replace USE_KVM with CONFIG_KVM

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Make things less confusing; we have KVM_UPSTREAM to differentiate
between the two versions anyway. kvm-all.c and kvm.c get compiled now,
but are protected with KVM_UPSTREAM too, so no function in there becomes
visible in the final binary.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/Makefile.target b/Makefile.target
index e3189a1..e0edd27 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -160,9 +160,7 @@ ifeq ($(ARCH),sparc64)
 CPPFLAGS+=-I$(SRC_PATH)/tcg/sparc
 endif
 
-ifeq ($(USE_KVM), 1)
-libobj-y += qemu-kvm.o
-endif
+libobj-$(CONFIG_KVM) += qemu-kvm.o
 ifdef CONFIG_SOFTFLOAT
 libobj-y += fpu/softfloat.o
 else
@@ -173,18 +171,14 @@ libobj-y += op_helper.o helper.o
 
 ifeq ($(TARGET_ARCH), i386)
 libobj-y += helper.o
-ifeq ($(USE_KVM), 1)
-libobj-y += qemu-kvm-x86.o kvm-tpr-opt.o
-libobj-y += qemu-kvm-helper.o
-endif
+libobj-$(CONFIG_KVM) += qemu-kvm-x86.o kvm-tpr-opt.o
+libobj-$(CONFIG_KVM) += qemu-kvm-helper.o
 endif
 
 ifeq ($(TARGET_ARCH), x86_64)
 libobj-y += helper.o
-ifeq ($(USE_KVM), 1)
-libobj-y += qemu-kvm-x86.o kvm-tpr-opt.o
-libobj-y += qemu-kvm-helper.o
-endif
+libobj-$(CONFIG_KVM) += qemu-kvm-x86.o kvm-tpr-opt.o
+libobj-$(CONFIG_KVM) += qemu-kvm-helper.o
 endif
 
 libobj-y += op_helper.o
@@ -203,9 +197,7 @@ endif
 
 ifeq ($(TARGET_BASE_ARCH), ia64)
 libobj-y += op_helper.o firmware.o
-ifeq ($(USE_KVM), 1)
-libobj-y += qemu-kvm-ia64.o
-endif
+libobj-$(CONFIG_KVM) += qemu-kvm-ia64.o
 endif
 
 ifeq ($(TARGET_BASE_ARCH), cris)
diff --git a/configure b/configure
index f8b80f2..9b744c5 100755
--- a/configure
+++ b/configure
@@ -2121,8 +2121,8 @@ disable_cpu_emulation() {
 configure_kvm() {
   if test $kvm = yes -a $target_softmmu = yes -a \
   \( $cpu = i386 -o $cpu = x86_64 -o $cpu = ia64 -o $cpu 
= powerpc \); then
-echo #define USE_KVM 1  $config_h
-echo USE_KVM=1  $config_mak
+echo #define CONFIG_KVM 1  $config_h
+echo CONFIG_KVM=y  $config_mak
 echo KVM_CFLAGS=$kvm_cflags  $config_mak
 if test $kvm_cap_pit = yes ; then
echo USE_KVM_PIT=1  $config_mak
@@ -2159,9 +2159,9 @@ case $target_arch2 in
   echo #define CONFIG_KQEMU 1  $config_h
 fi
 if test $target_kvm = yes ; then
-  echo USE_KVM=yes  $config_mak
+  echo CONFIG_KVM=y  $config_mak
   echo KVM_CFLAGS=$kvm_cflags  $config_mak
-  echo #define USE_KVM 1  $config_h
+  echo #define CONFIG_KVM 1  $config_h
 fi
 if test $xen = yes -a $target_softmmu = yes;
 then
@@ -2183,9 +2183,9 @@ case $target_arch2 in
 fi
 if [ use_upstream_kvm = yes ]; then
 if test $target_kvm = yes ; then
-  echo USE_KVM=yes  $config_mak
+  echo CONFIG_KVM=y  $config_mak
   echo KVM_CFLAGS=$kvm_cflags  $config_mak
-  echo #define USE_KVM 1  $config_h
+  echo #define CONFIG_KVM 1  $config_h
 fi
 fi
 if test $xen = yes -a $target_softmmu = yes
@@ -2281,7 +2281,7 @@ case $target_arch2 in
 if test $target_kvm = yes ; then
   echo CONFIG_KVM=y  $config_mak
   echo KVM_CFLAGS=$kvm_cflags  $config_mak
-  echo #define USE_KVM 1  $config_h
+  echo #define CONFIG_KVM 1  $config_h
 fi
 fi
 gdb_xml_files=power-core.xml power-fpu.xml power-altivec.xml 
power-spe.xml
diff --git a/hw/acpi.c b/hw/acpi.c
index 074e09f..7de9cb7 100644
--- a/hw/acpi.c
+++ b/hw/acpi.c
@@ -775,7 +775,7 @@ static void disable_processor(struct gpe_regs *g, int cpu)
 }
 
 #if defined(TARGET_I386) || defined(TARGET_X86_64)
-#ifdef USE_KVM
+#ifdef CONFIG_KVM
 static CPUState *qemu_kvm_cpu_env(int index)
 {
 CPUState *penv;
@@ -798,7 +798,7 @@ void qemu_system_cpu_hot_add(int cpu, int state)
 CPUState *env;
 
 if (state
-#ifdef USE_KVM
+#ifdef CONFIG_KVM
  (!qemu_kvm_cpu_env(cpu))
 #endif
 ) {
diff --git a/hw/msix.c b/hw/msix.c
index 5f77dc9..b5dfa0b 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -63,7 +63,7 @@
 /* Flag for interrupt controller to declare MSI-X support */
 int msix_supported;
 
-#ifdef USE_KVM
+#ifdef CONFIG_KVM
 /* KVM specific MSIX helpers */
 static void kvm_msix_free(PCIDevice *dev)
 {
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 6b82232..bda2397 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -15,7 +15,7 @@
 #include net.h
 #include qemu-timer.h
 #include virtio-net.h
-#ifdef USE_KVM
+#ifdef CONFIG_KVM
 #include qemu-kvm.h
 #endif
 
@@ -344,7 +344,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, 
VirtQueue *vq)
 
 qemu_flush_queued_packets(n-vc);
 
-#ifdef USE_KVM
+#ifdef CONFIG_KVM
 /* We now have RX buffers, signal to the IO thread to break out of the
select to re-poll the tap file descriptor */
 if (kvm_enabled())
diff --git a/kvm-all.c b/kvm-all.c
index 8567ac9..4c2fdf5 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -26,6 +26,7 @@
 #include gdbstub.h
 #include kvm.h
 
+#ifdef KVM_UPSTREAM
 /* KVM uses PAGE_SIZE in it's definition of COALESCED_MMIO_MAX */
 #define 

[COMMIT master] duplicate KVMState

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

In this patch, we duplicate most of KVMState in our files. This should be
removed later, when they are 100 % equal. Meanwhile, we fold our kvm_context_t
structure inside it.

To make the transition smooth, we still keep a global variable kvm_context
pointing to its position inside the global KVMState. This way we don't
need to hurry about changing all callers.

kvm_init() and kvm_finalize() are changed, though, since they now have to
deal with the creation/destruction of a global KVMState.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 43e7b4c..45f5abe 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -42,6 +42,9 @@ int kvm_irqchip = 1;
 int kvm_pit = 1;
 int kvm_pit_reinject = 1;
 int kvm_nested = 0;
+
+
+static KVMState *kvm_state;
 kvm_context_t kvm_context;
 
 pthread_mutex_t qemu_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -416,16 +419,16 @@ int kvm_dirty_pages_log_reset(kvm_context_t kvm)
 }
 
 
-kvm_context_t kvm_init(void *opaque)
+int kvm_init(int smp_cpus)
 {
int fd;
-   kvm_context_t kvm;
int r, gsi_count;
 
+
fd = open(/dev/kvm, O_RDWR);
if (fd == -1) {
perror(open /dev/kvm);
-   return NULL;
+   return -1;
}
r = ioctl(fd, KVM_GET_API_VERSION, 0);
if (r == -1) {
@@ -446,35 +449,39 @@ kvm_context_t kvm_init(void *opaque)
}
kvm_abi = r;
kvm_page_size = getpagesize();
-   kvm = qemu_mallocz(sizeof(*kvm));
-   kvm-fd = fd;
-   kvm-vm_fd = -1;
-   kvm-opaque = opaque;
-   kvm-dirty_pages_log_all = 0;
-   kvm-no_irqchip_creation = 0;
-   kvm-no_pit_creation = 0;
+   kvm_state = qemu_mallocz(sizeof(*kvm_state));
+kvm_context = kvm_state-kvm_context;
 
-   gsi_count = kvm_get_gsi_count(kvm);
+   kvm_context-fd = fd;
+   kvm_context-vm_fd = -1;
+   kvm_context-opaque = cpu_single_env;
+   kvm_context-dirty_pages_log_all = 0;
+   kvm_context-no_irqchip_creation = 0;
+   kvm_context-no_pit_creation = 0;
+
+   gsi_count = kvm_get_gsi_count(kvm_context);
if (gsi_count  0) {
int gsi_bits, i;
 
/* Round up so we can search ints using ffs */
gsi_bits = ALIGN(gsi_count, 32);
-   kvm-used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
-   kvm-max_gsi = gsi_bits;
+   kvm_context-used_gsi_bitmap = qemu_mallocz(gsi_bits / 8);
+   kvm_context-max_gsi = gsi_bits;
 
/* Mark any over-allocated bits as already in use */
for (i = gsi_count; i  gsi_bits; i++)
-   set_gsi(kvm, i);
+   set_gsi(kvm_context, i);
}
 
-   return kvm;
+pthread_mutex_lock(qemu_mutex);
+   return 0;
+
  out_close:
close(fd);
-   return NULL;
+   return -1;
 }
 
-void kvm_finalize(kvm_context_t kvm)
+static void kvm_finalize(KVMState *s)
 {
/* FIXME
if (kvm-vcpu_fd[0] != -1)
@@ -482,8 +489,8 @@ void kvm_finalize(kvm_context_t kvm)
if (kvm-vm_fd != -1)
close(kvm-vm_fd);
*/
-   close(kvm-fd);
-   free(kvm);
+   close(s-kvm_context.fd);
+   free(s);
 }
 
 void kvm_disable_irqchip_creation(kvm_context_t kvm)
@@ -2217,18 +2224,6 @@ int kvm_main_loop(void)
 return 0;
 }
 
-int kvm_qemu_init()
-{
-/* Try to initialize kvm */
-kvm_context = kvm_init(cpu_single_env);
-if (!kvm_context) {
-   return -1;
-}
-pthread_mutex_lock(qemu_mutex);
-
-return 0;
-}
-
 #ifdef TARGET_I386
 static int destroy_region_works = 0;
 #endif
@@ -2252,12 +2247,12 @@ int kvm_qemu_create_context(void)
 kvm_disable_pit_creation(kvm_context);
 }
 if (kvm_create(kvm_context, 0, NULL)  0) {
-   kvm_finalize(kvm_context);
+   kvm_finalize(kvm_state);
return -1;
 }
 r = kvm_arch_qemu_create_context();
 if(r 0)
-   kvm_finalize(kvm_context);
+   kvm_finalize(kvm_state);
 if (kvm_pit  !kvm_pit_reinject) {
 if (kvm_reinject_control(kvm_context, 0)) {
 fprintf(stderr, failure to disable in-kernel PIT reinjection\n);
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 20993f6..5d2d54c 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -128,18 +128,7 @@ int kvm_set_msrs(kvm_vcpu_context_t, struct kvm_msr_entry 
*msrs, int n);
  * \param opaque Not used
  * \return NULL on failure
  */
-kvm_context_t kvm_init(void *opaque);
-
-/*!
- * \brief Cleanup the KVM context
- *
- * Should always be called when closing down KVM.\n
- * Exception: If kvm_init() fails, this function should not be called, as the
- * context would be invalid
- *
- * \param kvm Pointer to the kvm_context that is to be freed
- */
-void kvm_finalize(kvm_context_t kvm);
+int kvm_init(int smp_cpus);
 
 /*!
  * \brief Disable the in-kernel IRQCHIP creation
@@ -1166,4 

[COMMIT master] provide env-kvm_fd

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

qemu upstream puts kvm information on env. Do that too, since it will
allow us to use CPUState in cpu-specific functions, instead of kvm-specific
types.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 45f5abe..6897e3c 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -503,11 +503,12 @@ void kvm_disable_pit_creation(kvm_context_t kvm)
kvm-no_pit_creation = 1;
 }
 
-kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id)
+kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id)
 {
long mmap_size;
int r;
kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct 
kvm_vcpu_context));
+kvm_context_t kvm = kvm_context;
 
vcpu_ctx-kvm = kvm;
vcpu_ctx-id = id;
@@ -518,6 +519,10 @@ kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int 
id)
goto err;
}
vcpu_ctx-fd = r;
+
+env-kvm_fd = r;
+env-kvm_state = kvm_state;
+
mmap_size = ioctl(kvm-fd, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size == -1) {
fprintf(stderr, get vcpu mmap size: %m\n);
@@ -2013,7 +2018,7 @@ static void *ap_main_loop(void *_env)
 env-thread_id = kvm_get_thread_id();
 sigfillset(signals);
 sigprocmask(SIG_BLOCK, signals, NULL);
-env-kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(kvm_context, env-cpu_index);
+env-kvm_cpu_state.vcpu_ctx = kvm_create_vcpu(env, env-cpu_index);
 
 #ifdef USE_KVM_DEVICE_ASSIGNMENT
 /* do ioperm for io ports of assigned devices */
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 5d2d54c..f43 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -181,7 +181,7 @@ void kvm_create_irqchip(kvm_context_t kvm);
  * \param slot vcpu number ( 0)
  * \return 0 on success, -errno on failure
  */
-kvm_vcpu_context_t kvm_create_vcpu(kvm_context_t kvm, int id);
+kvm_vcpu_context_t kvm_create_vcpu(CPUState *env, int id);
 
 /*!
  * \brief Start the VCPU
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] use kvm_upstream sw_breakpoints structure

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index b531ca4..3bbb9d2 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -1519,7 +1519,7 @@ int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info)
break;
}
}
-} else if (kvm_find_sw_breakpoint(arch_info-pc))
+} else if (kvm_find_sw_breakpoint(cpu_single_env, arch_info-pc))
handle = 1;
 
 if (!handle)
@@ -1542,7 +1542,7 @@ void kvm_arch_update_guest_debug(CPUState *env, struct 
kvm_guest_debug *dbg)
 };
 int n;
 
-if (!TAILQ_EMPTY(kvm_sw_breakpoints))
+if (kvm_sw_breakpoints_active(env))
dbg-control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
 
 if (nb_hw_breakpoint  0) {
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 6897e3c..b0661b6 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -459,6 +459,10 @@ int kvm_init(int smp_cpus)
kvm_context-no_irqchip_creation = 0;
kvm_context-no_pit_creation = 0;
 
+#ifdef KVM_CAP_SET_GUEST_DEBUG
+TAILQ_INIT(kvm_state-kvm_sw_breakpoints);
+#endif
+
gsi_count = kvm_get_gsi_count(kvm_context);
if (gsi_count  0) {
int gsi_bits, i;
@@ -2439,14 +2443,13 @@ int kvm_qemu_init_env(CPUState *cenv)
 }
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
-struct kvm_sw_breakpoint_head kvm_sw_breakpoints =
-TAILQ_HEAD_INITIALIZER(kvm_sw_breakpoints);
 
-struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc)
+struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
+ target_ulong pc)
 {
 struct kvm_sw_breakpoint *bp;
 
-TAILQ_FOREACH(bp, kvm_sw_breakpoints, entry) {
+TAILQ_FOREACH(bp, env-kvm_state-kvm_sw_breakpoints, entry) {
if (bp-pc == pc)
return bp;
 }
@@ -2481,6 +2484,11 @@ int kvm_update_guest_debug(CPUState *env, unsigned long 
reinject_trap)
 return data.err;
 }
 
+int kvm_sw_breakpoints_active(CPUState *env)
+{
+return !TAILQ_EMPTY(env-kvm_state-kvm_sw_breakpoints);
+}
+
 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
   target_ulong len, int type)
 {
@@ -2489,7 +2497,7 @@ int kvm_insert_breakpoint(CPUState *current_env, 
target_ulong addr,
 int err;
 
 if (type == GDB_BREAKPOINT_SW) {
-   bp = kvm_find_sw_breakpoint(addr);
+   bp = kvm_find_sw_breakpoint(current_env, addr);
if (bp) {
bp-use_count++;
return 0;
@@ -2507,7 +2515,8 @@ int kvm_insert_breakpoint(CPUState *current_env, 
target_ulong addr,
return err;
}
 
-   TAILQ_INSERT_HEAD(kvm_sw_breakpoints, bp, entry);
+TAILQ_INSERT_HEAD(current_env-kvm_state-kvm_sw_breakpoints,
+  bp, entry);
 } else {
err = kvm_arch_insert_hw_breakpoint(addr, len, type);
if (err)
@@ -2530,7 +2539,7 @@ int kvm_remove_breakpoint(CPUState *current_env, 
target_ulong addr,
 int err;
 
 if (type == GDB_BREAKPOINT_SW) {
-   bp = kvm_find_sw_breakpoint(addr);
+   bp = kvm_find_sw_breakpoint(current_env, addr);
if (!bp)
return -ENOENT;
 
@@ -2543,7 +2552,7 @@ int kvm_remove_breakpoint(CPUState *current_env, 
target_ulong addr,
if (err)
return err;
 
-   TAILQ_REMOVE(kvm_sw_breakpoints, bp, entry);
+   TAILQ_REMOVE(current_env-kvm_state-kvm_sw_breakpoints, bp, entry);
qemu_free(bp);
 } else {
err = kvm_arch_remove_hw_breakpoint(addr, len, type);
@@ -2564,7 +2573,7 @@ void kvm_remove_all_breakpoints(CPUState *current_env)
 struct kvm_sw_breakpoint *bp, *next;
 CPUState *env;
 
-TAILQ_FOREACH_SAFE(bp, kvm_sw_breakpoints, entry, next) {
+TAILQ_FOREACH_SAFE(bp, current_env-kvm_state-kvm_sw_breakpoints, entry, 
next) {
 if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
 /* Try harder to find a CPU that currently sees the breakpoint. */
 for (env = first_cpu; env != NULL; env = env-next_cpu) {
diff --git a/qemu-kvm.h b/qemu-kvm.h
index f43..d5291a3 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -1004,12 +1004,12 @@ struct kvm_sw_breakpoint {
 int use_count;
 TAILQ_ENTRY(kvm_sw_breakpoint) entry;
 };
-TAILQ_HEAD(kvm_sw_breakpoint_head, kvm_sw_breakpoint);
 
-extern struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
+TAILQ_HEAD(kvm_sw_breakpoint_head, kvm_sw_breakpoint);
 
 int kvm_arch_debug(struct kvm_debug_exit_arch *arch_info);
-struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(target_ulong pc);
+int kvm_sw_breakpoints_active(CPUState *env);
+struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, target_ulong 
pc);
 int kvm_arch_insert_sw_breakpoint(CPUState *current_env,
   struct kvm_sw_breakpoint *bp);
 int kvm_arch_remove_sw_breakpoint(CPUState *current_env,
@@ -1174,6 +1174,9 @@ 

[COMMIT master] qemu-kvm: x86: fix memleak if ioctl fails

2009-07-26 Thread Avi Kivity
From: Amit Shah amit.s...@redhat.com

Fix a memleak when the KVM_SET_CPUID2 ioctl fails. Free the
memory that we allocate to store cpuids.

Reported-by: Mark McLoughlin mar...@redhat.com
Signed-off-by: Amit Shah amit.s...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 3bbb9d2..350f272 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -535,7 +535,7 @@ int kvm_setup_cpuid2(kvm_vcpu_context_t vcpu, int nent,
r = ioctl(vcpu-fd, KVM_SET_CPUID2, cpuid);
if (r == -1) {
fprintf(stderr, kvm_setup_cpuid2: %m\n);
-   return -errno;
+   r = -errno;
}
free(cpuid);
return r;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] Set the iothread's eventfd/pipe descriptors to non-blocking

2009-07-26 Thread Avi Kivity
From: Dor Laor dl...@redhat.com

This fixes a migration issue when the destination is loaded.

If the migration socket is full, we get EAGAIN for the write.
The set_fd_handler2 defers the write for later on. The function
tries to wake up the iothread by qemu_kvm_notify_work.
Since this happens in a loop, multiple times, the pipe that emulates eventfd
becomes full and we get a deadlock.

Mark McLoughlin suggested to remove spurious wake-up of the migration code
when we get EAGAIN and wait for the socket to become writeable. (+1)

Nevertheless, the pipe descriptors shouldn't be blocking, and the reader can
also read several chunks at a time.

Signed-off-by: Dor Laor d...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 355adf4..3c892e6 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -2106,14 +2106,17 @@ void qemu_kvm_notify_work(void)
if (len == -1  errno == EINTR)
continue;
 
-   if (len = 0)
+/* In case we have a pipe, there is not reason to insist writing
+ * 8 bytes
+ */
+   if (len == -1  errno == EAGAIN)
break;
 
+if (len = 0)
+break;
+
offset += len;
 }
-
-if (offset != 8)
-   fprintf(stderr, failed to notify io thread\n);
 }
 
 /* If we have signalfd, we mask out the signals we want to handle and then
@@ -2152,20 +2155,18 @@ static void sigfd_handler(void *opaque)
 static void io_thread_wakeup(void *opaque)
 {
 int fd = (unsigned long)opaque;
-char buffer[8];
-size_t offset = 0;
+char buffer[4096];
 
-while (offset  8) {
+/* Drain the pipe/(eventfd) */
+while (1) {
ssize_t len;
 
-   len = read(fd, buffer + offset, 8 - offset);
+   len = read(fd, buffer, sizeof(buffer));
if (len == -1  errno == EINTR)
continue;
 
if (len = 0)
break;
-
-   offset += len;
 }
 }
 
@@ -2183,6 +2184,9 @@ int kvm_main_loop(void)
return -errno;
 }
 
+fcntl(fds[0], F_SETFL, O_NONBLOCK);
+fcntl(fds[1], F_SETFL, O_NONBLOCK);
+
 qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL,
 (void *)(unsigned long)fds[0]);
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] replace qemu_kvm_cpu_env

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

We now have an upstream qemu function that does exactly that,
but in a kvm-independent way. Use it.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/hw/acpi.c b/hw/acpi.c
index 7de9cb7..d23abd1 100644
--- a/hw/acpi.c
+++ b/hw/acpi.c
@@ -775,33 +775,11 @@ static void disable_processor(struct gpe_regs *g, int cpu)
 }
 
 #if defined(TARGET_I386) || defined(TARGET_X86_64)
-#ifdef CONFIG_KVM
-static CPUState *qemu_kvm_cpu_env(int index)
-{
-CPUState *penv;
-
-penv = first_cpu;
-
-while (penv) {
-if (penv-cpu_index == index)
-return penv;
-penv = (CPUState *)penv-next_cpu;
-}
-
-return NULL;
-}
-#endif
-
-
 void qemu_system_cpu_hot_add(int cpu, int state)
 {
 CPUState *env;
 
-if (state
-#ifdef CONFIG_KVM
- (!qemu_kvm_cpu_env(cpu))
-#endif
-) {
+if (state  !qemu_get_cpu(cpu)) {
 env = pc_new_cpu(model);
 if (!env) {
 fprintf(stderr, cpu %d creation failed\n, cpu);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] kvm: allow qemu to set EPT identity mapping address

2009-07-26 Thread Avi Kivity
From: Sheng Yang sh...@linux.intel.com

If we use a larger BIOS image than the current 256KB, we would need to move
the reserved TSS and EPT identity mapping pages. Currently TSS supports this,
but EPT does not.

(change from v1, use parameter address instead of value for ioctl)

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index af6d592..ff1025d 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -468,6 +468,7 @@ struct kvm_trace_rec {
 #endif
 #define KVM_CAP_PIT2 33
 #define KVM_CAP_PIT_STATE2 35
+#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -529,6 +530,7 @@ struct kvm_x86_mce {
 #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
struct kvm_userspace_memory_region)
 #define KVM_SET_TSS_ADDR  _IO(KVMIO, 0x47)
+#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index df40aae..d2c8abe 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -73,6 +73,47 @@ static int kvm_init_tss(kvm_context_t kvm)
return 0;
 }
 
+static int kvm_set_identity_map_addr(kvm_context_t kvm, unsigned long addr)
+{
+#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
+   int r;
+
+   r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR);
+   if (r  0) {
+   r = ioctl(kvm-vm_fd, KVM_SET_IDENTITY_MAP_ADDR, addr);
+   if (r == -1) {
+   fprintf(stderr, kvm_set_identity_map_addr: %m\n);
+   return -errno;
+   }
+   return 0;
+   }
+#endif
+   return -ENOSYS;
+}
+
+static int kvm_init_identity_map_page(kvm_context_t kvm)
+{
+#ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
+   int r;
+
+   r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR);
+   if (r  0) {
+   /*
+* this address is 4 pages before the bios, and the bios should 
present
+* as unavaible memory
+*/
+   r = kvm_set_identity_map_addr(kvm, 0xfffbc000);
+   if (r  0) {
+   fprintf(stderr, kvm_init_identity_map_page: 
+   unable to set identity mapping addr\n);
+   return r;
+   }
+
+   }
+#endif
+   return 0;
+}
+
 static int kvm_create_pit(kvm_context_t kvm)
 {
 #ifdef KVM_CAP_PIT
@@ -104,6 +145,10 @@ int kvm_arch_create(kvm_context_t kvm, unsigned long 
phys_mem_bytes,
if (r  0)
return r;
 
+   r = kvm_init_identity_map_page(kvm);
+   if (r  0)
+   return r;
+
r = kvm_create_pit(kvm);
if (r  0)
return r;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] reuse upstream breakpoint code

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Drop KVM_UPSTREAM around functions we intend to reuse.
This allows us to share the code in kvm-all.c that is identical to the code
in qemu-kvm.c.

Signed-off-by: Glauber Costa glom...@redhat.com
CC: Jan Kiszka jan.kis...@siemens.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm-all.c b/kvm-all.c
index e42b1f6..67908a7 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -873,6 +873,8 @@ void kvm_setup_guest_memory(void *start, size_t size)
 }
 }
 
+#endif /* KVM_UPSTREAM */
+
 #ifdef KVM_CAP_SET_GUEST_DEBUG
 struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
  target_ulong pc)
@@ -891,6 +893,7 @@ int kvm_sw_breakpoints_active(CPUState *env)
 return !TAILQ_EMPTY(env-kvm_state-kvm_sw_breakpoints);
 }
 
+#ifdef KVM_UPSTREAM
 int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap)
 {
 struct kvm_guest_debug dbg;
@@ -904,6 +907,7 @@ int kvm_update_guest_debug(CPUState *env, unsigned long 
reinject_trap)
 
 return kvm_vcpu_ioctl(env, KVM_SET_GUEST_DEBUG, dbg);
 }
+#endif
 
 int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
   target_ulong len, int type)
@@ -1028,6 +1032,5 @@ void kvm_remove_all_breakpoints(CPUState *current_env)
 {
 }
 #endif /* !KVM_CAP_SET_GUEST_DEBUG */
-#endif
 
 #include qemu-kvm.c
diff --git a/kvm.h b/kvm.h
index e9a43e2..0191752 100644
--- a/kvm.h
+++ b/kvm.h
@@ -16,6 +16,7 @@
 
 #include config.h
 #include sys-queue.h
+#include qemu-kvm.h
 
 #ifdef KVM_UPSTREAM
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index b0661b6..355adf4 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -2444,18 +2444,6 @@ int kvm_qemu_init_env(CPUState *cenv)
 
 #ifdef KVM_CAP_SET_GUEST_DEBUG
 
-struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env,
- target_ulong pc)
-{
-struct kvm_sw_breakpoint *bp;
-
-TAILQ_FOREACH(bp, env-kvm_state-kvm_sw_breakpoints, entry) {
-   if (bp-pc == pc)
-   return bp;
-}
-return NULL;
-}
-
 struct kvm_set_guest_debug_data {
 struct kvm_guest_debug dbg;
 int err;
@@ -2484,133 +2472,7 @@ int kvm_update_guest_debug(CPUState *env, unsigned long 
reinject_trap)
 return data.err;
 }
 
-int kvm_sw_breakpoints_active(CPUState *env)
-{
-return !TAILQ_EMPTY(env-kvm_state-kvm_sw_breakpoints);
-}
-
-int kvm_insert_breakpoint(CPUState *current_env, target_ulong addr,
-  target_ulong len, int type)
-{
-struct kvm_sw_breakpoint *bp;
-CPUState *env;
-int err;
-
-if (type == GDB_BREAKPOINT_SW) {
-   bp = kvm_find_sw_breakpoint(current_env, addr);
-   if (bp) {
-   bp-use_count++;
-   return 0;
-   }
-
-   bp = qemu_malloc(sizeof(struct kvm_sw_breakpoint));
-   if (!bp)
-   return -ENOMEM;
-
-   bp-pc = addr;
-   bp-use_count = 1;
-   err = kvm_arch_insert_sw_breakpoint(current_env, bp);
-   if (err) {
-   free(bp);
-   return err;
-   }
-
-TAILQ_INSERT_HEAD(current_env-kvm_state-kvm_sw_breakpoints,
-  bp, entry);
-} else {
-   err = kvm_arch_insert_hw_breakpoint(addr, len, type);
-   if (err)
-   return err;
-}
-
-for (env = first_cpu; env != NULL; env = env-next_cpu) {
-   err = kvm_update_guest_debug(env, 0);
-   if (err)
-   return err;
-}
-return 0;
-}
-
-int kvm_remove_breakpoint(CPUState *current_env, target_ulong addr,
-  target_ulong len, int type)
-{
-struct kvm_sw_breakpoint *bp;
-CPUState *env;
-int err;
-
-if (type == GDB_BREAKPOINT_SW) {
-   bp = kvm_find_sw_breakpoint(current_env, addr);
-   if (!bp)
-   return -ENOENT;
-
-   if (bp-use_count  1) {
-   bp-use_count--;
-   return 0;
-   }
-
-   err = kvm_arch_remove_sw_breakpoint(current_env, bp);
-   if (err)
-   return err;
-
-   TAILQ_REMOVE(current_env-kvm_state-kvm_sw_breakpoints, bp, entry);
-   qemu_free(bp);
-} else {
-   err = kvm_arch_remove_hw_breakpoint(addr, len, type);
-   if (err)
-   return err;
-}
-
-for (env = first_cpu; env != NULL; env = env-next_cpu) {
-   err = kvm_update_guest_debug(env, 0);
-   if (err)
-   return err;
-}
-return 0;
-}
-
-void kvm_remove_all_breakpoints(CPUState *current_env)
-{
-struct kvm_sw_breakpoint *bp, *next;
-CPUState *env;
-
-TAILQ_FOREACH_SAFE(bp, current_env-kvm_state-kvm_sw_breakpoints, entry, 
next) {
-if (kvm_arch_remove_sw_breakpoint(current_env, bp) != 0) {
-/* Try harder to find a CPU that currently sees the breakpoint. */
-for (env = first_cpu; env != NULL; env = env-next_cpu) {
-if (kvm_arch_remove_sw_breakpoint(env, bp) == 0)
-break;
-}
-}
-}
-

[COMMIT master] virtio-net: replace custom io thread notify with qemu one

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Replace qemu_kvm_notify_work() with qemu_notify_event(), which ends
up calling it anyway.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index bda2397..75c9695 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -344,12 +344,9 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, 
VirtQueue *vq)
 
 qemu_flush_queued_packets(n-vc);
 
-#ifdef CONFIG_KVM
 /* We now have RX buffers, signal to the IO thread to break out of the
select to re-poll the tap file descriptor */
-if (kvm_enabled())
-qemu_kvm_notify_work();
-#endif
+qemu_notify_event();
 }
 
 static int do_virtio_net_can_receive(VirtIONet *n, int bufsize)
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] fix segfault with -no-kvm

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Our PIT implementation calls qemu_kvm_pit_in_kernel() without first
checking kvm_enabled(), as everybody else does. This makes it
dereference the kvm_context pointer, which will be NULL.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/hw/i8254.c b/hw/i8254.c
index fd0bdfe..34a716c 100644
--- a/hw/i8254.c
+++ b/hw/i8254.c
@@ -478,7 +478,7 @@ void hpet_disable_pit(void)
 {
 PITChannelState *s = pit_state.channels[0];
 
-if (qemu_kvm_pit_in_kernel()) {
+if (kvm_enabled()  qemu_kvm_pit_in_kernel()) {
 if (qemu_kvm_has_pit_state2()) {
 kvm_hpet_disable_kpit();
 } else {
@@ -502,7 +502,7 @@ void hpet_enable_pit(void)
 PITState *pit = pit_state;
 PITChannelState *s = pit-channels[0];
 
-if (qemu_kvm_pit_in_kernel()) {
+if (kvm_enabled()  qemu_kvm_pit_in_kernel()) {
 if (qemu_kvm_has_pit_state2()) {
 kvm_hpet_enable_kpit();
 } else {
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] Add MCE simulation support to qemu/kvm

2009-07-26 Thread Avi Kivity
From: Huang Ying ying.hu...@intel.com

KVM ioctls are used to initialize MCE simulation and inject MCE. The
real MCE simulation is implemented in the Linux kernel. The kernel part
has been merged.

ChangeLog:

v7:

- Re-based on qemu-kvm.git/next branch

v6:

- Re-based on latest qemu-kvm.git

v5:

- Re-based on latest qemu-kvm.git

v3:

- Re-based on qemu/tcg MCE support patch

v2:

- Use the new kernel MCE capability export interface.

Signed-off-by: Huang Ying ying.hu...@intel.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm/include/linux/kvm.h b/kvm/include/linux/kvm.h
index 790601d..af6d592 100644
--- a/kvm/include/linux/kvm.h
+++ b/kvm/include/linux/kvm.h
@@ -463,6 +463,9 @@ struct kvm_trace_rec {
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#ifdef __KVM_HAVE_MCE
+#define KVM_CAP_MCE 31
+#endif
 #define KVM_CAP_PIT2 33
 #define KVM_CAP_PIT_STATE2 35
 
@@ -504,6 +507,19 @@ struct kvm_irq_routing {
 
 #endif
 
+#ifdef KVM_CAP_MCE
+/* x86 MCE */
+struct kvm_x86_mce {
+   __u64 status;
+   __u64 addr;
+   __u64 misc;
+   __u64 mcg_status;
+   __u8 bank;
+   __u8 pad1[7];
+   __u64 pad2[3];
+};
+#endif
+
 /*
  * ioctls for VM fds
  */
@@ -592,6 +608,10 @@ struct kvm_irq_routing {
 #define KVM_NMI   _IO(KVMIO,  0x9a)
 /* Available with KVM_CAP_SET_GUEST_DEBUG */
 #define KVM_SET_GUEST_DEBUG   _IOW(KVMIO,  0x9b, struct kvm_guest_debug)
+/* MCE for x86 */
+#define KVM_X86_SETUP_MCE _IOW(KVMIO,  0x9c, __u64)
+#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
+#define KVM_X86_SET_MCE   _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
 
 /*
  * Deprecated interfaces
diff --git a/kvm/include/x86/asm/kvm.h b/kvm/include/x86/asm/kvm.h
index 0c6bf8a..411063c 100644
--- a/kvm/include/x86/asm/kvm.h
+++ b/kvm/include/x86/asm/kvm.h
@@ -57,6 +57,7 @@
 #define __KVM_HAVE_USER_NMI
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_MSIX
+#define __KVM_HAVE_MCE
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 350f272..df40aae 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -432,6 +432,39 @@ int kvm_set_msrs(kvm_vcpu_context_t vcpu, struct 
kvm_msr_entry *msrs, int n)
 return r;
 }
 
+int kvm_get_mce_cap_supported(kvm_context_t kvm, uint64_t *mce_cap,
+  int *max_banks)
+{
+#ifdef KVM_CAP_MCE
+int r;
+
+r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
+if (r  0) {
+*max_banks = r;
+return ioctl(kvm-fd, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
+}
+#endif
+return -ENOSYS;
+}
+
+int kvm_setup_mce(kvm_vcpu_context_t vcpu, uint64_t *mcg_cap)
+{
+#ifdef KVM_CAP_MCE
+return ioctl(vcpu-fd, KVM_X86_SETUP_MCE, mcg_cap);
+#else
+return -ENOSYS;
+#endif
+}
+
+int kvm_set_mce(kvm_vcpu_context_t vcpu, struct kvm_x86_mce *m)
+{
+#ifdef KVM_CAP_MCE
+return ioctl(vcpu-fd, KVM_X86_SET_MCE, m);
+#else
+return -ENOSYS;
+#endif
+}
+
 static void print_seg(FILE *file, const char *name, struct kvm_segment *seg)
 {
fprintf(stderr,
@@ -1285,6 +1318,28 @@ int kvm_arch_qemu_init_env(CPUState *cenv)
 
 kvm_setup_cpuid2(cenv-kvm_cpu_state.vcpu_ctx, cpuid_nent, cpuid_ent);
 
+#ifdef KVM_CAP_MCE
+if (((cenv-cpuid_version  8)0xF) = 6
+ (cenv-cpuid_features(CPUID_MCE|CPUID_MCA)) == 
(CPUID_MCE|CPUID_MCA)
+ kvm_check_extension(kvm_context, KVM_CAP_MCE)  0) {
+uint64_t mcg_cap;
+int banks;
+
+if (kvm_get_mce_cap_supported(kvm_context, mcg_cap, banks))
+perror(kvm_get_mce_cap_supported FAILED);
+else {
+if (banks  MCE_BANKS_DEF)
+banks = MCE_BANKS_DEF;
+mcg_cap = MCE_CAP_DEF;
+mcg_cap |= banks;
+if (kvm_setup_mce(cenv-kvm_cpu_state.vcpu_ctx, mcg_cap))
+perror(kvm_setup_mce FAILED);
+else
+cenv-mcg_cap = mcg_cap;
+}
+}
+#endif
+
 return 0;
 }
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 3c892e6..c13ecba 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -2703,3 +2703,43 @@ int kvm_set_boot_cpu_id(uint32_t id)
 {
return kvm_set_boot_vcpu_id(kvm_context, id);
 }
+
+#ifdef TARGET_I386
+#ifdef KVM_CAP_MCE
+struct kvm_x86_mce_data
+{
+CPUState *env;
+struct kvm_x86_mce *mce;
+};
+
+static void kvm_do_inject_x86_mce(void *_data)
+{
+struct kvm_x86_mce_data *data = _data;
+int r;
+
+r = kvm_set_mce(data-env-kvm_cpu_state.vcpu_ctx, data-mce);
+if (r  0)
+perror(kvm_set_mce FAILED);
+}
+#endif
+
+void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
+uint64_t mcg_status, uint64_t addr, uint64_t misc)
+{
+#ifdef KVM_CAP_MCE
+struct kvm_x86_mce mce = {
+.bank = bank,
+.status = status,
+.mcg_status = mcg_status,
+.addr = addr,

[COMMIT master] remove kvm types from handle unhandled

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

I'm in an ongoing process of not using kvm-specific types in function
declarations. handle_unhandled() is the first victim. Since we don't
really use these arguments, only the reason, remove them entirely.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index c13ecba..2484bd9 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -176,8 +176,7 @@ int kvm_mmio_write(void *opaque, uint64_t addr, uint8_t 
*data, int len)
return 0;
 }
 
-static int handle_unhandled(kvm_context_t kvm, kvm_vcpu_context_t vcpu,
-uint64_t reason)
+static int handle_unhandled(uint64_t reason)
 {
 fprintf(stderr, kvm: unhandled exit %PRIx64\n, reason);
 return -EINVAL;
@@ -1085,12 +1084,10 @@ again:
if (1) {
switch (run-exit_reason) {
case KVM_EXIT_UNKNOWN:
-   r = handle_unhandled(kvm, vcpu,
-   run-hw.hardware_exit_reason);
+   r = handle_unhandled(run-hw.hardware_exit_reason);
break;
case KVM_EXIT_FAIL_ENTRY:
-   r = handle_unhandled(kvm, vcpu,
-   run-fail_entry.hardware_entry_failure_reason);
+   r = 
handle_unhandled(run-fail_entry.hardware_entry_failure_reason);
break;
case KVM_EXIT_EXCEPTION:
fprintf(stderr, exception %d (%x)\n,
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] remove env-exit_request usage from qemu-kvm.c

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Today I found a very catastrophic regression: I cannot run my mission
critical servers running RHL7.1 anymore. This is a total disaster.

Fortunately, I was able to isolate the commit that caused it:
commit bb598da496c040d42dde564bd8ace181be52293e
Author: Glauber Costa glom...@redhat.com
Date:   Mon Jul 6 16:12:52 2009 -0400

This guy is certainly stupid, and deserves punishment. It means I'll
be writing code using emacs for the next week.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index cb85dbc..edd400e 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -986,8 +986,6 @@ int pre_kvm_run(kvm_context_t kvm, CPUState *env)
 {
 kvm_arch_pre_kvm_run(kvm-opaque, env);
 
-if (env-exit_request)
-return 1;
 pthread_mutex_unlock(qemu_mutex);
 return 0;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] remove created from kvm_state

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Again, CPUState has it, and it means exactly that.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/cpu-defs.h b/cpu-defs.h
index fce366f..ce9f96a 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -142,7 +142,6 @@ struct qemu_work_item;
 struct KVMCPUState {
 pthread_t thread;
 int signalled;
-int created;
 void *vcpu_ctx;
 struct qemu_work_item *queued_work_first, *queued_work_last;
 };
diff --git a/qemu-kvm.c b/qemu-kvm.c
index fd28b39..cb85dbc 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1663,12 +1663,12 @@ void kvm_update_interrupt_request(CPUState *env)
 int signal = 0;
 
 if (env) {
-if (!current_env || !current_env-kvm_cpu_state.created)
+if (!current_env || !current_env-created)
 signal = 1;
 /*
  * Testing for created here is really redundant
  */
-if (current_env  current_env-kvm_cpu_state.created 
+if (current_env  current_env-created 
 env != current_env  !env-kvm_cpu_state.signalled)
 signal = 1;
 
@@ -1948,7 +1948,7 @@ static void *ap_main_loop(void *_env)
 
 /* signal VCPU creation */
 pthread_mutex_lock(qemu_mutex);
-current_env-kvm_cpu_state.created = 1;
+current_env-created = 1;
 pthread_cond_signal(qemu_vcpu_cond);
 
 /* and wait for machine initialization */
@@ -1964,13 +1964,13 @@ void kvm_init_vcpu(CPUState *env)
 {
 pthread_create(env-kvm_cpu_state.thread, NULL, ap_main_loop, env);
 
-while (env-kvm_cpu_state.created == 0)
+while (env-created == 0)
qemu_cond_wait(qemu_vcpu_cond);
 }
 
 int kvm_vcpu_inited(CPUState *env)
 {
-return env-kvm_cpu_state.created;
+return env-created;
 }
 
 #ifdef TARGET_I386
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] remove kvm_in* functions

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

We can use plain qemu's here, and save a couple of lines/complexity.
I'm leaving outb for later, because the SMM thing makes it a little bit
less trivial.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index e200dea..dce9d4e 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -97,24 +97,6 @@ static int kvm_debug(void *opaque, void *data,
 }
 #endif
 
-static int kvm_inb(void *opaque, uint16_t addr, uint8_t *data)
-{
-*data = cpu_inb(0, addr);
-return 0;
-}
-
-static int kvm_inw(void *opaque, uint16_t addr, uint16_t *data)
-{
-*data = cpu_inw(0, addr);
-return 0;
-}
-
-static int kvm_inl(void *opaque, uint16_t addr, uint32_t *data)
-{
-*data = cpu_inl(0, addr);
-return 0;
-}
-
 #define PM_IO_BASE 0xb000
 
 static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
@@ -839,15 +821,16 @@ static int handle_io(kvm_vcpu_context_t vcpu)
for (i = 0; i  run-io.count; ++i) {
switch (run-io.direction) {
case KVM_EXIT_IO_IN:
+   r = 0;
switch (run-io.size) {
case 1:
-   r = kvm_inb(kvm-opaque, addr, p);
+   *(uint8_t *)p = cpu_inb(kvm-opaque, addr);
break;
case 2:
-   r = kvm_inw(kvm-opaque, addr, p);
+   *(uint16_t *)p = cpu_inw(kvm-opaque, addr);
break;
case 4:
-   r = kvm_inl(kvm-opaque, addr, p);
+   *(uint32_t *)p = cpu_inl(kvm-opaque, addr);
break;
default:
fprintf(stderr, bad I/O size %d\n, 
run-io.size);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] remove kvm_specific kvm_out* functions

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Follow the example of what was already done with inb.
This is a little bit more tricky, because of SMM, but those
bits are handled directly in apic anyway.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 9d550d3..5e7dc0a 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -97,55 +97,6 @@ static int kvm_debug(void *opaque, void *data,
 }
 #endif
 
-#define PM_IO_BASE 0xb000
-
-static int kvm_outb(void *opaque, uint16_t addr, uint8_t data)
-{
-if (addr == 0xb2) {
-   switch (data) {
-   case 0: {
-   cpu_outb(0, 0xb3, 0);
-   break;
-   }
-   case 0xf0: {
-   unsigned x;
-
-   /* enable acpi */
-   x = cpu_inw(0, PM_IO_BASE + 4);
-   x = ~1;
-   cpu_outw(0, PM_IO_BASE + 4, x);
-   break;
-   }
-   case 0xf1: {
-   unsigned x;
-
-   /* enable acpi */
-   x = cpu_inw(0, PM_IO_BASE + 4);
-   x |= 1;
-   cpu_outw(0, PM_IO_BASE + 4, x);
-   break;
-   }
-   default:
-   break;
-   }
-   return 0;
-}
-cpu_outb(0, addr, data);
-return 0;
-}
-
-static int kvm_outw(void *opaque, uint16_t addr, uint16_t data)
-{
-cpu_outw(0, addr, data);
-return 0;
-}
-
-static int kvm_outl(void *opaque, uint16_t addr, uint32_t data)
-{
-cpu_outl(0, addr, data);
-return 0;
-}
-
 int kvm_mmio_read(void *opaque, uint64_t addr, uint8_t *data, int len)
 {
cpu_physical_memory_rw(addr, data, len, 0);
@@ -816,14 +767,12 @@ static int handle_io(kvm_vcpu_context_t vcpu)
struct kvm_run *run = vcpu-run;
kvm_context_t kvm = vcpu-kvm;
uint16_t addr = run-io.port;
-   int r;
int i;
void *p = (void *)run + run-io.data_offset;
 
for (i = 0; i  run-io.count; ++i) {
switch (run-io.direction) {
case KVM_EXIT_IO_IN:
-   r = 0;
switch (run-io.size) {
case 1:
*(uint8_t *)p = cpu_inb(kvm-opaque, addr);
@@ -842,16 +791,13 @@ static int handle_io(kvm_vcpu_context_t vcpu)
case KVM_EXIT_IO_OUT:
switch (run-io.size) {
case 1:
-   r = kvm_outb(kvm-opaque, addr,
-*(uint8_t *)p);
+cpu_outb(kvm-opaque, addr, *(uint8_t *)p);
break;
case 2:
-   r = kvm_outw(kvm-opaque, addr,
-*(uint16_t *)p);
+   cpu_outw(kvm-opaque, addr, *(uint16_t *)p);
break;
case 4:
-   r = kvm_outl(kvm-opaque, addr,
-*(uint32_t *)p);
+   cpu_outl(kvm-opaque, addr, *(uint32_t *)p);
break;
default:
fprintf(stderr, bad I/O size %d\n, 
run-io.size);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] reuse kvm_ioctl

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Start using kvm_ioctl's code.
For type safety, delete fd from kvm_context entirely, so the
compiler can play along with us helping to detect errors I might
have made.

Signed-off-by: Glauber Costa glom...@redhat.com

Also, we were slightly different from qemu upstream in handling error
code from ioctl, since we were always testing for -1, while kvm_vm_ioctl
returns -errno. We already did this in most of the call sites, so
this patch has the big advantage of simplifying call sites.
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm-all.c b/kvm-all.c
index 9373d99..0ec6475 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -793,6 +793,7 @@ void kvm_set_phys_mem(target_phys_addr_t start_addr,
 }
 }
 
+#endif
 int kvm_ioctl(KVMState *s, int type, ...)
 {
 int ret;
@@ -809,7 +810,6 @@ int kvm_ioctl(KVMState *s, int type, ...)
 
 return ret;
 }
-#endif
 
 int kvm_vm_ioctl(KVMState *s, int type, ...)
 {
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 58ec1f2..428e831 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -38,7 +38,7 @@ int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr)
 #ifdef KVM_CAP_SET_TSS_ADDR
int r;
 
-   r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
+   r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
if (r  0) {
r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr);
if (r  0) {
@@ -56,7 +56,7 @@ static int kvm_init_tss(kvm_context_t kvm)
 #ifdef KVM_CAP_SET_TSS_ADDR
int r;
 
-   r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
+   r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
if (r  0) {
/*
 * this address is 3 pages before the bios, and the bios should 
present
@@ -78,7 +78,7 @@ static int kvm_set_identity_map_addr(kvm_context_t kvm, 
unsigned long addr)
 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
int r;
 
-   r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR);
+   r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, 
KVM_CAP_SET_IDENTITY_MAP_ADDR);
if (r  0) {
r = kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, addr);
if (r == -1) {
@@ -96,7 +96,7 @@ static int kvm_init_identity_map_page(kvm_context_t kvm)
 #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR
int r;
 
-   r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR);
+   r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, 
KVM_CAP_SET_IDENTITY_MAP_ADDR);
if (r  0) {
/*
 * this address is 4 pages before the bios, and the bios should 
present
@@ -121,7 +121,7 @@ static int kvm_create_pit(kvm_context_t kvm)
 
kvm-pit_in_kernel = 0;
if (!kvm-no_pit_creation) {
-   r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT);
+   r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_PIT);
if (r  0) {
r = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT);
if (r = 0)
@@ -401,11 +401,11 @@ void kvm_show_code(kvm_vcpu_context_t vcpu)
 struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm)
 {
struct kvm_msr_list sizer, *msrs;
-   int r, e;
+   int r;
 
sizer.nmsrs = 0;
-   r = ioctl(kvm-fd, KVM_GET_MSR_INDEX_LIST, sizer);
-   if (r == -1  errno != E2BIG)
+   r = kvm_ioctl(kvm_state, KVM_GET_MSR_INDEX_LIST, sizer);
+   if (r  0  r != -E2BIG)
return NULL;
/* Old kernel modules had a bug and could write beyond the provided
   memory. Allocate at least a safe amount of 1K. */
@@ -413,11 +413,10 @@ struct kvm_msr_list *kvm_get_msr_list(kvm_context_t kvm)
   sizer.nmsrs * sizeof(*msrs-indices)));
 
msrs-nmsrs = sizer.nmsrs;
-   r = ioctl(kvm-fd, KVM_GET_MSR_INDEX_LIST, msrs);
-   if (r == -1) {
-   e = errno;
+   r = kvm_ioctl(kvm_state, KVM_GET_MSR_INDEX_LIST, msrs);
+   if (r  0) {
free(msrs);
-   errno = e;
+   errno = r;
return NULL;
}
return msrs;
@@ -458,10 +457,10 @@ int kvm_get_mce_cap_supported(kvm_context_t kvm, uint64_t 
*mce_cap,
 #ifdef KVM_CAP_MCE
 int r;
 
-r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
+r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MCE);
 if (r  0) {
 *max_banks = r;
-return ioctl(kvm-fd, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
+return kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap);
 }
 #endif
 return -ENOSYS;
@@ -599,7 +598,7 @@ int kvm_set_shadow_pages(kvm_context_t kvm, unsigned int 
nrshadow_pages)
 #ifdef KVM_CAP_MMU_SHADOW_CACHE_CONTROL
int r;
 
-   r = ioctl(kvm-fd, KVM_CHECK_EXTENSION,
+   r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION,
  

[COMMIT master] check extension

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

use upstream check_extension code

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 88c3baf..75db546 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -639,7 +639,7 @@ static int assign_device(AssignedDevInfo *adev)
 /* We always enable the IOMMU if present
  * (or when not disabled on the command line)
  */
-r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
+r = kvm_check_extension(kvm_state, KVM_CAP_IOMMU);
 if (r  !adev-disable_iommu)
assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
 #endif
diff --git a/kvm-all.c b/kvm-all.c
index 0ec6475..b4b5a35 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -383,6 +383,7 @@ int kvm_uncoalesce_mmio_region(target_phys_addr_t start, 
ram_addr_t size)
 return ret;
 }
 
+#endif
 int kvm_check_extension(KVMState *s, unsigned int extension)
 {
 int ret;
@@ -394,6 +395,7 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
 
 return ret;
 }
+#ifdef KVM_UPSTREAM
 
 int kvm_init(int smp_cpus)
 {
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 428e831..e4ae582 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -348,7 +348,7 @@ int kvm_has_pit_state2(kvm_context_t kvm)
int r = 0;
 
 #ifdef KVM_CAP_PIT_STATE2
-   r = kvm_check_extension(kvm, KVM_CAP_PIT_STATE2);
+   r = kvm_check_extension(kvm_state, KVM_CAP_PIT_STATE2);
 #endif
return r;
 }
@@ -702,7 +702,7 @@ uint32_t kvm_get_supported_cpuid(kvm_context_t kvm, 
uint32_t function, int reg)
uint32_t ret = 0;
uint32_t cpuid_1_edx;
 
-   if (!kvm_check_extension(kvm, KVM_CAP_EXT_CPUID)) {
+   if (!kvm_check_extension(kvm_state, KVM_CAP_EXT_CPUID)) {
return -1U;
}
 
@@ -1234,7 +1234,7 @@ static int get_para_features(kvm_context_t kvm_context)
int i, features = 0;
 
for (i = 0; i  ARRAY_SIZE(para_features)-1; i++) {
-   if (kvm_check_extension(kvm_context, para_features[i].cap))
+   if (kvm_check_extension(kvm_state, para_features[i].cap))
features |= (1  para_features[i].feature);
}
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 98cfee0..e200dea 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -589,16 +589,6 @@ static int kvm_create_default_phys_mem(kvm_context_t kvm,
return -1;
 }
 
-int kvm_check_extension(kvm_context_t kvm, int ext)
-{
-   int ret;
-
-   ret = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, ext);
-   if (ret  0)
-   return ret;
-   return 0;
-}
-
 void kvm_create_irqchip(kvm_context_t kvm)
 {
int r;
@@ -1345,7 +1335,7 @@ int kvm_has_gsi_routing(kvm_context_t kvm)
 int r = 0;
 
 #ifdef KVM_CAP_IRQ_ROUTING
-r = kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
+r = kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
 #endif
 return r;
 }
@@ -1353,7 +1343,7 @@ int kvm_has_gsi_routing(kvm_context_t kvm)
 int kvm_get_gsi_count(kvm_context_t kvm)
 {
 #ifdef KVM_CAP_IRQ_ROUTING
-   return kvm_check_extension(kvm, KVM_CAP_IRQ_ROUTING);
+   return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
 #else
return -EINVAL;
 #endif
@@ -1606,7 +1596,7 @@ int kvm_irqfd(kvm_context_t kvm, int gsi, int flags)
int r;
int fd;
 
-   if (!kvm_check_extension(kvm, KVM_CAP_IRQFD))
+   if (!kvm_check_extension(kvm_state, KVM_CAP_IRQFD))
return -ENOENT;
 
fd = eventfd(0, 0);
@@ -2381,7 +2371,7 @@ int kvm_setup_guest_memory(void *area, unsigned long size)
 
 int kvm_qemu_check_extension(int ext)
 {
-return kvm_check_extension(kvm_context, ext);
+return kvm_check_extension(kvm_state, ext);
 }
 
 int kvm_qemu_init_env(CPUState *cenv)
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 8c9b72f..ec35f29 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -167,7 +167,6 @@ int kvm_create(kvm_context_t kvm,
   unsigned long phys_mem_bytes,
   void **phys_mem);
 int kvm_create_vm(kvm_context_t kvm);
-int kvm_check_extension(kvm_context_t kvm, int ext);
 void kvm_create_irqchip(kvm_context_t kvm);
 
 /*!
@@ -1198,5 +1197,6 @@ extern KVMState *kvm_state;
 
 int kvm_ioctl(KVMState *s, int type, ...);
 int kvm_vm_ioctl(KVMState *s, int type, ...);
+int kvm_check_extension(KVMState *s, unsigned int ext);
 
 #endif
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] embed kvm_create_context into kvm_init

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

There is no reason why kvm_create_context is placed outside kvm_init().
After we call kvm_init(), no extra initialization step should be necessary.

This patch folds kvm_create_context into it, simplifying vl.c code.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index edd400e..4d85993 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -400,6 +400,8 @@ int kvm_dirty_pages_log_reset(kvm_context_t kvm)
 }
 
 
+static int kvm_create_context(void);
+
 int kvm_init(int smp_cpus)
 {
int fd;
@@ -459,7 +461,7 @@ int kvm_init(int smp_cpus)
}
 
 pthread_mutex_lock(qemu_mutex);
-   return 0;
+return kvm_create_context();
 
  out_close:
close(fd);
@@ -2163,7 +2165,7 @@ int kvm_arch_init_irq_routing(void)
 }
 #endif
 
-int kvm_qemu_create_context(void)
+static int kvm_create_context()
 {
 int r;
 
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 6a9be12..b186c9d 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -943,7 +943,6 @@ struct kvm_pit_state { };
 
 int kvm_main_loop(void);
 int kvm_qemu_init(void);
-int kvm_qemu_create_context(void);
 int kvm_init_ap(void);
 int kvm_vcpu_inited(CPUState *env);
 void kvm_load_registers(CPUState *env);
diff --git a/vl.c b/vl.c
index 939da1f..c09d801 100644
--- a/vl.c
+++ b/vl.c
@@ -5830,13 +5830,6 @@ int main(int argc, char **argv, char **envp)
 if (ram_size == 0)
 ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
 
-if (kvm_enabled()) {
-   if (kvm_qemu_create_context()  0) {
-   fprintf(stderr, Could not create KVM context\n);
-   exit(1);
-   }
-}
-
 #ifdef CONFIG_KQEMU
 /* FIXME: This is a nasty hack because kqemu can't cope with dynamic
guest ram allocation.  It needs to go away.  */
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] reuse env stop and stopped states

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

qemu CPUState already provides stop and stopped states. And they
mean exactly that. There is no need for us to provide our own.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/cpu-defs.h b/cpu-defs.h
index 7570096..fce366f 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -142,8 +142,6 @@ struct qemu_work_item;
 struct KVMCPUState {
 pthread_t thread;
 int signalled;
-int stop;
-int stopped;
 int created;
 void *vcpu_ctx;
 struct qemu_work_item *queued_work_first, *queued_work_last;
diff --git a/qemu-kvm.c b/qemu-kvm.c
index dce9d4e..fd28b39 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -91,7 +91,7 @@ static int kvm_debug(void *opaque, void *data,
 
 if (handle) {
kvm_debug_cpu_requested = env;
-   env-kvm_cpu_state.stopped = 1;
+   env-stopped = 1;
 }
 return handle;
 }
@@ -963,7 +963,7 @@ int handle_halt(kvm_vcpu_context_t vcpu)
 int handle_shutdown(kvm_context_t kvm, CPUState *env)
 {
 /* stop the current vcpu from going back to guest mode */
-env-kvm_cpu_state.stopped = 1;
+env-stopped = 1;
 
 qemu_system_reset_request();
 return 1;
@@ -1748,7 +1748,7 @@ int kvm_cpu_exec(CPUState *env)
 
 static int is_cpu_stopped(CPUState *env)
 {
-return !vm_running || env-kvm_cpu_state.stopped;
+return !vm_running || env-stopped;
 }
 
 static void flush_queued_work(CPUState *env)
@@ -1794,9 +1794,9 @@ static void kvm_main_loop_wait(CPUState *env, int timeout)
 cpu_single_env = env;
 flush_queued_work(env);
 
-if (env-kvm_cpu_state.stop) {
-   env-kvm_cpu_state.stop = 0;
-   env-kvm_cpu_state.stopped = 1;
+if (env-stop) {
+   env-stop = 0;
+   env-stopped = 1;
pthread_cond_signal(qemu_pause_cond);
 }
 
@@ -1808,7 +1808,7 @@ static int all_threads_paused(void)
 CPUState *penv = first_cpu;
 
 while (penv) {
-if (penv-kvm_cpu_state.stop)
+if (penv-stop)
 return 0;
 penv = (CPUState *)penv-next_cpu;
 }
@@ -1822,11 +1822,11 @@ static void pause_all_threads(void)
 
 while (penv) {
 if (penv != cpu_single_env) {
-penv-kvm_cpu_state.stop = 1;
+penv-stop = 1;
 pthread_kill(penv-kvm_cpu_state.thread, SIG_IPI);
 } else {
-penv-kvm_cpu_state.stop = 0;
-penv-kvm_cpu_state.stopped = 1;
+penv-stop = 0;
+penv-stopped = 1;
 cpu_exit(penv);
 }
 penv = (CPUState *)penv-next_cpu;
@@ -1843,8 +1843,8 @@ static void resume_all_threads(void)
 assert(!cpu_single_env);
 
 while (penv) {
-penv-kvm_cpu_state.stop = 0;
-penv-kvm_cpu_state.stopped = 0;
+penv-stop = 0;
+penv-stopped = 0;
 pthread_kill(penv-kvm_cpu_state.thread, SIG_IPI);
 penv = (CPUState *)penv-next_cpu;
 }
@@ -2609,12 +2609,6 @@ int kvm_log_stop(target_phys_addr_t phys_addr, 
target_phys_addr_t len)
 return 0;
 }
 
-void qemu_kvm_cpu_stop(CPUState *env)
-{
-if (kvm_enabled())
-env-kvm_cpu_state.stopped = 1;
-}
-
 int kvm_set_boot_cpu_id(uint32_t id)
 {
return kvm_set_boot_vcpu_id(kvm_context, id);
diff --git a/vl.c b/vl.c
index b3df596..6ef7690 100644
--- a/vl.c
+++ b/vl.c
@@ -3553,7 +3553,7 @@ void qemu_system_reset_request(void)
 reset_requested = 1;
 }
 if (cpu_single_env) {
-qemu_kvm_cpu_stop(cpu_single_env);
+cpu_single_env-stopped = 1;
 }
 qemu_notify_event();
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] qemu-kvm: routing table update thinko fix

2009-07-26 Thread Avi Kivity
From: Michael S. Tsirkin m...@redhat.com

When updating irq routing entries, we should memcpy the new entry
over the old one. Current code gets it wrong, and only works because
it's uncommon for guests to change tables.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 4d85993..9d550d3 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1480,7 +1480,7 @@ int kvm_update_routing_entry(kvm_context_t kvm,
 case KVM_IRQ_ROUTING_IRQCHIP:
 if (e-u.irqchip.irqchip == entry-u.irqchip.irqchip 
 e-u.irqchip.pin == entry-u.irqchip.pin) {
-memcpy(e-u.irqchip, entry-u.irqchip, sizeof e-u.irqchip);
+memcpy(e-u.irqchip, newentry-u.irqchip, sizeof 
e-u.irqchip);
 return 0;
 }
 break;
@@ -1488,7 +1488,7 @@ int kvm_update_routing_entry(kvm_context_t kvm,
 if (e-u.msi.address_lo == entry-u.msi.address_lo 
 e-u.msi.address_hi == entry-u.msi.address_hi 
 e-u.msi.data == entry-u.msi.data) {
-memcpy(e-u.msi, entry-u.msi, sizeof e-u.msi);
+memcpy(e-u.msi, newentry-u.msi, sizeof e-u.msi);
 return 0;
 }
 break;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] hpet_reset: make it similar to upstream

2009-07-26 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/hw/hpet.c b/hw/hpet.c
index c1837ac..2bb2057 100644
--- a/hw/hpet.c
+++ b/hw/hpet.c
@@ -560,15 +560,13 @@ static void hpet_reset(void *opaque) {
 s-capability = 0x8086a201ULL;
 s-capability |= ((HPET_CLK_PERIOD)  32);
 s-config = 0ULL;
-if (count  0) {
+if (count  0)
 /* we don't enable pit when hpet_reset is first called (by hpet_init)
  * because hpet is taking over for pit here. On subsequent invocations,
  * hpet_reset is called due to system reset. At this point control must
  * be returned to pit until SW reenables hpet.
  */
 hpet_enable_pit();
-dprintf(qemu: hpet enabled pit\n);
-}
 count = 1;
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] remove custom qemu_select, use upstream hooks

2009-07-26 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Use upstream hooks around select() to lock/unlock global mutex.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.h b/qemu-kvm.h
index ec35f29..6a9be12 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -1117,13 +1117,13 @@ static inline void kvm_load_tsc(CPUState *env) {}
 void kvm_mutex_unlock(void);
 void kvm_mutex_lock(void);
 
-static inline void kvm_sleep_begin(void)
+static inline void qemu_mutex_unlock_iothread(void)
 {
 if (kvm_enabled())
kvm_mutex_unlock();
 }
 
-static inline void kvm_sleep_end(void)
+static inline void qemu_mutex_lock_iothread(void)
 {
 if (kvm_enabled())
kvm_mutex_lock();
diff --git a/vl.c b/vl.c
index 6ef7690..0ee2908 100644
--- a/vl.c
+++ b/vl.c
@@ -289,24 +289,6 @@ static QEMUTimer *nographic_timer;
 
 uint8_t qemu_uuid[16];
 
-static int qemu_select(int max_fd, fd_set *rfds, fd_set *wfds, fd_set *xfds,
-  struct timeval *tv)
-{
-int ret;
-
-/* KVM holds a mutex while QEMU code is running, we need hooks to
-   release the mutex whenever QEMU code sleeps. */
-
-kvm_sleep_begin();
-
-ret = select(max_fd, rfds, wfds, xfds, tv);
-
-kvm_sleep_end();
-
-return ret;
-}
-
-
 /***/
 /* x86 ISA bus support */
 
@@ -3714,8 +3696,10 @@ void qemu_notify_event(void)
  }
 }
 
+#ifdef KVM_UPSTREAM
 #define qemu_mutex_lock_iothread() do { } while (0)
 #define qemu_mutex_unlock_iothread() do { } while (0)
+#endif
 
 void vm_stop(int reason)
 {
@@ -4126,7 +4110,9 @@ void main_loop_wait(int timeout)
 
 slirp_select_fill(nfds, rfds, wfds, xfds);
 
-ret = qemu_select(nfds + 1, rfds, wfds, xfds, tv);
+qemu_mutex_unlock_iothread();
+ret = select(nfds + 1, rfds, wfds, xfds, tv);
+qemu_mutex_lock_iothread();
 if (ret  0) {
 IOHandlerRecord **pioh;
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] fix mce check extension

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Because a patch got into Marcelo's tree before mine did, I forgot
to convert one user of kvm_check_extension. Here's the fix for it.

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index e4ae582..492dbc5 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -1338,7 +1338,7 @@ int kvm_arch_qemu_init_env(CPUState *cenv)
 #ifdef KVM_CAP_MCE
 if (((cenv-cpuid_version  8)0xF) = 6
  (cenv-cpuid_features(CPUID_MCE|CPUID_MCA)) == 
(CPUID_MCE|CPUID_MCA)
- kvm_check_extension(kvm_context, KVM_CAP_MCE)  0) {
+ kvm_check_extension(kvm_state, KVM_CAP_MCE)  0) {
 uint64_t mcg_cap;
 int banks;
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] virtio_net_handle_rx: match upstream comment

2009-07-26 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 75c9695..ce8e6cb 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -345,7 +345,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, 
VirtQueue *vq)
 qemu_flush_queued_packets(n-vc);
 
 /* We now have RX buffers, signal to the IO thread to break out of the
-   select to re-poll the tap file descriptor */
+ * select to re-poll the tap file descriptor */
 qemu_notify_event();
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] fold second pass of kvm initialization

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@t60.(none)

There is no reason why kvm_init_ap() and friends are placed outside kvm_init().
After we call kvm_init(), no extra initialization step should be necessary.
There are now no references to KVM_UPSTREAM outside of kvm*.c files.

Signed-off-by: Glauber Costa glom...@t60.(none)
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 5e7dc0a..32dce4a 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -2111,6 +2111,8 @@ int kvm_arch_init_irq_routing(void)
 }
 #endif
 
+extern int no_hpet;
+
 static int kvm_create_context()
 {
 int r;
@@ -2143,6 +2145,24 @@ static int kvm_create_context()
 return r;
 }
 
+kvm_init_ap();
+if (kvm_irqchip) {
+if (!qemu_kvm_has_gsi_routing()) {
+irq0override = 0;
+#ifdef TARGET_I386
+/* if kernel can't do irq routing, interrupt source
+ * override 0-2 can not be set up as required by hpet,
+ * so disable hpet.
+ */
+no_hpet=1;
+} else  if (!qemu_kvm_has_pit_state2()) {
+no_hpet=1;
+}
+#else
+}
+#endif
+}
+
 return 0;
 }
 
diff --git a/vl.c b/vl.c
index 8ea0771..74a1c60 100644
--- a/vl.c
+++ b/vl.c
@@ -5998,28 +5998,6 @@ int main(int argc, char **argv, char **envp)
 
 module_call_init(MODULE_INIT_DEVICE);
 
-if (kvm_enabled()) {
-kvm_init_ap();
-#ifdef CONFIG_KVM
-if (kvm_irqchip) {
-if (!qemu_kvm_has_gsi_routing()) {
-irq0override = 0;
-#ifdef TARGET_I386
-/* if kernel can't do irq routing, interrupt source
- * override 0-2 can not be set up as required by hpet,
- * so disable hpet.
- */
-no_hpet=1;
-} else  if (!qemu_kvm_has_pit_state2()) {
-no_hpet=1;
-}
-#else
-}
-#endif
-}
-#endif
-}
-
 machine-init(ram_size, boot_devices,
   kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] vl.c: match upstream drive_init loop comment

2009-07-26 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/vl.c b/vl.c
index 0ee2908..939da1f 100644
--- a/vl.c
+++ b/vl.c
@@ -5870,10 +5870,8 @@ int main(int argc, char **argv, char **envp)
 if (nb_drives_opt  MAX_DRIVES)
 drive_add(NULL, SD_ALIAS);
 
-/* open the virtual block devices
- * note that migration with device
- * hot add/remove is broken.
- */
+/* open the virtual block devices */
+
 for(i = 0; i  nb_drives_opt; i++)
 if (drive_init(drives_opt[i], snapshot, machine) == -1)
exit(1);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] reuse kvm_vm_ioctl

2009-07-26 Thread Avi Kivity
From: Glauber Costa glom...@redhat.com

Start using kvm_vm_ioctl's code.
For type safety, delete vm_fd from kvm_context entirely, so the
compiler can play along with us helping to detect errors I might
have made.

Also, we were slightly different from qemu upstream in handling error
code from ioctl, since we were always testing for -1, while kvm_vm_ioctl
returns -errno. We already did this in most of the call sites, so
this patch has the big advantage of simplifying call sites.

Diffstat says:
 4 files changed, 58 insertions(+), 134 deletions(-)

Signed-off-by: Glauber Costa glom...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/kvm-all.c b/kvm-all.c
index 67908a7..9373d99 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -809,6 +809,7 @@ int kvm_ioctl(KVMState *s, int type, ...)
 
 return ret;
 }
+#endif
 
 int kvm_vm_ioctl(KVMState *s, int type, ...)
 {
@@ -827,6 +828,7 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
 return ret;
 }
 
+#ifdef KVM_UPSTREAM
 int kvm_vcpu_ioctl(CPUState *env, int type, ...)
 {
 int ret;
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index d2c8abe..58ec1f2 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -40,10 +40,10 @@ int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr)
 
r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR);
if (r  0) {
-   r = ioctl(kvm-vm_fd, KVM_SET_TSS_ADDR, addr);
-   if (r == -1) {
+   r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr);
+   if (r  0) {
fprintf(stderr, kvm_set_tss_addr: %m\n);
-   return -errno;
+   return r;
}
return 0;
}
@@ -80,7 +80,7 @@ static int kvm_set_identity_map_addr(kvm_context_t kvm, 
unsigned long addr)
 
r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR);
if (r  0) {
-   r = ioctl(kvm-vm_fd, KVM_SET_IDENTITY_MAP_ADDR, addr);
+   r = kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, addr);
if (r == -1) {
fprintf(stderr, kvm_set_identity_map_addr: %m\n);
return -errno;
@@ -123,7 +123,7 @@ static int kvm_create_pit(kvm_context_t kvm)
if (!kvm-no_pit_creation) {
r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_PIT);
if (r  0) {
-   r = ioctl(kvm-vm_fd, KVM_CREATE_PIT);
+   r = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT);
if (r = 0)
kvm-pit_in_kernel = 1;
else {
@@ -256,7 +256,6 @@ int kvm_create_memory_alias(kvm_context_t kvm,
.memory_size = len,
.target_phys_addr = target_phys,
};
-   int fd = kvm-vm_fd;
int r;
int slot;
 
@@ -267,7 +266,7 @@ int kvm_create_memory_alias(kvm_context_t kvm,
return -EBUSY;
alias.slot = slot;
 
-   r = ioctl(fd, KVM_SET_MEMORY_ALIAS, alias);
+   r = kvm_vm_ioctl(kvm_state, KVM_SET_MEMORY_ALIAS, alias);
if (r == -1)
return -errno;
 
@@ -314,55 +313,31 @@ int kvm_set_lapic(kvm_vcpu_context_t vcpu, struct 
kvm_lapic_state *s)
 
 int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s)
 {
-   int r;
if (!kvm-pit_in_kernel)
return 0;
-   r = ioctl(kvm-vm_fd, KVM_GET_PIT, s);
-   if (r == -1) {
-   r = -errno;
-   perror(kvm_get_pit);
-   }
-   return r;
+   return kvm_vm_ioctl(kvm_state, KVM_GET_PIT, s);
 }
 
 int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s)
 {
-   int r;
if (!kvm-pit_in_kernel)
return 0;
-   r = ioctl(kvm-vm_fd, KVM_SET_PIT, s);
-   if (r == -1) {
-   r = -errno;
-   perror(kvm_set_pit);
-   }
-   return r;
+   return kvm_vm_ioctl(kvm_state, KVM_SET_PIT, s);
 }
 
 #ifdef KVM_CAP_PIT_STATE2
 int kvm_get_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2)
 {
-   int r;
if (!kvm-pit_in_kernel)
return 0;
-   r = ioctl(kvm-vm_fd, KVM_GET_PIT2, ps2);
-   if (r == -1) {
-   r = -errno;
-   perror(kvm_get_pit2);
-   }
-   return r;
+   return kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, ps2);
 }
 
 int kvm_set_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2)
 {
-   int r;
if (!kvm-pit_in_kernel)
return 0;
-   r = ioctl(kvm-vm_fd, KVM_SET_PIT2, ps2);
-   if (r == -1) {
-   r = -errno;
-   perror(kvm_set_pit2);
-   }
-   return r;
+   return kvm_vm_ioctl(kvm_state, KVM_SET_PIT2, ps2);
 }
 
 #endif
@@ -627,10 +602,10 @@ int kvm_set_shadow_pages(kvm_context_t kvm, unsigned int 
nrshadow_pages)
r = ioctl(kvm-fd, KVM_CHECK_EXTENSION,
  KVM_CAP_MMU_SHADOW_CACHE_CONTROL);
if 

[COMMIT master] kvm-kmod: Fix including of arch/*/kvm/trace.h

2009-07-26 Thread Avi Kivity
From: Jan Kiszka jan.kis...@web.de

When building against a split kernel, we have to add its source path to
the include path. Otherwise arch/*/kvm/trace.h cannot be found.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/Makefile b/Makefile
index ad08c45..4c813a6 100644
--- a/Makefile
+++ b/Makefile
@@ -28,7 +28,7 @@ all:: prerequisite
$(MAKE) -C $(KERNELDIR) M=`pwd` \
LINUXINCLUDE=-I`pwd`/include -Iinclude \
$(if $(KERNELSOURCEDIR),\
-   -Iinclude2 -I$(KERNELSOURCEDIR)/include 
-I$(KERNELSOURCEDIR)/arch/${ARCH_DIR}/include, \
+   -Iinclude2 -I$(KERNELSOURCEDIR)/include 
-I$(KERNELSOURCEDIR)/arch/${ARCH_DIR}/include -I$(KERNELSOURCEDIR), \
-Iarch/${ARCH_DIR}/include) -I`pwd`/include-compat \
-include include/linux/autoconf.h \
-include `pwd`/$(ARCH_DIR)/external-module-compat.h 
$(module_defines) \
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: silence lapic kernel messages that can be triggered by a guest

2009-07-26 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

Some Linux versions (f8) try to read the EOI register, which is write-only.

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e2e2849..6c3cd2c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -594,14 +594,14 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 
offset, int len,
static const u64 rmask = 0x43ff01ffe70cULL;
 
if ((alignment + len)  4) {
-   printk(KERN_ERR KVM_APIC_READ: alignment error %x %d\n,
-   offset, len);
+   apic_debug(KVM_APIC_READ: alignment error %x %d\n,
+  offset, len);
return 1;
}
 
if (offset  0x3f0 || !(rmask  (1ULL  (offset  4 {
-   printk(KERN_ERR KVM_APIC_READ: read reserved register %x\n,
-   offset);
+   apic_debug(KVM_APIC_READ: read reserved register %x\n,
+  offset);
return 1;
}
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Discard unnecessary kvm_mmu_flush_tlb() in kvm_mmu_load()

2009-07-26 Thread Avi Kivity
From: Sheng Yang sh...@linux.intel.com

set_cr3() should already cover the TLB flushing.

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7162651..5812812 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2349,8 +2349,8 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
spin_unlock(vcpu-kvm-mmu_lock);
if (r)
goto out;
+   /* set_cr3() should ensure TLB has been flushed */
kvm_x86_ops-set_cr3(vcpu, vcpu-arch.mmu.root_hpa);
-   kvm_mmu_flush_tlb(vcpu);
 out:
return r;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: s390: remove unused structs

2009-07-26 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

They are not used by common code without defines which s390 does not
have.

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/s390/include/asm/kvm.h b/arch/s390/include/asm/kvm.h
index 0b2f829..3dfcaeb 100644
--- a/arch/s390/include/asm/kvm.h
+++ b/arch/s390/include/asm/kvm.h
@@ -15,15 +15,6 @@
  */
 #include linux/types.h
 
-/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
-struct kvm_pic_state {
-   /* no PIC for s390 */
-};
-
-struct kvm_ioapic_state {
-   /* no IOAPIC for s390 */
-};
-
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
/* general purpose regs for s390 */
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: fix kvm_apic_accept_irq tracepoint dest mode parameter

2009-07-26 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

Switch dm parameter to u16 and use __print_symbolic.

Signed-off-by: Gleb Natapov g...@redhat.com

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 134bc63..0d480e7 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -323,12 +323,12 @@ TRACE_EVENT(kvm_apic_ipi,
 );
 
 TRACE_EVENT(kvm_apic_accept_irq,
-   TP_PROTO(__u32 apicid, __u8 dm, __u8 tm, __u8 vec, bool coalesced),
+   TP_PROTO(__u32 apicid, __u16 dm, __u8 tm, __u8 vec, bool coalesced),
TP_ARGS(apicid, dm, tm, vec, coalesced),
 
TP_STRUCT__entry(
__field(__u32,  apicid  )
-   __field(__u8,   dm  )
+   __field(__u16,  dm  )
__field(__u8,   tm  )
__field(__u8,   vec )
__field(bool,   coalesced   )
@@ -344,7 +344,7 @@ TRACE_EVENT(kvm_apic_accept_irq,
 
TP_printk(apicid %x vec %u (%s|%s)%s,
  __entry-apicid, __entry-vec,
- (__entry-dm == APIC_DM_LOWEST) ? lowest:fixed,
+ __print_symbolic((__entry-dm  8  0x7), kvm_deliver_mode),
  __entry-tm ? level : edge,
  __entry-coalesced ?  (coalesced) : )
 );
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: s390: fix wait_queue handling

2009-07-26 Thread Avi Kivity
From: Christian Borntraeger borntrae...@de.ibm.com

There are two waitqueues in kvm for wait handling:
vcpu->wq for virt/kvm/kvm_main.c and
vcpu->arch.local_int.wq for the s390 specific wait code.

The wait handling in kvm_s390_handle_wait was broken by using different
wait_queues for add_wait_queue and remove_wait_queue.

There are two options to fix the problem:
o  move all the s390 specific code to vcpu->wq and remove
   vcpu->arch.local_int.wq
o  move all the s390 specific code to vcpu->arch.local_int.wq

This patch chooses the 2nd variant for two reasons:
o  s390 does not use kvm_vcpu_block but implements its own enabled wait
   handling.
   Having a separate wait_queue makes it clear that our wait mechanism is
   different
o  the patch is much smaller

Reported-by: Julia Lawall ju...@diku.dk
Signed-off-by: Christian Borntraeger borntrae...@de.ibm.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5f2e144..2c2f983 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -380,7 +380,7 @@ no_timer:
}
__unset_cpu_idle(vcpu);
__set_current_state(TASK_RUNNING);
-   remove_wait_queue(vcpu-wq, wait);
+   remove_wait_queue(vcpu-arch.local_int.wq, wait);
spin_unlock_bh(vcpu-arch.local_int.lock);
spin_unlock(vcpu-arch.local_int.float_int-lock);
hrtimer_try_to_cancel(vcpu-arch.ckc_timer);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: x86: use kvm_get_gdt() and kvm_read_ldt()

2009-07-26 Thread Avi Kivity
From: Akinobu Mita akinobu.m...@gmail.com

Use kvm_get_gdt() and kvm_read_ldt() to reduce inline assembly code.

Cc: Avi Kivity a...@redhat.com
Cc: k...@vger.kernel.org
Signed-off-by: Akinobu Mita akinobu.m...@gmail.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index fc14bdf..18085d3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -291,7 +291,7 @@ static void svm_hardware_enable(void *garbage)
 
struct svm_cpu_data *svm_data;
uint64_t efer;
-   struct desc_ptr gdt_descr;
+   struct descriptor_table gdt_descr;
struct desc_struct *gdt;
int me = raw_smp_processor_id();
 
@@ -311,8 +311,8 @@ static void svm_hardware_enable(void *garbage)
svm_data-max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
svm_data-next_asid = svm_data-max_asid + 1;
 
-   asm volatile (sgdt %0 : =m(gdt_descr));
-   gdt = (struct desc_struct *)gdt_descr.address;
+   kvm_get_gdt(gdt_descr);
+   gdt = (struct desc_struct *)gdt_descr.base;
svm_data-tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
 
rdmsrl(MSR_EFER, efer);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0ebd684..18ce27f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -132,13 +132,12 @@ unsigned long segment_base(u16 selector)
if (selector == 0)
return 0;
 
-   asm(sgdt %0 : =m(gdt));
+   kvm_get_gdt(gdt);
table_base = gdt.base;
 
if (selector  4) {   /* from ldt */
-   u16 ldt_selector;
+   u16 ldt_selector = kvm_read_ldt();
 
-   asm(sldt %0 : =g(ldt_selector));
table_base = segment_base(ldt_selector);
}
d = (struct desc_struct *)(table_base + (selector  ~7));
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: x86: use get_desc_base() and get_desc_limit()

2009-07-26 Thread Avi Kivity
From: Akinobu Mita akinobu.m...@gmail.com

Use get_desc_base() and get_desc_limit() to get the base address and
limit in desc_struct.

Cc: Avi Kivity a...@redhat.com
Cc: k...@vger.kernel.org
Signed-off-by: Akinobu Mita akinobu.m...@gmail.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 48567fa..0ebd684 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -142,8 +142,7 @@ unsigned long segment_base(u16 selector)
table_base = segment_base(ldt_selector);
}
d = (struct desc_struct *)(table_base + (selector  ~7));
-   v = d-base0 | ((unsigned long)d-base1  16) |
-   ((unsigned long)d-base2  24);
+   v = get_desc_base(d);
 #ifdef CONFIG_X86_64
if (d-s == 0  (d-type == 2 || d-type == 9 || d-type == 11))
v |= ((unsigned long)((struct ldttss_desc64 *)d)-base3)  32;
@@ -3943,11 +3942,8 @@ static void kvm_set_segment(struct kvm_vcpu *vcpu,
 static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
   struct kvm_segment *kvm_desct)
 {
-   kvm_desct-base = seg_desc-base0;
-   kvm_desct-base |= seg_desc-base1  16;
-   kvm_desct-base |= seg_desc-base2  24;
-   kvm_desct-limit = seg_desc-limit0;
-   kvm_desct-limit |= seg_desc-limit  16;
+   kvm_desct-base = get_desc_base(seg_desc);
+   kvm_desct-limit = get_desc_limit(seg_desc);
if (seg_desc-g) {
kvm_desct-limit = 12;
kvm_desct-limit |= 0xfff;
@@ -4026,11 +4022,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu 
*vcpu, u16 selector,
 static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
 struct desc_struct *seg_desc)
 {
-   u32 base_addr;
-
-   base_addr = seg_desc-base0;
-   base_addr |= (seg_desc-base1  16);
-   base_addr |= (seg_desc-base2  24);
+   u32 base_addr = get_desc_base(seg_desc);
 
return vcpu-arch.mmu.gva_to_gpa(vcpu, base_addr);
 }
@@ -4319,7 +4311,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 
tss_selector, int reason)
}
}
 
-   if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit  16)  0x67) {
+   if (!nseg_desc.p || get_desc_limit(nseg_desc)  0x67) {
kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector  0xfffc);
return 1;
}
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: PIT: Unregister ack notifier callback when freeing

2009-07-26 Thread Avi Kivity
From: Gleb Natapov g...@redhat.com

Signed-off-by: Gleb Natapov g...@redhat.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 137e548..472653c 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -672,6 +672,8 @@ void kvm_free_pit(struct kvm *kvm)
if (kvm-arch.vpit) {
kvm_unregister_irq_mask_notifier(kvm, 0,
   kvm-arch.vpit-mask_notifier);
+   kvm_unregister_irq_ack_notifier(kvm,
+   kvm-arch.vpit-pit_state.irq_ack_notifier);
mutex_lock(kvm-arch.vpit-pit_state.lock);
timer = kvm-arch.vpit-pit_state.pit_timer.timer;
hrtimer_cancel(timer);
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: VMX: Introduce KVM_SET_IDENTITY_MAP_ADDR ioctl

2009-07-26 Thread Avi Kivity
From: Sheng Yang sh...@linux.intel.com

KVM now allows the guest physical address of the EPT identity
mapping page to be modified.

(change from v1, discard unnecessary check, change ioctl to accept parameter
address rather than value)

Signed-off-by: Sheng Yang sh...@linux.intel.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 08732d7..e210b21 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -411,6 +411,7 @@ struct kvm_arch{
 
struct page *ept_identity_pagetable;
bool ept_identity_pagetable_done;
+   gpa_t ept_identity_map_addr;
 
unsigned long irq_sources_bitmap;
unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3a75db3..4ffc4c3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1719,7 +1719,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned 
long cr3)
eptp = construct_eptp(cr3);
vmcs_write64(EPT_POINTER, eptp);
guest_cr3 = is_paging(vcpu) ? vcpu-arch.cr3 :
-   VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+   vcpu-kvm-arch.ept_identity_map_addr;
}
 
vmx_flush_tlb(vcpu);
@@ -2122,7 +2122,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
if (likely(kvm-arch.ept_identity_pagetable_done))
return 1;
ret = 0;
-   identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR  PAGE_SHIFT;
+   identity_map_pfn = kvm-arch.ept_identity_map_addr  PAGE_SHIFT;
r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
if (r  0)
goto out;
@@ -2191,7 +2191,8 @@ static int alloc_identity_pagetable(struct kvm *kvm)
goto out;
kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
kvm_userspace_mem.flags = 0;
-   kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
+   kvm_userspace_mem.guest_phys_addr =
+   kvm-arch.ept_identity_map_addr;
kvm_userspace_mem.memory_size = PAGE_SIZE;
r = __kvm_set_memory_region(kvm, kvm_userspace_mem, 0);
if (r)
@@ -3814,9 +3815,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
if (alloc_apic_access_page(kvm) != 0)
goto free_vmcs;
 
-   if (enable_ept)
+   if (enable_ept) {
+   if (!kvm-arch.ept_identity_map_addr)
+   kvm-arch.ept_identity_map_addr =
+   VMX_EPT_IDENTITY_PAGETABLE_ADDR;
if (alloc_identity_pagetable(kvm) != 0)
goto free_vmcs;
+   }
 
return vmx-vcpu;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 18ce27f..2539e9a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1206,6 +1206,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IOEVENTFD:
case KVM_CAP_PIT2:
case KVM_CAP_PIT_STATE2:
+   case KVM_CAP_SET_IDENTITY_MAP_ADDR:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -1906,6 +1907,13 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, 
unsigned long addr)
return ret;
 }
 
+static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
+ u64 ident_addr)
+{
+   kvm-arch.ept_identity_map_addr = ident_addr;
+   return 0;
+}
+
 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
  u32 kvm_nr_mmu_pages)
 {
@@ -2169,6 +2177,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (r  0)
goto out;
break;
+   case KVM_SET_IDENTITY_MAP_ADDR: {
+   u64 ident_addr;
+
+   r = -EFAULT;
+   if (copy_from_user(ident_addr, argp, sizeof ident_addr))
+   goto out;
+   r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
+   if (r  0)
+   goto out;
+   break;
+   }
case KVM_SET_MEMORY_REGION: {
struct kvm_memory_region kvm_mem;
struct kvm_userspace_memory_region kvm_userspace_mem;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 230a91a..f8f8900 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -435,6 +435,7 @@ struct kvm_ioeventfd {
 #define KVM_CAP_PIT_STATE2 35
 #endif
 #define KVM_CAP_IOEVENTFD 36
+#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -512,6 +513,7 @@ struct kvm_irqfd {
 #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46,\
struct kvm_userspace_memory_region)
 #define KVM_SET_TSS_ADDR  _IO(KVMIO, 0x47)
+#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
 /* Device model IOC */
 

[COMMIT master] KVM: MMU: handle n_free_mmu_pages > n_alloc_mmu_pages in kvm_mmu_change_mmu_pages

2009-07-26 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

kvm_mmu_change_mmu_pages mishandles the case where n_alloc_mmu_pages is
smaller than n_free_mmu_pages, by not checking if the result of
the subtraction is negative.

It's a valid condition which can happen if a large number of pages has
been recently freed.

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 53c1d2c..9abea8e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1418,24 +1418,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct 
kvm_mmu_page *sp)
  */
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
 {
+   int used_pages;
+
+   used_pages = kvm-arch.n_alloc_mmu_pages - kvm-arch.n_free_mmu_pages;
+   used_pages = max(0, used_pages);
+
/*
 * If we set the number of mmu pages to be smaller be than the
 * number of actived pages , we must to free some mmu pages before we
 * change the value
 */
 
-   if ((kvm-arch.n_alloc_mmu_pages - kvm-arch.n_free_mmu_pages) 
-   kvm_nr_mmu_pages) {
-   int n_used_mmu_pages = kvm-arch.n_alloc_mmu_pages
-  - kvm-arch.n_free_mmu_pages;
-
-   while (n_used_mmu_pages  kvm_nr_mmu_pages) {
+   if (used_pages  kvm_nr_mmu_pages) {
+   while (used_pages  kvm_nr_mmu_pages) {
struct kvm_mmu_page *page;
 
page = container_of(kvm-arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
kvm_mmu_zap_page(kvm, page);
-   n_used_mmu_pages--;
+   used_pages--;
}
kvm-arch.n_free_mmu_pages = 0;
}
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: SVM: force new asid on vcpu migration

2009-07-26 Thread Avi Kivity
From: Marcelo Tosatti mtosa...@redhat.com

If a migrated vcpu matches the asid_generation value of the target pcpu,
there will be no TLB flush via TLB_CONTROL_FLUSH_ALL_ASID.

The check for vcpu.cpu in pre_svm_run is meaningless since svm_vcpu_load
already updated it on schedule in.

Such vcpu will VMRUN with stale TLB entries.

Based on original patch from Joerg Roedel 
(http://patchwork.kernel.org/patch/10021/)

Signed-off-by: Marcelo Tosatti mtosa...@redhat.com
Acked-by: Joerg Roedel joerg.roe...@amd.com

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 18085d3..b720b02 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -739,6 +739,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
svm-vmcb-control.tsc_offset += delta;
vcpu-cpu = cpu;
kvm_migrate_timers(vcpu);
+   svm-asid_generation = 0;
}
 
for (i = 0; i  NR_HOST_SAVE_USER_MSRS; i++)
@@ -1071,7 +1072,6 @@ static void new_asid(struct vcpu_svm *svm, struct 
svm_cpu_data *svm_data)
svm-vmcb-control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
}
 
-   svm-vcpu.cpu = svm_data-cpu;
svm-asid_generation = svm_data-asid_generation;
svm-vmcb-control.asid = svm_data-next_asid++;
 }
@@ -2320,8 +2320,8 @@ static void pre_svm_run(struct vcpu_svm *svm)
struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
 
svm-vmcb-control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
-   if (svm-vcpu.cpu != cpu ||
-   svm-asid_generation != svm_data-asid_generation)
+   /* FIXME: handle wraparound of asid_generation */
+   if (svm-asid_generation != svm_data-asid_generation)
new_asid(svm, svm_data);
 }
 
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: Drop obsolete cpu_get/put in make_all_cpus_request

2009-07-26 Thread Avi Kivity
From: Jan Kiszka jan.kis...@siemens.com

spin_lock disables preemption, so we can simply read the current cpu.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7cd1c10..98e4ec8 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -741,8 +741,8 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned 
int req)
if (alloc_cpumask_var(cpus, GFP_ATOMIC))
cpumask_clear(cpus);
 
-   me = get_cpu();
spin_lock(kvm-requests_lock);
+   me = smp_processor_id();
kvm_for_each_vcpu(i, vcpu, kvm) {
if (test_and_set_bit(req, vcpu-requests))
continue;
@@ -757,7 +757,6 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned 
int req)
else
called = false;
spin_unlock(kvm-requests_lock);
-   put_cpu();
free_cpumask_var(cpus);
return called;
 }
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] KVM: VMX: Avoid to return ENOTSUPP to userland

2009-07-26 Thread Avi Kivity
From: Jan Kiszka jan.kis...@web.de

Choose some allowed error values for the cases where VMX returned ENOTSUPP
so far, as these values could be returned by the KVM_RUN IOCTL.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
Signed-off-by: Marcelo Tosatti mtosa...@redhat.com

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a42d604..857b7ce 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3133,7 +3133,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
printk(KERN_ERR
   Fail to handle apic access vmexit! Offset is 0x%lx\n,
   offset);
-   return -ENOTSUPP;
+   return -ENOEXEC;
}
return 1;
 }
@@ -3202,7 +3202,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
 
if (exit_qualification  (1  6)) {
printk(KERN_ERR EPT: GPA exceeds GAW!\n);
-   return -ENOTSUPP;
+   return -EINVAL;
}
 
gla_validity = (exit_qualification  7)  0x3;
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[COMMIT master] Fix build with -disable-kvm due to recent upstream compatibility work

2009-07-26 Thread Avi Kivity
From: Avi Kivity a...@redhat.com

We now need kvm_context_t, kvm_allowed, and a few functions exposed even
with kvm disabled.

Signed-off-by: Avi Kivity a...@redhat.com

diff --git a/qemu-kvm.h b/qemu-kvm.h
index b186c9d..eb48ff8 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -936,8 +936,19 @@ uint32_t kvm_get_supported_cpuid(kvm_context_t kvm, 
uint32_t function, int reg);
 
 #else /* !CONFIG_KVM */
 
+typedef struct kvm_context *kvm_context_t;
+typedef struct kvm_vcpu_context *kvm_vcpu_context_t;
+
 struct kvm_pit_state { };
 
+static inline int kvm_init(int smp_cpus) { return 0; }
+static inline void kvm_inject_x86_mce(
+CPUState *cenv, int bank,uint64_t status,
+uint64_t mcg_status, uint64_t addr, uint64_t misc) { }
+
+
+extern int kvm_allowed;
+
 #endif /* !CONFIG_KVM */
 
 
@@ -1167,6 +1178,8 @@ static inline int kvm_set_migration_log(int enable)
 return kvm_physical_memory_set_dirty_tracking(enable);
 }
 
+#ifdef CONFIG_KVM
+
 typedef struct KVMSlot
 {
 target_phys_addr_t start_addr;
@@ -1199,3 +1212,5 @@ int kvm_vm_ioctl(KVMState *s, int type, ...);
 int kvm_check_extension(KVMState *s, unsigned int ext);
 
 #endif
+
+#endif
--
To unsubscribe from this list: send the line unsubscribe kvm-commits in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: NMI Injection to Guest

2009-07-26 Thread Gleb Natapov
On Sat, Jul 25, 2009 at 10:46:39PM +0200, Jiaqing Du wrote:
 Hi list,
 
 I'm trying to extend OProfile to support guest profiling. One step of
 my work is to push an NMI to the guest(s) when a performance counter
 overflows. Please correct me if the following is not correct:
 
 counter overflow --> NMI to host --> VM exit --> int $2 to handle
 NMI on host --> ...   --> VM entry --> NMI to guest
 
Correct except for the last step (--> NMI to guest). A host NMI is not
propagated to guests.

 On the path between VM-exit and VM-entry, I want to push an NMI to the
 guest. I tried to put the following code on the path, but never
 succeeded. Various weird things happened, such as KVM hangs, guest
 kernel oops, and host hangs. I tried both code with Linux 2.6.30 and
 version 88.
 
 if (vmx_nmi_allowed())  { vmx_inject_nmi(); }
 
 Any suggestions? Where is the right place to push an NMI and what are
 the necessary checks?
Call kvm_inject_nmi(vcpu). And don't forget to vcpu_load(vcpu) before
doing it. See kvm_vcpu_ioctl_nmi().

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host latency peaks due to kvm-intel

2009-07-26 Thread Sujit Karataparambil
 Do not meddle in the internals of kernels, for they are subtle and quick to
 panic.
Also the kvm code. Are you sure that the processor supports the KVM
extensions? I know of a lot of Intel architectures where KVM is not
supported. Especially the HW_CHECK_SUM. I am not sure, but this
seems to be a problem. Also, there is no dependency check for KVM on
Linux. What I mean by this is that KVM installs without problems on an
architecture that does not support the extensions. So compiling KVM alone
does not mean it works on an architecture.


-- 
-- Sujit K M

blog(http://kmsujit.blogspot.com/)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Very high memory usage with KVM

2009-07-26 Thread Avi Kivity

On 07/25/2009 08:43 PM, Daniel Bareiro wrote:

Hi all!

I have an installation with Ubuntu Hardy Heron server amd64 with KVM-62
from Ubuntu repositories installed on an HP Proliant DL380 G5 with two
Xeon E5405 quadcore processors and 16 GiB of RAM which has six VMs with
the following configuration of memory:

Hostname   |  RAM
===+===
Ganimedes  |2 GiB
Os |1 GiB
Aprender   |2 GiB
Aps0   |2 GiB
Aps2   |4 GiB
Ratatoskr  |4 GiB
===+===
TOTAL  |   15 GiB


Initially the host was created with a 1 GiB swap partition (plus the 1 GiB
of RAM that was left free for use by the host), but over time this proved
too small and I had to add a 7 GiB LV to be used as swap, giving a total
of 8 GiB of swap, of which only 9% is free at the moment. Is this memory
usage 'normal'?

r...@ss02:~# ps -e --sort -rss -Ho 
user,start_time,pid,pcpu,pmem,rss,size,vsz,args
USER START   PID %CPU %MEM   RSSSZVSZ COMMAND
[...]
root Jul06 27471 52.3 24.4 4023232 4292200 4350296   kvmratatoskr
root Jul24  9955  137 23.8 3923620 4308592 4350308   kvmaps2
root Jul06  8751  5.8  8.3 1368228 2171808 2229888   kvmaps0
root Jul07  8565  2.7  5.2 862844 2204704 2246416   kvmaprender
root Apr22  7842  0.6  3.6 600072 2172056 2230136   kvmganimedes
root Jul01  7944  0.6  2.0 334860 1119916 1177996   kvmos

r...@ss02:~# free
  total   used   free sharedbuffers cached
Mem:  16463388   16377844  85544  0 894216  66328
-/+ buffers/cache:   154173001046088
Swap:  83199487621916 698032


Can updating to KVM-84 or later improve this situation?
   


What is the storage configuration?  Are you using qcow2?  What are the 
image logical and physical sizes?


What is the host kernel (uname -a)?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM bug

2009-07-26 Thread Avi Kivity

On 07/23/2009 11:59 PM, Saksena, Abhishek wrote:

Hi
I am trying to boot a patched version of the Bochs BIOS on KVM. It works fine with
QEMU using the -no-kvm option. However, I get the following with KVM:


unhandled vm exit: 0x8021 vcpu_id 0
ds 88f9 (00088f9b/ p 1 dpl 3 db 0 s 1 type 3 l 0 g 0 avl 0)
   


When running on Intel we emulate real mode using vm86 mode.  This mode 
requires ds.base == ds.selector << 4, which isn't the case here.  You 
can modify the code to satisfy this requirement, or you can try the 
emulate_invalid_guest_state=1 module parameter (which will likely fail 
since it is not completely implemented).


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Write bit in Shadow Page Table Entry

2009-07-26 Thread Avi Kivity

On 07/23/2009 11:49 PM, Lynda Yang wrote:

Hi,
As I understand, the KVM code (kvm-86) may clear the write bit when
setting a shadow page table entry so that it can detect when an entry
needs to be marked dirty later. However, it also plays with the write
bit depending on whether the shadow page is allowed to be unsynched or
not. I'm not quite clear on the latter, so if anyone can provide some
insights it would be very much appreciated. Or perhaps even more
helpful...if it is possible to provide a clear picture of how KVM
generally plays with an entry's write bit.
   


Let's see.

1. If the spte was derived from a guest pte, then the writeable bit 
reflects the guest permissions.  If not (nested paging, real mode), the 
writeable bit is 1.
2. When we log dirty pages, writeable bits for the memory we are 
interested in are cleared.
3. If the spte points at a shadow page, the writeable bit is cleared to 
0 so that we are informed of updates to page tables.
4. Under certain conditions[1], we allow a shadowed guest page table to 
be writeable.  This happens on a write fault to a guest page table.


[1] The conditions are: the page is the lowest-level mapping, and there 
are no other uses of the page as a paging element.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2

2009-07-26 Thread Daniel Gollub
Hi Avi,

the last push broke --disable-kvm.

Further build regressions will be sent directly to kvm@vger.kernel.org, if no 
one complains.

Best Regards,
Daniel

--  Forwarded Message  --

Subject: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2
Date: Sunday 26 July 2009
From: qemu-...@buildbot.b1-systems.de
To: bere...@b1-systems.de, gol...@b1-systems.de

The Buildbot has detected a new failure of disable_kvm_i386_centos_5_2 on 
qemu-kvm.
Full details are available at:
 http://buildbot.b1-systems.de/qemu-
kvm/builders/disable_kvm_i386_centos_5_2/builds/9

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: avi-kvmbot1

Build Reason: 
Build Source Stamp: [branch master] HEAD
Blamelist: Amit Shah amit.s...@redhat.com,Dor Laor 
dl...@redhat.com,Glauber Costa glom...@redhat.com,Glauber Costa 
glom...@t60.(none),Huang Ying ying.hu...@intel.com,Marcelo Tosatti 
mtosa...@redhat.com,Michael S. Tsirkin m...@redhat.com,Sheng Yang 
sh...@linux.intel.com

BUILD FAILED: failed compile

sincerely,
 -The Buildbot


---
-- 
Daniel GollubGeschaeftsfuehrer: Ralph Dehner
FOSS Developer   Unternehmenssitz:  Vohburg
B1 Systems GmbH  Amtsgericht:   Ingolstadt
Mobil: +49-(0)-160 47 73 970 Handelsregister:   HRB 3537
EMail: gol...@b1-systems.de  http://www.b1-systems.de

Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg
http://pgpkeys.pca.dfn.de/pks/lookup?op=get&search=0xED14B95C2F8CA78D
The Buildbot has detected a new failure of disable_kvm_i386_centos_5_2 on 
qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/disable_kvm_i386_centos_5_2/builds/9

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: avi-kvmbot1

Build Reason: 
Build Source Stamp: [branch master] HEAD
Blamelist: Amit Shah amit.s...@redhat.com,Dor Laor dl...@redhat.com,Glauber 
Costa glom...@redhat.com,Glauber Costa glom...@t60.(none),Huang Ying 
ying.hu...@intel.com,Marcelo Tosatti mtosa...@redhat.com,Michael S. Tsirkin 
m...@redhat.com,Sheng Yang sh...@linux.intel.com

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



signature.asc
Description: This is a digitally signed message part.


Re: Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2

2009-07-26 Thread Avi Kivity

On 07/26/2009 02:51 PM, Daniel Gollub wrote:

Hi Avi,

the last push broke --disable-kvm.

   


Can you set up testing for the 'next' branch?  It will catch issues much 
earlier.



Further build regressions will be sent directly to kvm@vger.kernel.org, if no
one complains.
   


I will complain if it isn't!

Thanks for setting up buildbot, it's nice to have feedback a few minutes 
after pushing.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2

2009-07-26 Thread Daniel Gollub
Hi Avi,

On Sunday 26 July 2009 02:01:18 pm Avi Kivity wrote:
[...]
 Can you set up testing for the 'next' branch?  It will catch issues much
 earlier.

Actually, I thought it was. But for some reason it is not getting notified by 
the git hook. Could you ping me on IRC before you push something (only) to 
the next branch, so I can have a look at the buildbot logs ...


  Further build regressions will be sent directly to kvm@vger.kernel.org, if
  no one complains.

 I will complain if it isn't!
[...]

Ok, cool. It's set.

Best Regards,
Daniel

-- 
Daniel GollubGeschaeftsfuehrer: Ralph Dehner
FOSS Developer   Unternehmenssitz:  Vohburg
B1 Systems GmbH  Amtsgericht:   Ingolstadt
Mobil: +49-(0)-160 47 73 970 Handelsregister:   HRB 3537
EMail: gol...@b1-systems.de  http://www.b1-systems.de

Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg
http://pgpkeys.pca.dfn.de/pks/lookup?op=get&search=0xED14B95C2F8CA78D


signature.asc
Description: This is a digitally signed message part.


Re: Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2

2009-07-26 Thread Avi Kivity

On 07/26/2009 02:51 PM, Daniel Gollub wrote:

Hi Avi,

the last push broke --disable-kvm.

Further build regressions will be sent directly to kvm@vger.kernel.org, if no
one complains.
   


I see that a bunch of slaves are not connected.  Can you check what went 
wrong?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Fwd: buildbot failure in qemu-kvm on disable_kvm_i386_centos_5_2

2009-07-26 Thread Daniel Gollub
On Sunday 26 July 2009 02:19:02 pm Avi Kivity wrote:
  Further build regressions will be sent directly to kvm@vger.kernel.org, if
  no one complains.
 

 I see that a bunch of slaves are not connected.  Can you check what went
 wrong?

We had to move our buildslaves to a different box. Currently only your 
buildslave is active. I hope to get the others back online within the next 
hours..

I will later ask on the list for more buildslave instances.

Best Regards,
Daniel

-- 
Daniel GollubGeschaeftsfuehrer: Ralph Dehner
FOSS Developer   Unternehmenssitz:  Vohburg
B1 Systems GmbH  Amtsgericht:   Ingolstadt
Mobil: +49-(0)-160 47 73 970 Handelsregister:   HRB 3537
EMail: gol...@b1-systems.de  http://www.b1-systems.de

Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg
http://pgpkeys.pca.dfn.de/pks/lookup?op=get&search=0xED14B95C2F8CA78D


signature.asc
Description: This is a digitally signed message part.


[PATCHv3 0/2] virtio: find_vqs/del_vqs fixes

2009-07-26 Thread Michael S. Tsirkin
Here's a patch series to fix known regressions in virtio_pci,
by refactoring code along the lines suggested by Rusty.

This is on top of patch
virtio: fix memory leak on device removal
that has been applied by Rusty.

This supersedes patches:
[PATCHv3] virtio: recover from vector assignment failure
[PATCHv2] virtio: fix double free_irq on device removal

Michael S. Tsirkin (2):
  virtio: delete vq from list
  virtio: refactor find_vqs

 drivers/virtio/virtio_pci.c |  218 ---
 1 files changed, 124 insertions(+), 94 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv3 1/2] virtio: delete vq from list

2009-07-26 Thread Michael S. Tsirkin
This makes delete vq the reverse of find vq.
This is required to make it possible to retry find_vqs
after a failure, otherwise the list gets corrupted.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 drivers/virtio/virtio_pci.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 7e21389..2eaf1fb 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -464,7 +464,11 @@ static void vp_del_vq(struct virtqueue *vq)
 {
struct virtio_pci_device *vp_dev = to_vp_device(vq-vdev);
struct virtio_pci_vq_info *info = vq-priv;
-   unsigned long size;
+   unsigned long flags, size;
+
+spin_lock_irqsave(vp_dev-lock, flags);
+list_del(info-node);
+spin_unlock_irqrestore(vp_dev-lock, flags);
 
iowrite16(info-queue_index, vp_dev-ioaddr + VIRTIO_PCI_QUEUE_SEL);
 
-- 
1.6.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv3 2/2] virtio: refactor find_vqs

2009-07-26 Thread Michael S. Tsirkin
This refactors find_vqs, making it more readable and robust, and fixing
two regressions from 2.6.30:
- double free_irq causing BUG_ON on device removal
- probe failure when vq can't be assigned to msi-x vector
  (reported on old host kernels)

An older version of this patch was tested by Amit Shah.

Reported-by: Amit Shah amit.s...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 drivers/virtio/virtio_pci.c |  212 ---
 1 files changed, 119 insertions(+), 93 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 2eaf1fb..3ad47da 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -52,8 +52,10 @@ struct virtio_pci_device
char (*msix_names)[256];
/* Number of available vectors */
unsigned msix_vectors;
-   /* Vectors allocated */
+   /* Vectors allocated, excluding per-vq vectors if any */
unsigned msix_used_vectors;
+   /* Whether we have vector per vq */
+   bool per_vq_vectors;
 };
 
 /* Constants for MSI-X */
@@ -278,27 +280,24 @@ static void vp_free_vectors(struct virtio_device *vdev)
vp_dev-msix_entries = NULL;
 }
 
-static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
- int *options, int noptions)
-{
-   int i;
-   for (i = 0; i  noptions; ++i)
-   if (!pci_enable_msix(dev, entries, options[i]))
-   return options[i];
-   return -EBUSY;
-}
-
-static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs)
+static int vp_request_vectors(struct virtio_device *vdev, int nvectors,
+ bool per_vq_vectors)
 {
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
const char *name = dev_name(vp_dev-vdev.dev);
unsigned i, v;
int err = -ENOMEM;
-   /* We want at most one vector per queue and one for config changes.
-* Fallback to separate vectors for config and a shared for queues.
-* Finally fall back to regular interrupts. */
-   int options[] = { max_vqs + 1, 2 };
-   int nvectors = max(options[0], options[1]);
+
+   if (!nvectors) {
+   /* Can't allocate MSI-X vectors, use regular interrupt */
+   vp_dev-msix_vectors = 0;
+   err = request_irq(vp_dev-pci_dev-irq, vp_interrupt,
+ IRQF_SHARED, name, vp_dev);
+   if (err)
+   return err;
+   vp_dev-intx_enabled = 1;
+   return 0;
+   }
 
vp_dev-msix_entries = kmalloc(nvectors * sizeof *vp_dev-msix_entries,
   GFP_KERNEL);
@@ -312,41 +311,34 @@ static int vp_request_vectors(struct virtio_device *vdev, 
unsigned max_vqs)
for (i = 0; i  nvectors; ++i)
vp_dev-msix_entries[i].entry = i;
 
-   err = vp_enable_msix(vp_dev-pci_dev, vp_dev-msix_entries,
-options, ARRAY_SIZE(options));
-   if (err  0) {
-   /* Can't allocate enough MSI-X vectors, use regular interrupt */
-   vp_dev-msix_vectors = 0;
-   err = request_irq(vp_dev-pci_dev-irq, vp_interrupt,
- IRQF_SHARED, name, vp_dev);
-   if (err)
-   goto error;
-   vp_dev-intx_enabled = 1;
-   } else {
-   vp_dev-msix_vectors = err;
-   vp_dev-msix_enabled = 1;
-
-   /* Set the vector used for configuration */
-   v = vp_dev-msix_used_vectors;
-   snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names,
-%s-config, name);
-   err = request_irq(vp_dev-msix_entries[v].vector,
- vp_config_changed, 0, vp_dev-msix_names[v],
- vp_dev);
-   if (err)
-   goto error;
-   ++vp_dev-msix_used_vectors;
+   err = pci_enable_msix(vp_dev-pci_dev, vp_dev-msix_entries, nvectors);
+   if (err  0)
+   err = -ENOSPC;
+   if (err)
+   goto error;
+   vp_dev-msix_vectors = nvectors;
+   vp_dev-msix_enabled = 1;
+
+   /* Set the vector used for configuration */
+   v = vp_dev-msix_used_vectors;
+   snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names,
+%s-config, name);
+   err = request_irq(vp_dev-msix_entries[v].vector,
+ vp_config_changed, 0, vp_dev-msix_names[v],
+ vp_dev);
+   if (err)
+   goto error;
+   ++vp_dev-msix_used_vectors;
 
-   iowrite16(v, vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-   /* Verify we had enough resources to assign the vector */
-   v = ioread16(vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-   if (v == 

Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state

2009-07-26 Thread Avi Kivity

On 07/24/2009 10:00 AM, Jan Kiszka wrote:

Marcelo Tosatti wrote:
   

On Wed, Jul 22, 2009 at 11:53:26PM +0200, Jan Kiszka wrote:
 

Release and re-acquire preemption and IRQ lock in the same order as
vcpu_enter_guest does.
   

This should happen in vcpu_enter_guest, before it decides to disable
preemption/irqs (so you consolidate the control there).
 


Maybe, maybe not. handle_invalid_guest_state is an alternative way of
executing guest code, and it currently shares the setup and tear-down
with vmx_vcpu_run. If it has to share parts that actually require
preemption and IRQ lock, then moving makes not much sense. Can anyone
comment on what the requirements for handle_invalid_guest_state are?
   


Like you said, it's an alternative to vmx entry/exit, so it shares the 
same requirements.  It must run with interrupts and preemption enabled, 
but any code that normally runs in the entry critical section (like 
interrupt injection) must continue to run in a critical section.




I would suggest to merge this fix first and then decide about and
potentially merge a refactoring patch.
   


btw, what does it fix?  a debug warning?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] fix serious regression

2009-07-26 Thread Avi Kivity

On 07/22/2009 02:57 AM, Glauber Costa wrote:

Today I found a very catastrophic regression: I cannot run my mission
critical servers running RHL7.1 anymore. This is a total disaster.

Fortunately, I was able to isolate the commit that caused it:
 commit bb598da496c040d42dde564bd8ace181be52293e
 Author: Glauber Costaglom...@redhat.com
 Date:   Mon Jul 6 16:12:52 2009 -0400

This guy is certainly stupid, and deserves punishment. It means I'll
be writing code using emacs for the next week.

Marcelo, please apply

   


While I appreciate the humour, it means I have to totally rewrite the 
changelog plus ask a question.  Please submit patches with the most 
boring changelogs you can and reserve the funnies for the --- section 
where they belong.



diff --git a/qemu-kvm.c b/qemu-kvm.c
index e200dea..393c5cc 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1003,8 +1003,6 @@ int pre_kvm_run(kvm_context_t kvm, CPUState *env)
  {
  kvm_arch_pre_kvm_run(kvm-opaque, env);

-if (env-exit_request)
-return 1;
  pthread_mutex_unlock(qemu_mutex);
  return 0;
  }
   


Can you explain the failure mode?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] kvm: fix ack not being delivered when msi present

2009-07-26 Thread Gleb Natapov
On Sun, Jul 26, 2009 at 05:10:01PM +0300, Michael S. Tsirkin wrote:
 kvm_notify_acked_irq does not check irq type, so that it sometimes
 interprets msi vector as irq.  As a result, ack notifiers are not
 called, which typically hangs the guest.  The fix is to track and
 check irq type.
 
Looks good to me.

 Signed-off-by: Michael S. Tsirkin m...@redhat.com
Acked-by: Gleb Natapov g...@redhat.com

 ---
 
 Avi, since this bug was introduced in 2.6.30 already, I think
 we need the fix in 2.6.30.x as well as 2.6.31.
 
  include/linux/kvm_host.h |1 +
  virt/kvm/irq_comm.c  |4 +++-
  2 files changed, 4 insertions(+), 1 deletions(-)
 
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index f244f11..f814512 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -119,6 +119,7 @@ struct kvm_memory_slot {
  
  struct kvm_kernel_irq_routing_entry {
   u32 gsi;
 + u32 type;
   int (*set)(struct kvm_kernel_irq_routing_entry *e,
   struct kvm *kvm, int level);
   union {
 diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
 index 100c267..001663f 100644
 --- a/virt/kvm/irq_comm.c
 +++ b/virt/kvm/irq_comm.c
 @@ -171,7 +171,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned 
 irqchip, unsigned pin)
   trace_kvm_ack_irq(irqchip, pin);
  
   list_for_each_entry(e, kvm-irq_routing, link)
 - if (e-irqchip.irqchip == irqchip 
 + if (e-type == KVM_IRQ_ROUTING_IRQCHIP 
 + e-irqchip.irqchip == irqchip 
   e-irqchip.pin == pin) {
   gsi = e-gsi;
   break;
 @@ -288,6 +289,7 @@ static int setup_routing_entry(struct 
 kvm_kernel_irq_routing_entry *e,
   int delta;
  
   e-gsi = ue-gsi;
 + e-type = ue-type;
   switch (ue-type) {
   case KVM_IRQ_ROUTING_IRQCHIP:
   delta = 0;
 -- 
 1.6.2.5

--
Gleb.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host latency peaks due to kvm-intel

2009-07-26 Thread Jan Kiszka
Avi Kivity wrote:
 On 07/25/2009 12:55 PM, Jan Kiszka wrote:
 Avi Kivity wrote:
   
 On 07/24/2009 12:41 PM, Jan Kiszka wrote:
 
 I vaguely recall that someone promised to add a feature reporting
 facility for all those nice things, modern VM-extensions may or may not
 support (something like or even an extension of /proc/cpuinfo). What is
 the state of this plan? Would be specifically interesting for Intel
 CPUs
 as there seem to be many of them out there with restrictions for
 special
 use cases - like real-time.


 Newer kernels do report some vmx features (like flexpriority) in
 /proc/cpuinfo but not all.

  

 Ah, nice. Then we just need this?

 

 From: Jan Kiszkajan.kis...@siemens.com
 Subject: [PATCH] x86: Report VMX feature vwbinvd

 Not all VMX-capable CPUs support guest exits on wbinvd execution. If
 this is not supported, the instruction will run natively on behalf of
 the guest. This can cause multi-millisecond latencies to the host which
 is very problematic in real-time scenarios.

 Report the wbinvd trapping feature along with other VMX feature flags,
 calling it 'vwbinvd' ('virtual wbinvd').


 
 What about AMD cpus that can always trap wbinvd?  do we set the bit or
 do we trust the user to know that it isn't needed on AMD (I suppose the
 latter)?

I also think that the feature flags should remain vendor-specific.

 
 This should go in via tip.git, it isn't really kvm related (except that
 kvm should start reading these caps one day instead of querying the
 hardware directly).
 

OK, will go that way. Probably I will also add some flags for AMD's NPT,
Intel's EPT and the new unrestricted guest mode while I'm at it.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state

2009-07-26 Thread Jan Kiszka
Avi Kivity wrote:
 On 07/24/2009 10:00 AM, Jan Kiszka wrote:
 Marcelo Tosatti wrote:
   
 On Wed, Jul 22, 2009 at 11:53:26PM +0200, Jan Kiszka wrote:
 
 Release and re-acquire preemption and IRQ lock in the same order as
 vcpu_enter_guest does.

 This should happen in vcpu_enter_guest, before it decides to disable
 preemption/irqs (so you consolidate the control there).
  

 Maybe, maybe not. handle_invalid_guest_state is an alternative way of
 executing guest code, and it currently shares the setup and tear-down
 with vmx_vcpu_run. If it has to share parts that actually require
 preemption and IRQ lock, then moving makes not much sense. Can anyone
 comment on what the requirements for handle_invalid_guest_state are?

 
 Like you said, it's an alternative to vmx entry/exit, so it shares the
 same requirements.  It must run with interrupts and preemption enabled,
 but any code that normally runs in the entry critical section (like
 interrupt injection) must continue to run in a critical section.
 
 
 I would suggest to merge this fix first and then decide about and
 potentially merge a refactoring patch.

 
 btw, what does it fix?  a debug warning?
 

I haven't seen anything in the wild, and I don't think it would raise a
warning. All it should cause is a potential delay of some pending
reschedule as preempt_enable will not fire under local_irq_disable.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state

2009-07-26 Thread Avi Kivity

On 07/26/2009 05:23 PM, Jan Kiszka wrote:

btw, what does it fix?  a debug warning?

 


I haven't seen anything in the wild, and I don't think it would raise a
warning. All it should cause is a potential delay of some pending
reschedule as preempt_enable will not fire under local_irq_disable.
   


Ah, okay, then it is a real fix.  Preempt-correctness is important.

(but won't local_irq_enable() reschedule?)

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


buildbot failure in qemu-kvm on default_x86_64_debian_5_0

2009-07-26 Thread qemu-kvm
The Buildbot has detected a new failure of default_x86_64_debian_5_0 on 
qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_debian_5_0/builds/8

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_1

Build Reason: The web-page 'force build' button was pressed by 'Daniel Gollub': 
test: new debian5 buildslave

Build Source Stamp: HEAD
Blamelist: 

BUILD FAILED: failed git

sincerely,
 -The Buildbot

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host latency peaks due to kvm-intel

2009-07-26 Thread Jan Kiszka
Avi Kivity wrote:
 On 07/24/2009 12:41 PM, Jan Kiszka wrote:
 Jan (who is now patching his guest to avoid wbinvd where possible)


 
 Is there ever a case where it is required?  What about under a
 hypervisor (i.e. check the hypervisor enabled bit).
 

Reminds me of the discussion in '07 when I first stumbled over this :) :
Yes, the bochs bios could safely skip the wbinvd in qemu mode. But that
won't save us from Linux and - far more problematic - Windows or any
binary-only guests which think they have to issue it.

One may then close one's eyes, fire up the guest and then start the
time-critical host application in the hope that the guest remains calm
as long as it's up and running. But, well...

Jan



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state

2009-07-26 Thread Jan Kiszka
Avi Kivity wrote:
 On 07/26/2009 05:23 PM, Jan Kiszka wrote:
 btw, what does it fix?  a debug warning?

  

 I haven't seen anything in the wild, and I don't think it would raise a
 warning. All it should cause is a potential delay of some pending
 reschedule as preempt_enable will not fire under local_irq_disable.

 
 Ah, okay, then it is a real fix.  Preempt-correctness is important.
 
 (but won't local_irq_enable() reschedule?)

The last time I checked it was essentially a plain 'sti'.

Jan



signature.asc
Description: OpenPGP digital signature


Re: Host latency peaks due to kvm-intel

2009-07-26 Thread Avi Kivity

On 07/26/2009 05:34 PM, Jan Kiszka wrote:

Avi Kivity wrote:
   

On 07/24/2009 12:41 PM, Jan Kiszka wrote:
 

Jan (who is now patching his guest to avoid wbinvd where possible)


   

Is there ever a case where it is required?  What about under a
hypervisor (i.e. check the hypervisor enabled bit).

 


Reminds me of the discussion in '07 when I first stumbled over this :) :
Yes, the bochs bios could safely skip the wbinvd in qemu mode. But that
won't save us from Linux and - far more problematic - Windows or any
binary-only guests which think they have to issue it.

One may then close one's eyes, fire up the guest and then start the
time-critical host application in the hope that the guest remains calm
as long as it's up and running. But, well...
   


Given that it's now '09, how critical is the problem?  Don't most cpus 
have vwbinvd now?


If so, the real-time management application can simply refuse to run on 
such an old processor.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state

2009-07-26 Thread Avi Kivity

On 07/26/2009 05:38 PM, Jan Kiszka wrote:

Avi Kivity wrote:
   

On 07/26/2009 05:23 PM, Jan Kiszka wrote:
 

btw, what does it fix?  a debug warning?


 

I haven't seen anything in the wild, and I don't think it would raise a
warning. All it should cause is a potential delay of some pending
reschedule as preempt_enable will not fire under local_irq_disable.

   

Ah, okay, then it is a real fix.  Preempt-correctness is important.

(but won't local_irq_enable() reschedule?)
 


The last time I checked it was essentially a plain 'sti'.

   


Presumably there's a reschedule interrupt queued; I think if you set the 
reschedule bit you have to IPI the cpu running the task.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host latency peaks due to kvm-intel

2009-07-26 Thread Jan Kiszka
Avi Kivity wrote:
 On 07/26/2009 05:34 PM, Jan Kiszka wrote:
 Avi Kivity wrote:
   
 On 07/24/2009 12:41 PM, Jan Kiszka wrote:
 
 Jan (who is now patching his guest to avoid wbinvd where possible)



 Is there ever a case where it is required?  What about under a
 hypervisor (i.e. check the hypervisor enabled bit).

  

 Reminds me of the discussion in '07 when I first stumbled over this :) :
 Yes, the bochs bios could safely skip the wbinvd in qemu mode. But that
 won't save us from Linux and - far more problematic - Windows or any
 binary-only guests which think they have to issue it.

 One may then close one's eyes, fire up the guest and then start the
 time-critical host application in the hope that the guest remains calm
 as long as it's up and running. But, well...

 
 Given that it's now '09, how critical is the problem?  Don't most cpus
 have vwbinvd now?

Sadly, in (embedded) industry you have to live with old hardware for
quite a long time. And I would have to throw my only 2-years-old
notebook from the table to have a more decent portable test environment.

 
 If so, the real-time management application can simply refuse to run on
 such an old processor.
 

At least one could go and collect the cpuinfo from some box that suffers
from high latencies. Normally, you go through extensive testing anyway,
also checking for issues like crazy SMI BIOS code that runs for eternities.

Jan



signature.asc
Description: OpenPGP digital signature


Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state

2009-07-26 Thread Jan Kiszka
Avi Kivity wrote:
 On 07/26/2009 05:38 PM, Jan Kiszka wrote:
 Avi Kivity wrote:
   
 On 07/26/2009 05:23 PM, Jan Kiszka wrote:
 
 btw, what does it fix?  a debug warning?


  
 I haven't seen anything in the wild, and I don't think it would raise a
 warning. All it should cause is a potential delay of some pending
 reschedule as preempt_enable will not fire under local_irq_disable.


 Ah, okay, then it is a real fix.  Preempt-correctness is important.

 (but won't local_irq_enable() reschedule?)
  

 The last time I checked it was essentially a plain 'sti'.


 
 Presumably there's a reschedule interrupt queued; I think if you set the
 reschedule bit you have to IPI the cpu running the task.
 

Yeah. But as we preempt_disable first, that one might have been
processed already.

Jan



signature.asc
Description: OpenPGP digital signature


Re: Very high memory usage with KVM

2009-07-26 Thread Daniel Bareiro
Hi Avi.

On Sunday, 26 July 2009 14:31:57 +0300,
Avi Kivity wrote:

 I have an installation with Ubuntu Hardy Heron server amd64 with KVM-62
 from Ubuntu repositories installed on an HP Proliant DL380 G5 with two
 Xeon E5405 quadcore processors and 16 GiB of RAM which has six VMs with
 the following configuration of memory:
 
 Hostname   |  RAM
 ===+===
 Ganimedes  |2 GiB
 Os |1 GiB
 Aprender   |2 GiB
 Aps0   |2 GiB
 Aps2   |4 GiB
 Ratatoskr  |4 GiB
 ===+===
 TOTAL  |   15 GiB
 
 
 Initially the host was created with a swap partition of 1 GiB (in
 addition to the 1 GiB of RAM left free for the host's own use), but over
 time this proved too small and I had to add a 7 GiB LV as extra swap,
 for a total of 8 GiB of swap, of which only 9% is free at the moment.
 Is this memory usage 'normal'?
 
 r...@ss02:~# ps -e --sort -rss -Ho 
 user,start_time,pid,pcpu,pmem,rss,size,vsz,args
 USER START   PID %CPU %MEM   RSSSZVSZ COMMAND
 [...]
 root Jul06 27471 52.3 24.4 4023232 4292200 4350296   kvmratatoskr
 root Jul24  9955  137 23.8 3923620 4308592 4350308   kvmaps2
 root Jul06  8751  5.8  8.3 1368228 2171808 2229888   kvmaps0
 root Jul07  8565  2.7  5.2 862844 2204704 2246416   kvmaprender
 root Apr22  7842  0.6  3.6 600072 2172056 2230136   kvmganimedes
 root Jul01  7944  0.6  2.0 334860 1119916 1177996   kvmos
 
 r...@ss02:~# free
   total   used   free sharedbuffers cached
 Mem:  16463388   16377844  85544  0 894216  66328
 -/+ buffers/cache:   154173001046088
 Swap:  83199487621916 698032
 
 
 Could updating to KVM-84 or later improve this situation?
 
 What is the storage configuration? Are you using qcow2?

The host machine has 8 x 300 GiB SAS disks in hardware RAID 5 (7 disks
plus 1 spare) configured as 1 logical drive. The partitioning scheme of
the host is as follows:


r...@ss02:~# fdisk -l /dev/cciss/c0d0

Disk /dev/cciss/c0d0: 1799.7 GB, 1799797127168 bytes
255 heads, 63 sectors/track, 218812 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes
Disk identifier: 0x000af3c3

   Device Boot  Start End  Blocks   Id  System
/dev/cciss/c0d0p1   1 122  979933+  82  Linux swap / 
Solaris
/dev/cciss/c0d0p2   * 1231338 9767520   83  Linux
/dev/cciss/c0d0p31339  218812  1746859905   8e  Linux LVM


I'm not using qcow2 files. The /dev/cciss/c0d0p3 partition is a physical
volume that maintains the logical volumes that are used for VM's disks:


r...@ss02:~# pvs
  PVVG   Fmt  Attr PSize PFree
  /dev/cciss/c0d0p3 vm   lvm2 a-   1,63T 1,13T


 What are the image logical and physical sizes?

The disks for the VMs have these sizes:

  aprender-raiz   vm   -wi-ao   8,00G
  aprender-space  vm   -wi-ao  20,00G

  aps0-raiz   vm   -wi-ao   7,00G
  aps0-space  vm   -wi-ao  10,00G

  aps2-cache  vm   -wi-ao  20,00G
  aps2-index  vm   -wi-ao  10,00G
  aps2-raiz   vm   -wi-ao   7,00G
  aps2-space  vm   -wi-ao  10,00G

  ganimedes-raiz  vm   -wi-ao   5,00G
  ganimedes-space vm   -wi-ao  10,00G

  os-disk vm   -wi-ao   6,00G
  os-mailbox  vm   -wi-ao 150,00G
  os-spacevm   -wi-ao  10,00G

  ratatoskr-raiz  vm   -wi-ao   8,00G
  ratatoskr-space vm   -wi-ao 200,00G

The internal partitioning scheme of each of the VMs is as follows:

* Aprender:

aprender:~# fdisk -l /dev/hda

Disk /dev/hda: 8589 MB, 8589934592 bytes
255 heads, 63 sectors/track, 1044 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes

   Device Boot  Start End  Blocks   Id  System
/dev/hda1   1 486 3903763+  82  Linux swap / Solaris
/dev/hda2 4871044 4482135   83  Linux
aprender:~#
aprender:~#
aprender:~# fdisk -l /dev/hdb

Disk /dev/hdb: 21.4 GB, 21474836480 bytes
255 heads, 63 sectors/track, 2610 cylinders
Units = cylinders of 16065 * 512 = 8225280 bytes

   Device Boot  Start End  Blocks   Id  System
/dev/hdb1   1261020964793+  83  Linux


* Aps0:

[r...@aps:~]
$fdisk -l /dev/hda

Disco /dev/hda: 7516 MB, 7516192768 bytes
255 heads, 63 sectors/track, 913 cylinders
Units = cilindros of 16065 * 512 = 8225280 bytes
Disk identifier: 0x00039c2a

Disposit. InicioComienzo  Fin  Bloques  Id  Sistema
/dev/hda1   1 365 2931831   82  Linux swap / Solaris
/dev/hda2 366 913 4401810   83  Linux
[r...@aps:~]
$
[r...@aps:~]
$fdisk -l /dev/hdb

Disco /dev/hdb: 10.7 GB, 10737418240 bytes
255 heads, 63 sectors/track, 1305 cylinders
Units = cilindros of 16065 * 512 = 8225280 bytes
Disk identifier: 0x00087cc1

Disposit. InicioComienzo  Fin  Bloques  Id  Sistema
/dev/hdb1   1

Re: [PATCH] KVM: VMX: Fix locking order in handle_invalid_guest_state

2009-07-26 Thread Avi Kivity

On 07/26/2009 05:55 PM, Jan Kiszka wrote:

Presumably there's a reschedule interrupt queued; I think if you set the
reschedule bit you have to IPI the cpu running the task.
 


Yeah. But as we preempt_disable first, that one might have been
processed already.
   


Ah, yes.  Thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Very high memory usage with KVM

2009-07-26 Thread Avi Kivity

On 07/26/2009 05:56 PM, Daniel Bareiro wrote:



What is the storage configuration? Are you using qcow2?
 


I'm not using qcow2 files. The /dev/cciss/c0d0p3 partition is a physical
volume that maintains the logical volumes that are used for VM's disks:

   


In this case there should be no excessive memory usage.  qcow2 could use 
extra memory, especially on older qemu-kvm versions (or images created 
with older qemu-img versions).



What is the host kernel (uname -a)?
 


r...@ss02:~# uname -a
Linux ss02 2.6.24-19-server #1 SMP Wed Aug 20 18:43:06 UTC 2008 x86_64 GNU/Linux

   


kvm memory management with pre 2.6.27 host kernels is pretty weak.  
Using a newer host kernel (and newer kvm) may solve this problem.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: buildbot failure in qemu-kvm on default_x86_64_debian_5_0

2009-07-26 Thread Avi Kivity

On 07/26/2009 05:26 PM, qemu-...@buildbot.b1-systems.de wrote:

The Buildbot has detected a new failure of default_x86_64_debian_5_0 on 
qemu-kvm.
Full details are available at:
  
http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_debian_5_0/builds/8

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_1

Build Reason: The web-page 'force build' button was pressed by 'Daniel Gollub': 
test: new debian5 buildslave

Build Source Stamp: HEAD
Blamelist:

BUILD FAILED: failed git

   


Upon execvpe git-init ['git-init'] in environment id 19800080
:Traceback (most recent call last):
  File /usr/lib/python2.5/site-packages/twisted/internet/process.py, line 
394, in _fork
executable, args, environment)
  File /usr/lib/python2.5/site-packages/twisted/internet/process.py, line 
440, in _execChild
os.execvpe(executable, args, environment)
  File /usr/lib/python2.5/os.py, line 363, in execvpe
_execvpe(file, args, env)
  File /usr/lib/python2.5/os.py, line 390, in _execvpe
func(fullname, *argrest)
OSError: [Errno 2] No such file or directory
program finished with exit code 1


Either git is not installed, or a new version of git is installed which 
no longer has git-init (instead it has 'git init').


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: buildbot failure in qemu-kvm on default_x86_64_debian_5_0

2009-07-26 Thread Daniel Gollub
On Sunday 26 July 2009 05:14:37 pm Avi Kivity wrote:
 Either git is not installed, or a new version of git is installed which
 no longer has git-init (instead it has 'git init').

It's already fixed - sorry about the noise. I forgot to install git-core.

Check build #9:
http://buildbot.b1-systems.de/qemu-kvm/admin/builders/default_x86_64_debian_5_0/builds/9

Best Regards,
Daniel

-- 
Daniel GollubGeschaeftsfuehrer: Ralph Dehner
FOSS Developer   Unternehmenssitz:  Vohburg
B1 Systems GmbH  Amtsgericht:   Ingolstadt
Mobil: +49-(0)-160 47 73 970 Handelsregister:   HRB 3537
EMail: gol...@b1-systems.de  http://www.b1-systems.de

Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg
http://pgpkeys.pca.dfn.de/pks/lookup?op=getsearch=0xED14B95C2F8CA78D
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: buildbot failure in qemu-kvm on default_x86_64_debian_5_0

2009-07-26 Thread Daniel Gollub
On Sunday 26 July 2009 05:13:14 pm Daniel Gollub wrote:
 Check build #9:
 http://buildbot.b1-systems.de/qemu-kvm/admin/builders/default_x86_64_debian
_5_0/builds/9

Or try this (without admin ;)):

http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_debian_5_0/builds/9

Best Regards,
Daniel

-- 
Daniel GollubGeschaeftsfuehrer: Ralph Dehner
FOSS Developer   Unternehmenssitz:  Vohburg
B1 Systems GmbH  Amtsgericht:   Ingolstadt
Mobil: +49-(0)-160 47 73 970 Handelsregister:   HRB 3537
EMail: gol...@b1-systems.de  http://www.b1-systems.de

Adresse: B1 Systems GmbH, Osterfeldstraße 7, 85088 Vohburg
http://pgpkeys.pca.dfn.de/pks/lookup?op=getsearch=0xED14B95C2F8CA78D
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/6] kvm/x86/svm: force new asid on vcpu migration

2009-07-26 Thread Avi Kivity

On 03/05/2009 02:12 PM, Joerg Roedel wrote:

Signed-off-by: Joerg Roedel joerg.roe...@amd.com
---
  arch/x86/kvm/svm.c |3 ++-
  1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1821c20..0e66bca 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -180,7 +180,7 @@ static inline void kvm_write_cr2(unsigned long val)

  static inline void force_new_asid(struct kvm_vcpu *vcpu)
  {
-   to_svm(vcpu)-asid_generation--;
+   to_svm(vcpu)-asid_generation = 0;
  }

  static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
@@ -716,6 +716,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
svm-vmcb-control.tsc_offset += delta;
vcpu-cpu = cpu;
kvm_migrate_timers(vcpu);
+   force_new_asid(vcpu);
}

for (i = 0; i  NR_HOST_SAVE_USER_MSRS; i++)
   


Does this remove the need for 6eaa802c (KVM: SVM: fix random segfaults 
with NPT enabled)?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv3 1/2] virtio: delete vq from list

2009-07-26 Thread Michael S. Tsirkin
On Sun, Jul 26, 2009 at 03:48:01PM +0300, Michael S. Tsirkin wrote:
 This makes delete vq the reverse of find vq.
 This is required to make it possible to retry find_vqs
 after a failure, otherwise the list gets corrupted.
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 ---
  drivers/virtio/virtio_pci.c |6 +-
  1 files changed, 5 insertions(+), 1 deletions(-)
 
 diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
 index 7e21389..2eaf1fb 100644
 --- a/drivers/virtio/virtio_pci.c
 +++ b/drivers/virtio/virtio_pci.c
 @@ -464,7 +464,11 @@ static void vp_del_vq(struct virtqueue *vq)
  {
   struct virtio_pci_device *vp_dev = to_vp_device(vq-vdev);
   struct virtio_pci_vq_info *info = vq-priv;
 - unsigned long size;
 + unsigned long flags, size;
 +
 +spin_lock_irqsave(vp_dev-lock, flags);
 +list_del(info-node);
 +spin_unlock_irqrestore(vp_dev-lock, flags);

Grr, whitespace damage. Not sure how this got in, resending a corrected
patch. Sorry about the churn.

   iowrite16(info-queue_index, vp_dev-ioaddr + VIRTIO_PCI_QUEUE_SEL);
  
 -- 
 1.6.2.5
 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv4 0/2] virtio: find_vqs/del_vqs fixes

2009-07-26 Thread Michael S. Tsirkin
Here's a patch series to fix known regressions in virtio_pci,
by refactoring code along the lines suggested by Rusty.

Changes since v3:
   whitespace fixed in PATCH 1/2

This is on top of patch
virtio: fix memory leak on device removal
that has been applied by Rusty.

This supersedes patches:
[PATCHv3] virtio: recover from vector assignment failure
[PATCHv2] virtio: fix double free_irq on device removal

Michael S. Tsirkin (2):
  virtio: make del_vq delete vq from list
  virtio: refactor find_vqs

 drivers/virtio/virtio_pci.c |  218 ---
 1 files changed, 124 insertions(+), 94 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv4 1/2] virtio: make del_vq delete vq from list

2009-07-26 Thread Michael S. Tsirkin
This makes delete vq the reverse of find vq.
This is required to make it possible to retry find_vqs
after a failure, otherwise the list gets corrupted.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 drivers/virtio/virtio_pci.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 7e21389..4c74c72 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -464,7 +464,11 @@ static void vp_del_vq(struct virtqueue *vq)
 {
struct virtio_pci_device *vp_dev = to_vp_device(vq-vdev);
struct virtio_pci_vq_info *info = vq-priv;
-   unsigned long size;
+   unsigned long flags, size;
+
+   spin_lock_irqsave(vp_dev-lock, flags);
+   list_del(info-node);
+   spin_unlock_irqrestore(vp_dev-lock, flags);
 
iowrite16(info-queue_index, vp_dev-ioaddr + VIRTIO_PCI_QUEUE_SEL);
 
-- 
1.6.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv4 2/2] virtio: refactor find_vqs

2009-07-26 Thread Michael S. Tsirkin
This refactors find_vqs, making it more readable and robust, and fixing
two regressions from 2.6.30:
- double free_irq causing BUG_ON on device removal
- probe failure when vq can't be assigned to msi-x vector
  (reported on old host kernels)

An older version of this patch was tested by Amit Shah.

Reported-by: Amit Shah amit.s...@redhat.com
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 drivers/virtio/virtio_pci.c |  212 ---
 1 files changed, 119 insertions(+), 93 deletions(-)

diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 4c74c72..c17b830 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -52,8 +52,10 @@ struct virtio_pci_device
char (*msix_names)[256];
/* Number of available vectors */
unsigned msix_vectors;
-   /* Vectors allocated */
+   /* Vectors allocated, excluding per-vq vectors if any */
unsigned msix_used_vectors;
+   /* Whether we have vector per vq */
+   bool per_vq_vectors;
 };
 
 /* Constants for MSI-X */
@@ -278,27 +280,24 @@ static void vp_free_vectors(struct virtio_device *vdev)
vp_dev-msix_entries = NULL;
 }
 
-static int vp_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
- int *options, int noptions)
-{
-   int i;
-   for (i = 0; i  noptions; ++i)
-   if (!pci_enable_msix(dev, entries, options[i]))
-   return options[i];
-   return -EBUSY;
-}
-
-static int vp_request_vectors(struct virtio_device *vdev, unsigned max_vqs)
+static int vp_request_vectors(struct virtio_device *vdev, int nvectors,
+ bool per_vq_vectors)
 {
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
const char *name = dev_name(vp_dev-vdev.dev);
unsigned i, v;
int err = -ENOMEM;
-   /* We want at most one vector per queue and one for config changes.
-* Fallback to separate vectors for config and a shared for queues.
-* Finally fall back to regular interrupts. */
-   int options[] = { max_vqs + 1, 2 };
-   int nvectors = max(options[0], options[1]);
+
+   if (!nvectors) {
+   /* Can't allocate MSI-X vectors, use regular interrupt */
+   vp_dev-msix_vectors = 0;
+   err = request_irq(vp_dev-pci_dev-irq, vp_interrupt,
+ IRQF_SHARED, name, vp_dev);
+   if (err)
+   return err;
+   vp_dev-intx_enabled = 1;
+   return 0;
+   }
 
vp_dev-msix_entries = kmalloc(nvectors * sizeof *vp_dev-msix_entries,
   GFP_KERNEL);
@@ -312,41 +311,34 @@ static int vp_request_vectors(struct virtio_device *vdev, 
unsigned max_vqs)
for (i = 0; i  nvectors; ++i)
vp_dev-msix_entries[i].entry = i;
 
-   err = vp_enable_msix(vp_dev-pci_dev, vp_dev-msix_entries,
-options, ARRAY_SIZE(options));
-   if (err  0) {
-   /* Can't allocate enough MSI-X vectors, use regular interrupt */
-   vp_dev-msix_vectors = 0;
-   err = request_irq(vp_dev-pci_dev-irq, vp_interrupt,
- IRQF_SHARED, name, vp_dev);
-   if (err)
-   goto error;
-   vp_dev-intx_enabled = 1;
-   } else {
-   vp_dev-msix_vectors = err;
-   vp_dev-msix_enabled = 1;
-
-   /* Set the vector used for configuration */
-   v = vp_dev-msix_used_vectors;
-   snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names,
-%s-config, name);
-   err = request_irq(vp_dev-msix_entries[v].vector,
- vp_config_changed, 0, vp_dev-msix_names[v],
- vp_dev);
-   if (err)
-   goto error;
-   ++vp_dev-msix_used_vectors;
+   err = pci_enable_msix(vp_dev-pci_dev, vp_dev-msix_entries, nvectors);
+   if (err  0)
+   err = -ENOSPC;
+   if (err)
+   goto error;
+   vp_dev-msix_vectors = nvectors;
+   vp_dev-msix_enabled = 1;
+
+   /* Set the vector used for configuration */
+   v = vp_dev-msix_used_vectors;
+   snprintf(vp_dev-msix_names[v], sizeof *vp_dev-msix_names,
+%s-config, name);
+   err = request_irq(vp_dev-msix_entries[v].vector,
+ vp_config_changed, 0, vp_dev-msix_names[v],
+ vp_dev);
+   if (err)
+   goto error;
+   ++vp_dev-msix_used_vectors;
 
-   iowrite16(v, vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-   /* Verify we had enough resources to assign the vector */
-   v = ioread16(vp_dev-ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
-   if (v == 

Re: Very high memory usage with KVM

2009-07-26 Thread Daniel Bareiro
Avi

On Sunday, 26 July 2009 18:11:27 +0300,
Avi Kivity wrote:

 What is the storage configuration? Are you using qcow2?

 I'm not using qcow2 files. The /dev/cciss/c0d0p3 partition is a
 physical volume that maintains the logical volumes that are used for
 VM's disks:
 
 In this case there should be no excessive memory usage.  qcow2 could
 use extra memory, especially on older qemu-kvm versions (or images
 created with older qemu-img versions).
 
 What is the host kernel (uname -a)?

 r...@ss02:~# uname -a
 Linux ss02 2.6.24-19-server #1 SMP Wed Aug 20 18:43:06 UTC 2008 x86_64 
 GNU/Linux

 kvm memory management with pre 2.6.27 host kernels is pretty weak.
 Using a newer host kernel (and newer kvm) may solve this problem.

Initially I am going to see whether upgrading to KVM-84 from the Hardy
Heron backports improves the situation, since at the moment the latest
kernel available for Hardy is the one I mentioned.

I would also like to know whether the newest versions of KVM published
on the official site of the project fix a bug recently reported in
Ubuntu Launchpad [1]. In addition, I have been seeing errors of the type
'to swapper Not tainted' or 'java Not tainted' in the VM (aps2, which has
a high rate of I/O), which I described in a previous message sent to the
list [2]. Could you tell me whether this might be due to a KVM bug that
is fixed in a later version?

Thanks for such a quick reply.

Regards,
Daniel

[1] https://bugs.launchpad.net/ubuntu/+source/kvm/+bug/359447
[2] http://thread.gmane.org/gmane.comp.emulators.kvm.devel/37631
-- 
Fingerprint: BFB3 08D6 B4D1 31B2 72B9  29CE 6696 BF1B 14E6 1D37
Powered by Debian GNU/Linux Squeeze - Linux user #188.598


signature.asc
Description: Digital signature


Re: OpenSolaris boot failure with KVM and VirtualBox

2009-07-26 Thread Avi Kivity

On 07/16/2009 10:30 AM, Sid Boyce wrote:

I first tried using kernel 2.6.31-rc1 on openSUSE 11.2 Milestone1 on a
4P box. All other VM's, Windows and Linux work, currently running
openSUSE 11.2 Milestone 3 with 2.6.31-rc3. Verified the .iso is good.
Error message
=
Booting 'OpenSolaris 2009.06'
kernel$ /platform/i86pc/kernel/$ISADIR/unix
loading '/platform/i86pc/kernel/$ISADIR/unix' ...
cpu: 'AuthenticAMD' family 16 model 4 step 2 ...
[BIOS accepted mixed-mode target setting!]
   [Multiboot-kernel, loadaddr=0xbffe38, text-and-data=0x1b1ff0, bss=0x0,
entry=0xc0]
'platform/i86pc/kernel/amd64/unix' is loaded
module$ /boot/$ISADIR/x86.microroot
loading '/boot/$ISADIR/x86.microroot' ...
Error 15: File not found
Press any key to continue ...
Regards
Sid.
   


Please copy kvm@vger.kernel.org on kvm issues.

Is this a regression from previous kernel versions?  What userspace are 
you using?


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Very high memory usage with KVM

2009-07-26 Thread Avi Kivity

On 07/26/2009 06:50 PM, Daniel Bareiro wrote:

kvm memory management with pre 2.6.27 host kernels is pretty weak.
Using a newer host kernel (and newer kvm) may solve this problem.
 


Initially I am going to see how it improves the situation upgrading to
KVM-84 of backports of Hardy Heron, since at the moment last kernel
available for Hardy is the one I has commented.

I would like to know if the newest versions of KVM published in the
official site of the project solve a bug recently reported in Ubuntu
Launchpad [1]. Also I was observing errors of type 'to swapper Not
tainted' or 'java Not tainted' in the VM (aps2, with a high rate of I/O)
and that I've commented in a previous message sent to the list [2]. I
would want to know if you could indicate to me if this can be due to KVM
bug that would be solved in a later version.

   


My guess is that it is due to poor swapping with pre-2.6.27 hosts. 15 GB 
used out of 16GB total is just 6% reserve, which may be a bit too low.  
With a 2.6.27 host some small amount of memory would be swapped out, 
before that you'd see thrashing.  Another way to check is to drop one 
guest (or reduce total memory needed by 1GB) and see if you get the same 
results or if things improve.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCHv0 RFC] kvm: irqfd support for level interrupts

2009-07-26 Thread Michael S. Tsirkin
Here's an untested patch with partial support for level-triggered
interrupts in irqfd. What this patch has: support for clearing the
interrupt on ack. What this patch does not have: support for signalling
an eventfd on ack so that userspace can take action and, e.g., re-enable
the interrupt.

Gleb, Marcelo, I'd like your input on the approach taken wrt locking.
Does it look sane?

Avi, how's the interface? I intend to also add an eventfd probably in
the padding in the irqfd struct.

Signed-off-by: Michael S. Tsirkin m...@redhat.com

---

diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 230a91a..8bf16af 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -488,6 +488,7 @@ struct kvm_x86_mce {
 #endif
 
 #define KVM_IRQFD_FLAG_DEASSIGN (1  0)
+#define KVM_IRQFD_FLAG_LEVEL (1  1)
 
 struct kvm_irqfd {
__u32 fd;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 99017e8..fcbf5b5 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -45,12 +45,14 @@ struct _irqfd {
struct kvm   *kvm;
struct eventfd_ctx   *eventfd;
int   gsi;
+   int   is_level;
struct list_head  list;
poll_tablept;
wait_queue_head_t*wqh;
wait_queue_t  wait;
struct work_structinject;
struct work_structshutdown;
+   struct kvm_irq_ack_notifier kian;
 };
 
 static struct workqueue_struct *irqfd_cleanup_wq;
@@ -63,10 +65,15 @@ irqfd_inject(struct work_struct *work)
 
mutex_lock(kvm-irq_lock);
kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 1);
-   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
+   if (!irqfd-is_level)
+   kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd-gsi, 0);
mutex_unlock(kvm-irq_lock);
 }
 
+static void irqfd_irq_acked(struct kvm_irq_ack_notifier *kian)
+{
+   kvm_set_irq(kian-kvm, KVM_USERSPACE_IRQ_SOURCE_ID, kian-gsi, 0);
+}
 /*
  * Race-free decouple logic (ordering is critical)
  */
@@ -87,6 +94,9 @@ irqfd_shutdown(struct work_struct *work)
 */
flush_work(irqfd-inject);
 
+   if (irqfd-is_level)
+   kvm_unregister_irq_ack_notifier(irqfd-kian);
+
/*
 * It is now safe to release the object's resources
 */
@@ -166,7 +176,7 @@ irqfd_ptable_queue_proc(struct file *file, 
wait_queue_head_t *wqh,
 }
 
 static int
-kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
+kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi, int is_level)
 {
struct _irqfd *irqfd;
struct file *file = NULL;
@@ -180,6 +190,7 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 
irqfd-kvm = kvm;
irqfd-gsi = gsi;
+   irqfd-is_level = is_level;
INIT_LIST_HEAD(irqfd-list);
INIT_WORK(irqfd-inject, irqfd_inject);
INIT_WORK(irqfd-shutdown, irqfd_shutdown);
@@ -198,6 +209,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 
irqfd-eventfd = eventfd;
 
+   if (is_level) {
+   irqfd-kian.gsi = gsi;
+   irqfd-kian.irq_acked = irqfd_irq_acked;
+   kvm_register_irq_ack_notifier(irqfd-kian);
+   }
+
/*
 * Install our own custom wake-up handling so we are notified via
 * a callback whenever someone signals the underlying eventfd
@@ -281,10 +298,13 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
 int
 kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
 {
+   if (flags  ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_LEVEL))
+   return -EINVAL;
+
if (flags  KVM_IRQFD_FLAG_DEASSIGN)
return kvm_irqfd_deassign(kvm, fd, gsi);
 
-   return kvm_irqfd_assign(kvm, fd, gsi);
+   return kvm_irqfd_assign(kvm, fd, gsi, !!(flags  KVM_IRQFD_FLAG_LEVEL));
 }
 
 /*
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: OpenSolaris boot failure with KVM and VirtualBox

2009-07-26 Thread Heinz Diehl
On 26.07.2009, Avi Kivity wrote: 

 On 07/16/2009 10:30 AM, Sid Boyce wrote:
 I first tried using kernel 2.6.31-rc1 on openSUSE 11.2 Milestone1 on a
 4P box. All other VM's, Windows and Linux work, currently running
 openSUSE 11.2 Milestone 3 with 2.6.31-rc3. Verified the .iso is good.

Opensuse Milestones are testing versions in at best alpha state.

 Error 15: File not found
[]

Virtualbox works properly running Opensolaris on my machines, with any
2.6.31-rc kernel released, including rc4 and git versions (opensuse 11.1
based).


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC] pci: expose function reset capability in sysfs

2009-07-26 Thread Michael S. Tsirkin
Some devices allow an individual function to be reset without affecting
other functions in the same device: that's what pci_reset_function does.
For devices that have this support, expose a reset attribute in sysfs.

This is useful e.g. for virtualization, where a qemu userspace
process wants to reset the device when the guest is started/reset,
to emulate machine reboot as closely as possible.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---

Jesse, all,
could you please comment on whether the following approach looks sane?
Compile-tested only at this point.  I'm also not sure whether the
CAP_SYS_ADMIN check is necessary: maybe the 0200 permissions on the sysfs
file are sufficient?

 drivers/pci/pci-sysfs.c  |   37 +
 drivers/pci/pci.c|   16 
 drivers/pci/pci.h|1 +
 include/linux/kvm_host.h |1 +
 virt/kvm/irq_comm.c  |4 +++-
 5 files changed, 58 insertions(+), 1 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 85ebd02..92805e8 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -916,6 +916,28 @@ int __attribute__ ((weak)) 
pcibios_add_platform_entries(struct pci_dev *dev)
return 0;
 }
 
+static ssize_t reset_store(struct device *dev,
+  struct device_attribute *attr, const char *buf,
+  size_t count)
+{
+   struct pci_dev *pdev = to_pci_dev(dev);
+   unsigned long val;
+   ssize_t result = strict_strtoul(buf, 0, val);
+
+   if (result  0)
+   return result;
+
+   /* this can crash the machine when done on the wrong device */
+   if (!capable(CAP_SYS_ADMIN))
+   return -EPERM;
+
+   if (val != 1)
+   return -EINVAL;
+   return pci_reset_function(pdev);
+}
+
+static struct device_attribute reset_attr = __ATTR(reset, 0200, NULL, 
reset_store);
+
 static int pci_create_capabilities_sysfs(struct pci_dev *dev)
 {
int retval;
@@ -943,7 +965,21 @@ static int pci_create_capabilities_sysfs(struct pci_dev 
*dev)
/* Active State Power Management */
pcie_aspm_create_sysfs_dev_files(dev);
 
+   if (!pci_probe_reset_function(dev)) {
+   retval = device_create_file(dev-dev, reset_attr);
+   if (retval)
+   goto error;
+   }
return 0;
+
+error:
+   pcie_aspm_remove_sysfs_dev_files(dev);
+   if (dev-vpd  dev-vpd-attr) {
+   sysfs_remove_bin_file(dev-dev.kobj, dev-vpd-attr);
+   kfree(dev-vpd-attr);
+   }
+
+   return retval;
 }
 
 int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev)
@@ -1037,6 +1073,7 @@ static void pci_remove_capabilities_sysfs(struct pci_dev 
*dev)
}
 
pcie_aspm_remove_sysfs_dev_files(dev);
+   device_remove_file(dev-dev, reset_attr);
 }
 
 /**
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index dbd0f94..f6d1c6c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2260,6 +2260,22 @@ int __pci_reset_function(struct pci_dev *dev)
 EXPORT_SYMBOL_GPL(__pci_reset_function);
 
 /**
+ * pci_probe_reset_function - check whether the device can be safely reset
+ * @dev: PCI device to reset
+ *
+ * Some devices allow an individual function to be reset without affecting
+ * other functions in the same device.  The PCI device must be responsive
+ * to PCI config space in order to use this function.
+ *
+ * Returns 0 if the device function can be reset or negative if the
+ * device doesn't support resetting a single function.
+ */
+int pci_probe_reset_function(struct pci_dev *dev)
+{
+   return pci_dev_reset(dev, 1);
+}
+
+/**
  * pci_reset_function - quiesce and reset a PCI device function
  * @dev: PCI device to reset
  *
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index f73bcbe..60a3811 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -16,6 +16,7 @@ extern void pci_cleanup_rom(struct pci_dev *dev);
 extern int pci_mmap_fits(struct pci_dev *pdev, int resno,
 struct vm_area_struct *vma);
 #endif
+int pci_probe_reset_function(struct pci_dev *dev);
 
 /**
  * struct pci_platform_pm_ops - Firmware PM callbacks
-- 
1.6.2.5
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: OpenSolaris boot failure with KVM and VirtualBox

2009-07-26 Thread Sid Boyce
On 26/07/09 17:13, Avi Kivity wrote:
 On 07/16/2009 10:30 AM, Sid Boyce wrote:
 I first tried using kernel 2.6.31-rc1 on openSUSE 11.2 Milestone1 on a
 4P box. All other VM's, Windows and Linux work, currently running
 openSUSE 11.2 Milestone 3 with 2.6.31-rc3. Verified the .iso is good.
 Error message
 =
 Booting 'OpenSolaris 2009.06'
 kernel$ /platform/i86pc/kernel/$ISADIR/unix
 loading '/platform/i86pc/kernel/$ISADIR/unix' ...
 cpu: 'AuthenticAMD' family 16 model 4 step 2 ...
 [BIOS accepted mixed-mode target setting!]
[Multiboot-kernel, loadaddr=0xbffe38, text-and-data=0x1b1ff0, bss=0x0,
 entry=0xc0]
 'platform/i86pc/kernel/amd64/unix' is loaded
 module$ /boot/$ISADIR/x86.microroot
 loading '/boot/$ISADIR/x86.microroot' ...
 Error 15: File not found
 Press any key to continue ...
 Regards
 Sid.

 
 Please copy kvm@vger.kernel.org on kvm issues.
 
 Is this a regression from previous kernel versions?  What userspace are
 you using?
 

Currently running 2.6.31-rc4 with the original kqemu-1.4.0pre1 on
openSUSE 11.2 Milestone 4 and it boots OK - a 200G disk image used.
qemu-system-x86_64 -cdrom /ISO/osol-0906-ai-x86.iso -boot d
/osol0906.qcow2 -smp 4 -m 500M 
Regards
Sid.
-- 
Sid Boyce ... Hamradio License G3VBV, Licensed Private Pilot
Emeritus IBM/Amdahl Mainframes and Sun/Fujitsu Servers Tech Support
Specialist, Cricket Coach
Microsoft Windows Free Zone - Linux used for all Computing Tasks

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: OpenSolaris boot failure with KVM and VirtualBox

2009-07-26 Thread Avi Kivity

On 07/26/2009 09:14 PM, Sid Boyce wrote:



Is this a regression from previous kernel versions?  What userspace are
you using?

 


Currently running 2.6.31-rc4 with the original kqemu-1.4.0pre1 on
openSUSE 11.2 Milestone 4 and it boots OK - a 200G disk image used.
qemu-system-x86_64 -cdrom /ISO/osol-0906-ai-x86.iso -boot d
/osol0906.qcow2 -smp 4 -m 500M
   


Wait, are you using kqemu or kvm?

--
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/9] change order of kvm_init call.

2009-07-26 Thread Jan Kiszka
Glauber Costa wrote:
 The goal is to get rid of the call to kvm_init. But those things
 are subtle, and often break. So do it in a separate patch, to help
 finding potential issues in future bisections.

Found such an issue: This patch triggers a segfault if no kvm modules
are loaded and you start qemu without -no-kvm. Please have a look.

Jan

 
 Signed-off-by: Glauber Costa glom...@redhat.com
 ---
  vl.c |   18 +-
  1 files changed, 9 insertions(+), 9 deletions(-)
 
 diff --git a/vl.c b/vl.c
 index f4e4d0f..86a6d70 100644
 --- a/vl.c
 +++ b/vl.c
 @@ -5748,15 +5748,6 @@ int main(int argc, char **argv, char **envp)
  signal(SIGTTIN, SIG_IGN);
  }
  
 -#ifdef CONFIG_KVM
 -if (kvm_enabled()) {
 - if (kvm_init(smp_cpus) < 0) {
 - fprintf(stderr, "Could not initialize KVM, will disable KVM 
 support\n");
 - exit(1);
 - }
 -}
 -#endif
 -
 if (pid_file && qemu_create_pidfile(pid_file) != 0) {
  if (daemonize) {
  uint8_t status = 1;
 @@ -5956,6 +5947,15 @@ int main(int argc, char **argv, char **envp)
  }
  #endif
  
 +#ifdef CONFIG_KVM
 +if (kvm_enabled()) {
 + if (kvm_init(smp_cpus) < 0) {
 + fprintf(stderr, "Could not initialize KVM, will disable KVM 
 support\n");
 + exit(1);
 + }
 +}
 +#endif
 +
  if (monitor_device) {
  monitor_hd = qemu_chr_open("monitor", monitor_device, NULL);
  if (!monitor_hd) {




signature.asc
Description: OpenPGP digital signature


Re: Host latency peaks due to kvm-intel

2009-07-26 Thread H. Peter Anvin
Jan Kiszka wrote:
 Avi Kivity wrote:
 On 07/24/2009 12:41 PM, Jan Kiszka wrote:
 I vaguely recall that someone promised to add a feature reporting
 facility for all those nice things, modern VM-extensions may or may not
 support (something like or even an extension of /proc/cpuinfo). What is
 the state of this plan? Would be specifically interesting for Intel CPUs
 as there seem to be many of them out there with restrictions for special
 use cases - like real-time.

 Newer kernels do report some vmx features (like flexpriority) in
 /proc/cpuinfo but not all.

 
 Ah, nice. Then we just need this?
 

Fine with me.

Acked-by: H. Peter Anvin h...@zytor.com

However, I guess the real question is whether we shouldn't export ALL VMX
features in a consistent way instead?

-hpa
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: NMI Injection to Guest

2009-07-26 Thread Jiaqing Du
Hi Gleb,

Thanks for your reply.

2009/7/26 Gleb Natapov g...@redhat.com:
 On Sat, Jul 25, 2009 at 10:46:39PM +0200, Jiaqing Du wrote:
 Hi list,

 I'm trying to extend OProfile to support guest profiling. One step of
 my work is to push an NMI to the guest(s) when a performance counter
 overflows. Please correct me if the following is not correct:

 counter overflow --> NMI to host --> VM exit --> int $2 to handle
 NMI on host --> ...   --> VM entry --> NMI to guest

 Correct except the last step (--> NMI to guest). Host nmi is not
 propagated to guests.

Yes. I need to add some code to propagate host NMI to guests.

 On the path between VM-exit and VM-entry, I want to push an NMI to the
 guest. I tried to put the following code on the path, but never
 succeeded. Various wired things happened, such as KVM hangs, guest
 kernel oops, and host hangs. I tried both code with Linux 2.6.30 and
 version 88.

 if (vmx_nmi_allowed())  { vmx_inject_nmi(); }

 Any suggestions? Where is the right place to push an NMI and what are
 the necessary checks?
 Call kvm_inject_nmi(vcpu). And don't forget to vcpu_load(vcpu) before
 doing it. See kvm_vcpu_ioctl_nmi().

Based on the code with Linux 2.6.30, what kvm_inject_nmi(vcpu) does is
just set vcpu->arch.nmi_pending to 1. kvm_vcpu_ioctl_nmi() puts
vcpu_load() before the setting and vcpu_put() after it.

I need to push host NMI to guests between a VM-exit and a VM-entry
after that. The VM-exit is due to an NMI caused by performance counter
overflow. The following code with vcpu_enter_guest(), which is
surrounded by a vcpu_load() and vcpu_put(), checks this
vcpu->arch.nmi_pending and other related flags to decide whether an
NMI should be pushed to guests.

if (vcpu->arch.exception.pending)
__queue_exception(vcpu);
else if (irqchip_in_kernel(vcpu->kvm))
kvm_x86_ops->inject_pending_irq(vcpu);
else
kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);

What I did is given below:

3097 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3098 {
   ... ...

3156 if (kvm_vm_exit_on_cnt_overflow) {
3157 vcpu->arch.nmi_pending = 1;
3158 }
3159
3160 if (vcpu->arch.exception.pending)
3161 __queue_exception(vcpu);
3162 else if (irqchip_in_kernel(vcpu->kvm))
3163 kvm_x86_ops->inject_pending_irq(vcpu);
3164 else
3165 kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run);

  ... 
3236 }

In vcpu_enter_guest(), before this part of code is reached,
vcpu->arch.nmi_pending is set to 1 if the VM-exit is due to
performance counter overflow. Still, no NMIs are seen by the guests. I
also tried to put this "vcpu->arch.nmi_pending = 1;" somewhere else on
the path between a VM-exit and VM-entry, but it does not seem to work
either. Only vmx_inject_nmi() manages to push NMIs to guests, but
without the right sanity checks, it causes various weird host and guest
behaviors.

To inject NMIs on the path between a VM-exit and VM-entry, what's to try next?


 --
                        Gleb.


Thanks,
Jiaqing
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: NMI Injection to Guest

2009-07-26 Thread Nipun sehrawat
Hi all,

What about using vmx_inject_nmi(vcpu) to inject the NMIs into the
guest, when we are sure about the vcpu on which the NMI is to be
injected.

Nipun

On Mon, Jul 27, 2009 at 12:55 AM, Jiaqing Du jiaq...@gmail.com wrote:

 Hi Gleb,

 Thanks for your reply.

 2009/7/26 Gleb Natapov g...@redhat.com:
  On Sat, Jul 25, 2009 at 10:46:39PM +0200, Jiaqing Du wrote:
  Hi list,
 
  I'm trying to extend OProfile to support guest profiling. One step of
  my work is to push an NMI to the guest(s) when a performance counter
  overflows. Please correct me if the following is not correct:
 
  counter overflow -- NMI to host -- VM exit -- int $2 to handle
  NMI on host -- ...   -- VM entry -- NMI to guest
 
  Correct except the last step (-- NMI to guest). Host nmi is not
  propagated to guests.

 Yes. I need to add some code to propagate host NMI to guests.
 
  On the path between VM-exit and VM-entry, I want to push an NMI to the
  guest. I tried to put the following code on the path, but never
  succeeded. Various wired things happened, such as KVM hangs, guest
  kernel oops, and host hangs. I tried both code with Linux 2.6.30 and
  version 88.
 
  if (vmx_nmi_allowed())  { vmx_inject_nmi(); }
 
  Any suggestions? Where is the right place to push an NMI and what are
  the necessary checks?
  Call kvm_inject_nmi(vcpu). And don't forget to vcpu_load(vcpu) before
  doing it. See kvm_vcpu_ioctl_nmi().

 Based on the code with Linux 2.6.30, what kvm_inject_nmi(vcpu) does is
 just set vcpu-arch.nmi_pending to 1. kvm_vcpu_ioctl_nmi() puts
 vcpu_load() before the setting and vcpu_put() after it.

 I need to push host NMI to guests between a VM-exit and a VM-entry
 after that. The VM-exit is due to an NMI caused by performance counter
 overflow. The following code with vcpu_enter_guest(), which is
 surrounded by a vcpu_load() and vcpu_put(), checks this
 vcpu-arch.nmi_pending and other related flags to decide whether an
 NMI should be pushed to guests.

        if (vcpu-arch.exception.pending)
                __queue_exception(vcpu);
        else if (irqchip_in_kernel(vcpu-kvm))
                kvm_x86_ops-inject_pending_irq(vcpu);
        else
                kvm_x86_ops-inject_pending_vectors(vcpu, kvm_run);

 What I did is given below:

 3097 static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run 
 *kvm_run)
 3098 {
               ... ...

 3156         if (kvm_vm_exit_on_cnt_overflow) {
 3157                 vcpu-arch.nmi_pending = 1;
 3158         }
 3159
 3160         if (vcpu-arch.exception.pending)
 3161                 __queue_exception(vcpu);
 3162         else if (irqchip_in_kernel(vcpu-kvm))
 3163                 kvm_x86_ops-inject_pending_irq(vcpu);
 3164         else
 3165                 kvm_x86_ops-inject_pending_vectors(vcpu, kvm_run);

              ... 
 3236 }

 In vcpu_enter_guest(), before this part of code is reached,
 vcpu-arch.nmi_pending is set to 1 if the VM-exit is due to
 performance counter overflow. Still, no NMIs are seen by the guests. I
 also tried to put this vcpu-arch.nmi_pending = 1; somewhere else on
 the path between a VM-exit and VM-entry, it does not seem to work
 neither. Only vmx_inject_nmi() manages to push NMIs to guests, but
 without right sanity checks, it causes various wired host and guest
 behaviors.

 To inject NMIs on the path between a VM-exit and VM-entry, what's to try next?

 
  --
                         Gleb.
 

 Thanks,
 Jiaqing
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: OpenSolaris boot failure with KVM and VirtualBox

2009-07-26 Thread Sid Boyce
On 26/07/09 19:18, Avi Kivity wrote:
 On 07/26/2009 09:14 PM, Sid Boyce wrote:

 Is this a regression from previous kernel versions?  What userspace are
 you using?

  

 Currently running 2.6.31-rc4 with the original kqemu-1.4.0pre1 on
 openSUSE 11.2 Milestone 4 and it boots OK - a 200G disk image used.
 qemu-system-x86_64 -cdrom /ISO/osol-0906-ai-x86.iso -boot d
 /osol0906.qcow2 -smp 4 -m 500M

 
 Wait, are you using kqemu or kvm?
 

# l /dev/kvm
crw-rw+ 1 root root 10, 232 2009-07-24 20:26 /dev/kvm
# lsmod|grep kvm
kvm_amd41908  0
kvm   180488  1 kvm_amd

From long ago I read that kvm needed kqemu, so I have always built the
module, but I see here it's not used.
 lsmod|grep qemu
kqemu 173496  0

# rpm -qf /usr/bin/qemu-system-x86_64
qemu-0.10.1-2.21

Regards
Sid.
-- 
Sid Boyce ... Hamradio License G3VBV, Licensed Private Pilot
Emeritus IBM/Amdahl Mainframes and Sun/Fujitsu Servers Tech Support
Specialist, Cricket Coach
Microsoft Windows Free Zone - Linux used for all Computing Tasks

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] Allow larger BIOS image

2009-07-26 Thread Jordan Justen
From: Jordan Justen jljus...@gmail.com

These changes are similar to my patches sent July 16, except they
now are based on Yang Sheng's recent changes to enable a new
ioctl for controlling the EPT identity mapping page location.

-Jordan

Jordan Justen (3):
  Update BIOS INT15-E820 to allow a larger BIOS image
  Move TSS pages to allow a larger BIOS image
  Move EPT identity mapping pages to allow a larger BIOS image

 kvm/bios/rombios.c |8 
 qemu-kvm-x86.c |4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] Update BIOS INT15-E820 to allow a larger BIOS image

2009-07-26 Thread Jordan Justen
The bios will now reserve more memory via the E820 functions.

Note that the standard KVM BIOS will most likely not make use of
this expanded BIOS region.  This change will synchronize
the BIOS INT15-E820 reservations to match other changes that
will allow alternate BIOS images to be larger in size.

Previously the BIOS reserved:
  0xfffbc000-0xfffbcfff -   4KB - EPT identity mapping pages
  0xfffbd000-0xfffbffff -  12KB - TSS pages
  0xfffc0000-0xffffffff - 256KB - Max bios.bin (usually top 128KB is used)

Now the BIOS will reserve:
  0xfeffc000-0xfeffcfff -   4KB - EPT identity mapping pages
  0xfeffd000-0xfeffffff -  12KB - TSS Pages
  0xff000000-0xffffffff -  16MB - Max bios.bin

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 kvm/bios/rombios.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kvm/bios/rombios.c b/kvm/bios/rombios.c
index 6186199..2d0c153 100644
--- a/kvm/bios/rombios.c
+++ b/kvm/bios/rombios.c
@@ -4596,14 +4596,14 @@ ASM_END
 case 5:
 /* 4 pages before the bios, 3 pages for vmx tss pages,
 * the other page for EPT real mode pagetable */
-set_e820_range(ES, regs.u.r16.di, 0xfffbc000L,
-   0xfffc0000L, 0, 0, 2);
+set_e820_range(ES, regs.u.r16.di, 0xfeffc000L,
+   0xff000000L, 0, 0, 2);
 regs.u.r32.ebx = 6;
 break;
 case 6:
-/* 256KB BIOS area at the end of 4 GB */
+/* 16MB BIOS area at the end of 4 GB */
 set_e820_range(ES, regs.u.r16.di,
-   0xfffc0000L, 0x00000000L ,0, 0, 2);
+   0xff000000L, 0x00000000L ,0, 0, 2);
 if (extra_highbits_memory_size || 
extra_lowbits_memory_size)
 regs.u.r32.ebx = 7;
 else
-- 
1.6.0.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] Move TSS pages to allow a larger BIOS image

2009-07-26 Thread Jordan Justen
Move from:
  0xfffbd000-0xfffbffff
to:
  0xfeffd000-0xfeffffff

This step is required to free up the 0xff000000-0xffffffff (16MB) range
for use with bios.bin.

This change depends upon a change to kvm/bios/rombios.c so the bios
INT15-E820 function will properly reserve the new location.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 qemu-kvm-x86.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 492dbc5..0b47b57 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -62,7 +62,7 @@ static int kvm_init_tss(kvm_context_t kvm)
 * this address is 3 pages before the bios, and the bios should 
present
 * as unavaible memory
 */
-   r = kvm_set_tss_addr(kvm, 0xfffbd000);
+   r = kvm_set_tss_addr(kvm, 0xfeffd000);
if (r < 0) {
fprintf(stderr, "kvm_init_tss: unable to set tss 
addr\n");
return r;
-- 
1.6.0.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] Move EPT identity mapping pages to allow a larger BIOS image

2009-07-26 Thread Jordan Justen
Move from:
  0xfffbc000-0xfffbcfff
to:
  0xfeffc000-0xfeffcfff

This step is required to free up the 0xff000000-0xffffffff (16MB) range
for use with bios.bin.

The KVM kernel change depends upon a change to kvm/bios/rombios.c so the bios
INT15-E820 function will properly reserve the new location.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
---
 qemu-kvm-x86.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 0b47b57..65ba470 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -102,7 +102,7 @@ static int kvm_init_identity_map_page(kvm_context_t kvm)
 * this address is 4 pages before the bios, and the bios should 
present
 * as unavaible memory
 */
-   r = kvm_set_identity_map_addr(kvm, 0xfffbc000);
+   r = kvm_set_identity_map_addr(kvm, 0xfeffc000);
if (r < 0) {
fprintf(stderr, "kvm_init_identity_map_page: 
unable to set identity mapping addr\n");
-- 
1.6.0.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Host latency peaks due to kvm-intel

2009-07-26 Thread Yang, Sheng
On Monday 27 July 2009 03:16:27 H. Peter Anvin wrote:
 Jan Kiszka wrote:
  Avi Kivity wrote:
  On 07/24/2009 12:41 PM, Jan Kiszka wrote:
  I vaguely recall that someone promised to add a feature reporting
  facility for all those nice things, modern VM-extensions may or may not
  support (something like or even an extension of /proc/cpuinfo). What is
  the state of this plan? Would be specifically interesting for Intel
  CPUs as there seem to be many of them out there with restrictions for
  special use cases - like real-time.
 
  Newer kernels do report some vmx features (like flexpriority) in
  /proc/cpuinfo but not all.
 
  Ah, nice. Then we just need this?

 Fine with me.

 Acked-by: H. Peter Anvin h...@zytor.com

 However, I guess the real question if we shouldn't export ALL VMX
 features in a consistent way instead?

When I add feature reporting to cpuinfo, I just put highlight features there, 
otherwise the VMX feature list would be at least as long as the CPU one.

I have also suggested another field for virtualization feature for it, but 
some concern again userspace tools raised.

For we got indeed quite a lot features, and would get more, would it better to 
export the part of struct vmcs_config entries(that's pin_based_exec_ctrl, 
cpu_based_exec_ctrl, and cpu_based_2nd_exec_ctrl) through 
sys/module/kvm_intel/? Put every feature to cpuinfo seems not that necessary 
for such a big list.

-- 
regards
Yang, Sheng
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


  1   2   >