[kvm-devel] exits for no reason?

2008-05-16 Thread Gerd Hoffmann
  Hi,

With xenner I see a very high number of exits for no apparent reason in
the statistics:

kvm stats  :total diff
  mmu_cache_miss   : 53800
  mmu_flooded  : 12940
  mmu_pde_zapped   :101320
  mmu_pte_updated  :   1222920
  mmu_pte_write:   1282230
  mmu_shadow_zapped: 47420
  insn_emulation   :   179971 1001
  fpu_reload   :  9200
  host_state_reload:52192 1065
  irq_exits:52956 1226
  halt_wakeup  : 33020
  halt_exits   : 17700
  io_exits :41564 1001
  exits: 12209733   454557
  tlb_flush:   514163 2002
  pf_guest :805540
  pf_fixed :   1720320

Ideas what this might be?  Unusual exit reason with no counter I guess?

Suggestions how to track that one down?

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by: Microsoft 
Defy all challenges. Microsoft(R) Visual Studio 2008. 
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 1/4] Add helper functions for paravirtual clocksources.

2008-05-16 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/Kconfig  |4 +
 arch/x86/kernel/Makefile  |1 +
 arch/x86/kernel/pvclock.c |  148 +
 include/asm-x86/pvclock.h |6 ++
 4 files changed, 159 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/kernel/pvclock.c
 create mode 100644 include/asm-x86/pvclock.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fe361ae..deb3049 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -417,6 +417,10 @@ config PARAVIRT
  over full virtualization.  However, when run without a hypervisor
  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_CLOCK
+   bool
+   default n
+
 endif
 
 config MEMTEST_BOOTPARAM
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3..77807d4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)+= kvmclock.o
 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)  += pcspeaker.o
 
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 000..33e526f
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,148 @@
+/*  paravirtual clock -- common code used by kvm/xen
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include 
+#include 
+#include 
+
+/*
+ * These are perodically updated
+ *xen: magic shared_info page
+ *kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+   u64 tsc_timestamp; /* TSC at last update of time vals.  */
+   u64 system_timestamp;  /* Time, in nanosecs, since boot.*/
+   u32 tsc_to_nsec_mul;
+   int tsc_shift;
+   u32 version;
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+   u64 product;
+#ifdef __i386__
+   u32 tmp1, tmp2;
+#endif
+
+   if (shift < 0)
+   delta >>= -shift;
+   else
+   delta <<= shift;
+
+#ifdef __i386__
+   __asm__ (
+   "mul  %5   ; "
+   "mov  %4,%%eax ; "
+   "mov  %%edx,%4 ; "
+   "mul  %5   ; "
+   "xor  %5,%5; "
+   "add  %4,%%eax ; "
+   "adc  %5,%%edx ; "
+   : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+   : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif __x86_64__
+   __asm__ (
+   "mul %%rdx ; shrd $32,%%rdx,%%rax"
+   : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+   return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+   u64 delta = native_read_tsc() - shadow->tsc_timestamp;
+   return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+   struct kvm_vcpu_time_info *src)
+{
+   do {
+   dst->version = src->version;
+   rmb();  /* fetch version before data */
+   dst->tsc_timestamp = src->tsc_timestamp;
+   dst->system_timestamp  = src->system_time;
+   dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+   dst->tsc_shift = src->tsc_shift;
+   rmb();  /* test version after fetching data */
+   } while ((src->version & 1) || (dst->version != src->version));
+
+   return dst->version;
+}
+
+/*
+ * This is our read_clock functio

[kvm-devel] [PATCH 4/4] kvm/guest: fix paravirt clocksource to be compatible with xen.

2008-05-16 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/Kconfig   |1 +
 arch/x86/kernel/kvmclock.c |   86 ---
 2 files changed, 33 insertions(+), 54 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index deb3049..b749c85 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -390,6 +390,7 @@ config VMI
 config KVM_CLOCK
bool "KVM paravirtualized clock"
select PARAVIRT
+   select PARAVIRT_CLOCK
depends on !(X86_VISWS || X86_VOYAGER)
help
  Turning on this option will allow you to run a paravirtualized clock
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 4bc1be5..135a8f7 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -37,17 +38,9 @@ early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
-
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-   int cpu = smp_processor_id();
-   u64 delta = native_read_tsc() - last_tsc;
-   return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
 
 static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
+
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,35 +48,19 @@ static cycle_t kvm_clock_read(void);
  */
 unsigned long kvm_get_wallclock(void)
 {
-   u32 wc_sec, wc_nsec;
-   u64 delta;
+   struct kvm_vcpu_time_info *vcpu_time;
struct timespec ts;
-   int version, nsec;
int low, high;
 
low = (int)__pa(&wall_clock);
high = ((u64)__pa(&wall_clock) >> 32);
-
-   delta = kvm_clock_read();
-
native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-   do {
-   version = wall_clock.wc_version;
-   rmb();
-   wc_sec = wall_clock.wc_sec;
-   wc_nsec = wall_clock.wc_nsec;
-   rmb();
-   } while ((wall_clock.wc_version != version) || (version & 1));
-
-   delta = kvm_clock_read() - delta;
-   delta += wc_nsec;
-   nsec = do_div(delta, NSEC_PER_SEC);
-   set_normalized_timespec(&ts, wc_sec + delta, nsec);
-   /*
-* Of all mechanisms of time adjustment I've tested, this one
-* was the champion!
-*/
-   return ts.tv_sec + 1;
+
+   vcpu_time = &get_cpu_var(hv_clock);
+   pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+   put_cpu_var(hv_clock);
+   
+   return ts.tv_sec;
 }
 
 int kvm_set_wallclock(unsigned long now)
@@ -91,28 +68,17 @@ int kvm_set_wallclock(unsigned long now)
return 0;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-   u64 last_tsc, now;
-   int cpu;
-
-   preempt_disable();
-   cpu = smp_processor_id();
-
-   last_tsc = get_clock(cpu, tsc_timestamp);
-   now = get_clock(cpu, system_time);
+   struct kvm_vcpu_time_info *src;
+   cycle_t ret;
 
-   now += kvm_get_delta(last_tsc);
-   preempt_enable();
-
-   return now;
+   src = &get_cpu_var(hv_clock);
+   ret = pvclock_clocksource_read(src);
+   put_cpu_var(hv_clock);
+   return ret;
 }
+
 static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_read,
@@ -123,13 +89,14 @@ static struct clocksource kvm_clock = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
 {
int cpu = smp_processor_id();
int low, high;
low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+   printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+  cpu, high, low, txt);
return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
@@ -140,12 +107,20 @@ static void kvm_setup_secondary_clock(void)
 * Now that the first cpu already had this clocksource initialized,
 * we shouldn't fail.
 */
-   WARN_ON(kvm_register_clock());
+   WARN_ON(kvm_register_clock("secondary cpu clock"));
/* ok, done with our trickery, call native */
setup_secondary_APIC_clock();
 }
 #end

[kvm-devel] [PATCH 2/4] Make xen use the generic paravirt clocksource code.

2008-05-16 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/xen/Kconfig |1 +
 arch/x86/xen/time.c  |  110 +-
 2 files changed, 12 insertions(+), 99 deletions(-)

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be..3a4f16a 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,6 +5,7 @@
 config XEN
bool "Xen guest support"
select PARAVIRT
+   select PARAVIRT_CLOCK
depends on X86_32
depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
help
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c39e1a5..3d5f945 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 
@@ -30,17 +31,6 @@
 
 static cycle_t xen_clocksource_read(void);
 
-/* These are perodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
-   u64 tsc_timestamp; /* TSC at last update of time vals.  */
-   u64 system_timestamp;  /* Time, in nanosecs, since boot.*/
-   u32 tsc_to_nsec_mul;
-   int tsc_shift;
-   u32 version;
-};
-
-static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
 
@@ -230,95 +220,14 @@ unsigned long xen_cpu_khz(void)
return xen_khz;
 }
 
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area.
- */
-static unsigned get_time_values_from_xen(void)
-{
-   struct vcpu_time_info   *src;
-   struct shadow_time_info *dst;
-
-   /* src is shared memory with the hypervisor, so we need to
-  make sure we get a consistent snapshot, even in the face of
-  being preempted. */
-   src = &__get_cpu_var(xen_vcpu)->time;
-   dst = &__get_cpu_var(shadow_time);
-
-   do {
-   dst->version = src->version;
-   rmb();  /* fetch version before data */
-   dst->tsc_timestamp = src->tsc_timestamp;
-   dst->system_timestamp  = src->system_time;
-   dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
-   dst->tsc_shift = src->tsc_shift;
-   rmb();  /* test version after fetching data */
-   } while ((src->version & 1) | (dst->version ^ src->version));
-
-   return dst->version;
-}
-
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
-   u64 product;
-#ifdef __i386__
-   u32 tmp1, tmp2;
-#endif
-
-   if (shift < 0)
-   delta >>= -shift;
-   else
-   delta <<= shift;
-
-#ifdef __i386__
-   __asm__ (
-   "mul  %5   ; "
-   "mov  %4,%%eax ; "
-   "mov  %%edx,%4 ; "
-   "mul  %5   ; "
-   "xor  %5,%5; "
-   "add  %4,%%eax ; "
-   "adc  %5,%%edx ; "
-   : "=A" (product), "=r" (tmp1), "=r" (tmp2)
-   : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif __x86_64__
-   __asm__ (
-   "mul %%rdx ; shrd $32,%%rdx,%%rax"
-   : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
-   return product;
-}
-
-static u64 get_nsec_offset(struct shadow_time_info *shadow)
-{
-   u64 now, delta;
-   now = native_read_tsc();
-   delta = now - shadow->tsc_timestamp;
-   return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
-}
-
 static cycle_t xen_clocksource_read(void)
 {
-   struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
+struct vcpu_time_info *src;
cycle_t ret;
-   unsigned version;
-
-   do {
-   version = get_time_values_from_xen();
-   barrier();
-   ret = shadow->system_timestamp + get_nsec_offset(shadow);
-   barrier();
-   } while (version != __get_cpu_var(xen_vcpu)->time.version);
-
-   put_cpu_var(shadow_time);
 
+   src = &get_cpu_var(xen_vcpu)->time;
+   ret = pvclock_clocksource_read((void*)src);
+   put_cpu_var(xen_vcpu);
return ret;
 }
 
@@ -349,9 +258,14 @@ static void xen_read_wallclock(struct timespec *ts)
 
 unsigned long xen_get_wallclock(void)
 {
+   const struct shared_info *s = HYPERVISOR_shared_info;
+   struct kvm_wall_clock *wall_clock = (void*)&(s->wc_version);
+struct vcpu_time_info *vcpu_time;
struct tim

[kvm-devel] [PATCH 0/4] paravirt clock source patches, #3

2008-05-16 Thread Gerd Hoffmann
paravirt clock source patches, next round, with a bunch of changes
in the host code according to Avi's review comments and some minor
code tweaks.

cheers,
  Gerd


-
This SF.net email is sponsored by: Microsoft 
Defy all challenges. Microsoft(R) Visual Studio 2008. 
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen.

2008-05-16 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86.c |   71 ---
 include/asm-x86/kvm_host.h |1 +
 2 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dab3d4f..7f84467 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -493,7 +493,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t 
wall_clock)
 {
static int version;
struct kvm_wall_clock wc;
-   struct timespec wc_ts;
+   struct timespec now, sys, boot;
 
if (!wall_clock)
return;
@@ -502,9 +502,18 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t 
wall_clock)
 
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 
-   wc_ts = current_kernel_time();
-   wc.wc_sec = wc_ts.tv_sec;
-   wc.wc_nsec = wc_ts.tv_nsec;
+   /*
+* The guest calculates current wall clock time by adding
+* system time (updated by kvm_write_guest_time below) to the
+* wall clock specified here.  guest system time equals host
+* system time for us, thus we must fill in host boot time here.
+*/
+   now = current_kernel_time();
+   ktime_get_ts(&sys);
+   boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+
+   wc.wc_sec = boot.tv_sec;
+   wc.wc_nsec = boot.tv_nsec;
wc.wc_version = version;
 
kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
@@ -513,6 +522,44 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t 
wall_clock)
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 }
 
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+   uint32_t quotient, remainder;
+
+   /* This is NOT what do_div() does ... */
+   __asm__ ( "divl %4"
+ : "=a" (quotient), "=d" (remainder)
+ : "0" (0), "1" (dividend), "r" (divisor) );
+   return quotient;
+}
+
+static void kvm_set_time_scale(uint32_t tsc_khz, struct kvm_vcpu_time_info 
*hv_clock)
+{
+   uint64_t nsecs = 10LL;
+   int32_t  shift = 0;
+   uint64_t tps64;
+   uint32_t tps32;
+
+   tps64 = tsc_khz * 1000LL;
+   while (tps64 > nsecs*2) {
+   tps64 >>= 1;
+   shift--;
+   }
+
+   tps32 = (uint32_t)tps64;
+   while (tps32 <= (uint32_t)nsecs) {
+   tps32 <<= 1;
+   shift++;
+   }
+
+   hv_clock->tsc_shift = shift;
+   hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+   pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+__FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+hv_clock->tsc_to_system_mul);
+}
+
 static void kvm_write_guest_time(struct kvm_vcpu *v)
 {
struct timespec ts;
@@ -523,6 +570,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
if ((!vcpu->time_page))
return;
 
+   if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
+   kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
+   vcpu->hv_clock_tsc_khz = tsc_khz;
+   }
+   
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
@@ -537,21 +589,20 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
/*
 * The interface expects us to write an even number signaling that the
 * update is finished. Since the guest won't see the intermediate
-* state, we just write "2" at the end
+* state, we just increase by 2 at the end.
 */
-   vcpu->hv_clock.version = 2;
+   vcpu->hv_clock.version += 2;
 
shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
 
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-   sizeof(vcpu->hv_clock));
+  sizeof(vcpu->hv_clock));
 
kunmap_atomic(shared_kaddr, KM_USER0);
 
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
-
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
switch (msr) {
@@ -599,10 +650,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 
data)
/* ...but clean it before doing the actual write */
vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-   vcpu->arch.hv_clock.tsc_to_system_mul =
-   clocksource_khz2mult(tsc_khz, 22);
-   vcpu->arch.hv_clock.tsc_shift = 22;
-
down_read(¤t->mm->mmap_sem);
vcpu->arch.time_page =
gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
diff --git a/include/asm-x86/kvm_h

Re: [kvm-devel] [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen.

2008-05-16 Thread Gerd Hoffmann
Avi Kivity wrote:
>> +struct timespec now,sys,boot;
> 
> Add spaces.

Done.

>> +#if 0
>> +/* Hmm, getboottime() isn't exported to modules ... */
>> +getboottime(&boot);
>> +#else
>> +now = current_kernel_time();
>> +ktime_get_ts(&sys);
>> +boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
>> +#endif
>> +wc.wc_sec = boot.tv_sec;
>> +wc.wc_nsec = boot.tv_nsec;
> 
> Please drop the #if 0.

Done, and added a comment for the calculation.

>> +static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
>> +{
>> +uint32_t quotient, remainder;
>> +
>> +__asm__ ( "divl %4"
>> +  : "=a" (quotient), "=d" (remainder)
>> +  : "0" (0), "1" (dividend), "r" (divisor) );
>> +return quotient;   
>> +}
>>   
> 
> do_div()?

No, this one does something else.  Already tried to get rid of that one
before ;)

> pr_debug() or something?

Done.

>> +kvm_set_time_scale(tsc_khz, &vcpu->arch.hv_clock);
>>  
> What if the tsc frequency changes later on?  we need to adjust the
> multiplier, no?

We better do that, yes.

New patch series prepared and tested, will be posted in a moment ...

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by: Microsoft 
Defy all challenges. Microsoft(R) Visual Studio 2008. 
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/4] paravirt clock patches

2008-05-12 Thread Gerd Hoffmann
Glauber Costa wrote:

> So maybe declare the per-cpu areas in a special section, then in
> setup_per_cpu_areas, copy them into the definitive per-cpu section and
> update the callers?

The special section and the copy is implemented already.

That doesn't cut it for the kvmclock case though.  We registered the
physical address via msr write in the host, and *that* needs an update
too.  Otherwise the host continues to update the pre-setup location, and
the guest sees the (stale) values the kvm clock had at
per-cpu-area-setup time (when the copy took place).

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by: Microsoft 
Defy all challenges. Microsoft(R) Visual Studio 2008. 
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 4/4] kvm/guest: fix paravirt clocksource to be compatible with xen.

2008-05-08 Thread Gerd Hoffmann
This patch switches the kvm clocksource code over to use the
paravirt clock helpers, thereby making it compatible with xen.

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/Kconfig   |1 +
 arch/x86/kernel/kvmclock.c |   84 ---
 2 files changed, 32 insertions(+), 53 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b12e188..30feb9f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -388,6 +388,7 @@ config VMI
 config KVM_CLOCK
bool "KVM paravirtualized clock"
select PARAVIRT
+   select PARAVIRT_CLOCK
depends on !(X86_VISWS || X86_VOYAGER)
help
  Turning on this option will allow you to run a paravirtualized clock
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 4bc1be5..1c63f75 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -37,17 +38,9 @@ early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
-
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-   int cpu = smp_processor_id();
-   u64 delta = native_read_tsc() - last_tsc;
-   return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
 
 static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
+
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,35 +48,19 @@ static cycle_t kvm_clock_read(void);
  */
 unsigned long kvm_get_wallclock(void)
 {
-   u32 wc_sec, wc_nsec;
-   u64 delta;
+   struct kvm_vcpu_time_info *vcpu_time;
struct timespec ts;
-   int version, nsec;
int low, high;
 
low = (int)__pa(&wall_clock);
high = ((u64)__pa(&wall_clock) >> 32);
+   native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
 
-   delta = kvm_clock_read();
+   vcpu_time = &get_cpu_var(hv_clock);
+   pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+   put_cpu_var(hv_clock);
 
-   native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-   do {
-   version = wall_clock.wc_version;
-   rmb();
-   wc_sec = wall_clock.wc_sec;
-   wc_nsec = wall_clock.wc_nsec;
-   rmb();
-   } while ((wall_clock.wc_version != version) || (version & 1));
-
-   delta = kvm_clock_read() - delta;
-   delta += wc_nsec;
-   nsec = do_div(delta, NSEC_PER_SEC);
-   set_normalized_timespec(&ts, wc_sec + delta, nsec);
-   /*
-* Of all mechanisms of time adjustment I've tested, this one
-* was the champion!
-*/
-   return ts.tv_sec + 1;
+   return ts.tv_sec;
 }
 
 int kvm_set_wallclock(unsigned long now)
@@ -91,28 +68,17 @@ int kvm_set_wallclock(unsigned long now)
return 0;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-   u64 last_tsc, now;
-   int cpu;
+   struct kvm_vcpu_time_info *src;
+   cycle_t ret;
 
-   preempt_disable();
-   cpu = smp_processor_id();
-
-   last_tsc = get_clock(cpu, tsc_timestamp);
-   now = get_clock(cpu, system_time);
-
-   now += kvm_get_delta(last_tsc);
-   preempt_enable();
-
-   return now;
+   src = &get_cpu_var(hv_clock);
+   ret = pvclock_clocksource_read(src);
+   put_cpu_var(hv_clock);
+   return ret;
 }
+
 static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_read,
@@ -123,13 +89,14 @@ static struct clocksource kvm_clock = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
 {
int cpu = smp_processor_id();
int low, high;
low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+   printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+  cpu, high, low, txt);
return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
@@ -140,12 +107,20 @@ static void kvm_setup_secondary_clock(void)
 * Now that the first cpu already had this clocksource initialized,
 * we shouldn't fail.
 */
-   WARN_ON(kvm_register_clock(

[kvm-devel] [PATCH 2/4] Make xen use the generic paravirt clocksource code.

2008-05-08 Thread Gerd Hoffmann
This patch switches the xen paravirt clock over to use the
generic paravirt clock code.

Cc: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/xen/Kconfig |1 +
 arch/x86/xen/time.c  |  110 +-
 2 files changed, 12 insertions(+), 99 deletions(-)

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be..3a4f16a 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,6 +5,7 @@
 config XEN
bool "Xen guest support"
select PARAVIRT
+   select PARAVIRT_CLOCK
depends on X86_32
depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
help
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c39e1a5..3d5f945 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 
@@ -30,17 +31,6 @@
 
 static cycle_t xen_clocksource_read(void);
 
-/* These are perodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
-   u64 tsc_timestamp; /* TSC at last update of time vals.  */
-   u64 system_timestamp;  /* Time, in nanosecs, since boot.*/
-   u32 tsc_to_nsec_mul;
-   int tsc_shift;
-   u32 version;
-};
-
-static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
 
@@ -230,95 +220,14 @@ unsigned long xen_cpu_khz(void)
return xen_khz;
 }
 
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area.
- */
-static unsigned get_time_values_from_xen(void)
-{
-   struct vcpu_time_info   *src;
-   struct shadow_time_info *dst;
-
-   /* src is shared memory with the hypervisor, so we need to
-  make sure we get a consistent snapshot, even in the face of
-  being preempted. */
-   src = &__get_cpu_var(xen_vcpu)->time;
-   dst = &__get_cpu_var(shadow_time);
-
-   do {
-   dst->version = src->version;
-   rmb();  /* fetch version before data */
-   dst->tsc_timestamp = src->tsc_timestamp;
-   dst->system_timestamp  = src->system_time;
-   dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
-   dst->tsc_shift = src->tsc_shift;
-   rmb();  /* test version after fetching data */
-   } while ((src->version & 1) | (dst->version ^ src->version));
-
-   return dst->version;
-}
-
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
-   u64 product;
-#ifdef __i386__
-   u32 tmp1, tmp2;
-#endif
-
-   if (shift < 0)
-   delta >>= -shift;
-   else
-   delta <<= shift;
-
-#ifdef __i386__
-   __asm__ (
-   "mul  %5   ; "
-   "mov  %4,%%eax ; "
-   "mov  %%edx,%4 ; "
-   "mul  %5   ; "
-   "xor  %5,%5; "
-   "add  %4,%%eax ; "
-   "adc  %5,%%edx ; "
-   : "=A" (product), "=r" (tmp1), "=r" (tmp2)
-   : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif __x86_64__
-   __asm__ (
-   "mul %%rdx ; shrd $32,%%rdx,%%rax"
-   : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
-   return product;
-}
-
-static u64 get_nsec_offset(struct shadow_time_info *shadow)
-{
-   u64 now, delta;
-   now = native_read_tsc();
-   delta = now - shadow->tsc_timestamp;
-   return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
-}
-
 static cycle_t xen_clocksource_read(void)
 {
-   struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
+struct vcpu_time_info *src;
cycle_t ret;
-   unsigned version;
-
-   do {
-   version = get_time_values_from_xen();
-   barrier();
-   ret = shadow->system_timestamp + get_nsec_offset(shadow);
-   barrier();
-   } while (version != __get_cpu_var(xen_vcpu)->time.version);
-
-   put_cpu_var(shadow_time);
 
+   src = &get_cpu_var(xen_vcpu)->time;
+   ret = pvclock_clocksource_read((void*)src);
+   put_cpu_var(xen_vcpu);
return ret;
 }
 
@@ -349,9 +258,14 @@ static void xen_read_wallclock(struct timespec *ts)
 
 unsigned long xen_get_wallclock(void)
 {
+   const struct shared_info *s = HYPERVISOR_shared_info;

[kvm-devel] [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen.

2008-05-08 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86.c |   63 +++
 1 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 979f983..6906d54 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -493,7 +493,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t 
wall_clock)
 {
static int version;
struct kvm_wall_clock wc;
-   struct timespec wc_ts;
+   struct timespec now,sys,boot;
 
if (!wall_clock)
return;
@@ -502,9 +502,16 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t 
wall_clock)
 
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 
-   wc_ts = current_kernel_time();
-   wc.wc_sec = wc_ts.tv_sec;
-   wc.wc_nsec = wc_ts.tv_nsec;
+#if 0
+   /* Hmm, getboottime() isn't exported to modules ... */
+   getboottime(&boot);
+#else
+   now = current_kernel_time();
+   ktime_get_ts(&sys);
+   boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+#endif
+   wc.wc_sec = boot.tv_sec;
+   wc.wc_nsec = boot.tv_nsec;
wc.wc_version = version;
 
kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
@@ -537,20 +544,58 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
/*
 * The interface expects us to write an even number signaling that the
 * update is finished. Since the guest won't see the intermediate
-* state, we just write "2" at the end
+* state, we just increase by 2 at the end.
 */
-   vcpu->hv_clock.version = 2;
+   vcpu->hv_clock.version += 2;
 
shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
 
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-   sizeof(vcpu->hv_clock));
+  sizeof(vcpu->hv_clock));
 
kunmap_atomic(shared_kaddr, KM_USER0);
 
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+   uint32_t quotient, remainder;
+
+   __asm__ ( "divl %4"
+ : "=a" (quotient), "=d" (remainder)
+ : "0" (0), "1" (dividend), "r" (divisor) );
+   return quotient;
+}
+
+static void kvm_set_time_scale(uint32_t tsc_khz, struct kvm_vcpu_time_info 
*hv_clock)
+{
+   uint64_t nsecs = 10LL;
+   int32_t  shift = 0;
+   uint64_t tps64;
+   uint32_t tps32;
+
+   tps64 = tsc_khz * 1000LL;
+   while (tps64 > nsecs*2) {
+   tps64 >>= 1;
+   shift--;
+   }
+
+   tps32 = (uint32_t)tps64;
+   while (tps32 <= (uint32_t)nsecs) {
+   tps32 <<= 1;
+   shift++;
+   }
+
+   hv_clock->tsc_shift = shift;
+   hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+#if 0
+   printk(KERN_DEBUG "%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+  __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+  hv_clock->tsc_to_system_mul);
+#endif
+}
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
@@ -599,9 +644,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 
data)
/* ...but clean it before doing the actual write */
vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-   vcpu->arch.hv_clock.tsc_to_system_mul =
-   clocksource_khz2mult(tsc_khz, 22);
-   vcpu->arch.hv_clock.tsc_shift = 22;
+   kvm_set_time_scale(tsc_khz, &vcpu->arch.hv_clock);
 
down_read(&current->mm->mmap_sem);
vcpu->arch.time_page =
-- 
1.5.4.1


-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 0/4] paravirt clock series.

2008-05-08 Thread Gerd Hoffmann
Respin of the paravirt clock patch series.

On the host side the kvm paravirt clock is made compatible with the
xen clock.

On the guest side some xen code has been factored out into a separate
source file shared by both kvm and xen clock implementations.

This time it should work ok for kvm smp guests ;)

cheers,
  Gerd


-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 1/4] Add helper functions for paravirtual clocksources.

2008-05-08 Thread Gerd Hoffmann
The helper functions are intended to be used by both xen and kvm
paravirtual clock sources.  Following patches of this series put
them into use.  They are based on the xen code.

Cc: Jeremy Fitzhardinge <[EMAIL PROTECTED]>
Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/Kconfig  |4 +
 arch/x86/kernel/Makefile  |1 +
 arch/x86/kernel/pvclock.c |  148 +
 include/asm-x86/pvclock.h |6 ++
 4 files changed, 159 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/kernel/pvclock.c
 create mode 100644 include/asm-x86/pvclock.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 845ea2b..b12e188 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -415,6 +415,10 @@ config PARAVIRT
  over full virtualization.  However, when run without a hypervisor
  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_CLOCK
+   bool
+   default n
+
 endif
 
 config MEMTEST_BOOTPARAM
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bbdacb3..5d8e086 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)+= kvmclock.o
 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
 
 ifdef CONFIG_INPUT_PCSPKR
 obj-y  += pcspeaker.o
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 0000000..33e526f
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,148 @@
+/*  paravirtual clock -- common code used by kvm/xen
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include 
+#include 
+#include 
+
+/*
+ * These are periodically updated
+ *xen: magic shared_info page
+ *kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+   u64 tsc_timestamp; /* TSC at last update of time vals.  */
+   u64 system_timestamp;  /* Time, in nanosecs, since boot.*/
+   u32 tsc_to_nsec_mul;
+   int tsc_shift;
+   u32 version;
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+   u64 product;
+#ifdef __i386__
+   u32 tmp1, tmp2;
+#endif
+
+   if (shift < 0)
+   delta >>= -shift;
+   else
+   delta <<= shift;
+
+#ifdef __i386__
+   __asm__ (
+   "mul  %5   ; "
+   "mov  %4,%%eax ; "
+   "mov  %%edx,%4 ; "
+   "mul  %5   ; "
+   "xor  %5,%5; "
+   "add  %4,%%eax ; "
+   "adc  %5,%%edx ; "
+   : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+   : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif __x86_64__
+   __asm__ (
+   "mul %%rdx ; shrd $32,%%rdx,%%rax"
+   : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+   return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+   u64 delta = native_read_tsc() - shadow->tsc_timestamp;
+   return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+   struct kvm_vcpu_time_info *src)
+{
+   do {
+   dst->version = src->version;
+   rmb();  /* fetch version before data */
+   dst->tsc_timestamp = src->tsc_timestamp;
+   dst->system_timestamp  = src->system_time;
+   dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+   dst->tsc_shift = 

Re: [kvm-devel] [PATCH 0/4] paravirt clock patches

2008-05-07 Thread Gerd Hoffmann
Marcelo Tosatti wrote:
> On Thu, Apr 24, 2008 at 10:37:04AM +0200, Gerd Hoffmann wrote:
>>   Hi folks,
>>
>> My first attempt to send out a patch series with git ...
>>
>> The patches fix the kvm paravirt clocksource code to be compatible with
>> xen and they also factor out some code which can be shared into a
>> separate source file used by both kvm and xen.
> 
> The issue with SMP guests is still present. Booting with "nohz=off" resolves 
> it.
> 
> Same symptoms as before, apic_timer_fn for one of the vcpu's is ticking way 
> slower
> than the remaining ones:
> 
> [EMAIL PROTECTED] ~]# cat /proc/timer_stats  | grep apic
>   391,  4125 qemu-system-x86  apic_mmio_write (apic_timer_fn)
>  2103,  4126 qemu-system-x86  apic_mmio_write (apic_timer_fn)
>  1896,  4127 qemu-system-x86  apic_mmio_write (apic_timer_fn)
>  1857,  4128 qemu-system-x86  apic_mmio_write (apic_timer_fn)
> 
> Let me know what else is needed, or any patches to try.

Ok folks, here is the band aid fix for testing from the odd bugs
department.  Goes on top of the four patches of this series.  A real,
clean solution is TBD.  Tomorrow I hope (some urgent private problems
are in the queue too ...).

Problem is the per-cpu area for cpu 0 has two locations in memory, one
before and one after pda initialization.  kvmclock registers the first
due to being initialized quite early, and the paravirt clock for cpu 0
stops seeing updates once the pda setup is done.  Which makes the TSC
effectively the base for timekeeping (instead of using the TSC for
millisecond delta adjustments only).  Secondary CPUs work as intended.

This obviously screws up timekeeping on SMP guests, especially on hosts
with unstable TSC.

happy testing,

  Gerd

-- 
[EMAIL PROTECTED] ~]# dmesg | grep _clock
kvm_register_clock: cpu 0 at 0:798601 (boot)
kvm_clock_read: cpu 0 at 0:140b601 (pda)
kvm_register_clock: cpu 1 at 0:1415601
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 867523e..43135ed 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -40,6 +40,7 @@ early_param("no-kvmclock", parse_no_kvmclock);
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
 
 static struct kvm_wall_clock wall_clock;
+static void *boot_clock;
 
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
@@ -74,6 +75,19 @@ static cycle_t kvm_clock_read(void)
cycle_t ret;
 
src = &get_cpu_var(hv_clock);
+
+   if (boot_clock && 0 == smp_processor_id()) {
+   if (boot_clock != src) {
+   int low, high;
+   low  = (int)__pa(src) | 1;
+   high = ((u64)__pa(src) >> 32);
+   printk(KERN_INFO "%s: cpu %d at %x:%x (pda)\n", 
__FUNCTION__,
+  smp_processor_id(), high, low);
+   native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+   boot_clock = NULL;
+   }
+   }
+
ret = pvclock_clocksource_read(src);
put_cpu_var(hv_clock);
return ret;
@@ -92,12 +106,18 @@ static struct clocksource kvm_clock = {
 static int kvm_register_clock(void)
 {
int cpu = smp_processor_id();
+   void *ptr;
int low, high;
-   low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
-   high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
 
-   printk(KERN_DEBUG "%s: cpu %d at %x:%x\n", __FUNCTION__,
-  cpu, high, low);
+   ptr = &per_cpu(hv_clock, cpu);
+   if (0 == cpu)
+   boot_clock = ptr;
+
+   low  = (int)__pa(ptr) | 1;
+   high = ((u64)__pa(ptr) >> 32);
+
+   printk(KERN_INFO "%s: cpu %d at %x:%x%s\n", __FUNCTION__,
+  cpu, high, low, boot_clock ? " (boot)" : "");
return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/4] paravirt clock patches

2008-05-06 Thread Gerd Hoffmann
Gerd Hoffmann wrote:
> Marcelo Tosatti wrote:
>> F8 host, recent kvm-userspace.git (so with IO thread), recent kvm.git
>> (plus your patches), haven't tried 2x but I think 4x is not necessary to
>> reproduce the problem.
> 
> Ok, see it too.  Seem to be actually two (maybe related) problems.
> 
> First the guest hangs hard after a while, burning 100% CPU time
> (deadlocked I guess), doesn't respond to sysrq any more.  Is there some
> easy way to get the guest vcpu state then?

Hmm, "info registers" in qemu monitor hangs ...

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/4] paravirt clock patches

2008-05-06 Thread Gerd Hoffmann
Marcelo Tosatti wrote:
> F8 host, recent kvm-userspace.git (so with IO thread), recent kvm.git
> (plus your patches), haven't tried 2x but I think 4x is not necessary to
> reproduce the problem.

Ok, I see it too.  There actually seem to be two (maybe related) problems.

First the guest hangs hard after a while, burning 100% CPU time
(deadlocked I guess), doesn't respond to sysrq any more.  Is there some
easy way to get the guest vcpu state then?  EIP for starters, preferably
with stack trace?

The other one is that one vcpu ticks slower than the others.  I don't see it
from start, but after a while it starts happening (unless the guest
deadlocks before ...).

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/4] paravirt clock patches

2008-05-05 Thread Gerd Hoffmann
Marcelo Tosatti wrote:
> On Thu, Apr 24, 2008 at 10:37:04AM +0200, Gerd Hoffmann wrote:
>>   Hi folks,
>>
>> My first attempt to send out a patch series with git ...
>>
>> The patches fix the kvm paravirt clocksource code to be compatible with
>> xen and they also factor out some code which can be shared into a
>> separate source file used by both kvm and xen.
> 
> The issue with SMP guests is still present. Booting with "nohz=off" resolves 
> it.
> 
> Same symptoms as before, apic_timer_fn for one of the vcpu's is ticking way 
> slower
> than the remaining ones:
> 
> [EMAIL PROTECTED] ~]# cat /proc/timer_stats  | grep apic
>   391,  4125 qemu-system-x86  apic_mmio_write (apic_timer_fn)
>  2103,  4126 qemu-system-x86  apic_mmio_write (apic_timer_fn)
>  1896,  4127 qemu-system-x86  apic_mmio_write (apic_timer_fn)
>  1857,  4128 qemu-system-x86  apic_mmio_write (apic_timer_fn)

What userspace version is this?  With iothread support?  Or older one
where the vcpu0 thread also handles all the I/O?  Is 4x needed to
reproduce or do you see it with 2x too?  What host?

A quick test with xenner (which has a separate I/O thread) didn't show
anything unusual.  Going to investigate ...

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 0/4] paravirt clock patches

2008-04-28 Thread Gerd Hoffmann
Avi Kivity wrote:
> The patches look good, but please copy Jeremy and virtualization@ for
> patches which touch things outside kvm.

Will do for the next round.

> It's perhaps better to reverse the order: first fix kvm to be
> compatible, then merge the Xen and kvm implementations into a single one.

Fixing kvm (guest side) would be copying parts of the xen guest code, so
that doesn't make much sense to me ...

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 1/4] Add helper functions for paravirtual clocksources.

2008-04-28 Thread Gerd Hoffmann
Glauber Costa wrote:

> This is not exactly what kvm does. For us, wallclock read and system
> time reads are decoupled operations, controlled by different msrs.

Same for xen.  Although both live in the shared_info page they are
updated independently (and the wall clock is updated much less frequently).

> This function might exist, but in this case, it has to be wrapped
> around a kvm_read_wallclock(), that does the msr read. (I'm not sure
> whether or not you do it in your later patches, doing sequential reads
> :-) )

It is, as you have seen in the kvm patch ;)

What is the reason to handle the two clock msrs in different ways btw?
I think it would be better to have both msrs work the same way though,
i.e. the wallclock msr should have an enable bit and should auto-update too.

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 1/4] Add helper functions for paravirtual clocksources.

2008-04-24 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/Kconfig  |4 +
 arch/x86/kernel/Makefile  |1 +
 arch/x86/kernel/pvclock.c |  146 +
 include/asm-x86/pvclock.h |6 ++
 4 files changed, 157 insertions(+), 0 deletions(-)
 create mode 100644 arch/x86/kernel/pvclock.c
 create mode 100644 include/asm-x86/pvclock.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a22be4a..fe73d38 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -400,6 +400,10 @@ config PARAVIRT
  over full virtualization.  However, when run without a hypervisor
  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_CLOCK
+   bool
+   default n
+
 endif
 
 config MEMTEST_BOOTPARAM
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fa19c38..ab7999c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -83,6 +83,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)+= kvmclock.o
 obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK)   += pvclock.o
 
 ifdef CONFIG_INPUT_PCSPKR
 obj-y  += pcspeaker.o
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 0000000..fecf17a
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,146 @@
+/*  paravirtual clock -- common code used by kvm/xen
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include 
+#include 
+#include 
+
+/*
+ * These are periodically updated
+ *xen: magic shared_info page
+ *kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+   u64 tsc_timestamp; /* TSC at last update of time vals.  */
+   u64 system_timestamp;  /* Time, in nanosecs, since boot.*/
+   u32 tsc_to_nsec_mul;
+   int tsc_shift;
+   u32 version;
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+   u64 product;
+#ifdef __i386__
+   u32 tmp1, tmp2;
+#endif
+
+   if (shift < 0)
+   delta >>= -shift;
+   else
+   delta <<= shift;
+
+#ifdef __i386__
+   __asm__ (
+   "mul  %5   ; "
+   "mov  %4,%%eax ; "
+   "mov  %%edx,%4 ; "
+   "mul  %5   ; "
+   "xor  %5,%5; "
+   "add  %4,%%eax ; "
+   "adc  %5,%%edx ; "
+   : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+   : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif __x86_64__
+   __asm__ (
+   "mul %%rdx ; shrd $32,%%rdx,%%rax"
+   : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+   return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+   u64 delta = native_read_tsc() - shadow->tsc_timestamp;
+   return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+   struct kvm_vcpu_time_info *src)
+{
+   do {
+   dst->version = src->version;
+   rmb();  /* fetch version before data */
+   dst->tsc_timestamp = src->tsc_timestamp;
+   dst->system_timestamp  = src->system_time;
+   dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+   dst->tsc_shift = src->tsc_shift;
+   rmb();  /* test version after fetching data */
+   } while ((src->version & 1) | (dst->version ^ src->version));
+
+   return dst->version;
+}
+
+/*
+ * This is our 

[kvm-devel] [PATCH 4/4] kvm/guest: fix paravirt clocksource to be compatible with xen.

2008-04-24 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/Kconfig   |1 +
 arch/x86/kernel/kvmclock.c |   66 ++-
 2 files changed, 17 insertions(+), 50 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fe73d38..ed1a679 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -373,6 +373,7 @@ config VMI
 config KVM_CLOCK
bool "KVM paravirtualized clock"
select PARAVIRT
+   select PARAVIRT_CLOCK
depends on !(X86_VISWS || X86_VOYAGER)
help
  Turning on this option will allow you to run a paravirtualized clock
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ddee040..476b7c7 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -37,17 +38,9 @@ early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
-
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-   int cpu = smp_processor_id();
-   u64 delta = native_read_tsc() - last_tsc;
-   return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
 
 static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
+
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,35 +48,19 @@ static cycle_t kvm_clock_read(void);
  */
 unsigned long kvm_get_wallclock(void)
 {
-   u32 wc_sec, wc_nsec;
-   u64 delta;
+   struct kvm_vcpu_time_info *vcpu_time;
struct timespec ts;
-   int version, nsec;
int low, high;
 
low = (int)__pa(&wall_clock);
high = ((u64)__pa(&wall_clock) >> 32);
-
-   delta = kvm_clock_read();
-
native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-   do {
-   version = wall_clock.wc_version;
-   rmb();
-   wc_sec = wall_clock.wc_sec;
-   wc_nsec = wall_clock.wc_nsec;
-   rmb();
-   } while ((wall_clock.wc_version != version) || (version & 1));
-
-   delta = kvm_clock_read() - delta;
-   delta += wc_nsec;
-   nsec = do_div(delta, NSEC_PER_SEC);
-   set_normalized_timespec(&ts, wc_sec + delta, nsec);
-   /*
-* Of all mechanisms of time adjustment I've tested, this one
-* was the champion!
-*/
-   return ts.tv_sec + 1;
+
+   vcpu_time = &get_cpu_var(hv_clock);
+   pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+   put_cpu_var(hv_clock);
+   
+   return ts.tv_sec;
 }
 
 int kvm_set_wallclock(unsigned long now)
@@ -91,28 +68,17 @@ int kvm_set_wallclock(unsigned long now)
return 0;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-   u64 last_tsc, now;
-   int cpu;
+   struct kvm_vcpu_time_info *src;
+   cycle_t ret;
 
-   preempt_disable();
-   cpu = smp_processor_id();
-
-   last_tsc = get_clock(cpu, tsc_timestamp);
-   now = get_clock(cpu, system_time);
-
-   now += kvm_get_delta(last_tsc);
-   preempt_enable();
-
-   return now;
+   src = &get_cpu_var(hv_clock);
+   ret = pvclock_clocksource_read(src);
+   put_cpu_var(hv_clock);
+   return ret;
 }
+
 static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_read,
-- 
1.5.4.1


-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 2/4] Make xen use the generic paravirt clocksource code.

2008-04-24 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/xen/Kconfig |1 +
 arch/x86/xen/time.c  |  110 +-
 2 files changed, 12 insertions(+), 99 deletions(-)

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 4d5f264..47f0cdc 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,6 +5,7 @@
 config XEN
bool "Xen guest support"
select PARAVIRT
+   select PARAVIRT_CLOCK
depends on X86_32
depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && 
!(X86_VISWS || X86_VOYAGER)
help
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index c39e1a5..3d5f945 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -13,6 +13,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 
@@ -30,17 +31,6 @@
 
 static cycle_t xen_clocksource_read(void);
 
-/* These are perodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
-   u64 tsc_timestamp; /* TSC at last update of time vals.  */
-   u64 system_timestamp;  /* Time, in nanosecs, since boot.*/
-   u32 tsc_to_nsec_mul;
-   int tsc_shift;
-   u32 version;
-};
-
-static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
 
@@ -230,95 +220,14 @@ unsigned long xen_cpu_khz(void)
return xen_khz;
 }
 
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area.
- */
-static unsigned get_time_values_from_xen(void)
-{
-   struct vcpu_time_info   *src;
-   struct shadow_time_info *dst;
-
-   /* src is shared memory with the hypervisor, so we need to
-  make sure we get a consistent snapshot, even in the face of
-  being preempted. */
-   src = &__get_cpu_var(xen_vcpu)->time;
-   dst = &__get_cpu_var(shadow_time);
-
-   do {
-   dst->version = src->version;
-   rmb();  /* fetch version before data */
-   dst->tsc_timestamp = src->tsc_timestamp;
-   dst->system_timestamp  = src->system_time;
-   dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
-   dst->tsc_shift = src->tsc_shift;
-   rmb();  /* test version after fetching data */
-   } while ((src->version & 1) | (dst->version ^ src->version));
-
-   return dst->version;
-}
-
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
-   u64 product;
-#ifdef __i386__
-   u32 tmp1, tmp2;
-#endif
-
-   if (shift < 0)
-   delta >>= -shift;
-   else
-   delta <<= shift;
-
-#ifdef __i386__
-   __asm__ (
-   "mul  %5   ; "
-   "mov  %4,%%eax ; "
-   "mov  %%edx,%4 ; "
-   "mul  %5   ; "
-   "xor  %5,%5; "
-   "add  %4,%%eax ; "
-   "adc  %5,%%edx ; "
-   : "=A" (product), "=r" (tmp1), "=r" (tmp2)
-   : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif __x86_64__
-   __asm__ (
-   "mul %%rdx ; shrd $32,%%rdx,%%rax"
-   : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
-   return product;
-}
-
-static u64 get_nsec_offset(struct shadow_time_info *shadow)
-{
-   u64 now, delta;
-   now = native_read_tsc();
-   delta = now - shadow->tsc_timestamp;
-   return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
-}
-
 static cycle_t xen_clocksource_read(void)
 {
-   struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
+struct vcpu_time_info *src;
cycle_t ret;
-   unsigned version;
-
-   do {
-   version = get_time_values_from_xen();
-   barrier();
-   ret = shadow->system_timestamp + get_nsec_offset(shadow);
-   barrier();
-   } while (version != __get_cpu_var(xen_vcpu)->time.version);
-
-   put_cpu_var(shadow_time);
 
+   src = &get_cpu_var(xen_vcpu)->time;
+   ret = pvclock_clocksource_read((void*)src);
+   put_cpu_var(xen_vcpu);
return ret;
 }
 
@@ -349,9 +258,14 @@ static void xen_read_wallclock(struct timespec *ts)
 
 unsigned long xen_get_wallclock(void)
 {
+   const struct shared_info *s = HYPERVISOR_shared_info;
+   struct kvm_wall_clock *wall_clock = (void*)&(s->wc_version);
+struct vcpu_time_info *vc

[kvm-devel] [PATCH 0/4] paravirt clock patches

2008-04-24 Thread Gerd Hoffmann
  Hi folks,

My first attempt to send out a patch series with git ...

The patches fix the kvm paravirt clocksource code to be compatible with
xen and they also factor out some code which can be shared into a
separate source file used by both kvm and xen.

cheers,
  Gerd



-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 3/4] kvm/host: fix paravirt clocksource to be compatible with xen.

2008-04-24 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86.c |   63 +++
 1 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0ce5563..45b71c6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -493,7 +493,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t 
wall_clock)
 {
static int version;
struct kvm_wall_clock wc;
-   struct timespec wc_ts;
+   struct timespec now,sys,boot;
 
if (!wall_clock)
return;
@@ -502,9 +502,16 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t 
wall_clock)
 
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 
-   wc_ts = current_kernel_time();
-   wc.wc_sec = wc_ts.tv_sec;
-   wc.wc_nsec = wc_ts.tv_nsec;
+#if 0
+   /* Hmm, getboottime() isn't exported to modules ... */
+   getboottime(&boot);
+#else
+   now = current_kernel_time();
+   ktime_get_ts(&sys);
+   boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+#endif
+   wc.wc_sec = boot.tv_sec;
+   wc.wc_nsec = boot.tv_nsec;
wc.wc_version = version;
 
kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
@@ -537,20 +544,58 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
/*
 * The interface expects us to write an even number signaling that the
 * update is finished. Since the guest won't see the intermediate
-* state, we just write "2" at the end
+* state, we just increase by 2 at the end.
 */
-   vcpu->hv_clock.version = 2;
+   vcpu->hv_clock.version += 2;
 
shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
 
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-   sizeof(vcpu->hv_clock));
+  sizeof(vcpu->hv_clock));
 
kunmap_atomic(shared_kaddr, KM_USER0);
 
mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+   uint32_t quotient, remainder;
+   
+   __asm__ ( "divl %4"
+ : "=a" (quotient), "=d" (remainder)
+ : "0" (0), "1" (dividend), "r" (divisor) );
+   return quotient;
+}
+
+static void kvm_set_time_scale(uint32_t tsc_khz, struct kvm_vcpu_time_info 
*hv_clock)
+{
+   uint64_t nsecs = 1000000000LL;
+   int32_t  shift = 0;
+   uint64_t tps64;
+   uint32_t tps32;
+   
+   tps64 = tsc_khz * 1000LL;
+   while (tps64 > nsecs*2) {
+   tps64 >>= 1;
+   shift--;
+   }
+
+   tps32 = (uint32_t)tps64;
+   while (tps32 <= (uint32_t)nsecs) {
+   tps32 <<= 1;
+   shift++;
+   }
+
+   hv_clock->tsc_shift = shift;
+   hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+#if 0
+   printk(KERN_DEBUG "%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+  __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+  hv_clock->tsc_to_system_mul);
+#endif
+}
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
@@ -599,9 +644,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 
data)
/* ...but clean it before doing the actual write */
vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-   vcpu->arch.hv_clock.tsc_to_system_mul =
-   clocksource_khz2mult(tsc_khz, 22);
-   vcpu->arch.hv_clock.tsc_shift = 22;
+   kvm_set_time_scale(tsc_khz, &vcpu->arch.hv_clock);
 
down_read(&current->mm->mmap_sem);
vcpu->arch.time_page =
-- 
1.5.4.1


-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] pv clock: kvm is incompatible with xen :-(

2008-04-22 Thread Gerd Hoffmann
Glauber Costa wrote:
> Gerd Hoffmann wrote:
>> Jeremy Fitzhardinge wrote:
>>> Xen could change the parameters in the instant after
>>> get_time_values(). That change could be as a result of
>>> suspend-resume, so the parameters
>>> and the tsc could be wildly different.
>>
>> Ah, ok, forgot the rdtsc in the picture.  With that in mind I fully
>> agree that the loop is needed.  I think kvm guests can even hit that one
>> with the vcpu migrating to a different physical cpu, so we better handle
>> it correctly ;)
> 
> It's probably not needed for kvm, since we update everything everytime
> we get scheduled in the host side, which would cover the case for
> migration between physical cpus. 

No, it wouldn't.  The corner case we must catch is: guest reads time
info, kvm reschedules the guest to another pcpu, guest reads the tsc.
The time info used by the guest for the tsc delta is stale then, it
belongs to the previous pcpu.

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] pv clock: kvm is incompatible with xen :-(

2008-04-21 Thread Gerd Hoffmann
Jeremy Fitzhardinge wrote:
> Gerd Hoffmann wrote:
>> Not really.  There are only two calls, one in clocksource_read() and one
>> in the init path.  The later is superfluous I think because
>> clocksource_read() is the only user of the shadowed time info.
> 
> Hm.  It doesn't look like shadow_time needs to be a static percpu at 
> all.  It could just be a local to clocksource_read, I think.

Good point, one more cleanup.

thanks,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ RfC / patch ] kvmclock fixes

2008-04-21 Thread Gerd Hoffmann
Jeremy Fitzhardinge wrote:
> Gerd Hoffmann wrote:
>> +cycle_t pvclock_clocksource_read(struct kvm_vcpu_time_info *src)
>> +{
>> +struct pvclock_shadow_time *shadow = &get_cpu_var(shadow_time);
>> +cycle_t ret;
>> +
>> +pvclock_get_time_values(shadow, src);
>> +ret = shadow->system_timestamp + pvclock_get_nsec_offset(shadow);
>>   
> 
> You need to put this in a loop in case the system clock parameters 
> change between the pvclock_get_time_values() and pvclock_get_nsec_offset().

Fixed, new patch attached.

> How does kvm deal with suspend/resume with respect to time?  Is the 
> "system" timestamp guaranteed to remain monotonic?  For Xen, I think 
> we'll need to maintain an offset between the initial system timestamp 
> and whatever it is after resuming.

Haven't looked at it yet.

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1cc9d42..688df87 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -79,7 +79,7 @@ obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o pvclock.o
 
 ifdef CONFIG_INPUT_PCSPKR
 obj-y+= pcspeaker.o
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ddee040..476b7c7 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -37,17 +38,9 @@ early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
-
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-	int cpu = smp_processor_id();
-	u64 delta = native_read_tsc() - last_tsc;
-	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
 
 static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
+
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,35 +48,19 @@ static cycle_t kvm_clock_read(void);
  */
 unsigned long kvm_get_wallclock(void)
 {
-	u32 wc_sec, wc_nsec;
-	u64 delta;
+	struct kvm_vcpu_time_info *vcpu_time;
 	struct timespec ts;
-	int version, nsec;
 	int low, high;
 
 	low = (int)__pa(&wall_clock);
 	high = ((u64)__pa(&wall_clock) >> 32);
-
-	delta = kvm_clock_read();
-
 	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-	do {
-		version = wall_clock.wc_version;
-		rmb();
-		wc_sec = wall_clock.wc_sec;
-		wc_nsec = wall_clock.wc_nsec;
-		rmb();
-	} while ((wall_clock.wc_version != version) || (version & 1));
-
-	delta = kvm_clock_read() - delta;
-	delta += wc_nsec;
-	nsec = do_div(delta, NSEC_PER_SEC);
-	set_normalized_timespec(&ts, wc_sec + delta, nsec);
-	/*
-	 * Of all mechanisms of time adjustment I've tested, this one
-	 * was the champion!
-	 */
-	return ts.tv_sec + 1;
+
+	vcpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	put_cpu_var(hv_clock);
+	
+	return ts.tv_sec;
 }
 
 int kvm_set_wallclock(unsigned long now)
@@ -91,28 +68,17 @@ int kvm_set_wallclock(unsigned long now)
 	return 0;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-	u64 last_tsc, now;
-	int cpu;
+	struct kvm_vcpu_time_info *src;
+	cycle_t ret;
 
-	preempt_disable();
-	cpu = smp_processor_id();
-
-	last_tsc = get_clock(cpu, tsc_timestamp);
-	now = get_clock(cpu, system_time);
-
-	now += kvm_get_delta(last_tsc);
-	preempt_enable();
-
-	return now;
+	src = &get_cpu_var(hv_clock);
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(hv_clock);
+	return ret;
 }
+
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_read,
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 0000000..6e7dae0
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,150 @@
+/*  paravirtual clock -- common code used by kvm/xen
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later ver

Re: [kvm-devel] pv clock: kvm is incompatible with xen :-(

2008-04-21 Thread Gerd Hoffmann
Jeremy Fitzhardinge wrote:
> Xen could change the parameters in the instant after get_time_values(). 
> That change could be as a result of suspend-resume, so the parameters
> and the tsc could be wildly different.

Ah, ok, forgot the rdtsc in the picture.  With that in mind I fully
agree that the loop is needed.  I think kvm guests can even hit that one
with the vcpu migrating to a different physical cpu, so we better handle
it correctly ;)

> Sure, but get_time_values() has several other callers.

Not really.  There are only two calls, one in clocksource_read() and one
in the init path.  The latter is superfluous I think because
clocksource_read() is the only user of the shadowed time info.

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [ RfC / patch ] kvmclock fixes

2008-04-21 Thread Gerd Hoffmann
Gerd Hoffmann wrote:
> Marcelo Tosatti wrote:

>> Haven't seen Gerd's guest patches ? 
> 
> I'm still busy cooking them up.  I've mentioned them in a mail, but they
> didn't ran over the list (yet).  Stay tuned ;)

It compiles, ship it!

This time as an all-in-one patch (both guest and host side).  Almost
untested and not (yet) split into pieces.

Changes:

  * Host: make kvm pv clock really compatible with xen pv clock.
  * Guest/xen: factor out some xen clock code into a separate
   source file (pvclock.[ch]), so kvm can reuse it.
  * Guest/kvm: make kvm clock compatible with xen clock by using
   the common code bits.

Tests, reviews and comments are welcome.

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1cc9d42..688df87 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -79,7 +79,7 @@ obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o pvclock.o
 
 ifdef CONFIG_INPUT_PCSPKR
 obj-y+= pcspeaker.o
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ddee040..476b7c7 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -37,17 +38,9 @@ early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
-
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-	int cpu = smp_processor_id();
-	u64 delta = native_read_tsc() - last_tsc;
-	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
 
 static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
+
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,35 +48,19 @@ static cycle_t kvm_clock_read(void);
  */
 unsigned long kvm_get_wallclock(void)
 {
-	u32 wc_sec, wc_nsec;
-	u64 delta;
+	struct kvm_vcpu_time_info *vcpu_time;
 	struct timespec ts;
-	int version, nsec;
 	int low, high;
 
 	low = (int)__pa(&wall_clock);
 	high = ((u64)__pa(&wall_clock) >> 32);
-
-	delta = kvm_clock_read();
-
 	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-	do {
-		version = wall_clock.wc_version;
-		rmb();
-		wc_sec = wall_clock.wc_sec;
-		wc_nsec = wall_clock.wc_nsec;
-		rmb();
-	} while ((wall_clock.wc_version != version) || (version & 1));
-
-	delta = kvm_clock_read() - delta;
-	delta += wc_nsec;
-	nsec = do_div(delta, NSEC_PER_SEC);
-	set_normalized_timespec(&ts, wc_sec + delta, nsec);
-	/*
-	 * Of all mechanisms of time adjustment I've tested, this one
-	 * was the champion!
-	 */
-	return ts.tv_sec + 1;
+
+	vcpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	put_cpu_var(hv_clock);
+	
+	return ts.tv_sec;
 }
 
 int kvm_set_wallclock(unsigned long now)
@@ -91,28 +68,17 @@ int kvm_set_wallclock(unsigned long now)
 	return 0;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-	u64 last_tsc, now;
-	int cpu;
+	struct kvm_vcpu_time_info *src;
+	cycle_t ret;
 
-	preempt_disable();
-	cpu = smp_processor_id();
-
-	last_tsc = get_clock(cpu, tsc_timestamp);
-	now = get_clock(cpu, system_time);
-
-	now += kvm_get_delta(last_tsc);
-	preempt_enable();
-
-	return now;
+	src = &get_cpu_var(hv_clock);
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(hv_clock);
+	return ret;
 }
+
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_read,
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 0000000..2da148d
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,144 @@
+/*  paravirtual clock -- common code used by kvm/xen
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FIT

Re: [kvm-devel] pv clock: kvm is incompatible with xen :-(

2008-04-21 Thread Gerd Hoffmann
Jeremy Fitzhardinge wrote:
> Gerd Hoffmann wrote:
>> I'm looking at the guest side of the issue right now, trying to identify
>> common code, and while doing so noticed that xen does the
>> version-check-loop in both get_time_values_from_xen(void) and
>> xen_clocksource_read(void), and I can't see any obvious reason for that.
>>  The loop in xen_clocksource_read(void) is not needed IMHO.  Can I
>> drop it?
> 
> No.  The get_nsec_offset() needs to be atomic with respect to the
> get_time_values() parameters.

Hmm, I somehow fail to see a case where it could be non-atomic ...

get_time_values() copies a consistent snapshot, thus
xen_clocksource_read() doesn't race against xen updating the fields.
The snapshot is in a per-cpu variable, thus it doesn't race against
other guest vcpus running get_time_values() at the same time.

> There could be a loopless
> __get_time_values() for use in this case, but given that it almost never
> loops, I don't think its worthwhile.

"in this case" ???  I'm confused.  There is only a single user of
get_nsec_offset(), which is xen_clocksource_read() ...

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] paravirt clock stil causing hangs in kvm-65

2008-04-21 Thread Gerd Hoffmann
Marcelo Tosatti wrote:
>> >From what me and marcelo discussed, I think there's a possibility that
>> it has marginally something to do with precision of clock calculation.
>> Gerd's patches address that issues. Can somebody test this with those
>> patches (both guest and host), while I'm off ?
> 
> Haven't seen Gerd's guest patches ? 

I'm still busy cooking them up.  I've mentioned them in a mail, but they
didn't go out to the list (yet).  Stay tuned ;)

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] pv clock: kvm is incompatible with xen :-(

2008-04-18 Thread Gerd Hoffmann
Jeremy Fitzhardinge wrote:
> Gerd Hoffmann wrote:
>> Wall clock is off a few hours though.  Oops.
>>
>> I think the way wall clock and system clock work together in xen (Jeremy
>> correct me if I'm wrong) is that the wall clock specifies the point in
>> time where the system clock started going.  As kvm fills in host system
>> time into the guest system time fields the guest wall clock fields
>> should be filled with the host boot time timestamp I'd say.
>>   
> 
> Yes.  The wallclock field in the shared info structure is the wallclock
> time at boot; you compute the current time by adding the system
> timestamp to it.   System time changes are effected by retroactively
> changing the boot time of the machine, though that can also change
> because of suspend/resume/migrate.
> 
> In general the kernel only reads the wallclock time at boot, and then
> maintains it for itself from then on.  I think.

Thanks.

I'm looking at the guest side of the issue right now, trying to identify
common code, and while doing so noticed that xen does the
version-check-loop in both get_time_values_from_xen(void) and
xen_clocksource_read(void), and I can't see any obvious reason for that.
 The loop in xen_clocksource_read(void) is not needed IMHO.  Can I drop it?

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] pv clock: kvm is incompatible with xen :-(

2008-04-11 Thread Gerd Hoffmann
Gerd Hoffmann wrote:

> Wall clock is off a few hours though.  Oops.
> 
> I think the way wall clock and system clock work together in xen (Jeremy
> correct me if I'm wrong) is that the wall clock specifies the point in
> time where the system clock started going.  As kvm fills in host system
> time into the guest system time fields the guest wall clock fields
> should be filled with the host boot time timestamp I'd say.

Following up myself with a quick&dirty patch to tackle this issue too.
This one calculates the boot time.  That should be solvable in a better way;
include/linux/time.h lists two functions which sound promising:

  extern void getboottime(struct timespec *ts);
  extern void monotonic_to_bootbased(struct timespec *ts);

Neither of them is available to modules though, so I can't test without
rebooting my laptop ...

monotonic_to_bootbased() sounds like we would get the host's NTP adjustments
in the guests for free.

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/
diff -up kvm-65/kernel/x86.c.fix kvm-65/kernel/x86.c
--- kvm-65/kernel/x86.c.fix	2008-04-06 21:23:07.0 +0200
+++ kvm-65/kernel/x86.c	2008-04-11 16:17:23.0 +0200
@@ -490,7 +490,7 @@ static void kvm_write_wall_clock(struct 
 {
 	static int version;
 	struct kvm_wall_clock wc;
-	struct timespec wc_ts;
+	struct timespec now,sys,boot;
 
 	if (!wall_clock)
 		return;
@@ -499,9 +499,11 @@ static void kvm_write_wall_clock(struct 
 
 	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 
-	wc_ts = current_kernel_time();
-	wc.wc_sec = wc_ts.tv_sec;
-	wc.wc_nsec = wc_ts.tv_nsec;
+	now = current_kernel_time();
+	ktime_get_ts(&sys);
+	boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+	wc.wc_sec = boot.tv_sec;
+	wc.wc_nsec = boot.tv_nsec;
 	wc.wc_version = version;
 
 	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
@@ -548,6 +550,44 @@ static void kvm_write_guest_time(struct 
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+	uint32_t quotient, remainder;
+	
+	__asm__ ( "divl %4"
+		  : "=a" (quotient), "=d" (remainder)
+		  : "0" (0), "1" (dividend), "r" (divisor) );
+	return quotient;
+}
+
+static void kvm_set_time_scale(uint32_t tsc_khz, struct kvm_vcpu_time_info *hv_clock)
+{
+	uint64_t nsecs = 1000000000LL;
+	int32_t  shift = 0;
+	uint64_t tps64;
+	uint32_t tps32;
+	
+	tps64 = tsc_khz * 1000LL;
+	while (tps64 > nsecs*2) {
+		tps64 >>= 1;
+		shift--;
+	}
+
+	tps32 = (uint32_t)tps64;
+	while (tps32 <= (uint32_t)nsecs) {
+		tps32 <<= 1;
+		shift++;
+	}
+
+	hv_clock->tsc_shift = shift;
+	hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+#if 0
+	printk(KERN_DEBUG "%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+	   __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+	   hv_clock->tsc_to_system_mul);
+#endif
+}
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
@@ -596,9 +636,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-		vcpu->arch.hv_clock.tsc_to_system_mul =
-	clocksource_khz2mult(kvm_tsc_khz, 22);
-		vcpu->arch.hv_clock.tsc_shift = 22;
+		kvm_set_time_scale(kvm_tsc_khz, &vcpu->arch.hv_clock);
 
 		down_read(&current->mm->mmap_sem);
 		vcpu->arch.time_page =
-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] pv clock: kvm is incompatible with xen :-(

2008-04-11 Thread Gerd Hoffmann
Avi Kivity wrote:
> Gerd Hoffmann wrote:
>>   Hi,
>>
>> Tried to use kvmclock with xenner and noticed that the kvmclock
>> (MSR_KVM_SYSTEM_TIME msr) is incompatible with xen.
> 
> Patches are welcome, especially as kvmclock isn't merged yet, so there
> are no backward compatibility issues.

Great, so I'll happily go break kvm guests ;)

Patch revision #1 attached.  It changes the way the tsc-delta-scaling
fields are calculated to be compatible with xen.  Code is taken from
xenner (which got it from xen) and adapted a bit.  Host only, kvm guest
side not done (yet).

With that patch applied xen guests with pv clock enabled happily boot to
the login prompt, without complaints about time going backwards.  Fine.

Wall clock is off a few hours though.  Oops.

I think the way wall clock and system clock work together in xen (Jeremy
correct me if I'm wrong) is that the wall clock specifies the point in
time where the system clock started going.  As kvm fills in host system
time into the guest system time fields the guest wall clock fields
should be filled with the host boot time timestamp I'd say.

Comments?

  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/
diff -up kvm-65/kernel/x86.c.fix kvm-65/kernel/x86.c
--- kvm-65/kernel/x86.c.fix	2008-04-06 21:23:07.0 +0200
+++ kvm-65/kernel/x86.c	2008-04-11 15:18:23.0 +0200
@@ -548,6 +548,44 @@ static void kvm_write_guest_time(struct 
 	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
 }
 
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+	uint32_t quotient, remainder;
+	
+	__asm__ ( "divl %4"
+		  : "=a" (quotient), "=d" (remainder)
+		  : "0" (0), "1" (dividend), "r" (divisor) );
+	return quotient;
+}
+
+static void kvm_set_time_scale(uint32_t tsc_khz, struct kvm_vcpu_time_info *hv_clock)
+{
+	uint64_t nsecs = 1000000000LL;
+	int32_t  shift = 0;
+	uint64_t tps64;
+	uint32_t tps32;
+	
+	tps64 = tsc_khz * 1000LL;
+	while (tps64 > nsecs*2) {
+		tps64 >>= 1;
+		shift--;
+	}
+
+	tps32 = (uint32_t)tps64;
+	while (tps32 <= (uint32_t)nsecs) {
+		tps32 <<= 1;
+		shift++;
+	}
+
+	hv_clock->tsc_shift = shift;
+	hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+#if 0
+	printk(KERN_DEBUG "%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+	   __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+	   hv_clock->tsc_to_system_mul);
+#endif
+}
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
@@ -596,9 +634,7 @@ int kvm_set_msr_common(struct kvm_vcpu *
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-		vcpu->arch.hv_clock.tsc_to_system_mul =
-	clocksource_khz2mult(kvm_tsc_khz, 22);
-		vcpu->arch.hv_clock.tsc_shift = 22;
+		kvm_set_time_scale(kvm_tsc_khz, &vcpu->arch.hv_clock);
 
 		down_read(&current->mm->mmap_sem);
 		vcpu->arch.time_page =
-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] pv clock: kvm is incompatible with xen :-(

2008-04-11 Thread Gerd Hoffmann
  Hi,

Tried to use kvmclock with xenner and noticed that the kvmclock
(MSR_KVM_SYSTEM_TIME msr) is incompatible with xen.

kvm guests do this to translate the tsc delta into nsecs:

   #define get_clock(cpu, field) per_cpu(hv_clock, cpu).field

   static inline u64 kvm_get_delta(u64 last_tsc)
   {
   int cpu = smp_processor_id();
   u64 delta = native_read_tsc() - last_tsc;
   return (delta * get_clock(cpu, tsc_to_system_mul)) >> 22;
   }

whereas xen guests do this (64bit version):

   static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
   {
   u64 product;

   if (shift < 0)
   delta >>= -shift;
   else
   delta <<= shift;

   __asm__ (
   "mul %%rdx ; shrd $32,%%rdx,%%rax"
   : "=a" (product) : "0" (delta), "d" ((u64)mul_frac));
   return product;
   }

Note that xen does a 64bit multiply (of the 64bit delta and the 32bit
factor) yielding a 128bit result, then picking bits 32-95 for the 64bit
return value.  In contrast kvm does a simple 64bit multiply, which is
equivalent to using the lowest 64 bits.  Thus kvm is off by factor 2^32,
and that without even considering the ordering of two (shift + multiply)
operations and any rounding errors ...

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by the 2008 JavaOne(SM) Conference 
Don't miss this year's exciting event. There's still time to save $100. 
Use priority code J8TL2D2. 
http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] VMX CR3 cache

2008-01-30 Thread Gerd Hoffmann
Avi Kivity wrote:

> [fairly amazing results.  how do they compare to xen?]

Didn't benchmark it side-by-side yet.  Most likely xenner is still
noticeably slower on 64bit (32bit should be roughly comparable).  I also
wouldn't be surprised if you see different results on different workloads.

xen mangles page table flags a lot to make guests run fast despite the
frequent cr3 switches.  It sets the global flag for userspace mappings
(which are identical in kernel/userspace page table tree) to avoid them
being flushed from tlb on every syscall enter/exit.  I haven't tried to
do similar things in xenner because I don't track page tables in the
first place.  I hope to get it to speed comparable to xen using
virtualization hardware features instead.

cheers,
  Gerd

-- 
http://kraxel.fedorapeople.org/xenner/

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] VMX CR3 cache

2008-01-30 Thread Gerd Hoffmann
Gerd Hoffmann wrote:
> I've passed in a physical address.  The vmx_cr3_cache_msr() function has
> a gva_to_page() call which makes me suspect it expects a virtual
> address.

Confirmed.  When passing in a virtual address it works.
And it gives me a nice speedup for kernel builds:

rhel5-64 kraxel ~# cat kbench-cr3-*
date:Wed Jan 30 09:50:03 CET 2008
host:2.6.18-53.el5xen x86_64 (cr3-cache)
target:  linux-2.6.21 i386 vmlinux
config:  allnoconfig
   1: real 184.03 user 96.66 sys 41.30
   2: real 137.23 user 91.57 sys 37.93
   3: real 136.53 user 90.98 sys 38.39
date:Wed Jan 30 09:37:10 CET 2008
host:2.6.18-53.el5xen x86_64 (cr3-nocache)
target:  linux-2.6.21 i386 vmlinux
config:  allnoconfig
   1: real 182.47 user 112.33 sys 41.56
   2: real 175.75 user 109.45 sys 40.53
   3: real 173.54 user 108.49 sys 41.22

xen pv guests on 64bit do two cr3 switches per syscall due to
kernel/userspace separation being done by having two different page
table sets.  Thus cr3 caching eliminates two vmexits per xen guest
system call, and the pgd caching probably helps too.  Watching
statistics confirms that the number of vmexits goes down significantly.

I strongly support cr3 caching being merged (after being cleaned up of
course).

cheers,
  Gerd

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] VMX CR3 cache

2008-01-30 Thread Gerd Hoffmann
Marcelo Tosatti wrote:
> And this is against a changed x86.git -mm tree (with pvops64 patches).
> I'll send the PTE-write-via-hypercall patches soon and will rebase on
> top of that (the CR3 cache needs more testing/tuning apparently).

Oops for sale ;)

Triggered by the guest's wrmsr; looks like some error checks are missing.

I've passed in a physical address.  The vmx_cr3_cache_msr() function has
a gva_to_page() call which makes me suspect it expects a virtual
address.  First it should not Oops when an invalid virtual address is
passed in, and second I think it better shouldn't expect a virtual
address in the first place.

What is the reason to expect the cr3 cache being page aligned btw?  It
should be enough to require the struct not cross a page border, right?

cheers,
  Gerd
MSR_IA32_VMX_MISC: 000403c0
 cr3 target values: 4
device xenner0 entered promiscuous mode
audit(1201680208.401:28): dev=xenner0 prom=256 old_prom=0 auid=4294967295
br0: port 1(xenner0) entering learning state
Unable to handle kernel NULL pointer dereference at  RIP: 
 [] :kvm_intel:vmx_cr3_cache_msr+0x76/0xef
PGD 102d0067 PUD f816067 PMD 0 
Oops:  [1] SMP 
CPU 1 
Modules linked in: i915 drm nls_utf8 ipt_LOG xt_TCPMSS xt_mark xt_MARK 
iptable_mangle kvm_intel(U) kvm(U) ipt_MASQUERADE iptable_nat nf_nat nfsd 
exportfs lockd nfs_acl auth_rpcgss autofs4 tun sunrpc bridge nf_conntrack_ipv4 
ipt_REJECT iptable_filter ip_tables nf_conntrack_netbios_ns nf_conntrack_ipv6 
xt_state nf_conntrack nfnetlink xt_tcpudp ip6t_ipv6header ip6t_REJECT 
ip6table_filter ip6_tables x_tables ipv6 cpufreq_ondemand acpi_cpufreq loop 
dm_multipath sr_mod cdrom ata_generic snd_hda_intel snd_seq_dummy snd_seq_oss 
snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss arc4 snd_mixer_oss ecb 
blkcipher snd_pcm iTCO_wdt iTCO_vendor_support video snd_timer snd_page_alloc 
output i2c_i801 i2c_core ata_piix iwl3945 snd_hwdep snd battery ac nsc_ircc 
mac80211 button cfg80211 sg e1000 pcspkr irda soundcore thinkpad_acpi crc_ccitt 
hwmon joydev dm_snapshot dm_zero dm_mirror dm_mod ahci libata sd_mod scsi_mod 
ext3 jbd mbcache uhci_hcd ohci_hcd ehci_hcd
Pid: 7680, comm: xenner Not tainted 2.6.23.14-107.fc8 #1
RIP: 0010:[]  [] 
:kvm_intel:vmx_cr3_cache_msr+0x76/0xef
RSP: 0018:810011509ca8  EFLAGS: 00010296
RAX: 810008e48770 RBX:  RCX: 
RDX:  RSI: 0296 RDI: 810008e4876c
RBP: 81001648a000 R08: 87655678 R09: 
R10: 0034 R11: 8847afcd R12: 81007cc99000
R13:  R14: 000a R15: 
FS:  2aad2b20() GS:810037c21300() knlGS:
CS:  0010 DS: 002b ES: 002b CR0: 8005003b
CR2:  CR3: 7ad72000 CR4: 26e0
DR0: 8125b0e0 DR1:  DR2: 
DR3:  DR6: 0ff1 DR7: 0701
Process xenner (pid: 7680, threadinfo 810011508000, task 81000a83b040)
Stack:  81001648a000 81001648a000  8847aff8
 81001648a000 88460c3f 8090ae81 810011509e68
 810011509ee8 81001648a000  ae80
Call Trace:
 [] :kvm_intel:handle_wrmsr+0x2b/0x4f
 [] :kvm:kvm_arch_vcpu_ioctl_run+0x3a7/0x4fb
 [] :kvm:kvm_vcpu_ioctl+0xda/0x2dd
 [] n_tty_receive_buf+0xd49/0xdc9
 [] generic_file_aio_write+0x6c/0xc1
 [] avc_has_perm+0x49/0x5b
 [] :ext3:ext3_file_write+0x16/0x94
 [] inode_has_perm+0x65/0x72
 [] __wake_up+0x38/0x4f
 [] file_has_perm+0x94/0xa3
 [] do_ioctl+0x21/0x6b
 [] vfs_ioctl+0x243/0x25c
 [] sys_ioctl+0x59/0x79
 [] system_call+0x7e/0x83


Code: 48 8b 13 31 c9 48 c1 ea 33 48 89 d0 48 c1 e8 09 48 8b 04 c5 
RIP  [] :kvm_intel:vmx_cr3_cache_msr+0x76/0xef
 RSP 
CR2: 
xenner0: no IPv6 routers present
br0: topology change detected, propagating
br0: port 1(xenner0) entering forwarding state
-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] VMX CR3 cache

2008-01-29 Thread Gerd Hoffmann
Gerd Hoffmann wrote:
> Gerd Hoffmann wrote:
> 
>> Hmm, what kvm version is this against?  latest git I guess?  After
>> applying to kvm-60 (and fixing up some trivial rejects) it doesn't build.
> 
> Looks like the mmu.h chunk is missing in the patch.

Hmm, and x86.c looks incomplete too.  vcpu->arch.mmu.root_hpa becomes an
array, but mmu.h and x86.c still use it the old way.

Can you double-check and resend the patch please?

thanks,
  Gerd



-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] VMX CR3 cache

2008-01-29 Thread Gerd Hoffmann
Gerd Hoffmann wrote:

> Hmm, what kvm version is this against?  latest git I guess?  After
> applying to kvm-60 (and fixing up some trivial rejects) it doesn't build.

Looks like the mmu.h chunk is missing in the patch.

cheers,
  Gerd



-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] VMX CR3 cache

2008-01-29 Thread Gerd Hoffmann
Marcelo Tosatti wrote:
> Hi,
> 
> The CR3 cache feature of VMX CPU's does not seem to increase
> context switch performance significantly as it did in the original
> implementation (http://lkml.org/lkml/2007/1/5/205).
> 
> The following is similar to the original, but it also caches roots for
> 4-level pagetables on x86-64, and clearing the cache is only performed
> in zap_page() instead of on every pagefault.

Hmm, what kvm version is this against?  latest git I guess?  After
applying to kvm-60 (and fixing up some trivial rejects) it doesn't build.

> Nowhere near the results achieved earlier (and kernel compilation and
> httperf seems slightly slower, probably due to paravirt overhead).

Even if it doesn't help much on native:  With xenner it probably
gives a nice speedup especially on 64 bit where each guest syscall
involves a cr3 switch (not benchmarked yet though).

>  #ifdef __KERNEL__
>  #include 
>  
> -#define KVM_PARA_FEATURES (1UL << KVM_FEATURE_NOP_IO_DELAY)
> +#define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY) |   \
> +(1UL << KVM_FEATURE_CR3_CACHE))
> +
> +#define KVM_MSR_SET_CR3_CACHE 0x87655678
> +
> +#define KVM_CR3_CACHE_SIZE 4
> +
> +struct kvm_cr3_cache_entry {
> + u64 guest_cr3;
> + u64 host_cr3;
> +};
> +
> +struct kvm_cr3_cache {
> + struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
> + u32 max_idx;
> +};

Can you move the structs out of #ifdef __KERNEL__ please?

thanks,
  Gerd



-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH] system_powerdown via acpi power button

2008-01-24 Thread Gerd Hoffmann
Avi Kivity wrote:
> Agree, but should try a "quit" monitor command first.  Signals are racy, 
> like anything that deals with pids (qemu dies, another process is 
> fork()ed with the same pid, libvirt kills it).

There is no race in that specific case because qemu is started by
libvirtd.  libvirtd can safely kill qemu as long as it hasn't collected
the exit status via waitpid().  While the qemu zombie hangs around the
pid will not be reused.

cheers,
  Gerd

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] pae guest on non-pae host?

2008-01-23 Thread Gerd Hoffmann
  Hi,

Quick question: is kvm able to handle guests using pae when the host
runs a non-pae 32bit kernel?

cheers,
  Gerd

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH] export notifier #1

2008-01-23 Thread Gerd Hoffmann
Andrea Arcangeli wrote:
> Like Avi said, Xen is dealing with the linux pte only, so there's no
> racy smp page fault to serialize against. Perhaps we can add another
> notifier for Xen though.
> 
> But I think it's still not enough for Xen to have a method called
> before the ptep_clear_flush: rmap.c would get confused in
> page_mkclean_one for example.

The current code sets a bunch of vma flags (VM_RESERVED, VM_DONTCOPY,
VM_FOREIGN) so the VM doesn't try to handle those special mappings.  IIRC
one of them was needed to not make rmap unhappy.

> Nevertheless if you've any idea on how to use the notifiers for Xen
> I'd be glad to help. Perhaps one workable way to change my patch to
> work for you could be to pass the retval of ptep_clear_flush to the
> notifiers themself. something like:
> 
> #define ptep_clear_flush(__vma, __address, __ptep)\
> ({\
>   pte_t __pte;\
>   __pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep);  \
>   flush_tlb_page(__vma, __address);   \
>   __pte = mmu_notifier(invalidate_page, (__vma)->vm_mm, __address, __pte, 
> __ptep);\
>   __pte;  \
> })

Would not work.  Need to pass a pointer to the pte so the xen hypervisor
can do unmap (aka pte_clear) and grant release as atomic operation.
Thus passing the value of the pte entry isn't good enough.

Another maybe workable approach for Xen is to go through pv_ops
(although pte_clear doesn't go through pv_ops right now, so this would
be an additional hook too ...).

cheers,
  Gerd


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH] export notifier #1

2008-01-23 Thread Gerd Hoffmann
Robin Holt wrote:
> We have a seg structure which is similar to some structure you probably
> have which describes the grant.  One of the things hanging off that
> seg structure is essentially a page table containing PFNs with their
> respective flags (XPMEM specific and not the same as the pfn flags in
> the processor page tables).

i.e. page tables used by hardware != cpu, right?

In the Xen guest case the normal processor page tables are modified, but
in a special way to make the Xen hypervisor also release the grant.

cheers,
  Gerd

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH] export notifier #1

2008-01-23 Thread Gerd Hoffmann
  Hi,

>> That would render the notifies useless for Xen too.  Xen needs to
>> intercept the actual pte clear and instead of just zapping it use the
>> hypercall to do the unmap and release the grant.
> 
> We are tackling that by having our own page table hanging off the
> structure representing our seg (thing created when we do the equiv of
> your grant call).

--verbose please.  I don't understand that "own page table" trick.  Is
that page table actually used by the processor or is it just used to
maintain some sort of page list?

>> Current implementation uses a new vm_ops operation which is called if
>> present instead of doing a ptep_get_and_clear_full().  It is in the
>> XenSource tree only, mainline hasn't this yet due to implementing only
>> the DomU bits so far.  When adding Dom0 support to mainline we'll need
>> some way to handle it, and I'd like to see the notifies be designed in a
>> way that Xen can simply use them.
> 
> Would the callouts Christoph proposed work for you if you maintained
> your own page table and moved them after the callouts the mmu_notifiers
> are using.

I *think* it would.  I'm not that deep in the VM details to be sure
though.  One possible problem I see is that the hypercall does also tear
down the mapping, so this isn't just a notify but also changes the page
tables, which could confuse the VM later on when it comes to the actual
pte clearing.

cheers,
  Gerd


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH] export notifier #1

2008-01-23 Thread Gerd Hoffmann
  Hi,

Jumping in here, looks like this could develop into a direction useful
for Xen.

Background:  Xen has a mechanism called "grant tables" for page sharing.
 Guest #1 can issue a "grant" for another guest #2, which in turn then
can use that grant to map the page owned by guest #1 into its address
space.  This is used by the virtual network/disk drivers, i.e. typically
Domain-0 (which has access to the real hardware) maps pages of other
guests to fill in disk/network data.

Establishing and tearing down mappings for those grants must happen
through a special grant table hypercall, and especially for the tear
down part of the problem mmu/export/whatever-we-call-them-in-the-end
notifies could help.

> Issues with mmu_ops #2
> 
> - Notifiers are called *after* we tore down ptes.

That would render the notifies useless for Xen too.  Xen needs to
intercept the actual pte clear and instead of just zapping it use the
hypercall to do the unmap and release the grant.

Current implementation uses a new vm_ops operation which is called if
present instead of doing a ptep_get_and_clear_full().  It is in the
XenSource tree only, mainline hasn't this yet due to implementing only
the DomU bits so far.  When adding Dom0 support to mainline we'll need
some way to handle it, and I'd like to see the notifies be designed in a
way that Xen can simply use them.

cheers,
  Gerd


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH] add acpi powerbutton support

2008-01-22 Thread Gerd Hoffmann
  Hi,

>>> Catching ctrl-c sounds like a good idea but "ctrl-c, ctrl-c" should
>>> probably kill qemu then, since the machine might have no acpid running -
>>> in that case hitting ctrl-c would have no effect.
>>> 
>> Good idea.
>>   
> 
> I'm worried about the 30+ second shutdown latency.  Is there precedent 
> for SIGTERM or SIGINT requiring this long to take effect?

xenner signals a shutdown request to the guest for the first SIGINT (and
prints a message to the user saying so).  Sending SIGINT twice kills the
guest and cleans up.  I find that very useful, you can shutdown the
guest cleanly with a convenient Ctrl-C and also kill it off quickly by
simply pressing Ctrl-C again.

SIGTERM kills the guest instantly.  Applications are expected to react
quickly to SIGTERM, there is no way you can wait for a clean guest
shutdown then.  It is used on (host) shutdown for example, where you'll
get a SIGKILL when you don't exit within three seconds.

cheers,
  Gerd


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 2/2] kvmclock implementation, the guest part.

2008-01-17 Thread Gerd Hoffmann
> +struct shared_info shared_info __attribute__((__aligned__(PAGE_SIZE)));

leftover from old version?

> +unsigned long kvm_get_wallclock(void)
> +{
> + u32 wc_sec, wc_nsec;
> + u64 delta, last_tsc;
> + struct timespec ts;
> + int version, nsec, cpu = smp_processor_id();
> +
> + native_write_msr(MSR_KVM_WALL_CLOCK, __pa(&wall_clock));

Huh?  Shouldn't that be done once at boot time?

cheers,
  Gerd



-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 1/2] kvmclock - the host part.

2008-01-16 Thread Gerd Hoffmann
Glauber de Oliveira Costa wrote:
> This is the host part of kvm clocksource implementation. As it does
> not include clockevents, it is a fairly simple implementation. We
> only have to register a per-vcpu area, and start writting to it periodically.
> 
> The area is binary compatible with xen, as we use the same shadow_info 
> structure.

comment needs an update too ;)

> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> - MSR_IA32_TIME_STAMP_COUNTER,
> + MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME,

+ MSR_KVM_WALL_CLOCK

Looks good otherwise.

cheers,
  Gerd


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 2/3] kvmclock - the host part.

2008-01-16 Thread Gerd Hoffmann
  Hi,

> We want to avoid updating wall clock all the time.  As far as I 
> understand, wall clock is just a base which doesn't change.

Yep, it is.  Got that wrong first in xenner, with the result that guest
time ran at double speed ;)

>> +/* xen binary-compatible interfaces. See xen headers for details */
>> +struct xen_vcpu_time_info {
>> +uint32_t version;
>> +uint32_t pad0;
>> +uint64_t tsc_timestamp;
>> +uint64_t system_time;
>> +uint32_t tsc_to_system_mul;
>> +int8_t   tsc_shift;
>> +int8_t   pad1[3];
>> +};

>> +struct xen_vcpu_info {
>> +uint8_t  pad[32];
>> +struct xen_vcpu_time_info time;
>> +};
>>   
> 
> Please drop xen_vcpu_info...

Oh, yeah.  No point in assembling the whole xen shared info page.  Just
xen_vcpu_time_info is enough, it will work just fine for xenner.

cheers,
  Gerd

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] [PATCH] kvmclock implementation, the guest part.

2008-01-11 Thread Gerd Hoffmann
  Hi,

> diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
> index d083ff5..7728b87 100644
> --- a/arch/x86/xen/time.c
> +++ b/arch/x86/xen/time.c
> -static cycle_t xen_clocksource_read(void);
> +cycle_t xen_clocksource_read(void);

Huh?  You kill the static, but don't use the functions anywhere?  Looks
like half-done code sharing with xen paravirt clock ...

cheers,
  Gerd



-
Check out the new SourceForge.net Marketplace.
It's the best place to buy or sell services for
just about anything Open Source.
http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/8] KVM: PVDMA Guest: Guest-side routines for paravirtualized DMA

2007-11-12 Thread Gerd Hoffmann
Muli Ben-Yehuda wrote:
> On Wed, Nov 07, 2007 at 04:21:04PM +0200, Amit Shah wrote:
> 
>> We make the dma_mapping_ops structure to point to our structure so
>> that every DMA access goes through us. (This is the reason this only
>> works for 64-bit guest. 32-bit guest doesn't yet have a dma_ops
>> struct.)
> 
> I need the same facility for Calgary for falling back to swiotlb if a
> translation is disabled on some slot, and IB needs the same facility
> for some IB adapters (e.g., ipath). Perhaps it's time to consider
> stackable dma-ops (unless someone has a better idea...).

Hmm, at least the latter sounds like per-device dma_ops would be more
useful than stackable ones, as each stack instance just checks "should I
do something for device $foo, if not, call the next one ...".

cheers,
  Gerd


-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 1/3] include files for kvmclock

2007-11-09 Thread Gerd Hoffmann
> +/*
> + * Guest has page alignment and padding requirements. At the host, it will
> + * only lead to wasted space at the vcpu struct. For this reason, the struct
> + * is not anonymous
> + */
> +union kvm_hv_clock {
> + struct kvm_hv_clock_s {
> + u64 tsc_mult;
> + u64 now_ns;
> + /* That's the wall clock, not the water closet */
> + u64 wc_sec;
> + u64 last_tsc;
> + /* At first, we could use the tsc value as a marker, but Jeremy
> +  * well noted that it will cause us locking problems in 32-bit
> +  * sys, so we have a special version field */
> + u32 version;
> + } fields;
> + char page_align[PAGE_SIZE];
> +};

What is the point in using a whole page per vcpu?  You probably don't
want struct kvm_hv_clock_s to cross a page border.  Is that the only reason
or are there other constraints too?

As the kvm clock looks quite similar to what xen does, how about making
the structs binary-compatible or simply reusing the xen version (struct
vcpu_time_info in xen/interface/xen.h)?

cheers,
  Gerd

-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch] kvmctl.c: allow custom memory setup.

2007-10-18 Thread Gerd Hoffmann
Avi Kivity wrote:
> Gerd Hoffmann wrote:
>> No, I'm hacking up one more user ;)
> 
> Nice.  What will it do?

Run xenified guest kernels without Xen.

>> Should I send an updated patch or do you just drop these lines when
>> merging?
> 
> Please send a rebased and retested patch.

Oh, -47 is there.  Updated patch attached.

cheers,
  Gerd
diff -up ./kvmctl.h.upstream ./kvmctl.h
--- ./kvmctl.h.upstream	2007-10-18 17:09:06.0 +0200
+++ ./kvmctl.h	2007-10-18 17:09:11.0 +0200
@@ -146,6 +146,9 @@ int kvm_get_shadow_pages(kvm_context_t k
 int kvm_create(kvm_context_t kvm,
 	   unsigned long phys_mem_bytes,
 	   void **phys_mem);
+int kvm_create_vm(kvm_context_t kvm);
+void kvm_create_irqchip(kvm_context_t kvm);
+
 /*!
  * \brief Create a new virtual cpu
  *
@@ -413,6 +416,9 @@ void *kvm_create_phys_mem(kvm_context_t,
 			  unsigned long len, int slot, int log, int writable);
 void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start, 
 			  unsigned long len, int slot);
+int kvm_register_userspace_phys_mem(kvm_context_t kvm,
+			unsigned long phys_start, void *userspace_addr,
+			unsigned long len, int slot, int log);
 int kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
 
 
diff -up ./kvmctl.c.upstream ./kvmctl.c
--- ./kvmctl.c.upstream	2007-10-18 17:08:57.0 +0200
+++ ./kvmctl.c	2007-10-18 17:18:53.0 +0200
@@ -436,12 +436,9 @@ int kvm_alloc_userspace_memory(kvm_conte
 
 #endif
 
-int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
+int kvm_create_vm(kvm_context_t kvm)
 {
-	unsigned long memory = (phys_mem_bytes + PAGE_SIZE - 1) & PAGE_MASK;
 	int fd = kvm->fd;
-	int zfd;
-	int r;
 
 	kvm->vcpu_fd[0] = -1;
 
@@ -451,6 +448,15 @@ int kvm_create(kvm_context_t kvm, unsign
 		return -1;
 	}
 	kvm->vm_fd = fd;
+	return 0;
+}
+
+int kvm_create_default_phys_mem(kvm_context_t kvm, unsigned long phys_mem_bytes,
+void **vm_mem)
+{
+	unsigned long memory = (phys_mem_bytes + PAGE_SIZE - 1) & PAGE_MASK;
+	int zfd;
+	int r;
 
 #ifdef KVM_CAP_USER_MEMORY
 	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
@@ -468,13 +474,19 @@ int kvm_create(kvm_context_t kvm, unsign
 close(zfd);
 
 	kvm->physical_memory = *vm_mem;
+	return 0;
+}
+
+void kvm_create_irqchip(kvm_context_t kvm)
+{
+	int r;
 
 	kvm->irqchip_in_kernel = 0;
 #ifdef KVM_CAP_IRQCHIP
 	if (!kvm->no_irqchip_creation) {
 		r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
 		if (r > 0) {	/* kernel irqchip supported */
-			r = ioctl(fd, KVM_CREATE_IRQCHIP);
+			r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
 			if (r >= 0)
 kvm->irqchip_in_kernel = 1;
 			else
@@ -482,6 +494,19 @@ int kvm_create(kvm_context_t kvm, unsign
 		}
 	}
 #endif
+}
+
+int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
+{
+	int r;
+	
+	r = kvm_create_vm(kvm);
+	if (r < 0)
+	return r;
+	r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
+	if (r < 0)
+	return r;
+	kvm_create_irqchip(kvm);
 	r = kvm_create_vcpu(kvm, 0);
 	if (r < 0)
 		return r;
@@ -577,6 +602,32 @@ void *kvm_create_phys_mem(kvm_context_t 
 log, writable);
 }
 
+int kvm_register_userspace_phys_mem(kvm_context_t kvm,
+			unsigned long phys_start, void *userspace_addr,
+			unsigned long len, int slot, int log)
+{
+	struct kvm_userspace_memory_region memory = {
+		.slot = slot,
+		.memory_size = len,
+		.guest_phys_addr = phys_start,
+		.userspace_addr = (intptr_t)userspace_addr,
+		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
+	};
+	int r;
+
+	if (!kvm->physical_memory)
+		kvm->physical_memory = userspace_addr - phys_start;
+
+	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
+	if (r == -1) {
+		fprintf(stderr, "create_userspace_phys_mem: %s\n", strerror(errno));
+		return -1;
+	}
+
+	kvm_userspace_memory_region_save_params(kvm, &memory);
+return 0;
+}
+
 /* destroy/free a whole slot.
  * phys_start, len and slot are the params passed to kvm_create_phys_mem()
  */
-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch] kvmctl.c: allow custom memory setup.

2007-10-18 Thread Gerd Hoffmann
  Hi,

> Break it.  It has just one user, our qemu, which is included in the same
> package.

No, I'm hacking up one more user ;)

But maybe I'm better off shipping a private copy of kvmctl.c as long as
the library interface isn't finalized yet and subject to change.

>> Thats why I went the route to additionally split the job kvm_create()
>> does into multiple, individually callable pieces.  So I can first create
>> the vm, then create my custom memory slots (instead of the standard
>> setup built by kvm_create_userspace_memory()), then create the vcpu.
>>   
> 
> That's exactly what's needed.
> 
> The patch looks good, except that I wouldn't export
> kvm_create_default_phys_mem().

Fine with me.  If one uses the split versions, it is for creating
a non-default memory layout, so there is no point in exporting that one.

Should I send an updated patch or do you just drop these lines when merging?

cheers,
  Gerd



-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch] kvmctl.c: allow custom memory setup.

2007-10-18 Thread Gerd Hoffmann
Avi Kivity wrote:
> Izik Eidus wrote:
>> On Thu, 2007-10-18 at 12:18 +0200, Gerd Hoffmann wrote:
>> 
>>>>> I don't see how I can pass a pointer to
>>>>> kvm_create_userspace_memory() via kvm_create() without
>>>>> breaking the libkvm interface.  There is no flags field or
>>>>> similar which could be used to signal "vm_mem is a valid 
>>>>> pointer, please use that instead of mmap()'ing anonymous
>>>>> memory".
>>>>> 
>>>> There is no need to keep binary compat to libkvm as long as it
>>>> is linked statically.

>> anyone who use kvmctl, should not call kvm_create_userspace_memory 
>> directly, instead should call kvm_create()...

I'm talking about the kvm_create() interface, dammit.  Sure I can modify
kvm_create_userspace_memory() without breaking anyone as this isn't part
of the public (kvmctl.h) interface.  That doesn't buy us much though.
I'd have to pass the pointer to kvm_create() somehow so it can be passed
down to kvm_create_userspace_memory().  I can't without breaking the
public library interface though.

> Why not have an API for creating memory slots?  It allows much more 
> flexibility and is more in line with what qemu wants.

This *is* what the patch does.  It adds a function to add a
userspace-memory-backed memory slot: kvm_register_userspace_phys_mem().

That alone doesn't cut it though, as there are some more constraints:

 * kvm_create() builds a standard memory layout, which I want to be able
   to skip.
 * kvm_create() fails to create a vcpu in case no memory is available,
   which makes the simple approach of just asking kvm_create() for 0 bytes
   of physical memory for the guest unusable.

That's why I went the route to additionally split the job kvm_create()
does into multiple, individually callable pieces.  So I can first create
the vm, then create my custom memory slots (instead of the standard
setup built by kvm_create_userspace_memory()), then create the vcpu.

cheers,
  Gerd

ps: patch attached again for reference.
* split kvm_create() into smaller pieces which can be individually
  called if needed.
* add kvm_register_userspace_phys_mem() function.
---
 user/kvmctl.c |   61 +-
 user/kvmctl.h |8 +++
 2 files changed, 64 insertions(+), 5 deletions(-)

Index: kvm-46/user/kvmctl.c
===
--- kvm-46.orig/user/kvmctl.c
+++ kvm-46/user/kvmctl.c
@@ -427,12 +427,9 @@ int kvm_alloc_userspace_memory(kvm_conte
 	return 0;
 }
 
-int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
+int kvm_create_vm(kvm_context_t kvm)
 {
-	unsigned long memory = (phys_mem_bytes + PAGE_SIZE - 1) & PAGE_MASK;
 	int fd = kvm->fd;
-	int zfd;
-	int r;
 
 	kvm->vcpu_fd[0] = -1;
 
@@ -442,6 +439,15 @@ int kvm_create(kvm_context_t kvm, unsign
 		return -1;
 	}
 	kvm->vm_fd = fd;
+	return 0;
+}
+
+int kvm_create_default_phys_mem(kvm_context_t kvm, unsigned long phys_mem_bytes,
+void **vm_mem)
+{
+	unsigned long memory = (phys_mem_bytes + PAGE_SIZE - 1) & PAGE_MASK;
+	int zfd;
+	int r;
 
 	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
 	if (r > 0)
@@ -457,18 +463,37 @@ int kvm_create(kvm_context_t kvm, unsign
 close(zfd);
 
 	kvm->physical_memory = *vm_mem;
+	return 0;
+}
+
+void kvm_create_irqchip(kvm_context_t kvm)
+{
+	int r;
 
 	kvm->irqchip_in_kernel = 0;
 	if (!kvm->no_irqchip_creation) {
 		r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
 		if (r > 0) {	/* kernel irqchip supported */
-			r = ioctl(fd, KVM_CREATE_IRQCHIP);
+			r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
 			if (r >= 0)
 kvm->irqchip_in_kernel = 1;
 			else
 printf("Create kernel PIC irqchip failed\n");
 		}
 	}
+}
+
+int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
+{
+	int r;
+
+	r = kvm_create_vm(kvm);
+	if (r < 0)
+		return r;
+	r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
+	if (r < 0)
+		return r;
+	kvm_create_irqchip(kvm);
 	r = kvm_create_vcpu(kvm, 0);
 	if (r < 0)
 		return r;
@@ -558,6 +583,32 @@ void *kvm_create_phys_mem(kvm_context_t 
 log, writable);
 }
 
+int kvm_register_userspace_phys_mem(kvm_context_t kvm,
+			unsigned long phys_start, void *userspace_addr,
+			unsigned long len, int slot, int log)
+{
+	struct kvm_userspace_memory_region memory = {
+		.slot = slot,
+		.memory_size = len,
+		.guest_phys_addr = phys_start,
+		.userspace_addr = (intptr_t)userspace_addr,
+		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
+	};
+	int r;
+
+	if (!kvm->physical_memory)
+		kvm->physical_memory = userspace_addr - phys_start;
+
+	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
+	if (r == -1) {
+		fprintf(stderr, 

Re: [kvm-devel] [patch] kvmctl.c: allow custom memory setup.

2007-10-18 Thread Gerd Hoffmann
Dor Laor wrote:
> Gerd Hoffmann wrote:
>> I don't see how I can pass a pointer to kvm_create_userspace_memory()
>> via kvm_create() without breaking the libkvm interface.  There is no
>> flags field or similar which could be used to signal "vm_mem is a valid
>> pointer, please use that instead of mmap()'ing anonymous memory".

> There is no need to keep binary compat to libkvm as long as it is linked
> statically.

What about source compatibility?

cheers,
  Gerd



-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch] kvmctl.c: allow custom memory setup.

2007-10-18 Thread Gerd Hoffmann
Izik Eidus wrote:
> hi,
> why not making kvm_create_userspace_memory() recive a pointer to a
> userspace allocated memory (that was allocated from file or from normal
> malloc)
> and make all the changes before kvm_create_userspace_memory() get called?

I don't see how I can pass a pointer to kvm_create_userspace_memory()
via kvm_create() without breaking the libkvm interface.  There is no
flags field or similar which could be used to signal "vm_mem is a valid
pointer, please use that instead of mmap()'ing anonymous memory".

Oh, and I also don't want the special treatment of the 0xa ->
0xf memory window.

cheers,
  Gerd


-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [patch] kvmctl.c: allow custom memory setup.

2007-10-17 Thread Gerd Hoffmann
Gerd Hoffmann wrote:
> I've made kvm_create() optionally skip the memory setup, so I can create
> my own later on.  That doesn't work though because creating the vcpu
> fails then.

Ugh, vmx grabs last 4 pages from slot 0 (looks like for real mode
emulation).  Thus memory must exist before creating the vcpu.  Which
makes fitting filemap-backed memory into the current libkvm init
procedure a bit difficult.

I've decided to split kvm_create() into a bunch of pieces which can
be called as needed, so I can first create the vm, then do my custom
memory setup, then create the vcpu without making vmx unhappy ...

cheers,
  Gerd

* split kvm_create() into smaller pieces which can be individually
  called if needed.
* add kvm_register_userspace_phys_mem() function.
---
 user/kvmctl.c |   61 +-
 user/kvmctl.h |8 +++
 2 files changed, 64 insertions(+), 5 deletions(-)

Index: kvm-46/user/kvmctl.c
===
--- kvm-46.orig/user/kvmctl.c
+++ kvm-46/user/kvmctl.c
@@ -427,12 +427,9 @@ int kvm_alloc_userspace_memory(kvm_conte
 	return 0;
 }
 
-int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
+int kvm_create_vm(kvm_context_t kvm)
 {
-	unsigned long memory = (phys_mem_bytes + PAGE_SIZE - 1) & PAGE_MASK;
 	int fd = kvm->fd;
-	int zfd;
-	int r;
 
 	kvm->vcpu_fd[0] = -1;
 
@@ -442,6 +439,15 @@ int kvm_create(kvm_context_t kvm, unsign
 		return -1;
 	}
 	kvm->vm_fd = fd;
+	return 0;
+}
+
+int kvm_create_default_phys_mem(kvm_context_t kvm, unsigned long phys_mem_bytes,
+void **vm_mem)
+{
+	unsigned long memory = (phys_mem_bytes + PAGE_SIZE - 1) & PAGE_MASK;
+	int zfd;
+	int r;
 
 	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
 	if (r > 0)
@@ -457,18 +463,37 @@ int kvm_create(kvm_context_t kvm, unsign
 close(zfd);
 
 	kvm->physical_memory = *vm_mem;
+	return 0;
+}
+
+void kvm_create_irqchip(kvm_context_t kvm)
+{
+	int r;
 
 	kvm->irqchip_in_kernel = 0;
 	if (!kvm->no_irqchip_creation) {
 		r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_IRQCHIP);
 		if (r > 0) {	/* kernel irqchip supported */
-			r = ioctl(fd, KVM_CREATE_IRQCHIP);
+			r = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP);
 			if (r >= 0)
 kvm->irqchip_in_kernel = 1;
 			else
 printf("Create kernel PIC irqchip failed\n");
 		}
 	}
+}
+
+int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem)
+{
+	int r;
+
+	r = kvm_create_vm(kvm);
+	if (r < 0)
+		return r;
+	r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem);
+	if (r < 0)
+		return r;
+	kvm_create_irqchip(kvm);
 	r = kvm_create_vcpu(kvm, 0);
 	if (r < 0)
 		return r;
@@ -558,6 +583,32 @@ void *kvm_create_phys_mem(kvm_context_t 
 log, writable);
 }
 
+int kvm_register_userspace_phys_mem(kvm_context_t kvm,
+			unsigned long phys_start, void *userspace_addr,
+			unsigned long len, int slot, int log)
+{
+	struct kvm_userspace_memory_region memory = {
+		.slot = slot,
+		.memory_size = len,
+		.guest_phys_addr = phys_start,
+		.userspace_addr = (intptr_t)userspace_addr,
+		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
+	};
+	int r;
+
+	if (!kvm->physical_memory)
+		kvm->physical_memory = userspace_addr - phys_start;
+
+	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
+	if (r == -1) {
+		fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno));
+		return -1;
+	}
+
+	kvm_userspace_memory_region_save_params(kvm, &memory);
+return 0;
+}
+
 /* destroy/free a whole slot.
  * phys_start, len and slot are the params passed to kvm_create_phys_mem()
  */
Index: kvm-46/user/kvmctl.h
===
--- kvm-46.orig/user/kvmctl.h
+++ kvm-46/user/kvmctl.h
@@ -137,6 +137,11 @@ int kvm_get_shadow_pages(kvm_context_t k
 int kvm_create(kvm_context_t kvm,
 	   unsigned long phys_mem_bytes,
 	   void **phys_mem);
+int kvm_create_vm(kvm_context_t kvm);
+int kvm_create_default_phys_mem(kvm_context_t kvm, unsigned long phys_mem_bytes,
+void **vm_mem);
+void kvm_create_irqchip(kvm_context_t kvm);
+
 /*!
  * \brief Create a new virtual cpu
  *
@@ -404,6 +409,9 @@ void *kvm_create_phys_mem(kvm_context_t,
 			  unsigned long len, int slot, int log, int writable);
 void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start, 
 			  unsigned long len, int slot);
+int kvm_register_userspace_phys_mem(kvm_context_t kvm,
+			unsigned long phys_start, void *userspace_addr,
+			unsigned long len, int slot, int log);
 int kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
 
 
-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browse

Re: [kvm-devel] [RFC] Paravirt timer for KVM

2007-10-16 Thread Gerd Hoffmann
Hollis Blanchard wrote:
> That's a good point, but does PIT/HPET emulation show up as a hot spot
> in any profiles? I think keeping the hypercall API as small as feasible
> is a desirable design goal.

pit probably doesn't due to being rarely updated.  For hpet I'd expect
it to show up much more.  Asking for the current time is quite a frequent
operation in the linux kernel.  I've had X86_CR4_TSD set for a while and
saw *lots* of rdtsc traps then ...

cheers,
  Gerd

-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] Paravirt timer for KVM

2007-10-16 Thread Gerd Hoffmann
Glauber de Oliveira Costa wrote:
> Well, my previous understanding was that if the CPU marks the tsc as
> stable, it _won't_ stop even in C3, and it was done this way exactly
> to make sure there are a stable source for timing.

Other way around:  Kernel can mark the TSC unstable if it figures it
does not tick at a constant rate (or if the kernel knows that for certain
hardware it will not ...).

C3 on Intel hardware seems to be a known unstable case:

zweiblum kraxel ~# grep -A2 -B2 C3
/export/git/linux-2.6/arch/x86_64/kernel/tsc.c
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
#ifdef CONFIG_ACPI
/* But TSC doesn't tick in C3 so don't use it there */
if (acpi_gbl_FADT.header.length > 0 &&
acpi_gbl_FADT.C3latency < 1000)
return 1;
#endif


> But it does not really matter. We have two options:
> * Not considering tsc stable at all if sleeps, and then using the tsc
> just for adjustments.

Using the tsc for adjustments should do fine.

Xen updates the guest paravirt time with both systime and the corresponding
tsc timestamp, and the guest then uses the tsc delta to get more precise
time without having to ask the hypervisor each time.

If we take that scheme and additionally take care to update paravirt
time before re-running the guest after it went to sleep for a while (and
thus the host might have been in C3) we should be set, right?

> * Considering the tsc stable, but taking the time the cpu spend
> sleeping into account, and returning some sorf of "return tsc +
> total_time_spent_sleeping_in_so_deep_sleep_states_such_as_C3" . I
> specially liked the naming.

I suspect that scheme would quickly become quite complex.  For starters
consider that the TSC-stops-in-C3 behavior seems to apply to Intel
Hardware only ...

cheers,
  Gerd



-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-46 release

2007-10-15 Thread Gerd Hoffmann
Izik Eidus wrote:
> can you send me your kvmctl.c file so i will look at it?
> it is somewhat hard to me understand how you did the things that way

I'm trying to back a vm with memory coming from a file map, see patch
attached against kvm-46 (was also attached earlier in this thread).

I've made kvm_create() optionally skip the memory setup, so I can create
my own later on.  That doesn't work though because creating the vcpu
fails then.

cheers,
  Gerd
* make kvm_create() skip all memory setup in case phys_mem_bytes == 0
* add kvm_register_userspace_phys_mem() to register any userspace memory
  as guest physical memory.

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 user/kvmctl.c |   53 +
 user/kvmctl.h |3 +++
 2 files changed, 44 insertions(+), 12 deletions(-)

Index: kvm-46/user/kvmctl.c
===
--- kvm-46.orig/user/kvmctl.c
+++ kvm-46/user/kvmctl.c
@@ -443,20 +443,22 @@ int kvm_create(kvm_context_t kvm, unsign
 	}
 	kvm->vm_fd = fd;
 
-	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
-	if (r > 0)
-		r = kvm_alloc_userspace_memory(kvm, memory, vm_mem);
-	else
-		r = kvm_alloc_kernel_memory(kvm, memory, vm_mem);
-	if (r < 0)
-		return r;
+	if (phys_mem_bytes) {
+		r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
+		if (r > 0)
+			r = kvm_alloc_userspace_memory(kvm, memory, vm_mem);
+		else
+			r = kvm_alloc_kernel_memory(kvm, memory, vm_mem);
+		if (r < 0)
+			return r;
 
-zfd = open("/dev/zero", O_RDONLY);
-mmap(*vm_mem + 0xa8000, 0x8000, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_FIXED, zfd, 0);
-close(zfd);
+	zfd = open("/dev/zero", O_RDONLY);
+	mmap(*vm_mem + 0xa8000, 0x8000, PROT_READ|PROT_WRITE,
+	 MAP_PRIVATE|MAP_FIXED, zfd, 0);
+	close(zfd);
 
-	kvm->physical_memory = *vm_mem;
+		kvm->physical_memory = *vm_mem;
+	}
 
 	kvm->irqchip_in_kernel = 0;
 	if (!kvm->no_irqchip_creation) {
@@ -558,6 +560,33 @@ void *kvm_create_phys_mem(kvm_context_t 
 log, writable);
 }
 
+int kvm_register_userspace_phys_mem(kvm_context_t kvm,
+			unsigned long phys_start, void *userspace_addr,
+			unsigned long len, int slot, int log)
+{
+	int r;
+	struct kvm_userspace_memory_region memory = {
+		.slot = slot,
+		.memory_size = len,
+		.guest_phys_addr = phys_start,
+		.userspace_addr = (intptr_t)userspace_addr,
+		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
+	};
+
+	if (!kvm->physical_memory)
+		kvm->physical_memory = userspace_addr - phys_start;
+
+	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
+	if (r == -1) {
+		fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno));
+		return -1;
+	}
+
+	kvm_userspace_memory_region_save_params(kvm, &memory);
+
+	return 0;
+}
+
 /* destroy/free a whole slot.
  * phys_start, len and slot are the params passed to kvm_create_phys_mem()
  */
Index: kvm-46/user/kvmctl.h
===
--- kvm-46.orig/user/kvmctl.h
+++ kvm-46/user/kvmctl.h
@@ -404,6 +404,9 @@ void *kvm_create_phys_mem(kvm_context_t,
 			  unsigned long len, int slot, int log, int writable);
 void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start, 
 			  unsigned long len, int slot);
+int kvm_register_userspace_phys_mem(kvm_context_t kvm,
+			unsigned long phys_start, void *userspace_addr,
+			unsigned long len, int slot, int log);
 int kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
 
 
-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-46 release

2007-10-15 Thread Gerd Hoffmann
Izik Eidus wrote:
> On Mon, 2007-10-15 at 13:00 +0200, Gerd Hoffmann wrote:
>> Gerd Hoffmann wrote:
>>
>>> Something like this? (compiles, not tested yet).
>> Hmm, no-go, results in "kvm_create_vcpu: Cannot allocate memory".
> 
> try use slot bigger bigger than 5 and smaller than 8

It doesn't get that far: kvm_create(&foo, 0, NULL) fails; it seems doing
vcpu_create() before memory setup doesn't work.

cheers,
  Gerd



-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-46 release

2007-10-15 Thread Gerd Hoffmann
Gerd Hoffmann wrote:

> Something like this? (compiles, not tested yet).

Hmm, no-go, results in "kvm_create_vcpu: Cannot allocate memory".

cheers,
  Gerd

-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-46 release

2007-10-15 Thread Gerd Hoffmann
Avi Kivity wrote:
> Gerd Hoffmann wrote:
>> Avi Kivity wrote:
>>   
>>> We've now switched to allocating guest memory in userspace rather than
>>> in the kernel.
>>> 
>> Hmm, a quick glimpse over kvmctl.h doesn't show an obvious way how to
>> use that.  If I want to back vm memory with a file mapping, how can I do
>> that?
>>   
> 
> kvmctl.h doesn't expose an API for that currently, though is should be
> fairly trivial to do so.

Something like this? (compiles, not tested yet).

cheers,
  Gerd

* make kvm_create() skip all memory setup in case phys_mem_bytes == 0
* add kvm_register_userspace_phys_mem() to register any userspace memory
  as guest physical memory.

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
---
 user/kvmctl.c |   53 +
 user/kvmctl.h |3 +++
 2 files changed, 44 insertions(+), 12 deletions(-)

Index: kvm-46/user/kvmctl.c
===
--- kvm-46.orig/user/kvmctl.c
+++ kvm-46/user/kvmctl.c
@@ -443,20 +443,22 @@ int kvm_create(kvm_context_t kvm, unsign
 	}
 	kvm->vm_fd = fd;
 
-	r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
-	if (r > 0)
-		r = kvm_alloc_userspace_memory(kvm, memory, vm_mem);
-	else
-		r = kvm_alloc_kernel_memory(kvm, memory, vm_mem);
-	if (r < 0)
-		return r;
+	if (phys_mem_bytes) {
+		r = ioctl(kvm->fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
+		if (r > 0)
+			r = kvm_alloc_userspace_memory(kvm, memory, vm_mem);
+		else
+			r = kvm_alloc_kernel_memory(kvm, memory, vm_mem);
+		if (r < 0)
+			return r;
 
-zfd = open("/dev/zero", O_RDONLY);
-mmap(*vm_mem + 0xa8000, 0x8000, PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_FIXED, zfd, 0);
-close(zfd);
+	zfd = open("/dev/zero", O_RDONLY);
+	mmap(*vm_mem + 0xa8000, 0x8000, PROT_READ|PROT_WRITE,
+	 MAP_PRIVATE|MAP_FIXED, zfd, 0);
+	close(zfd);
 
-	kvm->physical_memory = *vm_mem;
+		kvm->physical_memory = *vm_mem;
+	}
 
 	kvm->irqchip_in_kernel = 0;
 	if (!kvm->no_irqchip_creation) {
@@ -558,6 +560,33 @@ void *kvm_create_phys_mem(kvm_context_t 
 log, writable);
 }
 
+int kvm_register_userspace_phys_mem(kvm_context_t kvm,
+			unsigned long phys_start, void *userspace_addr,
+			unsigned long len, int slot, int log)
+{
+	int r;
+	struct kvm_userspace_memory_region memory = {
+		.slot = slot,
+		.memory_size = len,
+		.guest_phys_addr = phys_start,
+		.userspace_addr = (intptr_t)userspace_addr,
+		.flags = log ? KVM_MEM_LOG_DIRTY_PAGES : 0,
+	};
+
+	if (!kvm->physical_memory)
+		kvm->physical_memory = userspace_addr - phys_start;
+
+	r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
+	if (r == -1) {
+		fprintf(stderr, "create_userspace_phys_mem: %s", strerror(errno));
+		return -1;
+	}
+
+	kvm_userspace_memory_region_save_params(kvm, &memory);
+
+	return 0;
+}
+
 /* destroy/free a whole slot.
  * phys_start, len and slot are the params passed to kvm_create_phys_mem()
  */
Index: kvm-46/user/kvmctl.h
===
--- kvm-46.orig/user/kvmctl.h
+++ kvm-46/user/kvmctl.h
@@ -404,6 +404,9 @@ void *kvm_create_phys_mem(kvm_context_t,
 			  unsigned long len, int slot, int log, int writable);
 void kvm_destroy_phys_mem(kvm_context_t, unsigned long phys_start, 
 			  unsigned long len, int slot);
+int kvm_register_userspace_phys_mem(kvm_context_t kvm,
+			unsigned long phys_start, void *userspace_addr,
+			unsigned long len, int slot, int log);
 int kvm_get_dirty_pages(kvm_context_t, int slot, void *buf);
 
 
-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] Paravirt timer for KVM

2007-10-15 Thread Gerd Hoffmann
Avi Kivity wrote:
> Gerd Hoffmann wrote:
>>   
>>> 2) the TSC would have to be used as a clocksource.  You don't know the 
>>> frequency which is the first problem with using the TSC but some systems 
>>> have a TSC that changes frequencies.
>>> 
>> Also note the tsc may stop ticking if the CPU goes sleep in C3, which
>> IMHO makes the tsc almost useless as clocksource for guests ...
> 
> But the host knows that, right?  So it can update the guest's timebase?

Host should know.  Well, I hope.  Dunno whether one really can be sure
in all cases given all the different CPUs and tsc implementations.

With VT you can attempt to make that invisible to the guest using the
tsc offset field.  Probably svm can do that too (didn't check docs
though).  kvm-lite can't (what is the status btw?).  Xen "solves" that
by not doing power management *evil grin*.

Nevertheless it is probably much easier to go with pv timers (or maybe
emulate hpet timers).

cheers,
  Gerd

-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [RFC] Paravirt timer for KVM

2007-10-15 Thread Gerd Hoffmann
  Hi,

> 2) the TSC would have to be used as a clocksource.  You don't know the 
> frequency which is the first problem with using the TSC but some systems 
> have a TSC that changes frequencies.

Also note the tsc may stop ticking if the CPU goes to sleep in C3, which
IMHO makes the tsc almost useless as clocksource for guests ...

cheers,
  Gerd


-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-46 release

2007-10-12 Thread Gerd Hoffmann
Avi Kivity wrote:
> We've now switched to allocating guest memory in userspace rather than
> in the kernel.

Hmm, a quick glimpse over kvmctl.h doesn't show an obvious way how to
use that.  If I want to back vm memory with a file mapping, how can I do
that?

cheers,
  Gerd


-
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-43 release

2007-09-24 Thread Gerd Hoffmann
>> I *think* it happens because I'm changing guest page tables from outside
>> (i.e. host application), and the new intel pgtable optimization bits
>> don't expect that (yes, it is a vt box).
> 
> Well, the older implementation doesn't expect that either.  You can
> disable the optimization with bypass_guest_pf=0, though.

Thanks.  Using bypass_guest_pf=0 and irqchip_disable() brings things
back to -41 level of functionality.

kvm not expecting both guest and host app changing pagetables could
explain other problems I'm seeing though, so maybe I'd better fix that.
Long term it isn't planned to stay that way anyway, it's just handy for
proof-of-concept because userspace is very easy to hack and debug ...

thanks,
  Gerd



-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-43 release

2007-09-24 Thread Gerd Hoffmann
Avi Kivity wrote:
> You can call kvm_disable_irqchip_creation() to kill in-kernel pic and
> friends.
> 
> (the logic is inverted -- the function should be
> kvm_enable_irqchip_creation() so that this problem would not occur)

Ok, will try, next question first though as I'm running into trouble much
earlier in -44:  I get suspicious page faults.

I *think* it happens because I'm changing guest page tables from outside
(i.e. host application), and the new intel pgtable optimization bits
don't expect that (yes, it is a vt box).

A quick scan of kvmctl.h doesn't show anything which looks like I could
call to tell kvm the guest page tables have been modified.  Hints how to
handle this?

thanks,
  Gerd

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-43 release

2007-09-21 Thread Gerd Hoffmann
Avi Kivity wrote:
> Only one fix, but an important one.  It fixes booting of newer Linux
> versions, which experienced disk and keyboard problems without
> -no-kvm-irqchip.
> 
> As usual, if you have an issue please try with -no-kvm-irqchip and report.

Updated from -41.  Now my libkvm-using-tool's
kvm_callbacks->try_push_interrupts() callback isn't called any more.
Hmm.  Guess due to the lapic/irqchip changes.  Are there any additional
initializations I have to do now?

As there isn't any real pic-like hardware emulated maybe using
try_push_interrupts() is a bad idea anyway and I should better move the
code (checks state and calls kvm_inject_irq() if needed) to the
pre_kvm_run() callback?

hints anyone?

  Gerd


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-41 release

2007-09-20 Thread Gerd Hoffmann

> Please, a changelog entry and a signoff.  Obvious patches need them too.

updated patch with description and signed-off attached.

cheers,
  Gerd

Install new include file linux/kvm_para.h.  Without that fix builds
using the installed libkvm fail because kvmctl.h includes kvm_para.h.

Signed-off-by: Gerd Hoffmann <[EMAIL PROTECTED]>
diff -up kvm-41/user/Makefile.hdr kvm-41/user/Makefile
--- kvm-41/user/Makefile.hdr	2007-09-20 11:30:47.0 +0200
+++ kvm-41/user/Makefile	2007-09-20 11:31:10.0 +0200
@@ -55,6 +55,8 @@ install:
 	install -D kvmctl.h $(DESTDIR)/$(PREFIX)/include/kvmctl.h
 	install -D $(KERNELDIR)/include/linux/kvm.h \
 		$(DESTDIR)/$(PREFIX)/include/linux/kvm.h
+	install -D $(KERNELDIR)/include/linux/kvm_para.h \
+		$(DESTDIR)/$(PREFIX)/include/linux/kvm_para.h
 	install -D libkvm.a $(DESTDIR)/$(PREFIX)/$(LIBDIR)/libkvm.a
 
 %.flat: %.o
-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-41 release

2007-09-20 Thread Gerd Hoffmann
Gerd Hoffmann wrote:
> Avi Kivity wrote:
>> Changes since kvm-40:
>> - refactor hypercall infrastructure for simplicity and better smp 
>> support (Anthony Liguori)
> 
> The new kvm_para.h header file added by (I think) this change isn't
> installed by "make install", making builds using the libkvm installed on
> the system fail.

... and the obvious fix for that.

please apply,

  Gerd
diff -up kvm-41/user/Makefile.hdr kvm-41/user/Makefile
--- kvm-41/user/Makefile.hdr	2007-09-20 11:30:47.0 +0200
+++ kvm-41/user/Makefile	2007-09-20 11:31:10.0 +0200
@@ -55,6 +55,8 @@ install:
 	install -D kvmctl.h $(DESTDIR)/$(PREFIX)/include/kvmctl.h
 	install -D $(KERNELDIR)/include/linux/kvm.h \
 		$(DESTDIR)/$(PREFIX)/include/linux/kvm.h
+	install -D $(KERNELDIR)/include/linux/kvm_para.h \
+		$(DESTDIR)/$(PREFIX)/include/linux/kvm_para.h
 	install -D libkvm.a $(DESTDIR)/$(PREFIX)/$(LIBDIR)/libkvm.a
 
 %.flat: %.o
-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-41 release

2007-09-20 Thread Gerd Hoffmann
Avi Kivity wrote:
> Changes since kvm-40:
> - refactor hypercall infrastructure for simplicity and better smp 
> support (Anthony Liguori)

The new kvm_para.h header file added by (I think) this change isn't
installed by "make install", making builds using the libkvm installed on
the system fail.

cheers,
  Gerd

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] virtio implementation?

2007-07-18 Thread Gerd Hoffmann
Rusty Russell wrote:
> You mean backend?  For networking it makes a great deal of sense.  For
> block it makes far less sense (COW, weird formats, etc).

For block you probably want both:  userspace driver which can handle all
sorts of funny image files, and a kernel driver doing a 1:1 mapping to a
block device (lvm volume).

cheers,
  Gerd

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [Xen-devel] Re: More virtio users

2007-06-12 Thread Gerd Hoffmann
   Hi,

> Framebuffer is an interesting one.  Virtio doesn't assume shared memory,
> so naively the fb you would just send outbufs describing changed memory.
> This would work, but describing rectangles is better.  A helper might be
> the right approach here

Rectangles work just fine for a framebuffer console.  They stop working 
once you plan to run any graphical stuff such as an X-Server on top of 
the framebuffer.  Only way to get notified about changes is page faults, 
i.e. 4k granularity on the linear framebuffer memory.

Related to Framebuffer is virtual keyboard and virtual mouse (or better 
touchscreen), which probably works perfectly fine with virtio.  I'd 
guess you can even reuse the input layer event struct for the virtio events.

Xen has virtual framebuffer, kbd & mouse, although not (yet?) in the 
paravirt_ops patch queue, so there is something you can look at ;)

cheers,
   Gerd


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] install kvm on opensuse10.2

2007-06-01 Thread Gerd Hoffmann
Maria Casale wrote:
> I have got an Intel Core Duo T2400 processor.
> I added the repository: 
> http://software.opensuse.org/download/home:/kraxel/openSUSE_10.2/i386/
> I installed kvm and kvm-kmp. I added kvm modules in etc/sysconfig.
> I ran qemu-kvm and this appeared: Could not initialize KVM, will disable 
> KVM support.
> I typed #modprobe kvm-intel but the module is not present:
> FATAL: Module kvm_intel not found.
> When I type uname -a:
> Linux linux-hb3b 2.6.18.8-0.3-default #1 SMP Tue Apr 17 08:42:35 UTC 
> 2007 i686 i686 i386 GNU/Linux
> 
> How can I run kvm?

Could be the modules don't match the kernel.  gcc-3.3 bombs out with an 
impressive compiler error (see below) on i386, thus the i386 packages 
all are a bit outdated (kvm-17 from dec last year IIRC).  If an update 
kernel was released meanwhile the kvm modules and the kernel have 
different versions ...

cheers,
   Gerd

> gcc-3.3 -I /usr/src/packages/BUILD/kvm-26/qemu/../user -Wall -O2 -g 
> -fno-strict-aliasing -I /usr/src/packages/BUILD/kvm-26/kernel/include 
> -fomit-frame-pointer -I. -I.. 
> -I/usr/src/packages/BUILD/kvm-26/qemu/target-i386 
> -I/usr/src/packages/BUILD/kvm-26/qemu -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 
> -D_LARGEFILE_SOURCE -D__user= -I/usr/src/packages/BUILD/kvm-26/qemu/fpu 
> -DHAS_AUDIO -I/usr/src/packages/BUILD/kvm-26/qemu/slirp  -c -o qemu-kvm.o 
> /usr/src/packages/BUILD/kvm-26/qemu/qemu-kvm.c
> /usr/src/packages/BUILD/kvm-26/qemu/qemu-kvm.c: In function `kvm_cpu_exec':
> /usr/src/packages/BUILD/kvm-26/qemu/qemu-kvm.c:472: warning: implicit 
> declaration of function `exit'
> /usr/src/packages/BUILD/kvm-26/qemu/qemu-kvm.c: In function `kvm_shutdown':
> /usr/src/packages/BUILD/kvm-26/qemu/qemu-kvm.c:624: warning: implicit 
> declaration of function `qemu_system_reset_request'
> /usr/src/packages/BUILD/kvm-26/qemu/qemu-kvm.c: In function 
> `kvm_qemu_init_env':
> /usr/src/packages/BUILD/kvm-26/qemu/qemu-kvm.c:804: error: unable to find a 
> register to spill in class `SIREG'
> /usr/src/packages/BUILD/kvm-26/qemu/qemu-kvm.c:804: error: this is the insn:
> (insn:HI 37 35 38 0 0x40514478 (parallel [
> (set (reg:SI 67)
> (const_int 0 [0x0]))
> (set (reg/f:SI 0 eax [64])
> (plus:SI (ashift:SI (reg:SI 2 ecx [66])
> (const_int 2 [0x2]))
> (reg/f:SI 0 eax [64])))
> (set (reg/f:SI 2 ecx [orig:65 cenv ] [65])
> (plus:SI (ashift:SI (reg:SI 2 ecx [66])
> (const_int 2 [0x2]))
> (reg/v/f:SI 1 edx [orig:59 cenv ] [59])))
> (set (mem/s:BLK (reg/f:SI 0 eax [64]) [0 copy+0 S25712 A128])
> (mem/s:BLK (reg/v/f:SI 1 edx [orig:59 cenv ] [59]) [0 S25712 
> A128]))
> (use (reg:SI 2 ecx [66]))
> (use (reg:SI 19 dirflag))
> ]) 464 {rep_movsi} (insn_list 3 (insn_list 32 (insn_list 34 
> (insn_list 35 (nil)
> (expr_list:REG_DEAD (reg:SI 19 dirflag)
> (expr_list:REG_DEAD (reg:SI 2 ecx [66])
> (expr_list:REG_UNUSED (reg/f:SI 2 ecx [orig:65 cenv ] [65])
> (expr_list:REG_UNUSED (reg/f:SI 0 eax [64])
> (expr_list:REG_UNUSED (reg:SI 67)
> (nil)))
> /usr/src/packages/BUILD/kvm-26/qemu/qemu-kvm.c:804: confused by earlier 
> errors, bailing out


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm-15 release

2007-02-26 Thread Gerd Hoffmann
Avi Kivity wrote:
> - new userspace interface (work in progress)

kvmfs in the kvm-15 kernel code does not build with older kernels (2.6.16
fails, 2.6.18 works ok); it looks like the reason is some changes in
superblock handling.

Do you intend to fix that?

cheers,
  Gerd

-- 
Gerd Hoffmann <[EMAIL PROTECTED]>

-
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [ANNOUNCE] kvm userspace release 6

2006-12-12 Thread Gerd Hoffmann
  Hi,

> I'm not sure what you mean. The -nographic option doesn't help, and Suse
> doesn't even come far enough to switch to graphics mode. It dies in the
> bootloader.

both isolinux and grub fail to switch into gfx mode, which likely is
caused by either incomplete real mode emulation or big real mode.

Workaround for grub: just turn off the eye candy, comment the gfxmenu
line in /boot/grub/menu.lst.

Kicking the installation is a bit harder as you can't just go and edit
the isolinux.cfg on the cdrom.  One way is to use normal qemu (without
kvm acceleration) for installation and fixing up menu.lst, then go ahead
with the kvm version ...

reproduce: fetch the boot iso[1], boot with "-cdrom /path/to/mini.iso
-boot d"

cheers,
  Gerd

[1]
http://ftp.opensuse.org/pub/opensuse/distribution/10.2/iso/cd/openSUSE-10.2-GM-i386-mini.iso

-- 
Gerd Hoffmann <[EMAIL PROTECTED]>

-
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel