Unable to get MenuetOS booting

2009-10-14 Thread Ubitux
Hi,

I'm not able to get MenuetOS booting while using the kvm modules. QEMU
hangs on floppy boot. Here is the procedure:

cd /tmp
wget -c 'http://www.menuetos.be/download.php?CurrentMenuetOS' -O menuetos.zip
unzip -u menuetos.zip
qemu-kvm -m 512 -fda M64-*.IMG -boot a

I have an Intel i7 920; I use kvm-88, kernel 2.6.31.4, x86_64, and the
64-bit release of MenuetOS.
-no-kvm-irqchip and -no-kvm-pit don't solve the issue, but -no-kvm does.
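
For reference, the same command line boots fine once KVM is disabled:

qemu-kvm -m 512 -fda M64-*.IMG -boot a -no-kvm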


-- 
ubitux


Re: [PATCH][RFC] Xen PV-on-HVM guest support

2009-10-14 Thread Jan Kiszka
Ed Swierk wrote:
 As we discussed a while back, support for Xen PV-on-HVM guests can be
 implemented almost entirely in userspace, except for handling one
 annoying MSR that maps a Xen hypercall blob into guest address space.
 
 A generic mechanism to delegate MSR writes to userspace seems overkill
 and risks encouraging similar MSR abuse in the future.  Thus this patch
 adds special support for the Xen HVM MSR.
 
 At Avi's suggestion[1] I implemented a new ioctl, KVM_XEN_HVM_CONFIG,
 that lets userspace tell KVM which MSR the guest will write to, as well
 as the starting address and size of the hypercall blobs (one each for
 32-bit and 64-bit) that userspace has loaded from files.  When the guest
 writes to the MSR, KVM copies one page of the blob from userspace to the
 guest.
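 
 For illustration, the userspace side would then look roughly like this
 (vm_fd, XEN_HVM_MSR and the blob variables are placeholders for values
 the loader already knows; sizes are in pages, matching the MSR handler):
 
 struct kvm_xen_hvm_config cfg = {
 	.msr = XEN_HVM_MSR,
 	.blob_addr = { (__u64)(unsigned long)blob32,
 		       (__u64)(unsigned long)blob64 },
 	.blob_size = { blob32_pages, blob64_pages },
 };
 
 if (ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg) < 0)
 	perror("KVM_XEN_HVM_CONFIG");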
 
 I've tested this patch against a hacked-up version of Gerd's userspace
 code[2]; I'm happy to share those hacks if anyone is interested.
 
 [1] http://www.mail-archive.com/kvm@vger.kernel.org/msg16065.html
 [2]
 http://git.et.redhat.com/?p=qemu-kraxel.git;a=log;h=refs/heads/xenner.v5
 
 Signed-off-by: Ed Swierk eswi...@aristanetworks.com
 
 ---
 diff -BurN a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h
 --- a/include/asm-x86/kvm.h   2009-10-13 20:40:55.0 -0700
 +++ b/include/asm-x86/kvm.h   2009-10-13 20:21:07.0 -0700
 @@ -59,6 +59,7 @@
  #define __KVM_HAVE_MSIX
  #define __KVM_HAVE_MCE
  #define __KVM_HAVE_PIT_STATE2
 +#define __KVM_HAVE_XEN_HVM
  
  /* Architectural interrupt line count. */
  #define KVM_NR_INTERRUPTS 256
 diff -BurN a/include/linux/kvm.h b/include/linux/kvm.h
 --- a/include/linux/kvm.h 2009-10-13 20:40:55.0 -0700
 +++ b/include/linux/kvm.h 2009-10-13 20:21:26.0 -0700
 @@ -476,6 +476,9 @@
  #endif
  #define KVM_CAP_IOEVENTFD 36
  #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
 +#ifdef __KVM_HAVE_XEN_HVM
 +#define KVM_CAP_XEN_HVM 90
 +#endif

When submitting for merge, I would close this gap in the CAP number space.

  
  #ifdef KVM_CAP_IRQ_ROUTING
  
 @@ -528,6 +531,14 @@
  };
  #endif
  
 +#ifdef KVM_CAP_XEN_HVM
 +struct kvm_xen_hvm_config {
 + __u32 msr;
 + __u64 blob_addr[2];
 + __u8 blob_size[2];

This needs padding to achieve a stable layout across 32 and 64 bit.
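
For instance (sketch only; one possible layout):

struct kvm_xen_hvm_config {
	__u32 msr;
	__u32 pad1;		/* keep blob_addr 8-byte aligned on 32 bit too */
	__u64 blob_addr[2];
	__u8  blob_size[2];
	__u8  pad2[6];		/* round the size up to a multiple of 8 */
};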

 +};
 +#endif
 +
  #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
  
  struct kvm_irqfd {
 @@ -586,6 +597,7 @@
  #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
  #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
  #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
 +#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0xa1, struct kvm_xen_hvm_config)

Also here: next is 0x7a.

  
  /*
   * ioctls for vcpu fds
 diff -BurN a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 --- a/include/linux/kvm_host.h 2009-10-13 20:40:55.0 -0700
 +++ b/include/linux/kvm_host.h 2009-10-13 20:27:03.0 -0700
 @@ -236,6 +236,10 @@
   unsigned long mmu_notifier_seq;
   long mmu_notifier_count;
  #endif
 +
 +#ifdef KVM_CAP_XEN_HVM
 + struct kvm_xen_hvm_config xen_hvm_config;
 +#endif
  };
  
  /* The guest did something we don't support. */
 diff -BurN a/x86/x86.c b/x86/x86.c
 --- a/x86/x86.c   2009-10-13 20:40:58.0 -0700
 +++ b/x86/x86.c   2009-10-13 20:33:49.0 -0700
 @@ -875,6 +875,33 @@
   return 0;
  }
  
 +#ifdef KVM_CAP_XEN_HVM
 +static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
 +{
 + int blob = !!(vcpu->arch.shadow_efer & EFER_LME);
 + u32 pnum = data & ~PAGE_MASK;
 + u64 paddr = data & PAGE_MASK;
 + u8 *page;
 + int r = 1;
 + printk(KERN_INFO "kvm: loading xen hvm blob %d page %d at %llx\n",
 +blob, pnum, paddr);

Debugging left-over? And please insert a blank line after the variable
block.

 + if (pnum >= vcpu->kvm->xen_hvm_config.blob_size[blob])
 + goto out;
 + page = kzalloc(PAGE_SIZE, GFP_KERNEL);
 + if (!page)
 + goto out;
 + if (copy_from_user(page, (u8 *)vcpu->kvm->xen_hvm_config.blob_addr[blob]
 ++ pnum * PAGE_SIZE, PAGE_SIZE))
 + goto out_free;
 + kvm_write_guest(vcpu->kvm, paddr, page, PAGE_SIZE);

This function returns an error code. Not interested in it?
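
Something like this would do (sketch):

 + if (kvm_write_guest(vcpu->kvm, paddr, page, PAGE_SIZE))
 + goto out_free;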

 + r = 0;
 +out_free:
 + kfree(page);
 +out:
 + return r;
 +}
 +#endif
 +
  int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
  {
   switch (msr) {
 @@ -990,6 +1017,10 @@
  0x%x data 0x%llx\n", msr, data);
   break;
   default:
 +#ifdef KVM_CAP_XEN_HVM
 + if (msr && (msr == vcpu->kvm->xen_hvm_config.msr))
 + return xen_hvm_config(vcpu, data);
 +#endif
   if (!ignore_msrs) {
  pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
   msr, data);
 @@ -2453,6 +2484,17 @@
   r = 0;
   break;
   }
 +#ifdef KVM_CAP_XEN_HVM
 + case KVM_XEN_HVM_CONFIG: {

[PATCH] qemu-kvm: x86: Add support for NMI states

2009-10-14 Thread Jan Kiszka
This adds the required bits to retrieve and set the so far hidden NMI
pending and NMI masked states of the KVM kernel side. It also extends
the CPU VMState for proper saving/restoring. We can now safely reset a VM
while NMIs are in flight, and we can live migrate etc. too.

Fortunately, the probability of this deficit biting normal VMs in
practice was very low.

Signed-off-by: Jan Kiszka jan.kis...@siemens.com
---

 qemu-kvm-x86.c|   52 +
 target-i386/cpu.h |1 +
 target-i386/machine.c |1 +
 3 files changed, 54 insertions(+), 0 deletions(-)

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index acb1b91..86fd341 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -901,6 +901,53 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs)
| (rhs->avl * DESC_AVL_MASK);
 }
 
+static void kvm_get_nmi_state(CPUState *env)
+{
+#ifdef KVM_CAP_VCPU_STATE
+kvm_vcpu_context_t vcpu = env->kvm_cpu_state.vcpu_ctx;
+struct {
+struct kvm_vcpu_state header;
+struct kvm_vcpu_substate substates[1];
+} request;
+struct kvm_nmi_state nmi_state;
+int r;
+
+request.header.nsubstates = 1;
+request.header.substates[0].type = KVM_X86_VCPU_NMI;
+request.header.substates[0].offset = (size_t)&nmi_state - (size_t)&request;
+r = ioctl(vcpu->fd, KVM_GET_VCPU_STATE, &request);
+if (r == 0) {
+env->nmi_pending = nmi_state.pending;
+if (nmi_state.masked) {
+env->hflags2 |= HF2_NMI_MASK;
+} else {
+env->hflags2 &= ~HF2_NMI_MASK;
+}
+return;
+}
+#endif
+env->nmi_pending = 0;
+env->hflags2 &= ~HF2_NMI_MASK;
+}
+
+static void kvm_set_nmi_state(CPUState *env)
+{
+#ifdef KVM_CAP_VCPU_STATE
+kvm_vcpu_context_t vcpu = env->kvm_cpu_state.vcpu_ctx;
+struct {
+struct kvm_vcpu_state header;
+struct kvm_vcpu_substate substates[1];
+} request;
+struct kvm_nmi_state nmi_state;
+
+request.header.nsubstates = 1;
+request.header.substates[0].type = KVM_X86_VCPU_NMI;
+request.header.substates[0].offset = (size_t)&nmi_state - (size_t)&request;
+nmi_state.pending = env->nmi_pending;
+nmi_state.masked = !!(env->hflags2 & HF2_NMI_MASK);
+ioctl(vcpu->fd, KVM_SET_VCPU_STATE, &request);
+#endif
+}
+
 void kvm_arch_load_regs(CPUState *env)
 {
 struct kvm_regs regs;
@@ -1010,6 +1057,8 @@ void kvm_arch_load_regs(CPUState *env)
 rc = kvm_set_msrs(env->kvm_cpu_state.vcpu_ctx, msrs, n);
 if (rc == -1)
 perror("kvm_set_msrs FAILED");
+
+kvm_set_nmi_state(env);
 }
 
 void kvm_load_tsc(CPUState *env)
@@ -1195,6 +1244,8 @@ void kvm_arch_save_regs(CPUState *env)
 return;
 }
 }
+
+kvm_get_nmi_state(env);
 }
 
 static void do_cpuid_ent(struct kvm_cpuid_entry2 *e, uint32_t function,
@@ -1438,6 +1489,7 @@ void kvm_arch_push_nmi(void *opaque)
 
 void kvm_arch_cpu_reset(CPUState *env)
 {
+env->nmi_pending = 0;
 kvm_arch_load_regs(env);
 if (!cpu_is_bsp(env)) {
if (kvm_irqchip_in_kernel(kvm_context)) {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 278d3e3..620822a 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -709,6 +709,7 @@ typedef struct CPUX86State {
 /* For KVM */
 uint64_t interrupt_bitmap[256 / 64];
 uint32_t mp_state;
+uint32_t nmi_pending;
 
 /* in order to simplify APIC support, we leave this pointer to the
user */
diff --git a/target-i386/machine.c b/target-i386/machine.c
index e640dad..5c290f3 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -469,6 +469,7 @@ const VMStateDescription vmstate_cpu = {
 VMSTATE_INT32_V(pending_irq_vmstate, CPUState, 9),
 VMSTATE_UINT32_V(mp_state, CPUState, 9),
 VMSTATE_UINT64_V(tsc, CPUState, 9),
+VMSTATE_UINT32_V(nmi_pending, CPUState, 11),
 /* MCE */
 VMSTATE_UINT64_V(mcg_cap, CPUState, 10),
 VMSTATE_UINT64_V(mcg_status, CPUState, 10),


[PATCH] KVM test: Add a kvm subtest guest_s4

2009-10-14 Thread Lucas Meneghel Rodrigues
This test suspends a guest OS to disk; it supports both Linux and Windows.

Signed-off-by: Ken Cao k...@redhat.com
Signed-off-by: Yolkfull Chow yz...@redhat.com
---
 client/tests/kvm/kvm_tests.cfg.sample |   16 
 client/tests/kvm/tests/guest_s4.py|   66 +
 2 files changed, 82 insertions(+), 0 deletions(-)
 create mode 100644 client/tests/kvm/tests/guest_s4.py

diff --git a/client/tests/kvm/kvm_tests.cfg.sample 
b/client/tests/kvm/kvm_tests.cfg.sample
index cc3228a..9ccc9b5 100644
--- a/client/tests/kvm/kvm_tests.cfg.sample
+++ b/client/tests/kvm/kvm_tests.cfg.sample
@@ -118,6 +118,15 @@ variants:
 - linux_s3: install setup
 type = linux_s3
 
+- guest_s4:
+type = guest_s4
+check_s4_support_cmd = grep -q disk /sys/power/state
+test_s4_cmd = cd /tmp/;nohup tcpdump -q -t ip host localhost
+check_s4_cmd = pgrep tcpdump
+set_s4_cmd = echo disk > /sys/power/state
+kill_test_s4_cmd = pkill tcpdump
+services_up_timeout = 30
+
 - timedrift:install setup
 extra_params += " -rtc-td-hack"
 variants:
@@ -507,6 +516,13 @@ variants:
 # Alternative host load:
 #host_load_command = dd if=/dev/urandom of=/dev/null
 host_load_instances = 8
+guest_s4:
+check_s4_support_cmd = powercfg /hibernate on
+test_s4_cmd = start /B ping -n 3000 localhost
+check_s4_cmd = tasklist | find /I "ping"
+set_s4_cmd = rundll32.exe PowrProf.dll, SetSuspendState
+kill_test_s4_cmd = taskkill /IM ping.exe /F
+services_up_timeout = 30
 nic_hotplug:
 reference_cmd = ipconfig /all
 find_pci_cmd = ipconfig /all | find "Description"
diff --git a/client/tests/kvm/tests/guest_s4.py 
b/client/tests/kvm/tests/guest_s4.py
new file mode 100644
index 000..7147e3b
--- /dev/null
+++ b/client/tests/kvm/tests/guest_s4.py
@@ -0,0 +1,66 @@
+import logging, time
+from autotest_lib.client.common_lib import error
+import kvm_test_utils, kvm_utils
+
+
+def run_guest_s4(test, params, env):
+    """
+    Suspend guest to disk, supports both Linux & Windows OSes.
+
+    @param test: kvm test object.
+    @param params: Dictionary with test parameters.
+    @param env: Dictionary with the test environment.
+    """
+    vm = kvm_test_utils.get_living_vm(env, params.get("main_vm"))
+    session = kvm_test_utils.wait_for_login(vm)
+
+    logging.info("Checking whether guest OS supports suspend to disk (S4)")
+    status = session.get_command_status(params.get("check_s4_support_cmd"))
+    if status is None:
+        logging.error("Failed to check if guest OS supports S4")
+    elif status != 0:
+        raise error.TestFail("Guest OS does not support S4")
+
+    logging.info("Waiting until all guest OS services are fully started")
+    time.sleep(float(params.get("services_up_timeout")))
+
+    # Start up a program (tcpdump for linux & ping for Windows), as a flag.
+    # If the program dies after suspend, then fail this testcase.
+    test_s4_cmd = params.get("test_s4_cmd")
+    session.sendline(test_s4_cmd)
+
+    # Get a second session to start S4
+    session2 = kvm_test_utils.wait_for_login(vm)
+
+    check_s4_cmd = params.get("check_s4_cmd")
+    if session2.get_command_status(check_s4_cmd):
+        raise error.TestError("Failed to launch '%s' as a background process" %
+                              test_s4_cmd)
+    logging.info("Launched background command in guest: %s" % test_s4_cmd)
+
+    # Suspend to disk
+    logging.info("Starting suspend to disk now...")
+    session2.sendline(params.get("set_s4_cmd"))
+
+    if not kvm_utils.wait_for(vm.is_dead, 360, 30, 2):
+        raise error.TestFail("VM refuses to go down. Suspend failed.")
+    logging.info("VM suspended successfully. Waiting before booting it again.")
+    time.sleep(10)
+
+    # Start vm, and check whether the program is still running
+    logging.info("Starting the suspended VM...")
+    if not vm.create():
+        raise error.TestError("Failed to start VM after suspend to disk")
+    if not vm.is_alive():
+        raise error.TestError("VM seems to be dead after it was suspended")
+
+    # Check whether the test command is still alive
+    logging.info("Checking if background command is still alive")
+    if session2.get_command_status(check_s4_cmd):
+        raise error.TestFail("Command %s failed. S4 failed." % test_s4_cmd)
+
+    logging.info("VM resumed successfully after suspend to disk")
+    session2.sendline(params.get("kill_test_s4_cmd"))
+    session.close()
+    session2.close()
-- 
1.6.2.5



Re: [Autotest] [PATCH] Add a kvm test guest_s4 which supports both Linux and Windows platform

2009-10-14 Thread Lucas Meneghel Rodrigues
On Tue, Oct 13, 2009 at 11:54 PM, Yolkfull Chow yz...@redhat.com wrote:
 On Tue, Oct 13, 2009 at 05:29:40PM -0300, Lucas Meneghel Rodrigues wrote:
 Hi Yolkfull and Chen:

 Thanks for your test! I have some comments and doubts to clear, most
 of them are about content of the messages delivered for the user and
 some other details.

 On Sun, Sep 27, 2009 at 6:11 AM, Yolkfull Chow yz...@redhat.com wrote:
  For this case, Ken Cao wrote the linux part previously and I did extensive
  modifications on Windows platform support.
 
  Signed-off-by: Ken Cao k...@redhat.com
  Signed-off-by: Yolkfull Chow yz...@redhat.com
  ---
   client/tests/kvm/kvm_tests.cfg.sample |   14 +++
   client/tests/kvm/tests/guest_s4.py    |   66 
  +
   2 files changed, 80 insertions(+), 0 deletions(-)
   create mode 100644 client/tests/kvm/tests/guest_s4.py
 
  diff --git a/client/tests/kvm/kvm_tests.cfg.sample 
  b/client/tests/kvm/kvm_tests.cfg.sample
  index 285a38f..f9ecb61 100644
  --- a/client/tests/kvm/kvm_tests.cfg.sample
  +++ b/client/tests/kvm/kvm_tests.cfg.sample
  @@ -94,6 +94,14 @@ variants:
      - linux_s3:     install setup
          type = linux_s3
 
  +    - guest_s4:
  +        type = guest_s4
  +        check_s4_support_cmd = grep -q disk /sys/power/state
  +        test_s4_cmd = cd /tmp/;nohup tcpdump -q -t ip host localhost
  +        check_s4_cmd = pgrep tcpdump
   +        set_s4_cmd = echo disk > /sys/power/state
  +        kill_test_s4_cmd = pkill tcpdump
  +
      - timedrift:    install setup
          type = timedrift
          extra_params +=  -rtc-td-hack
  @@ -382,6 +390,12 @@ variants:
              # Alternative host load:
              #host_load_command = dd if=/dev/urandom of=/dev/null
              host_load_instances = 8
  +        guest_s4:
  +            check_s4_support_cmd = powercfg /hibernate on
  +            test_s4_cmd = start /B ping -n 3000 localhost
   +            check_s4_cmd = tasklist | find /I "ping"
  +            set_s4_cmd = rundll32.exe PowrProf.dll, SetSuspendState
  +            kill_test_s4_cmd = taskkill /IM ping.exe /F
 
          variants:
              - Win2000:
  diff --git a/client/tests/kvm/tests/guest_s4.py 
  b/client/tests/kvm/tests/guest_s4.py
  new file mode 100644
  index 000..5d8fbdf
  --- /dev/null
  +++ b/client/tests/kvm/tests/guest_s4.py
  @@ -0,0 +1,66 @@
  +import logging, time
  +from autotest_lib.client.common_lib import error
  +import kvm_test_utils, kvm_utils
  +
  +
  +def run_guest_s4(test, params, env):
  +    """
  +    Suspend guest to disk, supports both Linux & Windows OSes.
  +
  +    @param test: kvm test object.
  +    @param params: Dictionary with test parameters.
  +    @param env: Dictionary with the test environment.
  +    """
  +    vm = kvm_test_utils.get_living_vm(env, params.get("main_vm"))
  +    session = kvm_test_utils.wait_for_login(vm)
  +
  +    logging.info("Checking whether VM supports S4")
  +    status = session.get_command_status(params.get("check_s4_support_cmd"))
  +    if status is None:
  +        logging.error("Failed to check if S4 exists")
  +    elif status != 0:
  +        raise error.TestFail("Guest does not support S4")
  +
  +    logging.info("Waiting for a while for X to start...")

 Yes, generally X starts a bit later than the SSH service, so I
 understand the time being here, however:

  * In fact we are waiting for all services of the guest to be up and
 functional, so depending on the level of load, I don't think 10s is
 gonna make it. So I suggest something >= 30s

 Yeah, reasonable, we did ignore the circumstance with workload. But as
 you mentioned, it can depend on the level of workload, therefore 30s
 may not be enough as well. Your idea to write a utility function
 waiting for some services up is good I think, thus it could be something
 like:

 def wait_services_up(services_list):
    ...

 and for this case:

 wait_services_up(["Xorg"]) for Linux and
 wait_services_up(["explorer.exe"]) for Windows.

Ok, sounds good to me!
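
Just to sketch it (names hypothetical, assuming the session object these
tests already use; the check command would have to differ per OS, e.g.
pgrep on Linux vs tasklist/'sc query' on Windows):

def wait_services_up(session, services_list, timeout=300):
    # Poll the guest until every entry in services_list shows up,
    # or give up after timeout seconds.
    end_time = time.time() + timeout
    while time.time() < end_time:
        missing = [s for s in services_list
                   if session.get_command_status("pgrep -f %s" % s) != 0]
        if not missing:
            return True
        time.sleep(5)
    return False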

  * It's also true that just waiting for a given time and hoping that it
 will be OK kinda sucks, so ideally we need to write utility functions
 to establish as well as possible when all services of a host are fully
 booted up. Stated this way, it looks simple, but it's not.

 Autotest experience suggests that there's no real sane way to
 determine when a linux box is booted up, but we can take a
 semi-rational approach and verify if all services for the current run
 level have the status up, or a similar approach. For windows, I was
 talking to Yaniv Kaul and it seems that processing the output of the
 'sc query' command might give what we want. Bottom line, I'd like to
 add a TODO item, and write a function to establish (fairly confidently)
 that a windows/linux guest is booted up.

  +    time.sleep(10)
  +
  +    # Start up a program (tcpdump for linux OS & ping for M$ OS), as a flag.
  +    # If the program died after suspend, then fails this testcase.


Re: [Autotest] [PATCH] Using shutil.move to move result files in job.py

2009-10-14 Thread Lucas Meneghel Rodrigues
Ok, looks good. Commited as

http://autotest.kernel.org/changeset/3844

On Mon, Oct 12, 2009 at 11:36 PM, Cao, Chen k...@redhat.com wrote:
 Since os.rename requires that the file be on the same partition as
 the dest directory, we would get a Python OSError if the result
 directory is mounted from an nfs server (or is on a different partition
 or something else alike).

 the traceback would be like:

  Traceback (most recent call last):
    File "/usr/local/kvm/kvm-test/bin/autotest", line 52, in ?
      options.log)
    File "/usr/local/kvm/kvm-test/bin/job.py", line 1274, in runjob
      myjob.complete(0)
    File "/usr/local/kvm/kvm-test/bin/job.py", line 798, in complete
      os.rename(self.state_file, dest)
  OSError: [Errno 18] Invalid cross-device link
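
 For illustration (paths made up): rename(2) fails with EXDEV across
 filesystems, which is exactly the error above, while shutil.move()
 detects that case and falls back to copy + unlink:

  import shutil
  # os.rename("/tmp/state", "/results/state")  # may raise OSError (EXDEV)
  shutil.move("/tmp/state", "/results/state")  # copies across devices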

 Signed-off-by: Cao, Chen k...@redhat.com
 ---
  client/bin/job.py |    2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

 diff --git a/client/bin/job.py b/client/bin/job.py
 index a1c07cc..ebfb3a3 100755
 --- a/client/bin/job.py
 +++ b/client/bin/job.py
 @@ -938,7 +938,7 @@ class base_job(object):
          """Clean up and exit"""
         # We are about to exit 'complete' so clean up the control file.
         dest = os.path.join(self.resultdir, os.path.basename(self.state_file))
 -        os.rename(self.state_file, dest)
 +        shutil.move(self.state_file, dest)

         self.harness.run_complete()
         self.disable_external_logging()
 --
 1.6.0.6





-- 
Lucas


Re: [Autotest] [PATCH] Test 802.1Q vlan of nic

2009-10-14 Thread Lucas Meneghel Rodrigues
Hi Amos, thanks for the patch, here are my comments (pretty much
concerning only coding style):

On Wed, Sep 23, 2009 at 8:19 AM, Amos Kong ak...@redhat.com wrote:
 Test 802.1Q vlan of nic, config it by vconfig command.
 1) Create two VMs
 2) Setup guests in different vlan by vconfig and test communication by ping
   using hard-coded ip address
 3) Setup guests in same vlan and test communication by ping
 4) Recover the vlan config

 Signed-off-by: Amos Kong ak...@redhat.com
 ---
  client/tests/kvm/kvm_tests.cfg.sample |    6 +++
  client/tests/kvm/tests/vlan_tag.py    |   66 
 +
  2 files changed, 72 insertions(+), 0 deletions(-)
  create mode 100644 client/tests/kvm/tests/vlan_tag.py

 diff --git a/client/tests/kvm/kvm_tests.cfg.sample 
 b/client/tests/kvm/kvm_tests.cfg.sample
 index 285a38f..5a3f97d 100644
 --- a/client/tests/kvm/kvm_tests.cfg.sample
 +++ b/client/tests/kvm/kvm_tests.cfg.sample
 @@ -145,6 +145,12 @@ variants:
         kill_vm = yes
         kill_vm_gracefully = no

 +    - vlan_tag:  install setup
 +        type = vlan_tag
 +        subnet2 = 192.168.123
 +        vlans = 10 20
 +        nic_mode = tap
 +        nic_model = e1000

  # NICs
  variants:
 diff --git a/client/tests/kvm/tests/vlan_tag.py 
 b/client/tests/kvm/tests/vlan_tag.py
 new file mode 100644
 index 000..2904276
 --- /dev/null
 +++ b/client/tests/kvm/tests/vlan_tag.py
 @@ -0,0 +1,66 @@
 +import logging, time
 +from autotest_lib.client.common_lib import error
 +import kvm_subprocess, kvm_test_utils, kvm_utils
 +
 +def run_vlan_tag(test, params, env):
 +    """
 +    Test 802.1Q vlan of nic, config it by vconfig command.
 +
 +    1) Create two VMs
 +    2) Setup guests in different vlan by vconfig and test communication by
 +       ping using hard-coded ip address
 +    3) Setup guests in same vlan and test communication by ping
 +    4) Recover the vlan config
 +
 +    @param test: Kvm test object
 +    @param params: Dictionary with the test parameters.
 +    @param env: Dictionary with test environment.
 +    """
 +
 +    vm = []
 +    session = []
 +    subnet2 = params.get("subnet2")
 +    vlans = params.get("vlans").split()
 +
 +    vm.append(kvm_test_utils.get_living_vm(env, "%s" %
 +                                           params.get("main_vm")))
 +
 +    params_vm2 = params.copy()
 +    params_vm2['image_snapshot'] = "yes"
 +    params_vm2['kill_vm_gracefully'] = "no"
 +    params_vm2["address_index"] = int(params.get("address_index", 0))+1
 +    vm.append(vm[0].clone("vm2", params_vm2))
 +    kvm_utils.env_register_vm(env, "vm2", vm[1])
 +    if not vm[1].create():
 +        raise error.TestError, "VM 'vm[1]' create faild"

In the above exception raise statement, the preferred form to do it is:

raise error.TestError("VM 1 create failed")

 +    for i in range(2):
 +        session.append(kvm_test_utils.wait_for_login(vm[i]))
 +
 +    try:
 +        vconfig_cmd = "vconfig add eth0 %s;ifconfig eth0.%s %s.%s"
 +        if session[0].get_command_status(vconfig_cmd % (vlans[0],
 +                                                        vlans[0],
 +                                                        subnet2,
 +                                                        11)) != 0 or \
 +           session[1].get_command_status(vconfig_cmd % (vlans[1],
 +                                                        vlans[1],
 +                                                        subnet2,
 +                                                        12)) != 0:

In the above if statement, I'd assign the comparisons to variables to
make the code more readable, like:

try:
vconfig_cmd = "vconfig add eth0 %s;ifconfig eth0.%s %s.%s"
# Attempt to configure IPs for the VMs and record the results in boolean
# variables
ip_config_vm1_ok = (session[0].get_command_status(
vconfig_cmd % (vlans[0], vlans[0], subnet2, 11)) == 0)

ip_config_vm2_ok = (session[1].get_command_status(
vconfig_cmd % (vlans[1], vlans[1], subnet2, 12)) == 0)

if not ip_config_vm1_ok or not ip_config_vm2_ok:

 +            raise error.TestError, "Fail to config VMs ip address"
 +        if session[0].get_command_status("ping -c 2 %s.12" % subnet2) == 0:
 +            raise error.TestFail("Guest is unexpectedly pingable in "
 +                                 "different vlan")

A similar comment applies to the above block

 +        if session[1].get_command_status("vconfig rem eth0.%s;vconfig add "
 +                                         "eth0 %s;ifconfig eth0.%s %s.12" %
 +                                          (vlans[1],
 +                                           vlans[0],
 +                                           vlans[0],
 +                                           subnet2)) != 0:

Idem

 +            raise error.TestError, "Fail to config ip address of VM 'vm[1]'"
 +        if session[0].get_command_status("ping -c 2 %s.12" % subnet2) != 0:
 +            raise error.TestFail, "Fail to ping the guest in same vlan"

See 

Re: sync guest calls made async on host - SQLite performance

2009-10-14 Thread Avi Kivity

On 10/14/2009 07:37 AM, Christoph Hellwig wrote:

Christoph, wasn't there a bug where the guest didn't wait for requests
in response to a barrier request?
 

Can't remember anything like that.  The bug was the complete lack of
cache flush infrastructure for virtio, and the lack of advertising a
volatile write cache on ide.
   


By "complete flush infrastructure", you mean host-side and guest-side 
support for a new barrier command, yes?


But can't this be also implemented using QUEUE_ORDERED_DRAIN, and on the 
host side disabling the backing device write cache?  I'm talking about 
cache=none, primarily.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



Re: sync guest calls made async on host - SQLite performance

2009-10-14 Thread Matthew Tippett
I understand.  However, the test itself is a fairly trivial
representation of a single-tier, high-transaction load system (i.e. a
system that is logging a large number of events).

The phoronix test suite simply hands over to a binary using sqlite and
does 25000 sequential inserts.  The overhead of the suite would be
measured in milliseconds at the start and end.  Over the life of the
test (100-2500 seconds), it becomes insignificant noise.

As I said, the relevant system calls for the running of the
test are expressed as

write
write
write
fdatasync

The writes are typically small (5-100 bytes).

With that information, I believe the method of execution is mostly
irrelevant.  If people are still concerned, I can write a trivial
application that should reproduce the behaviour.
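
Something like this (untested sketch; sizes arbitrary within the
5-100 byte range above):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
    int fd = open("testfile", O_WRONLY | O_CREAT | O_TRUNC, 0644);
    char buf[64];
    int i;

    if (fd < 0) {
        perror("open");
        return 1;
    }
    memset(buf, 'x', sizeof(buf));
    for (i = 0; i < 25000; i++) {   /* mirrors the 25000 inserts */
        write(fd, buf, sizeof(buf));
        write(fd, buf, sizeof(buf));
        write(fd, buf, sizeof(buf));
        fdatasync(fd);              /* commit each transaction */
    }
    close(fd);
    return 0;
}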

It still ultimately comes down to the guest's expected semantics of
fdatasync, and the actual behaviour relative to the host's physical
device.  I am not saying that the current behaviour is wrong, I just
want a clear understanding of what is expected by the kvm team vs what
we are seeing.

Regards... Matthew


On 10/14/09, Dustin Kirkland kirkl...@canonical.com wrote:
 On Tue, Oct 13, 2009 at 9:09 PM, Matthew Tippett tippe...@gmail.com wrote:
 I believe that I have removed the benchmark from discussion, we are now
 looking at semantics of small writes followed by
 ...
 And quoting from Dustin

 ===
 I have tried this, exactly as you have described.  The tests took:

  * 1162.08033204 seconds on native hardware
  * 2306.68306303 seconds in a kvm using if=scsi disk
  * 405.382308006 seconds in a kvm using if=virtio

 Hang on now...

 My timings are from running the Phoronix test *as you described*.  I
 have not looked at what magic is happening inside of this Phoronix
 test.  I am most certainly *not* speaking as to the quality or
 legitimacy of the test.

 :-Dustin


-- 
Sent from my mobile device


[PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported

2009-10-14 Thread Avi Kivity
Early implementations of virtio devices did not support barrier operations,
but did commit the data to disk.  In such cases, drain the queue to emulate
barrier operations.

Signed-off-by: Avi Kivity a...@redhat.com
---
 drivers/block/virtio_blk.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 43f1938..2627cc3 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -354,12 +354,16 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
	vblk->disk->driverfs_dev = &vdev->dev;
index++;
 
-   /* If barriers are supported, tell block layer that queue is ordered */
+   /* If barriers are supported, tell block layer that queue is ordered;
+* otherwise just drain the queue.
+*/
 if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
 blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_DRAIN_FLUSH,
   virtblk_prepare_flush);
 else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER))
 blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);
+   else
+   blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_DRAIN, NULL);
 
/* If disk is read-only in the host, the guest should obey */
if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
-- 
1.6.2.5



Re: [Autotest] [PATCH] Add pass through feature test (support SR-IOV)

2009-10-14 Thread Lucas Meneghel Rodrigues
Yolkfull, I've studied single root IO virtualization before
reviewing your patch; the general approach here looks good. There were
some stylistic points as far as the code is concerned, so I have rebased
your patch against the latest trunk, and added some explanation about
the features being tested and referenced (extracted from a Fedora 12
blueprint).

Please let me know if you are OK with it, I guess I will review this
patch a couple more times, as the code and the features being tested
are fairly complex.

Thanks!

On Mon, Sep 14, 2009 at 11:20 PM, Yolkfull Chow yz...@redhat.com wrote:
 It supports both SR-IOV virtual function and physical NIC card pass through.
  * For SR-IOV virtual functions passthrough, we could specify the module
    parameter 'max_vfs' in config file.
  * For physical NIC card pass through, we should specify the device name(s).

 Signed-off-by: Yolkfull Chow yz...@redhat.com
 ---
  client/tests/kvm/kvm_tests.cfg.sample |   12 ++
  client/tests/kvm/kvm_utils.py         |  248 
 -
  client/tests/kvm/kvm_vm.py            |   68 +-
  3 files changed, 326 insertions(+), 2 deletions(-)

 diff --git a/client/tests/kvm/kvm_tests.cfg.sample 
 b/client/tests/kvm/kvm_tests.cfg.sample
 index a83ef9b..c6037da 100644
 --- a/client/tests/kvm/kvm_tests.cfg.sample
 +++ b/client/tests/kvm/kvm_tests.cfg.sample
 @@ -627,6 +627,18 @@ variants:


  variants:
 +    - @no_passthrough:
 +        pass_through = no
 +    - nic_passthrough:
 +        pass_through = pf
 +        passthrough_devs = eth1
 +    - vfs_passthrough:
 +        pass_through = vf
 +        max_vfs = 7
 +        vfs_count = 7
 +
 +
 +variants:
     - @basic:
         only Fedora Windows
     - @full:
 diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
 index dfca938..1fe3b31 100644
 --- a/client/tests/kvm/kvm_utils.py
 +++ b/client/tests/kvm/kvm_utils.py
 @@ -1,5 +1,5 @@
  import md5, thread, subprocess, time, string, random, socket, os, signal, pty
 -import select, re, logging, commands
 +import select, re, logging, commands, cPickle
  from autotest_lib.client.bin import utils
  from autotest_lib.client.common_lib import error
  import kvm_subprocess
 @@ -795,3 +795,249 @@ def md5sum_file(filename, size=None):
         size -= len(data)
     f.close()
     return o.hexdigest()
 +
 +
 +def get_full_id(pci_id):
 +    """
 +    Get full PCI ID of pci_id.
 +    """
 +    cmd = "lspci -D | awk '/%s/ {print $1}'" % pci_id
 +    status, full_id = commands.getstatusoutput(cmd)
 +    if status != 0:
 +        return None
 +    return full_id
 +
 +
 +def get_vendor_id(pci_id):
 +    """
 +    Check out the device vendor ID according to PCI ID.
 +    """
 +    cmd = "lspci -n | awk '/%s/ {print $3}'" % pci_id
 +    return re.sub(":", " ", commands.getoutput(cmd))
 +
 +
 +def release_pci_devs(dict):
 +    """
 +    Release assigned PCI devices to host.
 +    """
 +    def release_dev(pci_id):
 +        base_dir = "/sys/bus/pci"
 +        full_id = get_full_id(pci_id)
 +        vendor_id = get_vendor_id(pci_id)
 +        drv_path = os.path.join(base_dir, "devices/%s/driver" % full_id)
 +        if 'pci-stub' in os.readlink(drv_path):
 +            cmd = "echo '%s' > %s/new_id" % (vendor_id, drv_path)
 +            if os.system(cmd):
 +                return False
 +
 +            stub_path = os.path.join(base_dir, "drivers/pci-stub")
 +            cmd = "echo '%s' > %s/unbind" % (full_id, stub_path)
 +            if os.system(cmd):
 +                return False
 +
 +            prev_driver = self.dev_prev_drivers[pci_id]
 +            cmd = "echo '%s' > %s/bind" % (full_id, prev_driver)
 +            if os.system(cmd):
 +                return False
 +        return True
 +
 +    for pci_id in dict.keys():
 +        if not release_dev(pci_id):
 +            logging.error("Failed to release device [%s] to host" % pci_id)
 +        else:
 +            logging.info("Release device [%s] successfully" % pci_id)
 +
 +
 +class PassThrough:
 +    """
 +    Request passthroughable devices on host. It will check whether to request
 +    PF(physical NIC cards) or VF(Virtual Functions).
 +    """
 +    def __init__(self, type="nic_vf", max_vfs=None, names=None):
 +        """
 +        Initialize parameter 'type' which could be:
 +        nic_vf: Virtual Functions
 +        nic_pf: Physical NIC card
 +        mixed:  Both includes VFs and PFs
 +
 +        If pass through Physical NIC cards, we need to specify which devices
 +        to be assigned, e.g. 'eth1 eth2'.
 +
 +        If pass through Virtual Functions, we need to specify how many vfs
 +        are going to be assigned, e.g. passthrough_count = 8 and max_vfs in
 +        config file.
 +
 +        @param type: Pass through device's type
 +        @param max_vfs: parameter of module 'igb'
 +        @param names: Physical NIC cards' names, e.g. 'eth1 eth2 ...'
 +        """
 +        self.type = type
 +        if max_vfs:
 +            self.max_vfs = int(max_vfs)
 +        if names:
 

Re: [Qemu-devel] Release plan for 0.12.0

2009-10-14 Thread Arnd Bergmann
On Thursday 08 October 2009, Anthony Liguori wrote:
 Jens Osterkamp wrote:
  On Wednesday 30 September 2009, Anthony Liguori wrote:
 
  Please add to this list and I'll collect it all and post it somewhere.
  
 
  What about Or Gerlitz' raw backend driver ? I did not see it go in yet, or 
  did 
  I miss something ?

 
 The patch seems to have not been updated after the initial posting and 
 the first feedback cycle.
 
 I'm generally inclined to oppose the functionality as I don't think it 
 offers any advantages over the existing backends.

There are two reasons why I think this backend is important:

- As an easy way to provide isolation between guests (private ethernet
  port aggregator, PEPA) and external enforcement of network privileges
  (virtual ethernet port aggregator, VEPA) using the macvlan subsystem.

- As a counterpart to the vhost_net driver, providing an identical
  user interface with or without vhost_net acceleration in the kernel.

Arnd 


Re: Release plan for 0.12.0

2009-10-14 Thread Michael S. Tsirkin
On Thu, Oct 08, 2009 at 09:21:04AM -0500, Anthony Liguori wrote:
 Jens Osterkamp wrote:
 On Wednesday 30 September 2009, Anthony Liguori wrote:

   
  o VMState conversion -- I expect most of the pc target to be completed
  o qdev conversion -- I hope that we'll get most of the pc target
 completely converted to qdev
  o storage live migration
  o switch to SeaBIOS (need to finish porting features from Bochs)
  o switch to gPXE (need to resolve slirp tftp server issue)
  o KSM integration
  o in-kernel APIC support for KVM
  o guest SMP support for KVM
  o updates to the default pc machine type

 Please add to this list and I'll collect it all and post it somewhere.
 

 What about Or Gerlitz' raw backend driver ? I did not see it go in yet, 
 or did I miss something ?
   

 The patch seems to have not been updated after the initial posting and  
 the first feedback cycle.

Looks like Or has abandoned it.  I have an updated version which works
with new APIs, etc.  Let me post it and we'll go from there.

 I'm generally inclined to oppose the functionality as I don't think it  
 offers any advantages over the existing backends.

I patch it in and use it all the time.  It's much easier to setup
on a random machine than a bridged config.

-- 
MST


Re: sync guest calls made async on host - SQLite performance

2009-10-14 Thread Christoph Hellwig
On Wed, Oct 14, 2009 at 08:03:41PM +0900, Avi Kivity wrote:
 Can't remember anything like that.  The bug was the complete lack of
 cache flush infrastructure for virtio, and the lack of advertising a
 volatile write cache on ide.

 
 By complete flush infrastructure, you mean host-side and guest-side 
 support for a new barrier command, yes?

The cache flush command, not barrier command.  The new virtio code
implements barrier the same way we do for IDE and SCSI - all barrier
semantics are implemented by generic code in the block layer by draining
the queues, the only thing we send over the wire are cache flush
commands in strategic places.

 But can't this be also implemented using QUEUE_ORDERED_DRAIN, and on the 
 host side disabling the backing device write cache?  I'm talking about 
 cache=none, primarily.

Yes, it could.  But as I found out in a long discussion with Stephen
it's not actually necessary.  All filesystems do the right thing for
a device not claiming to support barriers if it doesn't include a write
cache, that is, they implement ordering internally.  So there is no urge
to set QUEUE_ORDERED_DRAIN for the case without a write cache.



[PATCHv2 1/2] Complete cpu initialization before signaling main thread.

2009-10-14 Thread Gleb Natapov
Otherwise some cpus may start executing code before others
are fully initialized.

Signed-off-by: Gleb Natapov g...@redhat.com
---
v1->v2:
 - reinit cpu_single_env after qemu_cond_wait()

 qemu-kvm.c |   29 +++--
 1 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 62ca050..a104ab8 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1954,18 +1954,6 @@ static void process_irqchip_events(CPUState *env)
 
 static int kvm_main_loop_cpu(CPUState *env)
 {
-setup_kernel_sigmask(env);
-
-pthread_mutex_lock(&qemu_mutex);
-
-kvm_arch_init_vcpu(env);
-#ifdef TARGET_I386
-kvm_tpr_vcpu_start(env);
-#endif
-
-cpu_single_env = env;
-kvm_arch_load_regs(env);
-
 while (1) {
 int run_cpu = !is_cpu_stopped(env);
 if (run_cpu && !kvm_irqchip_in_kernel(kvm_context)) {
@@ -2003,15 +1991,28 @@ static void *ap_main_loop(void *_env)
 on_vcpu(env, kvm_arch_do_ioperm, data);
 #endif
 
-/* signal VCPU creation */
+setup_kernel_sigmask(env);
+
 pthread_mutex_lock(&qemu_mutex);
+cpu_single_env = env;
+
+kvm_arch_init_vcpu(env);
+#ifdef TARGET_I386
+kvm_tpr_vcpu_start(env);
+#endif
+
+kvm_arch_load_regs(env);
+
+/* signal VCPU creation */
 current_env-created = 1;
 pthread_cond_signal(&qemu_vcpu_cond);
 
 /* and wait for machine initialization */
 while (!qemu_system_ready)
 qemu_cond_wait(&qemu_system_cond);
-pthread_mutex_unlock(&qemu_mutex);
+
+/* re-initialize cpu_single_env after re-acquiring qemu_mutex */
+cpu_single_env = env;
 
 kvm_main_loop_cpu(env);
 return NULL;
-- 
1.6.3.3



[PATCH 2/2] Don't sync mpstate to/from kernel when unneeded.

2009-10-14 Thread Gleb Natapov
mp_state, unlike other cpu state, can be changed not only from the vcpu
context it belongs to, but by other vcpus too. That makes loading it
from the kernel/saving it back unsafe if the mp_state value is changed
inside the kernel between load and save. For example, vcpu 1 loads
mp_state into user-space and the state is RUNNING; vcpu 0 sends INIT/SIPI
to vcpu 1, so the in-kernel mp_state becomes SIPI; vcpu 1 saves the
user-space copy back into the kernel and calls vcpu_run(). The SIPI
state is lost.

The patch copies mp_state into the kernel only when it is known that the
in-kernel value is outdated. This happens on reset and vmload.

Signed-off-by: Gleb Natapov g...@redhat.com
---
 hw/apic.c |1 +
 monitor.c |2 ++
 qemu-kvm.c|9 -
 qemu-kvm.h|1 -
 target-i386/machine.c |3 +++
 5 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/hw/apic.c b/hw/apic.c
index 2952675..729 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -512,6 +512,7 @@ void apic_init_reset(CPUState *env)
 if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
 env->mp_state
 = env->halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE;
+kvm_load_mpstate(env);
 }
 #endif
 }
diff --git a/monitor.c b/monitor.c
index 7f0f5a9..dd8f2ca 100644
--- a/monitor.c
+++ b/monitor.c
@@ -350,6 +350,7 @@ static CPUState *mon_get_cpu(void)
 mon_set_cpu(0);
 }
 cpu_synchronize_state(cur_mon->mon_cpu);
+kvm_save_mpstate(cur_mon->mon_cpu);
 return cur_mon->mon_cpu;
 }
 
@@ -377,6 +378,7 @@ static void do_info_cpus(Monitor *mon)
 
 for(env = first_cpu; env != NULL; env = env->next_cpu) {
 cpu_synchronize_state(env);
+kvm_save_mpstate(env);
 monitor_printf(mon, "%c CPU #%d:",
(env == mon->mon_cpu) ? '*' : ' ',
env->cpu_index);
diff --git a/qemu-kvm.c b/qemu-kvm.c
index a104ab8..267222d 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -1609,11 +1609,6 @@ static void on_vcpu(CPUState *env, void (*func)(void *data), void *data)
 void kvm_arch_get_registers(CPUState *env)
 {
kvm_arch_save_regs(env);
-   kvm_arch_save_mpstate(env);
-#ifdef KVM_CAP_MP_STATE
-   if (kvm_irqchip_in_kernel(kvm_context))
-   env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
-#endif
 }
 
 static void do_kvm_cpu_synchronize_state(void *_env)
@@ -1707,6 +1702,10 @@ static void kvm_do_save_mpstate(void *_env)
 CPUState *env = _env;
 
 kvm_arch_save_mpstate(env);
+#ifdef KVM_CAP_MP_STATE
+if (kvm_irqchip_in_kernel(kvm_context))
+env->halted = (env->mp_state == KVM_MP_STATE_HALTED);
+#endif
 }
 
 void kvm_save_mpstate(CPUState *env)
diff --git a/qemu-kvm.h b/qemu-kvm.h
index d6748c7..e2a87b8 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -1186,7 +1186,6 @@ void kvm_arch_get_registers(CPUState *env);
 static inline void kvm_arch_put_registers(CPUState *env)
 {
 kvm_load_registers(env);
-kvm_load_mpstate(env);
 }
 
 void kvm_cpu_synchronize_state(CPUState *env);
diff --git a/target-i386/machine.c b/target-i386/machine.c
index e640dad..16d9c57 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -324,6 +324,7 @@ static void cpu_pre_save(void *opaque)
 int i, bit;
 
 cpu_synchronize_state(env);
+kvm_save_mpstate(env);
 
 /* FPU */
 env->fpus_vmstate = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
@@ -385,6 +386,8 @@ static int cpu_post_load(void *opaque, int version_id)
 }
 
 tlb_flush(env, 1);
+kvm_load_mpstate(env);
+
 return 0;
 }
 
-- 
1.6.3.3



Re: [Qemu-devel] Release plan for 0.12.0

2009-10-14 Thread Anthony Liguori

Arnd Bergmann wrote:

There are two reasons why I think this backend is important:

- As an easy way to provide isolation between guests (private ethernet
  port aggregator, PEPA) and external enforcement of network privileges
  (virtual ethernet port aggregator, VEPA) using the macvlan subsystem.
  


Can't this all be done with tap and a bridge?

Regards,

Anthony Liguori


Re: [Qemu-devel] Release plan for 0.12.0

2009-10-14 Thread Michael S. Tsirkin
On Wed, Oct 14, 2009 at 08:53:55AM -0500, Anthony Liguori wrote:
 Arnd Bergmann wrote:
 There are two reasons why I think this backend is important:

 - As an easy way to provide isolation between guests (private ethernet
   port aggregator, PEPA) and external enforcement of network privileges
   (virtual ethernet port aggregator, VEPA) using the macvlan subsystem.
   

 Can't this all be done with tap and a bridge?

Not with existing kernels, I think.

 Regards,

 Anthony Liguori


Re: [Qemu-devel] Release plan for 0.12.0

2009-10-14 Thread Michael S. Tsirkin
On Wed, Oct 14, 2009 at 03:09:28PM +0200, Arnd Bergmann wrote:
 On Thursday 08 October 2009, Anthony Liguori wrote:
  Jens Osterkamp wrote:
   On Wednesday 30 September 2009, Anthony Liguori wrote:
  
   Please add to this list and I'll collect it all and post it somewhere.
   
  
   What about Or Gerlitz' raw backend driver ? I did not see it go in yet, 
   or did 
   I miss something ?
 
  
  The patch seems to have not been updated after the initial posting and 
  the first feedback cycle.
  
  I'm generally inclined to oppose the functionality as I don't think it 
  offers any advantages over the existing backends.
 
 There are two reasons why I think this backend is important:
 
 - As an easy way to provide isolation between guests (private ethernet
   port aggregator, PEPA) and external enforcement of network privileges
   (virtual ethernet port aggregator, VEPA) using the macvlan subsystem.
 
 - As a counterpart to the vhost_net driver, providing an identical
   user interface with or without vhost_net acceleration in the kernel.
 
   Arnd 

I think raw sockets also support RX mac/vlan filtering in kernel, which
might be faster than doing it in virtio in userspace as it's done now.

-- 
MST


Re: Release plan for 0.12.0

2009-10-14 Thread Anthony Liguori

Michael S. Tsirkin wrote:

Looks like Or has abandoned it.  I have an updated version which works
with new APIs, etc.  Let me post it and we'll go from there.

  
I'm generally inclined to oppose the functionality as I don't think it  
offers any advantages over the existing backends.



I patch it in and use it all the time.  It's much easier to setup
on a random machine than a bridged config.
  


Having two things that do the same thing is just going to lead to user 
confusion.  If the problem is tap is too hard to setup, we should try to 
simplify tap configuration.


Regards,

Anthony Liguori




Re: Release plan for 0.12.0

2009-10-14 Thread Michael S. Tsirkin
On Wed, Oct 14, 2009 at 09:17:15AM -0500, Anthony Liguori wrote:
 Michael S. Tsirkin wrote:
 Looks like Or has abandoned it.  I have an updated version which works
 with new APIs, etc.  Let me post it and we'll go from there.

   
 I'm generally inclined to oppose the functionality as I don't think 
 it  offers any advantages over the existing backends.
 

 I patch it in and use it all the time.  It's much easier to setup
 on a random machine than a bridged config.
   

 Having two things that do the same thing is just going to lead to user  
 confusion.

They do not do the same thing. With raw socket you can use windows
update without a bridge in the host, with tap you can't.

 If the problem is tap is too hard to setup, we should try to  
 simplify tap configuration.

The problem is bridge is too hard to setup.
Simplifying that is a good idea, but outside the scope
of the qemu project.


 Regards,

 Anthony Liguori



Re: [Autotest] [PATCH] KVM test: Add PCI pass through test

2009-10-14 Thread Lucas Meneghel Rodrigues
FYI, Amit pointed out that the correct name for this test would be PCI
device assignment, so the final version of this patch will be called
PCI device assignment instead.

On Wed, Oct 14, 2009 at 9:08 AM, Lucas Meneghel Rodrigues
l...@redhat.com wrote:
 Add a new PCI pass through test. It supports both SR-IOV virtual
 functions and physical NIC card pass through.

 Single Root I/O Virtualization (SR-IOV) allows a single PCI device to
 be shared amongst multiple virtual machines while retaining the
 performance benefit of assigning a PCI device to a virtual machine.
 A common example is where a single SR-IOV capable NIC - with perhaps
 only a single physical network port - might be shared with multiple
 virtual machines by assigning a virtual function to each VM.

 SR-IOV support is implemented in the kernel. The core implementation is
 contained in the PCI subsystem, but there must also be driver support
 for both the Physical Function (PF) and Virtual Function (VF) devices.
 With an SR-IOV capable device one can allocate VFs from a PF. The VFs
 surface as PCI devices which are backed on the physical PCI device by
 resources (queues, and register sets).

 Device support:

 In 2.6.30, the Intel® 82576 Gigabit Ethernet Controller is the only
 SR-IOV capable device supported. The igb driver has PF support and the
 igbvf has VF support.

 In 2.6.31 the Neterion® X3100™ is supported as well. This device uses
 the same vxge driver for the PF as well as the VFs.

 In order to configure the test:

  * For SR-IOV virtual functions passthrough, we could specify the
    module parameter 'max_vfs' in config file.
  * For physical NIC card pass through, we should specify the device
    name(s).

 Signed-off-by: Yolkfull Chow yz...@redhat.com
 ---
  client/tests/kvm/kvm_tests.cfg.sample |   11 ++-
  client/tests/kvm/kvm_utils.py         |  278 
 +
  client/tests/kvm/kvm_vm.py            |   72 +
  3 files changed, 360 insertions(+), 1 deletions(-)

 diff --git a/client/tests/kvm/kvm_tests.cfg.sample 
 b/client/tests/kvm/kvm_tests.cfg.sample
 index cc3228a..1dad188 100644
 --- a/client/tests/kvm/kvm_tests.cfg.sample
 +++ b/client/tests/kvm/kvm_tests.cfg.sample
 @@ -786,13 +786,22 @@ variants:
         only default
         image_format = raw

 -
  variants:
     - @smallpages:
     - hugepages:
         pre_command = /usr/bin/python scripts/hugepage.py /mnt/kvm_hugepage
         extra_params +=  -mem-path /mnt/kvm_hugepage

 +variants:
 +    - @no_passthrough:
 +        pass_through = no
 +    - nic_passthrough:
 +        pass_through = pf
 +        passthrough_devs = eth1
 +    - vfs_passthrough:
 +        pass_through = vf
 +        max_vfs = 7
 +        vfs_count = 7

  variants:
     - @basic:
 diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py
 index 53b664a..0e3398c 100644
 --- a/client/tests/kvm/kvm_utils.py
 +++ b/client/tests/kvm/kvm_utils.py
 @@ -788,3 +788,281 @@ def md5sum_file(filename, size=None):
         size -= len(data)
     f.close()
     return o.hexdigest()
 +
 +
 +def get_full_id(pci_id):
 +    """
 +    Get full PCI ID of pci_id.
 +    """
 +    cmd = "lspci -D | awk '/%s/ {print $1}'" % pci_id
 +    status, full_id = commands.getstatusoutput(cmd)
 +    if status != 0:
 +        return None
 +    return full_id
 +
 +
 +def get_vendor_id(pci_id):
 +    """
 +    Check out the device vendor ID according to PCI ID.
 +    """
 +    cmd = "lspci -n | awk '/%s/ {print $3}'" % pci_id
 +    return re.sub(":", " ", commands.getoutput(cmd))
 +
 +
 +def release_dev(pci_id, pci_dict):
 +    """
 +    Release a single PCI device.
 +
 +    @param pci_id: PCI ID of a given PCI device
 +    @param pci_dict: Dictionary with information about PCI devices
 +    """
 +    base_dir = "/sys/bus/pci"
 +    full_id = get_full_id(pci_id)
 +    vendor_id = get_vendor_id(pci_id)
 +    drv_path = os.path.join(base_dir, "devices/%s/driver" % full_id)
 +    if 'pci-stub' in os.readlink(drv_path):
 +        cmd = "echo '%s' > %s/new_id" % (vendor_id, drv_path)
 +        if os.system(cmd):
 +            return False
 +
 +        stub_path = os.path.join(base_dir, "drivers/pci-stub")
 +        cmd = "echo '%s' > %s/unbind" % (full_id, stub_path)
 +        if os.system(cmd):
 +            return False
 +
 +        prev_driver = pci_dict[pci_id]
 +        cmd = "echo '%s' > %s/bind" % (full_id, prev_driver)
 +        if os.system(cmd):
 +            return False
 +    return True
 +
 +
 +def release_pci_devs(pci_dict):
 +    """
 +    Release all PCI devices assigned to host.
 +
 +    @param pci_dict: Dictionary with information about PCI devices
 +    """
 +    for pci_id in pci_dict:
 +        if not release_dev(pci_id, pci_dict):
 +            logging.error("Failed to release device [%s] to host" % pci_id)
 +        else:
 +            logging.info("Release device [%s] successfully" % pci_id)
 +
 +
 +class PassThrough(object):
 +    """
 +    Request passthroughable devices on host. It will check whether to 

[PATCH] v4: allow userspace to adjust kvmclock offset

2009-10-14 Thread Glauber Costa
When we migrate a kvm guest that uses pvclock between two hosts, we may
suffer a large skew. This is because there can be significant differences
between the monotonic clock of the hosts involved. When a new host with
a much larger monotonic time starts running the guest, the view of time
will be significantly impacted.

Situation is much worse when we do the opposite, and migrate to a host with
a smaller monotonic clock.

This proposed ioctl will allow userspace to inform us what is the monotonic
clock value in the source host, so we can keep the time skew short, and
more importantly, never goes backwards. Userspace may also need to trigger
the current data, since from the first migration onwards, it won't be
reflected by a simple call to clock_gettime() anymore.

[ v2: uses a struct with a padding ]
[ v3: provide an ioctl to get clock data too ]
[ v4: used fixed-width signed type for delta ]
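
As an illustration of the intended flow (a hypothetical userspace sketch,
not part of this patch; it assumes headers that already carry this patch's
struct kvm_clock_data and ioctl numbers):

/* save the guest clock on the source host, restore it on the target;
 * vmfd is an already-open VM file descriptor */
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

uint64_t kvmclock_save(int vmfd)
{
    struct kvm_clock_data data = { 0 };

    if (ioctl(vmfd, KVM_GET_CLOCK, &data) < 0)
        return 0;            /* old kernel: nothing to carry over */
    return data.clock;       /* ship this in the migration stream */
}

int kvmclock_restore(int vmfd, uint64_t clock)
{
    struct kvm_clock_data data = { .clock = clock };

    /* the kernel computes the delta against its own monotonic clock */
    return ioctl(vmfd, KVM_SET_CLOCK, &data);
}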

Signed-off-by: Glauber Costa glom...@redhat.com
---
 arch/x86/include/asm/kvm_host.h |1 +
 arch/x86/kvm/x86.c  |   35 ++-
 include/linux/kvm.h |7 +++
 3 files changed, 42 insertions(+), 1 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 179a919..c9b0d9f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -410,6 +410,7 @@ struct kvm_arch{
 
unsigned long irq_sources_bitmap;
u64 vm_init_tsc;
+   s64 kvmclock_offset;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9601bc6..09f31e2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -699,7 +699,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
/* With all the info we got, fill in the values */
 
 	vcpu->hv_clock.system_time = ts.tv_nsec +
-			(NSEC_PER_SEC * (u64)ts.tv_sec);
+			(NSEC_PER_SEC * (u64)ts.tv_sec) + v->kvm->arch.kvmclock_offset;
+
/*
 * The interface expects us to write an even number signaling that the
 * update is finished. Since the guest won't see the intermediate
@@ -2441,6 +2442,38 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+   case KVM_SET_CLOCK: {
+   struct timespec now;
+   struct kvm_clock_data user_ns;
+   u64 now_ns;
+   s64 delta;
+
+   r =  -EFAULT;
+   if (copy_from_user(user_ns, argp, sizeof(user_ns)))
+   goto out;
+
+   r = 0;
+   ktime_get_ts(now);
+   now_ns = timespec_to_ns(now);
+   delta = user_ns.clock - now_ns;
+   kvm->arch.kvmclock_offset = delta;
+   break;  
+   }
+   case KVM_GET_CLOCK: {
+   struct timespec now;
+   struct kvm_clock_data user_ns;
+   u64 now_ns;
+
+   ktime_get_ts(now);
+   now_ns = timespec_to_ns(now);
+   user_ns.clock = kvm->arch.kvmclock_offset + now_ns;
+
+   if (copy_to_user(argp, user_ns, sizeof(user_ns)))
+   r =  -EFAULT;
+
+   break;  
+   }
+
default:
;
}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index f8f8900..ad0ecbc 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -497,6 +497,11 @@ struct kvm_irqfd {
__u8  pad[20];
 };
 
+struct kvm_clock_data {
+   __u64 clock;
+   __u64 pad[2];
+};
+
 /*
  * ioctls for VM fds
  */
@@ -546,6 +551,8 @@ struct kvm_irqfd {
 #define KVM_CREATE_PIT2   _IOW(KVMIO, 0x77, struct 
kvm_pit_config)
 #define KVM_SET_BOOT_CPU_ID_IO(KVMIO, 0x78)
 #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
+#define KVM_SET_CLOCK             _IOW(KVMIO, 0x7a, struct kvm_clock_data)
+#define KVM_GET_CLOCK             _IOW(KVMIO, 0x7b, struct kvm_clock_data)
 
 /*
  * ioctls for vcpu fds
-- 
1.6.2.2

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported

2009-10-14 Thread Javier Guerra
On Wed, Oct 14, 2009 at 7:03 AM, Avi Kivity a...@redhat.com wrote:
 Early implementations of virtio devices did not support barrier operations,
 but did commit the data to disk.  In such cases, drain the queue to emulate
 barrier operations.

would this help on the (i think common) situation with XFS on a
virtio-enabled VM, using LVM-backed storage; where LVM just loses
barriers.

-- 
Javier
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Release plan for 0.12.0

2009-10-14 Thread Jamie Lokier
Michael S. Tsirkin wrote:
 On Wed, Oct 14, 2009 at 09:17:15AM -0500, Anthony Liguori wrote:
  Michael S. Tsirkin wrote:
  Looks like Or has abandoned it.  I have an updated version which works
  with new APIs, etc.  Let me post it and we'll go from there.
 

  I'm generally inclined to oppose the functionality as I don't think 
  it  offers any advantages over the existing backends.
  
 
  I patch it in and use it all the time.  It's much easier to setup
  on a random machine than a bridged config.

 
  Having two things that do the same thing is just going to lead to user  
  confusion.
 
 They do not do the same thing. With raw socket you can use windows
 update without a bridge in the host, with tap you can't.

On the other hand, with raw socket, guest Windows can't access files
on the host's Samba share can it?  So it's not that useful even for
Windows guests.

  If the problem is tap is too hard to setup, we should try to  
  simplify tap configuration.
 
 The problem is bridge is too hard to setup.
 Simplifying that is a good idea, but outside the scope
 of the qemu project.

I venture it's important enough for qemu that it's worth working on
that.  Something that looks like the raw socket but behaves like an
automatically instantiated bridge attached to the bound interface
would be a useful interface.

I don't have much time, but I'll help anybody who wants to do that.

-- Jamie
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported

2009-10-14 Thread Michael Tokarev

Avi Kivity wrote:

Early implementations of virtio devices did not support barrier operations,
but did commit the data to disk.  In such cases, drain the queue to emulate
barrier operations.


Are there any implementations currently that actually support barriers?
As far as I remember there's no way to invoke barriers from a user-space
application on Linux, and this is how kvm/qemu runs on this OS.

Thanks!

/mjt
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported

2009-10-14 Thread Christoph Hellwig
On Wed, Oct 14, 2009 at 07:38:45PM +0400, Michael Tokarev wrote:
 Avi Kivity wrote:
 Early implementations of virtio devices did not support barrier operations,
 but did commit the data to disk.  In such cases, drain the queue to emulate
 barrier operations.
 
 Are there any implementation currently that actually supports barriers?
 As far as I remember there's no way to invoke barriers from a user-space
 application on linux, and this is how kvm/qemu is running on this OS.

Ignore all the barrier talk.  The way Linux uses the various storage
transports, the primitives are queue draining (done entirely in the guest
block layer) and cache flushes.  Fdatasync is exactly the same primitive
as a WIN FLUSH CACHE in ATA or SYNCHRONIZE CACHE in SCSI, modulo the lack
of ranges in fdatasync - but that is just a performance optimization and
not actually used by Linux guests for now.
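
In userspace terms that flush primitive is just fdatasync(); a minimal
sketch of how a host process would persist a guest cache flush ("disk.img"
is an example path):

/* write, then force the data out of the host's volatile caches */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    char buf[512] = "guest data";
    int fd = open("disk.img", O_RDWR);

    if (fd < 0)
        return 1;
    pwrite(fd, buf, sizeof(buf), 0);  /* guest write: may sit in cache */
    if (fdatasync(fd) < 0)            /* guest flush: reach the device */
        perror("fdatasync");
    close(fd);
    return 0;
}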

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Release plan for 0.12.0

2009-10-14 Thread Michael S. Tsirkin
On Wed, Oct 14, 2009 at 04:19:17PM +0100, Jamie Lokier wrote:
 Michael S. Tsirkin wrote:
  On Wed, Oct 14, 2009 at 09:17:15AM -0500, Anthony Liguori wrote:
   Michael S. Tsirkin wrote:
   Looks like Or has abandoned it.  I have an updated version which works
   with new APIs, etc.  Let me post it and we'll go from there.
  
 
   I'm generally inclined to oppose the functionality as I don't think 
   it  offers any advantages over the existing backends.
   
  
   I patch it in and use it all the time.  It's much easier to setup
   on a random machine than a bridged config.
 
  
   Having two things that do the same thing is just going to lead to user  
   confusion.
  
  They do not do the same thing. With raw socket you can use windows
  update without a bridge in the host, with tap you can't.
 
 On the other hand, with raw socket, guest Windows can't access files
 on the host's Samba share can it?  So it's not that useful even for
 Windows guests.

I guess this depends on whether you use the same host for samba :)

   If the problem is tap is too hard to setup, we should try to  
   simplify tap configuration.
  
  The problem is bridge is too hard to setup.
  Simplifying that is a good idea, but outside the scope
  of the qemu project.
 
 I venture it's important enough for qemu that it's worth working on
 that.  Something that looks like the raw socket but behaves like an
 automatically instantiated bridge attached to the bound interface
 would be a useful interface.

I agree, that would be good to have.

 I don't have much time, but I'll help anybody who wants to do that.
 
 -- Jamie
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Latest -git qemu-kvm doesn't boot an x86 kernel

2009-10-14 Thread Aneesh Kumar K.V
Hi,

I am trying qemu-system-x86_64 on a x86 host running 2.6.30-2 (debian testing)
kernel and trying to boot the latest Linus git kernel (x86). The kernel hangs
after printing the below:

[   4.394392] ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 11
[    4.397837] virtio-pci 0000:00:03.0: PCI INT A -> Link[LNKC] -> GSI 11 (level, high) -> IRQ 11
[    4.436489] ACPI: PCI Interrupt Link [LNKD] enabled at IRQ 10
[    4.439829] virtio-pci 0000:00:04.0: PCI INT A -> Link[LNKD] -> GSI 10 (level, high) -> IRQ 10
[    4.462538]  vda:
[    4.526913] input: ImExPS/2 Generic Explorer Mouse as /devices/platform/i8042/serio1/input/input3
[    5.349554] async/1 used greatest stack depth: 5872 bytes left


An earlier version of kvm booted the new kernel fine, so the VM disk image and
user space should all be fine. The older version of kvm that worked fine is
QEMU PC emulator version 0.10.50 (kvm-devel)


Any patches i need to try ?

-aneesh
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/3] get rid of kvm vcpu structure

2009-10-14 Thread Glauber Costa
Hello,

Done in three parts, the following patches get rid of vcpu structure in 
qemu-kvm.
All state is now held in CPUState, getting us a bit closer to upstream qemu 
again.
The last pass converts us to the use of kvm_vcpu_ioctl, allowing more code to 
be shared.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] change function signatures so that they don't take a vcpu argument

2009-10-14 Thread Glauber Costa
At this point, vcpu arguments are passed only for the fd field.
We already provide that in env, as kvm_fd. Replace it.

Signed-off-by: Glauber Costa glom...@redhat.com
---
 cpu-defs.h |1 -
 hw/apic.c  |4 +-
 kvm-tpr-opt.c  |   16 +-
 qemu-kvm-x86.c |   91 ++--
 qemu-kvm.c |   97 +++
 qemu-kvm.h |   74 ++-
 6 files changed, 134 insertions(+), 149 deletions(-)

diff --git a/cpu-defs.h b/cpu-defs.h
index 1f48267..cf502e9 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -141,7 +141,6 @@ struct qemu_work_item;
 struct KVMCPUState {
 pthread_t thread;
 int signalled;
-void *vcpu_ctx;
 struct qemu_work_item *queued_work_first, *queued_work_last;
 int regs_modified;
 };
diff --git a/hw/apic.c b/hw/apic.c
index b8fe529..9e707bd 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -900,7 +900,7 @@ static void kvm_kernel_lapic_save_to_user(APICState *s)
 struct kvm_lapic_state *kapic = apic;
 int i, v;
 
-    kvm_get_lapic(s->cpu_env->kvm_cpu_state.vcpu_ctx, kapic);
+    kvm_get_lapic(s->cpu_env, kapic);
 
     s->id = kapic_reg(kapic, 0x2) >> 24;
     s->tpr = kapic_reg(kapic, 0x8);
@@ -953,7 +953,7 @@ static void kvm_kernel_lapic_load_from_user(APICState *s)
     kapic_set_reg(klapic, 0x38, s->initial_count);
     kapic_set_reg(klapic, 0x3e, s->divide_conf);
 
-    kvm_set_lapic(s->cpu_env->kvm_cpu_state.vcpu_ctx, klapic);
+    kvm_set_lapic(s->cpu_env, klapic);
 }
 
 #endif
diff --git a/kvm-tpr-opt.c b/kvm-tpr-opt.c
index f7b6f3b..932b49b 100644
--- a/kvm-tpr-opt.c
+++ b/kvm-tpr-opt.c
@@ -70,7 +70,7 @@ static uint8_t read_byte_virt(CPUState *env, target_ulong 
virt)
 {
 struct kvm_sregs sregs;
 
-kvm_get_sregs(env-kvm_cpu_state.vcpu_ctx, sregs);
+kvm_get_sregs(env, sregs);
 return ldub_phys(map_addr(sregs, virt, NULL));
 }
 
@@ -78,7 +78,7 @@ static void write_byte_virt(CPUState *env, target_ulong virt, 
uint8_t b)
 {
 struct kvm_sregs sregs;
 
-kvm_get_sregs(env-kvm_cpu_state.vcpu_ctx, sregs);
+kvm_get_sregs(env, sregs);
 stb_phys(map_addr(sregs, virt, NULL), b);
 }
 
@@ -86,7 +86,7 @@ static __u64 kvm_rsp_read(CPUState *env)
 {
 struct kvm_regs regs;
 
-kvm_get_regs(env-kvm_cpu_state.vcpu_ctx, regs);
+kvm_get_regs(env, regs);
 return regs.rsp;
 }
 
@@ -192,7 +192,7 @@ static int bios_is_mapped(CPUState *env, uint64_t rip)
 if (bios_enabled)
return 1;
 
-kvm_get_sregs(env-kvm_cpu_state.vcpu_ctx, sregs);
+kvm_get_sregs(env, sregs);
 
 probe = (rip  0xf000) + 0xe;
 phys = map_addr(sregs, probe, perms);
@@ -240,7 +240,7 @@ static int enable_vapic(CPUState *env)
 if (pcr_cpu  0)
return 0;
 
-    kvm_enable_vapic(env->kvm_cpu_state.vcpu_ctx, vapic_phys + (pcr_cpu << 7));
+    kvm_enable_vapic(env, vapic_phys + (pcr_cpu << 7));
     cpu_physical_memory_rw(vapic_phys + (pcr_cpu << 7) + 4, &one, 1, 1);
 bios_enabled = 1;
 
@@ -313,7 +313,7 @@ void kvm_tpr_access_report(CPUState *env, uint64_t rip, int 
is_write)
 
 void kvm_tpr_vcpu_start(CPUState *env)
 {
-kvm_enable_tpr_access_reporting(env-kvm_cpu_state.vcpu_ctx);
+kvm_enable_tpr_access_reporting(env);
 if (bios_enabled)
enable_vapic(env);
 }
@@ -363,7 +363,7 @@ static void vtpr_ioport_write(void *opaque, uint32_t addr, 
uint32_t val)
 struct kvm_sregs sregs;
 uint32_t rip;
 
-kvm_get_regs(env-kvm_cpu_state.vcpu_ctx, regs);
+kvm_get_regs(env, regs);
 rip = regs.rip - 2;
 write_byte_virt(env, rip, 0x66);
 write_byte_virt(env, rip + 1, 0x90);
@@ -371,7 +371,7 @@ static void vtpr_ioport_write(void *opaque, uint32_t addr, 
uint32_t val)
return;
 if (!bios_is_mapped(env, rip))
printf(bios not mapped?\n);
-    kvm_get_sregs(env->kvm_cpu_state.vcpu_ctx, &sregs);
+    kvm_get_sregs(env, &sregs);
     for (addr = 0xfffff000u; addr >= 0x80000000u; addr -= 4096)
 	if (map_addr(&sregs, addr, NULL) == 0xfee00000u) {
real_tpr = addr + 0x80;
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index fffcfd8..8c4140d 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -172,14 +172,14 @@ static int kvm_handle_tpr_access(CPUState *env)
 }
 
 
-int kvm_enable_vapic(kvm_vcpu_context_t vcpu, uint64_t vapic)
+int kvm_enable_vapic(CPUState *env, uint64_t vapic)
 {
int r;
struct kvm_vapic_addr va = {
.vapic_addr = vapic,
};
 
-   r = ioctl(vcpu->fd, KVM_SET_VAPIC_ADDR, &va);
+   r = ioctl(env->kvm_fd, KVM_SET_VAPIC_ADDR, &va);
if (r == -1) {
r = -errno;
perror(kvm_enable_vapic);
@@ -281,12 +281,12 @@ int kvm_destroy_memory_alias(kvm_context_t kvm, uint64_t 
phys_start)
 
 #ifdef KVM_CAP_IRQCHIP
 
-int kvm_get_lapic(kvm_vcpu_context_t vcpu, struct kvm_lapic_state *s)
+int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s)
 {
int r;
if 

[PATCH 2/3] get rid of vcpu structure

2009-10-14 Thread Glauber Costa
We have no use for it anymore. Only trace of it was in vcpu_create.
Make it disappear.

Signed-off-by: Glauber Costa glom...@redhat.com
---
 qemu-kvm.c |   11 +++
 qemu-kvm.h |5 -
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/qemu-kvm.c b/qemu-kvm.c
index 700d030..7943281 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -440,16 +440,13 @@ static void kvm_create_vcpu(CPUState *env, int id)
 {
 long mmap_size;
 int r;
-kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context));
 
 r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id);
 if (r  0) {
 fprintf(stderr, kvm_create_vcpu: %m\n);
-goto err;
+return;
 }
 
-vcpu_ctx-fd = r;
-
 env-kvm_fd = r;
 env-kvm_state = kvm_state;
 
@@ -459,7 +456,7 @@ static void kvm_create_vcpu(CPUState *env, int id)
 goto err_fd;
 }
 env-kvm_run =
-mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_ctx-fd,
+mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, env-kvm_fd,
  0);
 if (env-kvm_run == MAP_FAILED) {
 fprintf(stderr, mmap vcpu area: %m\n);
@@ -468,9 +465,7 @@ static void kvm_create_vcpu(CPUState *env, int id)
 
 return;
   err_fd:
-close(vcpu_ctx-fd);
-  err:
-free(vcpu_ctx);
+close(env-kvm_fd);
 }
 
 static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id)
diff --git a/qemu-kvm.h b/qemu-kvm.h
index abcb98d..588bc80 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -76,12 +76,7 @@ struct kvm_context {
 int max_gsi;
 };
 
-struct kvm_vcpu_context {
-int fd;
-};
-
 typedef struct kvm_context *kvm_context_t;
-typedef struct kvm_vcpu_context *kvm_vcpu_context_t;
 
 #include kvm.h
 int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory,
-- 
1.6.2.5

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] use upstream kvm_vcpu_ioctl

2009-10-14 Thread Glauber Costa
Signed-off-by: Glauber Costa glom...@redhat.com
---
 kvm-all.c  |3 ---
 qemu-kvm-x86.c |   20 ++--
 qemu-kvm.c |   26 +-
 qemu-kvm.h |1 +
 4 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/kvm-all.c b/kvm-all.c
index 1356aa8..5ea999e 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -861,7 +861,6 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
 return ret;
 }
 
-#ifdef KVM_UPSTREAM
 int kvm_vcpu_ioctl(CPUState *env, int type, ...)
 {
 int ret;
@@ -879,8 +878,6 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...)
 return ret;
 }
 
-#endif
-
 int kvm_has_sync_mmu(void)
 {
 #ifdef KVM_CAP_SYNC_MMU
diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 8c4140d..fd0e6a9 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -179,7 +179,7 @@ int kvm_enable_vapic(CPUState *env, uint64_t vapic)
.vapic_addr = vapic,
};
 
-   r = ioctl(env->kvm_fd, KVM_SET_VAPIC_ADDR, &va);
+   r = kvm_vcpu_ioctl(env, KVM_SET_VAPIC_ADDR, &va);
if (r == -1) {
r = -errno;
perror(kvm_enable_vapic);
@@ -286,7 +286,7 @@ int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s)
int r;
if (!kvm_irqchip_in_kernel())
return 0;
-   r = ioctl(env-kvm_fd, KVM_GET_LAPIC, s);
+   r = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, s);
if (r == -1) {
r = -errno;
perror(kvm_get_lapic);
@@ -299,7 +299,7 @@ int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s)
int r;
if (!kvm_irqchip_in_kernel())
return 0;
-   r = ioctl(env-kvm_fd, KVM_SET_LAPIC, s);
+   r = kvm_vcpu_ioctl(env, KVM_SET_LAPIC, s);
if (r == -1) {
r = -errno;
perror(kvm_set_lapic);
@@ -424,7 +424,7 @@ int kvm_get_msrs(CPUState *env, struct kvm_msr_entry *msrs, 
int n)
 
 kmsrs-nmsrs = n;
 memcpy(kmsrs-entries, msrs, n * sizeof *msrs);
-r = ioctl(env-kvm_fd, KVM_GET_MSRS, kmsrs);
+r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs);
 e = errno;
 memcpy(msrs, kmsrs-entries, n * sizeof *msrs);
 free(kmsrs);
@@ -439,7 +439,7 @@ int kvm_set_msrs(CPUState *env, struct kvm_msr_entry *msrs, 
int n)
 
 kmsrs-nmsrs = n;
 memcpy(kmsrs-entries, msrs, n * sizeof *msrs);
-r = ioctl(env-kvm_fd, KVM_SET_MSRS, kmsrs);
+r = kvm_vcpu_ioctl(env, KVM_SET_MSRS, kmsrs);
 e = errno;
 free(kmsrs);
 errno = e;
@@ -464,7 +464,7 @@ int kvm_get_mce_cap_supported(kvm_context_t kvm, uint64_t 
*mce_cap,
 int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap)
 {
 #ifdef KVM_CAP_MCE
-return ioctl(env-kvm_fd, KVM_X86_SETUP_MCE, mcg_cap);
+return kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap);
 #else
 return -ENOSYS;
 #endif
@@ -473,7 +473,7 @@ int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap)
 int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m)
 {
 #ifdef KVM_CAP_MCE
-return ioctl(env-kvm_fd, KVM_X86_SET_MCE, m);
+return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m);
 #else
 return -ENOSYS;
 #endif
@@ -563,7 +563,7 @@ int kvm_setup_cpuid(CPUState *env, int nent,
 
cpuid-nent = nent;
memcpy(cpuid-entries, entries, nent * sizeof(*entries));
-   r = ioctl(env-kvm_fd, KVM_SET_CPUID, cpuid);
+   r = kvm_vcpu_ioctl(env, KVM_SET_CPUID, cpuid);
 
free(cpuid);
return r;
@@ -579,7 +579,7 @@ int kvm_setup_cpuid2(CPUState *env, int nent,
 
cpuid-nent = nent;
memcpy(cpuid-entries, entries, nent * sizeof(*entries));
-   r = ioctl(env-kvm_fd, KVM_SET_CPUID2, cpuid);
+   r = kvm_vcpu_ioctl(env, KVM_SET_CPUID2, cpuid);
if (r == -1) {
fprintf(stderr, kvm_setup_cpuid2: %m\n);
r = -errno;
@@ -634,7 +634,7 @@ static int tpr_access_reporting(CPUState *env, int enabled)
r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_VAPIC);
if (r = 0)
return -ENOSYS;
-   r = ioctl(env-kvm_fd, KVM_TPR_ACCESS_REPORTING, tac);
+   r = kvm_vcpu_ioctl(env, KVM_TPR_ACCESS_REPORTING, tac);
if (r == -1) {
r = -errno;
perror(KVM_TPR_ACCESS_REPORTING);
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 7943281..5284426 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -816,32 +816,32 @@ static int handle_debug(CPUState *env)
 
 int kvm_get_regs(CPUState *env, struct kvm_regs *regs)
 {
-return ioctl(env-kvm_fd, KVM_GET_REGS, regs);
+return kvm_vcpu_ioctl(env, KVM_GET_REGS, regs);
 }
 
 int kvm_set_regs(CPUState *env, struct kvm_regs *regs)
 {
-return ioctl(env-kvm_fd, KVM_SET_REGS, regs);
+return kvm_vcpu_ioctl(env, KVM_SET_REGS, regs);
 }
 
 int kvm_get_fpu(CPUState *env, struct kvm_fpu *fpu)
 {
-return ioctl(env-kvm_fd, KVM_GET_FPU, fpu);
+return kvm_vcpu_ioctl(env, KVM_GET_FPU, fpu);
 }
 
 int kvm_set_fpu(CPUState *env, struct kvm_fpu *fpu)
 {
-return ioctl(env-kvm_fd, KVM_SET_FPU, fpu);
+return 

Re: [PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported

2009-10-14 Thread Avi Kivity

On 10/14/2009 11:46 PM, Javier Guerra wrote:

On Wed, Oct 14, 2009 at 7:03 AM, Avi Kivitya...@redhat.com  wrote:
   

Early implementations of virtio devices did not support barrier operations,
but did commit the data to disk.  In such cases, drain the queue to emulate
barrier operations.
 

would this help on the (i think common) situation with XFS on a
virtio-enabled VM, using LVM-backed storage; where LVM just loses
barriers.
   


No, it's a guest only patch.  If LVM loses barriers, I don't think 
anything can restore them.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: sync guest calls made async on host - SQLite performance

2009-10-14 Thread Avi Kivity

On 10/14/2009 10:41 PM, Christoph Hellwig wrote:

But can't this be also implemented using QUEUE_ORDERED_DRAIN, and on the
host side disabling the backing device write cache?  I'm talking about
cache=none, primarily.
 

Yes, it could.  But as I found out in a long discussion with Stephen
it's not actually nessecary.  All filesystems do the right thing for
a device not claiming to support barriers if it doesn't include write
caches, that is implement ordering internally.  So there is no urge to
set QUEUE_ORDERED_DRAIN for the case without write cache.
   


Does virtio say it has a write cache or not (and how does one say it?)?

According to the report, a write+fdatasync completes too fast, at least 
on Ubuntu's qemu.  So perhaps somewhere this information is lost.


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: sync guest calls made async on host - SQLite performance

2009-10-14 Thread Christoph Hellwig
On Thu, Oct 15, 2009 at 01:56:40AM +0900, Avi Kivity wrote:
 Does virtio say it has a write cache or not (and how does one say it?)?

Historically it didn't and the only safe way to use virtio was in
cache=writethrough mode.  Since qemu git as of 4th Sempember and Linux
2.6.32-rc there is a virtio-blk feature to communicate the existance
of a volatile write cache, and the support for a cache flush command.
With the combination of these two data=writeback and data=none modes
are safe for the first time.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: kernel bug in kvm_intel

2009-10-14 Thread Avi Kivity

On 10/13/2009 11:04 PM, Andrew Theurer wrote:



Look at the address where vmx_vcpu_run starts, add 0x26d, and show the
surrounding code.

Thinking about it, it probably _is_ what you showed, due to module page
alignment.  But please verify this; I can't reconcile the fault address
(9fe9a2b) with %rsp at the time of the fault.
 

Here is the start of the function:

   

0000000000003884 <vmx_vcpu_run>:
    3884:   55                      push   %rbp
    3885:   48 89 e5                mov    %rsp,%rbp
 

and 0x26d later is 0x3af1:

   

 3ad2:   4c 8b b1 88 01 00 00    mov    0x188(%rcx),%r14
 3ad9:   4c 8b b9 90 01 00 00    mov    0x190(%rcx),%r15
 3ae0:   48 8b 89 20 01 00 00    mov    0x120(%rcx),%rcx
 3ae7:   75 05                   jne    3aee <vmx_vcpu_run+0x26a>
 3ae9:   0f 01 c2                vmlaunch
 3aec:   eb 03                   jmp    3af1 <vmx_vcpu_run+0x26d>
 3aee:   0f 01 c3                vmresume
 3af1:   48 87 0c 24             xchg   %rcx,(%rsp)
 3af5:   48 89 81 18 01 00 00    mov    %rax,0x118(%rcx)
 3afc:   48 89 99 30 01 00 00    mov    %rbx,0x130(%rcx)
 3b03:   ff 34 24                pushq  (%rsp)
 3b06:   8f 81 20 01 00 00       popq   0x120(%rcx)
 




Ok.  So it faults on the xchg instruction, rsp is ffff8806369ffc80 but 
the fault address is ffffffff9fe9a2b4.  So it looks like the IDT is 
corrupted.


Can you check what's around ffffffff9fe9a2b4 in System.map?

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [STABLE PATCH] hotplug: fix scsi hotplug.

2009-10-14 Thread Dustin Kirkland
On Wed, Oct 14, 2009 at 8:30 AM, Gerd Hoffmann kra...@redhat.com wrote:
 Well, partly just papering over the issues.  But without proper scsi bus
 infrastructure we can hardly do better.  Changes:

  * Avoid auto-attach by setting the bus number to -1.
  * Ignore the unit value calculated by drive_init().
  * Explicitly attach the devices to the adapter.
  * Add sanity checks.  Don't allow attaching scsi drives to your
   network device.
  * Kill the bus+unit printing.  The values are bogus, and we can't
   easily figure the correct ones.  I doubt this ever worked correctly
   with multiple scsi adapters present in the system.

 Should come closer to the expected behavior now ...

 Oh, and pc-bios/bios.bin needs a update too, otherwise pci hotplug
 doesn't work at all.

 Signed-off-by: Gerd Hoffmann kra...@redhat.com
 ---
  hw/pci-hotplug.c |   24 +++-
  pc-bios/bios.bin |  Bin 131072 - 131072 bytes
  2 files changed, 19 insertions(+), 5 deletions(-)

 diff --git a/hw/pci-hotplug.c b/hw/pci-hotplug.c
 index d0f2911..8bedea2 100644
 --- a/hw/pci-hotplug.c
 +++ b/hw/pci-hotplug.c
 @@ -52,9 +52,10 @@ void drive_hot_add(Monitor *mon, const char *pci_addr, 
 const char *opts)
  {
     int dom, pci_bus;
     unsigned slot;
 -    int drive_idx, type, bus;
 +    int drive_idx, type;
     int success = 0;
     PCIDevice *dev;
 +    char buf[128];

     if (pci_read_devaddr(mon, pci_addr, dom, pci_bus, slot)) {
         return;
 @@ -74,11 +75,19 @@ void drive_hot_add(Monitor *mon, const char *pci_addr, 
 const char *opts)
         return;
     }
     type = drives_table[drive_idx].type;
 -    bus = drive_get_max_bus (type);

     switch (type) {
     case IF_SCSI:
 +        if (!dev-qdev.info || strcmp(dev-qdev.info-name, lsi53c895a) != 
 0) {
 +            monitor_printf(mon, Device is not a scsi adapter\n);
 +            break;
 +        }
         success = 1;
 +        drives_table[drive_idx].bus = -1;
 +        drives_table[drive_idx].unit = -1;
 +        if (get_param_value(buf, sizeof(buf), unit, opts)) {
 +            drives_table[drive_idx].unit = atoi(buf);
 +        }
         lsi_scsi_attach(dev-qdev, drives_table[drive_idx].bdrv,
                         drives_table[drive_idx].unit);
         break;
 @@ -87,9 +96,7 @@ void drive_hot_add(Monitor *mon, const char *pci_addr, 
 const char *opts)
     }

     if (success)
 -        monitor_printf(mon, OK bus %d, unit %d\n,
 -                       drives_table[drive_idx].bus,
 -                       drives_table[drive_idx].unit);
 +        monitor_printf(mon, OK\n);
     return;
  }

 @@ -130,7 +137,14 @@ static PCIDevice *qemu_pci_hot_add_storage(Monitor *mon,

     switch (type) {
     case IF_SCSI:
 +        drives_table[drive_idx].bus = -1;
 +        drives_table[drive_idx].unit = -1;
 +        if (get_param_value(buf, sizeof(buf), unit, opts)) {
 +            drives_table[drive_idx].unit = atoi(buf);
 +        }
         dev = pci_create(lsi53c895a, devaddr);
 +        lsi_scsi_attach(dev-qdev, drives_table[drive_idx].bdrv,
 +                        drives_table[drive_idx].unit);
         break;
     case IF_VIRTIO:
         dev = pci_create(virtio-blk-pci, devaddr);

Thanks, Gerd.

I applied this patch against qemu-kvm-0.11.0 stable, built, and tested
it.  I can verify that it fixes the scsi hot-add issues I was seeing.
I am now able to add/remove/add/remove/add/remove a scsi disk to a
running instance without segfaulting qemu.

Note that on remove, I do get a stack trace in the guest's kernel
(2.6.31), though the remove does succeed, and the disk disappears.

Also note that I did not replace the bios.bin, as it appears to me
that the qemu-kvm-0.11 bios.bin is working properly.

Tested-by: Dustin Kirkland kirkl...@canonical.com
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: fix MSR_COUNT for kvm_arch_save_regs()

2009-10-14 Thread Eduardo Habkost

A new register was added to the load/save list on commit
d283d5a65a2bdcc570065267be21848bd6fe3d78, but MSR_COUNT was not updated, leading
to potential stack corruption on kvm_arch_save_regs().

The following registers are saved by kvm_arch_save_regs():

 1) MSR_IA32_SYSENTER_CS
 2) MSR_IA32_SYSENTER_ESP
 3) MSR_IA32_SYSENTER_EIP
 4) MSR_STAR
 5) MSR_IA32_TSC
 6) MSR_VM_HSAVE_PA
 7) MSR_CSTAR (x86_64 only)
 8) MSR_KERNELGSBASE (x86_64 only)
 9) MSR_FMASK (x86_64 only)
10) MSR_LSTAR (x86_64 only)
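
A sturdier variant, sketched here as a hypothetical alternative rather than
what this patch does, would derive the count from a single table so the
list and the count cannot drift apart again:

/* one table drives both MSR_COUNT and the save/load loops */
static const uint32_t msrs_to_save[] = {
    MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
    MSR_STAR, MSR_IA32_TSC, MSR_VM_HSAVE_PA,
#ifdef TARGET_X86_64
    MSR_CSTAR, MSR_KERNELGSBASE, MSR_FMASK, MSR_LSTAR,
#endif
};
#define MSR_COUNT (sizeof(msrs_to_save) / sizeof(msrs_to_save[0]))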

Signed-off-by: Eduardo Habkost ehabk...@redhat.com
---
 qemu-kvm-x86.c |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index acb1b91..81d2c53 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -847,9 +847,9 @@ static int get_msr_entry(struct kvm_msr_entry *entry, 
CPUState *env)
 }
 
 #ifdef TARGET_X86_64
-#define MSR_COUNT 9
+#define MSR_COUNT 10
 #else
-#define MSR_COUNT 5
+#define MSR_COUNT 6
 #endif
 
 static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs)
@@ -991,6 +991,7 @@ void kvm_arch_load_regs(CPUState *env)
 
 /* msrs */
 n = 0;
+/* Remember to increase MSR_COUNT if you add new registers below */
 set_msr_entry(msrs[n++], MSR_IA32_SYSENTER_CS,  env-sysenter_cs);
 set_msr_entry(msrs[n++], MSR_IA32_SYSENTER_ESP, env-sysenter_esp);
 set_msr_entry(msrs[n++], MSR_IA32_SYSENTER_EIP, env-sysenter_eip);
@@ -1168,6 +1169,7 @@ void kvm_arch_save_regs(CPUState *env)
 
 /* msrs */
 n = 0;
+/* Remember to increase MSR_COUNT if you add new registers below */
 msrs[n++].index = MSR_IA32_SYSENTER_CS;
 msrs[n++].index = MSR_IA32_SYSENTER_ESP;
 msrs[n++].index = MSR_IA32_SYSENTER_EIP;
-- 
1.6.3.rc4.29.g8146

-- 
Eduardo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCHv2 1/2] Complete cpu initialization before signaling main thread.

2009-10-14 Thread Marcelo Tosatti
On Wed, Oct 14, 2009 at 03:52:31PM +0200, Gleb Natapov wrote:
 Otherwise some cpus may start executing code before others
 are fully initialized.
 
 Signed-off-by: Gleb Natapov g...@redhat.com

Applied both, thanks.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] v4: allow userspace to adjust kvmclock offset

2009-10-14 Thread Marcelo Tosatti
On Wed, Oct 14, 2009 at 10:47:46AM -0400, Glauber Costa wrote:
 When we migrate a kvm guest that uses pvclock between two hosts, we may
 suffer a large skew. This is because there can be significant differences
 between the monotonic clock of the hosts involved. When a new host with
 a much larger monotonic time starts running the guest, the view of time
 will be significantly impacted.
 
 Situation is much worse when we do the opposite, and migrate to a host with
 a smaller monotonic clock.
 
 This proposed ioctl will allow userspace to inform us what is the monotonic
 clock value in the source host, so we can keep the time skew short, and
 more importantly, never goes backwards. Userspace may also need to trigger
 the current data, since from the first migration onwards, it won't be
 reflected by a simple call to clock_gettime() anymore.
 
 [ v2: uses a struct with a padding ]
 [ v3: provide an ioctl to get clock data too ]
 [ v4: used fixed-width signed type for delta ]
 
 Signed-off-by: Glauber Costa glom...@redhat.com
 ---
  arch/x86/include/asm/kvm_host.h |1 +
  arch/x86/kvm/x86.c  |   35 ++-
  include/linux/kvm.h |7 +++
  3 files changed, 42 insertions(+), 1 deletions(-)
 
 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
 index 179a919..c9b0d9f 100644
 --- a/arch/x86/include/asm/kvm_host.h
 +++ b/arch/x86/include/asm/kvm_host.h
 @@ -410,6 +410,7 @@ struct kvm_arch{
  
   unsigned long irq_sources_bitmap;
   u64 vm_init_tsc;
 + s64 kvmclock_offset;
  };
  
  struct kvm_vm_stat {
 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
 index 9601bc6..09f31e2 100644
 --- a/arch/x86/kvm/x86.c
 +++ b/arch/x86/kvm/x86.c
 @@ -699,7 +699,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
   /* With all the info we got, fill in the values */
  
   vcpu-hv_clock.system_time = ts.tv_nsec +
 -  (NSEC_PER_SEC * (u64)ts.tv_sec);
 +  (NSEC_PER_SEC * (u64)ts.tv_sec) + 
 v-kvm-arch.kvmclock_offset;
 +
   /*
* The interface expects us to write an even number signaling that the
* update is finished. Since the guest won't see the intermediate
 @@ -2441,6 +2442,38 @@ long kvm_arch_vm_ioctl(struct file *filp,
   r = 0;
   break;
   }
 + case KVM_SET_CLOCK: {
 + struct timespec now;
 + struct kvm_clock_data user_ns;
 + u64 now_ns;
 + s64 delta;
 +
 + r =  -EFAULT;

Extra space :)

  #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct 
 kvm_pit_config)
  #define KVM_SET_BOOT_CPU_ID_IO(KVMIO, 0x78)
  #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
 +#define KVM_SET_CLOCK  _IOW(KVMIO, 0x7a, struct 
 kvm_clock_data)
 +#define KVM_GET_CLOCK  _IOW(KVMIO, 0x7b, struct 
 kvm_clock_data)
  _IOR

Otherwise looks fine, please send the userspace changes together.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Added VM Exit on RDTSC, trouble handling in userspace

2009-10-14 Thread Marcelo Tosatti
On Tue, Oct 13, 2009 at 10:51:48PM -0700, Kurt Kiefer wrote:
 -BEGIN PGP SIGNED MESSAGE-
 Hash: SHA1

 Hi all,

 In short, I have a need for trapping RDTSC with a VM Exit and this  
 works, but I'm having trouble handling it in userspace. I have added the 
 hooks I need (I only care about VMX right now), but a piece of the  
 puzzle is missing and I don't know which. When I go back to userspace,  
 it's triggering a different (faulty) execution vs. handling only in the 
 kernel. Here's what I've done:


 1. Added the CPU_BASED_RDTSC_EXITING flag to MSR_IA32_VMX_PROCBASED_CTLS 
 in vmx.c:setup_vmcs_config()


 2. Defined KVM_EXIT_RDTSC, and hooked into EXIT_REASON_RDTSC my handler 
 for the exit:

 static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 struct kvm_run *kvm_run) = {
 // ...
   [EXIT_REASON_RDTSC]   = handle_rdtsc,
 // ...
 }

 static int handle_rdtsc(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
   u64 data;

   if (vmx_get_msr(vcpu, MSR_IA32_TIME_STAMP_COUNTER, &data)) {
     kvm_inject_gp(vcpu, 0);
     return 1;
   }
 
   vcpu->run->exit_reason = KVM_EXIT_RDTSC;
   vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
   vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;

   skip_emulated_instruction(vcpu);

   // flag a need for userspace invervention
   // note: this works when we return 1 and we don't involve userspace
   return 0;
 }


 3. Handle KVM_EXIT_RDTSC in libkvm.c:kvm_run() :

 case KVM_EXIT_RDTSC:
   r = handle_rdtsc_usp(kvm, vcpu, env);
   break;

 via a handler where I do _nothing_ :

 static int handle_rdtsc_usp(kvm_context_t kvm, int vcpu, void *data)
 {
   return 0;
 }



 All well and good, right? I can add print statements to my userspace  
 handle_rtsc_usp() and see I get in there just fine. However, when I try 
 to boot Linux, the following code is called over and over and over, and 
 Linux will never load:

 Breakpoint 4, 0xc01103d3 in ?? ()
 (gdb) x/10i $rip-10
0xc01103c9:   lea    0x0(%rdi,%riz,1),%edi
 0xc01103d0:   push   %rbp
 0xc01103d1:   mov%esp,%ebp
 0xc01103d3:   rdtsc
 0xc01103d5:   pop%rbp
 0xc01103d6:   retq

 If I only handle the exit in the kernel (by returning 1 from  
 handle_rdtsc()), everything works and Linux will load! I counted the  
 number of RDTSC exits before linux fully loads to be somewhere around  
 20. If I exit all the way to userspace (return 0 in my  
 handle_rdtsc()) that count is infinitely surpassed in number of exits,  
 wall time, and the value of RDTSC.

 So is anything glaringly wrong with my modifications? Maybe there is  
 there some extra state that needs to be restored on VM entry? Is there  
 an interrupt flag that needs to be cleared? Maybe I need to do something 
 with kvm_run.if_flag or kvm_run.ready_for_interrupt_injection? Please, I 
 need help, I'm losing sleep over this!

Can't see anything wrong. Perhaps the userspace exit breaks a latency
assumption of that algorithm (can you translate it to function names?).

Try the tsc.flat test (from qemu-kvm.git):

x86_64-softmmu/qemu-system-x86_64 -chardev file,path=/tmp/log.txt,id=testlog \
-device testdev,chardev=testlog -kernel kvm/user/test/x86/tsc.flat

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] v4: allow userspace to adjust kvmclock offset

2009-10-14 Thread Glauber Costa
On Wed, Oct 14, 2009 at 03:53:27PM -0300, Marcelo Tosatti wrote:
 On Wed, Oct 14, 2009 at 10:47:46AM -0400, Glauber Costa wrote:
  When we migrate a kvm guest that uses pvclock between two hosts, we may
  suffer a large skew. This is because there can be significant differences
  between the monotonic clock of the hosts involved. When a new host with
  a much larger monotonic time starts running the guest, the view of time
  will be significantly impacted.
  
  Situation is much worse when we do the opposite, and migrate to a host with
  a smaller monotonic clock.
  
  This proposed ioctl will allow userspace to inform us what is the monotonic
  clock value in the source host, so we can keep the time skew short, and
  more importantly, never goes backwards. Userspace may also need to trigger
  the current data, since from the first migration onwards, it won't be
  reflected by a simple call to clock_gettime() anymore.
  
  [ v2: uses a struct with a padding ]
  [ v3: provide an ioctl to get clock data too ]
  [ v4: used fixed-width signed type for delta ]
  
  Signed-off-by: Glauber Costa glom...@redhat.com
  ---
   arch/x86/include/asm/kvm_host.h |1 +
   arch/x86/kvm/x86.c  |   35 ++-
   include/linux/kvm.h |7 +++
   3 files changed, 42 insertions(+), 1 deletions(-)
  
  diff --git a/arch/x86/include/asm/kvm_host.h 
  b/arch/x86/include/asm/kvm_host.h
  index 179a919..c9b0d9f 100644
  --- a/arch/x86/include/asm/kvm_host.h
  +++ b/arch/x86/include/asm/kvm_host.h
  @@ -410,6 +410,7 @@ struct kvm_arch{
   
  unsigned long irq_sources_bitmap;
  u64 vm_init_tsc;
  +   s64 kvmclock_offset;
   };
   
   struct kvm_vm_stat {
  diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
  index 9601bc6..09f31e2 100644
  --- a/arch/x86/kvm/x86.c
  +++ b/arch/x86/kvm/x86.c
  @@ -699,7 +699,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
  /* With all the info we got, fill in the values */
   
  vcpu-hv_clock.system_time = ts.tv_nsec +
  -(NSEC_PER_SEC * (u64)ts.tv_sec);
  +(NSEC_PER_SEC * (u64)ts.tv_sec) + 
  v-kvm-arch.kvmclock_offset;
  +
  /*
   * The interface expects us to write an even number signaling that the
   * update is finished. Since the guest won't see the intermediate
  @@ -2441,6 +2442,38 @@ long kvm_arch_vm_ioctl(struct file *filp,
  r = 0;
  break;
  }
  +   case KVM_SET_CLOCK: {
  +   struct timespec now;
  +   struct kvm_clock_data user_ns;
  +   u64 now_ns;
  +   s64 delta;
  +
  +   r =  -EFAULT;
 
 Extra space :)
Want me to send a new one because of that?

 
   #define KVM_CREATE_PIT2   _IOW(KVMIO, 0x77, struct 
  kvm_pit_config)
   #define KVM_SET_BOOT_CPU_ID_IO(KVMIO, 0x78)
   #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
  +#define KVM_SET_CLOCK_IOW(KVMIO, 0x7a, struct 
  kvm_clock_data)
  +#define KVM_GET_CLOCK_IOW(KVMIO, 0x7b, struct 
  kvm_clock_data)
 _IOR
 
 Otherwise looks fine, please send the userspace changes together.
Note that this has changed quite a bit in the process already. It only makes
sense to implement the userspace side once this is committed, IMHO.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RFC] Xen PV-on-HVM guest support

2009-10-14 Thread Ed Swierk
Thanks for the feedback; I'll post a new version shortly.

On Tue, Oct 13, 2009 at 11:45 PM, Jan Kiszka jan.kis...@web.de wrote:
 Interesting stuff. How usable is your work at this point? I've no
 immediate demand, but the question if one could integrate Xen guests
 with KVM already popped up more than once @work.

So far I've managed to boot CentOS 5.3 (both i386 and x86_64) and use
the Xen PV block and net devices, with pretty good performance. I've
also booted FreeBSD 8.0-RC1 (amd64 only) with a XENHVM kernel and used
the Xen PV block and net devices, but the performance of the net
device is significantly worse than with CentOS. Also some FreeBSD
applications use a flag that's not yet implemented in the net device
emulation, but I'm working on fixing that.

Overall it seems pretty solid for Linux PV-on-HVM guests. I think more
work is needed to support full PV guests, but I don't know how much.
Have folks been asking about PV-on-HVM or full PV?

--Ed
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Latest -git qemu-kvm doesn't boot an x86 kernel

2009-10-14 Thread Marcelo Tosatti
On Wed, Oct 14, 2009 at 09:23:43PM +0530, Aneesh Kumar K.V wrote:
 Hi,
 
 I am trying qemu-system-x86_64 on a x86 host running 2.6.30-2 (debian testing)
 kernel and trying to boot latest linus git kernel (x86). The kernel hang
 after printing the below 
 
 [   4.394392] ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 11
 [4.397837] virtio-pci :00:03.0: PCI INT A - Link[LNKC] - GSI 11 
 (level, high) - IRQ 11
 [4.436489] ACPI: PCI Interrupt Link [LNKD] enabled at IRQ 10
 [4.439829] virtio-pci :00:04.0: PCI INT A - Link[LNKD] - GSI 10 
 (level, high) - IRQ 10
 [4.462538]  vda:
 [4.526913] input: ImExPS/2 Generic Explorer Mouse as 
 /devices/platform/i8042/serio1/input/input3
 [5.349554] async/1 used greatest stack depth: 5872 bytes left
 
 
 An earlier version of kvm booted fine the new kernel. So the vm disk image 
 user space should
 all be fine. The older version of kvm that worked fine is 
 QEMU PC emulator version 0.10.50 (kvm-devel)
 
 
 Any patches i need to try ?

Please try qemu-kvm.git (should be fixed by commit
1536fc28ae1954e2990c3ee14b4a92624ecfcb68).

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] Re: Release plan for 0.12.0

2009-10-14 Thread Sridhar Samudrala
On Wed, 2009-10-14 at 17:50 +0200, Michael S. Tsirkin wrote:
 On Wed, Oct 14, 2009 at 04:19:17PM +0100, Jamie Lokier wrote:
  Michael S. Tsirkin wrote:
   On Wed, Oct 14, 2009 at 09:17:15AM -0500, Anthony Liguori wrote:
Michael S. Tsirkin wrote:
Looks like Or has abandoned it.  I have an updated version which works
with new APIs, etc.  Let me post it and we'll go from there.
   
  
I'm generally inclined to oppose the functionality as I don't think 
it  offers any advantages over the existing backends.

   
I patch it in and use it all the time.  It's much easier to setup
on a random machine than a bridged config.
  
   
Having two things that do the same thing is just going to lead to user  
confusion.
   
   They do not do the same thing. With raw socket you can use windows
   update without a bridge in the host, with tap you can't.
  
  On the other hand, with raw socket, guest Windows can't access files
  on the host's Samba share can it?  So it's not that useful even for
  Windows guests.
 
 I guess this depends on whether you use the same host for samba :)
 
If the problem is tap is too hard to setup, we should try to  
simplify tap configuration.
   
   The problem is bridge is too hard to setup.
   Simplifying that is a good idea, but outside the scope
   of the qemu project.
  
  I venture it's important enough for qemu that it's worth working on
  that.  Something that looks like the raw socket but behaves like an
  automatically instantiated bridge attached to the bound interface
  would be a useful interface.
 
 I agree, that would be good to have.

Can't we bind the raw socket to the tap interface instead of the
physical interface and allow the bridge config to work.

Thanks
Sridhar


 
  I don't have much time, but I'll help anybody who wants to do that.
  
  -- Jamie
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Add a qemu interface for sharing memory between guests.

2009-10-14 Thread Cam Macdonell
On Mon, Oct 12, 2009 at 2:55 AM, Avi Kivity a...@redhat.com wrote:

 On 10/12/2009 08:53 AM, Sivaram Kannan wrote:

 Hi all,

 I am a KVM newbie and I picked up the following task from the TODO of the 
 KVM wiki.

 Add a qemu interface for sharing memory between guests. Using a pci device 
 to expose the shared memory is probably a good starting point. (this should 
 use virtio and probably depends on mmu-notifiers)

 Is the task still relevant? Can someone give me some pointers to start
 with?



 Cam did a lot of work on this, perhaps he can provide a pointer.

 --
 error compiling committee.c: too many arguments to function

Hi Sivaram,

Here are the two patches for KVM describing what I have done.  I am
continuing to work on it and still mulling a move to virtio.  These
don't apply against the current tree, but I can provide those patches
if you would like to see them.

http://patchwork.kernel.org/patch/38355/

http://patchwork.kernel.org/patch/38347/

Were you interested in using the shared memory for something in
particular or were you just looking for a to-do task to pick up?

Let me know if you have any questions,

Cheers,
Cam
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Raw vs. tap (was: Re: [Qemu-devel] Re: Release plan for 0.12.0)

2009-10-14 Thread Anthony Liguori

Sridhar Samudrala wrote:

Can't we bind the raw socket to the tap interface instead of the
physical interface and allow the bridge config to work.
  


But why use the raw interface instead of tap directly?

Let me summarize the discussion so far:

Raw sockets
Pros:
o User specifies a network interface to bind to
o External traffic Just Works, guest-to-guest traffic Just Works

Cons:
o Requires root (cannot chmod)
o Guest-host traffic does not work
o No support for GSO/checksum offload

Some things that I'm not sure will work or not:
o guest with a bridge (sending traffic with multiple mac addresses)
o guest trying to enter promiscuous mode

Tap
Pros:
o All types of networking works when configured
o Supports non-root users via tunctl
o Supports GSO/checksum offload

Cons:
o Requires configuring a bridge which can be difficult for some users

Since I don't see any clear features in raw sockets that aren't present 
in tap, the argument really boils down to two things.  First, we should 
take any feature in qemu and let the user decide whether or not they 
want to use it.  I strongly feel this is a bad philosophy that will lead 
to increased user confusion and a poor user experience.


Second, even though raw loses performance and requires root, since it 
requires no external configuration it is easier to use and therefore 
should be an option for users.  I dislike this argument because it 
tricks a user into thinking that raw is a viable replacement for tap.  
It certainly isn't performance wise but most importantly, it isn't from 
a functional perspective.  I would be much more inclined to consider 
taking raw and improving the performance long term if guest-host 
networking worked.  This appears to be a fundamental limitation though 
and I think it's something that will forever plague users if we include 
this feature.


So at this point, I think it's a mistake to include raw socket support.  
If the goal is to improve networking usability such that it just works 
as a root user, let's incorporate a default network script that creates 
a bridge or something like that.  There are better ways to achieve that 
goal.


Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: sync guest calls made async on host - SQLite performance

2009-10-14 Thread Anthony Liguori

Christoph Hellwig wrote:

On Thu, Oct 15, 2009 at 01:56:40AM +0900, Avi Kivity wrote:
  

Does virtio say it has a write cache or not (and how does one say it?)?



Historically it didn't and the only safe way to use virtio was in
cache=writethrough mode.


Which should be the default on Ubuntu's kvm that this report is 
concerned with so I'm a bit confused.


Avi's patch is a performance optimization, not a correctness issue?

Regards,

Anthony Liguori
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] kvm: Prevent kvm_init from corrupting debugfs structures

2009-10-14 Thread Darrick J. Wong
I'm seeing an oops condition when kvm-intel and kvm-amd are modprobe'd
during boot (say on an Intel system) and then rmmod'd:

   # modprobe kvm-intel
 kvm_init()
 kvm_init_debug() -- stores debugfs dentries internally
 kvm_arch_init()
 (success, etc)

   # modprobe kvm-amd
 kvm_init()
 kvm_init_debug() -- second initialization clobbers kvm's
  internal pointers to dentries
 kvm_arch_init()
 kvm_exit_debug() -- and frees them

   # rmmod kvm-intel
 kvm_exit()
 kvm_exit_debug() -- double free of debugfs files!

 *BOOM*

If execution gets to the end of kvm_init(), then the calling module has been
established as the kvm provider.  Move the debugfs initialization to the end of
the function, and remove the now-unnecessary call to kvm_exit_debug() from the
error path.  That way we avoid trampling on the debugfs entries and freeing
them twice.

Signed-off-by: Darrick J. Wong djw...@us.ibm.com
---

 virt/kvm/kvm_main.c |7 +++
 1 files changed, 3 insertions(+), 4 deletions(-)


diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b7c78a4..7495ce3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2717,8 +2717,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
int r;
int cpu;
 
-   kvm_init_debug();
-
r = kvm_arch_init(opaque);
if (r)
goto out_fail;
@@ -2785,6 +2783,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
kvm_preempt_ops.sched_in = kvm_sched_in;
kvm_preempt_ops.sched_out = kvm_sched_out;
 
+   kvm_init_debug();
+
return 0;
 
 out_free:
@@ -2807,7 +2807,6 @@ out_free_0:
 out:
kvm_arch_exit();
 out_fail:
-   kvm_exit_debug();
return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init);
@@ -2815,6 +2814,7 @@ EXPORT_SYMBOL_GPL(kvm_init);
 void kvm_exit(void)
 {
tracepoint_synchronize_unregister();
+   kvm_exit_debug();
misc_deregister(&kvm_dev);
kmem_cache_destroy(kvm_vcpu_cache);
sysdev_unregister(kvm_sysdev);
@@ -2824,7 +2824,6 @@ void kvm_exit(void)
on_each_cpu(hardware_disable, NULL, 1);
kvm_arch_hardware_unsetup();
kvm_arch_exit();
-   kvm_exit_debug();
free_cpumask_var(cpus_hardware_enabled);
__free_page(bad_page);
 }


Re: sync guest calls made async on host - SQLite performance

2009-10-14 Thread Avi Kivity

On 10/15/2009 07:54 AM, Anthony Liguori wrote:

Christoph Hellwig wrote:

On Thu, Oct 15, 2009 at 01:56:40AM +0900, Avi Kivity wrote:

Does virtio say it has a write cache or not (and how does one say it?)?


Historically it didn't and the only safe way to use virtio was in
cache=writethrough mode.


It didn't say?  So it's up to the default, which is what?



Which should be the default on the Ubuntu kvm that this report is 
concerned with, so I'm a bit confused.


Avi's patch is a performance optimization, not a correctness issue?


If filesystems do drain by default, it should be a no-op on 
cache!=writeback.


However, if the sequence lseek(0); write(1); fdatasync(); is faster than 
disk speed, then something in our assumptions has to be wrong.
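
One quick way to test that assumption is to time exactly that pattern 
from userspace.  A rough sketch (the file name and iteration count are 
arbitrary):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Rewrite the same 512-byte block and fdatasync() it in a loop.  On
 * rotating media with a real flush, each iteration should cost at least
 * one platter rotation, so rates far beyond ~100-200 ops/s suggest the
 * sync never reaches stable storage. */
int main(int argc, char **argv)
{
	const char buf[512] = "x";
	int fd, i;

	fd = open(argc > 1 ? argv[1] : "testfile", O_CREAT | O_WRONLY, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (i = 0; i < 1000; i++) {
		if (lseek(fd, 0, SEEK_SET) < 0 ||
		    write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf) ||
		    fdatasync(fd) < 0) {
			perror("write/fdatasync");
			return 1;
		}
	}
	close(fd);
	return 0;
}

Running this under time(1) against a file on the guest disk gives a 
quick upper bound on whether fdatasync() is really waiting for the 
medium.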


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



Re: [PATCH] allow userspace to adjust kvmclock offset

2009-10-14 Thread Avi Kivity

On 10/13/2009 09:46 PM, Glauber Costa wrote:

On Tue, Oct 13, 2009 at 03:31:08PM +0300, Avi Kivity wrote:
   

On 10/13/2009 03:28 PM, Glauber Costa wrote:
 
   

Do we want an absolute or relative adjustment?

 

What exactly do you mean?

   

Absolute adjustment: clock = t
Relative adjustment: clock += t
 

The delta is absolute, but the adjustment in the clock is relative.

So we pick the difference between what userspace is passing us and what
we currently have, then add it relatively, so we can make sure we won't
go back or suffer too big a skew.
   


The motivation for relative adjustment is when you have a 
jitter-resistant place to gather timing information (like the kernel, 
which can disable interrupts and preemption) and then pass it on to kvm 
without losing information due to scheduling.  For migration there is no 
such place, since it involves two hosts, but it makes sense to support 
relative adjustments.
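
In code form, the delta-based scheme described above amounts to this 
standalone sketch (kvmclock_offset and host_clock_ns are invented 
stand-ins, not the actual kvm fields):

#include <stdint.h>
#include <stdio.h>

static int64_t kvmclock_offset;               /* ns added on top of the host clock */
static int64_t host_clock_ns = 1000000000LL;  /* stand-in for a host clock read */

/* Userspace passes an absolute target; the host converts it into a
 * relative bump, so timing information gathered elsewhere is preserved
 * instead of being clobbered by a stale absolute value. */
static void adjust_clock(int64_t user_ns)
{
	int64_t current_ns = host_clock_ns + kvmclock_offset;

	kvmclock_offset += user_ns - current_ns;
}

int main(void)
{
	adjust_clock(1000000500LL);  /* ask for +500ns relative to now */
	printf("offset: %lld ns\n", (long long)kvmclock_offset);
	return 0;
}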


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



buildbot failure in qemu-kvm on default_i386_out_of_tree

2009-10-14 Thread qemu-kvm
The Buildbot has detected a new failure of default_i386_out_of_tree on qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/default_i386_out_of_tree/builds/51

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_2

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



buildbot failure in qemu-kvm on default_i386_debian_5_0

2009-10-14 Thread qemu-kvm
The Buildbot has detected a new failure of default_i386_debian_5_0 on qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/default_i386_debian_5_0/builds/114

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_2

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



buildbot failure in qemu-kvm on default_x86_64_debian_5_0

2009-10-14 Thread qemu-kvm
The Buildbot has detected a new failure of default_x86_64_debian_5_0 on 
qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_debian_5_0/builds/112

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_1

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



buildbot failure in qemu-kvm on default_x86_64_out_of_tree

2009-10-14 Thread qemu-kvm
The Buildbot has detected a new failure of default_x86_64_out_of_tree on 
qemu-kvm.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_out_of_tree/builds/53

Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/

Buildslave for this Build: b1_qemu_kvm_1

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot



Can't make virtio block driver work on Windows 2003

2009-10-14 Thread Asdo

Hi all
I have a new installation of Windows 2003 SBS Server 32-bit, which I 
installed using an IDE disk.
The KVM version is QEMU PC emulator version 0.10.50 (qemu-kvm-devel-86), 
compiled by myself on kernel 2.6.28-11-server.


I have already moved networking from e1000 to virtio (e1000 was 
performing very sluggishly btw, probably losing many packets; virtio 
seems to work).


Now I want to move the disk to virtio...

This is complex, so I thought that first I wanted to see virtio 
installed and working on another drive.
So I tried adding another drive, a virtio one (a new 100MB file on the 
host side), to the virtual machine and rebooting.


A first problem is that Windows does not detect the new device upon boot 
or Add Hardware scan.


Here is the kvm commandline (it's complex because it comes from libvirt):

/usr/local/kvm/bin/qemu-system-x86_64 -S -M pc -m 4096 -smp 4 -name 
winserv2 -uuid  -monitor pty -boot c 
-drive 
file=/virtual_machines/kvm/nfsimport/winserv2.raw,if=ide,index=0,boot=on 
-drive file=/virtual_machines/kvm/nfsimport/zerofile,if=virtio,index=1 
-net nic,macaddr=xx:xx:xx:xx:xx:xx,vlan=0,model=virtio -net 
tap,fd=25,vlan=0 -serial none -parallel none -usb -vnc 127.0.0.1:4


Even if Windows couldn't detect the new device, I tried to install the 
driver anyway. In Add Hardware I go through to -> SCSI and RAID 
controllers -> Have Disk, and point it to the location of the viostor 
files (Windows 2003 x86) downloaded from:


 http://www.linux-kvm.org/page/WindowsGuestDrivers/Download_Drivers
 http://people.redhat.com/~yvugenfi/24.09.2009/viostor.zip

Windows does install the driver; however, at the end it says:

 The software for this device is now installed, but may not work 
correctly.

 This device cannot start. (Code 10)

and the new device gets flagged with a yellow exclamation mark in Device 
Manager.


I don't know if it's the same reason as before, that the device is not 
detected so the driver cannot work, or another reason.


Any idea?

Thanks for your help


Re: Latest -git qemu-kvm doesn't boot an x86 kernel

2009-10-14 Thread Aneesh Kumar K.V
On Wed, Oct 14, 2009 at 04:54:35PM -0300, Marcelo Tosatti wrote:
 On Wed, Oct 14, 2009 at 09:23:43PM +0530, Aneesh Kumar K.V wrote:
  Hi,
  
  I am trying qemu-system-x86_64 on an x86 host running a 2.6.30-2 
  (debian testing) kernel and trying to boot the latest Linus git kernel 
  (x86). The kernel hangs after printing the below:
  
  [   4.394392] ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 11
  [   4.397837] virtio-pci 0000:00:03.0: PCI INT A -> Link[LNKC] -> GSI 11 
  (level, high) -> IRQ 11
  [   4.436489] ACPI: PCI Interrupt Link [LNKD] enabled at IRQ 10
  [   4.439829] virtio-pci 0000:00:04.0: PCI INT A -> Link[LNKD] -> GSI 10 
  (level, high) -> IRQ 10
  [   4.462538]  vda:
  [   4.526913] input: ImExPS/2 Generic Explorer Mouse as 
  /devices/platform/i8042/serio1/input/input3
  [   5.349554] async/1 used greatest stack depth: 5872 bytes left
  
  
  An earlier version of kvm booted the new kernel fine, so the VM disk 
  image and user space should all be fine. The older version of kvm that 
  worked fine is QEMU PC emulator version 0.10.50 (kvm-devel).
  
  
  Any patches i need to try ?
 
 Please try qemu-kvm.git (should be fixed by commit
 1536fc28ae1954e2990c3ee14b4a92624ecfcb68).
 

That worked.

Thanks
-aneesh


Re: Can't make virtio block driver work on Windows 2003

2009-10-14 Thread Vadim Rozenfeld

On 10/14/2009 07:52 PM, Asdo wrote:

Hi all
I have a new installation of Windows 2003 SBS Server 32-bit, which I 
installed using an IDE disk.
The KVM version is QEMU PC emulator version 0.10.50 (qemu-kvm-devel-86), 
compiled by myself on kernel 2.6.28-11-server.


I have already moved networking from e1000 to virtio (e1000 was 
performing very sluggishly btw, probably losing many packets; 
virtio seems to work).


Now I want to move the disk to virtio...

This is complex, so I thought that first I wanted to see virtio 
installed and working on another drive.
So I tried adding another drive, a virtio one (a new 100MB file on the 
host side), to the virtual machine and rebooting.


A first problem is that Windows does not detect the new device upon 
boot or Add Hardware scan.
Check the PCI devices with "info pci" in the qemu monitor. You must have 
a "SCSI controller: PCI device 1af4:1001" device reported.


Here is the kvm commandline (it's complex because it comes from libvirt):

/usr/local/kvm/bin/qemu-system-x86_64 -S -M pc -m 4096 -smp 4 -name 
winserv2 -uuid  -monitor pty -boot 
c -drive 
file=/virtual_machines/kvm/nfsimport/winserv2.raw,if=ide,index=0,boot=on 
-drive file=/virtual_machines/kvm/nfsimport/zerofile,if=virtio,index=1 
-net nic,macaddr=xx:xx:xx:xx:xx:xx,vlan=0,model=virtio -net 
tap,fd=25,vlan=0 -serial none -parallel none -usb -vnc 127.0.0.1:4


Even if Windows couldn't detect the new device, I tried to install the 
driver anyway. In Add Hardware I go through to -> SCSI and RAID 
controllers -> Have Disk, and point it to the location of the viostor 
files (Windows 2003 x86) downloaded from:


 http://www.linux-kvm.org/page/WindowsGuestDrivers/Download_Drivers
 http://people.redhat.com/~yvugenfi/24.09.2009/viostor.zip

Windows does install the driver; however, at the end it says:

 The software for this device is now installed, but may not work 
correctly.

 This device cannot start. (Code 10)

and the new device gets flagged with a yellow exclamation mark in 
Device Manager.


I don't know if it's the same reason as before, that the device is not 
detected so the driver cannot work, or another reason.
Yes, it must be the same problem. Code 10 means that the device driver 
was not able to find or initialize the hardware.

Regards,
Vadim


Any idea?

Thanks for your help


[PATCH] Xen PV-on-HVM guest support (v2)

2009-10-14 Thread Ed Swierk
Support for Xen PV-on-HVM guests can be implemented almost entirely in
userspace, except for handling one annoying MSR that maps a Xen
hypercall blob into guest address space.

A generic mechanism to delegate MSR writes to userspace seems overkill
and risks encouraging similar MSR abuse in the future.  Thus this patch
adds special support for the Xen HVM MSR.

I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell
KVM which MSR the guest will write to, as well as the starting address
and size of the hypercall blobs (one each for 32-bit and 64-bit) that
userspace has loaded from files.  When the guest writes to the MSR, KVM
copies one page of the blob from userspace to the guest.
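
From userspace the flow would look roughly like the sketch below; the 
MSR number, helper name and blob handling are illustrative assumptions, 
and <linux/kvm.h> is assumed to have this patch applied:

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Tell KVM which MSR to intercept and where the hypercall blobs
 * (already loaded into our address space) live.  0x40000000 is the MSR
 * Xen HVM guests conventionally write; used here only as an example. */
static int configure_xen_hvm(int vm_fd, void *blob32, int pages32,
                             void *blob64, int pages64)
{
	struct kvm_xen_hvm_config cfg;

	memset(&cfg, 0, sizeof(cfg));
	cfg.msr = 0x40000000;
	cfg.blob_addr[0] = (unsigned long)blob32;  /* 32-bit blob */
	cfg.blob_size[0] = pages32;                /* size in pages */
	cfg.blob_addr[1] = (unsigned long)blob64;  /* 64-bit blob */
	cfg.blob_size[1] = pages64;

	return ioctl(vm_fd, KVM_XEN_HVM_CONFIG, &cfg);
}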

I've tested this patch with a hacked-up version of Gerd's userspace
code, booting a number of guests (CentOS 5.3 i386 and x86_64, and
FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices.

v2: fix ioctl struct padding; renumber CAP and ioctl constants; check
kvm_write_guest() return value; change printks to KERN_DEBUG (I think
they're worth keeping for debugging userspace)

Signed-off-by: Ed Swierk eswi...@aristanetworks.com

---
Index: kvm-kmod/include/asm-x86/kvm.h
===
--- kvm-kmod.orig/include/asm-x86/kvm.h
+++ kvm-kmod/include/asm-x86/kvm.h
@@ -59,6 +59,7 @@
 #define __KVM_HAVE_MSIX
 #define __KVM_HAVE_MCE
 #define __KVM_HAVE_PIT_STATE2
+#define __KVM_HAVE_XEN_HVM
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
Index: kvm-kmod/include/linux/kvm.h
===
--- kvm-kmod.orig/include/linux/kvm.h
+++ kvm-kmod/include/linux/kvm.h
@@ -476,6 +476,9 @@ struct kvm_ioeventfd {
 #endif
 #define KVM_CAP_IOEVENTFD 36
 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
+#ifdef __KVM_HAVE_XEN_HVM
+#define KVM_CAP_XEN_HVM 38
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -528,6 +531,15 @@ struct kvm_x86_mce {
 };
 #endif
 
+#ifdef KVM_CAP_XEN_HVM
+struct kvm_xen_hvm_config {
+   __u32 msr;
+   __u8 pad[2];
+   __u8 blob_size[2];
+   __u64 blob_addr[2];
+};
+#endif
+
 #define KVM_IRQFD_FLAG_DEASSIGN (1  0)
 
 struct kvm_irqfd {
@@ -586,6 +598,7 @@ struct kvm_irqfd {
 #define KVM_CREATE_PIT2       _IOW(KVMIO, 0x77, struct kvm_pit_config)
 #define KVM_SET_BOOT_CPU_ID   _IO(KVMIO, 0x78)
 #define KVM_IOEVENTFD         _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
+#define KVM_XEN_HVM_CONFIG    _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
 
 /*
  * ioctls for vcpu fds
Index: kvm-kmod/include/linux/kvm_host.h
===
--- kvm-kmod.orig/include/linux/kvm_host.h
+++ kvm-kmod/include/linux/kvm_host.h
@@ -236,6 +236,10 @@ struct kvm {
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
 #endif
+
+#ifdef KVM_CAP_XEN_HVM
+   struct kvm_xen_hvm_config xen_hvm_config;
+#endif
 };
 
 /* The guest did something we don't support. */
Index: kvm-kmod/x86/x86.c
===
--- kvm-kmod.orig/x86/x86.c
+++ kvm-kmod/x86/x86.c
@@ -875,6 +875,35 @@ static int set_msr_mce(struct kvm_vcpu *
return 0;
 }
 
+#ifdef KVM_CAP_XEN_HVM
+static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
+{
+   int blob = !!(vcpu->arch.shadow_efer & EFER_LME);
+   u32 pnum = data & ~PAGE_MASK;
+   u64 paddr = data & PAGE_MASK;
+   u8 *page;
+   int r = 1;
+
+   if (pnum >= vcpu->kvm->xen_hvm_config.blob_size[blob])
+   goto out;
+   page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+   if (!page)
+   goto out;
+   if (copy_from_user(page, (u8 *)vcpu->kvm->xen_hvm_config.blob_addr[blob]
+  + pnum * PAGE_SIZE, PAGE_SIZE))
+   goto out_free;
+   if (kvm_write_guest(vcpu->kvm, paddr, page, PAGE_SIZE))
+   goto out_free;
+   printk(KERN_DEBUG "kvm: copied xen hvm blob %d page %d to 0x%llx\n",
+  blob, pnum, paddr);
+   r = 0;
+out_free:
+   kfree(page);
+out:
+   return r;
+}
+#endif
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
switch (msr) {
@@ -990,6 +1019,10 @@ int kvm_set_msr_common(struct kvm_vcpu *
0x%x data 0x%llx\n", msr, data);
break;
default:
+#ifdef KVM_CAP_XEN_HVM
+   if (msr && (msr == vcpu->kvm->xen_hvm_config.msr))
+   return xen_hvm_config(vcpu, data);
+#endif
if (!ignore_msrs) {
pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n",
msr, data);
@@ -2453,6 +2486,17 @@ long kvm_arch_vm_ioctl(struct file *filp
r = 0;
break;
}
+#ifdef KVM_CAP_XEN_HVM
+   case KVM_XEN_HVM_CONFIG: {
+   r = -EFAULT;
+   if (copy_from_user(&kvm->xen_hvm_config, argp,
+

Re: linux-next: tree build failure

2009-10-14 Thread Hollis Blanchard
On Fri, 2009-10-09 at 12:14 -0700, Hollis Blanchard wrote:
 Rusty's version of BUILD_BUG_ON() does indeed fix the build break, and
 also exposes the bug in kvmppc_account_exit_stat(). So to recap:
 
 original: built but didn't work
 Jan's: doesn't build
 Rusty's: builds and works
 
 Where do you want to go from here?

Jan, what are your thoughts? Your BUILD_BUG_ON patch has broken the
build, and we still need to fix it.
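
For readers without the earlier thread: the generic negative-array 
formulation of this macro (not necessarily Jan's or Rusty's exact 
version) is:

/* Compile-time assertion: if cond is nonzero the array size goes
 * negative and compilation fails.  The subtlety this thread is about is
 * what happens when cond is not a compile-time constant -- some
 * formulations silently accept it, others (correctly) break the
 * build. */
#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))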

-- 
Hollis Blanchard
IBM Linux Technology Center
