Unable to get MenuetOS booting
Hi, I'm not able to get MenuetOS booting while using kvm modules. QEMU hangs on Floppy boot. Here is the procedure: cd /tmp wget -c 'http://www.menuetos.be/download.php?CurrentMenuetOS' -O menuetos.zip unzip -u menuetos.zip qemu-kvm -m 512 -fda M64-*.IMG -boot a I have an Intel i7 920, I use kvm-88, kernel 2.6.31.4, x86_64, 64 bits release of MenuetOS. -no-kvm-irqchip or -no-kvm-pit don't solve the issue, but -no-kvm does. -- ubitux -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RFC] Xen PV-on-HVM guest support
Ed Swierk wrote: As we discussed a while back, support for Xen PV-on-HVM guests can be implemented almost entirely in userspace, except for handling one annoying MSR that maps a Xen hypercall blob into guest address space. A generic mechanism to delegate MSR writes to userspace seems overkill and risks encouraging similar MSR abuse in the future. Thus this patch adds special support for the Xen HVM MSR. At Avi's suggestion[1] I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell KVM which MSR the guest will write to, as well as the starting address and size of the hypercall blobs (one each for 32-bit and 64-bit) that userspace has loaded from files. When the guest writes to the MSR, KVM copies one page of the blob from userspace to the guest. I've tested this patch against a hacked-up version of Gerd's userspace code[2]; I'm happy to share those hacks if anyone is interested. [1] http://www.mail-archive.com/kvm@vger.kernel.org/msg16065.html [2] http://git.et.redhat.com/?p=qemu-kraxel.git;a=log;h=refs/heads/xenner.v5 Signed-off-by: Ed Swierk eswi...@aristanetworks.com --- diff -BurN a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h --- a/include/asm-x86/kvm.h 2009-10-13 20:40:55.0 -0700 +++ b/include/asm-x86/kvm.h 2009-10-13 20:21:07.0 -0700 @@ -59,6 +59,7 @@ #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 +#define __KVM_HAVE_XEN_HVM /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff -BurN a/include/linux/kvm.h b/include/linux/kvm.h --- a/include/linux/kvm.h 2009-10-13 20:40:55.0 -0700 +++ b/include/linux/kvm.h 2009-10-13 20:21:26.0 -0700 @@ -476,6 +476,9 @@ #endif #define KVM_CAP_IOEVENTFD 36 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 +#ifdef __KVM_HAVE_XEN_HVM +#define KVM_CAP_XEN_HVM 90 +#endif When submitting for merge, I would close this gab in the CAP number space. 
#ifdef KVM_CAP_IRQ_ROUTING @@ -528,6 +531,14 @@ }; #endif +#ifdef KVM_CAP_XEN_HVM +struct kvm_xen_hvm_config { + __u32 msr; + __u64 blob_addr[2]; + __u8 blob_size[2]; This needs padding to achieve a stable layout across 32 and 64 bit. +}; +#endif + #define KVM_IRQFD_FLAG_DEASSIGN (1 0) struct kvm_irqfd { @@ -586,6 +597,7 @@ #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID_IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_XEN_HVM_CONFIG_IOW(KVMIO, 0xa1, struct kvm_xen_hvm_config) Also here: next is 0x7a. /* * ioctls for vcpu fds diff -BurN a/include/linux/kvm_host.h b/include/linux/kvm_host.h --- a/include/linux/kvm_host.h2009-10-13 20:40:55.0 -0700 +++ b/include/linux/kvm_host.h2009-10-13 20:27:03.0 -0700 @@ -236,6 +236,10 @@ unsigned long mmu_notifier_seq; long mmu_notifier_count; #endif + +#ifdef KVM_CAP_XEN_HVM + struct kvm_xen_hvm_config xen_hvm_config; +#endif }; /* The guest did something we don't support. */ diff -BurN a/x86/x86.c b/x86/x86.c --- a/x86/x86.c 2009-10-13 20:40:58.0 -0700 +++ b/x86/x86.c 2009-10-13 20:33:49.0 -0700 @@ -875,6 +875,33 @@ return 0; } +#ifdef KVM_CAP_XEN_HVM +static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) +{ + int blob = !!(vcpu-arch.shadow_efer EFER_LME); + u32 pnum = data ~PAGE_MASK; + u64 paddr = data PAGE_MASK; + u8 *page; + int r = 1; + printk(KERN_INFO kvm: loading xen hvm blob %d page %d at %llx\n, +blob, pnum, paddr); Debugging left-over? And please insert a blank line after the variable block. + if (pnum = vcpu-kvm-xen_hvm_config.blob_size[blob]) + goto out; + page = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!page) + goto out; + if (copy_from_user(page, (u8 *)vcpu-kvm-xen_hvm_config.blob_addr[blob] ++ pnum * PAGE_SIZE, PAGE_SIZE)) + goto out_free; + kvm_write_guest(vcpu-kvm, paddr, page, PAGE_SIZE); This function returns an error code. Not interested in it? 
+ r = 0; +out_free: + kfree(page); +out: + return r; +} +#endif + int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { switch (msr) { @@ -990,6 +1017,10 @@ 0x%x data 0x%llx\n, msr, data); break; default: +#ifdef KVM_CAP_XEN_HVM + if (msr (msr == vcpu-kvm-xen_hvm_config.msr)) + return xen_hvm_config(vcpu, data); +#endif if (!ignore_msrs) { pr_unimpl(vcpu, unhandled wrmsr: 0x%x data %llx\n, msr, data); @@ -2453,6 +2484,17 @@ r = 0; break; } +#ifdef KVM_CAP_XEN_HVM + case
[PATCH] qemu-kvm: x86: Add support for NMI states
This adds the required bit to retrieve and set the so far hidden NMI pending and NMI masked states of the KVM kernel side. It also extends CPU VMState for proper saving/restoring. We can now savely reset a VM while NMIs are on the fly, and we can live migrate etc. too. Fortunately, the probability that this deficit bit normal VMs in practice was very low. Signed-off-by: Jan Kiszka jan.kis...@siemens.com --- qemu-kvm-x86.c| 52 + target-i386/cpu.h |1 + target-i386/machine.c |1 + 3 files changed, 54 insertions(+), 0 deletions(-) diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index acb1b91..86fd341 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -901,6 +901,53 @@ static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) | (rhs-avl * DESC_AVL_MASK); } +static void kvm_get_nmi_state(CPUState *env) +{ +#ifdef KVM_CAP_VCPU_STATE +kvm_vcpu_context_t vcpu = env-kvm_cpu_state.vcpu_ctx; +struct { +struct kvm_vcpu_state header; +struct kvm_vcpu_substate substates[1]; +} request; +struct kvm_nmi_state nmi_state; +int r; + +request.header.nsubstates = 1; +request.header.substates[0].type = KVM_X86_VCPU_NMI; +request.header.substates[0].offset = (size_t)nmi_state - (size_t)request; +r = ioctl(vcpu-fd, KVM_GET_VCPU_STATE, request); +if (r == 0) { +env-nmi_pending = nmi_state.pending; +if (nmi_state.masked) { +env-hflags2 |= HF2_NMI_MASK; +} else { +env-hflags2 = ~HF2_NMI_MASK; +} +} +#endif +env-nmi_pending = 0; +env-hflags2 = ~HF2_NMI_MASK; +} + +static void kvm_set_nmi_state(CPUState *env) +{ +#ifdef KVM_CAP_VCPU_STATE +kvm_vcpu_context_t vcpu = env-kvm_cpu_state.vcpu_ctx; +struct { +struct kvm_vcpu_state header; +struct kvm_vcpu_substate substates[1]; +} request; +struct kvm_nmi_state nmi_state; + +request.header.nsubstates = 1; +request.header.substates[0].type = KVM_X86_VCPU_NMI; +request.header.substates[0].offset = (size_t)nmi_state - (size_t)request; +nmi_state.pending = env-nmi_pending; +nmi_state.masked = !!(env-hflags2 HF2_NMI_MASK); +ioctl(vcpu-fd, 
KVM_SET_VCPU_STATE, request); +#endif +} + void kvm_arch_load_regs(CPUState *env) { struct kvm_regs regs; @@ -1010,6 +1057,8 @@ void kvm_arch_load_regs(CPUState *env) rc = kvm_set_msrs(env-kvm_cpu_state.vcpu_ctx, msrs, n); if (rc == -1) perror(kvm_set_msrs FAILED); + +kvm_set_nmi_state(env); } void kvm_load_tsc(CPUState *env) @@ -1195,6 +1244,8 @@ void kvm_arch_save_regs(CPUState *env) return; } } + +kvm_get_nmi_state(env); } static void do_cpuid_ent(struct kvm_cpuid_entry2 *e, uint32_t function, @@ -1438,6 +1489,7 @@ void kvm_arch_push_nmi(void *opaque) void kvm_arch_cpu_reset(CPUState *env) { +env-nmi_pending = 0; kvm_arch_load_regs(env); if (!cpu_is_bsp(env)) { if (kvm_irqchip_in_kernel(kvm_context)) { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 278d3e3..620822a 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -709,6 +709,7 @@ typedef struct CPUX86State { /* For KVM */ uint64_t interrupt_bitmap[256 / 64]; uint32_t mp_state; +uint32_t nmi_pending; /* in order to simplify APIC support, we leave this pointer to the user */ diff --git a/target-i386/machine.c b/target-i386/machine.c index e640dad..5c290f3 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -469,6 +469,7 @@ const VMStateDescription vmstate_cpu = { VMSTATE_INT32_V(pending_irq_vmstate, CPUState, 9), VMSTATE_UINT32_V(mp_state, CPUState, 9), VMSTATE_UINT64_V(tsc, CPUState, 9), +VMSTATE_UINT32_V(nmi_pending, CPUState, 11), /* MCE */ VMSTATE_UINT64_V(mcg_cap, CPUState, 10), VMSTATE_UINT64_V(mcg_status, CPUState, 10), -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM test: Add a kvm subtest guest_s4
This test suspends a guest OS to disk, it supports Linux and Windows. Signed-off-by: Ken Cao k...@redhat.com Signed-off-by: Yolkfull Chow yz...@redhat.com --- client/tests/kvm/kvm_tests.cfg.sample | 16 client/tests/kvm/tests/guest_s4.py| 66 + 2 files changed, 82 insertions(+), 0 deletions(-) create mode 100644 client/tests/kvm/tests/guest_s4.py diff --git a/client/tests/kvm/kvm_tests.cfg.sample b/client/tests/kvm/kvm_tests.cfg.sample index cc3228a..9ccc9b5 100644 --- a/client/tests/kvm/kvm_tests.cfg.sample +++ b/client/tests/kvm/kvm_tests.cfg.sample @@ -118,6 +118,15 @@ variants: - linux_s3: install setup type = linux_s3 +- guest_s4: +type = guest_s4 +check_s4_support_cmd = grep -q disk /sys/power/state +test_s4_cmd = cd /tmp/;nohup tcpdump -q -t ip host localhost +check_s4_cmd = pgrep tcpdump +set_s4_cmd = echo disk /sys/power/state +kill_test_s4_cmd = pkill tcpdump +services_up_timeout = 30 + - timedrift:install setup extra_params += -rtc-td-hack variants: @@ -507,6 +516,13 @@ variants: # Alternative host load: #host_load_command = dd if=/dev/urandom of=/dev/null host_load_instances = 8 +guest_s4: +check_s4_support_cmd = powercfg /hibernate on +test_s4_cmd = start /B ping -n 3000 localhost +check_s4_cmd = tasklist | find /I ping +set_s4_cmd = rundll32.exe PowrProf.dll, SetSuspendState +kill_test_s4_cmd = taskkill /IM ping.exe /F +services_up_timeout = 30 nic_hotplug: reference_cmd = ipconfig /all find_pci_cmd = ipconfig /all | find Description diff --git a/client/tests/kvm/tests/guest_s4.py b/client/tests/kvm/tests/guest_s4.py new file mode 100644 index 000..7147e3b --- /dev/null +++ b/client/tests/kvm/tests/guest_s4.py @@ -0,0 +1,66 @@ +import logging, time +from autotest_lib.client.common_lib import error +import kvm_test_utils, kvm_utils + + +def run_guest_s4(test, params, env): + +Suspend guest to disk,supports both Linux Windows OSes. + +@param test: kvm test object. +@param params: Dictionary with test parameters. 
+@param env: Dictionary with the test environment. + +vm = kvm_test_utils.get_living_vm(env, params.get(main_vm)) +session = kvm_test_utils.wait_for_login(vm) + +logging.info(Checking whether guest OS supports suspend to disk (S4)) +status = session.get_command_status(params.get(check_s4_support_cmd)) +if status is None: +logging.error(Failed to check if guest OS supports S4) +elif status != 0: +raise error.TestFail(Guest OS does not support S4) + +logging.info(Wait until all guest OS services are fully started) +time.sleep(params.get(services_up_timeout)) + +# Start up a program (tcpdump for linux ping for Windows), as a flag. +# If the program died after suspend, then fails this testcase. +test_s4_cmd = params.get(test_s4_cmd) +session.sendline(test_s4_cmd) + +# Get the second session to start S4 +session2 = kvm_test_utils.wait_for_login(vm) + +check_s4_cmd = params.get(check_s4_cmd) +if session2.get_command_status(check_s4_cmd): +raise error.TestError(Failed to launch '%s' as a background process % + test_s4_cmd) +logging.info(Launched background command in guest: %s % test_s4_cmd) + +# Suspend to disk +logging.info(Start suspend to disk now...) +session2.sendline(params.get(set_s4_cmd)) + +if not kvm_utils.wait_for(vm.is_dead, 360, 30, 2): +raise error.TestFail(VM refuses to go down. Suspend failed) +logging.info(VM suspended successfully. Wait before booting it again.) +time.sleep(10) + +# Start vm, and check whether the program is still running +logging.info(Start suspended VM...) + +if not vm.create(): +raise error.TestError(Failed to start VM after suspend to disk) +if not vm.is_alive(): +raise error.TestError(VM seems to be dead after it was suspended) + +# Check whether test command still alive +logging.info(Checking if background command is still alive) +if session2.get_command_status(check_s4_cmd): +raise error.TestFail(Command %s failed. 
S4 failed % test_s4_cmd) + +logging.info(VM resumed successfully after suspend to disk) +session2.sendline(params.get(kill_test_s4_cmd)) +session.close() +session2.close() -- 1.6.2.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Autotest] [PATCH] Add a kvm test guest_s4 which supports both Linux and Windows platform
On Tue, Oct 13, 2009 at 11:54 PM, Yolkfull Chow yz...@redhat.com wrote: On Tue, Oct 13, 2009 at 05:29:40PM -0300, Lucas Meneghel Rodrigues wrote: Hi Yolkfull and Chen: Thanks for your test! I have some comments and doubts to clear, most of them are about content of the messages delivered for the user and some other details. On Sun, Sep 27, 2009 at 6:11 AM, Yolkfull Chow yz...@redhat.com wrote: For this case, Ken Cao wrote the linux part previously and I did extensive modifications on Windows platform support. Signed-off-by: Ken Cao k...@redhat.com Signed-off-by: Yolkfull Chow yz...@redhat.com --- client/tests/kvm/kvm_tests.cfg.sample | 14 +++ client/tests/kvm/tests/guest_s4.py | 66 + 2 files changed, 80 insertions(+), 0 deletions(-) create mode 100644 client/tests/kvm/tests/guest_s4.py diff --git a/client/tests/kvm/kvm_tests.cfg.sample b/client/tests/kvm/kvm_tests.cfg.sample index 285a38f..f9ecb61 100644 --- a/client/tests/kvm/kvm_tests.cfg.sample +++ b/client/tests/kvm/kvm_tests.cfg.sample @@ -94,6 +94,14 @@ variants: - linux_s3: install setup type = linux_s3 + - guest_s4: + type = guest_s4 + check_s4_support_cmd = grep -q disk /sys/power/state + test_s4_cmd = cd /tmp/;nohup tcpdump -q -t ip host localhost + check_s4_cmd = pgrep tcpdump + set_s4_cmd = echo disk /sys/power/state + kill_test_s4_cmd = pkill tcpdump + - timedrift: install setup type = timedrift extra_params += -rtc-td-hack @@ -382,6 +390,12 @@ variants: # Alternative host load: #host_load_command = dd if=/dev/urandom of=/dev/null host_load_instances = 8 + guest_s4: + check_s4_support_cmd = powercfg /hibernate on + test_s4_cmd = start /B ping -n 3000 localhost + check_s4_cmd = tasklist | find /I ping + set_s4_cmd = rundll32.exe PowrProf.dll, SetSuspendState + kill_test_s4_cmd = taskkill /IM ping.exe /F variants: - Win2000: diff --git a/client/tests/kvm/tests/guest_s4.py b/client/tests/kvm/tests/guest_s4.py new file mode 100644 index 000..5d8fbdf --- /dev/null +++ b/client/tests/kvm/tests/guest_s4.py @@ 
-0,0 +1,66 @@ +import logging, time +from autotest_lib.client.common_lib import error +import kvm_test_utils, kvm_utils + + +def run_guest_s4(test, params, env): + + Suspend guest to disk,supports both Linux Windows OSes. + + �...@param test: kvm test object. + �...@param params: Dictionary with test parameters. + �...@param env: Dictionary with the test environment. + + vm = kvm_test_utils.get_living_vm(env, params.get(main_vm)) + session = kvm_test_utils.wait_for_login(vm) + + logging.info(Checking whether VM supports S4) + status = session.get_command_status(params.get(check_s4_support_cmd)) + if status is None: + logging.error(Failed to check if S4 exists) + elif status != 0: + raise error.TestFail(Guest does not support S4) + + logging.info(Waiting for a while for X to start...) Yes, generally X starts a bit later than the SSH service, so I understand the time being here, however: * In fact we are waiting for all services of the guest to be up and functional, so depending on the level of load, I don't think 10s is gonna make it. So I suggest something = 30s Yeah,reasonable, we did ignore the circumstance with workload. But as you metioned,it can depend on different level of workload, therefore 30s may be not enough as well. Your idea that write a utility function waiting for some services up is good I think, thus it could be something like: def wait_services_up(services_list): ... and for this case: wait_services_up([Xorg]) for Linux and wait_services_up([explore.exe]) for Windows. Ok, sounds good to me! * It's also true that just wait for a given time and hope that it will be OK kinda sucks, so ideally we need to write utility functions to stablish as well as possible when all services of a host are fully booted up. Stated this way, it looks simple, but it's not. 
Autotest experience suggests that there's no real sane way to determine when a linux box is booted up, but we can take a semi-rational approach and verify if all services for the current run level have the status up or a similar approach. For windows, I was talking to Yaniv Kaul and it seems that processing the output of the 'sc query' command might give what we want. Bottom line, I'd like to add a TODO item, and write a function to stablish (fairly confidently) that a windows/linux guest is booted up. + time.sleep(10) + + # Start up a program(tcpdump for linux OS ping for M$ OS), as a flag. + # If the program died after suspend, then fails this
Re: [Autotest] [PATCH] Add a kvm test guest_s4 which supports both Linux and Windows platform
On Wed, Oct 14, 2009 at 06:58:01AM -0300, Lucas Meneghel Rodrigues wrote: On Tue, Oct 13, 2009 at 11:54 PM, Yolkfull Chow yz...@redhat.com wrote: On Tue, Oct 13, 2009 at 05:29:40PM -0300, Lucas Meneghel Rodrigues wrote: Hi Yolkfull and Chen: Thanks for your test! I have some comments and doubts to clear, most of them are about content of the messages delivered for the user and some other details. On Sun, Sep 27, 2009 at 6:11 AM, Yolkfull Chow yz...@redhat.com wrote: For this case, Ken Cao wrote the linux part previously and I did extensive modifications on Windows platform support. Signed-off-by: Ken Cao k...@redhat.com Signed-off-by: Yolkfull Chow yz...@redhat.com --- client/tests/kvm/kvm_tests.cfg.sample | 14 +++ client/tests/kvm/tests/guest_s4.py | 66 + 2 files changed, 80 insertions(+), 0 deletions(-) create mode 100644 client/tests/kvm/tests/guest_s4.py diff --git a/client/tests/kvm/kvm_tests.cfg.sample b/client/tests/kvm/kvm_tests.cfg.sample index 285a38f..f9ecb61 100644 --- a/client/tests/kvm/kvm_tests.cfg.sample +++ b/client/tests/kvm/kvm_tests.cfg.sample @@ -94,6 +94,14 @@ variants: - linux_s3: install setup type = linux_s3 + - guest_s4: + type = guest_s4 + check_s4_support_cmd = grep -q disk /sys/power/state + test_s4_cmd = cd /tmp/;nohup tcpdump -q -t ip host localhost + check_s4_cmd = pgrep tcpdump + set_s4_cmd = echo disk /sys/power/state + kill_test_s4_cmd = pkill tcpdump + - timedrift: install setup type = timedrift extra_params += -rtc-td-hack @@ -382,6 +390,12 @@ variants: # Alternative host load: #host_load_command = dd if=/dev/urandom of=/dev/null host_load_instances = 8 + guest_s4: + check_s4_support_cmd = powercfg /hibernate on + test_s4_cmd = start /B ping -n 3000 localhost + check_s4_cmd = tasklist | find /I ping + set_s4_cmd = rundll32.exe PowrProf.dll, SetSuspendState + kill_test_s4_cmd = taskkill /IM ping.exe /F variants: - Win2000: diff --git a/client/tests/kvm/tests/guest_s4.py b/client/tests/kvm/tests/guest_s4.py new file mode 100644 
index 000..5d8fbdf --- /dev/null +++ b/client/tests/kvm/tests/guest_s4.py @@ -0,0 +1,66 @@ +import logging, time +from autotest_lib.client.common_lib import error +import kvm_test_utils, kvm_utils + + +def run_guest_s4(test, params, env): + + Suspend guest to disk,supports both Linux Windows OSes. + + �...@param test: kvm test object. + �...@param params: Dictionary with test parameters. + �...@param env: Dictionary with the test environment. + + vm = kvm_test_utils.get_living_vm(env, params.get(main_vm)) + session = kvm_test_utils.wait_for_login(vm) + + logging.info(Checking whether VM supports S4) + status = session.get_command_status(params.get(check_s4_support_cmd)) + if status is None: + logging.error(Failed to check if S4 exists) + elif status != 0: + raise error.TestFail(Guest does not support S4) + + logging.info(Waiting for a while for X to start...) Yes, generally X starts a bit later than the SSH service, so I understand the time being here, however: * In fact we are waiting for all services of the guest to be up and functional, so depending on the level of load, I don't think 10s is gonna make it. So I suggest something = 30s Yeah,reasonable, we did ignore the circumstance with workload. But as you metioned,it can depend on different level of workload, therefore 30s may be not enough as well. Your idea that write a utility function waiting for some services up is good I think, thus it could be something like: def wait_services_up(services_list): ... and for this case: wait_services_up([Xorg]) for Linux and wait_services_up([explore.exe]) for Windows. Ok, sounds good to me! * It's also true that just wait for a given time and hope that it will be OK kinda sucks, so ideally we need to write utility functions to stablish as well as possible when all services of a host are fully booted up. Stated this way, it looks simple, but it's not. 
Autotest experience suggests that there's no real sane way to determine when a linux box is booted up, but we can take a semi-rational approach and verify if all services for the current run level have the status up or a similar approach. For windows, I was talking to Yaniv Kaul and it seems that processing the output of the 'sc query' command might give what we want. Bottom line, I'd like to add a TODO item, and write a function to stablish (fairly confidently) that a
Re: [Autotest] [PATCH] Using shutil.move to move result files in job.py
Ok, looks good. Commited as http://autotest.kernel.org/changeset/3844 On Mon, Oct 12, 2009 at 11:36 PM, Cao, Chen k...@redhat.com wrote: Since os.rename requires that the file is in the same partition with the dest directory, we would get a python OSError if the result directory is mounted to a nfs server (or different partition or something else alike). the traceback would be like: Traceback (most recent call last): File /usr/local/kvm/kvm-test/bin/autotest, line 52, in ? options.log) File /usr/local/kvm/kvm-test/bin/job.py, line 1274, in runjob myjob.complete(0) File /usr/local/kvm/kvm-test/bin/job.py, line 798, in complete os.rename(self.state_file, dest) OSError: [Errno 18] Invalid cross-device link Signed-off-by: Cao, Chen k...@redhat.com --- client/bin/job.py | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/client/bin/job.py b/client/bin/job.py index a1c07cc..ebfb3a3 100755 --- a/client/bin/job.py +++ b/client/bin/job.py @@ -938,7 +938,7 @@ class base_job(object): Clean up and exit # We are about to exit 'complete' so clean up the control file. dest = os.path.join(self.resultdir, os.path.basename(self.state_file)) - os.rename(self.state_file, dest) + shutil.move(self.state_file, dest) self.harness.run_complete() self.disable_external_logging() -- 1.6.0.6 ___ Autotest mailing list autot...@test.kernel.org http://test.kernel.org/cgi-bin/mailman/listinfo/autotest -- Lucas -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Autotest] [PATCH] Test 802.1Q vlan of nic
Hi Amos, thanks for the patch, here are my comments (pretty much concerning only coding style): On Wed, Sep 23, 2009 at 8:19 AM, Amos Kong ak...@redhat.com wrote: Test 802.1Q vlan of nic, config it by vconfig command. 1) Create two VMs 2) Setup guests in different vlan by vconfig and test communication by ping using hard-coded ip address 3) Setup guests in same vlan and test communication by ping 4) Recover the vlan config Signed-off-by: Amos Kong ak...@redhat.com --- client/tests/kvm/kvm_tests.cfg.sample | 6 +++ client/tests/kvm/tests/vlan_tag.py | 66 + 2 files changed, 72 insertions(+), 0 deletions(-) create mode 100644 client/tests/kvm/tests/vlan_tag.py diff --git a/client/tests/kvm/kvm_tests.cfg.sample b/client/tests/kvm/kvm_tests.cfg.sample index 285a38f..5a3f97d 100644 --- a/client/tests/kvm/kvm_tests.cfg.sample +++ b/client/tests/kvm/kvm_tests.cfg.sample @@ -145,6 +145,12 @@ variants: kill_vm = yes kill_vm_gracefully = no + - vlan_tag: install setup + type = vlan_tag + subnet2 = 192.168.123 + vlans = 10 20 + nic_mode = tap + nic_model = e1000 # NICs variants: diff --git a/client/tests/kvm/tests/vlan_tag.py b/client/tests/kvm/tests/vlan_tag.py new file mode 100644 index 000..2904276 --- /dev/null +++ b/client/tests/kvm/tests/vlan_tag.py @@ -0,0 +1,66 @@ +import logging, time +from autotest_lib.client.common_lib import error +import kvm_subprocess, kvm_test_utils, kvm_utils + +def run_vlan_tag(test, params, env): + + Test 802.1Q vlan of nic, config it by vconfig command. + + 1) Create two VMs + 2) Setup guests in different vlan by vconfig and test communication by ping + using hard-coded ip address + 3) Setup guests in same vlan and test communication by ping + 4) Recover the vlan config + + �...@param test: Kvm test object + �...@param params: Dictionary with the test parameters. + �...@param env: Dictionary with test environment. 
+ + + vm = [] + session = [] + subnet2 = params.get(subnet2) + vlans = params.get(vlans).split() + + vm.append(kvm_test_utils.get_living_vm(env, %s % params.get(main_vm))) + + params_vm2 = params.copy() + params_vm2['image_snapshot'] = yes + params_vm2['kill_vm_gracefully'] = no + params_vm2[address_index] = int(params.get(address_index, 0))+1 + vm.append(vm[0].clone(vm2, params_vm2)) + kvm_utils.env_register_vm(env, vm2, vm[1]) + if not vm[1].create(): + raise error.TestError, VM 'vm[1]' create faild In the above exception raise statement, the preferred form to do it is: raise error.TestError(VM 1 create failed) + for i in range(2): + session.append(kvm_test_utils.wait_for_login(vm[i])) + + try: + vconfig_cmd = vconfig add eth0 %s;ifconfig eth0.%s %s.%s + if session[0].get_command_status(vconfig_cmd % (vlans[0], + vlans[0], + subnet2, + 11)) != 0 or \ + session[1].get_command_status(vconfig_cmd % (vlans[1], + vlans[1], + subnet2, + 12)) != 0: In the above if statement, I'd assign the comparisons to variables to make the code more readable, like: try: vconfig_cmd = vconfig add eth0 %s;ifconfig eth0.%s %s.%s # Attempt to configure IPs for the VMs and record the results in boolean # variables ip_config_vm1_ok = (session[0].get_command_status( vconfig_cmd % (vlans[0], vlans[0], subnet2, 11)) == 0) ip_config_vm1_ok = (session[1].get_command_status( vconfig_cmd % (vlans[1], vlans[1], subnet2, 12)) == 0) if not ip_config_vm1_ok or not ip_config_vm2_ok: + raise error.TestError, Fail to config VMs ip address + if session[0].get_command_status(ping -c 2 %s.12 % subnet2) == 0: + raise error.TestFail(Guest is unexpectedly pingable in different + vlan) A similar comment applies to the above block + if session[1].get_command_status(vconfig rem eth0.%s;vconfig add eth0 + %s;ifconfig eth0.%s %s.12 % + (vlans[1], + vlans[0], + vlans[0], + subnet2)) != 0: Idem + raise error.TestError, Fail to config ip address of VM 'vm[1]' + if session[0].get_command_status(ping -c 2 %s.12 % 
subnet2) != 0: + raise error.TestFail, Fail to ping the guest in same vlan See
Re: sync guest calls made async on host - SQLite performance
On 10/14/2009 07:37 AM, Christoph Hellwig wrote: Christoph, wasn't there a bug where the guest didn't wait for requests in response to a barrier request? Can't remember anything like that. The bug was the complete lack of cache flush infrastructure for virtio, and the lack of advertising a volative write cache on ide. By complete flush infrastructure, you mean host-side and guest-side support for a new barrier command, yes? But can't this be also implemented using QUEUE_ORDERED_DRAIN, and on the host side disabling the backing device write cache? I'm talking about cache=none, primarily. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: sync guest calls made async on host - SQLite performance
I understand. However the test itself is fairly trivial representation of a single teir high-transactional load system. (Ie: a system that is logging a large number of events). The phoronix test suite simply hands over to a binary using sqlite and does 25000 sequential inserts. The overhead of the suite would be measured in milliseconds at the start and end. Over the life of the test (100-2500 seconds), it becomes insignificant noise. As I said, the relevant system calls itself for the running of the test are expressed as write write write fdatasync The writes are typically small (5-100) bytes. With that information, I believe the method of execution is mostly irrelevant. If people are still concerned, I can write a trivial application that should reproduce the behaviour. It still ultimately comes down to the guests expected semantics of fdatasync, and the actual behaviour relative to the hosts physical device. I am not saying that the currentl behaviour is wrong, I just want a clear understanding of what is expected by the kvm team vs what we are seeing. Regards... Matthew On 10/14/09, Dustin Kirkland kirkl...@canonical.com wrote: On Tue, Oct 13, 2009 at 9:09 PM, Matthew Tippett tippe...@gmail.com wrote: I believe that I have removed the benchmark from discussion, we are now looking at semantics of small writes followed by ... And quoting from Dustin === I have tried this, exactly as you have described. The tests took: * 1162.08033204 seconds on native hardware * 2306.68306303 seconds in a kvm using if=scsi disk * 405.382308006 seconds in a kvm using if=virtio Hang on now... My timings are from running the Phoronix test *as you described*. I have not looked at what magic is happening inside of this Phoronix test. I am most certainly *not* speaking as to the quality or legitimacy of the test. 
:-Dustin -- Sent from my mobile device -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported
Early implementations of virtio devices did not support barrier operations, but did commit the data to disk. In such cases, drain the queue to emulate barrier operations. Signed-off-by: Avi Kivity a...@redhat.com --- drivers/block/virtio_blk.c |6 +- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 43f1938..2627cc3 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -354,12 +354,16 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk-disk-driverfs_dev = vdev-dev; index++; - /* If barriers are supported, tell block layer that queue is ordered */ + /* If barriers are supported, tell block layer that queue is ordered; +* otherwise just drain the queue. +*/ if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH)) blk_queue_ordered(vblk-disk-queue, QUEUE_ORDERED_DRAIN_FLUSH, virtblk_prepare_flush); else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) blk_queue_ordered(vblk-disk-queue, QUEUE_ORDERED_TAG, NULL); + else + blk_queue_ordered(vblk-disk-queue, QUEUE_ORDERED_DRAIN, NULL); /* If disk is read-only in the host, the guest should obey */ if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) -- 1.6.2.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Autotest] [PATCH] Add pass through feature test (support SR-IOV)
Yolkfull, I've studied about single root IO virtualization before reviewing your patch, the general approach here looks good. There were some stylistic points as far as code is concerned, so I have rebased your patch against the latest trunk, and added some explanation about the features being tested and referenced (extracted from a Fedora 12 blueprint). Please let me know if you are OK with it, I guess I will review this patch a couple more times, as the code and the features being tested are fairly complex. Thanks! On Mon, Sep 14, 2009 at 11:20 PM, Yolkfull Chow yz...@redhat.com wrote: It supports both SR-IOV virtual functions' and physical NIC card pass through. * For SR-IOV virtual functions passthrough, we could specify the module parameter 'max_vfs' in config file. * For physical NIC card pass through, we should specify the device name(s). Signed-off-by: Yolkfull Chow yz...@redhat.com --- client/tests/kvm/kvm_tests.cfg.sample | 12 ++ client/tests/kvm/kvm_utils.py | 248 - client/tests/kvm/kvm_vm.py | 68 +- 3 files changed, 326 insertions(+), 2 deletions(-) diff --git a/client/tests/kvm/kvm_tests.cfg.sample b/client/tests/kvm/kvm_tests.cfg.sample index a83ef9b..c6037da 100644 --- a/client/tests/kvm/kvm_tests.cfg.sample +++ b/client/tests/kvm/kvm_tests.cfg.sample @@ -627,6 +627,18 @@ variants: variants: + - @no_passthrough: + pass_through = no + - nic_passthrough: + pass_through = pf + passthrough_devs = eth1 + - vfs_passthrough: + pass_through = vf + max_vfs = 7 + vfs_count = 7 + + +variants: - @basic: only Fedora Windows - @full: diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index dfca938..1fe3b31 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -1,5 +1,5 @@ import md5, thread, subprocess, time, string, random, socket, os, signal, pty -import select, re, logging, commands +import select, re, logging, commands, cPickle from autotest_lib.client.bin import utils from autotest_lib.client.common_lib 
import error import kvm_subprocess @@ -795,3 +795,249 @@ def md5sum_file(filename, size=None): size -= len(data) f.close() return o.hexdigest() + + +def get_full_id(pci_id): + + Get full PCI ID of pci_id. + + cmd = lspci -D | awk '/%s/ {print $1}' % pci_id + status, full_id = commands.getstatusoutput(cmd) + if status != 0: + return None + return full_id + + +def get_vendor_id(pci_id): + + Check out the device vendor ID according to PCI ID. + + cmd = lspci -n | awk '/%s/ {print $3}' % pci_id + return re.sub(:, , commands.getoutput(cmd)) + + +def release_pci_devs(dict): + + Release assigned PCI devices to host. + + def release_dev(pci_id): + base_dir = /sys/bus/pci + full_id = get_full_id(pci_id) + vendor_id = get_vendor_id(pci_id) + drv_path = os.path.join(base_dir, devices/%s/driver % full_id) + if 'pci-stub' in os.readlink(drv_path): + cmd = echo '%s' %s/new_id % (vendor_id, drv_path) + if os.system(cmd): + return False + + stub_path = os.path.join(base_dir, drivers/pci-stub) + cmd = echo '%s' %s/unbind % (full_id, stub_path) + if os.system(cmd): + return False + + prev_driver = self.dev_prev_drivers[pci_id] + cmd = echo '%s' %s/bind % (full_id, prev_driver) + if os.system(cmd): + return False + return True + + for pci_id in dict.keys(): + if not release_dev(pci_id): + logging.error(Failed to release device [%s] to host % pci_id) + else: + logging.info(Release device [%s] successfully % pci_id) + + +class PassThrough: + + Request passthroughable devices on host. It will check whether to request + PF(physical NIC cards) or VF(Virtual Functions). + + def __init__(self, type=nic_vf, max_vfs=None, names=None): + + Initialize parameter 'type' which could be: + nic_vf: Virtual Functions + nic_pf: Physical NIC card + mixed: Both includes VFs and PFs + + If pass through Physical NIC cards, we need to specify which devices + to be assigned, e.g. 'eth1 eth2'. + + If pass through Virtual Functions, we need to specify how many vfs + are going to be assigned, e.g. 
passthrough_count = 8 and max_vfs in + config file. + + @param type: Pass through device's type + @param max_vfs: parameter of module 'igb' + @param names: Physical NIC cards' names, e.g.'eth1 eth2 ...' + + self.type = type + if max_vfs: + self.max_vfs = int(max_vfs) + if names:
Re: [Qemu-devel] Release plan for 0.12.0
On Thursday 08 October 2009, Anthony Liguori wrote: Jens Osterkamp wrote: On Wednesday 30 September 2009, Anthony Liguori wrote: Please add to this list and I'll collect it all and post it somewhere. What about Or Gerlitz' raw backend driver ? I did not see it go in yet, or did I miss something ? The patch seems to have not been updated after the initial posting and the first feedback cycle. I'm generally inclined to oppose the functionality as I don't think it offers any advantages over the existing backends. There are two reasons why I think this backend is important: - As an easy way to provide isolation between guests (private ethernet port aggregator, PEPA) and external enforcement of network privileges (virtual ethernet port aggregator, VEPA) using the macvlan subsystem. - As a counterpart to the vhost_net driver, providing an identical user interface with or without vhost_net acceleration in the kernel. Arnd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Release plan for 0.12.0
On Thu, Oct 08, 2009 at 09:21:04AM -0500, Anthony Liguori wrote: Jens Osterkamp wrote: On Wednesday 30 September 2009, Anthony Liguori wrote: o VMState conversion -- I expect most of the pc target to be completed o qdev conversion -- I hope that we'll get most of the pc target completely converted to qdev o storage live migration o switch to SeaBIOS (need to finish porting features from Bochs) o switch to gPXE (need to resolve slirp tftp server issue) o KSM integration o in-kernel APIC support for KVM o guest SMP support for KVM o updates to the default pc machine type Please add to this list and I'll collect it all and post it somewhere. What about Or Gerlitz' raw backend driver ? I did not see it go in yet, or did I miss something ? The patch seems to have not been updated after the initial posting and the first feedback cycle. Looks like Or has abandoned it. I have an updated version which works with new APIs, etc. Let me post it and we'll go from there. I'm generally inclined to oppose the functionality as I don't think it offers any advantages over the existing backends. I patch it in and use it all the time. It's much easier to setup on a random machine than a bridged config. -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: sync guest calls made async on host - SQLite performance
On Wed, Oct 14, 2009 at 08:03:41PM +0900, Avi Kivity wrote: Can't remember anything like that. The bug was the complete lack of cache flush infrastructure for virtio, and the lack of advertising a volatile write cache on ide. By complete flush infrastructure, you mean host-side and guest-side support for a new barrier command, yes? The cache flush command, not barrier command. The new virtio code implements barrier the same way we do for IDE and SCSI - all barrier semantics are implemented by generic code in the block layer by draining the queues, the only thing we send over the wire are cache flush commands in strategic places. But can't this be also implemented using QUEUE_ORDERED_DRAIN, and on the host side disabling the backing device write cache? I'm talking about cache=none, primarily. Yes, it could. But as I found out in a long discussion with Stephen it's not actually necessary. All filesystems do the right thing for a device not claiming to support barriers if it doesn't include write caches, that is implement ordering internally. So there is no urge to set QUEUE_ORDERED_DRAIN for the case without write cache. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCHv2 1/2] Complete cpu initialization before signaling main thread.
Otherwise some cpus may start executing code before others are fully initialized. Signed-off-by: Gleb Natapov g...@redhat.com --- v1-v2: - reinit cpu_single_env after qemu_cond_wait() qemu-kvm.c | 29 +++-- 1 files changed, 15 insertions(+), 14 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 62ca050..a104ab8 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1954,18 +1954,6 @@ static void process_irqchip_events(CPUState *env) static int kvm_main_loop_cpu(CPUState *env) { -setup_kernel_sigmask(env); - -pthread_mutex_lock(qemu_mutex); - -kvm_arch_init_vcpu(env); -#ifdef TARGET_I386 -kvm_tpr_vcpu_start(env); -#endif - -cpu_single_env = env; -kvm_arch_load_regs(env); - while (1) { int run_cpu = !is_cpu_stopped(env); if (run_cpu !kvm_irqchip_in_kernel(kvm_context)) { @@ -2003,15 +1991,28 @@ static void *ap_main_loop(void *_env) on_vcpu(env, kvm_arch_do_ioperm, data); #endif -/* signal VCPU creation */ +setup_kernel_sigmask(env); + pthread_mutex_lock(qemu_mutex); +cpu_single_env = env; + +kvm_arch_init_vcpu(env); +#ifdef TARGET_I386 +kvm_tpr_vcpu_start(env); +#endif + +kvm_arch_load_regs(env); + +/* signal VCPU creation */ current_env-created = 1; pthread_cond_signal(qemu_vcpu_cond); /* and wait for machine initialization */ while (!qemu_system_ready) qemu_cond_wait(qemu_system_cond); -pthread_mutex_unlock(qemu_mutex); + +/* re-initialize cpu_single_env after re-acquiring qemu_mutex */ +cpu_single_env = env; kvm_main_loop_cpu(env); return NULL; -- 1.6.3.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] Don't sync mpstate to/from kernel when unneeded.
mp_state, unlike other cpu state, can be changed not only from vcpu context it belongs to, but by other vcpus too. That makes its loading from kernel/saving back not safe if mp_state value is changed inside kernel between load and save. For example vcpu 1 loads mp_state into user-space and the state is RUNNING, vcpu 0 sends INIT/SIPI to vcpu 1 so in-kernel mp_state becomes SIPI, vcpu 1 saves the user-space copy into kernel and calls vcpu_run(). SIPI state is lost. The patch copies mp_state into kernel only when it is known that the in-kernel value is outdated. This happens on reset and vmload. Signed-off-by: Gleb Natapov g...@redhat.com --- hw/apic.c |1 + monitor.c |2 ++ qemu-kvm.c|9 - qemu-kvm.h|1 - target-i386/machine.c |3 +++ 5 files changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/apic.c b/hw/apic.c index 2952675..729 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -512,6 +512,7 @@ void apic_init_reset(CPUState *env) if (kvm_enabled() qemu_kvm_irqchip_in_kernel()) { env-mp_state = env-halted ? KVM_MP_STATE_UNINITIALIZED : KVM_MP_STATE_RUNNABLE; +kvm_load_mpstate(env); } #endif } diff --git a/monitor.c b/monitor.c index 7f0f5a9..dd8f2ca 100644 --- a/monitor.c +++ b/monitor.c @@ -350,6 +350,7 @@ static CPUState *mon_get_cpu(void) mon_set_cpu(0); } cpu_synchronize_state(cur_mon-mon_cpu); +kvm_save_mpstate(cur_mon-mon_cpu); return cur_mon-mon_cpu; } @@ -377,6 +378,7 @@ static void do_info_cpus(Monitor *mon) for(env = first_cpu; env != NULL; env = env-next_cpu) { cpu_synchronize_state(env); +kvm_save_mpstate(env); monitor_printf(mon, %c CPU #%d:, (env == mon-mon_cpu) ? 
'*' : ' ', env-cpu_index); diff --git a/qemu-kvm.c b/qemu-kvm.c index a104ab8..267222d 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1609,11 +1609,6 @@ static void on_vcpu(CPUState *env, void (*func)(void *data), void *data) void kvm_arch_get_registers(CPUState *env) { kvm_arch_save_regs(env); - kvm_arch_save_mpstate(env); -#ifdef KVM_CAP_MP_STATE - if (kvm_irqchip_in_kernel(kvm_context)) - env-halted = (env-mp_state == KVM_MP_STATE_HALTED); -#endif } static void do_kvm_cpu_synchronize_state(void *_env) @@ -1707,6 +1702,10 @@ static void kvm_do_save_mpstate(void *_env) CPUState *env = _env; kvm_arch_save_mpstate(env); +#ifdef KVM_CAP_MP_STATE +if (kvm_irqchip_in_kernel(kvm_context)) +env-halted = (env-mp_state == KVM_MP_STATE_HALTED); +#endif } void kvm_save_mpstate(CPUState *env) diff --git a/qemu-kvm.h b/qemu-kvm.h index d6748c7..e2a87b8 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -1186,7 +1186,6 @@ void kvm_arch_get_registers(CPUState *env); static inline void kvm_arch_put_registers(CPUState *env) { kvm_load_registers(env); -kvm_load_mpstate(env); } void kvm_cpu_synchronize_state(CPUState *env); diff --git a/target-i386/machine.c b/target-i386/machine.c index e640dad..16d9c57 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -324,6 +324,7 @@ static void cpu_pre_save(void *opaque) int i, bit; cpu_synchronize_state(env); +kvm_save_mpstate(env); /* FPU */ env-fpus_vmstate = (env-fpus ~0x3800) | (env-fpstt 0x7) 11; @@ -385,6 +386,8 @@ static int cpu_post_load(void *opaque, int version_id) } tlb_flush(env, 1); +kvm_load_mpstate(env); + return 0; } -- 1.6.3.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Release plan for 0.12.0
Arnd Bergmann wrote: There are two reasons why I think this backend is important: - As an easy way to provide isolation between guests (private ethernet port aggregator, PEPA) and external enforcement of network priviledges (virtual ethernet port aggregator, VEPA) using the macvlan subsystem. Can't this all be done with tap and a bridge? Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Release plan for 0.12.0
On Wed, Oct 14, 2009 at 08:53:55AM -0500, Anthony Liguori wrote: Arnd Bergmann wrote: There are two reasons why I think this backend is important: - As an easy way to provide isolation between guests (private ethernet port aggregator, PEPA) and external enforcement of network priviledges (virtual ethernet port aggregator, VEPA) using the macvlan subsystem. Can't this all be done with tap and a bridge? Not with existing kernels, I think. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Release plan for 0.12.0
On Wed, Oct 14, 2009 at 03:09:28PM +0200, Arnd Bergmann wrote: On Thursday 08 October 2009, Anthony Liguori wrote: Jens Osterkamp wrote: On Wednesday 30 September 2009, Anthony Liguori wrote: Please add to this list and I'll collect it all and post it somewhere. What about Or Gerlitz' raw backend driver ? I did not see it go in yet, or did I miss something ? The patch seems to have not been updated after the initial posting and the first feedback cycle. I'm generally inclined to oppose the functionality as I don't think it offers any advantages over the existing backends. There are two reasons why I think this backend is important: - As an easy way to provide isolation between guests (private ethernet port aggregator, PEPA) and external enforcement of network priviledges (virtual ethernet port aggregator, VEPA) using the macvlan subsystem. - As a counterpart to the vhost_net driver, providing an identical user interface with or without vhost_net acceleration in the kernel. Arnd I think raw sockets also support RX mac/vlan filtering in kernel, which might be faster than doing it in virtio in userspace as it's done now. -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Release plan for 0.12.0
Michael S. Tsirkin wrote: Looks like Or has abandoned it. I have an updated version which works with new APIs, etc. Let me post it and we'll go from there. I'm generally inclined to oppose the functionality as I don't think it offers any advantages over the existing backends. I patch it in and use it all the time. It's much easier to setup on a random machine than a bridged config. Having two things that do the same thing is just going to lead to user confusion. If the problem is tap is too hard to setup, we should try to simplify tap configuration. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Release plan for 0.12.0
On Wed, Oct 14, 2009 at 09:17:15AM -0500, Anthony Liguori wrote: Michael S. Tsirkin wrote: Looks like Or has abandoned it. I have an updated version which works with new APIs, etc. Let me post it and we'll go from there. I'm generally inclined to oppose the functionality as I don't think it offers any advantages over the existing backends. I patch it in and use it all the time. It's much easier to setup on a random machine than a bridged config. Having two things that do the same thing is just going to lead to user confusion. They do not do the same thing. With raw socket you can use windows update without a bridge in the host, with tap you can't. If the problem is tap is too hard to setup, we should try to simplify tap configuration. The problem is bridge is too hard to setup. Simplifying that is a good idea, but outside the scope of the qemu project. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Autotest] [PATCH] KVM test: Add PCI pass through test
FYI, Amit pointed out that the correct name for this test would be PCI device assignment, so the final version of this patch will be called PCI device assignment instead. On Wed, Oct 14, 2009 at 9:08 AM, Lucas Meneghel Rodrigues l...@redhat.com wrote: Add a new PCI pass through test. It supports both SR-IOV virtual functions and physical NIC card pass through. Single Root I/O Virtualization (SR-IOV) allows a single PCI device to be shared amongst multiple virtual machines while retaining the performance benefit of assigning a PCI device to a virtual machine. A common example is where a single SR-IOV capable NIC - with perhaps only a single physical network port - might be shared with multiple virtual machines by assigning a virtual function to each VM. SR-IOV support is implemented in the kernel. The core implementation is contained in the PCI subsystem, but there must also be driver support for both the Physical Function (PF) and Virtual Function (VF) devices. With an SR-IOV capable device one can allocate VFs from a PF. The VFs surface as PCI devices which are backed on the physical PCI device by resources (queues, and register sets). Device support: In 2.6.30, the Intel® 82576 Gigabit Ethernet Controller is the only SR-IOV capable device supported. The igb driver has PF support and the igbvf has VF support. In 2.6.31 the Neterion® X3100™ is supported as well. This device uses the same vxge driver for the PF as well as the VFs. In order to configure the test: * For SR-IOV virtual functions passthrough, we could specify the module parameter 'max_vfs' in config file. * For physical NIC card pass through, we should specify the device name(s). 
Signed-off-by: Yolkfull Chow yz...@redhat.com --- client/tests/kvm/kvm_tests.cfg.sample | 11 ++- client/tests/kvm/kvm_utils.py | 278 + client/tests/kvm/kvm_vm.py | 72 + 3 files changed, 360 insertions(+), 1 deletions(-) diff --git a/client/tests/kvm/kvm_tests.cfg.sample b/client/tests/kvm/kvm_tests.cfg.sample index cc3228a..1dad188 100644 --- a/client/tests/kvm/kvm_tests.cfg.sample +++ b/client/tests/kvm/kvm_tests.cfg.sample @@ -786,13 +786,22 @@ variants: only default image_format = raw - variants: - @smallpages: - hugepages: pre_command = /usr/bin/python scripts/hugepage.py /mnt/kvm_hugepage extra_params += -mem-path /mnt/kvm_hugepage +variants: + - @no_passthrough: + pass_through = no + - nic_passthrough: + pass_through = pf + passthrough_devs = eth1 + - vfs_passthrough: + pass_through = vf + max_vfs = 7 + vfs_count = 7 variants: - @basic: diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index 53b664a..0e3398c 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -788,3 +788,281 @@ def md5sum_file(filename, size=None): size -= len(data) f.close() return o.hexdigest() + + +def get_full_id(pci_id): + + Get full PCI ID of pci_id. + + cmd = lspci -D | awk '/%s/ {print $1}' % pci_id + status, full_id = commands.getstatusoutput(cmd) + if status != 0: + return None + return full_id + + +def get_vendor_id(pci_id): + + Check out the device vendor ID according to PCI ID. + + cmd = lspci -n | awk '/%s/ {print $3}' % pci_id + return re.sub(:, , commands.getoutput(cmd)) + + +def release_dev(pci_id, pci_dict): + + Release a single PCI device. 
+ + @param pci_id: PCI ID of a given PCI device + @param pci_dict: Dictionary with information about PCI devices + + base_dir = /sys/bus/pci + full_id = get_full_id(pci_id) + vendor_id = get_vendor_id(pci_id) + drv_path = os.path.join(base_dir, devices/%s/driver % full_id) + if 'pci-stub' in os.readlink(drv_path): + cmd = echo '%s' %s/new_id % (vendor_id, drv_path) + if os.system(cmd): + return False + + stub_path = os.path.join(base_dir, drivers/pci-stub) + cmd = echo '%s' %s/unbind % (full_id, stub_path) + if os.system(cmd): + return False + + prev_driver = pci_dict[pci_id] + cmd = echo '%s' %s/bind % (full_id, prev_driver) + if os.system(cmd): + return False + return True + + +def release_pci_devs(pci_dict): + + Release all PCI devices assigned to host. + + @param pci_dict: Dictionary with information about PCI devices + + for pci_id in pci_dict: + if not release_dev(pci_id, pci_dict): + logging.error(Failed to release device [%s] to host % pci_id) + else: + logging.info(Release device [%s] successfully % pci_id) + + +class PassThrough(object): + + Request passthroughable devices on host. It will check whether to
[PATCH] v4: allow userspace to adjust kvmclock offset
When we migrate a kvm guest that uses pvclock between two hosts, we may suffer a large skew. This is because there can be significant differences between the monotonic clock of the hosts involved. When a new host with a much larger monotonic time starts running the guest, the view of time will be significantly impacted. Situation is much worse when we do the opposite, and migrate to a host with a smaller monotonic clock. This proposed ioctl will allow userspace to inform us what is the monotonic clock value in the source host, so we can keep the time skew short, and more importantly, never goes backwards. Userspace may also need to trigger the current data, since from the first migration onwards, it won't be reflected by a simple call to clock_gettime() anymore. [ v2: uses a struct with a padding ] [ v3: provide an ioctl to get clock data too ] [ v4: used fixed-width signed type for delta ] Signed-off-by: Glauber Costa glom...@redhat.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/x86.c | 35 ++- include/linux/kvm.h |7 +++ 3 files changed, 42 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 179a919..c9b0d9f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,6 +410,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + s64 kvmclock_offset; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9601bc6..09f31e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -699,7 +699,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ vcpu-hv_clock.system_time = ts.tv_nsec + -(NSEC_PER_SEC * (u64)ts.tv_sec); +(NSEC_PER_SEC * (u64)ts.tv_sec) + v-kvm-arch.kvmclock_offset; + /* * The interface expects us to write an even number signaling that the * update is finished. 
Since the guest won't see the intermediate @@ -2441,6 +2442,38 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_SET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + s64 delta; + + r = -EFAULT; + if (copy_from_user(user_ns, argp, sizeof(user_ns))) + goto out; + + r = 0; + ktime_get_ts(now); + now_ns = timespec_to_ns(now); + delta = user_ns.clock - now_ns; + kvm-arch.kvmclock_offset = delta; + break; + } + case KVM_GET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + + ktime_get_ts(now); + now_ns = timespec_to_ns(now); + user_ns.clock = kvm-arch.kvmclock_offset + now_ns; + + if (copy_to_user(argp, user_ns, sizeof(user_ns))) + r = -EFAULT; + + break; + } + default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index f8f8900..ad0ecbc 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -497,6 +497,11 @@ struct kvm_irqfd { __u8 pad[20]; }; +struct kvm_clock_data { + __u64 clock; + __u64 pad[2]; +}; + /* * ioctls for VM fds */ @@ -546,6 +551,8 @@ struct kvm_irqfd { #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID_IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_SET_CLOCK_IOW(KVMIO, 0x7a, struct kvm_clock_data) +#define KVM_GET_CLOCK_IOW(KVMIO, 0x7b, struct kvm_clock_data) /* * ioctls for vcpu fds -- 1.6.2.2 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported
On Wed, Oct 14, 2009 at 7:03 AM, Avi Kivity a...@redhat.com wrote: Early implementations of virtio devices did not support barrier operations, but did commit the data to disk. In such cases, drain the queue to emulate barrier operations. would this help on the (i think common) situation with XFS on a virtio-enabled VM, using LVM-backed storage; where LVM just loses barriers. -- Javier -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: Release plan for 0.12.0
Michael S. Tsirkin wrote: On Wed, Oct 14, 2009 at 09:17:15AM -0500, Anthony Liguori wrote: Michael S. Tsirkin wrote: Looks like Or has abandoned it. I have an updated version which works with new APIs, etc. Let me post it and we'll go from there. I'm generally inclined to oppose the functionality as I don't think it offers any advantages over the existing backends. I patch it in and use it all the time. It's much easier to setup on a random machine than a bridged config. Having two things that do the same thing is just going to lead to user confusion. They do not do the same thing. With raw socket you can use windows update without a bridge in the host, with tap you can't. On the other hand, with raw socket, guest Windows can't access files on the host's Samba share can it? So it's not that useful even for Windows guests. If the problem is tap is too hard to setup, we should try to simplify tap configuration. The problem is bridge is too hard to setup. Simplifying that is a good idea, but outside the scope of the qemu project. I venture it's important enough for qemu that it's worth working on that. Something that looks like the raw socket but behaves like an automatically instantiated bridge attached to the bound interface would be a useful interface. I don't have much time, but I'll help anybody who wants to do that. -- Jamie -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported
Avi Kivity wrote: Early implementations of virtio devices did not support barrier operations, but did commit the data to disk. In such cases, drain the queue to emulate barrier operations. Are there any implementations currently that actually support barriers? As far as I remember there's no way to invoke barriers from a user-space application on linux, and this is how kvm/qemu is running on this OS. Thanks! /mjt -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported
On Wed, Oct 14, 2009 at 07:38:45PM +0400, Michael Tokarev wrote: Avi Kivity wrote: Early implementations of virtio devices did not support barrier operations, but did commit the data to disk. In such cases, drain the queue to emulate barrier operations. Are there any implementation currently that actually supports barriers? As far as I remember there's no way to invoke barriers from a user-space application on linux, and this is how kvm/qemu is running on this OS. Ignore all the barrier talk. The way Linux uses the various storage transport the primitives are queue draining (done entirely in the guest block layer) and cache flushes. Fdatasync is exactly the same primitive as a WIN FLUSH CACHE in ATA or SYNCHRONIZE cache in SCSI modulo the lack of ranges in fdatasync - but that is just a performance optimization and not actually used by Linux guests for now. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: Release plan for 0.12.0
On Wed, Oct 14, 2009 at 04:19:17PM +0100, Jamie Lokier wrote: Michael S. Tsirkin wrote: On Wed, Oct 14, 2009 at 09:17:15AM -0500, Anthony Liguori wrote: Michael S. Tsirkin wrote: Looks like Or has abandoned it. I have an updated version which works with new APIs, etc. Let me post it and we'll go from there. I'm generally inclined to oppose the functionality as I don't think it offers any advantages over the existing backends. I patch it in and use it all the time. It's much easier to setup on a random machine than a bridged config. Having two things that do the same thing is just going to lead to user confusion. They do not do the same thing. With raw socket you can use windows update without a bridge in the host, with tap you can't. On the other hand, with raw socket, guest Windows can't access files on the host's Samba share can it? So it's not that useful even for Windows guests. I guess this depends on whether you use the same host for samba :) If the problem is tap is too hard to setup, we should try to simplify tap configuration. The problem is bridge is too hard to setup. Simplifying that is a good idea, but outside the scope of the qemu project. I venture it's important enough for qemu that it's worth working on that. Something that looks like the raw socket but behaves like an automatically instantiated bridge attached to the bound interface would be a useful interface. I agree, that would be good to have. I don't have much time, but I'll help anybody who wants to do that. -- Jamie -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Latest -git qemu-kvm doesn't boot an x86 kernel
Hi, I am trying qemu-system-x86_64 on a x86 host running 2.6.30-2 (debian testing) kernel and trying to boot latest linus git kernel (x86). The kernel hangs after printing the below [ 4.394392] ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 11 [4.397837] virtio-pci :00:03.0: PCI INT A - Link[LNKC] - GSI 11 (level, high) - IRQ 11 [4.436489] ACPI: PCI Interrupt Link [LNKD] enabled at IRQ 10 [4.439829] virtio-pci :00:04.0: PCI INT A - Link[LNKD] - GSI 10 (level, high) - IRQ 10 [4.462538] vda: [4.526913] input: ImExPS/2 Generic Explorer Mouse as /devices/platform/i8042/serio1/input/input3 [5.349554] async/1 used greatest stack depth: 5872 bytes left An earlier version of kvm booted the new kernel fine. So the vm disk image user space should all be fine. The older version of kvm that worked fine is QEMU PC emulator version 0.10.50 (kvm-devel) Any patches I need to try ? -aneesh -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/3] get rid of kvm vcpu structure
Hello, Done in three parts, the following patches get rid of vcpu structure in qemu-kvm. All state is now held in CPUState, getting us a bit closer to upstream qemu again. The last pass converts us to the use of kvm_vcpu_ioctl, allowing more code to be shared. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] change function signatures so that they don't take a vcpu argument
At this point, vcpu arguments are passed only for the fd field. We already provide that in env, as kvm_fd. Replace it. Signed-off-by: Glauber Costa glom...@redhat.com --- cpu-defs.h |1 - hw/apic.c |4 +- kvm-tpr-opt.c | 16 +- qemu-kvm-x86.c | 91 ++-- qemu-kvm.c | 97 +++ qemu-kvm.h | 74 ++- 6 files changed, 134 insertions(+), 149 deletions(-) diff --git a/cpu-defs.h b/cpu-defs.h index 1f48267..cf502e9 100644 --- a/cpu-defs.h +++ b/cpu-defs.h @@ -141,7 +141,6 @@ struct qemu_work_item; struct KVMCPUState { pthread_t thread; int signalled; -void *vcpu_ctx; struct qemu_work_item *queued_work_first, *queued_work_last; int regs_modified; }; diff --git a/hw/apic.c b/hw/apic.c index b8fe529..9e707bd 100644 --- a/hw/apic.c +++ b/hw/apic.c @@ -900,7 +900,7 @@ static void kvm_kernel_lapic_save_to_user(APICState *s) struct kvm_lapic_state *kapic = apic; int i, v; -kvm_get_lapic(s-cpu_env-kvm_cpu_state.vcpu_ctx, kapic); +kvm_get_lapic(s-cpu_env, kapic); s-id = kapic_reg(kapic, 0x2) 24; s-tpr = kapic_reg(kapic, 0x8); @@ -953,7 +953,7 @@ static void kvm_kernel_lapic_load_from_user(APICState *s) kapic_set_reg(klapic, 0x38, s-initial_count); kapic_set_reg(klapic, 0x3e, s-divide_conf); -kvm_set_lapic(s-cpu_env-kvm_cpu_state.vcpu_ctx, klapic); +kvm_set_lapic(s-cpu_env, klapic); } #endif diff --git a/kvm-tpr-opt.c b/kvm-tpr-opt.c index f7b6f3b..932b49b 100644 --- a/kvm-tpr-opt.c +++ b/kvm-tpr-opt.c @@ -70,7 +70,7 @@ static uint8_t read_byte_virt(CPUState *env, target_ulong virt) { struct kvm_sregs sregs; -kvm_get_sregs(env-kvm_cpu_state.vcpu_ctx, sregs); +kvm_get_sregs(env, sregs); return ldub_phys(map_addr(sregs, virt, NULL)); } @@ -78,7 +78,7 @@ static void write_byte_virt(CPUState *env, target_ulong virt, uint8_t b) { struct kvm_sregs sregs; -kvm_get_sregs(env-kvm_cpu_state.vcpu_ctx, sregs); +kvm_get_sregs(env, sregs); stb_phys(map_addr(sregs, virt, NULL), b); } @@ -86,7 +86,7 @@ static __u64 kvm_rsp_read(CPUState *env) { struct kvm_regs regs; 
-kvm_get_regs(env-kvm_cpu_state.vcpu_ctx, regs); +kvm_get_regs(env, regs); return regs.rsp; } @@ -192,7 +192,7 @@ static int bios_is_mapped(CPUState *env, uint64_t rip) if (bios_enabled) return 1; -kvm_get_sregs(env-kvm_cpu_state.vcpu_ctx, sregs); +kvm_get_sregs(env, sregs); probe = (rip 0xf000) + 0xe; phys = map_addr(sregs, probe, perms); @@ -240,7 +240,7 @@ static int enable_vapic(CPUState *env) if (pcr_cpu 0) return 0; -kvm_enable_vapic(env-kvm_cpu_state.vcpu_ctx, vapic_phys + (pcr_cpu 7)); +kvm_enable_vapic(env, vapic_phys + (pcr_cpu 7)); cpu_physical_memory_rw(vapic_phys + (pcr_cpu 7) + 4, one, 1, 1); bios_enabled = 1; @@ -313,7 +313,7 @@ void kvm_tpr_access_report(CPUState *env, uint64_t rip, int is_write) void kvm_tpr_vcpu_start(CPUState *env) { -kvm_enable_tpr_access_reporting(env-kvm_cpu_state.vcpu_ctx); +kvm_enable_tpr_access_reporting(env); if (bios_enabled) enable_vapic(env); } @@ -363,7 +363,7 @@ static void vtpr_ioport_write(void *opaque, uint32_t addr, uint32_t val) struct kvm_sregs sregs; uint32_t rip; -kvm_get_regs(env-kvm_cpu_state.vcpu_ctx, regs); +kvm_get_regs(env, regs); rip = regs.rip - 2; write_byte_virt(env, rip, 0x66); write_byte_virt(env, rip + 1, 0x90); @@ -371,7 +371,7 @@ static void vtpr_ioport_write(void *opaque, uint32_t addr, uint32_t val) return; if (!bios_is_mapped(env, rip)) printf(bios not mapped?\n); -kvm_get_sregs(env-kvm_cpu_state.vcpu_ctx, sregs); +kvm_get_sregs(env, sregs); for (addr = 0xf000u; addr = 0x8000u; addr -= 4096) if (map_addr(sregs, addr, NULL) == 0xfee0u) { real_tpr = addr + 0x80; diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index fffcfd8..8c4140d 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -172,14 +172,14 @@ static int kvm_handle_tpr_access(CPUState *env) } -int kvm_enable_vapic(kvm_vcpu_context_t vcpu, uint64_t vapic) +int kvm_enable_vapic(CPUState *env, uint64_t vapic) { int r; struct kvm_vapic_addr va = { .vapic_addr = vapic, }; - r = ioctl(vcpu-fd, KVM_SET_VAPIC_ADDR, va); + r = ioctl(env-kvm_fd, 
KVM_SET_VAPIC_ADDR, va); if (r == -1) { r = -errno; perror(kvm_enable_vapic); @@ -281,12 +281,12 @@ int kvm_destroy_memory_alias(kvm_context_t kvm, uint64_t phys_start) #ifdef KVM_CAP_IRQCHIP -int kvm_get_lapic(kvm_vcpu_context_t vcpu, struct kvm_lapic_state *s) +int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s) { int r; if
[PATCH 2/3] get rid of vcpu structure
We have no use for it anymore. Only trace of it was in vcpu_create. Make it disappear. Signed-off-by: Glauber Costa glom...@redhat.com --- qemu-kvm.c | 11 +++ qemu-kvm.h |5 - 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/qemu-kvm.c b/qemu-kvm.c index 700d030..7943281 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -440,16 +440,13 @@ static void kvm_create_vcpu(CPUState *env, int id) { long mmap_size; int r; -kvm_vcpu_context_t vcpu_ctx = qemu_malloc(sizeof(struct kvm_vcpu_context)); r = kvm_vm_ioctl(kvm_state, KVM_CREATE_VCPU, id); if (r 0) { fprintf(stderr, kvm_create_vcpu: %m\n); -goto err; +return; } -vcpu_ctx-fd = r; - env-kvm_fd = r; env-kvm_state = kvm_state; @@ -459,7 +456,7 @@ static void kvm_create_vcpu(CPUState *env, int id) goto err_fd; } env-kvm_run = -mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu_ctx-fd, +mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, env-kvm_fd, 0); if (env-kvm_run == MAP_FAILED) { fprintf(stderr, mmap vcpu area: %m\n); @@ -468,9 +465,7 @@ static void kvm_create_vcpu(CPUState *env, int id) return; err_fd: -close(vcpu_ctx-fd); - err: -free(vcpu_ctx); +close(env-kvm_fd); } static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id) diff --git a/qemu-kvm.h b/qemu-kvm.h index abcb98d..588bc80 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -76,12 +76,7 @@ struct kvm_context { int max_gsi; }; -struct kvm_vcpu_context { -int fd; -}; - typedef struct kvm_context *kvm_context_t; -typedef struct kvm_vcpu_context *kvm_vcpu_context_t; #include kvm.h int kvm_alloc_kernel_memory(kvm_context_t kvm, unsigned long memory, -- 1.6.2.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] use upstream kvm_vcpu_ioctl
Signed-off-by: Glauber Costa glom...@redhat.com --- kvm-all.c |3 --- qemu-kvm-x86.c | 20 ++-- qemu-kvm.c | 26 +- qemu-kvm.h |1 + 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 1356aa8..5ea999e 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -861,7 +861,6 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) return ret; } -#ifdef KVM_UPSTREAM int kvm_vcpu_ioctl(CPUState *env, int type, ...) { int ret; @@ -879,8 +878,6 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...) return ret; } -#endif - int kvm_has_sync_mmu(void) { #ifdef KVM_CAP_SYNC_MMU diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 8c4140d..fd0e6a9 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -179,7 +179,7 @@ int kvm_enable_vapic(CPUState *env, uint64_t vapic) .vapic_addr = vapic, }; - r = ioctl(env-kvm_fd, KVM_SET_VAPIC_ADDR, va); + r = kvm_vcpu_ioctl(env, KVM_SET_VAPIC_ADDR, va); if (r == -1) { r = -errno; perror(kvm_enable_vapic); @@ -286,7 +286,7 @@ int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s) int r; if (!kvm_irqchip_in_kernel()) return 0; - r = ioctl(env-kvm_fd, KVM_GET_LAPIC, s); + r = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, s); if (r == -1) { r = -errno; perror(kvm_get_lapic); @@ -299,7 +299,7 @@ int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s) int r; if (!kvm_irqchip_in_kernel()) return 0; - r = ioctl(env-kvm_fd, KVM_SET_LAPIC, s); + r = kvm_vcpu_ioctl(env, KVM_SET_LAPIC, s); if (r == -1) { r = -errno; perror(kvm_set_lapic); @@ -424,7 +424,7 @@ int kvm_get_msrs(CPUState *env, struct kvm_msr_entry *msrs, int n) kmsrs-nmsrs = n; memcpy(kmsrs-entries, msrs, n * sizeof *msrs); -r = ioctl(env-kvm_fd, KVM_GET_MSRS, kmsrs); +r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs); e = errno; memcpy(msrs, kmsrs-entries, n * sizeof *msrs); free(kmsrs); @@ -439,7 +439,7 @@ int kvm_set_msrs(CPUState *env, struct kvm_msr_entry *msrs, int n) kmsrs-nmsrs = n; memcpy(kmsrs-entries, msrs, n * sizeof *msrs); -r = ioctl(env-kvm_fd, KVM_SET_MSRS, kmsrs); +r = 
kvm_vcpu_ioctl(env, KVM_SET_MSRS, kmsrs); e = errno; free(kmsrs); errno = e; @@ -464,7 +464,7 @@ int kvm_get_mce_cap_supported(kvm_context_t kvm, uint64_t *mce_cap, int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap) { #ifdef KVM_CAP_MCE -return ioctl(env-kvm_fd, KVM_X86_SETUP_MCE, mcg_cap); +return kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap); #else return -ENOSYS; #endif @@ -473,7 +473,7 @@ int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap) int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m) { #ifdef KVM_CAP_MCE -return ioctl(env-kvm_fd, KVM_X86_SET_MCE, m); +return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m); #else return -ENOSYS; #endif @@ -563,7 +563,7 @@ int kvm_setup_cpuid(CPUState *env, int nent, cpuid-nent = nent; memcpy(cpuid-entries, entries, nent * sizeof(*entries)); - r = ioctl(env-kvm_fd, KVM_SET_CPUID, cpuid); + r = kvm_vcpu_ioctl(env, KVM_SET_CPUID, cpuid); free(cpuid); return r; @@ -579,7 +579,7 @@ int kvm_setup_cpuid2(CPUState *env, int nent, cpuid-nent = nent; memcpy(cpuid-entries, entries, nent * sizeof(*entries)); - r = ioctl(env-kvm_fd, KVM_SET_CPUID2, cpuid); + r = kvm_vcpu_ioctl(env, KVM_SET_CPUID2, cpuid); if (r == -1) { fprintf(stderr, kvm_setup_cpuid2: %m\n); r = -errno; @@ -634,7 +634,7 @@ static int tpr_access_reporting(CPUState *env, int enabled) r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_VAPIC); if (r = 0) return -ENOSYS; - r = ioctl(env-kvm_fd, KVM_TPR_ACCESS_REPORTING, tac); + r = kvm_vcpu_ioctl(env, KVM_TPR_ACCESS_REPORTING, tac); if (r == -1) { r = -errno; perror(KVM_TPR_ACCESS_REPORTING); diff --git a/qemu-kvm.c b/qemu-kvm.c index 7943281..5284426 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -816,32 +816,32 @@ static int handle_debug(CPUState *env) int kvm_get_regs(CPUState *env, struct kvm_regs *regs) { -return ioctl(env-kvm_fd, KVM_GET_REGS, regs); +return kvm_vcpu_ioctl(env, KVM_GET_REGS, regs); } int kvm_set_regs(CPUState *env, struct kvm_regs *regs) { -return ioctl(env-kvm_fd, KVM_SET_REGS, regs); +return 
kvm_vcpu_ioctl(env, KVM_SET_REGS, regs); } int kvm_get_fpu(CPUState *env, struct kvm_fpu *fpu) { -return ioctl(env-kvm_fd, KVM_GET_FPU, fpu); +return kvm_vcpu_ioctl(env, KVM_GET_FPU, fpu); } int kvm_set_fpu(CPUState *env, struct kvm_fpu *fpu) { -return ioctl(env-kvm_fd, KVM_SET_FPU, fpu); +return
Re: [PATCH] virtio-blk: fallback to draining the queue if barrier ops are not supported
On 10/14/2009 11:46 PM, Javier Guerra wrote: On Wed, Oct 14, 2009 at 7:03 AM, Avi Kivitya...@redhat.com wrote: Early implementations of virtio devices did not support barrier operations, but did commit the data to disk. In such cases, drain the queue to emulate barrier operations. would this help on the (i think common) situation with XFS on a virtio-enabled VM, using LVM-backed storage; where LVM just loses barriers. No, it's a guest only patch. If LVM loses barriers, I don't think anything can restore them. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: sync guest calls made async on host - SQLite performance
On 10/14/2009 10:41 PM, Christoph Hellwig wrote: But can't this be also implemented using QUEUE_ORDERED_DRAIN, and on the host side disabling the backing device write cache? I'm talking about cache=none, primarily. Yes, it could. But as I found out in a long discussion with Stephen it's not actually necessary. All filesystems do the right thing for a device not claiming to support barriers if it doesn't include write caches, that is implement ordering internally. So there is no urge to set QUEUE_ORDERED_DRAIN for the case without write cache. Does virtio say it has a write cache or not (and how does one say it?)? According to the report, a write+fdatasync completes too fast, at least on Ubuntu's qemu. So perhaps somewhere this information is lost. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: sync guest calls made async on host - SQLite performance
On Thu, Oct 15, 2009 at 01:56:40AM +0900, Avi Kivity wrote: Does virtio say it has a write cache or not (and how does one say it?)? Historically it didn't and the only safe way to use virtio was in cache=writethrough mode. Since qemu git as of 4th September and Linux 2.6.32-rc there is a virtio-blk feature to communicate the existence of a volatile write cache, and the support for a cache flush command. With the combination of these two data=writeback and data=none modes are safe for the first time. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: kernel bug in kvm_intel
On 10/13/2009 11:04 PM, Andrew Theurer wrote: Look at the address where vmx_vcpu_run starts, add 0x26d, and show the surrounding code. Thinking about it, it probably _is_ what you showed, due to module page alignment. But please verify this; I can't reconcile the fault address (9fe9a2b) with %rsp at the time of the fault. Here is the start of the function: 3884vmx_vcpu_run: 3884: 55 push %rbp 3885: 48 89 e5mov%rsp,%rbp and 0x26d later is 0x3af1: 3ad2: 4c 8b b1 88 01 00 00mov0x188(%rcx),%r14 3ad9: 4c 8b b9 90 01 00 00mov0x190(%rcx),%r15 3ae0: 48 8b 89 20 01 00 00mov0x120(%rcx),%rcx 3ae7: 75 05 jne3aeevmx_vcpu_run+0x26a 3ae9: 0f 01 c2vmlaunch 3aec: eb 03 jmp3af1vmx_vcpu_run+0x26d 3aee: 0f 01 c3vmresume 3af1: 48 87 0c 24 xchg %rcx,(%rsp) 3af5: 48 89 81 18 01 00 00mov%rax,0x118(%rcx) 3afc: 48 89 99 30 01 00 00mov%rbx,0x130(%rcx) 3b03: ff 34 24pushq (%rsp) 3b06: 8f 81 20 01 00 00 popq 0x120(%rcx) Ok. So it faults on the xchg instruction, rsp is 8806369ffc80 but the fault address is 9fe9a2b4. So it looks like the IDT is corrupted. Can you check what's around 9fe9a2b4 in System.map? -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [STABLE PATCH] hotplug: fix scsi hotplug.
On Wed, Oct 14, 2009 at 8:30 AM, Gerd Hoffmann kra...@redhat.com wrote: Well, partly just papering over the issues. But without proper scsi bus infrastructure we hardly can do better. Changes: * Avoid auto-attach by setting the bus number to -1. * Ignore the unit value calculated by drive_init(). * Explicitly attach the devices to the adapter. * Add sanity checks. Don't allow attaching scsi drives to your network device. * Kill the bus+unit printing. The values are bogus, and we can't easily figure the correct ones. I doubt this ever worked correctly with multiple scsi adapters present in the system. Should come more close to the expected behavior now ... Oh, and pc-bios/bios.bin needs a update too, otherwise pci hotplug doesn't work at all. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/pci-hotplug.c | 24 +++- pc-bios/bios.bin | Bin 131072 - 131072 bytes 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/hw/pci-hotplug.c b/hw/pci-hotplug.c index d0f2911..8bedea2 100644 --- a/hw/pci-hotplug.c +++ b/hw/pci-hotplug.c @@ -52,9 +52,10 @@ void drive_hot_add(Monitor *mon, const char *pci_addr, const char *opts) { int dom, pci_bus; unsigned slot; - int drive_idx, type, bus; + int drive_idx, type; int success = 0; PCIDevice *dev; + char buf[128]; if (pci_read_devaddr(mon, pci_addr, dom, pci_bus, slot)) { return; @@ -74,11 +75,19 @@ void drive_hot_add(Monitor *mon, const char *pci_addr, const char *opts) return; } type = drives_table[drive_idx].type; - bus = drive_get_max_bus (type); switch (type) { case IF_SCSI: + if (!dev-qdev.info || strcmp(dev-qdev.info-name, lsi53c895a) != 0) { + monitor_printf(mon, Device is not a scsi adapter\n); + break; + } success = 1; + drives_table[drive_idx].bus = -1; + drives_table[drive_idx].unit = -1; + if (get_param_value(buf, sizeof(buf), unit, opts)) { + drives_table[drive_idx].unit = atoi(buf); + } lsi_scsi_attach(dev-qdev, drives_table[drive_idx].bdrv, drives_table[drive_idx].unit); break; @@ -87,9 +96,7 @@ void 
drive_hot_add(Monitor *mon, const char *pci_addr, const char *opts) } if (success) - monitor_printf(mon, OK bus %d, unit %d\n, - drives_table[drive_idx].bus, - drives_table[drive_idx].unit); + monitor_printf(mon, OK\n); return; } @@ -130,7 +137,14 @@ static PCIDevice *qemu_pci_hot_add_storage(Monitor *mon, switch (type) { case IF_SCSI: + drives_table[drive_idx].bus = -1; + drives_table[drive_idx].unit = -1; + if (get_param_value(buf, sizeof(buf), unit, opts)) { + drives_table[drive_idx].unit = atoi(buf); + } dev = pci_create(lsi53c895a, devaddr); + lsi_scsi_attach(dev-qdev, drives_table[drive_idx].bdrv, + drives_table[drive_idx].unit); break; case IF_VIRTIO: dev = pci_create(virtio-blk-pci, devaddr); Thanks, Gerd. I applied this patch against qemu-kvm-0.11.0 stable, built, and tested it. I can verify that it fixes the scsi hot-add issues I was seeing. I am now able to add/remove/add/remove/add/remove a scsi disk to a running instance without segfaulting qemu. Note that on remove, I do get a stack trace in the guest's kernel (2.6.31), though the remove does succeed, and the disk disappears. Also note that I did not replace the bios.bin, as it appears to me that the qemu-kvm-0.11 bios.bin is working properly. Tested-by: Dustin Kirkland kirkl...@canonical.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] kvm: fix MSR_COUNT for kvm_arch_save_regs()
A new register was added to the load/save list on commit d283d5a65a2bdcc570065267be21848bd6fe3d78, but MSR_COUNT was not updated, leading to potential stack corruption on kvm_arch_save_regs(). The following registers are saved by kvm_arch_save_regs(): 1) MSR_IA32_SYSENTER_CS 2) MSR_IA32_SYSENTER_ESP 3) MSR_IA32_SYSENTER_EIP 4) MSR_STAR 5) MSR_IA32_TSC 6) MSR_VM_HSAVE_PA 7) MSR_CSTAR (x86_64 only) 8) MSR_KERNELGSBASE (x86_64 only) 9) MSR_FMASK (x86_64 only) 10) MSR_LSTAR (x86_64 only) Signed-off-by: Eduardo Habkost ehabk...@redhat.com --- qemu-kvm-x86.c |6 -- 1 files changed, 4 insertions(+), 2 deletions(-) diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index acb1b91..81d2c53 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -847,9 +847,9 @@ static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env) } #ifdef TARGET_X86_64 -#define MSR_COUNT 9 +#define MSR_COUNT 10 #else -#define MSR_COUNT 5 +#define MSR_COUNT 6 #endif static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs) @@ -991,6 +991,7 @@ void kvm_arch_load_regs(CPUState *env) /* msrs */ n = 0; +/* Remember to increase MSR_COUNT if you add new registers below */ set_msr_entry(msrs[n++], MSR_IA32_SYSENTER_CS, env-sysenter_cs); set_msr_entry(msrs[n++], MSR_IA32_SYSENTER_ESP, env-sysenter_esp); set_msr_entry(msrs[n++], MSR_IA32_SYSENTER_EIP, env-sysenter_eip); @@ -1168,6 +1169,7 @@ void kvm_arch_save_regs(CPUState *env) /* msrs */ n = 0; +/* Remember to increase MSR_COUNT if you add new registers below */ msrs[n++].index = MSR_IA32_SYSENTER_CS; msrs[n++].index = MSR_IA32_SYSENTER_ESP; msrs[n++].index = MSR_IA32_SYSENTER_EIP; -- 1.6.3.rc4.29.g8146 -- Eduardo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCHv2 1/2] Complete cpu initialization before signaling main thread.
On Wed, Oct 14, 2009 at 03:52:31PM +0200, Gleb Natapov wrote: Otherwise some cpus may start executing code before others are fully initialized. Signed-off-by: Gleb Natapov g...@redhat.com Applied both, thanks. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] v4: allow userspace to adjust kvmclock offset
On Wed, Oct 14, 2009 at 10:47:46AM -0400, Glauber Costa wrote: When we migrate a kvm guest that uses pvclock between two hosts, we may suffer a large skew. This is because there can be significant differences between the monotonic clock of the hosts involved. When a new host with a much larger monotonic time starts running the guest, the view of time will be significantly impacted. Situation is much worse when we do the opposite, and migrate to a host with a smaller monotonic clock. This proposed ioctl will allow userspace to inform us what is the monotonic clock value in the source host, so we can keep the time skew short, and more importantly, never goes backwards. Userspace may also need to trigger the current data, since from the first migration onwards, it won't be reflected by a simple call to clock_gettime() anymore. [ v2: uses a struct with a padding ] [ v3: provide an ioctl to get clock data too ] [ v4: used fixed-width signed type for delta ] Signed-off-by: Glauber Costa glom...@redhat.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/x86.c | 35 ++- include/linux/kvm.h |7 +++ 3 files changed, 42 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 179a919..c9b0d9f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,6 +410,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + s64 kvmclock_offset; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9601bc6..09f31e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -699,7 +699,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ vcpu-hv_clock.system_time = ts.tv_nsec + - (NSEC_PER_SEC * (u64)ts.tv_sec); + (NSEC_PER_SEC * (u64)ts.tv_sec) + v-kvm-arch.kvmclock_offset; + /* * The interface expects us to write an even number signaling that the * update is finished. 
Since the guest won't see the intermediate @@ -2441,6 +2442,38 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_SET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + s64 delta; + + r = -EFAULT; Extra space :) #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID_IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_SET_CLOCK _IOW(KVMIO, 0x7a, struct kvm_clock_data) +#define KVM_GET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) _IOR Otherwise looks fine, please send the userspace changes together. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Added VM Exit on RDTSC, trouble handling in userspace
On Tue, Oct 13, 2009 at 10:51:48PM -0700, Kurt Kiefer wrote: -BEGIN PGP SIGNED MESSAGE- Hash: SHA1 Hi all, In short, I have a need for trapping RDTSC with a VM Exit and this works, but I'm having trouble handling it in userspace. I have added the hooks I need (I only care about VMX right now), but a piece of the puzzle is missing and I don't know which. When I go back to userspace, it's triggering a different (faulty) execution vs. handling only in the kernel. Here's what I've done: 1. Added the CPU_BASED_RDTSC_EXITING flag to MSR_IA32_VMX_PROCBASED_CTLS in vmx.c:setup_vmcs_config() 2. Defined KVM_EXIT_RDTSC, and hooked into EXIT_REASON_RDTSC my handler for the exit: static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) = { // ... [EXIT_REASON_RDTSC] = handle_rdtsc, // ... } static int handle_rdtsc(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { u64 data; if (vmx_get_msr(vcpu, MSR_IA32_TIME_STAMP_COUNTER, data)) { kvm_inject_gp(vcpu, 0); return 1; } vcpu-run-exit_reason = KVM_EXIT_RDTSC; vcpu-arch.regs[VCPU_REGS_RAX] = data -1u; vcpu-arch.regs[VCPU_REGS_RDX] = (data 32) -1u; skip_emulated_instruction(vcpu); // flag a need for userspace intervention // note: this works when we return 1 and we don't involve userspace return 0; } 3. Handle KVM_EXIT_RDTSC in libkvm.c:kvm_run() : case KVM_EXIT_RDTSC: r = handle_rdtsc_usp(kvm, vcpu, env); break; via a handler where I do _nothing_ : static int handle_rdtsc_usp(kvm_context_t kvm, int vcpu, void *data) { return 0; } All well and good, right? I can add print statements to my userspace handle_rdtsc_usp() and see I get in there just fine. However, when I try to boot Linux, the following code is called over and over and over, and Linux will never load: Breakpoint 4, 0xc01103d3 in ??
() (gdb) x/10i $rip-10 0xc01103c9: lea0x0(%rdi,%riz,1),%edi 0xc01103d0: push %rbp 0xc01103d1: mov%esp,%ebp 0xc01103d3: rdtsc 0xc01103d5: pop%rbp 0xc01103d6: retq If I only handle the exit in the kernel (by returning 1 from handle_rdtsc()), everything works and Linux will load! I counted the number of RDTSC exits before linux fully loads to be somewhere around 20. If I exit all the way to userspace (return 0 in my handle_rdtsc()) that count is infinitely surpassed in number of exits, wall time, and the value of RDTSC. So is anything glaringly wrong with my modifications? Maybe there is there some extra state that needs to be restored on VM entry? Is there an interrupt flag that needs to be cleared? Maybe I need to do something with kvm_run.if_flag or kvm_run.ready_for_interrupt_injection? Please, I need help, I'm losing sleep over this! Can't see anything wrong. Perhaps the userspace exit breaks a latency assumption of that algorithm (can you translate it to function names?). Try the tsc.flat test (from qemu-kvm.git): x86_64-softmmu/qemu-system-x86_64 -chardev file,path=/tmp/log.txt,id=testlog \ -device testdev,chardev=testlog -kernel kvm/user/test/x86/tsc.flat -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] v4: allow userspace to adjust kvmclock offset
On Wed, Oct 14, 2009 at 03:53:27PM -0300, Marcelo Tosatti wrote: On Wed, Oct 14, 2009 at 10:47:46AM -0400, Glauber Costa wrote: When we migrate a kvm guest that uses pvclock between two hosts, we may suffer a large skew. This is because there can be significant differences between the monotonic clock of the hosts involved. When a new host with a much larger monotonic time starts running the guest, the view of time will be significantly impacted. Situation is much worse when we do the opposite, and migrate to a host with a smaller monotonic clock. This proposed ioctl will allow userspace to inform us what is the monotonic clock value in the source host, so we can keep the time skew short, and more importantly, never goes backwards. Userspace may also need to trigger the current data, since from the first migration onwards, it won't be reflected by a simple call to clock_gettime() anymore. [ v2: uses a struct with a padding ] [ v3: provide an ioctl to get clock data too ] [ v4: used fixed-width signed type for delta ] Signed-off-by: Glauber Costa glom...@redhat.com --- arch/x86/include/asm/kvm_host.h |1 + arch/x86/kvm/x86.c | 35 ++- include/linux/kvm.h |7 +++ 3 files changed, 42 insertions(+), 1 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 179a919..c9b0d9f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -410,6 +410,7 @@ struct kvm_arch{ unsigned long irq_sources_bitmap; u64 vm_init_tsc; + s64 kvmclock_offset; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9601bc6..09f31e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -699,7 +699,8 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* With all the info we got, fill in the values */ vcpu-hv_clock.system_time = ts.tv_nsec + -(NSEC_PER_SEC * (u64)ts.tv_sec); +(NSEC_PER_SEC * (u64)ts.tv_sec) + v-kvm-arch.kvmclock_offset; + /* * The interface expects us to write an even number 
signaling that the * update is finished. Since the guest won't see the intermediate @@ -2441,6 +2442,38 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_SET_CLOCK: { + struct timespec now; + struct kvm_clock_data user_ns; + u64 now_ns; + s64 delta; + + r = -EFAULT; Extra space :) want me to send a new because of that? #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID_IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_SET_CLOCK_IOW(KVMIO, 0x7a, struct kvm_clock_data) +#define KVM_GET_CLOCK_IOW(KVMIO, 0x7b, struct kvm_clock_data) _IOR Otherwise looks fine, please send the userspace changes together. Note that this changed quite a while in the process already. It only makes sense to implement userspace once this is commited, IMHO. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RFC] Xen PV-on-HVM guest support
Thanks for the feedback; I'll post a new version shortly. On Tue, Oct 13, 2009 at 11:45 PM, Jan Kiszka jan.kis...@web.de wrote: Interesting stuff. How usable is your work at this point? I've no immediate demand, but the question if one could integrate Xen guests with KVM already popped up more than once @work. So far I've managed to boot CentOS 5.3 (both i386 and x86_64) and use the Xen PV block and net devices, with pretty good performance. I've also booted FreeBSD 8.0-RC1 (amd64 only) with a XENHVM kernel and used the Xen PV block and net devices, but the performance of the net device is significantly worse than with CentOS. Also some FreeBSD applications use a flag that's not yet implemented in the net device emulation, but I'm working on fixing that. Overall it seems pretty solid for Linux PV-on-HVM guests. I think more work is needed to support full PV guests, but I don't know how much. Have folks been asking about PV-on-HVM or full PV? --Ed -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Latest -git qemu-kvm doesn't boot an x86 kernel
On Wed, Oct 14, 2009 at 09:23:43PM +0530, Aneesh Kumar K.V wrote: Hi, I am trying qemu-system-x86_64 on a x86 host running 2.6.30-2 (debian testing) kernel and trying to boot latest linus git kernel (x86). The kernel hang after printing the below [ 4.394392] ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 11 [4.397837] virtio-pci :00:03.0: PCI INT A - Link[LNKC] - GSI 11 (level, high) - IRQ 11 [4.436489] ACPI: PCI Interrupt Link [LNKD] enabled at IRQ 10 [4.439829] virtio-pci :00:04.0: PCI INT A - Link[LNKD] - GSI 10 (level, high) - IRQ 10 [4.462538] vda: [4.526913] input: ImExPS/2 Generic Explorer Mouse as /devices/platform/i8042/serio1/input/input3 [5.349554] async/1 used greatest stack depth: 5872 bytes left An earlier version of kvm booted fine the new kernel. So the vm disk image user space should all be fine. The older version of kvm that worked fine is QEMU PC emulator version 0.10.50 (kvm-devel) Any patches i need to try ? Please try qemu-kvm.git (should be fixed by commit 1536fc28ae1954e2990c3ee14b4a92624ecfcb68). -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] Re: Release plan for 0.12.0
On Wed, 2009-10-14 at 17:50 +0200, Michael S. Tsirkin wrote: On Wed, Oct 14, 2009 at 04:19:17PM +0100, Jamie Lokier wrote: Michael S. Tsirkin wrote: On Wed, Oct 14, 2009 at 09:17:15AM -0500, Anthony Liguori wrote: Michael S. Tsirkin wrote: Looks like Or has abandoned it. I have an updated version which works with new APIs, etc. Let me post it and we'll go from there. I'm generally inclined to oppose the functionality as I don't think it offers any advantages over the existing backends. I patch it in and use it all the time. It's much easier to setup on a random machine than a bridged config. Having two things that do the same thing is just going to lead to user confusion. They do not do the same thing. With raw socket you can use windows update without a bridge in the host, with tap you can't. On the other hand, with raw socket, guest Windows can't access files on the host's Samba share can it? So it's not that useful even for Windows guests. I guess this depends on whether you use the same host for samba :) If the problem is tap is too hard to setup, we should try to simplify tap configuration. The problem is bridge is too hard to setup. Simplifying that is a good idea, but outside the scope of the qemu project. I venture it's important enough for qemu that it's worth working on that. Something that looks like the raw socket but behaves like an automatically instantiated bridge attached to the bound interface would be a useful interface. I agree, that would be good to have. Can't we bind the raw socket to the tap interface instead of the physical interface and allow the bridge config to work. Thanks Sridhar I don't have much time, but I'll help anybody who wants to do that. 
-- Jamie -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Add a qemu interface for sharing memory between guests.
On Mon, Oct 12, 2009 at 2:55 AM, Avi Kivity a...@redhat.com wrote: On 10/12/2009 08:53 AM, Sivaram Kannan wrote: Hi all, I am a KVM newbie and I picked up the following task from the TODO of the KVM wiki. Add a qemu interface for sharing memory between guests. Using a pci device to expose the shared memory is probably a good starting point. (this should use virtio and probably depends on mmu-notifiers) Is the task still relevant? Can someone give some pointers for me to start with. Cam did a lot of work on this, perhaps he can provide a pointer. -- error compiling committee.c: too many arguments to function Hi Sivaram, Here are the two patches for KVM describing what I have done. I am continuing to work on it and still mulling a move to virtio. These don't apply against the current tree, but I can provide those patches if you would like to see them. http://patchwork.kernel.org/patch/38355/ http://patchwork.kernel.org/patch/38347/ Were you interested in using the shared memory for something in particular or were you just looking for a to-do task to pick up? Let me know if you have any questions, Cheers, Cam -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Raw vs. tap (was: Re: [Qemu-devel] Re: Release plan for 0.12.0)
Sridhar Samudrala wrote: Can't we bind the raw socket to the tap interface instead of the physical interface and allow the bridge config to work. But why use the raw interface instead of tap directly. Let me summarize the discussion so far: Raw sockets Pros: o User specifies a network interface to bind to o External traffic Just Works, guest-to-guest traffic Just Works Cons: o Requires root (cannot chmod) o Guest-host traffic does not work o No support for GSO/checksum offload Some things that I'm not sure will work or not: o guest with a bridge (sending traffic with multiple mac addresses) o guest trying to enter promiscuous mode Tap Pros: o All types of networking works when configured o Supports non-root users via tunctl o Supports GSO/checksum offload Cons: o Requires configuring a bridge which can be difficult for some users Since I don't see any clear features in raw sockets that aren't present in tap, the argument really boils down to two things. First, we should take any feature in qemu and let the user decide whether or not they want to use it. I strongly feel this is a bad philosophy that will lead to increased user confusion and a poor user experience. Second, even though raw loses performance and requires root, since it requires no external configuration it is easier to use and therefore should be an option for users. I dislike this argument because it tricks a user into thinking that raw is a viable replacement for tap. It certainly isn't performance wise but most importantly, it isn't from a functional perspective. I would be much more inclined to consider taking raw and improving the performance long term if guest-host networking worked. This appears to be a fundamental limitation though and I think it's something that will forever plague users if we include this feature. So at this point, I think it's a mistake to include raw socket support. 
If the goal is to improve networking usability such that it just works as a root user, let's incorporate a default network script that creates a bridge or something like that. There are better ways to achieve that goal. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: sync guest calls made async on host - SQLite performance
Christoph Hellwig wrote: On Thu, Oct 15, 2009 at 01:56:40AM +0900, Avi Kivity wrote: Does virtio say it has a write cache or not (and how does one say it?)? Historically it didn't and the only safe way to use virtio was in cache=writethrough mode. Which should be the default on Ubuntu's kvm that this report is concerned with so I'm a bit confused. Avi's patch is a performance optimization, not a correctness issue? Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] kvm: Prevent kvm_init from corrupting debugfs structures
I'm seeing an oops condition when kvm-intel and kvm-amd are modprobe'd during boot (say on an Intel system) and then rmmod'd: # modprobe kvm-intel kvm_init() kvm_init_debug() kvm_arch_init() -- stores debugfs dentries internally (success, etc) # modprobe kvm-amd kvm_init() kvm_init_debug() -- second initialization clobbers kvm's internal pointers to dentries kvm_arch_init() kvm_exit_debug() -- and frees them # rmmod kvm-intel kvm_exit() kvm_exit_debug() -- double free of debugfs files! *BOOM* If execution gets to the end of kvm_init(), then the calling module has been established as the kvm provider. Move the debugfs initialization to the end of the function, and remove the now-unnecessary call to kvm_exit_debug() from the error path. That way we avoid trampling on the debugfs entries and freeing them twice. Signed-off-by: Darrick J. Wong djw...@us.ibm.com --- virt/kvm/kvm_main.c |7 +++ 1 files changed, 3 insertions(+), 4 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b7c78a4..7495ce3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2717,8 +2717,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, int r; int cpu; - kvm_init_debug(); - r = kvm_arch_init(opaque); if (r) goto out_fail; @@ -2785,6 +2783,8 @@ int kvm_init(void *opaque, unsigned int vcpu_size, kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; + kvm_init_debug(); + return 0; out_free: @@ -2807,7 +2807,6 @@ out_free_0: out: kvm_arch_exit(); out_fail: - kvm_exit_debug(); return r; } EXPORT_SYMBOL_GPL(kvm_init); @@ -2815,6 +2814,7 @@ EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { tracepoint_synchronize_unregister(); + kvm_exit_debug(); misc_deregister(kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); sysdev_unregister(kvm_sysdev); @@ -2824,7 +2824,6 @@ void kvm_exit(void) on_each_cpu(hardware_disable, NULL, 1); kvm_arch_hardware_unsetup(); kvm_arch_exit(); - kvm_exit_debug(); free_cpumask_var(cpus_hardware_enabled); 
__free_page(bad_page); } -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: sync guest calls made async on host - SQLite performance
On 10/15/2009 07:54 AM, Anthony Liguori wrote: Christoph Hellwig wrote: On Thu, Oct 15, 2009 at 01:56:40AM +0900, Avi Kivity wrote: Does virtio say it has a write cache or not (and how does one say it?)? Historically it didn't and the only safe way to use virtio was in cache=writethrough mode. It didn't say? So it's up to the default, which is what? Which should be the default on Ubuntu's kvm that this report is concerned with so I'm a bit confused. Avi's patch is a performance optimization, not a correctness issue? If filesystems do drain by default, it should be a no-op on cache!=writeback. However if lseek(0); write(1); fdatasync(); are faster than disk speed, then something in our assumptions has to be wrong. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] allow userspace to adjust kvmclock offset
On 10/13/2009 09:46 PM, Glauber Costa wrote: On Tue, Oct 13, 2009 at 03:31:08PM +0300, Avi Kivity wrote: On 10/13/2009 03:28 PM, Glauber Costa wrote: Do we want an absolute or relative adjustment? What exactly do you mean? Absolute adjustment: clock = t Relative adjustment: clock += t The delta is absolute, but the adjustment in the clock is relative. So we pick the difference between what userspace is passing us and what we currently have, then relatively adds up so we can make sure we won't go back or suffer a too big skew. The motivation for relative adjustment is when you have a jitter resistant place to gather timing information (like the kernel, which can disable interrupts and preemption), then pass it on to kvm without losing information due to scheduling. For migration there is no such place since it involves two hosts, but it makes sense to support relative adjustments. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
buildbot failure in qemu-kvm on default_i386_out_of_tree
The Buildbot has detected a new failure of default_i386_out_of_tree on qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/default_i386_out_of_tree/builds/51 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_2 Build Reason: The Nightly scheduler named 'nightly_default' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
buildbot failure in qemu-kvm on default_i386_debian_5_0
The Buildbot has detected a new failure of default_i386_debian_5_0 on qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/default_i386_debian_5_0/builds/114 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_2 Build Reason: The Nightly scheduler named 'nightly_default' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
buildbot failure in qemu-kvm on default_x86_64_debian_5_0
The Buildbot has detected a new failure of default_x86_64_debian_5_0 on qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_debian_5_0/builds/112 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_1 Build Reason: The Nightly scheduler named 'nightly_default' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
buildbot failure in qemu-kvm on default_x86_64_out_of_tree
The Buildbot has detected a new failure of default_x86_64_out_of_tree on qemu-kvm. Full details are available at: http://buildbot.b1-systems.de/qemu-kvm/builders/default_x86_64_out_of_tree/builds/53 Buildbot URL: http://buildbot.b1-systems.de/qemu-kvm/ Buildslave for this Build: b1_qemu_kvm_1 Build Reason: The Nightly scheduler named 'nightly_default' triggered this build Build Source Stamp: [branch master] HEAD Blamelist: BUILD FAILED: failed compile sincerely, -The Buildbot -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Can't make virtio block driver work on Windows 2003
Hi all I have a new installation of Windows 2003 SBS server 32bit which I installed using IDE disk. KVM version is QEMU PC emulator version 0.10.50 (qemu-kvm-devel-86) compiled by myself on kernel 2.6.28-11-server. I have already moved networking from e1000 to virtio (e1000 was performing very sluggishly btw, probably was losing many packets, virtio seems to work) Now I want to move the disk to virtio... This is complex so I thought that first I wanted to see virtio installed and working on another drive. So I tried adding another drive, a virtio one, (a new 100MB file at host side) to the virtual machine and rebooting. A first problem is that Windows does not detect the new device upon boot or Add Hardware scan. Here is the kvm commandline (it's complex because it comes from libvirt): /usr/local/kvm/bin/qemu-system-x86_64 -S -M pc -m 4096 -smp 4 -name winserv2 -uuid -monitor pty -boot c -drive file=/virtual_machines/kvm/nfsimport/winserv2.raw,if=ide,index=0,boot=on -drive file=/virtual_machines/kvm/nfsimport/zerofile,if=virtio,index=1 -net nic,macaddr=xx:xx:xx:xx:xx:xx,vlan=0,model=virtio -net tap,fd=25,vlan=0 -serial none -parallel none -usb -vnc 127.0.0.1:4 Even if Windows couldn't detect the new device I tried to install the driver anyway. On Add Hardware I go through to -- SCSI and RAID controllers -- Have Disk .. and point it to the location of viostor files (windows 2003 x86) downloaded from: http://www.linux-kvm.org/page/WindowsGuestDrivers/Download_Drivers http://people.redhat.com/~yvugenfi/24.09.2009/viostor.zip Windows does install the driver, however at the end it says: The software for this device is now installed, but may not work correctly. This device cannot start. (Code 10) and the new device gets flagged with a yellow exclamation mark in Device Manager. 
Thanks for your help -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Latest -git qemu-kvm doesn't boot an x86 kernel
On Wed, Oct 14, 2009 at 04:54:35PM -0300, Marcelo Tosatti wrote: On Wed, Oct 14, 2009 at 09:23:43PM +0530, Aneesh Kumar K.V wrote: Hi, I am trying qemu-system-x86_64 on a x86 host running 2.6.30-2 (debian testing) kernel and trying to boot latest linus git kernel (x86). The kernel hangs after printing the below [ 4.394392] ACPI: PCI Interrupt Link [LNKC] enabled at IRQ 11 [4.397837] virtio-pci 0000:00:03.0: PCI INT A -> Link[LNKC] -> GSI 11 (level, high) -> IRQ 11 [4.436489] ACPI: PCI Interrupt Link [LNKD] enabled at IRQ 10 [4.439829] virtio-pci 0000:00:04.0: PCI INT A -> Link[LNKD] -> GSI 10 (level, high) -> IRQ 10 [4.462538] vda: [4.526913] input: ImExPS/2 Generic Explorer Mouse as /devices/platform/i8042/serio1/input/input3 [5.349554] async/1 used greatest stack depth: 5872 bytes left An earlier version of kvm booted fine the new kernel. So the vm disk image user space should all be fine. The older version of kvm that worked fine is QEMU PC emulator version 0.10.50 (kvm-devel) Any patches i need to try ? Please try qemu-kvm.git (should be fixed by commit 1536fc28ae1954e2990c3ee14b4a92624ecfcb68). That worked. Thanks -aneesh -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Can't make virtio block driver work on Windows 2003
On 10/14/2009 07:52 PM, Asdo wrote: Hi all I have a new installation of Windows 2003 SBS server 32bit which I installed using IDE disk. KVM version is QEMU PC emulator version 0.10.50 (qemu-kvm-devel-86) compiled by myself on kernel 2.6.28-11-server. I have already moved networking from e1000 to virtio (e1000 was performing very sluggishly btw, probably was losing many packets, virtio seems to work) Now I want to move the disk to virtio... This is complex so I thought that first I wanted to see virtio installed and working on another drive. So I tried adding another drive, a virtio one, (a new 100MB file at host side) to the virtual machine and rebooting. A first problem is that Windows does not detect the new device upon boot or Add Hardware scan. Check PCI devices with info pci. You must have SCSI controller: PCI device 1af4:1001 device reported. Here is the kvm commandline (it's complex because it comes from libvirt): /usr/local/kvm/bin/qemu-system-x86_64 -S -M pc -m 4096-smp 4 -name winserv2 -uuid -monitor pty -boot c -drive file=/virtual_machines/kvm/nfsimport/winserv2.raw,if=ide,index=0,boot=on -drive file=/virtual_machines/kvm/nfsimport/zerofile,if=virtio,index=1 -net nic,macaddr=xx:xx:xx:xx:xx:xx,vlan=0,model=virtio -net tap,fd=25,vlan=0 -serial none -parallel none -usb -vnc 127.0.0.1:4 Even if Windows couldn't detect the new device I tried to install the driver anyway. On Add Hardware I go through to -- SCSI and RAID controllers -- Have Disk .. and point it to the location of viostor files (windows 2003 x86) downloaded from: http://www.linux-kvm.org/page/WindowsGuestDrivers/Download_Drivers http://people.redhat.com/~yvugenfi/24.09.2009/viostor.zip Windows does install the driver, however at the end it says: The software for this device is now installed, but may not work correctly. This device cannot start. (Code 10) and the new device gets flagged with a yellow exclamation mark in Device Manager. 
I don't know if it's the same reason as before, that the device is not detected so the driver cannot work, or another reason. Yes, it must be the same problem. Code 10 means that device driver was not able to find or initialize hardware. Regards, Vadim Any idea? Thanks for your help -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Xen PV-on-HVM guest support (v2)
Support for Xen PV-on-HVM guests can be implemented almost entirely in userspace, except for handling one annoying MSR that maps a Xen hypercall blob into guest address space. A generic mechanism to delegate MSR writes to userspace seems overkill and risks encouraging similar MSR abuse in the future. Thus this patch adds special support for the Xen HVM MSR. I implemented a new ioctl, KVM_XEN_HVM_CONFIG, that lets userspace tell KVM which MSR the guest will write to, as well as the starting address and size of the hypercall blobs (one each for 32-bit and 64-bit) that userspace has loaded from files. When the guest writes to the MSR, KVM copies one page of the blob from userspace to the guest. I've tested this patch with a hacked-up version of Gerd's userspace code, booting a number of guests (CentOS 5.3 i386 and x86_64, and FreeBSD 8.0-RC1 amd64) and exercising PV network and block devices. v2: fix ioctl struct padding; renumber CAP and ioctl constants; check kvm_write_guest() return value; change printks to KERN_DEBUG (I think they're worth keeping for debugging userspace) Signed-off-by: Ed Swierk eswi...@aristanetworks.com --- Index: kvm-kmod/include/asm-x86/kvm.h === --- kvm-kmod.orig/include/asm-x86/kvm.h +++ kvm-kmod/include/asm-x86/kvm.h @@ -59,6 +59,7 @@ #define __KVM_HAVE_MSIX #define __KVM_HAVE_MCE #define __KVM_HAVE_PIT_STATE2 +#define __KVM_HAVE_XEN_HVM /* Architectural interrupt line count. 
*/ #define KVM_NR_INTERRUPTS 256 Index: kvm-kmod/include/linux/kvm.h === --- kvm-kmod.orig/include/linux/kvm.h +++ kvm-kmod/include/linux/kvm.h @@ -476,6 +476,9 @@ struct kvm_ioeventfd { #endif #define KVM_CAP_IOEVENTFD 36 #define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 +#ifdef __KVM_HAVE_XEN_HVM +#define KVM_CAP_XEN_HVM 38 +#endif #ifdef KVM_CAP_IRQ_ROUTING @@ -528,6 +531,15 @@ struct kvm_x86_mce { }; #endif +#ifdef KVM_CAP_XEN_HVM +struct kvm_xen_hvm_config { + __u32 msr; + __u8 pad[2]; + __u8 blob_size[2]; + __u64 blob_addr[2]; +}; +#endif + #define KVM_IRQFD_FLAG_DEASSIGN (1 0) struct kvm_irqfd { @@ -586,6 +598,7 @@ struct kvm_irqfd { #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) #define KVM_SET_BOOT_CPU_ID_IO(KVMIO, 0x78) #define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_XEN_HVM_CONFIG_IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) /* * ioctls for vcpu fds Index: kvm-kmod/include/linux/kvm_host.h === --- kvm-kmod.orig/include/linux/kvm_host.h +++ kvm-kmod/include/linux/kvm_host.h @@ -236,6 +236,10 @@ struct kvm { unsigned long mmu_notifier_seq; long mmu_notifier_count; #endif + +#ifdef KVM_CAP_XEN_HVM + struct kvm_xen_hvm_config xen_hvm_config; +#endif }; /* The guest did something we don't support. 
*/ Index: kvm-kmod/x86/x86.c === --- kvm-kmod.orig/x86/x86.c +++ kvm-kmod/x86/x86.c @@ -875,6 +875,35 @@ static int set_msr_mce(struct kvm_vcpu * return 0; } +#ifdef KVM_CAP_XEN_HVM +static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data) +{ + int blob = !!(vcpu-arch.shadow_efer EFER_LME); + u32 pnum = data ~PAGE_MASK; + u64 paddr = data PAGE_MASK; + u8 *page; + int r = 1; + + if (pnum = vcpu-kvm-xen_hvm_config.blob_size[blob]) + goto out; + page = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!page) + goto out; + if (copy_from_user(page, (u8 *)vcpu-kvm-xen_hvm_config.blob_addr[blob] + + pnum * PAGE_SIZE, PAGE_SIZE)) + goto out_free; + if (kvm_write_guest(vcpu-kvm, paddr, page, PAGE_SIZE)) + goto out_free; + printk(KERN_DEBUG kvm: copied xen hvm blob %d page %d to 0x%llx\n, + blob, pnum, paddr); + r = 0; +out_free: + kfree(page); +out: + return r; +} +#endif + int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) { switch (msr) { @@ -990,6 +1019,10 @@ int kvm_set_msr_common(struct kvm_vcpu * 0x%x data 0x%llx\n, msr, data); break; default: +#ifdef KVM_CAP_XEN_HVM + if (msr (msr == vcpu-kvm-xen_hvm_config.msr)) + return xen_hvm_config(vcpu, data); +#endif if (!ignore_msrs) { pr_unimpl(vcpu, unhandled wrmsr: 0x%x data %llx\n, msr, data); @@ -2453,6 +2486,17 @@ long kvm_arch_vm_ioctl(struct file *filp r = 0; break; } +#ifdef KVM_CAP_XEN_HVM + case KVM_XEN_HVM_CONFIG: { + r = -EFAULT; + if (copy_from_user(kvm-xen_hvm_config, argp, +
Re: linux-next: tree build failure
On Fri, 2009-10-09 at 12:14 -0700, Hollis Blanchard wrote: Rusty's version of BUILD_BUG_ON() does indeed fix the build break, and also exposes the bug in kvmppc_account_exit_stat(). So to recap: original: built but didn't work Jan's: doesn't build Rusty's: builds and works Where do you want to go from here? Jan, what are your thoughts? Your BUILD_BUG_ON patch has broken the build, and we still need to fix it. -- Hollis Blanchard IBM Linux Technology Center -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html