Re: [Qemu-devel] [PATCH 1/4] usb-desc: fix user trigerrable segfaults (!config)
On Sun, Feb 26, 2012 at 05:09:21PM +0100, Alon Levy wrote: Check for dev-config being NULL in two places: USB_REQ_GET_CONFIGURATION and USB_REQ_GET_STATUS. Ping. The behavior of USB_REQ_GET_STATUS is unspecified in the Default state, that corresponds to dev-config being NULL (it defaults to NULL and is reset whenever a SET_CONFIGURATION with value 0, or attachment). I implemented it to correspond with the state before ed5a83ddd8c1d8ec7b1015315530cf29949e7c48, the commit moving SET_STATUS to usb-desc; if dev-config is not set we return whatever is in the first configuration. The behavior of USB_REQ_GET_CONFIGURATION is also undefined before any SET_CONFIGURATION, but here we just return 0 (same as specified for the Address state). A win7 guest failed to initialize the device before this patch, segfaulting when GET_STATUS was called with dev-config == NULL. With this patch the passthrough device still doesn't work but the failure is unrelated. Signed-off-by: Alon Levy al...@redhat.com --- hw/usb-desc.c | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/hw/usb-desc.c b/hw/usb-desc.c index 3c3ed6a..ccf85ad 100644 --- a/hw/usb-desc.c +++ b/hw/usb-desc.c @@ -536,7 +536,11 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, break; case DeviceRequest | USB_REQ_GET_CONFIGURATION: -data[0] = dev-config-bConfigurationValue; +/* + * 9.4.2: 0 should be returned if the device is unconfigured, otherwise + * the non zero value of bConfigurationValue. + */ +data[0] = dev-config ? dev-config-bConfigurationValue : 0; ret = 1; break; case DeviceOutRequest | USB_REQ_SET_CONFIGURATION: @@ -544,9 +548,18 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, trace_usb_set_config(dev-addr, value, ret); break; -case DeviceRequest | USB_REQ_GET_STATUS: +case DeviceRequest | USB_REQ_GET_STATUS: { +const USBDescConfig *config = dev-config ? +dev-config : dev-device-confs[0]; + data[0] = 0; -if (dev-config-bmAttributes 0x40) { +/* + * Default state: Device behavior when this request is received while + *the device is in the Default state is not specified. + * We return the same value that a configured device would return if + * it used the first configuration. + */ +if (config-bmAttributes 0x40) { data[0] |= 1 USB_DEVICE_SELF_POWERED; } if (dev-remote_wakeup) { @@ -555,6 +568,7 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, data[1] = 0x00; ret = 2; break; +} case DeviceOutRequest | USB_REQ_CLEAR_FEATURE: if (value == USB_DEVICE_REMOTE_WAKEUP) { dev-remote_wakeup = 0; -- 1.7.9.1
Re: [Qemu-devel] [PATCH 1/4] qapi-schema: fix typos and explain 'spice' auth
On Fri, Feb 24, 2012 at 11:22:02PM +0200, Alon Levy wrote: Signed-off-by: Alon Levy al...@redhat.com Ping. --- qapi-schema.json | 18 ++ 1 files changed, 10 insertions(+), 8 deletions(-) diff --git a/qapi-schema.json b/qapi-schema.json index d0b6792..72b17f1 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -616,12 +616,13 @@ # @connection-id: SPICE connection id number. All channels with the same id # belong to the same SPICE session. # -# @connection-type: SPICE channel type number. 1 is the main control channel, -# filter for this one if you want track spice sessions only +# @connection-type: SPICE channel type number. 1 is the main control +# channel, filter for this one if you want to track spice +# sessions only # -# @channel-id: SPICE channel ID number. Usually 0, might be different needed -# when multiple channels of the same type exist, such as multiple -# display channels in a multihead setup +# @channel-id: SPICE channel ID number. Usually 0, might be different when +# multiple channels of the same type exist, such as multiple +# display channels in a multihead setup # # @tls: true if the channel is encrypted, false otherwise. # @@ -649,8 +650,9 @@ # @tls-port: #optional The SPICE server's TLS port number. # # @auth: #optional the current authentication type used by the server -#'none' if no authentication is being used -#'spice' (TODO: describe) +#'none' if no authentication is being used +#'spice' uses SASL or direct TLS authentication, depending on command +#line options # # @channels: a list of @SpiceChannel for each active spice channel # @@ -1216,7 +1218,7 @@ { 'command': 'migrate_set_speed', 'data': {'value': 'int'} } ## -# @DevicePropertyInfo: +# @ObjectPropertyInfo: # # @name: the name of the property # -- 1.7.9.1
Re: [Qemu-devel] [PATCH] kvm: notify host when guest paniced
At 02/28/2012 01:26 PM, Wen Congyang Wrote: At 02/27/2012 11:08 PM, Jan Kiszka Wrote: On 2012-02-27 04:01, Wen Congyang wrote: We can know the guest is paniced when the guest runs on xen. But we do not have such feature on kvm. This patch implemnts this feature, and the implementation is the same as xen: register panic notifier, and call hypercall when the guest is paniced. Signed-off-by: Wen Congyang we...@cn.fujitsu.com --- arch/x86/kernel/kvm.c| 12 arch/x86/kvm/svm.c |8 ++-- arch/x86/kvm/vmx.c |8 ++-- arch/x86/kvm/x86.c | 13 +++-- include/linux/kvm.h |1 + include/linux/kvm_para.h |1 + 6 files changed, 37 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6..b928d1d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { .notifier_call = kvm_pv_reboot_notify, }; +static int +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + kvm_hypercall0(KVM_HC_GUEST_PANIC); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_panic_nb = { + .notifier_call = kvm_pv_panic_notify, +}; + You should split up host and guest-side changes. OK static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) paravirt_ops_setup(); register_reboot_notifier(kvm_pv_reboot_nb); + atomic_notifier_chain_register(panic_notifier_list, kvm_pv_panic_nb); for (i = 0; i KVM_TASK_SLEEP_HASHSIZE; i++) spin_lock_init(async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0b7690e..38b4705 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) static int vmmcall_interception(struct vcpu_svm *svm) { + int ret; + svm-next_rip = kvm_rip_read(svm-vcpu) + 3; skip_emulated_instruction(svm-vcpu); - kvm_emulate_hypercall(svm-vcpu); - return 1; + ret = kvm_emulate_hypercall(svm-vcpu); + + /* Ignore the error? */ + return ret == 0 ? 0 : 1; Why can't kvm_emulate_hypercall return the right value? Because before this patch, kvm always ignores the error. After rereading the code, kvm_emulate_hypercall() will return -KVM_EPERM Sorry for my miss. kvm_emulate_hypercall() does not return -KVM_EPERM. when vcpu's CPL is not 0. I think we should deal with this exception in kvm_emulate_hypercall(), and return 1. But I donot know how to do it. kvm_queue_exception(vcpu, UD_VECTOR)? } static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 66147ca..1b57ebb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu) static int handle_vmcall(struct kvm_vcpu *vcpu) { + int ret; + skip_emulated_instruction(vcpu); - kvm_emulate_hypercall(vcpu); - return 1; + ret = kvm_emulate_hypercall(vcpu); + + /* Ignore the error? */ + return ret == 0 ? 0 : 1; } static int handle_invd(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c9d99e5..3fc2853 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) u64 param, ingpa, outgpa, ret; uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; bool fast, longmode; - int cs_db, cs_l; + int cs_db, cs_l, r = 1; /* * hypercall generates UD from non zero cpl and real mode @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: kvm_vcpu_on_spin(vcpu); break; + case KVM_HC_GUEST_PANIC: + vcpu-run-exit_reason = KVM_EXIT_GUEST_PANIC; + r = 0; + break; That's the wrong place. This is a KVM hypercall, not a HyperV one. OK, I will remove it. Thanks Wen Congyang default: res = HV_STATUS_INVALID_HYPERCALL_CODE; break; @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) kvm_register_write(vcpu, VCPU_REGS_RAX, ret 0x); } - return 1; + return r; } int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_VAPIC_POLL_IRQ: ret = 0; break; + case KVM_HC_GUEST_PANIC: + ret = 0; + vcpu-run-exit_reason = KVM_EXIT_GUEST_PANIC; + r = 0; + break; default: ret = -KVM_ENOSYS; break; diff --git a/include/linux/kvm.h
Re: [Qemu-devel] [PATCH v2] TCG: Convert global variables to be TLS.
On 28 February 2012 03:13, Evgeny Voevodin e.voevo...@samsung.com wrote: I wanted to get some feedback and points to show up a direction to move in this field. And qomification of translation caches is an interesting suggestion I think. If you're serious about multithreading TCG then I think the first steps are: * fix existing race conditions * think very hard * come up with an overall design for what you're proposing You won't get there by incremental steps unless you know where you're going... -- PMM
Re: [Qemu-devel] [PATCH] kvm: notify host when guest paniced
At 02/27/2012 11:08 PM, Jan Kiszka Wrote: On 2012-02-27 04:01, Wen Congyang wrote: We can know the guest is paniced when the guest runs on xen. But we do not have such feature on kvm. This patch implemnts this feature, and the implementation is the same as xen: register panic notifier, and call hypercall when the guest is paniced. Signed-off-by: Wen Congyang we...@cn.fujitsu.com --- arch/x86/kernel/kvm.c| 12 arch/x86/kvm/svm.c |8 ++-- arch/x86/kvm/vmx.c |8 ++-- arch/x86/kvm/x86.c | 13 +++-- include/linux/kvm.h |1 + include/linux/kvm_para.h |1 + 6 files changed, 37 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6..b928d1d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { .notifier_call = kvm_pv_reboot_notify, }; +static int +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ +kvm_hypercall0(KVM_HC_GUEST_PANIC); +return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_panic_nb = { +.notifier_call = kvm_pv_panic_notify, +}; + You should split up host and guest-side changes. static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) paravirt_ops_setup(); register_reboot_notifier(kvm_pv_reboot_nb); +atomic_notifier_chain_register(panic_notifier_list, kvm_pv_panic_nb); for (i = 0; i KVM_TASK_SLEEP_HASHSIZE; i++) spin_lock_init(async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0b7690e..38b4705 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) static int vmmcall_interception(struct vcpu_svm *svm) { +int ret; + svm-next_rip = kvm_rip_read(svm-vcpu) + 3; skip_emulated_instruction(svm-vcpu); -kvm_emulate_hypercall(svm-vcpu); -return 1; +ret = kvm_emulate_hypercall(svm-vcpu); + +/* Ignore the error? */ +return ret == 0 ? 0 : 1; Why can't kvm_emulate_hypercall return the right value? kvm_emulate_hypercall() will call kvm_hv_hypercall(), and kvm_hv_hypercall() will return 0 when vcpu's CPL 0. If vcpu's CPL 0, does kvm need to exit and tell it to qemu? Thanks Wen Congyang } static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 66147ca..1b57ebb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4582,9 +4582,13 @@ static int handle_halt(struct kvm_vcpu *vcpu) static int handle_vmcall(struct kvm_vcpu *vcpu) { +int ret; + skip_emulated_instruction(vcpu); -kvm_emulate_hypercall(vcpu); -return 1; +ret = kvm_emulate_hypercall(vcpu); + +/* Ignore the error? */ +return ret == 0 ? 0 : 1; } static int handle_invd(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c9d99e5..3fc2853 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4923,7 +4923,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) u64 param, ingpa, outgpa, ret; uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; bool fast, longmode; -int cs_db, cs_l; +int cs_db, cs_l, r = 1; /* * hypercall generates UD from non zero cpl and real mode @@ -4964,6 +4964,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: kvm_vcpu_on_spin(vcpu); break; +case KVM_HC_GUEST_PANIC: +vcpu-run-exit_reason = KVM_EXIT_GUEST_PANIC; +r = 0; +break; That's the wrong place. This is a KVM hypercall, not a HyperV one. default: res = HV_STATUS_INVALID_HYPERCALL_CODE; break; @@ -4977,7 +4981,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) kvm_register_write(vcpu, VCPU_REGS_RAX, ret 0x); } -return 1; +return r; } int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) @@ -5013,6 +5017,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) case KVM_HC_VAPIC_POLL_IRQ: ret = 0; break; +case KVM_HC_GUEST_PANIC: +ret = 0; +vcpu-run-exit_reason = KVM_EXIT_GUEST_PANIC; +r = 0; +break; default: ret = -KVM_ENOSYS; break; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index acbe429..8f0e31b 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -163,6 +163,7 @@ struct kvm_pit_config { #define KVM_EXIT_OSI 18 #define KVM_EXIT_PAPR_HCALL 19 #define
Re: [Qemu-devel] [PATCH] qxl: properly handle upright and non-shared surfaces
-dprint(qxl, 1, %s: stride %d, [%d, %d, %d, %d]\n, __func__, +dprint(qxl, 2, %s: stride %d, [%d, %d, %d, %d]\n, __func__, You know 2 is used right now for high frequency stuff, like interface_get_command? I think this should be lower. Anyway, not a big deal. /me used '1' for important but infrequent stuff, basically all init ops, mode switching etc. This can happen quite alot in case you are running with SDL. Maybe we need not just 1+2 but 1+2+3 levels, with 3 for the really frequent stuff which will flood the logfiles. Or even better just turn them all into tracepoints ... cheers, Gerd
Re: [Qemu-devel] [PATCH] qxl: properly handle upright and non-shared surfaces
On Tue, Feb 28, 2012 at 09:29:38AM +0100, Gerd Hoffmann wrote: -dprint(qxl, 1, %s: stride %d, [%d, %d, %d, %d]\n, __func__, +dprint(qxl, 2, %s: stride %d, [%d, %d, %d, %d]\n, __func__, You know 2 is used right now for high frequency stuff, like interface_get_command? I think this should be lower. Anyway, not a big deal. /me used '1' for important but infrequent stuff, basically all init ops, mode switching etc. This can happen quite alot in case you are running with SDL. Maybe we need not just 1+2 but 1+2+3 levels, with 3 for the really frequent stuff which will flood the logfiles. Or even better just turn them all into tracepoints ... Yeah, I started working on this but didn't finish. I'll try to do it. cheers, Gerd
Re: [Qemu-devel] qemu-kvm-1.0 crashes with threaded vnc server?
On Mon, Feb 13, 2012 at 10:24 AM, Peter Lieven p...@dlh.net wrote: Am 11.02.2012 um 09:55 schrieb Corentin Chary: On Thu, Feb 9, 2012 at 7:08 PM, Peter Lieven p...@dlh.net wrote: Hi, is anyone aware if there are still problems when enabling the threaded vnc server? I saw some VMs crashing when using a qemu-kvm build with --enable-vnc-thread. qemu-kvm-1.0[22646]: segfault at 0 ip 7fec1ca7ea0b sp 7fec19d056d0 error 6 in libz.so.1.2.3.3[7fec1ca75000+16000] qemu-kvm-1.0[26056]: segfault at 7f06d8d6e010 ip 7f06e0a30d71 sp 7f06df035748 error 6 in libc-2.11.1.so[7f06e09aa000+17a000] I had no time to debug further. It seems to happen shortly after migrating, but thats uncertain. At least the segfault in libz seems to give a hint to VNC since I cannot image of any other part of qemu-kvm using libz except for VNC server. Thanks, Peter Hi Peter, I found two patches on my git tree that I sent long ago but somehow get lost on the mailing list. I rebased the tree but did not have the time (yet) to test them. http://git.iksaif.net/?p=qemu.git;a=shortlog;h=refs/heads/wip Feel free to try them. If QEMU segfault again, please send a full gdb backtrace / valgrind trace / way to reproduce :). Thanks, Hi Corentin, thanks for rebasing those patches. I remember that I have seen them the last time I noticed (about 1 year ago) that the threaded VNC is crashing. I'm on vacation this week, but I will test them next week and let you know if I can force a crash with them applied. If not we should consider to include them asap. Hi Peter, any news on that ? -- Corentin Chary http://xf.iksaif.net
Re: [Qemu-devel] [PATCH] qom: if @instance_size==0, assign size of object to parent object size
Il 28/02/2012 08:18, Igor Mitsyanko ha scritto: QOM documentation states that for objects of type with @instance_size == 0 size will be assigned to match parent object's size. But currently this feauture is not implemented and qemu asserts during creation of object with zero instance_size. This patch adjusts actual behaviour in accordance with documentation. You can do it just once, in type_get_parent instead. Paolo
[Qemu-devel] [PATCH 1/2] qdev: accept empty string properties
These were stored as NULL due to wrong cut-and-paste from set_pointer. Reported-by: Gerhard Wiesinger li...@wiesinger.com Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- hw/qdev-properties.c |4 1 files changed, 0 insertions(+), 4 deletions(-) diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c index 0423af1..bff9152 100644 --- a/hw/qdev-properties.c +++ b/hw/qdev-properties.c @@ -421,10 +421,6 @@ static void set_string(Object *obj, Visitor *v, void *opaque, error_propagate(errp, local_err); return; } -if (!*str) { -g_free(str); -str = NULL; -} if (*ptr) { g_free(*ptr); } -- 1.7.7.6
[Qemu-devel] [PATCH 2/2] qom: fix device hot-unplug
Property removal modifies the list, so it is not safe to continue iteration. We know anyway that each object can have only one parent (see object_property_add_child), so exit after finding the requested object. Reported-by: Michael S. Tsirkin m...@redhat.com Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- qom/object.c |7 ++- 1 files changed, 2 insertions(+), 5 deletions(-) diff --git a/qom/object.c b/qom/object.c index aa037d2..39cbcb9 100644 --- a/qom/object.c +++ b/qom/object.c @@ -304,12 +304,9 @@ static void object_property_del_child(Object *obj, Object *child, Error **errp) ObjectProperty *prop; QTAILQ_FOREACH(prop, obj-properties, node) { -if (!strstart(prop-type, child, NULL)) { -continue; -} - -if (prop-opaque == child) { +if (strstart(prop-type, child, NULL) prop-opaque == child) { object_property_del(obj, prop-name, errp); +break; } } } -- 1.7.7.6
[Qemu-devel] [PATCH 0/2] Two small QOM fixes
Two fixes for bugs that were reported on the list. Paolo Bonzini (2): qdev: accept empty string properties qom: fix device hot-unplug hw/qdev-properties.c |4 qom/object.c |7 ++- 2 files changed, 2 insertions(+), 9 deletions(-) -- 1.7.7.6
[Qemu-devel] [PATCH 0/3] qemu-iotests: add image streaming tests
This series adds the image streaming test suite to qemu-iotests. It covers the 'block_stream', 'block_job_cancel', 'block_job_set_speed', and 'query-block-jobs' QMP interfaces. Since these tests involve QMP it is no longer convenient to write them in bash. Instead these tests are written in Python and make use of the existing QMP/qmp.py module. In order to achieve this, it adds an iotests Python module that handles interaction with the qemu-iotests framework. If you want to review using a top-down approach, I suggest reading this series backwards, starting from Patch 3 which introduces 030, the image streaming test suite. Or if you like the bottom-up approach: * Patch 1 exports TEST_DIR from qemu-iotests so test executables can learn the temporary directory path. * Patch 2 adds the iotests.py module, which brings together qemu-iotests, unittest, and QEMU in a way that is easy to consume in Python. * Patch 3 adds 030, the image streaming test suite. Tests pass successfully with both qcow2 and qed on qemu.git/master. Stefan Hajnoczi (3): qemu-iotests: export TEST_DIR for non-bash tests qemu-iotests: add iotests Python module test: add image streaming tests tests/qemu-iotests/030 | 151 ++ tests/qemu-iotests/030.out |5 + tests/qemu-iotests/common.config |2 + tests/qemu-iotests/group |1 + tests/qemu-iotests/iotests.py| 165 ++ 5 files changed, 324 insertions(+), 0 deletions(-) create mode 100755 tests/qemu-iotests/030 create mode 100644 tests/qemu-iotests/030.out create mode 100644 tests/qemu-iotests/iotests.py -- 1.7.9
[Qemu-devel] [PATCH 1/3] qemu-iotests: export TEST_DIR for non-bash tests
Since qemu-iotests may need to create large image files it is possible to specify the test directory. The TEST_DIR variable needs to be exported so non-bash tests can make use of it. Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com --- tests/qemu-iotests/common.config |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index d07f435..a220684 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -121,6 +121,8 @@ if [ ! -d $TEST_DIR ]; then exit 1 fi +export TEST_DIR + _readlink() { if [ $# -ne 1 ]; then -- 1.7.9
[Qemu-devel] [PATCH 2/3] qemu-iotests: add iotests Python module
Block layer tests that involve QMP commands rather than qemu-img or qemu-io are not well-suited for shell scripting. This patch adds a Python module which allows tests to be written in Python instead. The basic API is: VM - class for launching and interacting with a VM QMPTestCase - abstract base class for tests that use QMP qemu_img() - wrapper function for invoking qemu-img qemu_io() - wrapper function for invoking qemu-io imgfmt - the image format under test (e.g. qcow2, qed) test_dir- scratch directory path for temporary files main() - entry point for running tests Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com --- tests/qemu-iotests/iotests.py | 165 + 1 files changed, 165 insertions(+), 0 deletions(-) create mode 100644 tests/qemu-iotests/iotests.py diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py new file mode 100644 index 000..3fb5fbd --- /dev/null +++ b/tests/qemu-iotests/iotests.py @@ -0,0 +1,165 @@ +# Common utilities and Python wrappers for qemu-iotests +# +# Copyright (C) 2012 IBM Corp. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. +# + +import os +import re +import subprocess +import unittest +import sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'QMP')) +import qmp + +__all__ = ['imgfmt', 'imgproto', 'test_dir' 'qemu_img', 'qemu_io', + 'VM', 'QMPTestCase', 'notrun', 'main'] + +# This will not work if arguments or path contain spaces but is necessary if we +# want to support the override options that ./check supports. +qemu_img_args = os.environ.get('QEMU_IMG', 'qemu-img').split(' ') +qemu_io_args = os.environ.get('QEMU_IO', 'qemu-io').split(' ') +qemu_args = os.environ.get('QEMU', 'qemu').split(' ') + +imgfmt = os.environ.get('IMGFMT', 'raw') +imgproto = os.environ.get('IMGPROTO', 'file') +test_dir = os.environ.get('TEST_DIR', '/var/tmp') + +def qemu_img(*args): +'''Run qemu-img and return the exit code''' +devnull = open('/dev/null', 'r+') +return subprocess.call(qemu_img_args + list(args), stdin=devnull, stdout=devnull) + +def qemu_io(*args): +'''Run qemu-io and return the stdout data''' +args = qemu_io_args + list(args) +return subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0] + +class VM(object): +'''A QEMU VM''' + +def __init__(self): +self._monitor_path = os.path.join(test_dir, 'qemu-mon.%d' % os.getpid()) +self._qemu_log_path = os.path.join(test_dir, 'qemu-log.%d' % os.getpid()) +self._args = qemu_args + ['-chardev', + 'socket,id=mon,path=' + self._monitor_path, + '-mon', 'chardev=mon,mode=control', '-nographic'] +self._num_drives = 0 + +def add_drive(self, path, opts=''): +'''Add a virtio-blk drive to the VM''' +options = ['if=virtio', + 'format=%s' % imgfmt, + 'cache=none', + 'file=%s' % path, + 'id=drive%d' % self._num_drives] +if opts: +options.append(opts) + +self._args.append('-drive') +self._args.append(','.join(options)) +self._num_drives += 1 +return self + +def launch(self): +'''Launch the VM and establish a QMP connection''' +devnull = open('/dev/null', 'rb') +qemulog = open(self._qemu_log_path, 'wb') +try: +self._qmp = qmp.QEMUMonitorProtocol(self._monitor_path, server=True) +self._popen = subprocess.Popen(self._args, stdin=devnull, stdout=qemulog, + stderr=subprocess.STDOUT) +self._qmp.accept() +except: +os.remove(self._monitor_path) +raise + +def shutdown(self): +'''Terminate the VM and clean up''' +self._qmp.cmd('quit') +self._popen.wait() +os.remove(self._monitor_path) +os.remove(self._qemu_log_path) + +def qmp(self, cmd, **args): +'''Invoke a QMP command and return the result dict''' +return self._qmp.cmd(cmd, args=args) + +def get_qmp_events(self, wait=False): +'''Poll for queued QMP events and return a list of dicts''' +events = self._qmp.get_events(wait=wait) +self._qmp.clear_events() +return events +
[Qemu-devel] [PATCH 3/3] test: add image streaming tests
This patch adds a test suite for the image streaming feature. It exercises the 'block_stream', 'block_job_cancel', 'block_job_set_speed', and 'query-block-jobs' QMP commands. Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com --- tests/qemu-iotests/030 | 151 tests/qemu-iotests/030.out |5 ++ tests/qemu-iotests/group |1 + 3 files changed, 157 insertions(+), 0 deletions(-) create mode 100755 tests/qemu-iotests/030 create mode 100644 tests/qemu-iotests/030.out diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 new file mode 100755 index 000..1faf984 --- /dev/null +++ b/tests/qemu-iotests/030 @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# +# Tests for image streaming. +# +# Copyright (C) 2012 IBM Corp. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. +# + +import os +import iotests +from iotests import qemu_img, qemu_io + +backing_img = os.path.join(iotests.test_dir, 'backing.img') +test_img = os.path.join(iotests.test_dir, 'test.img') + +class ImageStreamingTestCase(iotests.QMPTestCase): +'''Abstract base class for image streaming test cases''' + +def assert_no_active_streams(self): +result = self.vm.qmp('query-block-jobs') +self.assert_qmp(result, 'return', []) + +class TestSingleDrive(ImageStreamingTestCase): +image_len = 1 * 1024 * 1024 # MB + +def setUp(self): +qemu_img('create', backing_img, str(TestSingleDrive.image_len)) +qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) +self.vm = iotests.VM().add_drive(test_img) +self.vm.launch() + +def tearDown(self): +self.vm.shutdown() +os.remove(test_img) +os.remove(backing_img) + +def test_stream(self): +self.assert_no_active_streams() + +result = self.vm.qmp('block_stream', device='drive0') +self.assert_qmp(result, 'return', {}) + +completed = False +while not completed: +for event in self.vm.get_qmp_events(wait=True): +if event['event'] == 'BLOCK_JOB_COMPLETED': +self.assert_qmp(event, 'data/type', 'stream') +self.assert_qmp(event, 'data/device', 'drive0') +self.assert_qmp(event, 'data/offset', self.image_len) +self.assert_qmp(event, 'data/len', self.image_len) +completed = True + +self.assert_no_active_streams() + +self.assertFalse('sectors not allocated' in qemu_io('-c', 'map', test_img), + 'image file not fully populated after streaming') + +def test_device_not_found(self): +result = self.vm.qmp('block_stream', device='nonexistent') +self.assert_qmp(result, 'error/class', 'DeviceNotFound') + +class TestStreamStop(ImageStreamingTestCase): +image_len = 8 * 1024 * 1024 * 1024 # GB + +def setUp(self): +qemu_img('create', backing_img, str(TestStreamStop.image_len)) +qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) +self.vm = iotests.VM().add_drive(test_img) +self.vm.launch() + +def tearDown(self): +self.vm.shutdown() +os.remove(test_img) +os.remove(backing_img) + +def test_stream_stop(self): +import time + +self.assert_no_active_streams() + +result = self.vm.qmp('block_stream', device='drive0') +self.assert_qmp(result, 'return', {}) + +time.sleep(1) +events = self.vm.get_qmp_events(wait=False) +self.assertEqual(events, [], 'unexpected QMP event: %s' % events) + +self.vm.qmp('block_job_cancel', device='drive0') +self.assert_qmp(result, 'return', {}) + +cancelled = False +while not cancelled: +for event in self.vm.get_qmp_events(wait=True): +if event['event'] == 'BLOCK_JOB_CANCELLED': +self.assert_qmp(event, 'data/type', 'stream') +self.assert_qmp(event, 'data/device', 'drive0') +cancelled = True + +self.assert_no_active_streams() + +# This is a short performance test which is not run by default. +# Invoke IMGFMT=qed ./030 TestSetSpeed.perf_test_set_speed +class TestSetSpeed(ImageStreamingTestCase): +image_len = 80 * 1024 * 1024 # MB +
Re: [Qemu-devel] [PATCH] qcow2: Fix offset in qcow2_read_extensions
On Mon, Feb 27, 2012 at 4:27 PM, Kevin Wolf kw...@redhat.com wrote: The spec says that the length of extensions is padded to 8 bytes, not the offset. Currently this is the same because the header size is a multiple of 8, so this is only about compatibility with future changes to the header size. While touching it, move the calculation to a common place instead of duplicating it for each header extension type. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2.c | 5 ++--- 1 files changed, 2 insertions(+), 3 deletions(-) Reviewed-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
Re: [Qemu-devel] [PATCH] qcow2: Reject unrealistically large header extensions
On Mon, Feb 27, 2012 at 4:27 PM, Kevin Wolf kw...@redhat.com wrote: + if (ext.len 65536) { + error_report(Header extension larger than 64k - this looks wrong); + return -ENOTSUP; + } This is an implementation limit and not in the spec, but I think it's reasonable. Reviewed-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
Re: [Qemu-devel] [PATCH] kvm: notify host when guest paniced
On 2012-02-28 09:23, Wen Congyang wrote: At 02/27/2012 11:08 PM, Jan Kiszka Wrote: On 2012-02-27 04:01, Wen Congyang wrote: We can know the guest is paniced when the guest runs on xen. But we do not have such feature on kvm. This patch implemnts this feature, and the implementation is the same as xen: register panic notifier, and call hypercall when the guest is paniced. Signed-off-by: Wen Congyang we...@cn.fujitsu.com --- arch/x86/kernel/kvm.c| 12 arch/x86/kvm/svm.c |8 ++-- arch/x86/kvm/vmx.c |8 ++-- arch/x86/kvm/x86.c | 13 +++-- include/linux/kvm.h |1 + include/linux/kvm_para.h |1 + 6 files changed, 37 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6..b928d1d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { .notifier_call = kvm_pv_reboot_notify, }; +static int +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + kvm_hypercall0(KVM_HC_GUEST_PANIC); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_panic_nb = { + .notifier_call = kvm_pv_panic_notify, +}; + You should split up host and guest-side changes. static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) paravirt_ops_setup(); register_reboot_notifier(kvm_pv_reboot_nb); + atomic_notifier_chain_register(panic_notifier_list, kvm_pv_panic_nb); for (i = 0; i KVM_TASK_SLEEP_HASHSIZE; i++) spin_lock_init(async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0b7690e..38b4705 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) static int vmmcall_interception(struct vcpu_svm *svm) { + int ret; + svm-next_rip = kvm_rip_read(svm-vcpu) + 3; skip_emulated_instruction(svm-vcpu); - kvm_emulate_hypercall(svm-vcpu); - return 1; + ret = kvm_emulate_hypercall(svm-vcpu); + + /* Ignore the error? */ + return ret == 0 ? 0 : 1; Why can't kvm_emulate_hypercall return the right value? kvm_emulate_hypercall() will call kvm_hv_hypercall(), and kvm_hv_hypercall() will return 0 when vcpu's CPL 0. If vcpu's CPL 0, does kvm need to exit and tell it to qemu? No, there is currently no exit to userspace due to hypercalls, neither of HV nor KVM kind. The point is that the return code of kvm_emulate_hypercall is unused so far, so you can easily redefine it to encode continue vs. exit to userspace. Once someone has different needs, this could still be refactored again. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux
Re: [Qemu-devel] converting the block layer from coroutines to threads
Il 24/02/2012 22:01, Anthony Liguori ha scritto: Once you can issue I/O from two threads at the same-time (such as streaming in the iothread and guest I/O in the virtqueue thread), everything already needs to be thread-safe. It is a pretty short step from there to thread pools for everything. If you start with a thread safe API for submitting block requests, that could be implemented as bapi_aiocb *bapi_submit_readv(bapi_driver *d, struct iovec *iov, int iovcnt, off_t offset) { bapi_request *req = make_bapi_request(BAPI_READ, iov, iovcnt, offset); return bapi_queue_add_req(req); } Which would schedule the I/O thread to actually implement the operation. You could then start incrementally refactoring specific drivers to be re-entrant (like linux-aio). My proposal has exactly these two ingredients: a representations of block layer requests, and a fast path for AIO. But there are really two complementary parts to it. One is generalizing thread-pool support to non-raw formats, the other is doing I/O from multiple threads at the same time. The first is quite easy overall. The second is hard, because it's not just about reentrancy. You need various pieces of infrastructure that do not yet exist; for example you need freeze/unfreeze, because drain/drain_all is not enough if other threads can submit I/O concurrently. But anything that already needs to use a thread pool to do its I/O probably wouldn't benefit from threading virtio. Linux AIO _is_ a thread-pool in the end. It is surprising how close the latencies are between io_submit and cond_signal, for example. Paolo
Re: [Qemu-devel] [PATCH] qom: if @instance_size==0, assign size of object to parent object size
On 02/28/2012 12:39 PM, Paolo Bonzini wrote: Il 28/02/2012 08:18, Igor Mitsyanko ha scritto: QOM documentation states that for objects of type with @instance_size == 0 size will be assigned to match parent object's size. But currently this feauture is not implemented and qemu asserts during creation of object with zero instance_size. This patch adjusts actual behaviour in accordance with documentation. You can do it just once, in type_get_parent instead. Paolo Can't understand how type_get_parent() is relevant to this, we can call object_initialize() or object_new_with_type() and these two functions must set instance_size before calling object_initialize_with_type(). Up to this point there are no calls to type_get_parent(). Mitsyanko Igor ASWG, Moscow RD center, Samsung Electronics email: i.mitsya...@samsung.com
Re: [Qemu-devel] [PATCH] qcow2: Reject unrealistically large header extensions
Am 28.02.2012 10:33, schrieb Stefan Hajnoczi: On Mon, Feb 27, 2012 at 4:27 PM, Kevin Wolf kw...@redhat.com wrote: +if (ext.len 65536) { +error_report(Header extension larger than 64k - this looks wrong); +return -ENOTSUP; +} This is an implementation limit and not in the spec, but I think it's reasonable. Reviewed-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com Hm, actually, now that I look at this patch again, I think there's a much better error condition that even matches the spec: if (offset + ext.len end_offset) I'll send a changed version of the patch. Kevin
[Qemu-devel] Wireshark SCSI dissectors for new transports
Wireshark today supports SCSI dissectors for iSCSI. In the QEMU system emulator we have an emulated SCSI target which handles devices for SCSI Parallel Interface (SPI), USB Mass Storage Device, and now supports the new virtio-scsi transport (for efficient virtual machine SCSI I/O). Cong and I would like to add pcap support to the emulated SCSI target in QEMU, making it easy to use Wireshark for SCSI debugging and analysis. I'm looking for advice on getting started because we are not familiar with Wireshark/dissector internals. I believe the SCSI dissector does not support raw CDB dissection - it requires a SCSI transport dissector (currently iSCSI). A few options come to mind: 1. Change the SCSI dissector to support top-level dissecting of raw SCSI CDBs without transport information (initiator, target, and other metadata). 2. Add virtio-scsi and perhaps SPI SCSI transport dissectors. 3. A simpler approach I'm missing? :) Suggestions appreciated. Stefan
Re: [Qemu-devel] [PATCH] qcow2: Reject unrealistically large header extensions
On Tue, Feb 28, 2012 at 9:47 AM, Kevin Wolf kw...@redhat.com wrote: Am 28.02.2012 10:33, schrieb Stefan Hajnoczi: On Mon, Feb 27, 2012 at 4:27 PM, Kevin Wolf kw...@redhat.com wrote: + if (ext.len 65536) { + error_report(Header extension larger than 64k - this looks wrong); + return -ENOTSUP; + } This is an implementation limit and not in the spec, but I think it's reasonable. Reviewed-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com Hm, actually, now that I look at this patch again, I think there's a much better error condition that even matches the spec: if (offset + ext.len end_offset) Careful, integer overflow. Stefan
Re: [Qemu-devel] [PATCH] kvm: notify host when guest paniced
At 02/28/2012 05:34 PM, Jan Kiszka Wrote: On 2012-02-28 09:23, Wen Congyang wrote: At 02/27/2012 11:08 PM, Jan Kiszka Wrote: On 2012-02-27 04:01, Wen Congyang wrote: We can know the guest is paniced when the guest runs on xen. But we do not have such feature on kvm. This patch implemnts this feature, and the implementation is the same as xen: register panic notifier, and call hypercall when the guest is paniced. Signed-off-by: Wen Congyang we...@cn.fujitsu.com --- arch/x86/kernel/kvm.c| 12 arch/x86/kvm/svm.c |8 ++-- arch/x86/kvm/vmx.c |8 ++-- arch/x86/kvm/x86.c | 13 +++-- include/linux/kvm.h |1 + include/linux/kvm_para.h |1 + 6 files changed, 37 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6..b928d1d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { .notifier_call = kvm_pv_reboot_notify, }; +static int +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + kvm_hypercall0(KVM_HC_GUEST_PANIC); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_panic_nb = { + .notifier_call = kvm_pv_panic_notify, +}; + You should split up host and guest-side changes. static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) paravirt_ops_setup(); register_reboot_notifier(kvm_pv_reboot_nb); + atomic_notifier_chain_register(panic_notifier_list, kvm_pv_panic_nb); for (i = 0; i KVM_TASK_SLEEP_HASHSIZE; i++) spin_lock_init(async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0b7690e..38b4705 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) static int vmmcall_interception(struct vcpu_svm *svm) { + int ret; + svm-next_rip = kvm_rip_read(svm-vcpu) + 3; skip_emulated_instruction(svm-vcpu); - kvm_emulate_hypercall(svm-vcpu); - return 1; + ret = kvm_emulate_hypercall(svm-vcpu); + + /* Ignore the error? */ + return ret == 0 ? 0 : 1; Why can't kvm_emulate_hypercall return the right value? kvm_emulate_hypercall() will call kvm_hv_hypercall(), and kvm_hv_hypercall() will return 0 when vcpu's CPL 0. If vcpu's CPL 0, does kvm need to exit and tell it to qemu? No, there is currently no exit to userspace due to hypercalls, neither of HV nor KVM kind. The point is that the return code of kvm_emulate_hypercall is unused so far, so you can easily redefine it to encode continue vs. exit to userspace. Once someone has different needs, this could still be refactored again. So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's CPL 0? Thanks Wen Congyang Jan
Re: [Qemu-devel] [PATCH] qom: if @instance_size==0, assign size of object to parent object size
Il 28/02/2012 10:43, Igor Mitsyanko ha scritto: On 02/28/2012 12:39 PM, Paolo Bonzini wrote: Il 28/02/2012 08:18, Igor Mitsyanko ha scritto: QOM documentation states that for objects of type with @instance_size == 0 size will be assigned to match parent object's size. But currently this feauture is not implemented and qemu asserts during creation of object with zero instance_size. This patch adjusts actual behaviour in accordance with documentation. You can do it just once, in type_get_parent instead. Sorry, rewind. You can do it in type_class_init instead (you are obviously doing it just once since you assign to type-instance_size). type_class_init mostly deals with class initialization, but it's really the place where a type is hooked up with its parent. Perhaps type_late_init would be a better name. I think the problem is misplaced type_class_init calls. void object_initialize(void *data, const char *typename) { TypeImpl *type = type_get_by_name(typename); +type-instance_size = object_get_instance_size(type); object_initialize_with_type(data, type); } object_initialize_with_type needs to call type_class_init before testing type-instance_size, not after. @@ -357,6 +371,7 @@ Object *object_new_with_type(Type type) g_assert(type != NULL); +type-instance_size = object_get_instance_size(type); And this should also be a call to type_class_init. obj = g_malloc(type-instance_size); object_initialize_with_type(obj, type); object_ref(obj); Paolo
Re: [Qemu-devel] [PATCH] qcow2: Reject unrealistically large header extensions
Am 28.02.2012 11:00, schrieb Stefan Hajnoczi: On Tue, Feb 28, 2012 at 9:47 AM, Kevin Wolf kw...@redhat.com wrote: Am 28.02.2012 10:33, schrieb Stefan Hajnoczi: On Mon, Feb 27, 2012 at 4:27 PM, Kevin Wolf kw...@redhat.com wrote: +if (ext.len 65536) { +error_report(Header extension larger than 64k - this looks wrong); +return -ENOTSUP; +} This is an implementation limit and not in the spec, but I think it's reasonable. Reviewed-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com Hm, actually, now that I look at this patch again, I think there's a much better error condition that even matches the spec: if (offset + ext.len end_offset) Careful, integer overflow. offset/end_offset are uint64_t offsets into the first cluster, ext.len is uint32_t. Looks safe. Kevin
Re: [Qemu-devel] Wireshark SCSI dissectors for new transports
Hi Stefan. Wireshark supports a lot more than just SCSI over iSCSI It dissects SCSI over USB, FCOE, raw-FC, FCIP, iFCP (I never got access to traces for mFCP :-( ) and also over NDMP. I never got access to HyperSCSI traces either, so that is missing too. Since I never got HyperSCSI or mFCP (very short-lived attempt from HBA vendors) those two are the only ones today I think where we miss decode. virt-scsi from QEMU sounds interesting! SCSI has a very well defined API in wireshark so assind a new transport should be trivial. I have done so several times. First you need a DLT value from the tcpdump folks to wrap your packets in. Once you have that it should be semi-trivial to hook the SCSI-dissector into your transport/DLT Depending on what the framing looks like, you need at least a wrapper around the SCSI payload that can contain an I_T identifier, then a LUN field, and then scoped per LUN you need a task-tag or similar. Wireshark would need I_T to be able to track initiators and targets separately and form a conversation between an arbitrary pair. A LUN identifier to track different luns on the same I_T separately. Finally it also needs a task-tag so that on a specific ILT nexus it will be able to match a SCSI CDB with DATA-IN/OUT blobs and a SCSI response/sense to make it map well you might need to wrap thing inside a struct scsi_wrapper { initiator identifier target identifier lun task-tag opcode (cdb, datain, dataout, response/sense) scsi * } if your transport also supports multiple datain/out blobs for a single task, in order to reassemble the data we would also need a offset/length for each datain/out blob. regards ronnie sahlberg On Tue, Feb 28, 2012 at 8:59 PM, Stefan Hajnoczi stefa...@gmail.com wrote: Wireshark today supports SCSI dissectors for iSCSI. In the QEMU system emulator we have an emulated SCSI target which handles devices for SCSI Parallel Interface (SPI), USB Mass Storage Device, and now supports the new virtio-scsi transport (for efficient virtual machine SCSI I/O). Cong and I would like to add pcap support to the emulated SCSI target in QEMU, making it easy to use Wireshark for SCSI debugging and analysis. I'm looking for advice on getting started because we are not familiar with Wireshark/dissector internals. I believe the SCSI dissector does not support raw CDB dissection - it requires a SCSI transport dissector (currently iSCSI). A few options come to mind: 1. Change the SCSI dissector to support top-level dissecting of raw SCSI CDBs without transport information (initiator, target, and other metadata). 2. Add virtio-scsi and perhaps SPI SCSI transport dissectors. 3. A simpler approach I'm missing? :) Suggestions appreciated. Stefan
Re: [Qemu-devel] [PATCH] kvm: notify host when guest paniced
On 2012-02-28 10:42, Wen Congyang wrote: At 02/28/2012 05:34 PM, Jan Kiszka Wrote: On 2012-02-28 09:23, Wen Congyang wrote: At 02/27/2012 11:08 PM, Jan Kiszka Wrote: On 2012-02-27 04:01, Wen Congyang wrote: We can know the guest is paniced when the guest runs on xen. But we do not have such feature on kvm. This patch implemnts this feature, and the implementation is the same as xen: register panic notifier, and call hypercall when the guest is paniced. Signed-off-by: Wen Congyang we...@cn.fujitsu.com --- arch/x86/kernel/kvm.c| 12 arch/x86/kvm/svm.c |8 ++-- arch/x86/kvm/vmx.c |8 ++-- arch/x86/kvm/x86.c | 13 +++-- include/linux/kvm.h |1 + include/linux/kvm_para.h |1 + 6 files changed, 37 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6..b928d1d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { .notifier_call = kvm_pv_reboot_notify, }; +static int +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + kvm_hypercall0(KVM_HC_GUEST_PANIC); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_panic_nb = { + .notifier_call = kvm_pv_panic_notify, +}; + You should split up host and guest-side changes. static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) paravirt_ops_setup(); register_reboot_notifier(kvm_pv_reboot_nb); + atomic_notifier_chain_register(panic_notifier_list, kvm_pv_panic_nb); for (i = 0; i KVM_TASK_SLEEP_HASHSIZE; i++) spin_lock_init(async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0b7690e..38b4705 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) static int vmmcall_interception(struct vcpu_svm *svm) { + int ret; + svm-next_rip = kvm_rip_read(svm-vcpu) + 3; skip_emulated_instruction(svm-vcpu); - kvm_emulate_hypercall(svm-vcpu); - return 1; + ret = kvm_emulate_hypercall(svm-vcpu); + + /* Ignore the error? */ + return ret == 0 ? 0 : 1; Why can't kvm_emulate_hypercall return the right value? kvm_emulate_hypercall() will call kvm_hv_hypercall(), and kvm_hv_hypercall() will return 0 when vcpu's CPL 0. If vcpu's CPL 0, does kvm need to exit and tell it to qemu? No, there is currently no exit to userspace due to hypercalls, neither of HV nor KVM kind. The point is that the return code of kvm_emulate_hypercall is unused so far, so you can easily redefine it to encode continue vs. exit to userspace. Once someone has different needs, this could still be refactored again. So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's CPL 0? Yes, change it to encode what vendor modules need to return to their callers. Jan -- Siemens AG, Corporate Technology, CT T DE IT 1 Corporate Competence Center Embedded Linux
[Qemu-devel] [PATCH 14/21] usb-ehci: Handle ISO packets failing with an error other then NAK
From: Hans de Goede hdego...@redhat.com Before this patch the ehci code was not checking for any other errors other then USB_RET_NAK. This causes 2 problems: 1) Other errors are not reported to the guest. 2) When transactions with the ITD_XACT_IOC bit set completing with another error would not result in USBSTS_INT getting set. I hit this problem when unplugging devices while iso data was streaming from the device to the guest. When this happens it takes a while for the guest to process the unplugging and remove ISO transactions from the ehci schedule, in the mean time these transactions would complete with a result of USB_RET_NODEV, which was not handled. This lead to the Linux guest's usb subsystem hanging, that is it would no longer see new usb devices getting plugged in and running for example lsusb would lead to a stuck (D state) lsusb process. This patch fixes this. Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-ehci.c | 22 +++--- 1 files changed, 19 insertions(+), 3 deletions(-) diff --git a/hw/usb-ehci.c b/hw/usb-ehci.c index 048eb76..b95773f 100644 --- a/hw/usb-ehci.c +++ b/hw/usb-ehci.c @@ -1510,11 +1510,27 @@ static int ehci_process_itd(EHCIState *ehci, /* IN */ set_field(itd-transact[i], ret, ITD_XACT_LENGTH); } - -if (itd-transact[i] ITD_XACT_IOC) { -ehci_record_interrupt(ehci, USBSTS_INT); +} else { +switch (ret) { +default: +fprintf(stderr, Unexpected iso usb result: %d\n, ret); +/* Fall through */ +case USB_RET_NODEV: +/* 3.3.2: XACTERR is only allowed on IN transactions */ +if (dir) { +itd-transact[i] |= ITD_XACT_XACTERR; +ehci_record_interrupt(ehci, USBSTS_ERRINT); +} +break; +case USB_RET_BABBLE: +itd-transact[i] |= ITD_XACT_BABBLE; +ehci_record_interrupt(ehci, USBSTS_ERRINT); +break; } } +if (itd-transact[i] ITD_XACT_IOC) { +ehci_record_interrupt(ehci, USBSTS_INT); +} itd-transact[i] = ~ITD_XACT_ACTIVE; } } -- 1.7.1
[Qemu-devel] [PATCH 21/21] usb: Resolve warnings about unassigned bus on usb device creation
From: Jan Kiszka jan.kis...@siemens.com When creating an USB device the old way, there is no way to specify the target bus. Thus the warning issued by usb_create makes no sense and rather confuses our users. Resolve this by passing a bus reference to the usbdevice_init handler and letting those handlers forward it to usb_create. Signed-off-by: Jan Kiszka jan.kis...@siemens.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-bt.c |4 ++-- hw/usb-bus.c| 18 -- hw/usb-msd.c|4 ++-- hw/usb-net.c|4 ++-- hw/usb-serial.c |8 hw/usb.h|7 --- usb-bsd.c |4 ++-- usb-linux.c |4 ++-- vl.c|7 --- 9 files changed, 26 insertions(+), 34 deletions(-) diff --git a/hw/usb-bt.c b/hw/usb-bt.c index 649bdcf..23c39ec 100644 --- a/hw/usb-bt.c +++ b/hw/usb-bt.c @@ -498,14 +498,14 @@ static int usb_bt_initfn(USBDevice *dev) return 0; } -USBDevice *usb_bt_init(HCIInfo *hci) +USBDevice *usb_bt_init(USBBus *bus, HCIInfo *hci) { USBDevice *dev; struct USBBtState *s; if (!hci) return NULL; -dev = usb_create_simple(NULL /* FIXME */, usb-bt-dongle); +dev = usb_create_simple(bus, usb-bt-dongle); if (!dev) { return NULL; } diff --git a/hw/usb-bus.c b/hw/usb-bus.c index ae79a45..70b7ebc 100644 --- a/hw/usb-bus.c +++ b/hw/usb-bus.c @@ -203,13 +203,14 @@ typedef struct LegacyUSBFactory { const char *name; const char *usbdevice_name; -USBDevice *(*usbdevice_init)(const char *params); +USBDevice *(*usbdevice_init)(USBBus *bus, const char *params); } LegacyUSBFactory; static GSList *legacy_usb_factory; void usb_legacy_register(const char *typename, const char *usbdevice_name, - USBDevice *(*usbdevice_init)(const char *params)) + USBDevice *(*usbdevice_init)(USBBus *bus, + const char *params)) { if (usbdevice_name) { LegacyUSBFactory *f = g_malloc0(sizeof(*f)); @@ -224,17 +225,6 @@ USBDevice *usb_create(USBBus *bus, const char *name) { DeviceState *dev; -#if 1 -/* temporary stopgap until all usb is properly qdev-ified */ -if (!bus) { -bus = usb_bus_find(-1); -if (!bus) -return NULL; -error_report(%s: no bus specified, using \%s\ for \%s\, -__FUNCTION__, bus-qbus.name, name); -} -#endif - dev = qdev_create(bus-qbus, name); return USB_DEVICE(dev); } @@ -565,7 +555,7 @@ USBDevice *usbdevice_create(const char *cmdline) } return usb_create_simple(bus, f-name); } -return f-usbdevice_init(params); +return f-usbdevice_init(bus, params); } static void usb_device_class_init(ObjectClass *klass, void *data) diff --git a/hw/usb-msd.c b/hw/usb-msd.c index 5fbd2d0..c6f08a0 100644 --- a/hw/usb-msd.c +++ b/hw/usb-msd.c @@ -568,7 +568,7 @@ static int usb_msd_initfn(USBDevice *dev) return 0; } -static USBDevice *usb_msd_init(const char *filename) +static USBDevice *usb_msd_init(USBBus *bus, const char *filename) { static int nr=0; char id[8]; @@ -611,7 +611,7 @@ static USBDevice *usb_msd_init(const char *filename) } /* create guest device */ -dev = usb_create(NULL /* FIXME */, usb-storage); +dev = usb_create(bus, usb-storage); if (!dev) { return NULL; } diff --git a/hw/usb-net.c b/hw/usb-net.c index 49d5d4d..22b8201 100644 --- a/hw/usb-net.c +++ b/hw/usb-net.c @@ -1353,7 +1353,7 @@ static int usb_net_initfn(USBDevice *dev) return 0; } -static USBDevice *usb_net_init(const char *cmdline) +static USBDevice *usb_net_init(USBBus *bus, const char *cmdline) { USBDevice *dev; QemuOpts *opts; @@ -1371,7 +1371,7 @@ static USBDevice *usb_net_init(const char *cmdline) return NULL; } -dev = usb_create(NULL /* FIXME */, usb-net); +dev = usb_create(bus, usb-net); if (!dev) { return NULL; } diff --git a/hw/usb-serial.c b/hw/usb-serial.c index 52676e8..0aae379 100644 --- a/hw/usb-serial.c +++ b/hw/usb-serial.c @@ -492,7 +492,7 @@ static int usb_serial_initfn(USBDevice *dev) return 0; } -static USBDevice *usb_serial_init(const char *filename) +static USBDevice *usb_serial_init(USBBus *bus, const char *filename) { USBDevice *dev; CharDriverState *cdrv; @@ -535,7 +535,7 @@ static USBDevice *usb_serial_init(const char *filename) if (!cdrv) return NULL; -dev = usb_create(NULL /* FIXME */, usb-serial); +dev = usb_create(bus, usb-serial); if (!dev) { return NULL; } @@ -549,7 +549,7 @@ static USBDevice *usb_serial_init(const char *filename) return dev; } -static USBDevice *usb_braille_init(const char *unused) +static USBDevice *usb_braille_init(USBBus *bus, const char *unused) { USBDevice *dev; CharDriverState *cdrv; @@ -558,7 +558,7 @@ static
[Qemu-devel] [PULL] usb patch queue
Hi, Next batch of usb updates. This one brings packet queuing for uhci and xhci, so we have per-endpoint queues at usb-bus level now. Need to bring those to the usb drivers as next step, so they (especially usb-host) can pipeline requests. Also a bunch of bugfixes in ehci, smartcard emulation and usb redirect. cheers, Gerd The following changes since commit b4bd0b168e9f4898b98308f4a8a089f647a86d16: audio: Add some fall through comments (2012-02-25 18:16:11 +0400) are available in the git repository at: git://git.kraxel.org/qemu usb.39 Alon Levy (4): usb-desc: fix user trigerrable segfaults (!config) libcacard: link with glib for g_strndup usb-ccid: advertise SELF_POWERED libcacard: fix reported ATR length Gerd Hoffmann (10): usb-hid: fix tablet activation usb-ehci: fix reset usb-uhci: cleanup UHCIAsync allocation initialization. usb-uhci: add UHCIQueue usb-uhci: process uhci_handle_td return code via switch. usb-uhci: implement packet queuing usb-xhci: enable packet queuing usb: add tracepoint for usb packet state changes. usb-ehci: sanity-check iso xfers ehci: drop old stuff Hans de Goede (6): usb-ehci: Handle ISO packets failing with an error other then NAK usb-redir: Fix printing of device version usb-redir: Always clear device state on filter reject usb-redir: Let the usb-host know about our device filtering usb-redir: Limit return values returned by iso packets usb-redir: Return USB_RET_NAK when we've no data for an interrupt endpoint Jan Kiszka (1): usb: Resolve warnings about unassigned bus on usb device creation configure |6 +- hw/usb-bt.c|4 +- hw/usb-bus.c | 18 +--- hw/usb-ccid.c |2 +- hw/usb-desc.c | 20 +++- hw/usb-ehci.c | 71 ++--- hw/usb-hid.c |3 + hw/usb-msd.c |4 +- hw/usb-net.c |4 +- hw/usb-serial.c|8 +- hw/usb-uhci.c | 314 +++- hw/usb-xhci.c |6 - hw/usb.c | 27 + hw/usb.h |7 +- libcacard/vcardt.h |4 +- trace-events |3 + usb-bsd.c |4 +- usb-linux.c|4 +- usb-redir.c| 46 ++-- vl.c |7 +- 20 files changed, 317 insertions(+), 245 deletions(-)
[Qemu-devel] [PATCH] Consolidate reads and writes in nbd block device into one common routine
This removes quite some duplicated code. Signed-off-By: Michael Tokarev m...@tls.msk.ru --- block/nbd.c | 94 +++ 1 files changed, 30 insertions(+), 64 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 161b299..82f2964 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -320,91 +320,57 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) return result; } -static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset) -{ -BDRVNBDState *s = bs-opaque; -struct nbd_request request; -struct nbd_reply reply; - -request.type = NBD_CMD_READ; -request.from = sector_num * 512; -request.len = nb_sectors * 512; - -nbd_coroutine_start(s, request); -if (nbd_co_send_request(s, request, NULL, 0) == -1) { -reply.error = errno; -} else { -nbd_co_receive_reply(s, request, reply, qiov-iov, offset); -} -nbd_coroutine_end(s, request); -return -reply.error; - -} +/* qemu-nbd has a limit of slightly less than 1M per request. Try to + * remain aligned to 4K. */ +#define NBD_MAX_SECTORS 2040 -static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset) +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, int iswrite) { BDRVNBDState *s = bs-opaque; struct nbd_request request; struct nbd_reply reply; +int offset = 0; -request.type = NBD_CMD_WRITE; -if (!bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { +request.type = iswrite ? NBD_CMD_WRITE : NBD_CMD_READ; +if (iswrite !bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { request.type |= NBD_CMD_FLAG_FUA; } -request.from = sector_num * 512; -request.len = nb_sectors * 512; +/* we split the request into pieces of at most NBD_MAX_SECTORS size + * and process them in a loop... */ +for (;;) { +request.from = sector_num * 512; +request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512; + +nbd_coroutine_start(s, request); +if (nbd_co_send_request(s, request, iswrite ? qiov-iov : NULL, 0) == -1) { +reply.error = errno; +} else { +nbd_co_receive_reply(s, request, reply, iswrite ? NULL : qiov-iov, offset); +} +nbd_coroutine_end(s, request); -nbd_coroutine_start(s, request); -if (nbd_co_send_request(s, request, qiov-iov, offset) == -1) { -reply.error = errno; -} else { -nbd_co_receive_reply(s, request, reply, NULL, 0); +offset += NBD_MAX_SECTORS * 512; +sector_num += NBD_MAX_SECTORS; +nb_sectors -= NBD_MAX_SECTORS; +if (reply.error != 0 || nb_sectors = 0) { +/* ..till we hit an error or there's nothing more to process */ +return -reply.error; +} } -nbd_coroutine_end(s, request); -return -reply.error; } -/* qemu-nbd has a limit of slightly less than 1M per request. Try to - * remain aligned to 4K. */ -#define NBD_MAX_SECTORS 2040 - static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { -int offset = 0; -int ret; -while (nb_sectors NBD_MAX_SECTORS) { -ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); -if (ret 0) { -return ret; -} -offset += NBD_MAX_SECTORS * 512; -sector_num += NBD_MAX_SECTORS; -nb_sectors -= NBD_MAX_SECTORS; -} -return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset); +return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 0); } static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { -int offset = 0; -int ret; -while (nb_sectors NBD_MAX_SECTORS) { -ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); -if (ret 0) { -return ret; -} -offset += NBD_MAX_SECTORS * 512; -sector_num += NBD_MAX_SECTORS; -nb_sectors -= NBD_MAX_SECTORS; -} -return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset); +return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 1); } static int nbd_co_flush(BlockDriverState *bs) -- 1.7.9
Re: [Qemu-devel] [PATCH 2/2] Expose tsc deadline timer cpuid to guest
My point is that qemu-version-A [-cpu whatever] should provide the same VM as qemu-version-B -machine pc-A [-cpu whatever] specifically if you leave out the cpu specification. So the compat machine could establish a feature mask (e.g. append some -tsc_deadline in this case). But, indeed, we need a new channel for this. Yes, if such requirement need to be satisfied, I agree we need a new channel to solve this kind of common issue. As for tsc deadline timer feature exposing, I write an updated patch as attached. 1). It exposes tsc deadline timer feature to guest if in-kernel irqchip is used and kvm has emulated tsc deadline timer; 2). It also authorizes user to control the feature exposing via a cpu feature flag; Thanks, Jinsong From 5b7d5f459b621686e78e437010ce34748bcb9e8e Mon Sep 17 00:00:00 2001 From: Liu, Jinsong jinsong@intel.com Date: Wed, 29 Feb 2012 01:53:15 +0800 Subject: [PATCH] Expose tsc deadline timer feature to guest It exposes tsc deadline timer feature to guest if in-kernel irqchip is used and kvm has emulated tsc deadline timer. It also authorizes user to control the feature exposing via a cpu feature flag. Signed-off-by: Liu, Jinsong jinsong@intel.com --- target-i386/cpu.h |1 + target-i386/cpuid.c |2 +- target-i386/kvm.c |4 3 files changed, 6 insertions(+), 1 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index d92be5d..3409afe 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -399,6 +399,7 @@ #define CPUID_EXT_X2APIC (1 21) #define CPUID_EXT_MOVBE(1 22) #define CPUID_EXT_POPCNT (1 23) +#define CPUID_EXT_TSC_DEADLINE_TIMER (1 24) #define CPUID_EXT_XSAVE(1 26) #define CPUID_EXT_OSXSAVE (1 27) #define CPUID_EXT_HYPERVISOR (1 31) diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c index b9bfeaf..ac4b79c 100644 --- a/target-i386/cpuid.c +++ b/target-i386/cpuid.c @@ -50,7 +50,7 @@ static const char *ext_feature_name[] = { fma, cx16, xtpr, pdcm, NULL, NULL, dca, sse4.1|sse4_1, sse4.2|sse4_2, x2apic, movbe, popcnt, -NULL, aes, xsave, osxsave, +tsc_deadline, aes, xsave, osxsave, avx, NULL, NULL, hypervisor, }; static const char *ext2_feature_name[] = { diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 7079e87..2639699 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -370,6 +370,10 @@ int kvm_arch_init_vcpu(CPUState *env) i = env-cpuid_ext_features CPUID_EXT_HYPERVISOR; env-cpuid_ext_features = kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX); env-cpuid_ext_features |= i; +if (!kvm_irqchip_in_kernel() || +!kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) { +env-cpuid_ext_features = ~CPUID_EXT_TSC_DEADLINE_TIMER; +} env-cpuid_ext2_features = kvm_arch_get_supported_cpuid(s, 0x8001, 0, R_EDX); -- 1.7.1 0001-Expose-tsc-deadline-timer-feature-to-guest.patch Description: 0001-Expose-tsc-deadline-timer-feature-to-guest.patch
[Qemu-devel] [PATCH 20/21] usb-redir: Return USB_RET_NAK when we've no data for an interrupt endpoint
From: Hans de Goede hdego...@redhat.com We should return USB_RET_NAK, rather then a 0 sized packet, when we've no data for an interrupt IN endpoint. Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- usb-redir.c |5 - 1 files changed, 4 insertions(+), 1 deletions(-) diff --git a/usb-redir.c b/usb-redir.c index d905463..755492f 100644 --- a/usb-redir.c +++ b/usb-redir.c @@ -548,7 +548,10 @@ static int usbredir_handle_interrupt_data(USBRedirDevice *dev, /* Check interrupt_error for stream errors */ status = dev-endpoint[EP2I(ep)].interrupt_error; dev-endpoint[EP2I(ep)].interrupt_error = 0; -return usbredir_handle_status(dev, status, 0); +if (status) { +return usbredir_handle_status(dev, status, 0); +} +return USB_RET_NAK; } DPRINTF(interrupt-token-in ep %02X status %d len %d\n, ep, intp-status, intp-len); -- 1.7.1
[Qemu-devel] [PATCH 10/21] usb-desc: fix user trigerrable segfaults (!config)
From: Alon Levy al...@redhat.com Check for dev-config being NULL in two places: USB_REQ_GET_CONFIGURATION and USB_REQ_GET_STATUS. The behavior of USB_REQ_GET_STATUS is unspecified in the Default state, that corresponds to dev-config being NULL (it defaults to NULL and is reset whenever a SET_CONFIGURATION with value 0, or attachment). I implemented it to correspond with the state before ed5a83ddd8c1d8ec7b1015315530cf29949e7c48, the commit moving SET_STATUS to usb-desc; if dev-config is not set we return whatever is in the first configuration. The behavior of USB_REQ_GET_CONFIGURATION is also undefined before any SET_CONFIGURATION, but here we just return 0 (same as specified for the Address state). A win7 guest failed to initialize the device before this patch, segfaulting when GET_STATUS was called with dev-config == NULL. With this patch the passthrough device still doesn't work but the failure is unrelated. Signed-off-by: Alon Levy al...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-desc.c | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/hw/usb-desc.c b/hw/usb-desc.c index 3c3ed6a..ccf85ad 100644 --- a/hw/usb-desc.c +++ b/hw/usb-desc.c @@ -536,7 +536,11 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, break; case DeviceRequest | USB_REQ_GET_CONFIGURATION: -data[0] = dev-config-bConfigurationValue; +/* + * 9.4.2: 0 should be returned if the device is unconfigured, otherwise + * the non zero value of bConfigurationValue. + */ +data[0] = dev-config ? dev-config-bConfigurationValue : 0; ret = 1; break; case DeviceOutRequest | USB_REQ_SET_CONFIGURATION: @@ -544,9 +548,18 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, trace_usb_set_config(dev-addr, value, ret); break; -case DeviceRequest | USB_REQ_GET_STATUS: +case DeviceRequest | USB_REQ_GET_STATUS: { +const USBDescConfig *config = dev-config ? +dev-config : dev-device-confs[0]; + data[0] = 0; -if (dev-config-bmAttributes 0x40) { +/* + * Default state: Device behavior when this request is received while + *the device is in the Default state is not specified. + * We return the same value that a configured device would return if + * it used the first configuration. + */ +if (config-bmAttributes 0x40) { data[0] |= 1 USB_DEVICE_SELF_POWERED; } if (dev-remote_wakeup) { @@ -555,6 +568,7 @@ int usb_desc_handle_control(USBDevice *dev, USBPacket *p, data[1] = 0x00; ret = 2; break; +} case DeviceOutRequest | USB_REQ_CLEAR_FEATURE: if (value == USB_DEVICE_REMOTE_WAKEUP) { dev-remote_wakeup = 0; -- 1.7.1
[Qemu-devel] [PATCH 03/21] usb-uhci: cleanup UHCIAsync allocation initialization.
Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-uhci.c |8 +--- 1 files changed, 1 insertions(+), 7 deletions(-) diff --git a/hw/usb-uhci.c b/hw/usb-uhci.c index 2280dc7..ca87290 100644 --- a/hw/usb-uhci.c +++ b/hw/usb-uhci.c @@ -159,15 +159,9 @@ typedef struct UHCI_QH { static UHCIAsync *uhci_async_alloc(UHCIState *s) { -UHCIAsync *async = g_malloc(sizeof(UHCIAsync)); +UHCIAsync *async = g_new0(UHCIAsync, 1); -memset(async-packet, 0, sizeof(async-packet)); async-uhci = s; -async-valid = 0; -async-td= 0; -async-token = 0; -async-done = 0; -async-isoc = 0; usb_packet_init(async-packet); pci_dma_sglist_init(async-sgl, s-dev, 1); -- 1.7.1
[Qemu-devel] [PATCH 18/21] usb-redir: Let the usb-host know about our device filtering
From: Hans de Goede hdego...@redhat.com libusbredirparser-0.3.4 adds 2 new packets which allows us to notify the usb-host: -about the usb device filter we have (if any), so that it knows not the even try to redirect certain devices -when we reject a device based on filtering (in case it tries anyways) Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- configure |2 +- usb-redir.c | 20 2 files changed, 21 insertions(+), 1 deletions(-) diff --git a/configure b/configure index 87474e9..d64b2de 100755 --- a/configure +++ b/configure @@ -2592,7 +2592,7 @@ fi # check for usbredirparser for usb network redirection support if test $usb_redir != no ; then -if $pkg_config --atleast-version=0.3.3 libusbredirparser /dev/null 21 ; then +if $pkg_config --atleast-version=0.3.4 libusbredirparser /dev/null 21 ; then usb_redir=yes usb_redir_cflags=$($pkg_config --cflags libusbredirparser 2/dev/null) usb_redir_libs=$($pkg_config --libs libusbredirparser 2/dev/null) diff --git a/usb-redir.c b/usb-redir.c index c98b14e..6c92fd9 100644 --- a/usb-redir.c +++ b/usb-redir.c @@ -106,6 +106,7 @@ struct AsyncURB { QTAILQ_ENTRY(AsyncURB)next; }; +static void usbredir_hello(void *priv, struct usb_redir_hello_header *h); static void usbredir_device_connect(void *priv, struct usb_redir_device_connect_header *device_connect); static void usbredir_device_disconnect(void *priv); @@ -802,6 +803,7 @@ static void usbredir_open_close_bh(void *opaque) dev-parser-log_func = usbredir_log; dev-parser-read_func = usbredir_read; dev-parser-write_func = usbredir_write; +dev-parser-hello_func = usbredir_hello; dev-parser-device_connect_func = usbredir_device_connect; dev-parser-device_disconnect_func = usbredir_device_disconnect; dev-parser-interface_info_func = usbredir_interface_info; @@ -820,6 +822,7 @@ static void usbredir_open_close_bh(void *opaque) dev-read_buf_size = 0; usbredirparser_caps_set_cap(caps, usb_redir_cap_connect_device_version); +usbredirparser_caps_set_cap(caps, usb_redir_cap_filter); usbredirparser_init(dev-parser, VERSION, caps, USB_REDIR_CAPS_SIZE, 0); usbredirparser_do_write(dev-parser); } @@ -991,6 +994,10 @@ static int usbredir_check_filter(USBRedirDevice *dev) error: usbredir_device_disconnect(dev); +if (usbredirparser_peer_has_cap(dev-parser, usb_redir_cap_filter)) { +usbredirparser_send_filter_reject(dev-parser); +usbredirparser_do_write(dev-parser); +} return -1; } @@ -1016,6 +1023,19 @@ static int usbredir_handle_status(USBRedirDevice *dev, } } +static void usbredir_hello(void *priv, struct usb_redir_hello_header *h) +{ +USBRedirDevice *dev = priv; + +/* Try to send the filter info now that we've the usb-host's caps */ +if (usbredirparser_peer_has_cap(dev-parser, usb_redir_cap_filter) +dev-filter_rules) { +usbredirparser_send_filter_filter(dev-parser, dev-filter_rules, + dev-filter_rules_count); +usbredirparser_do_write(dev-parser); +} +} + static void usbredir_device_connect(void *priv, struct usb_redir_device_connect_header *device_connect) { -- 1.7.1
[Qemu-devel] [PATCH 04/21] usb-uhci: add UHCIQueue
UHCIAsync structs (in-flight requests) grouped in UHCIQueue now. Each (active) usb endpoint gets its own UHCIQueue. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-uhci.c | 209 - 1 files changed, 118 insertions(+), 91 deletions(-) diff --git a/hw/usb-uhci.c b/hw/usb-uhci.c index ca87290..b8b336f 100644 --- a/hw/usb-uhci.c +++ b/hw/usb-uhci.c @@ -95,23 +95,32 @@ static const char *pid2str(int pid) #endif typedef struct UHCIState UHCIState; +typedef struct UHCIAsync UHCIAsync; +typedef struct UHCIQueue UHCIQueue; /* * Pending async transaction. * 'packet' must be the first field because completion * handler does (UHCIAsync *) pkt cast. */ -typedef struct UHCIAsync { + +struct UHCIAsync { USBPacket packet; QEMUSGList sgl; -UHCIState *uhci; +UHCIQueue *queue; QTAILQ_ENTRY(UHCIAsync) next; uint32_t td; -uint32_t token; -int8_tvalid; uint8_t isoc; uint8_t done; -} UHCIAsync; +}; + +struct UHCIQueue { +uint32_t token; +UHCIState *uhci; +QTAILQ_ENTRY(UHCIQueue) next; +QTAILQ_HEAD(, UHCIAsync) asyncs; +int8_tvalid; +}; typedef struct UHCIPort { USBPort port; @@ -137,7 +146,7 @@ struct UHCIState { uint32_t pending_int_mask; /* Active packets */ -QTAILQ_HEAD(,UHCIAsync) async_pending; +QTAILQ_HEAD(, UHCIQueue) queues; uint8_t num_ports_vmstate; /* Properties */ @@ -157,56 +166,90 @@ typedef struct UHCI_QH { uint32_t el_link; } UHCI_QH; -static UHCIAsync *uhci_async_alloc(UHCIState *s) +static inline int32_t uhci_queue_token(UHCI_TD *td) +{ +/* covers ep, dev, pid - identifies the endpoint */ +return td-token 0x7; +} + +static UHCIQueue *uhci_queue_get(UHCIState *s, UHCI_TD *td) +{ +uint32_t token = uhci_queue_token(td); +UHCIQueue *queue; + +QTAILQ_FOREACH(queue, s-queues, next) { +if (queue-token == token) { +return queue; +} +} + +queue = g_new0(UHCIQueue, 1); +queue-uhci = s; +queue-token = token; +QTAILQ_INIT(queue-asyncs); +QTAILQ_INSERT_HEAD(s-queues, queue, next); +return queue; +} + +static void uhci_queue_free(UHCIQueue *queue) +{ +UHCIState *s = queue-uhci; + +QTAILQ_REMOVE(s-queues, queue, next); +g_free(queue); +} + +static UHCIAsync *uhci_async_alloc(UHCIQueue *queue) { UHCIAsync *async = g_new0(UHCIAsync, 1); -async-uhci = s; +async-queue = queue; usb_packet_init(async-packet); -pci_dma_sglist_init(async-sgl, s-dev, 1); +pci_dma_sglist_init(async-sgl, queue-uhci-dev, 1); return async; } -static void uhci_async_free(UHCIState *s, UHCIAsync *async) +static void uhci_async_free(UHCIAsync *async) { usb_packet_cleanup(async-packet); qemu_sglist_destroy(async-sgl); g_free(async); } -static void uhci_async_link(UHCIState *s, UHCIAsync *async) +static void uhci_async_link(UHCIAsync *async) { -QTAILQ_INSERT_HEAD(s-async_pending, async, next); +UHCIQueue *queue = async-queue; +QTAILQ_INSERT_TAIL(queue-asyncs, async, next); } -static void uhci_async_unlink(UHCIState *s, UHCIAsync *async) +static void uhci_async_unlink(UHCIAsync *async) { -QTAILQ_REMOVE(s-async_pending, async, next); +UHCIQueue *queue = async-queue; +QTAILQ_REMOVE(queue-asyncs, async, next); } -static void uhci_async_cancel(UHCIState *s, UHCIAsync *async) +static void uhci_async_cancel(UHCIAsync *async) { DPRINTF(uhci: cancel td 0x%x token 0x%x done %u\n, async-td, async-token, async-done); if (!async-done) usb_cancel_packet(async-packet); -uhci_async_free(s, async); +uhci_async_free(async); } /* * Mark all outstanding async packets as invalid. * This is used for canceling them when TDs are removed by the HCD. */ -static UHCIAsync *uhci_async_validate_begin(UHCIState *s) +static void uhci_async_validate_begin(UHCIState *s) { -UHCIAsync *async; +UHCIQueue *queue; -QTAILQ_FOREACH(async, s-async_pending, next) { -async-valid--; +QTAILQ_FOREACH(queue, s-queues, next) { +queue-valid--; } -return NULL; } /* @@ -214,77 +257,74 @@ static UHCIAsync *uhci_async_validate_begin(UHCIState *s) */ static void uhci_async_validate_end(UHCIState *s) { -UHCIAsync *curr, *n; +UHCIQueue *queue, *n; +UHCIAsync *async; -QTAILQ_FOREACH_SAFE(curr, s-async_pending, next, n) { -if (curr-valid 0) { +QTAILQ_FOREACH_SAFE(queue, s-queues, next, n) { +if (queue-valid 0) { continue; } -uhci_async_unlink(s, curr); -uhci_async_cancel(s, curr); +while (!QTAILQ_EMPTY(queue-asyncs)) { +async = QTAILQ_FIRST(queue-asyncs); +uhci_async_unlink(async); +uhci_async_cancel(async); +} +uhci_queue_free(queue); } } static void
[Qemu-devel] [PATCH 02/21] usb-ehci: fix reset
Two reset fixes: * pick up s-usbcmd value after ehci_reset call to make sure it keeps the reset value and doesn't get rubbish filled in when val is written back to the mmio register array later on. * make sure the frame timer is zapped on reset. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-ehci.c |3 ++- 1 files changed, 2 insertions(+), 1 deletions(-) diff --git a/hw/usb-ehci.c b/hw/usb-ehci.c index e699814..b9da26a 100644 --- a/hw/usb-ehci.c +++ b/hw/usb-ehci.c @@ -912,6 +912,7 @@ static void ehci_reset(void *opaque) } } ehci_queues_rip_all(s); +qemu_del_timer(s-frame_timer); } static uint32_t ehci_mem_readb(void *ptr, target_phys_addr_t addr) @@ -1070,7 +1071,7 @@ static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val) if (val USBCMD_HCRESET) { ehci_reset(s); -val = ~USBCMD_HCRESET; +val = s-usbcmd; } /* not supporting dynamic frame list size at the moment */ -- 1.7.1
Re: [Qemu-devel] [PATCH] kvm: notify host when guest paniced
On Tue, Feb 28, 2012 at 11:19:47AM +0100, Jan Kiszka wrote: On 2012-02-28 10:42, Wen Congyang wrote: At 02/28/2012 05:34 PM, Jan Kiszka Wrote: On 2012-02-28 09:23, Wen Congyang wrote: At 02/27/2012 11:08 PM, Jan Kiszka Wrote: On 2012-02-27 04:01, Wen Congyang wrote: We can know the guest is paniced when the guest runs on xen. But we do not have such feature on kvm. This patch implemnts this feature, and the implementation is the same as xen: register panic notifier, and call hypercall when the guest is paniced. Signed-off-by: Wen Congyang we...@cn.fujitsu.com --- arch/x86/kernel/kvm.c| 12 arch/x86/kvm/svm.c |8 ++-- arch/x86/kvm/vmx.c |8 ++-- arch/x86/kvm/x86.c | 13 +++-- include/linux/kvm.h |1 + include/linux/kvm_para.h |1 + 6 files changed, 37 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index f0c6fd6..b928d1d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -331,6 +331,17 @@ static struct notifier_block kvm_pv_reboot_nb = { .notifier_call = kvm_pv_reboot_notify, }; +static int +kvm_pv_panic_notify(struct notifier_block *nb, unsigned long code, void *unused) +{ + kvm_hypercall0(KVM_HC_GUEST_PANIC); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_panic_nb = { + .notifier_call = kvm_pv_panic_notify, +}; + You should split up host and guest-side changes. static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -417,6 +428,7 @@ void __init kvm_guest_init(void) paravirt_ops_setup(); register_reboot_notifier(kvm_pv_reboot_nb); + atomic_notifier_chain_register(panic_notifier_list, kvm_pv_panic_nb); for (i = 0; i KVM_TASK_SLEEP_HASHSIZE; i++) spin_lock_init(async_pf_sleepers[i].lock); if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0b7690e..38b4705 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1900,10 +1900,14 @@ static int halt_interception(struct vcpu_svm *svm) static int vmmcall_interception(struct vcpu_svm *svm) { + int ret; + svm-next_rip = kvm_rip_read(svm-vcpu) + 3; skip_emulated_instruction(svm-vcpu); - kvm_emulate_hypercall(svm-vcpu); - return 1; + ret = kvm_emulate_hypercall(svm-vcpu); + + /* Ignore the error? */ + return ret == 0 ? 0 : 1; Why can't kvm_emulate_hypercall return the right value? kvm_emulate_hypercall() will call kvm_hv_hypercall(), and kvm_hv_hypercall() will return 0 when vcpu's CPL 0. If vcpu's CPL 0, does kvm need to exit and tell it to qemu? No, there is currently no exit to userspace due to hypercalls, neither of HV nor KVM kind. The point is that the return code of kvm_emulate_hypercall is unused so far, so you can easily redefine it to encode continue vs. exit to userspace. Once someone has different needs, this could still be refactored again. So, it is OK to change the return value of kvm_hv_hypercall() if vcpu's CPL 0? Yes, change it to encode what vendor modules need to return to their callers. Better introduce new request flag and set it in your hypercall emulation. See how triple fault is handled. -- Gleb.
[Qemu-devel] [PATCH 06/21] usb-uhci: implement packet queuing
When a usb device is busy processing a packet (and returns USB_RET_ASYNC), continue walking the transfer descriptor list and process them to fill the request queue. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-uhci.c | 33 +++-- 1 files changed, 31 insertions(+), 2 deletions(-) diff --git a/hw/usb-uhci.c b/hw/usb-uhci.c index 13298aa..70e3881 100644 --- a/hw/usb-uhci.c +++ b/hw/usb-uhci.c @@ -942,6 +942,34 @@ static int qhdb_insert(QhDb *db, uint32_t addr) return 0; } +static void uhci_fill_queue(UHCIState *s, UHCI_TD *td) +{ +uint32_t int_mask = 0; +uint32_t plink = td-link; +uint32_t token = uhci_queue_token(td); +UHCI_TD ptd; +int ret; + +fprintf(stderr, %s: -- %x\n, __func__, token); +while (is_valid(plink)) { +pci_dma_read(s-dev, plink ~0xf, ptd, sizeof(ptd)); +le32_to_cpus(ptd.link); +le32_to_cpus(ptd.ctrl); +le32_to_cpus(ptd.token); +le32_to_cpus(ptd.buffer); +if (!(ptd.ctrl TD_CTRL_ACTIVE)) { +break; +} +if (uhci_queue_token(ptd) != token) { +break; +} +ret = uhci_handle_td(s, plink, ptd, int_mask); +assert(ret == 2); /* got USB_RET_ASYNC */ +assert(int_mask == 0); +plink = ptd.link; +} +} + static void uhci_process_frame(UHCIState *s) { uint32_t frame_addr, link, old_td_ctrl, val, int_mask; @@ -1044,8 +1072,9 @@ static void uhci_process_frame(UHCIState *s) DPRINTF(uhci: TD 0x%x async. link 0x%x ctrl 0x%x token 0x%x qh 0x%x\n, link, td.link, td.ctrl, td.token, curr_qh); -fprintf(stderr, async td: link %x%s\n, -td.link, is_valid(td.link) ? valid : ); +if (is_valid(td.link)) { +uhci_fill_queue(s, td); +} link = curr_qh ? qh.link : td.link; continue; -- 1.7.1
Re: [Qemu-devel] Wireshark SCSI dissectors for new transports
On Tue, Feb 28, 2012 at 10:19 AM, ronnie sahlberg ronniesahlb...@gmail.com wrote: Since I never got HyperSCSI or mFCP (very short-lived attempt from HBA vendors) those two are the only ones today I think where we miss decode. virt-scsi from QEMU sounds interesting! Great, thanks for your help. First you need a DLT value from the tcpdump folks to wrap your packets in. Okay, I am sending an email to request that. Depending on what the framing looks like, you need at least a wrapper around the SCSI payload that can contain an I_T identifier, then a LUN field, and then scoped per LUN you need a task-tag or similar. Wireshark would need I_T to be able to track initiators and targets separately and form a conversation between an arbitrary pair. A LUN identifier to track different luns on the same I_T separately. Finally it also needs a task-tag so that on a specific ILT nexus it will be able to match a SCSI CDB with DATA-IN/OUT blobs and a SCSI response/sense to make it map well you might need to wrap thing inside a struct scsi_wrapper { initiator identifier target identifier lun task-tag opcode (cdb, datain, dataout, response/sense) scsi * } if your transport also supports multiple datain/out blobs for a single task, in order to reassemble the data we would also need a offset/length for each datain/out blob. All these things make sense, I think it will be pretty straightforward since virtio-scsi uses a simple header/footer for SCSI transport metadata including some of the things you've mentioned. I will post more information once we have the DLT. Stefan
[Qemu-devel] [PATCH 15/21] ehci: drop old stuff
Drop the ehci under development banner. Drop unused inactive (#if 0) code. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-ehci.c | 30 +- 1 files changed, 1 insertions(+), 29 deletions(-) diff --git a/hw/usb-ehci.c b/hw/usb-ehci.c index b95773f..afc8ccf 100644 --- a/hw/usb-ehci.c +++ b/hw/usb-ehci.c @@ -1471,36 +1471,10 @@ static int ehci_process_itd(EHCIState *ehci, } qemu_sglist_destroy(ehci-isgl); -#if 0 -/* In isoch, there is no facility to indicate a NAK so let's - * instead just complete a zero-byte transaction. Setting - * DBERR seems too draconian. - */ - -if (ret == USB_RET_NAK) { -if (ehci-isoch_pause 0) { -DPRINTF(ISOCH: received a NAK but paused so returning\n); -ehci-isoch_pause--; -return 0; -} else if (ehci-isoch_pause == -1) { -DPRINTF(ISOCH: recv NAK isoch pause inactive, setting\n); -// Pause frindex for up to 50 msec waiting for data from -// remote -ehci-isoch_pause = 50; -return 0; -} else { -DPRINTF(ISOCH: isoch pause timeout! return 0\n); -ret = 0; -} -} else { -DPRINTF(ISOCH: received ACK, clearing pause\n); -ehci-isoch_pause = -1; -} -#else if (ret == USB_RET_NAK) { +/* no data for us, so do a zero-length transfer */ ret = 0; } -#endif if (ret = 0) { if (!dir) { @@ -2389,8 +2363,6 @@ static int usb_ehci_initfn(PCIDevice *dev) memory_region_init_io(s-mem, ehci_mem_ops, s, ehci, MMIO_SIZE); pci_register_bar(s-dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, s-mem); -fprintf(stderr, *** EHCI support is under development ***\n); - return 0; } -- 1.7.1
[Qemu-devel] [PATCH 08/21] usb: add tracepoint for usb packet state changes.
Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb.c | 27 +-- trace-events |3 +++ 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/hw/usb.c b/hw/usb.c index e5b8f33..57fc5e3 100644 --- a/hw/usb.c +++ b/hw/usb.c @@ -26,6 +26,7 @@ #include qemu-common.h #include usb.h #include iov.h +#include trace.h void usb_attach(USBPort *port) { @@ -390,7 +391,6 @@ void usb_packet_init(USBPacket *p) void usb_packet_set_state(USBPacket *p, USBPacketState state) { -#ifdef DEBUG static const char *name[] = { [USB_PACKET_UNDEFINED] = undef, [USB_PACKET_SETUP] = setup, @@ -399,28 +399,11 @@ void usb_packet_set_state(USBPacket *p, USBPacketState state) [USB_PACKET_COMPLETE] = complete, [USB_PACKET_CANCELED] = canceled, }; -static const char *rets[] = { -[-USB_RET_NODEV] = NODEV, -[-USB_RET_NAK]= NAK, -[-USB_RET_STALL] = STALL, -[-USB_RET_BABBLE] = BABBLE, -[-USB_RET_ASYNC] = ASYNC, -}; -char add[16] = ; +USBDevice *dev = p-ep-dev; +USBBus *bus = usb_bus_from_device(dev); -if (state == USB_PACKET_COMPLETE) { -if (p-result 0) { -snprintf(add, sizeof(add), - %s, rets[-p-result]); -} else { -snprintf(add, sizeof(add), - %d, p-result); -} -} -fprintf(stderr, bus %s, port %s, dev %d, ep %d: packet %p: %s - %s%s\n, -p-ep-dev-qdev.parent_bus-name, -p-ep-dev-port-path, -p-ep-dev-addr, p-ep-nr, -p, name[p-state], name[state], add); -#endif +trace_usb_packet_state_change(bus-busnr, dev-port-path, p-ep-nr, + p, name[p-state], name[state]); p-state = state; } diff --git a/trace-events b/trace-events index e918ff6..c5d0f0f 100644 --- a/trace-events +++ b/trace-events @@ -227,6 +227,9 @@ sun4m_iommu_page_get_flags(uint64_t pa, uint64_t iopte, uint32_t ret) get flags sun4m_iommu_translate_pa(uint64_t addr, uint64_t pa, uint32_t iopte) xlate dva %PRIx64 = pa %PRIx64 iopte = %x sun4m_iommu_bad_addr(uint64_t addr) bad addr %PRIx64 +# hw/usb.c +usb_packet_state_change(int bus, const char *port, int ep, void *p, const char *o, const char *n) bus %d, port %s, ep %d, packet %p, state %s - %s + # hw/usb-bus.c usb_port_claim(int bus, const char *port) bus %d, port %s usb_port_attach(int bus, const char *port) bus %d, port %s -- 1.7.1
Re: [Qemu-devel] [PATCH] qom: if @instance_size==0, assign size of object to parent object size
On 02/28/2012 02:03 PM, Paolo Bonzini wrote: Il 28/02/2012 10:43, Igor Mitsyanko ha scritto: On 02/28/2012 12:39 PM, Paolo Bonzini wrote: Il 28/02/2012 08:18, Igor Mitsyanko ha scritto: QOM documentation states that for objects of type with @instance_size == 0 size will be assigned to match parent object's size. But currently this feauture is not implemented and qemu asserts during creation of object with zero instance_size. This patch adjusts actual behaviour in accordance with documentation. You can do it just once, in type_get_parent instead. Sorry, rewind. You can do it in type_class_init instead (you are obviously doing it just once since you assign to type-instance_size). type_class_init mostly deals with class initialization, but it's really the place where a type is hooked up with its parent... Ok, that's obviously a much better approach. Perhaps type_late_init would be a better name. How about simple type_initialization()? -- Mitsyanko Igor ASWG, Moscow RD center, Samsung Electronics email: i.mitsya...@samsung.com
[Qemu-devel] [PATCH 19/21] usb-redir: Limit return values returned by iso packets
From: Hans de Goede hdego...@redhat.com The usbredir protocol uses a status of usb_redir_stall to indicate that an iso data stream has stopped (ie because the urbs failed on resubmit), but iso packets should never return a result of USB_RET_STALL, since iso endpoints cannot stall. So instead simply always return USB_RET_NAK on iso stream errors. Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- usb-redir.c |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/usb-redir.c b/usb-redir.c index 6c92fd9..d905463 100644 --- a/usb-redir.c +++ b/usb-redir.c @@ -431,7 +431,7 @@ static int usbredir_handle_iso_data(USBRedirDevice *dev, USBPacket *p, /* Check iso_error for stream errors, otherwise its an underrun */ status = dev-endpoint[EP2I(ep)].iso_error; dev-endpoint[EP2I(ep)].iso_error = 0; -return usbredir_handle_status(dev, status, 0); +return status ? USB_RET_NAK : 0; } DPRINTF2(iso-token-in ep %02X status %d len %d queue-size: %d\n, ep, isop-status, isop-len, dev-endpoint[EP2I(ep)].bufpq_size); @@ -439,7 +439,7 @@ static int usbredir_handle_iso_data(USBRedirDevice *dev, USBPacket *p, status = isop-status; if (status != usb_redir_success) { bufp_free(dev, isop, ep); -return usbredir_handle_status(dev, status, 0); +return USB_RET_NAK; } len = isop-len; -- 1.7.1
[Qemu-devel] [PATCH v2] Consolidate reads and writes in nbd block device into one common routine
This removes quite some duplicated code. v2 fixes a bug (uninitialized reply.error) and makes the loop more natural. Signed-off-By: Michael Tokarev m...@tls.msk.ru --- block/nbd.c | 95 +++--- 1 files changed, 31 insertions(+), 64 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 161b299..a78e212 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -320,91 +320,58 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) return result; } -static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset) -{ -BDRVNBDState *s = bs-opaque; -struct nbd_request request; -struct nbd_reply reply; - -request.type = NBD_CMD_READ; -request.from = sector_num * 512; -request.len = nb_sectors * 512; - -nbd_coroutine_start(s, request); -if (nbd_co_send_request(s, request, NULL, 0) == -1) { -reply.error = errno; -} else { -nbd_co_receive_reply(s, request, reply, qiov-iov, offset); -} -nbd_coroutine_end(s, request); -return -reply.error; - -} +/* qemu-nbd has a limit of slightly less than 1M per request. Try to + * remain aligned to 4K. */ +#define NBD_MAX_SECTORS 2040 -static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset) +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, int iswrite) { BDRVNBDState *s = bs-opaque; struct nbd_request request; struct nbd_reply reply; +int offset = 0; -request.type = NBD_CMD_WRITE; -if (!bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { +request.type = iswrite ? NBD_CMD_WRITE : NBD_CMD_READ; +if (iswrite !bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { request.type |= NBD_CMD_FLAG_FUA; } +reply.error = 0; + +/* we split the request into pieces of at most NBD_MAX_SECTORS size + * and process them in a loop... */ +do { +request.from = sector_num * 512; +request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512; + +nbd_coroutine_start(s, request); +if (nbd_co_send_request(s, request, iswrite ? qiov-iov : NULL, 0) == -1) { +reply.error = errno; +} else { +nbd_co_receive_reply(s, request, reply, iswrite ? NULL : qiov-iov, offset); +} +nbd_coroutine_end(s, request); -request.from = sector_num * 512; -request.len = nb_sectors * 512; +offset += NBD_MAX_SECTORS * 512; +sector_num += NBD_MAX_SECTORS; +nb_sectors -= NBD_MAX_SECTORS; + +/* ..till we hit an error or there's nothing more to process */ +} while (reply.error == 0 nb_sectors 0); -nbd_coroutine_start(s, request); -if (nbd_co_send_request(s, request, qiov-iov, offset) == -1) { -reply.error = errno; -} else { -nbd_co_receive_reply(s, request, reply, NULL, 0); -} -nbd_coroutine_end(s, request); return -reply.error; } -/* qemu-nbd has a limit of slightly less than 1M per request. Try to - * remain aligned to 4K. */ -#define NBD_MAX_SECTORS 2040 - static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { -int offset = 0; -int ret; -while (nb_sectors NBD_MAX_SECTORS) { -ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); -if (ret 0) { -return ret; -} -offset += NBD_MAX_SECTORS * 512; -sector_num += NBD_MAX_SECTORS; -nb_sectors -= NBD_MAX_SECTORS; -} -return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset); +return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 0); } static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { -int offset = 0; -int ret; -while (nb_sectors NBD_MAX_SECTORS) { -ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); -if (ret 0) { -return ret; -} -offset += NBD_MAX_SECTORS * 512; -sector_num += NBD_MAX_SECTORS; -nb_sectors -= NBD_MAX_SECTORS; -} -return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset); +return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 1); } static int nbd_co_flush(BlockDriverState *bs) -- 1.7.9
[Qemu-devel] [PATCH v2] qcow2: Reject too large header extensions
Image files that make qemu-img info read several gigabytes into the unknown header extensions list are bad. Just fail opening the image if an extension claims to be larger than the header extension area. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2.c |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index f68f0e1..eb5ea48 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -108,6 +108,11 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, #ifdef DEBUG_EXT printf(ext.magic = 0x%x\n, ext.magic); #endif +if (ext.len end_offset - offset) { +error_report(Header extension too large); +return -EINVAL; +} + switch (ext.magic) { case QCOW2_EXT_MAGIC_END: return 0; -- 1.7.6.5
Re: [Qemu-devel] [PATCH V7 06/11] pci.c: Add pci_check_bar_overlap
On Sun, Feb 19, 2012 at 13:35, Michael S. Tsirkin m...@redhat.com wrote: On Fri, Feb 17, 2012 at 05:08:40PM +, Anthony PERARD wrote: From: Yuji Shimada shimada-...@necst.nec.co.jp This function helps Xen PCI Passthrough device to check for overlap. Signed-off-by: Yuji Shimada shimada-...@necst.nec.co.jp Signed-off-by: Anthony PERARD anthony.per...@citrix.com As far as I can see, this scans the bus a specific device is on, looking for devices who have conflicting BARs. Returns 1 if found, 0 if not. Not sure what would devices do with this information, really: patches posted just print out a warning which does not seem too useful. This function is just here to print a warning in case an overlap happen, but we would like too keep this warning. Just FYI, if you decided to e.g. disable device in such a case that would be wrong: it is legal to have overlapping BARs as long as there are no accesses. So a legal thing for a guest to do is: Assign BAR1 = abcde Assign BAR2 = abcde - overlaps temporarily Assign BAR1 = 12345 And this means that you can't just check your device has no conflicts when your device is touched: it needs to be checked each time mappings are updated. --- hw/pci.c | 47 +++ hw/pci.h | 3 +++ 2 files changed, 50 insertions(+), 0 deletions(-) diff --git a/hw/pci.c b/hw/pci.c index 678a8c1..75d6529 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -1985,6 +1985,53 @@ MemoryRegion *pci_address_space_io(PCIDevice *dev) return dev-bus-address_space_io; } +int pci_check_bar_overlap(PCIDevice *dev, + pcibus_t addr, pcibus_t size, uint8_t type) This lacks comments documentng what this does, parameters, return values, etc. I'll fix that. +{ + PCIBus *bus = dev-bus; + PCIDevice *devices = NULL; + PCIIORegion *r; + int i, j; + int rc = 0; + + /* check Overlapped to Base Address */ Weird use of upper case. Intentional? Also - what does this comment mean? Don't know, I will remove the comment, it is useless. + for (i = 0; i ARRAY_SIZE(bus-devices); i++) { + devices = bus-devices[i]; + if (!devices) { + continue; + } + + /* skip itself */ itself? Same here, the comment is probably useless, the next line should be easy to understand. + if (devices-devfn == dev-devfn) { + continue; + } + + for (j = 0; j PCI_NUM_REGIONS; j++) { This ignores bridges. I'm not familiar with brigdes. I'll try to take a look. + r = devices-io_regions[j]; + + /* skip different resource type, but don't skip when + * prefetch and non-prefetch memory are compared. + */ + if (type != r-type) { + if (type PCI_BASE_ADDRESS_SPACE_IO || + r-type PCI_BASE_ADDRESS_SPACE_IO) { Do you mean type PCI_BASE_ADDRESS_SPACE_IO != r-type PCI_BASE_ADDRESS_SPACE_IO ? This would not need a comment then. Yes, I'll replace that. + continue; + } + } + + if ((addr (r-addr + r-size)) ((addr + size) r-addr)) { Can ranges_overlap be used? Yes. + printf(Overlapped to device[%02x:%02x.%x][Region:%d] + [Address:%PRIx64h][Size:%PRIx64h]\n, + pci_bus_num(bus), PCI_SLOT(devices-devfn), + PCI_FUNC(devices-devfn), j, r-addr, r-size); That's not how one should report errors. A callback would be better? Thanks, -- Anthony PERARD
Re: [Qemu-devel] [PATCH] qom: if @instance_size==0, assign size of object to parent object size
Il 28/02/2012 12:09, Igor Mitsyanko ha scritto: Perhaps type_late_init would be a better name. How about simple type_initialization()? We use verbs, so it would be type_initialize, but yes. Paolo
Re: [Qemu-devel] [PATCH] kvm: notify host when guest paniced
On 02/27/2012 05:01 AM, Wen Congyang wrote: We can know the guest is paniced when the guest runs on xen. But we do not have such feature on kvm. This patch implemnts this feature, and the implementation is the same as xen: register panic notifier, and call hypercall when the guest is paniced. What's the motivation for this? Xen does this is insufficient. -- error compiling committee.c: too many arguments to function
[Qemu-devel] [PATCH v2 0/3] New sigaltstack backend for coroutine
This series of patches implements coroutines method with sigaltstack. The flow of creation and management of the coroutines is quite similar to the coroutine-ucontext.c. The way to use sigaltstack to achieve the needed stack manipulation is done in a way quite similar to the GNU Portable Threads (file pth_mctx.c, variant 2). This v2 has some corrections and improved patches, but it's essentially the same. At the moment, the default backend is ucontext (the former default method for coroutines). Alex Barcelo (3): coroutine: adding sigaltstack method (.c source) coroutine: adding configure choose mechanism for coroutine backend coroutine: adding configure option for sigaltstack coroutine backend Makefile.objs |4 + configure | 41 +- coroutine-sigaltstack.c | 334 +++ 3 files changed, 371 insertions(+), 8 deletions(-) create mode 100644 coroutine-sigaltstack.c -- 1.7.5.4
[Qemu-devel] [PATCH v2 1/3] coroutine: adding sigaltstack method (.c source)
This file is based in both coroutine-ucontext.c and pth_mctx.c (from the GNU Portable Threads library). The mechanism used to change stacks is the sigaltstack function (variant 2 of the pth library). v2: Some corrections. Moving global variables into thread storage (CoroutineThreadState). Signed-off-by: Alex Barcelo abarc...@ac.upc.edu --- coroutine-sigaltstack.c | 334 +++ 1 files changed, 334 insertions(+), 0 deletions(-) create mode 100644 coroutine-sigaltstack.c diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c new file mode 100644 index 000..7ff2d33 --- /dev/null +++ b/coroutine-sigaltstack.c @@ -0,0 +1,334 @@ +/* + * sigaltstack coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori anth...@codemonkey.ws + * Copyright (C) 2011 Kevin Wolf kw...@redhat.com + * Copyright (C) 2012 Alex Barcelo abarc...@ac.upc.edu +** This file is partly based on pth_mctx.c, from the GNU Portable Threads +** Copyright (c) 1999-2006 Ralf S. Engelschall r...@engelschall.com + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see http://www.gnu.org/licenses/. + */ + +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */ +#ifdef _FORTIFY_SOURCE +#undef _FORTIFY_SOURCE +#endif +#include stdlib.h +#include setjmp.h +#include stdint.h +#include pthread.h +#include signal.h +#include qemu-common.h +#include qemu-coroutine-int.h + +enum { +/* Maximum free pool size prevents holding too many freed coroutines */ +POOL_MAX_SIZE = 64, +}; + +/** Free list to speed up creation */ +static QSLIST_HEAD(, Coroutine) pool = QSLIST_HEAD_INITIALIZER(pool); +static unsigned int pool_size; + +typedef struct { +Coroutine base; +void *stack; +jmp_buf env; +} CoroutineUContext; + +/** + * Per-thread coroutine bookkeeping + */ +typedef struct { +/** Currently executing coroutine */ +Coroutine *current; + +/** The default coroutine */ +CoroutineUContext leader; + +/** Information for the signal handler (trampoline) */ +jmp_buf tr_reenter; +volatile sig_atomic_t tr_called; +void *tr_handler; +} CoroutineThreadState; + +static pthread_key_t thread_state_key; + +static CoroutineThreadState *coroutine_get_thread_state(void) +{ +CoroutineThreadState *s = pthread_getspecific(thread_state_key); + +if (!s) { +s = g_malloc0(sizeof(*s)); +s-current = s-leader.base; +pthread_setspecific(thread_state_key, s); +} +return s; +} + +static void qemu_coroutine_thread_cleanup(void *opaque) +{ +CoroutineThreadState *s = opaque; + +g_free(s); +} + +static void __attribute__((destructor)) coroutine_cleanup(void) +{ +Coroutine *co; +Coroutine *tmp; + +QSLIST_FOREACH_SAFE(co, pool, pool_next, tmp) { +g_free(DO_UPCAST(CoroutineUContext, base, co)-stack); +g_free(co); +} +} + +static void __attribute__((constructor)) coroutine_init(void) +{ +int ret; + +ret = pthread_key_create(thread_state_key, qemu_coroutine_thread_cleanup); +if (ret != 0) { +fprintf(stderr, unable to create leader key: %s\n, strerror(errno)); +abort(); +} +} + +/* boot function + * This is what starts the coroutine, is called from the trampoline + * (from the signal handler when it is not signal handling, read ahead + * for more information). + */ +static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) +{ +/* Initialize longjmp environment and switch back the caller */ +if (!setjmp(self-env)) { +longjmp(*(jmp_buf *)co-entry_arg, 1); +} + +while (true) { +co-entry(co-entry_arg); +qemu_coroutine_switch(co, co-caller, COROUTINE_TERMINATE); +} +} + +/* + * This is used as the signal handler. This is called with the brand new stack + * (thanks to sigaltstack). We have to return, given that this is a signal + * handler and the sigmask and some other things are changed. + */ +static void coroutine_trampoline(int signal) +{ +CoroutineUContext *self; +Coroutine *co; +CoroutineThreadState *coTS; + +/* Get the thread specific information */ +coTS = coroutine_get_thread_state(); +self = coTS-tr_handler; +coTS-tr_called = 1; +co = self-base; + +/* + * Here we have to do a bit of a ping pong between the caller, given that + * this is a signal handler and we
[Qemu-devel] [PATCH v2 3/3] coroutine: adding configure option for sigaltstack coroutine backend
It's possible to use sigaltstack backend with --with-coroutine=sigaltstack v2: changed from enable/disable configure flags Signed-off-by: Alex Barcelo abarc...@ac.upc.edu --- Makefile.objs |4 configure |6 +- 2 files changed, 9 insertions(+), 1 deletions(-) diff --git a/Makefile.objs b/Makefile.objs index 808de6a..9dcd226 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -17,8 +17,12 @@ coroutine-obj-y += qemu-coroutine-sleep.o ifeq ($(CONFIG_UCONTEXT_COROUTINE),y) coroutine-obj-$(CONFIG_POSIX) += coroutine-ucontext.o else +ifeq ($(CONFIG_SIGALTSTACK_COROUTINE),y) +coroutine-obj-$(CONFIG_POSIX) += coroutine-sigaltstack.o +else coroutine-obj-$(CONFIG_POSIX) += coroutine-gthread.o endif +endif coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o ### diff --git a/configure b/configure index 5a12c56..b473936 100755 --- a/configure +++ b/configure @@ -1099,7 +1099,7 @@ echo --enable-usb-redir enable usb network redirection support echo --disable-guest-agentdisable building of the QEMU Guest Agent echo --enable-guest-agent enable building of the QEMU Guest Agent echo --with-coroutine=BACKEND coroutine backend. Supported options: -echogthread, ucontext, windows +echogthread, ucontext, sigaltstack, windows echo echo NOTE: The object files are built at the place where configure is launched exit 1 @@ -2752,6 +2752,8 @@ elif test $coroutine = gthread ; then coroutine_backend=gthread elif test $coroutine = windows ; then coroutine_backend=windows +elif test $coroutine = sigaltstack ; then + coroutine_backend=sigaltstack else echo echo Error: unknown coroutine backend $coroutine @@ -3274,6 +3276,8 @@ fi if test $coroutine_backend = ucontext ; then echo CONFIG_UCONTEXT_COROUTINE=y $config_host_mak +elif test $coroutine_backend = sigaltstack ; then + echo CONFIG_SIGALTSTACK_COROUTINE=y $config_host_mak fi if test $open_by_handle_at = yes ; then -- 1.7.5.4
[Qemu-devel] [PATCH v2 2/3] coroutine: adding configure choose mechanism for coroutine backend
Configure tries, as a default, ucontext functions for the coroutines. But now the user can force another backend by --with-coroutine=BACKEND option v2: Using --with-coroutine=BACKEND instead of enable disable individual configure options Signed-off-by: Alex Barcelo abarc...@ac.upc.edu --- configure | 37 + 1 files changed, 29 insertions(+), 8 deletions(-) diff --git a/configure b/configure index f9d5330..5a12c56 100755 --- a/configure +++ b/configure @@ -191,6 +191,7 @@ opengl= zlib=yes guest_agent=yes libiscsi= +coroutine= # parse CC options first for opt do @@ -771,6 +772,8 @@ for opt do ;; --with-pkgversion=*) pkgversion= ($optarg) ;; + --with-coroutine=*) coroutine=$optarg + ;; --disable-docs) docs=no ;; --enable-docs) docs=yes @@ -1095,6 +1098,8 @@ echo --disable-usb-redir disable usb network redirection support echo --enable-usb-redir enable usb network redirection support echo --disable-guest-agentdisable building of the QEMU Guest Agent echo --enable-guest-agent enable building of the QEMU Guest Agent +echo --with-coroutine=BACKEND coroutine backend. Supported options: +echogthread, ucontext, windows echo echo NOTE: The object files are built at the place where configure is launched exit 1 @@ -2722,21 +2727,36 @@ EOF fi ## -# check if we have makecontext -# (and that it's not a glibc stub which always returns -1) +# check and set a backend for coroutine -ucontext_coroutine=no -if test $darwin != yes; then - cat $TMPC EOF +# default is ucontext, but always fallback to gthread +# windows autodetected by make +if test $coroutine = -o $coroutine = ucontext; then + if test $darwin != yes; then +cat $TMPC EOF #include ucontext.h #ifdef __stub_makecontext #error Ignoring glibc stub makecontext which will always fail #endif int main(void) { makecontext(0, 0, 0); return 0; } EOF - if compile_prog ; then - ucontext_coroutine=yes +if compile_prog ; then +coroutine_backend=ucontext +else + coroutine_backend=gthread +fi + else +echo Silently falling back into gthread backend under darwin fi +elif test $coroutine = gthread ; then + coroutine_backend=gthread +elif test $coroutine = windows ; then + coroutine_backend=windows +else + echo + echo Error: unknown coroutine backend $coroutine + echo + exit 1 fi ## @@ -2930,6 +2950,7 @@ echo usb net redir $usb_redir echo OpenGL support$opengl echo libiscsi support $libiscsi echo build guest agent $guest_agent +echo coroutine backend $coroutine_backend if test $sdl_too_old = yes; then echo - Your SDL version is too old - please upgrade to have SDL support @@ -3251,7 +3272,7 @@ if test $rbd = yes ; then echo CONFIG_RBD=y $config_host_mak fi -if test $ucontext_coroutine = yes ; then +if test $coroutine_backend = ucontext ; then echo CONFIG_UCONTEXT_COROUTINE=y $config_host_mak fi -- 1.7.5.4
[Qemu-devel] [PATCH 05/21] usb-uhci: process uhci_handle_td return code via switch.
Restruct the uhci_handle_td return code processing to make the control flow more clear and the code more readable. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-uhci.c | 66 +--- 1 files changed, 39 insertions(+), 27 deletions(-) diff --git a/hw/usb-uhci.c b/hw/usb-uhci.c index b8b336f..13298aa 100644 --- a/hw/usb-uhci.c +++ b/hw/usb-uhci.c @@ -1029,49 +1029,61 @@ static void uhci_process_frame(UHCIState *s) pci_dma_write(s-dev, (link ~0xf) + 4, val, sizeof(val)); } -if (ret 0) { -/* interrupted frame */ -break; -} - -if (ret == 2 || ret == 1) { -DPRINTF(uhci: TD 0x%x %s. link 0x%x ctrl 0x%x token 0x%x qh 0x%x\n, -link, ret == 2 ? pend : skip, -td.link, td.ctrl, td.token, curr_qh); +switch (ret) { +case -1: /* interrupted frame */ +goto out; +case 1: /* goto next queue */ +DPRINTF(uhci: TD 0x%x skip. +link 0x%x ctrl 0x%x token 0x%x qh 0x%x\n, +link, td.link, td.ctrl, td.token, curr_qh); link = curr_qh ? qh.link : td.link; continue; -} -/* completed TD */ +case 2: /* got USB_RET_ASYNC */ +DPRINTF(uhci: TD 0x%x async. +link 0x%x ctrl 0x%x token 0x%x qh 0x%x\n, +link, td.link, td.ctrl, td.token, curr_qh); +fprintf(stderr, async td: link %x%s\n, +td.link, is_valid(td.link) ? valid : ); +link = curr_qh ? qh.link : td.link; +continue; -DPRINTF(uhci: TD 0x%x done. link 0x%x ctrl 0x%x token 0x%x qh 0x%x\n, -link, td.link, td.ctrl, td.token, curr_qh); +case 0: /* completed TD */ +DPRINTF(uhci: TD 0x%x done. +link 0x%x ctrl 0x%x token 0x%x qh 0x%x\n, +link, td.link, td.ctrl, td.token, curr_qh); -link = td.link; -td_count++; -bytes_count += (td.ctrl 0x7ff) + 1; +link = td.link; +td_count++; +bytes_count += (td.ctrl 0x7ff) + 1; -if (curr_qh) { - /* update QH element link */ -qh.el_link = link; -val = cpu_to_le32(qh.el_link); -pci_dma_write(s-dev, (curr_qh ~0xf) + 4, val, sizeof(val)); +if (curr_qh) { +/* update QH element link */ +qh.el_link = link; +val = cpu_to_le32(qh.el_link); +pci_dma_write(s-dev, (curr_qh ~0xf) + 4, val, sizeof(val)); -if (!depth_first(link)) { - /* done with this QH */ +if (!depth_first(link)) { +/* done with this QH */ - DPRINTF(uhci: QH 0x%x done. link 0x%x elink 0x%x\n, - curr_qh, qh.link, qh.el_link); +DPRINTF(uhci: QH 0x%x done. link 0x%x elink 0x%x\n, +curr_qh, qh.link, qh.el_link); - curr_qh = 0; - link= qh.link; +curr_qh = 0; +link= qh.link; +} } +break; + +default: +assert(!unknown return code); } /* go to the next entry */ } +out: s-pending_int_mask |= int_mask; } -- 1.7.1
[Qemu-devel] [PATCH 11/21] libcacard: link with glib for g_strndup
From: Alon Levy al...@redhat.com Without it the produced library for make libcacard.la has an unresolved symbol. Signed-off-by: Alon Levy al...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- configure |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configure b/configure index f9d5330..87474e9 100755 --- a/configure +++ b/configure @@ -2571,8 +2571,8 @@ if test $smartcard != no ; then int main(void) { PK11_FreeSlot(0); return 0; } EOF smartcard_cflags=-I\$(SRC_PATH)/libcacard -libcacard_libs=$($pkg_config --libs nss 2/dev/null) -libcacard_cflags=$($pkg_config --cflags nss 2/dev/null) +libcacard_libs=$($pkg_config --libs nss 2/dev/null) $glib_libs +libcacard_cflags=$($pkg_config --cflags nss 2/dev/null) $glib_cflags if $pkg_config --atleast-version=3.12.8 nss /dev/null 21 \ compile_prog $smartcard_cflags $libcacard_cflags $libcacard_libs; then smartcard_nss=yes -- 1.7.1
[Qemu-devel] [PATCH 01/21] usb-hid: fix tablet activation
Activate usb hid pointer devices (mouse+tablet) unconditionally on polls, even if we NAK the poll due to lack of new events. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-hid.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/hw/usb-hid.c b/hw/usb-hid.c index 7fc0bd8..37bca78 100644 --- a/hw/usb-hid.c +++ b/hw/usb-hid.c @@ -466,6 +466,9 @@ static int usb_hid_handle_data(USBDevice *dev, USBPacket *p) case USB_TOKEN_IN: if (p-ep-nr == 1) { int64_t curtime = qemu_get_clock_ns(vm_clock); +if (hs-kind == HID_MOUSE || hs-kind == HID_TABLET) { +hid_pointer_activate(hs); +} if (!hid_has_events(hs) (!hs-idle || hs-next_idle_clock - curtime 0)) { return USB_RET_NAK; -- 1.7.1
[Qemu-devel] [PATCH 13/21] libcacard: fix reported ATR length
From: Alon Levy al...@redhat.com Signed-off-by: Alon Levy al...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- libcacard/vcardt.h |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libcacard/vcardt.h b/libcacard/vcardt.h index 538bdde..d4d8e2e 100644 --- a/libcacard/vcardt.h +++ b/libcacard/vcardt.h @@ -26,8 +26,8 @@ typedef struct VCardEmulStruct VCardEmul; #define MAX_CHANNEL 4 /* create an ATR with appropriate historical bytes */ -#define VCARD_ATR_PREFIX(size) 0x3b, 0x66+(size), 0x00, 0xff, \ - 'V', 'C', 'A', 'R', 'D', '_' +#define VCARD_ATR_PREFIX(size) (0x3b, 0x68+(size), 0x00, 0xff, \ + 'V', 'C', 'A', 'R', 'D', '_') typedef enum { -- 1.7.1
Re: [Qemu-devel] [PATCH v2] Consolidate reads and writes in nbd block device into one common routine
Il 28/02/2012 11:24, Michael Tokarev ha scritto: This removes quite some duplicated code. v2 fixes a bug (uninitialized reply.error) and makes the loop more natural. Signed-off-By: Michael Tokarev m...@tls.msk.ru --- block/nbd.c | 95 +++--- 1 files changed, 31 insertions(+), 64 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 161b299..a78e212 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -320,91 +320,58 @@ static int nbd_open(BlockDriverState *bs, const char* filename, int flags) return result; } -static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset) -{ -BDRVNBDState *s = bs-opaque; -struct nbd_request request; -struct nbd_reply reply; - -request.type = NBD_CMD_READ; -request.from = sector_num * 512; -request.len = nb_sectors * 512; - -nbd_coroutine_start(s, request); -if (nbd_co_send_request(s, request, NULL, 0) == -1) { -reply.error = errno; -} else { -nbd_co_receive_reply(s, request, reply, qiov-iov, offset); -} -nbd_coroutine_end(s, request); -return -reply.error; - -} +/* qemu-nbd has a limit of slightly less than 1M per request. Try to + * remain aligned to 4K. */ +#define NBD_MAX_SECTORS 2040 -static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov, - int offset) +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, int iswrite) Call this nbd_co_rw, and please pass the whole request.type down. { BDRVNBDState *s = bs-opaque; struct nbd_request request; struct nbd_reply reply; +int offset = 0; -request.type = NBD_CMD_WRITE; -if (!bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { +request.type = iswrite ? NBD_CMD_WRITE : NBD_CMD_READ; +if (iswrite !bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { request.type |= NBD_CMD_FLAG_FUA; } +reply.error = 0; + +/* we split the request into pieces of at most NBD_MAX_SECTORS size + * and process them in a loop... */ +do { +request.from = sector_num * 512; +request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512; + +nbd_coroutine_start(s, request); +if (nbd_co_send_request(s, request, iswrite ? qiov-iov : NULL, 0) == -1) { The last 0 needs to be offset. +reply.error = errno; +} else { +nbd_co_receive_reply(s, request, reply, iswrite ? NULL : qiov-iov, offset); +} +nbd_coroutine_end(s, request); -request.from = sector_num * 512; -request.len = nb_sectors * 512; +offset += NBD_MAX_SECTORS * 512; +sector_num += NBD_MAX_SECTORS; +nb_sectors -= NBD_MAX_SECTORS; + +/* ..till we hit an error or there's nothing more to process */ +} while (reply.error == 0 nb_sectors 0); -nbd_coroutine_start(s, request); -if (nbd_co_send_request(s, request, qiov-iov, offset) == -1) { -reply.error = errno; -} else { -nbd_co_receive_reply(s, request, reply, NULL, 0); -} -nbd_coroutine_end(s, request); return -reply.error; } -/* qemu-nbd has a limit of slightly less than 1M per request. Try to - * remain aligned to 4K. */ -#define NBD_MAX_SECTORS 2040 - static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { -int offset = 0; -int ret; -while (nb_sectors NBD_MAX_SECTORS) { -ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); -if (ret 0) { -return ret; -} -offset += NBD_MAX_SECTORS * 512; -sector_num += NBD_MAX_SECTORS; -nb_sectors -= NBD_MAX_SECTORS; -} -return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset); +return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 0); } static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { -int offset = 0; -int ret; -while (nb_sectors NBD_MAX_SECTORS) { -ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); -if (ret 0) { -return ret; -} -offset += NBD_MAX_SECTORS * 512; -sector_num += NBD_MAX_SECTORS; -nb_sectors -= NBD_MAX_SECTORS; -} -return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset); +return nbd_co_rwv(bs, sector_num, nb_sectors, qiov, 1); } ... but thinking more about it, why don't you leave nbd_co_readv_1/nbd_co_writev_1 alone, and create a
[Qemu-devel] [PATCH 09/21] usb-ehci: sanity-check iso xfers
This patch adds a sanity check to itd processing to make sure the endpoint addressed by the guest is actually an iso endpoint. Also verify that usb drivers don't return USB_RET_ASYNC which is illegal for iso xfers. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-ehci.c | 16 ++-- 1 files changed, 10 insertions(+), 6 deletions(-) diff --git a/hw/usb-ehci.c b/hw/usb-ehci.c index b9da26a..048eb76 100644 --- a/hw/usb-ehci.c +++ b/hw/usb-ehci.c @@ -1459,12 +1459,16 @@ static int ehci_process_itd(EHCIState *ehci, dev = ehci_find_device(ehci, devaddr); ep = usb_ep_get(dev, pid, endp); -usb_packet_setup(ehci-ipacket, pid, ep); -usb_packet_map(ehci-ipacket, ehci-isgl); - -ret = usb_handle_packet(dev, ehci-ipacket); - -usb_packet_unmap(ehci-ipacket); +if (ep-type == USB_ENDPOINT_XFER_ISOC) { +usb_packet_setup(ehci-ipacket, pid, ep); +usb_packet_map(ehci-ipacket, ehci-isgl); +ret = usb_handle_packet(dev, ehci-ipacket); +assert(ret != USB_RET_ASYNC); +usb_packet_unmap(ehci-ipacket); +} else { +DPRINTF(ISOCH: attempt to addess non-iso endpoint\n); +ret = USB_RET_NAK; +} qemu_sglist_destroy(ehci-isgl); #if 0 -- 1.7.1
[Qemu-devel] [PATCH 12/21] usb-ccid: advertise SELF_POWERED
From: Alon Levy al...@redhat.com Before commit ed5a83ddd8c1d8ec7b1015315530cf29949e7c48 each device provided it's own response to USB_REQ_GET_STATUS, but after it that response was based on bmAttributes, which was errounously set for usb-ccid as 0xa0 and not 0xe0. Signed-off-by: Alon Levy al...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-ccid.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/hw/usb-ccid.c b/hw/usb-ccid.c index 0b2ac80..ce01e34 100644 --- a/hw/usb-ccid.c +++ b/hw/usb-ccid.c @@ -447,7 +447,7 @@ static const USBDescDevice desc_device = { { .bNumInterfaces= 1, .bConfigurationValue = 1, -.bmAttributes = 0xa0, +.bmAttributes = 0xe0, .bMaxPower = 50, .nif = 1, .ifs = desc_iface0, -- 1.7.1
Re: [Qemu-devel] linux guests and ksm performance
On 24.02.2012 08:23, Stefan Hajnoczi wrote: On Fri, Feb 24, 2012 at 6:53 AM, Stefan Hajnoczistefa...@gmail.com wrote: On Fri, Feb 24, 2012 at 6:41 AM, Stefan Hajnoczistefa...@gmail.com wrote: On Thu, Feb 23, 2012 at 7:08 PM, peter.lie...@gmail.comp...@dlh.net wrote: Stefan Hajnoczistefa...@gmail.com schrieb: On Thu, Feb 23, 2012 at 3:40 PM, Peter Lievenp...@dlh.net wrote: However, in a virtual machine I have not observed the above slow down to that extend while the benefit of zero after free in a virtualisation environment is obvious: 1) zero pages can easily be merged by ksm or other technique. 2) zero (dup) pages are a lot faster to transfer in case of migration. The other approach is a memory page discard mechanism - which obviously requires more code changes than zeroing freed pages. The advantage is that we don't take the brute-force and CPU intensive approach of zeroing pages. It would be like a fine-grained ballooning feature. I dont think that it is cpu intense. All user pages are zeroed anyway, but at allocation time it shouldnt be a big difference in terms of cpu power. It's easy to find a scenario where eagerly zeroing pages is wasteful. Imagine a process that uses all of physical memory. Once it terminates the system is going to run processes that only use a small set of pages. It's pointless zeroing all those pages if we're not going to use them anymore. Perhaps the middle path is to zero pages but do it after a grace timeout. I wonder if this helps eliminate the 2-3% slowdown you noticed when compiling. Gah, it's too early in the morning. I don't think this timer actually makes sense. do you think it makes then sense to make a patchset/proposal to notice a guest kernel about the presense of ksm in the host and switch to zero after free? peter Stefan
[Qemu-devel] Qemu disaggregation in Xen environment
Hello, In the current model, only one instance of qemu is running for each running HVM domain. We are looking at disaggregating qemu to have, for example, an instance to emulate only network controllers, another to emulate block devices, etc... Multiple instances of qemu would run for a single Xen domain. Each one would handle a subset of the hardware. Has someone already looked at it and potentially already submitted code for qemu ? The purpose of this e-mail is to start a discussion and gather opinions on how the qemu developers community would like to see it implemented. A couple of questions comes to mind: 1) How hard would it be to untangle machine specific (PC hardware) emulation from device specific emulation (PCI devices) ? 2) How can we achieve disaggregation from a configuration point of view. Currently, Xen toolstack starts qemu, and tells qemu which device to emulate using the command line. I've heard about a project for creating machine description configuration files for QEMU which could help greatly in dividing up which hardware to emulate in which instance of qemu. What is the status of this project ? Thank you for your answers,
Re: [Qemu-devel] qemu-kvm-1.0 crashes with threaded vnc server?
On 28.02.2012 09:37, Corentin Chary wrote: On Mon, Feb 13, 2012 at 10:24 AM, Peter Lievenp...@dlh.net wrote: Am 11.02.2012 um 09:55 schrieb Corentin Chary: On Thu, Feb 9, 2012 at 7:08 PM, Peter Lievenp...@dlh.net wrote: Hi, is anyone aware if there are still problems when enabling the threaded vnc server? I saw some VMs crashing when using a qemu-kvm build with --enable-vnc-thread. qemu-kvm-1.0[22646]: segfault at 0 ip 7fec1ca7ea0b sp 7fec19d056d0 error 6 in libz.so.1.2.3.3[7fec1ca75000+16000] qemu-kvm-1.0[26056]: segfault at 7f06d8d6e010 ip 7f06e0a30d71 sp 7f06df035748 error 6 in libc-2.11.1.so[7f06e09aa000+17a000] I had no time to debug further. It seems to happen shortly after migrating, but thats uncertain. At least the segfault in libz seems to give a hint to VNC since I cannot image of any other part of qemu-kvm using libz except for VNC server. Thanks, Peter Hi Peter, I found two patches on my git tree that I sent long ago but somehow get lost on the mailing list. I rebased the tree but did not have the time (yet) to test them. http://git.iksaif.net/?p=qemu.git;a=shortlog;h=refs/heads/wip Feel free to try them. If QEMU segfault again, please send a full gdb backtrace / valgrind trace / way to reproduce :). Thanks, Hi Corentin, thanks for rebasing those patches. I remember that I have seen them the last time I noticed (about 1 year ago) that the threaded VNC is crashing. I'm on vacation this week, but I will test them next week and let you know if I can force a crash with them applied. If not we should consider to include them asap. Hi Peter, any news on that ? sorry, i had much trouble debugging nasty slow windows vm problems last 2 weeks. but its still on my list. i'll keep you all posted. peter
Re: [Qemu-devel] [PATCH v3 1/2] qapi: Introduce blockdev-group-snapshot-sync command
Am 28.02.2012 01:33, schrieb Jeff Cody: This is a QAPI/QMP only command to take a snapshot of a group of devices. This is similar to the blockdev-snapshot-sync command, except blockdev-group-snapshot-sync accepts a list devices, filenames, and formats. It is attempted to keep the snapshot of the group atomic; if the creation or open of any of the new snapshots fails, then all of the new snapshots are abandoned, and the name of the snapshot image that failed is returned. The failure case should not interrupt any operations. Rather than use bdrv_close() along with a subsequent bdrv_open() to perform the pivot, the original image is never closed and the new image is placed 'in front' of the original image via manipulation of the BlockDriverState fields. Thus, once the new snapshot image has been successfully created, there are no more failure points before pivoting to the new snapshot. This allows the group of disks to remain consistent with each other, even across snapshot failures. Signed-off-by: Jeff Cody jc...@redhat.com --- block.c | 80 block.h |1 + block_int.h |6 +++ blockdev.c | 133 ++ qapi-schema.json | 38 +++ 5 files changed, 258 insertions(+), 0 deletions(-) diff --git a/block.c b/block.c index 3621d11..393e8bf 100644 --- a/block.c +++ b/block.c @@ -880,6 +880,86 @@ void bdrv_make_anon(BlockDriverState *bs) bs-device_name[0] = '\0'; } +/* + * Add new bs contents at the top of an image chain while the chain is + * live, while keeping required fields on the top layer. + * + * This will modify the BlockDriverState fields, and swap contents + * between bs_new and bs_top. Both bs_new and bs_top are modified. + * + * This function does not create any image files. + */ +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) +{ +BlockDriverState tmp; + +/* the new bs must not be in bdrv_states */ +bdrv_make_anon(bs_new); + +tmp = *bs_new; + +/* there are some fields that need to stay on the top layer: */ + +/* dev info */ +tmp.dev_ops = bs_top-dev_ops; +tmp.dev_opaque= bs_top-dev_opaque; +tmp.dev = bs_top-dev; +tmp.buffer_alignment = bs_top-buffer_alignment; +tmp.copy_on_read = bs_top-copy_on_read; + +/* i/o timing parameters */ +tmp.slice_time= bs_top-slice_time; +tmp.slice_start = bs_top-slice_start; +tmp.slice_end = bs_top-slice_end; +tmp.io_limits = bs_top-io_limits; +tmp.io_base = bs_top-io_base; +tmp.throttled_reqs= bs_top-throttled_reqs; +tmp.block_timer = bs_top-block_timer; +tmp.io_limits_enabled = bs_top-io_limits_enabled; + +/* geometry */ +tmp.cyls = bs_top-cyls; +tmp.heads = bs_top-heads; +tmp.secs = bs_top-secs; +tmp.translation = bs_top-translation; + +/* r/w error */ +tmp.on_read_error = bs_top-on_read_error; +tmp.on_write_error= bs_top-on_write_error; + +/* i/o status */ +tmp.iostatus_enabled = bs_top-iostatus_enabled; +tmp.iostatus = bs_top-iostatus; + +/* keep the same entry in bdrv_states */ +pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top-device_name); +tmp.list = bs_top-list; + +/* The contents of 'tmp' will become bs_top, as we are + * swapping bs_new and bs_top contents. */ +tmp.backing_hd = bs_new; tmp.backing_file should be set as well (copy from bs_top-filename?) + +/* swap contents of the fixed new bs and the current top */ +*bs_new = *bs_top; +*bs_top = tmp; + +/* clear the copied fields in the new backing file */ +bdrv_detach_dev(bs_new, bs_new-dev); + +qemu_co_queue_init(bs_new-throttled_reqs); +memset(bs_new-io_base, 0, sizeof(bs_new-io_base)); +memset(bs_new-io_limits, 0, sizeof(bs_new-io_limits)); +bdrv_iostatus_disable(bs_new); + +/* we don't use bdrv_io_limits_disable() for this, because we don't want + * to affect or delete the block_timer, as it has been moved to bs_top */ +bs_new-io_limits_enabled = false; +bs_new-block_timer = NULL; +bs_new-slice_time= 0; +bs_new-slice_start = 0; +bs_new-slice_end = 0; +} + void bdrv_delete(BlockDriverState *bs) { assert(!bs-dev); diff --git a/block.h b/block.h index cae289b..190a780 100644 --- a/block.h +++ b/block.h @@ -114,6 +114,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, int bdrv_create_file(const char* filename, QEMUOptionParameter *options); BlockDriverState *bdrv_new(const char *device_name); void bdrv_make_anon(BlockDriverState *bs); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
[Qemu-devel] [PATCH 17/21] usb-redir: Always clear device state on filter reject
From: Hans de Goede hdego...@redhat.com Always call usbredir_device_disconnect() when usbredir_check_filter() fails to clean up all the device state (ie received endpoint info). Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- usb-redir.c | 11 +++ 1 files changed, 7 insertions(+), 4 deletions(-) diff --git a/usb-redir.c b/usb-redir.c index 18204be..c98b14e 100644 --- a/usb-redir.c +++ b/usb-redir.c @@ -958,7 +958,7 @@ static int usbredir_check_filter(USBRedirDevice *dev) { if (dev-interface_info.interface_count == 0) { ERROR(No interface info for device\n); -return -1; +goto error; } if (dev-filter_rules) { @@ -966,7 +966,7 @@ static int usbredir_check_filter(USBRedirDevice *dev) usb_redir_cap_connect_device_version)) { ERROR(Device filter specified and peer does not have the connect_device_version capability\n); -return -1; +goto error; } if (usbredirfilter_check( @@ -983,11 +983,15 @@ static int usbredir_check_filter(USBRedirDevice *dev) dev-device_info.product_id, dev-device_info.device_version_bcd, 0) != 0) { -return -1; +goto error; } } return 0; + +error: +usbredir_device_disconnect(dev); +return -1; } /* @@ -1113,7 +1117,6 @@ static void usbredir_interface_info(void *priv, if (usbredir_check_filter(dev)) { ERROR(Device no longer matches filter after interface info change, disconnecting!\n); -usbredir_device_disconnect(dev); } } } -- 1.7.1
[Qemu-devel] [PATCH V2 0/2] QOM: small object creation fix
Eliminate impossibility of creating objects of types with @instance_size == 0. v1-v2: type's instance size now initialized during type initialization. type_class_init() renamed (in additional patch) Igor Mitsyanko (2): qom: if @instance_size==0, assign size of object to parent object size qom/object.c: rename type_class_init() to type_initialize() qom/object.c | 27 +-- 1 files changed, 21 insertions(+), 6 deletions(-) -- 1.7.4.1
[Qemu-devel] [PATCH V2 1/2] qom: if @instance_size==0, assign size of object to parent object size
QOM documentation states that for objects of type with @instance_size == 0 size will be assigned to match parent object's size. But currently this feauture is not implemented and qemu asserts during creation of object with zero instance_size. Set appropriate value for type instance_size during type_class_init() call. object_initialize_with_type() must call type_class_init() before asserting type-instance_size, and object_new_with_type() must call type_class_init() before object allocation. Signed-off-by: Igor Mitsyanko i.mitsya...@samsung.com --- qom/object.c | 19 +-- 1 files changed, 17 insertions(+), 2 deletions(-) diff --git a/qom/object.c b/qom/object.c index aa037d2..6d31bc3 100644 --- a/qom/object.c +++ b/qom/object.c @@ -177,6 +177,19 @@ static size_t type_class_get_size(TypeImpl *ti) return sizeof(ObjectClass); } +static size_t type_object_get_size(TypeImpl *ti) +{ +if (ti-instance_size) { +return ti-instance_size; +} + +if (type_has_parent(ti)) { +return type_object_get_size(type_get_parent(ti)); +} + +return 0; +} + static void type_class_interface_init(TypeImpl *ti, InterfaceImpl *iface) { TypeInfo info = { @@ -203,6 +216,7 @@ static void type_class_init(TypeImpl *ti) } ti-class_size = type_class_get_size(ti); +ti-instance_size = type_object_get_size(ti); ti-class = g_malloc0(ti-class_size); ti-class-type = ti; @@ -264,9 +278,9 @@ void object_initialize_with_type(void *data, TypeImpl *type) Object *obj = data; g_assert(type != NULL); -g_assert(type-instance_size = sizeof(Object)); - type_class_init(type); + +g_assert(type-instance_size = sizeof(Object)); g_assert(type-abstract == false); memset(obj, 0, type-instance_size); @@ -356,6 +370,7 @@ Object *object_new_with_type(Type type) Object *obj; g_assert(type != NULL); +type_class_init(type); obj = g_malloc(type-instance_size); object_initialize_with_type(obj, type); -- 1.7.4.1
[Qemu-devel] [PATCH V2 2/2] qom/object.c: rename type_class_init() to type_initialize()
Function name type_class_init() gave us a wrong impression of separation of type's class and object entities initialization. Name type_initialize() is more appropriate for type_class_init() function (considering what operations it performs). Signed-off-by: Igor Mitsyanko i.mitsya...@samsung.com --- qom/object.c | 12 ++-- 1 files changed, 6 insertions(+), 6 deletions(-) diff --git a/qom/object.c b/qom/object.c index 6d31bc3..9acc624 100644 --- a/qom/object.c +++ b/qom/object.c @@ -206,7 +206,7 @@ static void type_class_interface_init(TypeImpl *ti, InterfaceImpl *iface) g_free(name); } -static void type_class_init(TypeImpl *ti) +static void type_initialize(TypeImpl *ti) { size_t class_size = sizeof(ObjectClass); int i; @@ -224,7 +224,7 @@ static void type_class_init(TypeImpl *ti) if (type_has_parent(ti)) { TypeImpl *parent = type_get_parent(ti); -type_class_init(parent); +type_initialize(parent); class_size = parent-class_size; g_assert(parent-class_size = ti-class_size); @@ -278,7 +278,7 @@ void object_initialize_with_type(void *data, TypeImpl *type) Object *obj = data; g_assert(type != NULL); -type_class_init(type); +type_initialize(type); g_assert(type-instance_size = sizeof(Object)); g_assert(type-abstract == false); @@ -370,7 +370,7 @@ Object *object_new_with_type(Type type) Object *obj; g_assert(type != NULL); -type_class_init(type); +type_initialize(type); obj = g_malloc(type-instance_size); object_initialize_with_type(obj, type); @@ -543,7 +543,7 @@ ObjectClass *object_class_by_name(const char *typename) return NULL; } -type_class_init(type); +type_initialize(type); return type-class; } @@ -563,7 +563,7 @@ static void object_class_foreach_tramp(gpointer key, gpointer value, TypeImpl *type = value; ObjectClass *k; -type_class_init(type); +type_initialize(type); k = type-class; if (!data-include_abstract type-abstract) { -- 1.7.4.1
Re: [Qemu-devel] [PATCH V2 0/2] QOM: small object creation fix
Il 28/02/2012 12:57, Igor Mitsyanko ha scritto: Eliminate impossibility of creating objects of types with @instance_size == 0. v1-v2: type's instance size now initialized during type initialization. type_class_init() renamed (in additional patch) Igor Mitsyanko (2): qom: if @instance_size==0, assign size of object to parent object size qom/object.c: rename type_class_init() to type_initialize() qom/object.c | 27 +-- 1 files changed, 21 insertions(+), 6 deletions(-) Reviewed-by: Paolo Bonzini pbonz...@redhat.com Paolo
[Qemu-devel] [PATCH 07/21] usb-xhci: enable packet queuing
qemu usb core has packet queues now, so flip lets the switch. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb-xhci.c |6 -- 1 files changed, 0 insertions(+), 6 deletions(-) diff --git a/hw/usb-xhci.c b/hw/usb-xhci.c index 008b0b5..fc5b542 100644 --- a/hw/usb-xhci.c +++ b/hw/usb-xhci.c @@ -1769,12 +1769,6 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid epctx-retry = xfer; break; } - -/* - * Qemu usb can't handle multiple in-flight xfers. - * Stop here for now. - */ -break; } } -- 1.7.1
[Qemu-devel] [PATCH 16/21] usb-redir: Fix printing of device version
From: Hans de Goede hdego...@redhat.com The device version is in bcd format, which requires some special handling to print. Signed-off-by: Hans de Goede hdego...@redhat.com Signed-off-by: Gerd Hoffmann kra...@redhat.com --- usb-redir.c |6 -- 1 files changed, 4 insertions(+), 2 deletions(-) diff --git a/usb-redir.c b/usb-redir.c index a59b347..18204be 100644 --- a/usb-redir.c +++ b/usb-redir.c @@ -1049,8 +1049,10 @@ static void usbredir_device_connect(void *priv, usb_redir_cap_connect_device_version)) { INFO(attaching %s device %04x:%04x version %d.%d class %02x\n, speed, device_connect-vendor_id, device_connect-product_id, - device_connect-device_version_bcd 8, - device_connect-device_version_bcd 0xff, + ((device_connect-device_version_bcd 0xf000) 12) * 10 + + ((device_connect-device_version_bcd 0x0f00) 8), + ((device_connect-device_version_bcd 0x00f0) 4) * 10 + + ((device_connect-device_version_bcd 0x000f) 0), device_connect-device_class); } else { INFO(attaching %s device %04x:%04x class %02x\n, speed, -- 1.7.1
Re: [Qemu-devel] [PATCH v2] qcow2: Reject too large header extensions
On Tue, Feb 28, 2012 at 10:26 AM, Kevin Wolf kw...@redhat.com wrote: Image files that make qemu-img info read several gigabytes into the unknown header extensions list are bad. Just fail opening the image if an extension claims to be larger than the header extension area. Signed-off-by: Kevin Wolf kw...@redhat.com --- block/qcow2.c | 5 + 1 files changed, 5 insertions(+), 0 deletions(-) Reviewed-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
Re: [Qemu-devel] [PATCH v6 2/4] cadence_ttc: initial version of device model
Thinking about that, I realised why I don't like the following line: +s-reg_value = (uint32_t)((x + interval) % interval); This assumes x -interval, which is not always true. This would mean you have wrapped twice or more in one time step, which I am assuming is a fatal error condition, as It means your software has missed interrupts and all sort of race conditions would occur. I would personally prefer to assert !(x -interval) and have qemu hw_error or something, as in these cases QEMU can just not handle your super quick timer wrap around. No, not fatal at all. On a heavily loaded host it's normal for qemu[1] to stall for tens of miliseconds at a time. In extreme cases several seconds at a time, though we've fixed most of those. This is greater than the interval of many guest periodic events. A linux guest with HZ=1000 (even HZ=100) will routinely loose timer ticks. By my reading your timers have a configurable limit, so the obvious configuration is a limit of 25000 (2.5MHz / 100), and trigger the jiffy tick off the timer wrap/reload. If your guest OS assumes their ISR will complete before timer triggers again then it will break. That's their problem. Any guest that relies on consistent realtime performance/behavior is going to malfunction when run inside qemu. This is only one of several ways that qemu behavior can differ from that of real hardware. You can workaround some of these issues with -icount, though that has its own set of problems. One of which is that is doesn't support KVM or SMP[2]. guests. Paul [1] The same applies for KVM. [2] SMP may be fixable. KVM probably not.
Re: [Qemu-devel] linux guests and ksm performance
On 28.02.2012 13:05, Stefan Hajnoczi wrote: On Tue, Feb 28, 2012 at 11:46 AM, Peter Lievenp...@dlh.net wrote: On 24.02.2012 08:23, Stefan Hajnoczi wrote: On Fri, Feb 24, 2012 at 6:53 AM, Stefan Hajnoczistefa...@gmail.com wrote: On Fri, Feb 24, 2012 at 6:41 AM, Stefan Hajnoczistefa...@gmail.com wrote: On Thu, Feb 23, 2012 at 7:08 PM, peter.lie...@gmail.comp...@dlh.net wrote: Stefan Hajnoczistefa...@gmail.comschrieb: On Thu, Feb 23, 2012 at 3:40 PM, Peter Lievenp...@dlh.netwrote: However, in a virtual machine I have not observed the above slow down to that extend while the benefit of zero after free in a virtualisation environment is obvious: 1) zero pages can easily be merged by ksm or other technique. 2) zero (dup) pages are a lot faster to transfer in case of migration. The other approach is a memory page discard mechanism - which obviously requires more code changes than zeroing freed pages. The advantage is that we don't take the brute-force and CPU intensive approach of zeroing pages. It would be like a fine-grained ballooning feature. I dont think that it is cpu intense. All user pages are zeroed anyway, but at allocation time it shouldnt be a big difference in terms of cpu power. It's easy to find a scenario where eagerly zeroing pages is wasteful. Imagine a process that uses all of physical memory. Once it terminates the system is going to run processes that only use a small set of pages. It's pointless zeroing all those pages if we're not going to use them anymore. Perhaps the middle path is to zero pages but do it after a grace timeout. I wonder if this helps eliminate the 2-3% slowdown you noticed when compiling. Gah, it's too early in the morning. I don't think this timer actually makes sense. do you think it makes then sense to make a patchset/proposal to notice a guest kernel about the presense of ksm in the host and switch to zero after free? I think your idea is interesting - whether or not people are happy with it will depend on the performance impact. It seems reasonable to me. could you support/help me in implementing and publishing this approach? Peter
[Qemu-devel] [PULL] Memory core space reduction
This is the current memory queue (posted as two separate series before my vacation). When applied, the overhead of 16 bytes/page is reduced to basically nil. Avi Kivity (30): ioport: change portio_list not to use memory_region_set_offset() memory: remove memory_region_set_offset() memory: add shorthand for invoking a callback on all listeners memory: switch memory listeners to a QTAILQ memory: code motion: move MEMORY_LISTENER_CALL() memory: move ioeventfd ops to MemoryListener memory: add a readonly attribute to MemoryRegionSection memory: don't pass -readable attribute to cpu_register_physical_memory_log memory: use a MemoryListener for core memory map updates too memory: drop AddressSpaceOps memory: allow MemoryListeners to observe a specific address space xen: ignore I/O memory regions memory: split memory listener for the two address spaces memory: support stateless memory listeners memory: change memory registration to rebuild the memory map on each change memory: remove first level of l1_phys_map memory: unify phys_map last level with intermediate levels memory: store MemoryRegionSection pointers in phys_map memory: compress phys_map node pointers to 16 bits memory: fix RAM subpages in newly initialized pages memory: unify the two branches of cpu_register_physical_memory_log() memory: move tlb flush to MemoryListener commit callback memory: make phys_page_find() return a MemoryRegionSection memory: give phys_page_find() its own tree search loop memory: simplify multipage/subpage registration memory: replace phys_page_find_alloc() with phys_page_set() memory: switch phys_page_set() to a recursive implementation memory: change phys_page_set() to set multiple pages memory: unify PhysPageEntry::node and ::leaf memory: allow phys_map tree paths to terminate early exec-obsolete.h |5 +- exec.c | 875 --- hw/vhost.c | 33 ++- ioport.c| 25 ++- ioport.h|1 + kvm-all.c | 97 ++- memory.c| 328 +- memory.h| 26 +- xen-all.c | 33 ++- 9 files changed, 910 insertions(+), 513 deletions(-) -- error compiling committee.c: too many arguments to function
[Qemu-devel] [PULL] Memory core space reduction
[repost with pull info, brain not yet back up to speed] This is the current memory queue (posted as two separate series before my vacation). When applied, the overhead of 16 bytes/page is reduced to basically nil. Please pull from: git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git memory/core Avi Kivity (30): ioport: change portio_list not to use memory_region_set_offset() memory: remove memory_region_set_offset() memory: add shorthand for invoking a callback on all listeners memory: switch memory listeners to a QTAILQ memory: code motion: move MEMORY_LISTENER_CALL() memory: move ioeventfd ops to MemoryListener memory: add a readonly attribute to MemoryRegionSection memory: don't pass -readable attribute to cpu_register_physical_memory_log memory: use a MemoryListener for core memory map updates too memory: drop AddressSpaceOps memory: allow MemoryListeners to observe a specific address space xen: ignore I/O memory regions memory: split memory listener for the two address spaces memory: support stateless memory listeners memory: change memory registration to rebuild the memory map on each change memory: remove first level of l1_phys_map memory: unify phys_map last level with intermediate levels memory: store MemoryRegionSection pointers in phys_map memory: compress phys_map node pointers to 16 bits memory: fix RAM subpages in newly initialized pages memory: unify the two branches of cpu_register_physical_memory_log() memory: move tlb flush to MemoryListener commit callback memory: make phys_page_find() return a MemoryRegionSection memory: give phys_page_find() its own tree search loop memory: simplify multipage/subpage registration memory: replace phys_page_find_alloc() with phys_page_set() memory: switch phys_page_set() to a recursive implementation memory: change phys_page_set() to set multiple pages memory: unify PhysPageEntry::node and ::leaf memory: allow phys_map tree paths to terminate early exec-obsolete.h |5 +- exec.c | 875 --- hw/vhost.c | 33 ++- ioport.c| 25 ++- ioport.h|1 + kvm-all.c | 97 ++- memory.c| 328 +- memory.h| 26 +- xen-all.c | 33 ++- 9 files changed, 910 insertions(+), 513 deletions(-) -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PULL] Memory core space reduction
On 02/28/2012 02:25 PM, Avi Kivity wrote: [repost with pull info, brain not yet back up to speed] This is the current memory queue (posted as two separate series before my vacation). When applied, the overhead of 16 bytes/page is reduced to basically nil. Please pull from: git://git.kernel.org/pub/scm/virt/kvm/qemu-kvm.git memory/core Michael, this may fix your issues with 64-bit BARs. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH 6/6] kvm: Fix dirty tracking with large kernel page size
On 02/24/2012 02:23 AM, David Gibson wrote: From: Benjamin Herrenschmidt b...@kernel.crashing.org If the kernel page size is larger than TARGET_PAGE_SIZE, which happens for example on ppc64 with kernels compiled for 64K pages, the dirty tracking doesn't work. Cc: Avi Kivity a...@redhat.com Cc: Marcelo Tossatti mtossa...@redhat.com Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- kvm-all.c |7 --- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 5e188bf..3f8cfd9 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -348,10 +348,11 @@ static int kvm_set_migration_log(int enable) static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, unsigned long *bitmap) { -unsigned int i, j; + unsigned int i, j; unsigned long page_number, c; target_phys_addr_t addr, addr1; unsigned int len = ((section-size / TARGET_PAGE_SIZE) + HOST_LONG_BITS - 1) / HOST_LONG_BITS; +unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE; What if TARGET_PAGE_SIZE getpagesize()? Or is that impossible? -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH v2] Consolidate reads and writes in nbd block device into one common routine
On 28.02.2012 15:35, Paolo Bonzini wrote: Il 28/02/2012 11:24, Michael Tokarev ha scritto: This removes quite some duplicated code. [] +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, int iswrite) Call this nbd_co_rw, and please pass the whole request.type down. Originally it is readV and writeV, so why it should not be rwV ? By passing whole request.type (NBD_CMD_WRITE or NBD_CMD_WRITE|NBD_CMD_FLAG_FUA or NBD_CMD_READ) the condition (iswrite currently) will be larger (request.type != NBD_CMD_READ). Also, if someday we'll have additional flag for READ as we already do for write, whole thing will be even more difficult to read. { BDRVNBDState *s = bs-opaque; struct nbd_request request; struct nbd_reply reply; +int offset = 0; -request.type = NBD_CMD_WRITE; -if (!bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { +request.type = iswrite ? NBD_CMD_WRITE : NBD_CMD_READ; +if (iswrite !bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { request.type |= NBD_CMD_FLAG_FUA; } +reply.error = 0; + +/* we split the request into pieces of at most NBD_MAX_SECTORS size + * and process them in a loop... */ +do { +request.from = sector_num * 512; +request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512; + +nbd_coroutine_start(s, request); +if (nbd_co_send_request(s, request, iswrite ? qiov-iov : NULL, 0) == -1) { The last 0 needs to be offset. Indeed, this is a bug. I think I tested it with large requests but it looks like only for reads. [] ... but thinking more about it, why don't you leave nbd_co_readv_1/nbd_co_writev_1 alone, and create a nbd_split_rw function that takes a function pointer? Because each of these nbd_co_*_1 does the same thing, the diff. is only quiv-iov vs NULL. While reading the original code it was the first thing I did - consolidated nbd_co_*_1 into nbd_co_* ;) Actually it might be a good idea to have single bdrv-bdrv_co_readwritev method instead of two -- the path of each read and write jumps between specific read-or-write routine and common readwrite routine several times. I see only one correction which needs (really!) to be done - that's fixing the bug with offset. Do you still not agree? Thanks, /mjt
[Qemu-devel] [Bug 942299] Re: Regression in booting HelenOS/ppc under Qemu
Just for the reference, commit 41557447d30eeb944e42069513df13585f5e6c7f Author: Alexander Graf ag...@suse.de Date: Fri Sep 10 15:08:34 2010 + PPC: Redesign interrupt trigger path According to the Book3S spec, the interrupt context starts with an MSR value that is rather simple. If we leave out the HV case, it's almost always 0. To reflect this, let's redesign the way that MSR value gets calculated. Using this, we also squash the bug where MSR_POW can slip through into the interrupt handler MSR. Reported-by: Thomas Monjalon thomas.monja...@openwide.fr Signed-off-by: Alexander Graf ag...@suse.de Signed-off-by: Edgar E. Iglesias edgar.igles...@gmail.com -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/942299 Title: Regression in booting HelenOS/ppc under Qemu Status in Home for various HelenOS development branches: New Status in QEMU: New Bug description: Mark Cave-Ayland identified Qemu commit 41557447d30eeb944e42069513df13585f5e6c7f as the first bad commit which prevents HelenOS/ppc from booting with OpenBIOS taken from Qemu 0.11.1. Note that we deliberately use the OpenBIOS from Qemu 0.11.1 because there is another suspected, unrelated bug in newer versions of OpenBIOS. For reproducing purposes, the following HelenOS image can be used (latest HelenOS exhibits the same behavior): http://www.helenos.org/releases/HelenOS-0.4.2-ppc32.iso Commit 41557447d30eeb944e42069513df13585f5e6c7f will result in HelenOS not making any further progress after it prints init: Spawning /srv/devfs. The previous commit, f844c817d726cd2bdb431aa41c8217891ede2eaf, works fine - HelenOS will spawn a couple of shell instances and will be accepting user input. To manage notifications about this bug go to: https://bugs.launchpad.net/helenos/+bug/942299/+subscriptions
Re: [Qemu-devel] [PATCH v2] Consolidate reads and writes in nbd block device into one common routine
Il 28/02/2012 13:35, Michael Tokarev ha scritto: On 28.02.2012 15:35, Paolo Bonzini wrote: Il 28/02/2012 11:24, Michael Tokarev ha scritto: This removes quite some duplicated code. [] +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, int iswrite) Call this nbd_co_rw, and please pass the whole request.type down. Originally it is readV and writeV, so why it should not be rwV ? It's more consistent with block.c. By passing whole request.type (NBD_CMD_WRITE or NBD_CMD_WRITE|NBD_CMD_FLAG_FUA or NBD_CMD_READ) the condition (iswrite currently) will be larger (request.type != NBD_CMD_READ). Also, if someday we'll have additional flag for READ as we already do for write, whole thing will be even more difficult to read. Sure, but why should a generic function deal with NBD_CMD_FLAG_FUA? { BDRVNBDState *s = bs-opaque; struct nbd_request request; struct nbd_reply reply; +int offset = 0; -request.type = NBD_CMD_WRITE; -if (!bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { +request.type = iswrite ? NBD_CMD_WRITE : NBD_CMD_READ; +if (iswrite !bdrv_enable_write_cache(bs) (s-nbdflags NBD_FLAG_SEND_FUA)) { request.type |= NBD_CMD_FLAG_FUA; } +reply.error = 0; + +/* we split the request into pieces of at most NBD_MAX_SECTORS size + * and process them in a loop... */ +do { +request.from = sector_num * 512; +request.len = MIN(nb_sectors, NBD_MAX_SECTORS) * 512; + +nbd_coroutine_start(s, request); +if (nbd_co_send_request(s, request, iswrite ? qiov-iov : NULL, 0) == -1) { The last 0 needs to be offset. Indeed, this is a bug. I think I tested it with large requests but it looks like only for reads. ... but thinking more about it, why don't you leave nbd_co_readv_1/nbd_co_writev_1 alone, and create a nbd_split_rw function that takes a function pointer? Because each of these nbd_co_*_1 does the same thing, the diff. is only quiv-iov vs NULL. While reading the original code it was the first thing I did - consolidated nbd_co_*_1 into nbd_co_* ;) And offset. I needed to check that non-0 offsets are fine when the iov is NULL; it's not obvious. Actually it might be a good idea to have single bdrv-bdrv_co_readwritev method instead of two -- the path of each read and write jumps between specific read-or-write routine and common readwrite routine several times. Small amounts of duplicate code can be better than functions with many ifs or complicated conditions. I see only one correction which needs (really!) to be done - that's fixing the bug with offset. Do you still not agree? I still disagree. :) I will accept the patch with the function pointer though. Paolo
Re: [Qemu-devel] linux guests and ksm performance
On 02/23/2012 06:42 PM, Stefan Hajnoczi wrote: On Thu, Feb 23, 2012 at 3:40 PM, Peter Lieven p...@dlh.net wrote: However, in a virtual machine I have not observed the above slow down to that extend while the benefit of zero after free in a virtualisation environment is obvious: 1) zero pages can easily be merged by ksm or other technique. 2) zero (dup) pages are a lot faster to transfer in case of migration. The other approach is a memory page discard mechanism - which obviously requires more code changes than zeroing freed pages. The advantage is that we don't take the brute-force and CPU intensive approach of zeroing pages. It would be like a fine-grained ballooning feature. I hope someone will follow up saying this has already been done or prototyped :). It already exists - that's the balloon code. Right now it's host driven, but maybe we can modify it to allow the guest to initiate balloon inflations. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] linux guests and ksm performance
On 02/24/2012 08:41 AM, Stefan Hajnoczi wrote: I dont think that it is cpu intense. All user pages are zeroed anyway, but at allocation time it shouldnt be a big difference in terms of cpu power. It's easy to find a scenario where eagerly zeroing pages is wasteful. Imagine a process that uses all of physical memory. Once it terminates the system is going to run processes that only use a small set of pages. It's pointless zeroing all those pages if we're not going to use them anymore. In the long term, we will use them, except if the guest is completely idle. The scenario in which zeroing is expensive is when the page is refilled through DMA. In that case the zeroing was wasted. This is a pretty common scenario in pagecache intensive workloads. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH v2] Consolidate reads and writes in nbd block device into one common routine
On 28.02.2012 17:03, Paolo Bonzini wrote: Il 28/02/2012 13:35, Michael Tokarev ha scritto: On 28.02.2012 15:35, Paolo Bonzini wrote: Il 28/02/2012 11:24, Michael Tokarev ha scritto: This removes quite some duplicated code. [] +static int nbd_co_rwv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, int iswrite) Call this nbd_co_rw, and please pass the whole request.type down. Originally it is readV and writeV, so why it should not be rwV ? It's more consistent with block.c. By passing whole request.type (NBD_CMD_WRITE or NBD_CMD_WRITE|NBD_CMD_FLAG_FUA or NBD_CMD_READ) the condition (iswrite currently) will be larger (request.type != NBD_CMD_READ). Also, if someday we'll have additional flag for READ as we already do for write, whole thing will be even more difficult to read. Sure, but why should a generic function deal with NBD_CMD_FLAG_FUA? I can pass both iswrite and request.type here - to avoid possible complications in the area of adding more nbd-specific meanings/flags to request.type. But that becomes more ugly. [] ... but thinking more about it, why don't you leave nbd_co_readv_1/nbd_co_writev_1 alone, and create a nbd_split_rw function that takes a function pointer? Because each of these nbd_co_*_1 does the same thing, the diff. is only quiv-iov vs NULL. While reading the original code it was the first thing I did - consolidated nbd_co_*_1 into nbd_co_* ;) And offset. I needed to check that non-0 offsets are fine when the iov is NULL; it's not obvious. It isn't indeed. Both send_request and recv_reply only checks iov and ignores offset if iov is NULL. Actually it might be a good idea to have single bdrv-bdrv_co_readwritev method instead of two -- the path of each read and write jumps between specific read-or-write routine and common readwrite routine several times. Small amounts of duplicate code can be better than functions with many ifs or complicated conditions. Sure thing. But when the code path is so twisted - common-specific- common- specific - it makes very difficult to get the bigger picture. I see only one correction which needs (really!) to be done - that's fixing the bug with offset. Do you still not agree? I still disagree. :) I will accept the patch with the function pointer though. With separate nbd_co_*_1 it isn't worth the effort. When it all is in a _small_ single routine (the resulting function is actually very small), whole logic is immediately visible. In particular, the FUA bit which is set for every _part_ of write request - it wasn't visible till I integrated nbd_co_writev_1 into nbd_co_writev. Thanks, /mjt
Re: [Qemu-devel] linux guests and ksm performance
On 28.02.2012 14:16, Avi Kivity wrote: On 02/24/2012 08:41 AM, Stefan Hajnoczi wrote: I dont think that it is cpu intense. All user pages are zeroed anyway, but at allocation time it shouldnt be a big difference in terms of cpu power. It's easy to find a scenario where eagerly zeroing pages is wasteful. Imagine a process that uses all of physical memory. Once it terminates the system is going to run processes that only use a small set of pages. It's pointless zeroing all those pages if we're not going to use them anymore. In the long term, we will use them, except if the guest is completely idle. The scenario in which zeroing is expensive is when the page is refilled through DMA. In that case the zeroing was wasted. This is a pretty common scenario in pagecache intensive workloads. Avi, what do you think of the proposal to give the guest vm a hint that the host is running ksm? In that case the administrator has already chosen that saving physical memory is more important than performance to him? Peter
[Qemu-devel] [PATCH] We should check virtio_load return code
Otherwise we crash on error. Signed-off-by: Orit Wasserman owass...@redhat.com Signed-off-by: Ulrich Obergfell uober...@redhat.com --- hw/virtio-balloon.c|6 +- hw/virtio-blk.c|7 ++- hw/virtio-net.c|6 +- hw/virtio-scsi.c |7 ++- hw/virtio-serial-bus.c |6 +- 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c index ce9d2c9..075ed87 100644 --- a/hw/virtio-balloon.c +++ b/hw/virtio-balloon.c @@ -211,11 +211,15 @@ static void virtio_balloon_save(QEMUFile *f, void *opaque) static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id) { VirtIOBalloon *s = opaque; +int ret; if (version_id != 1) return -EINVAL; -virtio_load(s-vdev, f); +ret = virtio_load(s-vdev, f); +if (ret) { +return ret; +} s-num_pages = qemu_get_be32(f); s-actual = qemu_get_be32(f); diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 49990f8..d4bb400 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -537,11 +537,16 @@ static void virtio_blk_save(QEMUFile *f, void *opaque) static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) { VirtIOBlock *s = opaque; +int ret; if (version_id != 2) return -EINVAL; -virtio_load(s-vdev, f); +ret = virtio_load(s-vdev, f); +if (ret) { +return ret; +} + while (qemu_get_sbyte(f)) { VirtIOBlockReq *req = virtio_blk_alloc_request(s); qemu_get_buffer(f, (unsigned char*)req-elem, sizeof(req-elem)); diff --git a/hw/virtio-net.c b/hw/virtio-net.c index bc5e3a8..3f190d4 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -891,11 +891,15 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) { VirtIONet *n = opaque; int i; +int ret; if (version_id 2 || version_id VIRTIO_NET_VM_VERSION) return -EINVAL; -virtio_load(n-vdev, f); +ret = virtio_load(n-vdev, f); +if (ret) { +return ret; +} qemu_get_buffer(f, n-mac, ETH_ALEN); n-tx_waiting = qemu_get_be32(f); diff --git a/hw/virtio-scsi.c b/hw/virtio-scsi.c index e607edc..9797847 100644 --- a/hw/virtio-scsi.c +++ b/hw/virtio-scsi.c @@ -558,7 +558,12 @@ static void virtio_scsi_save(QEMUFile *f, void *opaque) static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id) { VirtIOSCSI *s = opaque; -virtio_load(s-vdev, f); +int ret; + +ret = virtio_load(s-vdev, f); +if (ret) { +return ret; +} return 0; } diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index e22940e..4a33872 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -590,13 +590,17 @@ static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id) VirtIOSerialPort *port; uint32_t max_nr_ports, nr_active_ports, ports_map; unsigned int i; +int ret; if (version_id 3) { return -EINVAL; } /* The virtio device */ -virtio_load(s-vdev, f); +ret = virtio_load(s-vdev, f); +if (ret) { +return ret; +} if (version_id 2) { return 0; -- 1.7.6.5
Re: [Qemu-devel] qemu assertion failed with usb on current git master!
On February 27, 2012 at 5:53 PM Erik Rull erik.r...@rdsoftware.de wrote: Gerd Hoffmann wrote: Hi, I'm really sorry, but I don't understand what's happening - I copied the qemu executable on my target system before executing it, but gdb complains that the core file does not match the executable! But except the file paths they are identical. warning: core file may not match specified executable file. Core was generated by `/disc/qemu-system-x86_64 -machine kernel_irqchip=on -serial /dev/ttyS2 -usb -de'. Program terminated with signal 6, Aborted. #0 0xe424 in __kernel_vsyscall () Strange. The backtrace is bogus too. I don't know how to proceed here. Lets try plan b: add a printf right before the assert: --- a/hw/usb.c +++ b/hw/usb.c @@ -356,6 +356,7 @@ void usb_packet_complete(USBDevice *dev, USBPacket *p) while (!QTAILQ_EMPTY(ep-queue)) { p = QTAILQ_FIRST(ep-queue); +fprintf(stderr, %s: packet %p\n, __func__, p); assert(p-state == USB_PACKET_QUEUED); ret = usb_process_one(p); if (ret == USB_RET_ASYNC) { Don't you run into this problem (crash on USB plug in) on your system? I tested it with a Linux guest, there it does not crash! Only with a Windows XP guest! I test with Linux most of the time, but even with windows xp guest it doesn't reproduce here. cheers, Gerd That's a good idea - will test that tomorrow and send the new result file. Have you ever tested a USB CD or DVD drive attached to your guests? I have issues with Windows XP (I get everything running and detected beside the drive letter in Windows Explorer) but it works fine for Linux. Best regards, Erik Find attached the usb.txt = I gzip'ed it to reduce the transfer size. I added the p-state to the fprintf, maybe this helps. fprintf(stderr, %s: packet: %p %d\n, __func__, p,p?p-state:-1); Best regards, Erik usb.txt.gz Description: GNU Zip compressed data
Re: [Qemu-devel] [PATCH] We should check virtio_load return code
Il 28/02/2012 14:31, Orit Wasserman ha scritto: Otherwise we crash on error. Signed-off-by: Orit Wasserman owass...@redhat.com Signed-off-by: Ulrich Obergfell uober...@redhat.com --- hw/virtio-balloon.c|6 +- hw/virtio-blk.c|7 ++- hw/virtio-net.c|6 +- hw/virtio-scsi.c |7 ++- hw/virtio-serial-bus.c |6 +- 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c index ce9d2c9..075ed87 100644 --- a/hw/virtio-balloon.c +++ b/hw/virtio-balloon.c @@ -211,11 +211,15 @@ static void virtio_balloon_save(QEMUFile *f, void *opaque) static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id) { VirtIOBalloon *s = opaque; +int ret; if (version_id != 1) return -EINVAL; -virtio_load(s-vdev, f); +ret = virtio_load(s-vdev, f); +if (ret) { +return ret; +} s-num_pages = qemu_get_be32(f); s-actual = qemu_get_be32(f); diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 49990f8..d4bb400 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -537,11 +537,16 @@ static void virtio_blk_save(QEMUFile *f, void *opaque) static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id) { VirtIOBlock *s = opaque; +int ret; if (version_id != 2) return -EINVAL; -virtio_load(s-vdev, f); +ret = virtio_load(s-vdev, f); +if (ret) { +return ret; +} + while (qemu_get_sbyte(f)) { VirtIOBlockReq *req = virtio_blk_alloc_request(s); qemu_get_buffer(f, (unsigned char*)req-elem, sizeof(req-elem)); diff --git a/hw/virtio-net.c b/hw/virtio-net.c index bc5e3a8..3f190d4 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -891,11 +891,15 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id) { VirtIONet *n = opaque; int i; +int ret; if (version_id 2 || version_id VIRTIO_NET_VM_VERSION) return -EINVAL; -virtio_load(n-vdev, f); +ret = virtio_load(n-vdev, f); +if (ret) { +return ret; +} qemu_get_buffer(f, n-mac, ETH_ALEN); n-tx_waiting = qemu_get_be32(f); diff --git a/hw/virtio-scsi.c b/hw/virtio-scsi.c index e607edc..9797847 100644 --- a/hw/virtio-scsi.c +++ b/hw/virtio-scsi.c @@ -558,7 +558,12 @@ static void virtio_scsi_save(QEMUFile *f, void *opaque) static int virtio_scsi_load(QEMUFile *f, void *opaque, int version_id) { VirtIOSCSI *s = opaque; -virtio_load(s-vdev, f); +int ret; + +ret = virtio_load(s-vdev, f); +if (ret) { +return ret; +} return 0; } diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index e22940e..4a33872 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -590,13 +590,17 @@ static int virtio_serial_load(QEMUFile *f, void *opaque, int version_id) VirtIOSerialPort *port; uint32_t max_nr_ports, nr_active_ports, ports_map; unsigned int i; +int ret; if (version_id 3) { return -EINVAL; } /* The virtio device */ -virtio_load(s-vdev, f); +ret = virtio_load(s-vdev, f); +if (ret) { +return ret; +} if (version_id 2) { return 0; Acked-by: Paolo Bonzini pbonz...@redhat.com
Re: [Qemu-devel] [PATCH v6 2/4] cadence_ttc: initial version of device model
On Tue, Feb 28, 2012 at 10:07 PM, Paul Brook p...@codesourcery.com wrote: Thinking about that, I realised why I don't like the following line: + s-reg_value = (uint32_t)((x + interval) % interval); This assumes x -interval, which is not always true. This would mean you have wrapped twice or more in one time step, which I am assuming is a fatal error condition, as It means your software has missed interrupts and all sort of race conditions would occur. I would personally prefer to assert !(x -interval) and have qemu hw_error or something, as in these cases QEMU can just not handle your super quick timer wrap around. No, not fatal at all. On a heavily loaded host it's normal for qemu[1] to stall for tens of miliseconds at a time. In extreme cases several seconds at a time, though we've fixed most of those. This is greater than the interval of many guest periodic events. A linux guest with HZ=1000 (even HZ=100) will routinely loose timer ticks. By my reading your timers have a configurable limit, so the obvious configuration is a limit of 25000 (2.5MHz / 100), and trigger the jiffy tick off the timer wrap/reload. Yeh i discovered that in testing :) my hw_error did trigger under linux. Fixed that % issue you raised (in v8) such this it will not % a -ve number when the timer multi-wraps. If your guest OS assumes their ISR will complete before timer triggers again then it will break. That's their problem. Any guest that relies on consistent realtime performance/behavior is going to malfunction when run inside qemu. This is only one of several ways that qemu behavior can differ from that of real hardware. You can workaround some of these issues with -icount, though that has its own set of problems. One of which is that is doesn't support KVM or SMP[2]. guests. Paul [1] The same applies for KVM. [2] SMP may be fixable. KVM probably not. Peter
Re: [Qemu-devel] linux guests and ksm performance
On 02/28/2012 03:20 PM, Peter Lieven wrote: On 28.02.2012 14:16, Avi Kivity wrote: On 02/24/2012 08:41 AM, Stefan Hajnoczi wrote: I dont think that it is cpu intense. All user pages are zeroed anyway, but at allocation time it shouldnt be a big difference in terms of cpu power. It's easy to find a scenario where eagerly zeroing pages is wasteful. Imagine a process that uses all of physical memory. Once it terminates the system is going to run processes that only use a small set of pages. It's pointless zeroing all those pages if we're not going to use them anymore. In the long term, we will use them, except if the guest is completely idle. The scenario in which zeroing is expensive is when the page is refilled through DMA. In that case the zeroing was wasted. This is a pretty common scenario in pagecache intensive workloads. Avi, what do you think of the proposal to give the guest vm a hint that the host is running ksm? In that case the administrator has already chosen that saving physical memory is more important than performance to him? It makes some sense. Perhaps through the balloon device, a flag that indicates that voluntary ballooning will be gratefully accepted. -- error compiling committee.c: too many arguments to function
[Qemu-devel] [PATCH 3/3] qemu-ga: add guest-suspend-hybrid
Signed-off-by: Luiz Capitulino lcapitul...@redhat.com --- qapi-schema-guest.json | 23 +++ qga/commands-posix.c | 10 ++ 2 files changed, 33 insertions(+), 0 deletions(-) diff --git a/qapi-schema-guest.json b/qapi-schema-guest.json index b102311..6a75552 100644 --- a/qapi-schema-guest.json +++ b/qapi-schema-guest.json @@ -347,3 +347,26 @@ # Since: 1.1 ## { 'command': 'guest-suspend-ram' } + +## +# @guest-suspend-hybrid +# +# Save guest state to disk and suspend to ram. +# +# This command requires the pm-utils package to be installed in the guest. +# +# IMPORTANT: guest-suspend-hybrid requires QEMU to support the 'system_wakeup' +# command. Thus, it's *required* to query QEMU for the presence of the +# 'system_wakeup' command before issuing guest-suspend-hybrid. +# +# Returns: nothing on success +# If hybrid suspend is not supported, Unsupported +# +# Notes: o This is an asynchronous request. There's no guarantee a response +# will be sent +#o It's strongly recommended to issue the guest-sync command before +# sending commands when the guest resumes +# +# Since: 1.1 +## +{ 'command': 'guest-suspend-hybrid' } diff --git a/qga/commands-posix.c b/qga/commands-posix.c index 134c130..79571bf 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -715,6 +715,16 @@ void qmp_guest_suspend_ram(Error **err) guest_suspend(pm-suspend, mem, err); } +void qmp_guest_suspend_hybrid(Error **err) +{ +bios_supports_mode(pm-is-supported, --suspend-hybrid, NULL, err); +if (error_is_set(err)) { +return; +} + +guest_suspend(pm-suspend-hybrid, NULL, err); +} + /* register init/cleanup routines for stateful command groups */ void ga_command_state_init(GAState *s, GACommandState *cs) { -- 1.7.9.111.gf3fb0.dirty
[Qemu-devel] [PATCH 1/3] qemu-ga: add guest-suspend-disk
As the command name implies, this command suspends the guest to disk. The suspend operation is implemented by two functions: bios_supports_mode() and guest_suspend(). Both functions are generic enough to be used by other suspend modes (introduced by next commits). Both functions will try to use the scripts provided by the pm-utils package if it's available. If it's not available, a manual method, which consists of directly writing to '/sys/power/state', will be used. To reap terminated children, a new signal handler is installed in the parent to catch SIGCHLD signals and a non-blocking call to waitpid() is done to collect their exit statuses. The statuses, however, are discarded. The approach used to query the guest for suspend support deserves some explanation. It's implemented by bios_supports_mode() and shown below: qemu-ga | create pipe | fork() - | | | | | fork() | -- | || | || | | exec('pm-is-supported') | | | wait() | write exit status to pipe | exit | read pipe This might look complex, but the resulting code is quite simple. The purpose of that approach is to allow qemu-ga to reap its children (semi-)automatically from its SIGCHLD handler. Implementing this the obvious way, that's, doing the exec() call from the first child process, would force us to introduce a more complex way to reap qemu-ga's children. Like registering PIDs to be reaped and having a way to wait for them when returning their exit status to qemu-ga is necessary. The approach explained above avoids that complexity. Signed-off-by: Luiz Capitulino lcapitul...@redhat.com --- qapi-schema-guest.json | 24 ++ qemu-ga.c | 19 +- qga/commands-posix.c | 188 3 files changed, 230 insertions(+), 1 deletions(-) diff --git a/qapi-schema-guest.json b/qapi-schema-guest.json index 706925d..f4e0e1d 100644 --- a/qapi-schema-guest.json +++ b/qapi-schema-guest.json @@ -295,3 +295,27 @@ ## { 'command': 'guest-fsfreeze-thaw', 'returns': 'int' } + +## +# @guest-suspend-disk +# +# Suspend guest to disk. +# +# This command tries to execute the scripts provided by the pm-utils package. +# If it's not available, the suspend operation will be performed by manually +# writing to a sysfs file. +# +# For the best results it's strongly recommended to have the pm-utils +# package installed in the guest. +# +# Returns: nothing on success +# If suspend to disk is not supported, Unsupported +# +# Notes: o This is an asynchronous request. There's no guarantee a response +# will be sent +#o It's strongly recommended to issue the guest-sync command before +# sending commands when the guest resumes +# +# Since: 1.1 +## +{ 'command': 'guest-suspend-disk' } diff --git a/qemu-ga.c b/qemu-ga.c index ba355d8..1c90e6e 100644 --- a/qemu-ga.c +++ b/qemu-ga.c @@ -17,6 +17,7 @@ #include getopt.h #ifndef _WIN32 #include syslog.h +#include sys/wait.h #endif #include json-streamer.h #include json-parser.h @@ -73,9 +74,16 @@ static void quit_handler(int sig) } #ifndef _WIN32 +/* reap _all_ terminated children */ +static void child_handler(int sig) +{ +int status; +while (waitpid(-1, status, WNOHANG) 0) /* NOTHING */; +} + static gboolean register_signal_handlers(void) { -struct sigaction sigact; +struct sigaction sigact, sigact_chld; int ret; memset(sigact, 0, sizeof(struct sigaction)); @@ -91,6 +99,15 @@ static gboolean register_signal_handlers(void) g_error(error configuring signal handler: %s, strerror(errno)); return false; } + +memset(sigact_chld, 0, sizeof(struct sigaction)); +sigact_chld.sa_handler = child_handler; +sigact_chld.sa_flags = SA_NOCLDSTOP; +ret = sigaction(SIGCHLD, sigact_chld, NULL); +if (ret == -1) { +g_error(error configuring signal handler: %s, strerror(errno)); +} + return true; } #endif diff --git a/qga/commands-posix.c b/qga/commands-posix.c index 126127a..af785f5 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -23,6 +23,7 @@ #include sys/types.h #include sys/ioctl.h +#include sys/wait.h #include qga/guest-agent-core.h #include qga-qmp-commands.h #include qerror.h @@ -30,6 +31,22 @@ static GAState *ga_state; +static void reopen_fd_to_null(int fd) +{ +int nullfd; + +nullfd = open(/dev/null, O_RDWR); +if (nullfd 0) { +return; +} + +dup2(nullfd, fd); + +if (nullfd != fd) { +close(nullfd); +} +} + void qmp_guest_shutdown(bool has_mode, const char *mode, Error **err) { int ret; @@ -517,6 +534,177 @@ int64_t qmp_guest_fsfreeze_thaw(Error
Re: [Qemu-devel] [PATCH 6/8] gtk: add support for screen scaling and full screen (v2)
Am 27.02.2012 21:10, schrieb Stefan Weil: Am 27.02.2012 00:46, schrieb Anthony Liguori: Basic menu items to enter full screen mode and zoom in/out. Unlike SDL, we don't allow arbitrary scaling based on window resizing. The current behavior with SDL causes a lot of problems for me. Sometimes I accidentally resize the window a tiny bit while trying to move it (Ubuntu's 1-pixel window decorations don't help here). After that, scaling is now active and if the screen changes size again, badness ensues since the aspect ratio is skewed. Allowing zooming by 25% in and out should cover most use cases. We can add a more flexible scaling later but for now, I think this is a more friendly behavior. Signed-off-by: Anthony Liguori aligu...@us.ibm.com --- v1 - v2 - fix scaling (Paolo) - use ctrl-alt-+ instead of ctrl-alt-= for zoom --- ui/gtk.c | 92 +++--- 1 files changed, 88 insertions(+), 4 deletions(-) diff --git a/ui/gtk.c b/ui/gtk.c index 0dac807..578cb94 100644 --- a/ui/gtk.c +++ b/ui/gtk.c [...] + s-full_screen_item = gtk_check_menu_item_new_with_mnemonic(_Full Screen); I suggest using the GTK standard widget GTK_STOCK_FULLSCREEN here. That's not a check menu item, so some more changes will be needed. Full screen mode does not need a check menu item, because you only see the menu item when it is not in full screen mode. Tried Alt-V in full screen mode? ;-) (Yes, I'd consider it a bug) Kevin
Re: [Qemu-devel] [PATCH v4] qemu-ga: Add guest-network-info command
On Mon, 27 Feb 2012 20:07:28 -0600 Michael Roth mdr...@linux.vnet.ibm.com wrote: What about something like this instead: { 'enum': 'GuestIpAddressType', 'data': [ 'ipv4', 'ipv6' ] } { 'type': 'GuestIpAddress', 'data': {'ip-address': 'str', 'ip-address-type': 'GuestIpAddressType', 'prefix': 'int'} } { 'type': 'GuestNetworkInterface', 'data': {'interface': {'name': 'str', '*hardware-address': 'str', '*ip-addresses': ['GuestIpAddress'] } } } { 'type': 'GuestNetworkInfo', 'data': { 'interfaces': ['GuestNetworkInterfaces'] } } { 'command': 'guest-network-info', 'returns': 'GuestNetworkInfo' } In the future we might have: { 'type': 'GuestNetworkInfo', 'data': { 'interfaces': ['GuestNetworkInterfaces'], 'routes': ['GuestNetworkRoute'], 'bridges': ['GuestNetworkBridge'], 'firewall-rules': ['firewall-rule'], # yikes etc. } } Both approaches are fine to me, but another possibility is to split this into multiple commands, like guest-interfaces-info, guest-routes-info etc. This would allow for simpler commands with less clutter. Once we settle down on this I can send another version for review. Personally, if guest agent would report description (see my other e-mail [1]) I don't see big advantage in introducing dozens of error codes here. descriptions are mapped to QERRs though, so it'd only be useful if you defined specific errors for these cases. I agree with Luiz, but at the same time it's not exactly tractable to enumerate all possible errors for every command into a unique QERR except for common things like FD_NOT_FOUND. So maybe just a QERR_QGA_INTERFACE_ENUMERATION_FAILED, that took a stringified error message? I don't really have a strong opinion either way. Well, turns out I'm not sure what to do here either. On the one hand it's a huge work (and probably unnecessary) to add all possible errors. On the other hand, it's really hard to debug a problem when all information you have is a generic error. As this a relatively simple query command, I'm fine with simple/generic errors.
[Qemu-devel] [PATCH v4 2/2] QMP: Add qmp command for blockdev-group-snapshot-sync
This adds the QMP command for blockdev-group-snapshot-sync. It takes an array in as the input, for the argument devlist. The array consists of the following elements: + device:device to snapshot. e.g. ide-hd0, virtio0 + snapshot-file: path file for the snapshot image. e.g. /tmp/file.img + format:snapshot format. e.g., qcow2. Optional There is no HMP equivalent for the command. Signed-off-by: Jeff Cody jc...@redhat.com --- qmp-commands.hx | 39 +++ 1 files changed, 39 insertions(+), 0 deletions(-) diff --git a/qmp-commands.hx b/qmp-commands.hx index b5e2ab8..15d35c1 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -665,6 +665,45 @@ EQMP .args_type = device:B, .mhandler.cmd_new = qmp_marshal_input_block_job_cancel, }, +{ +.name = blockdev-group-snapshot-sync, +.args_type = devlist:O, +.params = device:B,snapshot-file:s,format:s?, +.mhandler.cmd_new = qmp_marshal_input_blockdev_group_snapshot_sync, +}, + +SQMP +blockdev-group-snapshot-sync +-- + +Synchronous snapshot of one or more block devices. A list array input +is accepted, that contains the device and snapshot file information for +each device in group. The default format, if not specified, is qcow2. + +If there is any failure creating or opening a new snapshot, all snapshots +for the group are abandoned, and the original disks pre-snapshot attempt +are used. + + +Arguments: + +devlist array: +- device: device name to snapshot (json-string) +- snapshot-file: name of new image file (json-string) +- format: format of new image (json-string, optional) + +Example: + +- { execute: blockdev-group-snapshot-sync, arguments: + { devlist: [{ device: ide-hd0, + snapshot-file: /some/place/my-image, + format: qcow2 }, +{ device: ide-hd1, + snapshot-file: /some/place/my-image2, + format: qcow2 }] } } +- { return: {} } + +EQMP { .name = blockdev-snapshot-sync, -- 1.7.9.rc2.1.g69204
[Qemu-devel] [PATCH v4 1/2] qapi: Introduce blockdev-group-snapshot-sync command
This is a QAPI/QMP only command to take a snapshot of a group of devices. This is similar to the blockdev-snapshot-sync command, except blockdev-group-snapshot-sync accepts a list devices, filenames, and formats. It is attempted to keep the snapshot of the group atomic; if the creation or open of any of the new snapshots fails, then all of the new snapshots are abandoned, and the name of the snapshot image that failed is returned. The failure case should not interrupt any operations. Rather than use bdrv_close() along with a subsequent bdrv_open() to perform the pivot, the original image is never closed and the new image is placed 'in front' of the original image via manipulation of the BlockDriverState fields. Thus, once the new snapshot image has been successfully created, there are no more failure points before pivoting to the new snapshot. This allows the group of disks to remain consistent with each other, even across snapshot failures. Signed-off-by: Jeff Cody jc...@redhat.com --- block.c | 81 + block.h |1 + block_int.h |6 +++ blockdev.c | 132 ++ qapi-schema.json | 38 +++ 5 files changed, 258 insertions(+), 0 deletions(-) diff --git a/block.c b/block.c index 3621d11..90232d9 100644 --- a/block.c +++ b/block.c @@ -880,6 +880,87 @@ void bdrv_make_anon(BlockDriverState *bs) bs-device_name[0] = '\0'; } +/* + * Add new bs contents at the top of an image chain while the chain is + * live, while keeping required fields on the top layer. + * + * This will modify the BlockDriverState fields, and swap contents + * between bs_new and bs_top. Both bs_new and bs_top are modified. + * + * This function does not create any image files. + */ +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) +{ +BlockDriverState tmp; + +/* the new bs must not be in bdrv_states */ +bdrv_make_anon(bs_new); + +tmp = *bs_new; + +/* there are some fields that need to stay on the top layer: */ + +/* dev info */ +tmp.dev_ops = bs_top-dev_ops; +tmp.dev_opaque= bs_top-dev_opaque; +tmp.dev = bs_top-dev; +tmp.buffer_alignment = bs_top-buffer_alignment; +tmp.copy_on_read = bs_top-copy_on_read; + +/* i/o timing parameters */ +tmp.slice_time= bs_top-slice_time; +tmp.slice_start = bs_top-slice_start; +tmp.slice_end = bs_top-slice_end; +tmp.io_limits = bs_top-io_limits; +tmp.io_base = bs_top-io_base; +tmp.throttled_reqs= bs_top-throttled_reqs; +tmp.block_timer = bs_top-block_timer; +tmp.io_limits_enabled = bs_top-io_limits_enabled; + +/* geometry */ +tmp.cyls = bs_top-cyls; +tmp.heads = bs_top-heads; +tmp.secs = bs_top-secs; +tmp.translation = bs_top-translation; + +/* r/w error */ +tmp.on_read_error = bs_top-on_read_error; +tmp.on_write_error= bs_top-on_write_error; + +/* i/o status */ +tmp.iostatus_enabled = bs_top-iostatus_enabled; +tmp.iostatus = bs_top-iostatus; + +/* keep the same entry in bdrv_states */ +pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top-device_name); +tmp.list = bs_top-list; + +/* The contents of 'tmp' will become bs_top, as we are + * swapping bs_new and bs_top contents. */ +tmp.backing_hd = bs_new; +pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top-filename); + +/* swap contents of the fixed new bs and the current top */ +*bs_new = *bs_top; +*bs_top = tmp; + +/* clear the copied fields in the new backing file */ +bdrv_detach_dev(bs_new, bs_new-dev); + +qemu_co_queue_init(bs_new-throttled_reqs); +memset(bs_new-io_base, 0, sizeof(bs_new-io_base)); +memset(bs_new-io_limits, 0, sizeof(bs_new-io_limits)); +bdrv_iostatus_disable(bs_new); + +/* we don't use bdrv_io_limits_disable() for this, because we don't want + * to affect or delete the block_timer, as it has been moved to bs_top */ +bs_new-io_limits_enabled = false; +bs_new-block_timer = NULL; +bs_new-slice_time= 0; +bs_new-slice_start = 0; +bs_new-slice_end = 0; +} + void bdrv_delete(BlockDriverState *bs) { assert(!bs-dev); diff --git a/block.h b/block.h index cae289b..190a780 100644 --- a/block.h +++ b/block.h @@ -114,6 +114,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, int bdrv_create_file(const char* filename, QEMUOptionParameter *options); BlockDriverState *bdrv_new(const char *device_name); void bdrv_make_anon(BlockDriverState *bs); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); void bdrv_delete(BlockDriverState *bs); int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename,
[Qemu-devel] [PATCH] usb: queue can have async packets too
Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hw/usb.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/hw/usb.c b/hw/usb.c index 57fc5e3..fc41d62 100644 --- a/hw/usb.c +++ b/hw/usb.c @@ -356,6 +356,9 @@ void usb_packet_complete(USBDevice *dev, USBPacket *p) while (!QTAILQ_EMPTY(ep-queue)) { p = QTAILQ_FIRST(ep-queue); +if (p-state == USB_PACKET_ASYNC) { +break; +} assert(p-state == USB_PACKET_QUEUED); ret = usb_process_one(p); if (ret == USB_RET_ASYNC) { -- 1.7.1