Re: [Qemu-devel] [PATCH 14/22] usb: Add packet combining functions
Hi, For handle_combined_data, yes, as usb_ep_combine_input_packets can cause multiple packets to get submitted, since if a combined packet ends with a packet, which does not have its short_not_ok flag set, another packet can be safely pipelined after it. This is not useful for usb mass storage, but very usefull for usb serial port converters. Ah, I see. You can have a queue with -- say -- 16 packets which will get combined into 4 groups with 4 packets each - 4 callbacks. I think handle_combined_data() should get USBCombinedPacket passed instead of USBPacket. It is cleaner API-wise. Likewise for the other usb_combined_* functions. Allowing to call usb_ep_combine_input_packets on any endpoint (except iso which is a special case anyway) would be good too I think, then it is possible for usb drivers to operate on USBCombinedPackets everywhere. BTW: I think USBPacketGroup would be a better name for USBCombinedPacket. cheers, Gerd
[Qemu-devel] [PATCH 03/15] pseries: Implement qemu initiated shutdowns using EPOW events
At present, using 'system_powerdown' from the monitor or otherwise instructing qemu to (cleanly) shut down a pseries guest will not work, because we did not have a method of signalling the shutdown request to the guest. PAPR does include a usable mechanism for this, though it is rather more involved than the equivalent on x86. This involves sending an EPOW (Environmental and POwer Warning) event through the PAPR event and error logging mechanism, which also has a number of other functions. This patch implements just enough of the event/error logging functionality to be able to send a shutdown event to the guest. At least with modern guest kernels and a userspace that is up and running, this means that system_powerdown from the qemu monitor should now work correctly on pseries guests. Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- v2: * Coding style cleanups suggested by Blue Swirl --- hw/ppc/Makefile.objs |1 + hw/spapr.c | 14 ++- hw/spapr.h | 11 ++ hw/spapr_events.c| 320 ++ 4 files changed, 344 insertions(+), 2 deletions(-) create mode 100644 hw/spapr_events.c diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs index 951e407..8fe2123 100644 --- a/hw/ppc/Makefile.objs +++ b/hw/ppc/Makefile.objs @@ -11,6 +11,7 @@ obj-y += ppc_newworld.o obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o spapr_iommu.o +obj-$(CONFIG_PSERIES) += spapr_events.o # PowerPC 4xx boards obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o obj-y += ppc440_bamboo.o diff --git a/hw/spapr.c b/hw/spapr.c index 09b8e99..64c35a8 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -232,7 +232,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model, target_phys_addr_t initrd_size, target_phys_addr_t kernel_size, const char *boot_device, - const char *kernel_cmdline) + const char *kernel_cmdline, + uint32_t epow_irq) { void *fdt; CPUPPCState *env; @@ -403,6 +404,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model, _FDT((fdt_property(fdt, ibm,associativity-reference-points, refpoints, sizeof(refpoints; +_FDT((fdt_property_cell(fdt, rtas-error-log-max, RTAS_ERROR_LOG_MAX))); + _FDT((fdt_end_node(fdt))); /* interrupt controller */ @@ -433,6 +436,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model, _FDT((fdt_end_node(fdt))); +/* event-sources */ +spapr_events_fdt_skel(fdt, epow_irq); + _FDT((fdt_end_node(fdt))); /* close root node */ _FDT((fdt_finish(fdt))); @@ -794,6 +800,9 @@ static void ppc_spapr_init(ram_addr_t ram_size, spapr-icp = xics_system_init(XICS_IRQS); spapr-next_irq = 16; +/* Set up EPOW events infrastructure */ +spapr_events_init(spapr); + /* Set up IOMMU */ spapr_iommu_init(); @@ -902,7 +911,8 @@ static void ppc_spapr_init(ram_addr_t ram_size, spapr-fdt_skel = spapr_create_fdt_skel(cpu_model, initrd_base, initrd_size, kernel_size, -boot_device, kernel_cmdline); +boot_device, kernel_cmdline, +spapr-epow_irq); assert(spapr-fdt_skel != NULL); } diff --git a/hw/spapr.h b/hw/spapr.h index e984e3f..3d02911 100644 --- a/hw/spapr.h +++ b/hw/spapr.h @@ -26,6 +26,12 @@ typedef struct sPAPREnvironment { int rtc_offset; char *cpu_model; bool has_graphics; + +/* Data for Environmental and POwer Warnings (EPOW events) */ +uint32_t epow_irq; +Notifier epow_notifier; +void *pending_epow; +uint32_t next_plid; } sPAPREnvironment; #define H_SUCCESS 0 @@ -335,7 +341,12 @@ typedef struct sPAPRTCE { #define SPAPR_VIO_BASE_LIOBN0x #define SPAPR_PCI_BASE_LIOBN0x8000 +#define RTAS_ERROR_LOG_MAX 2048 + + void spapr_iommu_init(void); +void spapr_events_init(sPAPREnvironment *spapr); +void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq); DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size); void spapr_tce_free(DMAContext *dma); void spapr_tce_reset(DMAContext *dma); diff --git a/hw/spapr_events.c b/hw/spapr_events.c new file mode 100644 index 000..4b19612 --- /dev/null +++ b/hw/spapr_events.c @@ -0,0 +1,320 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * RTAS events handling + * + * Copyright (c) 2012 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of
Re: [Qemu-devel] [PATCH v11 01/14] target-mips: Add ASE DSP internal functions
On Thu, Oct 18, 2012 at 09:53:20AM +0800, Jia Liu wrote: Hi Aurelien, On Wed, Oct 17, 2012 at 11:15 PM, Aurelien Jarno aurel...@aurel32.net wrote: On Wed, Oct 17, 2012 at 11:39:00AM +0800, Jia Liu wrote: Hi Aurelien, On Wed, Oct 17, 2012 at 7:20 AM, Aurelien Jarno aurel...@aurel32.net wrote: On Tue, Oct 16, 2012 at 12:39:05AM +0800, Jia Liu wrote: +/* a[0] is LO, a[1] is HI. */ +static inline void mipsdsp_sat64_acc_sub_q63(int64_t *ret, + int32_t ac, + int64_t *a, + CPUMIPSState *env) +{ +uint32_t temp64, temp63; +int64_t temp[2]; +int64_t acc[2]; +int64_t temp_sum; + +temp[0] = a[0]; +temp[1] = a[1]; + +acc[0] = env-active_tc.LO[ac]; +acc[1] = env-active_tc.HI[ac]; + +temp_sum = acc[0] - temp[0]; +if (MIPSDSP_OVERFLOW(acc[0], -temp[0], temp_sum, 0x8000ull)) { +acc[1] -= 1; +} +acc[0] = temp_sum; + +temp_sum = acc[1] - temp[1]; +acc[1] = temp_sum; + +temp64 = acc[1] 0x01; +temp63 = (acc[0] 63) 0x01; + +/* MIPSDSP_OVERFLOW only can check if a 64 bits sub is overflow, + * there are two 128 bits value subed then check the 63/64 bits are equal + * or not.*/ If you disagree with what I say, you can send mail, there is no need to put it as a comment. That said MIPSDSP_OVERFLOW doesn't work only on 64-bit values, it can work other size, as it is done elsewhere in this patch. The only thing it checked is the highest bit of the two arguments and the result. Therefore if you pass the highest part of the values, it can work. I did agree with you, just didn't totally get your point. MIPSDSP_OVERFLOW used to check overflow, but here, 128bit + 128bit, low 64bit overflow need to be checked, so, in fact, I'm not sure what should do. Is this code right? static inline void mipsdsp_sat64_acc_sub_q63(uint64_t *ret, int32_t ac, uint64_t *a, CPUMIPSState *env) { uint32_t temp64; uint64_t temp[2]; uint64_t acc[2]; temp[0] = a[0]; temp[1] = a[1]; acc[0] = env-active_tc.LO[ac]; acc[1] = env-active_tc.HI[ac]; temp[1] = acc[1] - temp[1]; temp[0] = acc[0] - temp[0]; temp64 = temp[1] 0x01; if (temp64 ^ MIPSDSP_OVERFLOW(acc[0], temp[0], temp[0], (0x01ull 63))) { if (temp64 == 1) { ret[0] = (0x01ull 63); ret[1] = ~0ull; } else { ret[0] = (0x01ull 63) - 1; ret[1] = 0x00; } set_DSPControl_overflow_flag(1, 16 + ac, env); } else { ret[0] = temp[0]; ret[1] = acc[0] temp[0] ? temp[1] : temp[1] - 1; } } I don't think xoring temp64 with MIPSDSP_OVERFLOW is correct. What about about something like that (untested): | static inline void mipsdsp_sat64_acc_sub_q63(uint64_t *ret, | int32_t ac, | uint64_t *a, | CPUMIPSState *env) | { | ret[0] = env-active_tc.LO[ac] - a[0]; | ret[1] = env-active_tc.HI[ac] - a[1]; | In the MIPS-DSP manual, the function is function sat64AccumulateSubQ63( acc1..0, a127..0 ) temp128..0 ← HI[acc]63 || HI[acc]63..0 || LO[acc]63..0 temp128..0 ← temp - a127..0 if ( temp64 ≠ temp63 ) then if ( temp64 = 1 ) then temp127..0 ← 0x8000 else temp127..0 ← 0x7FFF endif DSPControlouflag:16+acc ← 1 endif return temp127..0 endfunction sat64AccumulateSubQ63 | if (MIPSDSP_OVERFLOW(env-active_tc.LO[ac], -a[0], ret[0], 0x8000ull)) { The bit 64 will influence the overflow of bits 0-63. That is, if bit 64 is 1, and bits 0-63 overflowed, it won't be caught. So, if we use this code, it won't be handled. I think you are correct, we have to take into account the overflow part, but also the original value of the temp64 bit. | if ((ret[1] - 1) 1) { | ret[0] = (0x01ull 63); | ret[1] = ~0ull; | } else { | ret[0] = (0x01ull 63) - 1; | ret[1] = 0x00; | } | set_DSPControl_overflow_flag(1, 16 + ac, env); | } | } | The same applies for the add function, but also to some other places in the code. Also note that you might want to have two version of MIPSDSP_OVERFLOW(), one for add (like the current one), and one for sub (without the ^ -1), so that you don't have to pass the negative value
[Qemu-devel] [PATCH v3] qemu-img: document 'info --backing-chain'
Signed-off-by: Kashyap Chamarthy kashyap...@gmail.com --- qemu-img-cmds.hx | 4 ++-- qemu-img.texi| 21 - 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 0ef82e9ac7a89d607e1f2c8e38461cee5b438dba..a18136302d2ca7a7540672f1b9ef89603e89edc0 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -34,9 +34,9 @@ STEXI ETEXI DEF(info, img_info, -info [-f fmt] [--output=ofmt] filename) +info [-f fmt] [--output=ofmt] [--backing-chain] filename) STEXI -@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} +@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} ETEXI DEF(snapshot, img_snapshot, diff --git a/qemu-img.texi b/qemu-img.texi index 8b05f2c42801a2535ab4390b47bc415eb880625a..fa4ced6e5a24eed0adf1b44b471d6959055a58a1 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -28,6 +28,10 @@ Command parameters: is the disk image format. It is guessed automatically in most cases. See below for a description of the supported disk formats. +@item --backing-chain +will enumerate information about backing files in a disk image chain. Refer +below for further description. + @item size is the disk image size in bytes. Optional suffixes @code{k} or @code{K} (kilobyte, 1024) @code{M} (megabyte, 1024k) and @code{G} (gigabyte, 1024M) @@ -129,7 +133,7 @@ created as a copy on write image of the specified base image; the @var{backing_file} should have the same content as the input's base image, however the path, image format, etc may differ. -@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} +@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} Give information about the disk image @var{filename}. Use it in particular to know the size reserved on disk which can be different @@ -137,6 +141,21 @@ from the displayed size. If VM snapshots are stored in the disk image, they are displayed too. The command can output in the format @var{ofmt} which is either @code{human} or @code{json}. +If a disk image has a backing file chain, information about each disk image in +the chain can be recursively enumerated by using the option @code{--backing-chain}. + +For instance, if you have an image chain like: + +@example +base.qcow2 - snap1.qcow2 - snap2.qcow2 +@end example + +To enumerate information about each disk image in the above chain, starting from top to base, do: + +@example +qemu-img info --backing-chain snap2.qcow2 +@end example + @item snapshot [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot} ] @var{filename} List, apply, create or delete snapshots in image @var{filename}. -- 1.7.12.1
Re: [Qemu-devel] [PATCH 3/7] serial: add windows inf file for the pci card to docs
On 10/17/12 22:12, Hervé Poussineau wrote: Signed-off-by: Gerd Hoffmann address@hidden --- docs/qemupciserial.inf | 109 1 files changed, 109 insertions(+), 0 deletions(-) create mode 100644 docs/qemupciserial.inf [...] Did you try to add emulation of a PCI serial card natively recognized by Windows XP (at least), like SIIG CyberSerial card? Thats the only one. Looked at the linux code for it, a comment mumbled something about a eeprom holding card configuration, /me figured I better don't go that route. If it would have been a simple card with just a 16550 nothing else I would have picked it. cheers, Gerd
Re: [Qemu-devel] [PATCH v2] pci: Return PCI_INTX_DISABLED when no bus INTx routing support
On 2012-10-18 00:13, Alex Williamson wrote: Rather than assert, simply return PCI_INTX_DISABLED when we don't have a pci_route_irq_fn. PIIX already returns DISABLED for an invalid pin, so users already deal with this state. Users of this interface should only be acting on an ENABLED or INVERTED return value (though we really have no support for INVERTED). Also complain loudly when we hit this so we don't forget it's missing. Signed-off-by: Alex Williamson alex.william...@redhat.com --- v2: Turn up the annoyance factor for hitting this hw/pci.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/pci.c b/hw/pci.c index 83d262a..6a66b32 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -1094,7 +1094,13 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin) pin = bus-map_irq(dev, pin); dev = bus-parent_dev; } while (dev); -assert(bus-route_intx_to_irq); + +if (!bus-route_intx_to_irq) { +error_report(PCI: Bug - unimplemented PCI INTx routing (%s)\n, + object_get_typename(OBJECT(bus-qbus.parent))); +return (PCIINTxRoute) { PCI_INTX_DISABLED, -1 }; +} + return bus-route_intx_to_irq(bus-irq_opaque, pin); } I'm fine with this. I also see this as dead code in x86 (any x86 chipset will support this API, for sure), but maybe it helps on other archs. So: Acked-by: Jan Kiszka jan.kis...@siemens.com Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
[Qemu-devel] 1.1.1 - 1.1.2 migrate / managed save issue
I'm using libvirt 0.10.2 and I had qemu-kvm 1.1.1 running all my VMs. I used libvirt's managedsave command to pause all the VMs and write them to disk and when I brought up the machine again I had upgraded to qemu-kvm 1.1.2 and attempted to resume the VMs from their state. It unfortunately fails. During the life of the VM I did not attempt to adjust the amount of memory it had via the balloon device unless of course libvirt did behind the scenes on me. Below is the command line invocation and the error: LC_ALL=C PATH=/bin:/sbin:/bin:/sbin:/usr/bin:/usr/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin:/opt/bin HOME=/root USER=root QEMU_AUDIO_DRV=spice /usr/bin/qemu-kvm -name expo -S -M pc-1.0 -cpu Penryn,+pdcm,+xtpr,+tm2,+est,+smx,+vmx,+ds_cpl,+monitor,+dtes64,+pbe,+tm,+ht,+ss,+acpi,+ds,+vme -enable-kvm -m 1024 -smp 1,sockets=1,cores=1,threads=1 -uuid 19034754-aa3f-9671-d247-1bc53134e3f0 -no-user-config -nodefaults -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/expo.monitor,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc -no-shutdown -device virtio-serial-pci,id=virtio-serial0,bus=pci.0,addr=0x4 -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -drive file=/var/lib/libvirt/images/expo.img,if=none,id=drive-ide0-0-0,format=raw,cache=none -device ide-hd,bus=ide.0,unit=0,drive=drive-ide0-0-0,id=ide0-0-0,bootindex=1 -netdev tap,fd=23,id=hostnet0 -device rtl8139,netdev=hostnet0,id=net0,mac=52:54:00:0b:29:d9,bus=pci.0,addr=0x3 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -chardev spicevmc,id=charchannel0,name=vdagent -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=com.redhat.spice.0 -spice port=5901,addr=127.0.0.1,disable-ticketing -vga qxl -global qxl-vga.vram_size=67108864 -incoming fd:20 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x5 char device redirected to /dev/pts/7 qemu: warning: error while loading state for instance 0x0 of device 'ram' load of migration failed -- Doug Goldstein
Re: [Qemu-devel] [PATCH 1/6] qdev: rework device properties.
On 10/17/12 22:14, Eduardo Habkost wrote: On Wed, Jul 15, 2009 at 01:43:31PM +0200, Gerd Hoffmann wrote: [...] diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c new file mode 100644 index 000..8b0d0ff --- /dev/null +++ b/hw/qdev-properties.c @@ -0,0 +1,246 @@ Gerd, could you clarify what's the copyright/license of this file? (I mean, at least the copyright/license of the initial version of the file you wrote, below). I am CCing all other authors that touched the file (according to git logs), so they can clarify what's the license they assumed for the file and their contributions. gplv2+ cheers, Gerd
[Qemu-devel] [PATCH 02/15] target-ppc: Rework storage of VPA registration state
With PAPR guests, hypercalls allow registration of the Virtual Processor Area (VPA), SLB shadow and dispatch trace log (DTL), each of which allow for certain communication between the guest and hypervisor. Currently, we store the addresses of the three areas and the size of the dtl in CPUPPCState. The SLB shadow and DTL are variable sized, with the size being retrieved from within the registered memory area at the hypercall time. This size can later be overwritten with other information, however, so we need to save the size as of registration time. We already do this for the DTL, but not for the SLB shadow, so this patch fixes that. In addition, we change the storage of the VPA information to use fixed size integer types which will make life easier for syncing this data with KVM, which we will need in future. Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/spapr_hcall.c| 26 ++ target-ppc/cpu.h|7 +++ target-ppc/translate_init.c |7 --- 3 files changed, 21 insertions(+), 19 deletions(-) diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c index 762493a..621dabd 100644 --- a/hw/spapr_hcall.c +++ b/hw/spapr_hcall.c @@ -366,26 +366,26 @@ static target_ulong register_vpa(CPUPPCState *env, target_ulong vpa) return H_PARAMETER; } -env-vpa = vpa; +env-vpa_addr = vpa; -tmp = ldub_phys(env-vpa + VPA_SHARED_PROC_OFFSET); +tmp = ldub_phys(env-vpa_addr + VPA_SHARED_PROC_OFFSET); tmp |= VPA_SHARED_PROC_VAL; -stb_phys(env-vpa + VPA_SHARED_PROC_OFFSET, tmp); +stb_phys(env-vpa_addr + VPA_SHARED_PROC_OFFSET, tmp); return H_SUCCESS; } static target_ulong deregister_vpa(CPUPPCState *env, target_ulong vpa) { -if (env-slb_shadow) { +if (env-slb_shadow_addr) { return H_RESOURCE; } -if (env-dispatch_trace_log) { +if (env-dtl_addr) { return H_RESOURCE; } -env-vpa = 0; +env-vpa_addr = 0; return H_SUCCESS; } @@ -407,18 +407,20 @@ static target_ulong register_slb_shadow(CPUPPCState *env, target_ulong addr) return H_PARAMETER; } -if (!env-vpa) { +if (!env-vpa_addr) { return H_RESOURCE; } -env-slb_shadow = addr; +env-slb_shadow_addr = addr; +env-slb_shadow_size = size; return H_SUCCESS; } static target_ulong deregister_slb_shadow(CPUPPCState *env, target_ulong addr) { -env-slb_shadow = 0; +env-slb_shadow_addr = 0; +env-slb_shadow_size = 0; return H_SUCCESS; } @@ -437,11 +439,11 @@ static target_ulong register_dtl(CPUPPCState *env, target_ulong addr) return H_PARAMETER; } -if (!env-vpa) { +if (!env-vpa_addr) { return H_RESOURCE; } -env-dispatch_trace_log = addr; +env-dtl_addr = addr; env-dtl_size = size; return H_SUCCESS; @@ -449,7 +451,7 @@ static target_ulong register_dtl(CPUPPCState *env, target_ulong addr) static target_ulong deregister_dtl(CPUPPCState *env, target_ulong addr) { -env-dispatch_trace_log = 0; +env-dtl_addr = 0; env-dtl_size = 0; return H_SUCCESS; diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index faf4404..eb2f43d 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1045,10 +1045,9 @@ struct CPUPPCState { #endif #if defined(TARGET_PPC64) !defined(CONFIG_USER_ONLY) -target_phys_addr_t vpa; -target_phys_addr_t slb_shadow; -target_phys_addr_t dispatch_trace_log; -uint32_t dtl_size; +uint64_t vpa_addr; +uint64_t slb_shadow_addr, slb_shadow_size; +uint64_t dtl_addr, dtl_size; #endif /* TARGET_PPC64 */ int error_code; diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index a972287..831b169 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -10425,9 +10425,10 @@ static void ppc_cpu_reset(CPUState *s) env-error_code = 0; #if defined(TARGET_PPC64) !defined(CONFIG_USER_ONLY) -env-vpa = 0; -env-slb_shadow = 0; -env-dispatch_trace_log = 0; +env-vpa_addr = 0; +env-slb_shadow_addr = 0; +env-slb_shadow_size = 0; +env-dtl_addr = 0; env-dtl_size = 0; #endif /* TARGET_PPC64 */ -- 1.7.10.4
Re: [Qemu-devel] [PATCH 1/6] qdev: rework device properties.
On 17/10/12 22:14, Eduardo Habkost wrote: On Wed, Jul 15, 2009 at 01:43:31PM +0200, Gerd Hoffmann wrote: [...] diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c new file mode 100644 index 000..8b0d0ff --- /dev/null +++ b/hw/qdev-properties.c @@ -0,0 +1,246 @@ Gerd, could you clarify what's the copyright/license of this file? (I mean, at least the copyright/license of the initial version of the file you wrote, below). I am CCing all other authors that touched the file (according to git logs), so they can clarify what's the license they assumed for the file and their contributions. I assumed GPLv2+.
[Qemu-devel] [PATCH 06/15] pseries: Use #define for XICS base irq number
From: Ben Herrenschmidt b...@kernel.crashing.org Currently the lowest real irq number for the XICS irq controller (as opposed to numbers reserved for IPIs and other special purposes) is hard coded as 16 in two places - in xics_system_init() and in spapr.c. As well as being generally bad practice, we're going to need to change this number soon to fit in with the in-kernel XICS implementation. This patch adds a #define for this number to avoid future breakage. Signed-off-by: Michael Ellerman mich...@ellerman.id.au Signed-off-by: Ben Herrenschmidt b...@kernel.crashing.org Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/spapr.c |2 +- hw/xics.c |2 +- hw/xics.h |1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/spapr.c b/hw/spapr.c index 64c35a8..789c941 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -798,7 +798,7 @@ static void ppc_spapr_init(ram_addr_t ram_size, /* Set up Interrupt Controller */ spapr-icp = xics_system_init(XICS_IRQS); -spapr-next_irq = 16; +spapr-next_irq = XICS_IRQ_BASE; /* Set up EPOW events infrastructure */ spapr_events_init(spapr); diff --git a/hw/xics.c b/hw/xics.c index 7a899dd..db01fe3 100644 --- a/hw/xics.c +++ b/hw/xics.c @@ -548,7 +548,7 @@ struct icp_state *xics_system_init(int nr_irqs) ics = g_malloc0(sizeof(*ics)); ics-nr_irqs = nr_irqs; -ics-offset = 16; +ics-offset = XICS_IRQ_BASE; ics-irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state)); icp-ics = ics; diff --git a/hw/xics.h b/hw/xics.h index 6817268..c3bf008 100644 --- a/hw/xics.h +++ b/hw/xics.h @@ -28,6 +28,7 @@ #define __XICS_H__ #define XICS_IPI0x2 +#define XICS_IRQ_BASE 0x10 struct icp_state; -- 1.7.10.4
[Qemu-devel] [PATCH 05/15] pseries: Clean up inconsistent variable name in xics.c
Throughout xics.c 'nr' is used to refer to a global interrupt number, and 'server' is used to refer to an interrupt server number (i.e. CPU number). Except in icp_set_mfrr(), where 'nr' is used as a server number. Fix this confusing inconsistency. Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/xics.c |6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/xics.c b/hw/xics.c index ce88aa7..7a899dd 100644 --- a/hw/xics.c +++ b/hw/xics.c @@ -108,13 +108,13 @@ static void icp_set_cppr(struct icp_state *icp, int server, uint8_t cppr) } } -static void icp_set_mfrr(struct icp_state *icp, int nr, uint8_t mfrr) +static void icp_set_mfrr(struct icp_state *icp, int server, uint8_t mfrr) { -struct icp_server_state *ss = icp-ss + nr; +struct icp_server_state *ss = icp-ss + server; ss-mfrr = mfrr; if (mfrr CPPR(ss)) { -icp_check_ipi(icp, nr); +icp_check_ipi(icp, server); } } -- 1.7.10.4
Re: [Qemu-devel] Disabling KVM on the fly
Il 17/10/2012 20:37, Jan Kiszka ha scritto: On 2012-10-17 18:44, Paolo Bonzini wrote: Il 17/10/2012 18:37, Clemens Kolbitsch ha scritto: Guys, I know this is question might seem a bit odd, but I'm curious: Has anyone ever tried to write code to disable KVM on the fly / is it at all possible? I have a situation where I need to use TCG for certain parts of the code, but would love to have acceleration for everything else. My idea was to pause the VM, then use the snapshotting mechanism to dump the state, and then to resume the snapshot, but writing the KVM state into the non-KVM structures. As a start, you can try using migrate exec:catfoo.save with a KVM machine and -incoming 'exec:cat foo.save' with a TCG machine. The main problem should be that TCG doesn't implement kvmclock. If you disable the KVM interrupt controller and timer (which is just an implementation detail, not a hardware difference), Unnecessary. Both models (KVM in-kernel and QEMU userspace) are compatible - in the absence of bugs. He wants to really switch it on the fly---not just migrate out and in---and for that you need to disable the KVM-specific devices. But loading a KVM image into TCG lets non-trival guests lock up. Likely due to differences in the CPU virtualization/emulation (MSRs...). Perhaps that can be mitigated by using an older machine model. Start with something simple like a pentium2 and work up from there... Paolo
Re: [Qemu-devel] [PATCH v2] pci: Return PCI_INTX_DISABLED when no bus INTx routing support
On Wed, Oct 17, 2012 at 04:13:12PM -0600, Alex Williamson wrote: Rather than assert, simply return PCI_INTX_DISABLED when we don't have a pci_route_irq_fn. PIIX already returns DISABLED for an invalid pin, so users already deal with this state. Users of this interface should only be acting on an ENABLED or INVERTED return value (though we really have no support for INVERTED). Also complain loudly when we hit this so we don't forget it's missing. Signed-off-by: Alex Williamson alex.william...@redhat.com --- Thanks, applied. v2: Turn up the annoyance factor for hitting this hw/pci.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/pci.c b/hw/pci.c index 83d262a..6a66b32 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -1094,7 +1094,13 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin) pin = bus-map_irq(dev, pin); dev = bus-parent_dev; } while (dev); -assert(bus-route_intx_to_irq); + +if (!bus-route_intx_to_irq) { +error_report(PCI: Bug - unimplemented PCI INTx routing (%s)\n, + object_get_typename(OBJECT(bus-qbus.parent))); +return (PCIINTxRoute) { PCI_INTX_DISABLED, -1 }; +} + return bus-route_intx_to_irq(bus-irq_opaque, pin); }
Re: [Qemu-devel] nvram and boot order
On 18.10.2012, at 03:18, Benjamin Herrenschmidt b...@kernel.crashing.org wrote: On Thu, 2012-10-18 at 11:09 +1100, David Gibson wrote: That's horrible; if you use -boot just once it will clobber a persistent NVRAM's boot order. I see that a means of changing the default boot order from management tools is desirable, but that shouldn't be the normal behaviour of -boot. And the objections to (2) apply even more strongly - we'd need to translate arbitrary -boot strings to NVRAM representation which may not be at all straightforward from the information qemu has available. It may not be straight forward, but it's what makes the most sense from a user's PoV. Bollocks. Using -boot to override the normal boot sequence permanently changing the normal boot sequence absoultely does not make sense from a user's PoV. I strongly agree with David here. -boot should not change the persistent state. I think Anthony and you are looking at 2 different use cases, each with their own sane reasoning. You want to have the chance to override the boot order temporarily for things like cd boot or quick guest rescue missions. You also want to be able to permanently change the guest's boot order from a management tool. At that same place you want to be able to display it, so you don't have to boot your vm to know what it would be doing. As for device detection logic, both face the same problems. You need to be able to say 'boot from cd-rom first temporarily' just the same as you need to be able to say 'boot from the first cd-rom as first boot option permanently'. The permanent change needs to be possible with the vm turned off though. I suppose that Anthony's reasoning is that we can implement temporary in the management layer (or even qemu) if we have the permanent mechanism, by switching back to the previous state after shutdown if the guest written boot order didn't change. I don't mind personally if we have one interface for temporary and persistent or 2 separate ones, but I think we should aim for having both options available in the long run. Though doing permanent changes first and reverting them later could raise problems when you kill your vm, since that wouldn't clean up the temporary change. In our case, the persistent state will have been carefully crafted by complicated scripts by the distro installer, and while I may want to use -boot to boot once off a cd image or similar, I certainly don't want that to affect my nvram setting pointing to the right on-disk bootloader. Additionally I don't want qemu to have to understand all the intricacies of expressing OFW boot path if we can avoid it. Yes, the same problem as EFI for example is facing. The solution here is as simple as it gets: a new device name space. Instead of having a boot list entry saying 'boot from device x, part y, file z' you would get an entry saying 'boot from /qemu/disk0' and leave the rest to the firmware. The good thing about this approach is that it again is persistable and can be used in boot order lists. So you can directly translate -boot cd into /qemu/disk0,/qemu/cdrom. And if you screwed up your guest boot config, just put that order in by hand into permanent config. Qemu gives as much info as it can and let the firmware itself inside the guest figure things out. Yes, that's the only chance we have really. Even for bootindex, which could for example get translated to /qemu/pci/0.10.0/disk0 which again would then get aliased to the actual disk device node behind pci device 0.10.0 (first disk) by SLOF. In fact, I don't want Qemu to know anything about our internal nvram format. This is a business between the guest FW and the guest OS. The only thing qemu is allowed to do is wipe it out if asked to do so :-) It might be useful to use fdt in nvram to store the permanent boot order. That way QEMU / management tools have the chance to make persistent changes. Everyone around already understands fdt anyways :). Um.. as far as I can tell that's a point in favour of my position. It makes it impossible for qemu to correctly describe boot sequences using these devices in the terms firmware uses internally. On the other hand it certainly is possible for qemu to pass bootorder=cd (or whatever) to the firmware via device tree of fw_cfg and have firmware locally interpret that in tersm of what it knows about available devices. This is more/less what happens with -boot today. IE. If you pass c SLOF looks for a bootable disk (though arguably the algorithm could be improved), d for a bootable optical media etc... We definitely want something a bit more expressive and in some case might even be able to pass down from the command line a full path to an actual device but we don't necessarily want qemu to understand the nvram format of this. Make it an expressive representation that makes sense to qemu, and let the FW translate that to something it
Re: [Qemu-devel] [PATCH 1/6] qdev: rework device properties.
On 2012-10-17 22:14, Eduardo Habkost wrote: On Wed, Jul 15, 2009 at 01:43:31PM +0200, Gerd Hoffmann wrote: [...] diff --git a/hw/qdev-properties.c b/hw/qdev-properties.c new file mode 100644 index 000..8b0d0ff --- /dev/null +++ b/hw/qdev-properties.c @@ -0,0 +1,246 @@ Gerd, could you clarify what's the copyright/license of this file? (I mean, at least the copyright/license of the initial version of the file you wrote, below). I am CCing all other authors that touched the file (according to git logs), so they can clarify what's the license they assumed for the file and their contributions. Under the license the original author considered for this file. If that is unclear, GPLv2. Jan -- Siemens AG, Corporate Technology, CT RTC ITP SDP-DE Corporate Competence Center Embedded Linux
Re: [Qemu-devel] [PATCH 13/15] target-pcc: Convert ppcemb_tlb_t to use fixed 64-bit RPN
On 18.10.2012, at 07:50, David Gibson da...@gibson.dropbear.id.au wrote: Currently the ppcemb_tlb_t struct, used on a number of embedded ppc models to represent a TLB entry contains a target_phys_addr_t. That works reasonably for now, but is troublesome for saving the state, which we'll want to do in future. target_phys_addr_t is a large enough type to contain a physical address for any supported machine - and can thus, in theory at least, vary depending on what machines are enabled other than the one we're actually using right now. This makes it unsuitable for describing in vmstate. Target_phys_addr_t is actually 64bit for all ppc targets today since some 32 bit boards support more than 32 bit address space ;). The change still is fine though, as it makes that bit explicit. Alex This patch therefore changes ppcemb_tlb_t to use a fixed 64-bit integer which we know is sufficient for all the machines which use this structure. Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- target-ppc/cpu.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index cde6da0..f30e0c7 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -355,7 +355,7 @@ struct ppc6xx_tlb_t { typedef struct ppcemb_tlb_t ppcemb_tlb_t; struct ppcemb_tlb_t { -target_phys_addr_t RPN; +uint64_t RPN; target_ulong EPN; target_ulong PID; target_ulong size; -- 1.7.10.4
Re: [Qemu-devel] hdd and ssd passthrough
Il 17/10/2012 23:20, Mario Giammarco ha scritto: Hello, I hope I am in the right list. I would like to pass a real disk to a guest (freenas) in kvm/qemu without iommu/vt-d. I need guest to be able to: - use hdd smart; - configure hdd params like power saving; - understand real make and model of hdd; - understand when the hdd is an ssd. This is really a libvirt question more than QEMU... anyway if your guest is new enough (Linux 3.4 or RHEL/CentOS 6.3) you can probably use virtio-scsi with XML that looks like this (using virsh edit): controller type='scsi' model='virtio-scsi'/ disk type='block' device='lun' driver name='qemu' type='raw' cache='none'/ source dev='/dev/sdb'/ target dev='sda' bus='scsi'/ /disk For passthrough you need the device='lun' setting. You can also use virsh from the command line to achieve the same. Write the controller and disk elements to a new file, like hba.xml and sdb.xml. Then: # virsh attach-device --persistent Guest1 ~/hba.xml # virsh attach-device --persistent Guest1 ~/sdb.xml virt-manager and virt-install don't yet fully support virtio-scsi. Paolo
[Qemu-devel] [Bug 1065325] Re: qemu-system-arm hangs on SIGUSR1 on OS X 10.8.2
This seems like an OS bug (kernel or Cocoa). The usage of the signal is common to all QEMU system emulation targets, so it's not dependent on Stellaris. Try an older version of OS X, or try a different video backend (e.g. VNC). That may help isolating the problem. -- You received this bug notification because you are a member of qemu- devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/1065325 Title: qemu-system-arm hangs on SIGUSR1 on OS X 10.8.2 Status in QEMU: New Bug description: I built the latest version of QEMU commit b4ae3cfa57b8c1bdbbd7b7d420971e9171203ade Date: Mon Oct 1 12:34:37 2012 +1000 My system is: Darwin localhost 12.2.0 Darwin Kernel Version 12.2.0: Sat Aug 25 00:48:52 PDT 2012; root:xnu-2050.18.24~1/RELEASE_X86_64 x86_64 localhost:qemu oliverks$ gcc -v Using built-in specs. Target: i686-apple-darwin11 Configured with: /private/var/tmp/llvmgcc42/llvmgcc42-2336.11~28/src/configure --disable-checking --enable-werror --prefix=/Applications/Xcode.app/Contents/Developer/usr/llvm-gcc-4.2 --mandir=/share/man --enable-languages=c,objc,c++,obj-c++ --program-prefix=llvm- --program-transform-name=/^[cg][^.-]*$/s/$/-4.2/ --with-slibdir=/usr/lib --build=i686-apple-darwin11 --enable-llvm=/private/var/tmp/llvmgcc42/llvmgcc42-2336.11~28/dst-llvmCore/Developer/usr/local --program-prefix=i686-apple-darwin11- --host=x86_64-apple-darwin11 --target=i686-apple-darwin11 --with-gxx-include-dir=/usr/include/c++/4.2.1 Thread model: posix gcc version 4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2336.11.00) Shortly after start up I freeze. I am running the command line ./arm-softmmu/qemu-system-arm -M lm3s811evb -kernel ../FreeRTOSV7.2.0/FreeRTOS/Demo/CORTEX_LM3S811_GCC/gcc/RTOSDemo.axf The hang appears to occur due to this signal being sent static void qemu_tcg_init_cpu_signals(void) { sigset_t set; struct sigaction sigact; memset(sigact, 0, sizeof(sigact)); sigact.sa_handler = cpu_signal; sigaction(SIG_IPI, sigact, NULL); // -- Signal that hangs system sigemptyset(set); sigaddset(set, SIG_IPI); pthread_sigmask(SIG_UNBLOCK, set, NULL); } Oliver To manage notifications about this bug go to: https://bugs.launchpad.net/qemu/+bug/1065325/+subscriptions
[Qemu-devel] [0/15] pseries patch queue
Hi Alex, Here's the current state of the pseries pending patch queue. Some of these I think you already have, but I've lost track of what's in your tree but not in mainline yet. The biggest thing is the PAPR NVRAM implementation, and the SLOF update to use it. This doesn't yet implement all the boot order stuff that's been discussed - what it implements is no worse than what we have now, and there will be further patches coming to bring us towards the desired behaviour. This series also has what I hope is finally a correct version of the FPU state extension. Most of the rest is cleanups to the XICS code in preparation for supporting the in-kernel XICS implementation.
[Qemu-devel] [PATCH 08/15] pseries: Move XICS initialization before cpu initialization
From: Ben Herrenschmidt b...@kernel.crashing.org Currently, the pseries machine initializes the cpus, then the XICS interrupt controller. However, to support the upcoming in-kernel XICS implementation we will need to initialize the irq controller before the vcpus. This patch makes the necesssary rearrangement. This means the xics init code can no longer auto-detect the number of cpus (interrupt servers in XICS terminology) and so we must pass that in explicitly from the platform code. Signed-off-by: Michael Ellerman mich...@ellerman.id.au Signed-off-by: Ben Herrenschmidt b...@kernel.crashing.org Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/spapr.c | 11 +++ hw/xics.c | 52 +++- hw/xics.h |3 ++- 3 files changed, 32 insertions(+), 34 deletions(-) diff --git a/hw/spapr.c b/hw/spapr.c index 789c941..e8dbd97 100644 --- a/hw/spapr.c +++ b/hw/spapr.c @@ -744,6 +744,11 @@ static void ppc_spapr_init(ram_addr_t ram_size, spapr-htab_shift++; } +/* Set up Interrupt Controller before we create the VCPUs */ +spapr-icp = xics_system_init(smp_cpus * kvmppc_smt_threads() / smp_threads, + XICS_IRQS); +spapr-next_irq = XICS_IRQ_BASE; + /* init CPUs */ if (cpu_model == NULL) { cpu_model = kvm_enabled() ? host : POWER7; @@ -756,6 +761,8 @@ static void ppc_spapr_init(ram_addr_t ram_size, } env = cpu-env; +xics_cpu_setup(spapr-icp, env); + /* Set time-base frequency to 512 MHz */ cpu_ppc_tb_init(env, TIMEBASE_FREQ); @@ -796,10 +803,6 @@ static void ppc_spapr_init(ram_addr_t ram_size, g_free(filename); -/* Set up Interrupt Controller */ -spapr-icp = xics_system_init(XICS_IRQS); -spapr-next_irq = XICS_IRQ_BASE; - /* Set up EPOW events infrastructure */ spapr_events_init(spapr); diff --git a/hw/xics.c b/hw/xics.c index 6b08430..8860355 100644 --- a/hw/xics.c +++ b/hw/xics.c @@ -507,42 +507,36 @@ static void xics_reset(void *opaque) } } -struct icp_state *xics_system_init(int nr_irqs) +void xics_cpu_setup(struct icp_state *icp, CPUPPCState *env) { -CPUPPCState *env; -int max_server_num; -struct icp_state *icp; -struct ics_state *ics; +struct icp_server_state *ss = icp-ss[env-cpu_index]; -max_server_num = -1; -for (env = first_cpu; env != NULL; env = env-next_cpu) { -if (env-cpu_index max_server_num) { -max_server_num = env-cpu_index; -} -} +assert(env-cpu_index icp-nr_servers); -icp = g_malloc0(sizeof(*icp)); -icp-nr_servers = max_server_num + 1; -icp-ss = g_malloc0(icp-nr_servers*sizeof(struct icp_server_state)); +switch (PPC_INPUT(env)) { +case PPC_FLAGS_INPUT_POWER7: +ss-output = env-irq_inputs[POWER7_INPUT_INT]; +break; -for (env = first_cpu; env != NULL; env = env-next_cpu) { -struct icp_server_state *ss = icp-ss[env-cpu_index]; +case PPC_FLAGS_INPUT_970: +ss-output = env-irq_inputs[PPC970_INPUT_INT]; +break; -switch (PPC_INPUT(env)) { -case PPC_FLAGS_INPUT_POWER7: -ss-output = env-irq_inputs[POWER7_INPUT_INT]; -break; +default: +fprintf(stderr, XICS interrupt controller does not support this CPU +bus model\n); +abort(); +} +} -case PPC_FLAGS_INPUT_970: -ss-output = env-irq_inputs[PPC970_INPUT_INT]; -break; +struct icp_state *xics_system_init(int nr_servers, int nr_irqs) +{ +struct icp_state *icp; +struct ics_state *ics; -default: -hw_error(XICS interrupt model does not support this CPU bus - model\n); -exit(1); -} -} +icp = g_malloc0(sizeof(*icp)); +icp-nr_servers = nr_servers; +icp-ss = g_malloc0(icp-nr_servers*sizeof(struct icp_server_state)); ics = g_malloc0(sizeof(*ics)); ics-nr_irqs = nr_irqs; diff --git a/hw/xics.h b/hw/xics.h index c3bf008..b43678a 100644 --- a/hw/xics.h +++ b/hw/xics.h @@ -35,6 +35,7 @@ struct icp_state; qemu_irq xics_get_qirq(struct icp_state *icp, int irq); void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi); -struct icp_state *xics_system_init(int nr_irqs); +struct icp_state *xics_system_init(int nr_servers, int nr_irqs); +void xics_cpu_setup(struct icp_state *icp, CPUPPCState *env); #endif /* __XICS_H__ */ -- 1.7.10.4
[Qemu-devel] [PATCH 13/15] target-pcc: Convert ppcemb_tlb_t to use fixed 64-bit RPN
Currently the ppcemb_tlb_t struct, used on a number of embedded ppc models to represent a TLB entry contains a target_phys_addr_t. That works reasonably for now, but is troublesome for saving the state, which we'll want to do in future. target_phys_addr_t is a large enough type to contain a physical address for any supported machine - and can thus, in theory at least, vary depending on what machines are enabled other than the one we're actually using right now. This makes it unsuitable for describing in vmstate. This patch therefore changes ppcemb_tlb_t to use a fixed 64-bit integer which we know is sufficient for all the machines which use this structure. Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- target-ppc/cpu.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index cde6da0..f30e0c7 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -355,7 +355,7 @@ struct ppc6xx_tlb_t { typedef struct ppcemb_tlb_t ppcemb_tlb_t; struct ppcemb_tlb_t { -target_phys_addr_t RPN; +uint64_t RPN; target_ulong EPN; target_ulong PID; target_ulong size; -- 1.7.10.4
[Qemu-devel] [PATCH 12/15] pseries: Split xics irq configuration from state information
Currently the XICS irq controller code has a per-irq state structure which amongst other things includes whether the interrupt is level or message triggered - this is configured by the platform code, and is not directly visible to the guest. This leads to a slightly awkward construct at reset time where we need to reset everything in the state structure _except_ the lsi/msi flag, which needs to retain the information given at platform init time. More importantly this flag will make matching the qemu state to the KVM state for the upcoming in-kernel XICS implementation more awkward. This patch, therefore, removes this flag from the per-irq state structure, instead adding a parallel array giving the lsi/msi configuration per irq. Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/xics.c | 20 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/hw/xics.c b/hw/xics.c index 403afdb..5e20f0f 100644 --- a/hw/xics.c +++ b/hw/xics.c @@ -179,13 +179,13 @@ struct ics_irq_state { #define XICS_STATUS_REJECTED 0x4 #define XICS_STATUS_MASKED_PENDING 0x8 uint8_t status; -bool lsi; }; struct ics_state { int nr_irqs; int offset; qemu_irq *qirqs; +bool *islsi; struct ics_irq_state *irqs; struct icp_state *icp; }; @@ -254,9 +254,8 @@ static void set_irq_lsi(struct ics_state *ics, int srcno, int val) static void ics_set_irq(void *opaque, int srcno, int val) { struct ics_state *ics = (struct ics_state *)opaque; -struct ics_irq_state *irq = ics-irqs + srcno; -if (irq-lsi) { +if (ics-islsi[srcno]) { set_irq_lsi(ics, srcno, val); } else { set_irq_msi(ics, srcno, val); @@ -293,7 +292,7 @@ static void ics_write_xive(struct ics_state *ics, int nr, int server, trace_xics_ics_write_xive(nr, srcno, server, priority); -if (irq-lsi) { +if (ics-islsi[srcno]) { write_xive_lsi(ics, srcno); } else { write_xive_msi(ics, srcno); @@ -314,10 +313,8 @@ static void ics_resend(struct ics_state *ics) int i; for (i = 0; i ics-nr_irqs; i++) { -struct ics_irq_state *irq = ics-irqs + i; - /* FIXME: filter by server#? */ -if (irq-lsi) { +if (ics-islsi[i]) { resend_lsi(ics, i); } else { resend_msi(ics, i); @@ -332,7 +329,7 @@ static void ics_eoi(struct ics_state *ics, int nr) trace_xics_ics_eoi(nr); -if (irq-lsi) { +if (ics-islsi[srcno]) { irq-status = ~XICS_STATUS_SENT; } } @@ -354,7 +351,7 @@ void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi) { assert(ics_valid_irq(icp-ics, irq)); -icp-ics-irqs[irq - icp-ics-offset].lsi = lsi; +icp-ics-islsi[irq - icp-ics-offset] = lsi; } static target_ulong h_cppr(CPUPPCState *env, sPAPREnvironment *spapr, @@ -515,10 +512,8 @@ static void xics_reset(void *opaque) qemu_set_irq(icp-ss[i].output, 0); } +memset(ics-irqs, 0, sizeof(struct ics_irq_state) * ics-nr_irqs); for (i = 0; i ics-nr_irqs; i++) { -/* Reset everything *except* the type */ -ics-irqs[i].server = 0; -ics-irqs[i].status = 0; ics-irqs[i].priority = 0xff; ics-irqs[i].saved_priority = 0xff; } @@ -559,6 +554,7 @@ struct icp_state *xics_system_init(int nr_servers, int nr_irqs) ics-nr_irqs = nr_irqs; ics-offset = XICS_IRQ_BASE; ics-irqs = g_malloc0(nr_irqs * sizeof(struct ics_irq_state)); +ics-islsi = g_malloc0(nr_irqs * sizeof(bool)); icp-ics = ics; ics-icp = icp; -- 1.7.10.4
[Qemu-devel] [PATCH 07/15] pseries: Cleanup duplications of ics_valid_irq() code
A couple of places in xics.c open-coded the same logic as is already implemented in ics_valid_irq(). This patch fixes the code duplication. Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/xics.c |6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hw/xics.c b/hw/xics.c index db01fe3..6b08430 100644 --- a/hw/xics.c +++ b/hw/xics.c @@ -326,8 +326,7 @@ static void ics_eoi(struct ics_state *ics, int nr) qemu_irq xics_get_qirq(struct icp_state *icp, int irq) { -if ((irq icp-ics-offset) -|| (irq = (icp-ics-offset + icp-ics-nr_irqs))) { +if (!ics_valid_irq(icp-ics, irq)) { return NULL; } @@ -336,8 +335,7 @@ qemu_irq xics_get_qirq(struct icp_state *icp, int irq) void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi) { -assert((irq = icp-ics-offset) -(irq (icp-ics-offset + icp-ics-nr_irqs))); +assert(ics_valid_irq(icp-ics, irq)); icp-ics-irqs[irq - icp-ics-offset].lsi = lsi; } -- 1.7.10.4
[Qemu-devel] [PATCH 10/15] pseries: Allow RTAS tokens without a qemu handler
From: Ben Herrenschmidt b...@kernel.crashing.org Kernel-based RTAS calls will not have a qemu handler, but will still be registered in qemu in order to be assigned a token number and appear in the device-tree. Let's test for the name being NULL rather than the handler when deciding to skip an entry while building the device-tree Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/spapr_rtas.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c index 38c105f..72cad53 100644 --- a/hw/spapr_rtas.c +++ b/hw/spapr_rtas.c @@ -298,7 +298,7 @@ int spapr_rtas_device_tree_setup(void *fdt, target_phys_addr_t rtas_addr, for (i = 0; i TOKEN_MAX; i++) { struct rtas_call *call = rtas_table[i]; -if (!call-fn) { +if (!call-name) { continue; } -- 1.7.10.4
[Qemu-devel] [PATCH 04/15] target-ppc: Extend FPU state for newer POWER CPUs
This patch adds some extra FPU state to CPUPPCState. Specifically, fpscr is extended to a target_ulong bits, since some recent (64 bit) CPUs now have more status bits than fit inside 32 bits. Also, we add the 32 VSR registers present on CPUs with VSX (these extend the standard FP regs, which together with the Altivec/VMX registers form a 64 x 128bit register file for VSX). We don't actually support the instructions using these extra registers in TCG yet, but we still need a place to store the state so we can sync it with KVM and savevm/loadvm it. This patch updates the savevm code to not fail on the extended state, but also does not actually save it - that's a project for another patch. Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- v2: * Used target_ulong instead of uint64_t, since the extended state is used only on ppc64 targets. * Fixed the TCG mapping of fpscr to match the new type. --- target-ppc/cpu.h |4 +++- target-ppc/machine.c |8 ++-- target-ppc/translate.c | 29 ++--- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index eb2f43d..cde6da0 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -963,7 +963,7 @@ struct CPUPPCState { /* floating point registers */ float64 fpr[32]; /* floating point status and control register */ -uint32_t fpscr; +target_ulong fpscr; /* Next instruction pointer */ target_ulong nip; @@ -1014,6 +1014,8 @@ struct CPUPPCState { /* Altivec registers */ ppc_avr_t avr[32]; uint32_t vscr; +/* VSX registers */ +uint64_t vsr[32]; /* SPE registers */ uint64_t spe_acc; uint32_t spe_fscr; diff --git a/target-ppc/machine.c b/target-ppc/machine.c index 21ce757..5e7bc00 100644 --- a/target-ppc/machine.c +++ b/target-ppc/machine.c @@ -6,6 +6,7 @@ void cpu_save(QEMUFile *f, void *opaque) { CPUPPCState *env = (CPUPPCState *)opaque; unsigned int i, j; +uint32_t fpscr; for (i = 0; i 32; i++) qemu_put_betls(f, env-gpr[i]); @@ -30,7 +31,8 @@ void cpu_save(QEMUFile *f, void *opaque) u.d = env-fpr[i]; qemu_put_be64(f, u.l); } -qemu_put_be32s(f, env-fpscr); +fpscr = env-fpscr; +qemu_put_be32s(f, fpscr); qemu_put_sbe32s(f, env-access_type); #if defined(TARGET_PPC64) qemu_put_betls(f, env-asr); @@ -90,6 +92,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) CPUPPCState *env = (CPUPPCState *)opaque; unsigned int i, j; target_ulong sdr1; +uint32_t fpscr; for (i = 0; i 32; i++) qemu_get_betls(f, env-gpr[i]); @@ -114,7 +117,8 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) u.l = qemu_get_be64(f); env-fpr[i] = u.d; } -qemu_get_be32s(f, env-fpscr); +qemu_get_be32s(f, fpscr); +env-fpscr = fpscr; qemu_get_sbe32s(f, env-access_type); #if defined(TARGET_PPC64) qemu_get_betls(f, env-asr); diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 1042268..56725e6 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -68,7 +68,7 @@ static TCGv cpu_cfar; #endif static TCGv cpu_xer; static TCGv cpu_reserve; -static TCGv_i32 cpu_fpscr; +static TCGv cpu_fpscr; static TCGv_i32 cpu_access_type; #include gen-icount.h @@ -163,8 +163,8 @@ void ppc_translate_init(void) offsetof(CPUPPCState, reserve_addr), reserve_addr); -cpu_fpscr = tcg_global_mem_new_i32(TCG_AREG0, - offsetof(CPUPPCState, fpscr), fpscr); +cpu_fpscr = tcg_global_mem_new(TCG_AREG0, + offsetof(CPUPPCState, fpscr), fpscr); cpu_access_type = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUPPCState, access_type), access_type); @@ -2302,6 +2302,7 @@ GEN_FLOAT_B(neg, 0x08, 0x01, 0, PPC_FLOAT); /* mcrfs */ static void gen_mcrfs(DisasContext *ctx) { +TCGv tmp = tcg_temp_new(); int bfa; if (unlikely(!ctx-fpu_enabled)) { @@ -2309,9 +2310,11 @@ static void gen_mcrfs(DisasContext *ctx) return; } bfa = 4 * (7 - crfS(ctx-opcode)); -tcg_gen_shri_i32(cpu_crf[crfD(ctx-opcode)], cpu_fpscr, bfa); +tcg_gen_shri_tl(tmp, cpu_fpscr, bfa); +tcg_gen_trunc_tl_i32(cpu_crf[crfD(ctx-opcode)], tmp); +tcg_temp_free(tmp); tcg_gen_andi_i32(cpu_crf[crfD(ctx-opcode)], cpu_crf[crfD(ctx-opcode)], 0xf); -tcg_gen_andi_i32(cpu_fpscr, cpu_fpscr, ~(0xF bfa)); +tcg_gen_andi_tl(cpu_fpscr, cpu_fpscr, ~(0xF bfa)); } /* mffs */ @@ -2322,7 +2325,7 @@ static void gen_mffs(DisasContext *ctx) return; } gen_reset_fpstatus(); -tcg_gen_extu_i32_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpscr); +tcg_gen_extu_tl_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpscr); gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0,
[Qemu-devel] [PATCH 09/15] pseries: Return the token when we register an RTAS call
From: Michael Ellerman mich...@ellerman.id.au The kernel will soon be able to service some RTAS calls. However the choice of tokens will still be up to userspace. To support this have spapr_rtas_register() return the token that is allocated for an RTAS call, that allows the calling code to tell the kernel what the token value is. Signed-off-by: Michael Ellerman mich...@ellerman.id.au Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/spapr.h |2 +- hw/spapr_rtas.c |4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/spapr.h b/hw/spapr.h index 3d02911..89c70b5 100644 --- a/hw/spapr.h +++ b/hw/spapr.h @@ -323,7 +323,7 @@ static inline void rtas_st(target_ulong phys, int n, uint32_t val) typedef void (*spapr_rtas_fn)(sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); -void spapr_rtas_register(const char *name, spapr_rtas_fn fn); +int spapr_rtas_register(const char *name, spapr_rtas_fn fn); target_ulong spapr_rtas_call(sPAPREnvironment *spapr, uint32_t token, uint32_t nargs, target_ulong args, uint32_t nret, target_ulong rets); diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c index b96078b..38c105f 100644 --- a/hw/spapr_rtas.c +++ b/hw/spapr_rtas.c @@ -239,7 +239,7 @@ target_ulong spapr_rtas_call(sPAPREnvironment *spapr, return H_PARAMETER; } -void spapr_rtas_register(const char *name, spapr_rtas_fn fn) +int spapr_rtas_register(const char *name, spapr_rtas_fn fn) { int i; @@ -255,7 +255,7 @@ void spapr_rtas_register(const char *name, spapr_rtas_fn fn) rtas_next-name = name; rtas_next-fn = fn; -rtas_next++; +return (rtas_next++ - rtas_table) + TOKEN_BASE; } int spapr_rtas_device_tree_setup(void *fdt, target_phys_addr_t rtas_addr, -- 1.7.10.4
[Qemu-devel] [PATCH 01/15] pseries: Don't allow duplicate registration of hcalls or RTAS calls
Currently the pseries machine code allows a callback to be registered for a hypercall number twice, as long as it's the same callback the second time. We don't test for duplicate registrations of RTAS callbacks at all so it will effectively be last registratiojn wins. This was originally done because it was awkward to ensure that the registration happened exactly once, but the code has since been restructured so that's no longer the case. Duplicate registration of a hypercall or RTAS call could well suggest a duplicate initialization which could cause other problems, so this patch makes duplicate registrations a bug, to prevent the old behaviour from hiding other bugs. Signed-off-by: David Gibson da...@gibson.dropbear.id.au --- hw/spapr_hcall.c |3 +-- hw/spapr_rtas.c |9 + 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c index 194d9c2..762493a 100644 --- a/hw/spapr_hcall.c +++ b/hw/spapr_hcall.c @@ -670,11 +670,10 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) } else { assert((opcode = KVMPPC_HCALL_BASE) (opcode = KVMPPC_HCALL_MAX)); - slot = kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE]; } -assert(!(*slot) || (fn == *slot)); +assert(!(*slot)); *slot = fn; } diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c index b808f80..b96078b 100644 --- a/hw/spapr_rtas.c +++ b/hw/spapr_rtas.c @@ -241,6 +241,15 @@ target_ulong spapr_rtas_call(sPAPREnvironment *spapr, void spapr_rtas_register(const char *name, spapr_rtas_fn fn) { +int i; + +for (i = 0; i (rtas_next - rtas_table); i++) { +if (strcmp(name, rtas_table[i].name) == 0) { +fprintf(stderr, RTAS call \%s\ registered twice\n, name); +exit(1); +} +} + assert(rtas_next (rtas_table + TOKEN_MAX)); rtas_next-name = name; -- 1.7.10.4
Re: [Qemu-devel] [PATCH v3] qemu-img: document 'info --backing-chain'
On Thu, Oct 18, 2012 at 11:25:34AM +0530, Kashyap Chamarthy wrote: Signed-off-by: Kashyap Chamarthy kashyap...@gmail.com --- qemu-img-cmds.hx | 4 ++-- qemu-img.texi| 21 - 2 files changed, 22 insertions(+), 3 deletions(-) Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
Re: [Qemu-devel] [Qemu-trivial] [PATCH] block: bdrv_create(): don't leak cco.filename on error
On Wed, Oct 17, 2012 at 04:45:25PM -0300, Luiz Capitulino wrote: Signed-off-by: Luiz Capitulino lcapitul...@redhat.com --- block.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) Reviewed-by: Stefan Hajnoczi stefa...@redhat.com
Re: [Qemu-devel] [PATCH 2/2] Add check_rxov into VMState.
On Wed, Oct 17, 2012 at 08:31:47PM +0200, Dmitry Fleytman wrote: @@ -1100,6 +1100,11 @@ static bool is_version_1(void *opaque, int version_id) return version_id == 1; } +static bool is_version_3(void *opaque, int version_id) +{ +return version_id == 1; +} version_id == 3?
[Qemu-devel] [PATCH 01/72] fix virtfs
Signed-off-by: Juan Quintela quint...@redhat.com --- fsdev/virtfs-proxy-helper.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fsdev/virtfs-proxy-helper.c b/fsdev/virtfs-proxy-helper.c index f9a8270..771dc4e 100644 --- a/fsdev/virtfs-proxy-helper.c +++ b/fsdev/virtfs-proxy-helper.c @@ -282,6 +282,7 @@ static int send_status(int sockfd, struct iovec *iovec, int status) */ static int setfsugid(int uid, int gid) { +int ret; /* * We still need DAC_OVERRIDE because we don't change * supplementary group ids, and hence may be subjected DAC rules @@ -290,8 +291,14 @@ static int setfsugid(int uid, int gid) CAP_DAC_OVERRIDE, }; -setfsgid(gid); -setfsuid(uid); +ret = setfsgid(gid); +if (ret 0) { +return ret; +} +ret = setfsuid(uid); +if (ret 0) { +return ret; +} if (uid != 0 || gid != 0) { return do_cap_set(cap_list, ARRAY_SIZE(cap_list), 0); -- 1.7.11.7
[Qemu-devel] [PATCH 13/30] savevm: New save live migration method: pending
Code just now does (simplified for clarity) if (qemu_savevm_state_iterate(s-file) == 1) { vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); qemu_savevm_state_complete(s-file); } Problem here is that qemu_savevm_state_iterate() returns 1 when it knows that remaining memory to sent takes less than max downtime. But this means that we could end spending 2x max_downtime, one downtime in qemu_savevm_iterate, and the other in qemu_savevm_state_complete. Changed code to: pending_size = qemu_savevm_state_pending(s-file, max_size); DPRINTF(pending size %lu max %lu\n, pending_size, max_size); if (pending_size = max_size) { ret = qemu_savevm_state_iterate(s-file); } else { vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); qemu_savevm_state_complete(s-file); } So what we do is: at current network speed, we calculate the maximum number of bytes we can sent: max_size. Then we ask every save_live section how much they have pending. If they are less than max_size, we move to complete phase, otherwise we do an iterate one. This makes things much simpler, because now individual sections don't have to caluclate the bandwidth (it was implossible to do right from there). Signed-off-by: Juan Quintela quint...@redhat.com --- arch_init.c | 48 ++-- block-migration.c | 49 ++--- buffered_file.c | 25 ++--- migration.c | 22 +++--- migration.h | 2 +- savevm.c | 19 +++ sysemu.h | 1 + vmstate.h | 1 + 8 files changed, 83 insertions(+), 84 deletions(-) diff --git a/arch_init.c b/arch_init.c index 2d29828..1eefef8 100644 --- a/arch_init.c +++ b/arch_init.c @@ -608,12 +608,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque) static int ram_save_iterate(QEMUFile *f, void *opaque) { -uint64_t bytes_transferred_last; -double bwidth = 0; int ret; int i; -uint64_t expected_downtime; -MigrationState *s = migrate_get_current(); +int64_t t0; qemu_mutex_lock_ramlist(); @@ -621,9 +618,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) reset_ram_globals(); } -bytes_transferred_last = bytes_transferred; -bwidth = qemu_get_clock_ns(rt_clock); - +t0 = qemu_get_clock_ns(rt_clock); i = 0; while ((ret = qemu_file_rate_limit(f)) == 0) { int bytes_sent; @@ -641,7 +636,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) iterations */ if ((i 63) == 0) { -uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 100; +uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 100; if (t1 MAX_WAIT) { DPRINTF(big wait: % PRIu64 milliseconds, %d iterations\n, t1, i); @@ -655,31 +650,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) return ret; } -bwidth = qemu_get_clock_ns(rt_clock) - bwidth; -bwidth = (bytes_transferred - bytes_transferred_last) / bwidth; - -/* if we haven't transferred anything this round, force - * expected_downtime to a very high value, but without - * crashing */ -if (bwidth == 0) { -bwidth = 0.01; -} - qemu_mutex_unlock_ramlist(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); -expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; -DPRINTF(ram_save_live: expected(% PRIu64 ) = max( PRIu64 )?\n, -expected_downtime, migrate_max_downtime()); - -if (expected_downtime = migrate_max_downtime()) { -migration_bitmap_sync(); -expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; -s-expected_downtime = expected_downtime / 100; /* ns - ms */ - -return expected_downtime = migrate_max_downtime(); -} -return 0; +return i; } static int ram_save_complete(QEMUFile *f, void *opaque) @@ -712,6 +686,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) return 0; } +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) +{ +uint64_t remaining_size; + +remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + +if (remaining_size max_size) { +migration_bitmap_sync(); +remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; +} +return remaining_size; +} + static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) { int ret, rc = 0; @@ -897,6 +884,7 @@ SaveVMHandlers savevm_ram_handlers = { .save_live_setup = ram_save_setup, .save_live_iterate = ram_save_iterate, .save_live_complete = ram_save_complete, +.save_live_pending = ram_save_pending, .load_state = ram_load, .cancel = ram_migration_cancel, }; diff --git a/block-migration.c b/block-migration.c index 71b9601..5db01fe 100644 ---
[Qemu-devel] [PATCH 23/30] migration: print times for end phase
Signed-off-by: Juan Quintela quint...@redhat.com --- block.c | 6 ++ cpus.c | 17 + migration.c | 9 + savevm.c| 13 + 4 files changed, 45 insertions(+) diff --git a/block.c b/block.c index e95f613..9cd6ffe 100644 --- a/block.c +++ b/block.c @@ -2648,9 +2648,15 @@ int bdrv_get_flags(BlockDriverState *bs) void bdrv_flush_all(void) { BlockDriverState *bs; +int64_t start_time, end_time; + +start_time = qemu_get_clock_ms(rt_clock); QTAILQ_FOREACH(bs, bdrv_states, list) { bdrv_flush(bs); +end_time = qemu_get_clock_ms(rt_clock); +printf(time flush device %s: %ld\n, bs-filename, + end_time - start_time); } } diff --git a/cpus.c b/cpus.c index 191cbf5..63e51f1 100644 --- a/cpus.c +++ b/cpus.c @@ -435,14 +435,31 @@ int cpu_is_stopped(CPUArchState *env) static void do_vm_stop(RunState state) { +int64_t start_time, end_time; + if (runstate_is_running()) { +start_time = qemu_get_clock_ms(rt_clock); cpu_disable_ticks(); +end_time = qemu_get_clock_ms(rt_clock); +printf(time cpu_disable_ticks %ld\n, end_time - start_time); pause_all_vcpus(); +end_time = qemu_get_clock_ms(rt_clock); +printf(time pause_all_vcpus %ld\n, end_time - start_time); runstate_set(state); +end_time = qemu_get_clock_ms(rt_clock); +printf(time runstate_set %ld\n, end_time - start_time); vm_state_notify(0, state); +end_time = qemu_get_clock_ms(rt_clock); +printf(time vmstate_notify %ld\n, end_time - start_time); bdrv_drain_all(); +end_time = qemu_get_clock_ms(rt_clock); +printf(time bdrv_drain_all %ld\n, end_time - start_time); bdrv_flush_all(); +end_time = qemu_get_clock_ms(rt_clock); +printf(time bdrv_flush_all %ld\n, end_time - start_time); monitor_protocol_event(QEVENT_STOP, NULL); +end_time = qemu_get_clock_ms(rt_clock); +printf(time monitor_protocol_event %ld\n, end_time - start_time); } } diff --git a/migration.c b/migration.c index e6ff1f1..3856a99 100644 --- a/migration.c +++ b/migration.c @@ -697,19 +697,26 @@ static void *buffered_file_thread(void *opaque) DPRINTF(done iterating\n); start_time = qemu_get_clock_ms(rt_clock); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); +end_time = qemu_get_clock_ms(rt_clock); +printf(wakeup_request %ld\n, end_time - start_time); if (old_vm_running) { vm_stop(RUN_STATE_FINISH_MIGRATE); } else { vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); } +end_time = qemu_get_clock_ms(rt_clock); +printf(vm_stop 2 %ld\n, end_time - start_time); ret = qemu_savevm_state_complete(m-file); if (ret 0) { qemu_mutex_unlock_iothread(); break; } else { +end_time = qemu_get_clock_ms(rt_clock); +printf(complete without error 3a %ld\n, end_time - start_time); migrate_fd_completed(m); } end_time = qemu_get_clock_ms(rt_clock); +printf(completed %ld\n, end_time - start_time); m-total_time = end_time - m-total_time; m-downtime = end_time - start_time; if (m-state != MIG_STATE_COMPLETED) { @@ -717,6 +724,8 @@ static void *buffered_file_thread(void *opaque) vm_start(); } } +end_time = qemu_get_clock_ms(rt_clock); +printf(end completed stage %ld\n, end_time - start_time); last_round = true; } } diff --git a/savevm.c b/savevm.c index a68c37b..3ebde5c 100644 --- a/savevm.c +++ b/savevm.c @@ -1629,9 +1629,14 @@ int qemu_savevm_state_complete(QEMUFile *f) { SaveStateEntry *se; int ret; +int64_t t1; +int64_t t0 = qemu_get_clock_ms(rt_clock); cpu_synchronize_all_states(); +t1 = qemu_get_clock_ms(rt_clock); +printf(synchronize_all_states %ld\n, t1 - t0); +t0 = t1; QTAILQ_FOREACH(se, savevm_handlers, entry) { if (!se-ops || !se-ops-save_live_complete) { continue; @@ -1652,6 +1657,11 @@ int qemu_savevm_state_complete(QEMUFile *f) return ret; } } +t1 = qemu_get_clock_ms(rt_clock); + +printf(migrate RAM %ld\n, t1 - t0); + +t0 = t1; QTAILQ_FOREACH(se, savevm_handlers, entry) { int len; @@ -1676,6 +1686,9 @@ int qemu_savevm_state_complete(QEMUFile *f) trace_savevm_section_end(se-section_id); } +t1 = qemu_get_clock_ms(rt_clock); + +printf(migrate rest devices %ld\n, t1 - t0); qemu_put_byte(f,
[Qemu-devel] [PATCH 22/30] migration: unfold rest of migrate_fd_put_ready() into thread
This will allow us finer control in next patches. Signed-off-by: Juan Quintela quint...@redhat.com --- migration.c | 95 ++--- 1 file changed, 41 insertions(+), 54 deletions(-) diff --git a/migration.c b/migration.c index 7206866..e6ff1f1 100644 --- a/migration.c +++ b/migration.c @@ -644,54 +644,6 @@ static int64_t buffered_get_rate_limit(void *opaque) return s-xfer_limit; } -static bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size) -{ -int ret; -uint64_t pending_size; -bool last_round = false; - -qemu_mutex_lock_iothread(); -DPRINTF(iterate\n); -pending_size = qemu_savevm_state_pending(s-file, max_size); -DPRINTF(pending size %lu max %lu\n, pending_size, max_size); -if (pending_size = max_size) { -ret = qemu_savevm_state_iterate(s-file); -if (ret 0) { -migrate_fd_error(s); -} -} else { -int old_vm_running = runstate_is_running(); -int64_t start_time, end_time; - -DPRINTF(done iterating\n); -start_time = qemu_get_clock_ms(rt_clock); -qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); -if (old_vm_running) { -vm_stop(RUN_STATE_FINISH_MIGRATE); -} else { -vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); -} - -if (qemu_savevm_state_complete(s-file) 0) { -migrate_fd_error(s); -} else { -migrate_fd_completed(s); -} -end_time = qemu_get_clock_ms(rt_clock); -s-total_time = end_time - s-total_time; -s-downtime = end_time - start_time; -if (s-state != MIG_STATE_COMPLETED) { -if (old_vm_running) { -vm_start(); -} -} -last_round = true; -} -qemu_mutex_unlock_iothread(); - -return last_round; -} - /* 100ms xfer_limit is the limit that we should write each 100ms */ #define BUFFER_DELAY 100 @@ -716,6 +668,7 @@ static void *buffered_file_thread(void *opaque) while (true) { int64_t current_time = qemu_get_clock_ms(rt_clock); +uint64_t pending_size; qemu_mutex_lock_iothread(); if (m-state != MIG_STATE_ACTIVE) { @@ -727,6 +680,46 @@ static void *buffered_file_thread(void *opaque) qemu_mutex_unlock_iothread(); break; } +if (s-bytes_xfer s-xfer_limit) { +DPRINTF(iterate\n); +pending_size = qemu_savevm_state_pending(m-file, max_size); +DPRINTF(pending size %lu max %lu\n, pending_size, max_size); +if (pending_size = max_size) { +ret = qemu_savevm_state_iterate(m-file); +if (ret 0) { +qemu_mutex_unlock_iothread(); +break; +} +} else { +int old_vm_running = runstate_is_running(); +int64_t start_time, end_time; + +DPRINTF(done iterating\n); +start_time = qemu_get_clock_ms(rt_clock); +qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); +if (old_vm_running) { +vm_stop(RUN_STATE_FINISH_MIGRATE); +} else { +vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); +} +ret = qemu_savevm_state_complete(m-file); +if (ret 0) { +qemu_mutex_unlock_iothread(); +break; +} else { +migrate_fd_completed(m); +} +end_time = qemu_get_clock_ms(rt_clock); +m-total_time = end_time - m-total_time; +m-downtime = end_time - start_time; +if (m-state != MIG_STATE_COMPLETED) { +if (old_vm_running) { +vm_start(); +} +} +last_round = true; +} +} qemu_mutex_unlock_iothread(); if (current_time = initial_time + BUFFER_DELAY) { @@ -747,12 +740,6 @@ static void *buffered_file_thread(void *opaque) usleep((initial_time + BUFFER_DELAY - current_time)*1000); } buffered_flush(s); - -DPRINTF(file is ready\n); -if (s-bytes_xfer s-xfer_limit) { -DPRINTF(notifying client\n); -last_round = migrate_fd_put_ready(m, max_size); -} } out: -- 1.7.11.7
[Qemu-devel] [PATCH 20/30] migration: move begining stage to the migration thread
Signed-off-by: Juan Quintela quint...@redhat.com --- migration.c | 30 +- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/migration.c b/migration.c index 65f96b7..8cacbc3 100644 --- a/migration.c +++ b/migration.c @@ -647,7 +647,6 @@ static int64_t buffered_get_rate_limit(void *opaque) static bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size) { int ret; -static bool first_time = true; uint64_t pending_size; bool last_round = false; @@ -657,17 +656,6 @@ static bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size) qemu_mutex_unlock_iothread(); return false; } -if (first_time) { -first_time = false; -DPRINTF(beginning savevm\n); -ret = qemu_savevm_state_begin(s-file, s-params); -if (ret 0) { -DPRINTF(failed, %d\n, ret); -migrate_fd_error(s); -qemu_mutex_unlock_iothread(); -return false; -} -} DPRINTF(iterate\n); pending_size = qemu_savevm_state_pending(s-file, max_size); @@ -716,9 +704,21 @@ static bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size) static void *buffered_file_thread(void *opaque) { QEMUFileBuffered *s = opaque; +MigrationState *m = s-migration_state; int64_t initial_time = qemu_get_clock_ms(rt_clock); int64_t max_size = 0; bool last_round = false; +int ret; + +qemu_mutex_lock_iothread(); +DPRINTF(beginning savevm\n); +ret = qemu_savevm_state_begin(m-file, m-params); +if (ret 0) { +DPRINTF(failed, %d\n, ret); +qemu_mutex_unlock_iothread(); +goto out; +} +qemu_mutex_unlock_iothread(); while (true) { int64_t current_time = qemu_get_clock_ms(rt_clock); @@ -748,10 +748,14 @@ static void *buffered_file_thread(void *opaque) DPRINTF(file is ready\n); if (s-bytes_xfer s-xfer_limit) { DPRINTF(notifying client\n); -last_round = migrate_fd_put_ready(s-migration_state, max_size); +last_round = migrate_fd_put_ready(m, max_size); } } +out: +if (ret 0) { +migrate_fd_error(m); +} g_free(s-buffer); g_free(s); return NULL; -- 1.7.11.7
[Qemu-devel] [PATCH 29/30] migration: Only go to the iterate stage if there is anything to send
Signed-off-by: Orit Wasserman owass...@redhat.com Signed-off-by: Juan Quintela quint...@redhat.com --- migration.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/migration.c b/migration.c index 3856a99..554d79a 100644 --- a/migration.c +++ b/migration.c @@ -684,7 +684,7 @@ static void *buffered_file_thread(void *opaque) DPRINTF(iterate\n); pending_size = qemu_savevm_state_pending(m-file, max_size); DPRINTF(pending size %lu max %lu\n, pending_size, max_size); -if (pending_size = max_size) { +if (pending_size pending_size = max_size) { ret = qemu_savevm_state_iterate(m-file); if (ret 0) { qemu_mutex_unlock_iothread(); -- 1.7.11.7
Re: [Qemu-devel] [PATCHv2 1/2] signal: added a wrapper for sigprocmask function
On 17 October 2012 23:06, Alex Barcelo abarc...@ac.upc.edu wrote: On Wed, Oct 17, 2012 at 5:01 PM, Peter Maydell peter.mayd...@linaro.org wrote: In my comments on v1 of this patch I wrote: I think all the uses of sigprocmask() in linux-user/signal.c also need to be do_sigprocmask(), as they are the guest trying to control its signal mask (via the mask it specifies for running signal handlers, or the mask it passes back when restoring context on return from a signal handler). I saw sigprocmask being used only inside sigreturn functions (hope I checked it correctly). I thought (maybe wrongly) that sigreturn should not be wrapped, just as internal sigprocmask calls are not proxyfied through do_sigprocmask. Sigreturn functions should not be called from guest directly, so they should not be a threat. And if some application uses it... well, then it is its fault, as POSIX does not guarantee any behavior (am I right?) sigreturn functions operate on a structure passed in from the guest. When the kernel enters a signal handler it stores the state of the interrupted process on that process' stack before setting it up to run the signal handler function. Return from the signal handler involves the kernel (or in this case QEMU) restoring all that state. Part of that state is the signal mask. Since the guest might have changed that state, we must not trust it and so it goes through the wrapper. I can't tell if it is worth doing it, or if there is any real case where qemu-user behavior is improving. The point is consistency of design. Masks from the guest go through the wrapper; masks used internally do not. PS: if you disagree with a point in code review (and reviewers are not always right!) it is better to send an email making the case for why you disagree. If you just ignore it and send v2 patches then you're forcing reviewers to hunt through your patch all over again to check whether you paid attention the first time round... -- PMM
[Qemu-devel] [PATCH 17/30] migration: move migration_fd_put_ready()
Put it near its use and un-export it. Signed-off-by: Juan Quintela quint...@redhat.com --- migration.c | 132 ++-- migration.h | 1 - 2 files changed, 66 insertions(+), 67 deletions(-) diff --git a/migration.c b/migration.c index b1567f3..a99653e 100644 --- a/migration.c +++ b/migration.c @@ -300,72 +300,6 @@ ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, return ret; } -bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size) -{ -int ret; -static bool first_time = true; -uint64_t pending_size; -bool last_round = false; - -qemu_mutex_lock_iothread(); -if (s-state != MIG_STATE_ACTIVE) { -DPRINTF(put_ready returning because of non-active state\n); -qemu_mutex_unlock_iothread(); -return false; -} -if (first_time) { -first_time = false; -DPRINTF(beginning savevm\n); -ret = qemu_savevm_state_begin(s-file, s-params); -if (ret 0) { -DPRINTF(failed, %d\n, ret); -migrate_fd_error(s); -qemu_mutex_unlock_iothread(); -return false; -} -} - -DPRINTF(iterate\n); -pending_size = qemu_savevm_state_pending(s-file, max_size); -DPRINTF(pending size %lu max %lu\n, pending_size, max_size); -if (pending_size = max_size) { -ret = qemu_savevm_state_iterate(s-file); -if (ret 0) { -migrate_fd_error(s); -} -} else { -int old_vm_running = runstate_is_running(); -int64_t start_time, end_time; - -DPRINTF(done iterating\n); -start_time = qemu_get_clock_ms(rt_clock); -qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); -if (old_vm_running) { -vm_stop(RUN_STATE_FINISH_MIGRATE); -} else { -vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); -} - -if (qemu_savevm_state_complete(s-file) 0) { -migrate_fd_error(s); -} else { -migrate_fd_completed(s); -} -end_time = qemu_get_clock_ms(rt_clock); -s-total_time = end_time - s-total_time; -s-downtime = end_time - start_time; -if (s-state != MIG_STATE_COMPLETED) { -if (old_vm_running) { -vm_start(); -} -} -last_round = true; -} -qemu_mutex_unlock_iothread(); - -return last_round; -} - static void migrate_fd_cancel(MigrationState *s) { if (s-state != MIG_STATE_ACTIVE) @@ -718,6 +652,72 @@ static int64_t buffered_get_rate_limit(void *opaque) return s-xfer_limit; } +static bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size) +{ +int ret; +static bool first_time = true; +uint64_t pending_size; +bool last_round = false; + +qemu_mutex_lock_iothread(); +if (s-state != MIG_STATE_ACTIVE) { +DPRINTF(put_ready returning because of non-active state\n); +qemu_mutex_unlock_iothread(); +return false; +} +if (first_time) { +first_time = false; +DPRINTF(beginning savevm\n); +ret = qemu_savevm_state_begin(s-file, s-params); +if (ret 0) { +DPRINTF(failed, %d\n, ret); +migrate_fd_error(s); +qemu_mutex_unlock_iothread(); +return false; +} +} + +DPRINTF(iterate\n); +pending_size = qemu_savevm_state_pending(s-file, max_size); +DPRINTF(pending size %lu max %lu\n, pending_size, max_size); +if (pending_size = max_size) { +ret = qemu_savevm_state_iterate(s-file); +if (ret 0) { +migrate_fd_error(s); +} +} else { +int old_vm_running = runstate_is_running(); +int64_t start_time, end_time; + +DPRINTF(done iterating\n); +start_time = qemu_get_clock_ms(rt_clock); +qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); +if (old_vm_running) { +vm_stop(RUN_STATE_FINISH_MIGRATE); +} else { +vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); +} + +if (qemu_savevm_state_complete(s-file) 0) { +migrate_fd_error(s); +} else { +migrate_fd_completed(s); +} +end_time = qemu_get_clock_ms(rt_clock); +s-total_time = end_time - s-total_time; +s-downtime = end_time - start_time; +if (s-state != MIG_STATE_COMPLETED) { +if (old_vm_running) { +vm_start(); +} +} +last_round = true; +} +qemu_mutex_unlock_iothread(); + +return last_round; +} + /* 100ms xfer_limit is the limit that we should write each 100ms */ #define BUFFER_DELAY 100 diff --git a/migration.h b/migration.h index 40c8e9c..f4c4d1e 100644 --- a/migration.h +++ b/migration.h @@ -81,7 +81,6 @@ void migrate_fd_connect(MigrationState *s); ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
[Qemu-devel] [PATCH 00/30] Migration thread 20121017 edition
Hi This series apply on top of the refactoring that I sent yesterday. Changes from the last version include: - buffered_file.c is gone, its functionality is merged in migration.c special attention to the megre of buffered_file_thread() migration_file_put_notify(). - Some more bitmap handling optimizations (thanks to Orit Paolo for suggestions and code and Vinod for testing) Please review. Included is the pointer to the full tree. Thanks, Juan. The following changes since commit b6348f29d033d5a8a26f633d2ee94362595f32a4: target-arm/translate: Fix RRX operands (2012-10-17 19:56:46 +0200) are available in the git repository at: http://repo.or.cz/r/qemu/quintela.git migration-thread-20121017 for you to fetch changes up to 486dabc29f56d8f0e692395d4a6cd483b3a77f01: ram: optimize migration bitmap walking (2012-10-18 09:20:34 +0200) v3: This is work in progress on top of the previous migration series just sent. - Introduces a thread for migration instead of using a timer and callback - remove the writting to the fd from the iothread lock - make the writes synchronous - Introduce a new pending method that returns how many bytes are pending for one save live section - last patch just shows printfs to see where the time is being spent on the migration complete phase. (yes it pollutes all uses of stop on the monitor) So far I have found that we spent a lot of time on bdrv_flush_all() It can take from 1ms to 600ms (yes, it is not a typo). That dwarfs the migration default downtime time (30ms). Stop all vcpus: - it works now (after the changes on qemu_cpu_is_vcpu on the previous series) caveat is that the time that brdv_flush_all() takes is unpredictable. Any silver bullets? Paolo suggested to call for migration completion phase: bdrv_aio_flush_all(); Sent the dirty pages; bdrv_drain_all() brdv_flush_all() another round through the bitmap in case that completions have changed some page Paolo, did I get it right? Any other suggestion? - migrate_cancel() is not properly implemented (as in the film that we take no locks, ...) - expected_downtime is not calculated. I am about to merge migrate_fd_put_ready buffered_thread() and that would make trivial to calculate. It outputs something like: wakeup_request 0 time cpu_disable_ticks 0 time pause_all_vcpus 1 time runstate_set 1 time vmstate_notify 2 time bdrv_drain_all 2 time flush device /dev/disk/by-path/ip-192.168.10.200:3260-iscsi-iqn.2010-12.org.trasno:iscsi.lvm-lun-1: 3 time flush device : 3 time flush device : 3 time flush device : 3 time bdrv_flush_all 5 time monitor_protocol_event 5 vm_stop 2 5 synchronize_all_states 1 migrate RAM 37 migrate rest devices 1 complete without error 3a 44 completed 45 end completed stage 45 As you can see, we estimate that we can sent all pending data in 30ms, it took 37ms to send the RAM (that is what we calculate). So estimation is quite good. What it gives me lots of variation is on the line with device name of time flush device. That is what varies between 1ms to 600ms This is in a completely idle guest. I am running: while (1) { uint64_t delay; if (gettimeofday(t0, NULL) != 0) perror(gettimeofday 1); if (usleep(ms2us(10)) != 0) perror(usleep); if (gettimeofday(t1, NULL) != 0) perror(gettimeofday 2); t1.tv_usec -= t0.tv_usec; if (t1.tv_usec 0) { t1.tv_usec += 100; t1.tv_sec--; } t1.tv_sec -= t0.tv_sec; delay = t1.tv_sec * 1000 + t1.tv_usec/1000; if (delay 100) printf(delay of %ld ms\n, delay); } To see the latency inside the guest (i.e. ask for a 10ms sleep, and see how long it takes). [root@d1 ~]# ./timer delay of 161 ms delay of 135 ms delay of 143 ms delay of 132 ms delay of 131 ms delay of 141 ms delay of 113 ms delay of 119 ms delay of 114 ms But that values are independent of migration. Without even starting the migration, idle guest doing nothing, we get it sometimes. Juan Quintela (27): buffered_file: Move from using a timer to use a thread migration: make qemu_fopen_ops_buffered() return void migration: stop all cpus correctly migration: make writes blocking migration: remove unfreeze logic migration: take finer locking buffered_file: Unfold the trick to restart generating migration data buffered_file: don't flush on put buffer buffered_file: unfold buffered_append in buffered_put_buffer savevm: New save live migration method: pending migration: include qemu-file.h migration-fd: remove duplicate include migration: move buffered_file.c code into migration.c migration: move migration_fd_put_ready() migration: Inline qemu_fopen_ops_buffered into migrate_fd_connect migration: move
[Qemu-devel] [PATCH 19/30] migration: move migration notifier
At this point, it is waranteed that state is ACTIVE. Old position didn't assured hat. Signed-off-by: Juan Quintela quint...@redhat.com --- migration.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/migration.c b/migration.c index 1f783e1..65f96b7 100644 --- a/migration.c +++ b/migration.c @@ -432,8 +432,6 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk, } return; } - -notifier_list_notify(migration_state_notifiers, s); } void qmp_migrate_cancel(Error **errp) @@ -779,4 +777,5 @@ void migrate_fd_connect(MigrationState *migration_state) qemu_thread_create(s-thread, buffered_file_thread, s, QEMU_THREAD_DETACHED); +notifier_list_notify(migration_state_notifiers, s); } -- 1.7.11.7
[Qemu-devel] [PATCH 27/30] ram: Use memory_region_test_and_clear_dirty
This avoids having to do two walks over the dirty bitmap, once reading the dirty bits, and anthoer cleaning them. Signed-off-by: Juan Quintela quint...@redhat.com --- arch_init.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch_init.c b/arch_init.c index 6f39ebd..8391375 100644 --- a/arch_init.c +++ b/arch_init.c @@ -390,13 +390,12 @@ static void migration_bitmap_sync(void) QLIST_FOREACH(block, ram_list.blocks, next) { for (addr = 0; addr block-length; addr += TARGET_PAGE_SIZE) { -if (memory_region_get_dirty(block-mr, addr, TARGET_PAGE_SIZE, -DIRTY_MEMORY_MIGRATION)) { +if (memory_region_test_and_clear_dirty(block-mr, + addr, TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION)) { migration_bitmap_set_dirty(block-mr, addr); } } -memory_region_reset_dirty(block-mr, 0, block-length, - DIRTY_MEMORY_MIGRATION); } trace_migration_bitmap_sync_end(migration_dirty_pages - num_dirty_pages_init); -- 1.7.11.7
[Qemu-devel] [PATCH 25/30] ram: Add last_sent_block
This is the last block from where we have sent data. Signed-off-by: Orit Wasserman owass...@redhat.com Signed-off-by: Juan Quintela quint...@redhat.com --- arch_init.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch_init.c b/arch_init.c index 8ac4c94..6f39ebd 100644 --- a/arch_init.c +++ b/arch_init.c @@ -336,6 +336,8 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, /* This is the last block that we have visited serching for dirty pages */ static RAMBlock *last_seen_block; +/* This is the last block from where we have sent data */ +static RAMBlock *last_sent_block; static ram_addr_t last_offset; static unsigned long *migration_bitmap; static uint64_t migration_dirty_pages; @@ -433,7 +435,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage) mr = block-mr; if (migration_bitmap_test_and_reset_dirty(mr, offset)) { uint8_t *p; -int cont = (block == last_seen_block) ? +int cont = (block == last_sent_block) ? RAM_SAVE_FLAG_CONTINUE : 0; p = memory_region_get_ram_ptr(mr) + offset; @@ -462,6 +464,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage) /* if page is unmodified, continue to the next */ if (bytes_sent != 0) { +last_sent_block = block; break; } } @@ -560,6 +563,7 @@ static void ram_migration_cancel(void *opaque) static void reset_ram_globals(void) { last_seen_block = NULL; +last_sent_block = NULL; last_offset = 0; last_version = ram_list.version; sort_ram_list(); -- 1.7.11.7
Re: [Qemu-devel] [PATCH 2/2] Add check_rxov into VMState.
Oops, you are right :) On Thu, Oct 18, 2012 at 9:24 AM, Stefan Hajnoczi stefa...@gmail.com wrote: On Wed, Oct 17, 2012 at 08:31:47PM +0200, Dmitry Fleytman wrote: @@ -1100,6 +1100,11 @@ static bool is_version_1(void *opaque, int version_id) return version_id == 1; } +static bool is_version_3(void *opaque, int version_id) +{ +return version_id == 1; +} version_id == 3? -- Dmitry Fleytman Technology Expert and Consultant, Daynix Computing Ltd. Cell: +972-54-2819481 Skype: dmitry.fleytman
[Qemu-devel] [PATCH 18/30] migration: Inline qemu_fopen_ops_buffered into migrate_fd_connect
Signed-off-by: Juan Quintela quint...@redhat.com --- migration.c | 9 ++--- migration.h | 2 -- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/migration.c b/migration.c index a99653e..1f783e1 100644 --- a/migration.c +++ b/migration.c @@ -345,12 +345,6 @@ bool migration_has_failed(MigrationState *s) s-state == MIG_STATE_ERROR); } -void migrate_fd_connect(MigrationState *s) -{ -s-state = MIG_STATE_ACTIVE; -qemu_fopen_ops_buffered(s); -} - static MigrationState *migrate_init(const MigrationParams *params) { MigrationState *s = migrate_get_current(); @@ -765,10 +759,11 @@ static void *buffered_file_thread(void *opaque) return NULL; } -void qemu_fopen_ops_buffered(MigrationState *migration_state) +void migrate_fd_connect(MigrationState *migration_state) { QEMUFileBuffered *s; +migration_state-state = MIG_STATE_ACTIVE; s = g_malloc0(sizeof(*s)); s-migration_state = migration_state; diff --git a/migration.h b/migration.h index f4c4d1e..60a2335 100644 --- a/migration.h +++ b/migration.h @@ -127,6 +127,4 @@ int migrate_use_xbzrle(void); int64_t migrate_xbzrle_cache_size(void); int64_t xbzrle_cache_resize(int64_t new_size); - -void qemu_fopen_ops_buffered(MigrationState *migration_state); #endif -- 1.7.11.7
[Qemu-devel] [PATCH 01/30] split MRU ram list
From: Paolo Bonzini pbonz...@redhat.com Outside the execution threads the normal, non-MRU-ized order of the RAM blocks should always be enough. So manage two separate lists, which will have separate locking rules. Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Juan Quintela quint...@redhat.com --- arch_init.c | 1 + cpu-all.h | 4 +++- exec.c | 18 +- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/arch_init.c b/arch_init.c index e6effe8..4293557 100644 --- a/arch_init.c +++ b/arch_init.c @@ -48,6 +48,7 @@ #include qemu/page_cache.h #include qmp-commands.h #include trace.h +#include cpu-all.h #ifdef DEBUG_ARCH_INIT #define DPRINTF(fmt, ...) \ diff --git a/cpu-all.h b/cpu-all.h index 6aa7e58..6558a6f 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -490,8 +490,9 @@ typedef struct RAMBlock { ram_addr_t offset; ram_addr_t length; uint32_t flags; -char idstr[256]; +QLIST_ENTRY(RAMBlock) next_mru; QLIST_ENTRY(RAMBlock) next; +char idstr[256]; #if defined(__linux__) !defined(TARGET_S390X) int fd; #endif @@ -499,6 +500,7 @@ typedef struct RAMBlock { typedef struct RAMList { uint8_t *phys_dirty; +QLIST_HEAD(, RAMBlock) blocks_mru; QLIST_HEAD(, RAMBlock) blocks; } RAMList; extern RAMList ram_list; diff --git a/exec.c b/exec.c index e63ad0d..718bbc2 100644 --- a/exec.c +++ b/exec.c @@ -56,6 +56,7 @@ #include xen-mapcache.h #include trace.h #endif +#include cpu-all.h #include cputlb.h @@ -112,7 +113,10 @@ static uint8_t *code_gen_ptr; int phys_ram_fd; static int in_migration; -RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) }; +RAMList ram_list = { +.blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks), +.blocks_mru = QLIST_HEAD_INITIALIZER(ram_list.blocks_mru) +}; static MemoryRegion *system_memory; static MemoryRegion *system_io; @@ -633,6 +637,7 @@ bool tcg_enabled(void) void cpu_exec_init_all(void) { #if !defined(CONFIG_USER_ONLY) +qemu_mutex_init(ram_list.mutex); memory_map_init(); io_mem_init(); #endif @@ -2568,6 +2573,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, new_block-length = size; QLIST_INSERT_HEAD(ram_list.blocks, new_block, next); +QLIST_INSERT_HEAD(ram_list.blocks_mru, new_block, next_mru); ram_list.phys_dirty = g_realloc(ram_list.phys_dirty, last_ram_offset() TARGET_PAGE_BITS); @@ -2595,6 +2601,7 @@ void qemu_ram_free_from_ptr(ram_addr_t addr) QLIST_FOREACH(block, ram_list.blocks, next) { if (addr == block-offset) { QLIST_REMOVE(block, next); +QLIST_REMOVE(block, next_mru); g_free(block); return; } @@ -2608,6 +2615,7 @@ void qemu_ram_free(ram_addr_t addr) QLIST_FOREACH(block, ram_list.blocks, next) { if (addr == block-offset) { QLIST_REMOVE(block, next); +QLIST_REMOVE(block, next_mru); if (block-flags RAM_PREALLOC_MASK) { ; } else if (mem_path) { @@ -2713,12 +2721,12 @@ void *qemu_get_ram_ptr(ram_addr_t addr) { RAMBlock *block; -QLIST_FOREACH(block, ram_list.blocks, next) { +QLIST_FOREACH(block, ram_list.blocks_mru, next_mru) { if (addr - block-offset block-length) { /* Move this entry to to start of the list. */ if (block != QLIST_FIRST(ram_list.blocks)) { -QLIST_REMOVE(block, next); -QLIST_INSERT_HEAD(ram_list.blocks, block, next); +QLIST_REMOVE(block, next_mru); +QLIST_INSERT_HEAD(ram_list.blocks_mru, block, next_mru); } if (xen_enabled()) { /* We need to check if the requested address is in the RAM @@ -2813,7 +2821,7 @@ int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr) return 0; } -QLIST_FOREACH(block, ram_list.blocks, next) { +QLIST_FOREACH(block, ram_list.blocks_mru, next_mru) { /* This case append when the block is not mapped. */ if (block-host == NULL) { continue; -- 1.7.11.7
Re: [Qemu-devel] [PATCH 1/6] qdev: rework device properties.
Il 17/10/2012 23:50, Eduardo Habkost ha scritto: I suppose that's the usual assumption when the file doesn't have an explicit license, as it's the license specified on the LICENSE file. The only problem is that the LICENSE file doesn't specify the GPL version, so it's a bit complicated. Some opinions can be found here: http://article.gmane.org/gmane.comp.emulators.qemu/122665. Unless every single author replies and accepts the adoption of another license (which I find very unlikely), I plan to submit a patch adding a GPLv2+ license header. GPLv2+ should be the default license for files without a heading, except if the history of the file shows that the code came originally from the Linux kernel or another GPLv2-only project/file. Paolo
[Qemu-devel] [PATCH 00/30] Migration thread 20121017 edition
Hi This series apply on top of the refactoring that I sent yesterday. Changes from the last version include: - buffered_file.c is gone, its functionality is merged in migration.c special attention to the megre of buffered_file_thread() migration_file_put_notify(). - Some more bitmap handling optimizations (thanks to Orit Paolo for suggestions and code and Vinod for testing) Please review. Included is the pointer to the full tree. Thanks, Juan. The following changes since commit b6348f29d033d5a8a26f633d2ee94362595f32a4: target-arm/translate: Fix RRX operands (2012-10-17 19:56:46 +0200) are available in the git repository at: http://repo.or.cz/r/qemu/quintela.git migration-thread-20121017 for you to fetch changes up to 486dabc29f56d8f0e692395d4a6cd483b3a77f01: ram: optimize migration bitmap walking (2012-10-18 09:20:34 +0200) v3: This is work in progress on top of the previous migration series just sent. - Introduces a thread for migration instead of using a timer and callback - remove the writting to the fd from the iothread lock - make the writes synchronous - Introduce a new pending method that returns how many bytes are pending for one save live section - last patch just shows printfs to see where the time is being spent on the migration complete phase. (yes it pollutes all uses of stop on the monitor) So far I have found that we spent a lot of time on bdrv_flush_all() It can take from 1ms to 600ms (yes, it is not a typo). That dwarfs the migration default downtime time (30ms). Stop all vcpus: - it works now (after the changes on qemu_cpu_is_vcpu on the previous series) caveat is that the time that brdv_flush_all() takes is unpredictable. Any silver bullets? Paolo suggested to call for migration completion phase: bdrv_aio_flush_all(); Sent the dirty pages; bdrv_drain_all() brdv_flush_all() another round through the bitmap in case that completions have changed some page Paolo, did I get it right? Any other suggestion? - migrate_cancel() is not properly implemented (as in the film that we take no locks, ...) - expected_downtime is not calculated. I am about to merge migrate_fd_put_ready buffered_thread() and that would make trivial to calculate. It outputs something like: wakeup_request 0 time cpu_disable_ticks 0 time pause_all_vcpus 1 time runstate_set 1 time vmstate_notify 2 time bdrv_drain_all 2 time flush device /dev/disk/by-path/ip-192.168.10.200:3260-iscsi-iqn.2010-12.org.trasno:iscsi.lvm-lun-1: 3 time flush device : 3 time flush device : 3 time flush device : 3 time bdrv_flush_all 5 time monitor_protocol_event 5 vm_stop 2 5 synchronize_all_states 1 migrate RAM 37 migrate rest devices 1 complete without error 3a 44 completed 45 end completed stage 45 As you can see, we estimate that we can sent all pending data in 30ms, it took 37ms to send the RAM (that is what we calculate). So estimation is quite good. What it gives me lots of variation is on the line with device name of time flush device. That is what varies between 1ms to 600ms This is in a completely idle guest. I am running: while (1) { uint64_t delay; if (gettimeofday(t0, NULL) != 0) perror(gettimeofday 1); if (usleep(ms2us(10)) != 0) perror(usleep); if (gettimeofday(t1, NULL) != 0) perror(gettimeofday 2); t1.tv_usec -= t0.tv_usec; if (t1.tv_usec 0) { t1.tv_usec += 100; t1.tv_sec--; } t1.tv_sec -= t0.tv_sec; delay = t1.tv_sec * 1000 + t1.tv_usec/1000; if (delay 100) printf(delay of %ld ms\n, delay); } To see the latency inside the guest (i.e. ask for a 10ms sleep, and see how long it takes). [root@d1 ~]# ./timer delay of 161 ms delay of 135 ms delay of 143 ms delay of 132 ms delay of 131 ms delay of 141 ms delay of 113 ms delay of 119 ms delay of 114 ms But that values are independent of migration. Without even starting the migration, idle guest doing nothing, we get it sometimes. Juan Quintela (27): buffered_file: Move from using a timer to use a thread migration: make qemu_fopen_ops_buffered() return void migration: stop all cpus correctly migration: make writes blocking migration: remove unfreeze logic migration: take finer locking buffered_file: Unfold the trick to restart generating migration data buffered_file: don't flush on put buffer buffered_file: unfold buffered_append in buffered_put_buffer savevm: New save live migration method: pending migration: include qemu-file.h migration-fd: remove duplicate include migration: move buffered_file.c code into migration.c migration: move migration_fd_put_ready() migration: Inline qemu_fopen_ops_buffered into migrate_fd_connect migration: move
Re: [Qemu-devel] [PATCH 1/2] Ignore RX tail kicks when RX disabled.
On Wed, Oct 17, 2012 at 08:31:46PM +0200, Dmitry Fleytman wrote: Device RX initization from driver's side consists of following steps: 1. Initialize head and tail of RX ring to 0 2. Enable Rx (set bit in RCTL register) 3. Allocate buffers, fill descriptors 4. Write ring tail Forth operation signals hardware that RX buffers available and it may start packets indication. Current implementation treats first operation (write 0 to ring tail) as signal of buffers availability and starts data transfers as soon as RX enable indicaton arrives. This is not correct because there is a chance that ring is still empty (third action not performed yet) and then memory corruption occures. Any idea what the point of hw/e1000.c check_rxov is? I see nothing in the datasheet that requires these semantics. The Linux e1000 driver never enables the RXO (rx fifo overflow) interrupt, only RXDMT0 (receive descriptor minimum threshold). This means hw/e1000.c will not upset the Linux e1000 driver when e1000_receive() gets called with check_rxov == 1 and RDH == RDT == 0. BTW the Linux e1000 driver does not follow the sequence recommended in the datasheet 14.4 Receive Initialization, which would avoid the weird window of time where RDH == RDT == 0. If we get rid of check_rxov and always check rxbuf space then we have the correct behavior. I'm a little nervous of simply dropping it because its purpose is unclear to me :(. Stefan
[Qemu-devel] [PATCH 10/30] buffered_file: Unfold the trick to restart generating migration data
This was needed before due to the way that the callbacks worked. Signed-off-by: Juan Quintela quint...@redhat.com --- buffered_file.c | 17 - 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/buffered_file.c b/buffered_file.c index a3aba2a..7692950 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -113,14 +113,6 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in return error; } -if (pos == 0 size == 0) { -DPRINTF(file is ready\n); -if (s-bytes_xfer s-xfer_limit) { -DPRINTF(notifying client\n); -migrate_fd_put_ready(s-migration_state); -} -} - return size; } @@ -216,8 +208,15 @@ static void *buffered_file_thread(void *opaque) /* usleep expects microseconds */ usleep((expire_time - current_time)*1000); } -buffered_put_buffer(s, NULL, 0, 0); +buffered_flush(s); + +DPRINTF(file is ready\n); +if (s-bytes_xfer s-xfer_limit) { +DPRINTF(notifying client\n); +migrate_fd_put_ready(s-migration_state); +} } + g_free(s-buffer); g_free(s); return NULL; -- 1.7.11.7
[Qemu-devel] [PATCH 09/30] migration: take finer locking
Instead of locking the whole migration_thread inside loop, just lock migration_fd_put_notify, that is what interacts with the rest of the world. Signed-off-by: Juan Quintela quint...@redhat.com --- buffered_file.c | 2 -- migration.c | 5 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/buffered_file.c b/buffered_file.c index 876cc8c..a3aba2a 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -216,9 +216,7 @@ static void *buffered_file_thread(void *opaque) /* usleep expects microseconds */ usleep((expire_time - current_time)*1000); } -qemu_mutex_lock_iothread(); buffered_put_buffer(s, NULL, 0, 0); -qemu_mutex_unlock_iothread(); } g_free(s-buffer); g_free(s); diff --git a/migration.c b/migration.c index 29ee710..82c2663 100644 --- a/migration.c +++ b/migration.c @@ -305,8 +305,10 @@ void migrate_fd_put_ready(MigrationState *s) int ret; static bool first_time = true; +qemu_mutex_lock_iothread(); if (s-state != MIG_STATE_ACTIVE) { DPRINTF(put_ready returning because of non-active state\n); +qemu_mutex_unlock_iothread(); return; } if (first_time) { @@ -316,6 +318,7 @@ void migrate_fd_put_ready(MigrationState *s) if (ret 0) { DPRINTF(failed, %d\n, ret); migrate_fd_error(s); +qemu_mutex_unlock_iothread(); return; } } @@ -351,6 +354,8 @@ void migrate_fd_put_ready(MigrationState *s) } } } +qemu_mutex_unlock_iothread(); + } static void migrate_fd_cancel(MigrationState *s) -- 1.7.11.7
[Qemu-devel] [PATCH 15/30] migration-fd: remove duplicate include
Signed-off-by: Juan Quintela quint...@redhat.com --- migration-fd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/migration-fd.c b/migration-fd.c index 25f0245..8f6b55f 100644 --- a/migration-fd.c +++ b/migration-fd.c @@ -20,7 +20,6 @@ #include qemu-char.h #include qemu-file.h #include block.h -#include qemu_socket.h //#define DEBUG_MIGRATION_FD -- 1.7.11.7
[Qemu-devel] [PATCH 16/30] migration: move buffered_file.c code into migration.c
This only moves the code (also from buffered_file.h to migration.h). Fix whitespace until checkpatch is happy. Signed-off-by: Juan Quintela quint...@redhat.com --- Makefile.objs | 2 +- buffered_file.c | 244 buffered_file.h | 22 - migration.c | 218 +- migration.h | 1 + 5 files changed, 219 insertions(+), 268 deletions(-) delete mode 100644 buffered_file.c delete mode 100644 buffered_file.h diff --git a/Makefile.objs b/Makefile.objs index 74b3542..3de8f27 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -73,7 +73,7 @@ extra-obj-$(CONFIG_LINUX) += fsdev/ common-obj-y += tcg-runtime.o host-utils.o main-loop.o common-obj-y += input.o -common-obj-y += buffered_file.o migration.o migration-tcp.o +common-obj-y += migration.o migration-tcp.o common-obj-y += qemu-char.o #aio.o common-obj-y += block-migration.o iohandler.o common-obj-y += pflib.o diff --git a/buffered_file.c b/buffered_file.c deleted file mode 100644 index c21f847..000 --- a/buffered_file.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * QEMU buffered QEMUFile - * - * Copyright IBM, Corp. 2008 - * - * Authors: - * Anthony Liguori aligu...@us.ibm.com - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include qemu-common.h -#include hw/hw.h -#include qemu-timer.h -#include qemu-char.h -#include buffered_file.h -#include qemu-thread.h - -//#define DEBUG_BUFFERED_FILE - -typedef struct QEMUFileBuffered -{ -MigrationState *migration_state; -QEMUFile *file; -size_t bytes_xfer; -size_t xfer_limit; -uint8_t *buffer; -size_t buffer_size; -size_t buffer_capacity; -QemuThread thread; -} QEMUFileBuffered; - -#ifdef DEBUG_BUFFERED_FILE -#define DPRINTF(fmt, ...) \ -do { printf(buffered-file: fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ -do { } while (0) -#endif - -static ssize_t buffered_flush(QEMUFileBuffered *s) -{ -size_t offset = 0; -ssize_t ret = 0; - -DPRINTF(flushing %zu byte(s) of data\n, s-buffer_size); - -while (s-bytes_xfer s-xfer_limit offset s-buffer_size) { - -ret = migrate_fd_put_buffer(s-migration_state, s-buffer + offset, -s-buffer_size - offset); -if (ret = 0) { -DPRINTF(error flushing data, %zd\n, ret); -break; -} else { -DPRINTF(flushed %zd byte(s)\n, ret); -offset += ret; -s-bytes_xfer += ret; -} -} - -DPRINTF(flushed %zu of %zu byte(s)\n, offset, s-buffer_size); -memmove(s-buffer, s-buffer + offset, s-buffer_size - offset); -s-buffer_size -= offset; - -if (ret 0) { -return ret; -} -return offset; -} - -static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) -{ -QEMUFileBuffered *s = opaque; -ssize_t error; - -DPRINTF(putting %d bytes at % PRId64 \n, size, pos); - -error = qemu_file_get_error(s-file); -if (error) { -DPRINTF(flush when error, bailing: %s\n, strerror(-error)); -return error; -} - -if (size = 0) { -return size; -} - -if (size (s-buffer_capacity - s-buffer_size)) { -DPRINTF(increasing buffer capacity from %zu by %zu\n, -s-buffer_capacity, size + 1024); - -s-buffer_capacity += size + 1024; - -s-buffer = g_realloc(s-buffer, s-buffer_capacity); -} - -memcpy(s-buffer + s-buffer_size, buf, size); -s-buffer_size += size; - -return size; -} - -static int buffered_close(void *opaque) -{ -QEMUFileBuffered *s = opaque; -ssize_t ret = 0; -int ret2; - -DPRINTF(closing\n); - -s-xfer_limit = INT_MAX; -while (!qemu_file_get_error(s-file) s-buffer_size) { -ret = buffered_flush(s); -if (ret 0) { -break; -} -} - -ret2 = migrate_fd_close(s-migration_state); -if (ret = 0) { -ret = ret2; -} -ret = migrate_fd_close(s-migration_state); -s-migration_state-complete = true; -return ret; -} - -/* - * The meaning of the return values is: - * 0: We can continue sending - * 1: Time to stop - * negative: There has been an error - */ -static int buffered_rate_limit(void *opaque) -{ -QEMUFileBuffered *s = opaque; -int ret; - -ret = qemu_file_get_error(s-file); -if (ret) { -return ret; -} - -if (s-bytes_xfer s-xfer_limit) -return 1; - -return 0; -} - -static int64_t buffered_set_rate_limit(void *opaque, int64_t new_rate) -{ -QEMUFileBuffered *s = opaque; -if (qemu_file_get_error(s-file)) { -goto out; -} -if (new_rate SIZE_MAX) { -
[Qemu-devel] [PATCH 28/30] fix memory.c
Signed-off-by: Juan Quintela quint...@redhat.com --- memory.c | 4 +++- memory.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/memory.c b/memory.c index 126fb63..4d0fa96 100644 --- a/memory.c +++ b/memory.c @@ -1089,7 +1089,9 @@ bool memory_region_test_and_clear_dirty(MemoryRegion *mr, ret = cpu_physical_memory_get_dirty(mr-ram_addr + addr, size, 1 client); if (ret) { -cpu_physical_memory_set_dirty_range(mr-ram_addr + addr, size, -1); +cpu_physical_memory_reset_dirty(mr-ram_addr + addr, +mr-ram_addr + addr + size, +1 client); } return ret; } diff --git a/memory.h b/memory.h index 08af012..0dcc0f4 100644 --- a/memory.h +++ b/memory.h @@ -449,7 +449,8 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr, */ bool memory_region_test_and_clear_dirty(MemoryRegion *mr, target_phys_addr_t addr, -target_phys_addr_t size, unsigned client) +target_phys_addr_t size, +unsigned client); /** * memory_region_sync_dirty_bitmap: Synchronize a region's dirty bitmap with * any external TLBs (e.g. kvm) -- 1.7.11.7
Re: [Qemu-devel] [PATCH 13/15] target-pcc: Convert ppcemb_tlb_t to use fixed 64-bit RPN
On Thu, Oct 18, 2012 at 08:37:20AM +0200, Alexander Graf wrote: On 18.10.2012, at 07:50, David Gibson da...@gibson.dropbear.id.au wrote: Currently the ppcemb_tlb_t struct, used on a number of embedded ppc models to represent a TLB entry contains a target_phys_addr_t. That works reasonably for now, but is troublesome for saving the state, which we'll want to do in future. target_phys_addr_t is a large enough type to contain a physical address for any supported machine - and can thus, in theory at least, vary depending on what machines are enabled other than the one we're actually using right now. This makes it unsuitable for describing in vmstate. Target_phys_addr_t is actually 64bit for all ppc targets today since some 32 bit boards support more than 32 bit address space ;). Yes, I know. In fact since recently it's 64bit always on everything. The change still is fine though, as it makes that bit explicit. Yes. What this is leading to is the new savevm code - there are no vmstate helpers for target_phys_addr_t and my attempt to add them met with at least semi-convincing arguments against. -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson
[Qemu-devel] [PATCH 03/30] protect the ramlist with a separate mutex
From: Umesh Deshpande udesh...@redhat.com Add the new mutex that protects shared state between ram_save_live and the iothread. If the iothread mutex has to be taken together with the ramlist mutex, the iothread shall always be _outside_. Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Umesh Deshpande udesh...@redhat.com Signed-off-by: Juan Quintela quint...@redhat.com --- arch_init.c | 9 - cpu-all.h | 8 exec.c | 23 +-- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/arch_init.c b/arch_init.c index b47313d..2d29828 100644 --- a/arch_init.c +++ b/arch_init.c @@ -553,7 +553,6 @@ static void ram_migration_cancel(void *opaque) migration_end(); } - static void reset_ram_globals(void) { last_block = NULL; @@ -573,6 +572,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) bitmap_set(migration_bitmap, 1, ram_pages); migration_dirty_pages = ram_pages; +qemu_mutex_lock_ramlist(); bytes_transferred = 0; reset_ram_globals(); @@ -600,6 +600,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) qemu_put_be64(f, block-length); } +qemu_mutex_unlock_ramlist(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); return 0; @@ -614,6 +615,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) uint64_t expected_downtime; MigrationState *s = migrate_get_current(); +qemu_mutex_lock_ramlist(); + if (ram_list.version != last_version) { reset_ram_globals(); } @@ -662,6 +665,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) bwidth = 0.01; } +qemu_mutex_unlock_ramlist(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; @@ -682,6 +686,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque) { migration_bitmap_sync(); +qemu_mutex_lock_ramlist(); + /* try transferring iterative blocks of memory */ /* flush all remaining blocks regardless of rate limiting */ @@ -697,6 +703,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) } memory_global_dirty_log_stop(); +qemu_mutex_unlock_ramlist(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); g_free(migration_bitmap); diff --git a/cpu-all.h b/cpu-all.h index e07c91c..2bafcdd 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -22,6 +22,7 @@ #include qemu-common.h #include qemu-tls.h #include cpu-common.h +#include qemu-thread.h /* some important defines: * @@ -490,7 +491,9 @@ typedef struct RAMBlock { ram_addr_t offset; ram_addr_t length; uint32_t flags; +/* Protected by the iothread lock. */ QLIST_ENTRY(RAMBlock) next_mru; +/* Protected by the ramlist lock. */ QLIST_ENTRY(RAMBlock) next; char idstr[256]; #if defined(__linux__) !defined(TARGET_S390X) @@ -499,9 +502,12 @@ typedef struct RAMBlock { } RAMBlock; typedef struct RAMList { +QemuMutex mutex; +/* Protected by the iothread lock. */ uint8_t *phys_dirty; uint32_t version; QLIST_HEAD(, RAMBlock) blocks_mru; +/* Protected by the ramlist lock. */ QLIST_HEAD(, RAMBlock) blocks; } RAMList; extern RAMList ram_list; @@ -521,6 +527,8 @@ extern int mem_prealloc; void dump_exec_info(FILE *f, fprintf_function cpu_fprintf); ram_addr_t last_ram_offset(void); +void qemu_mutex_lock_ramlist(void); +void qemu_mutex_unlock_ramlist(void); #endif /* !CONFIG_USER_ONLY */ int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr, diff --git a/exec.c b/exec.c index 1e04711..cf9de92 100644 --- a/exec.c +++ b/exec.c @@ -637,6 +637,7 @@ bool tcg_enabled(void) void cpu_exec_init_all(void) { #if !defined(CONFIG_USER_ONLY) +qemu_mutex_init(ram_list.mutex); memory_map_init(); io_mem_init(); #endif @@ -2329,6 +2330,16 @@ void qemu_flush_coalesced_mmio_buffer(void) kvm_flush_coalesced_mmio_buffer(); } +void qemu_mutex_lock_ramlist(void) +{ +qemu_mutex_lock(ram_list.mutex); +} + +void qemu_mutex_unlock_ramlist(void) +{ +qemu_mutex_unlock(ram_list.mutex); +} + #if defined(__linux__) !defined(TARGET_S390X) #include sys/vfs.h @@ -2510,6 +2521,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) } pstrcat(new_block-idstr, sizeof(new_block-idstr), name); +qemu_mutex_lock_ramlist(); QLIST_FOREACH(block, ram_list.blocks, next) { if (block != new_block !strcmp(block-idstr, new_block-idstr)) { fprintf(stderr, RAMBlock \%s\ already registered, abort!\n, @@ -2517,6 +2529,7 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev) abort(); } } +qemu_mutex_unlock_ramlist(); } static int memory_try_enable_merging(void *addr, size_t len) @@ -2540,6 +2553,7 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, size = TARGET_PAGE_ALIGN(size); new_block =
[Qemu-devel] [PATCH 30/30] ram: optimize migration bitmap walking
Instead of testing each page individually, we search what is the next dirty page with a bitmap operation. We have to reorganize the code to move from a for loop, to a while(dirty) loop. Signed-off-by: Juan Quintela quint...@redhat.com --- arch_init.c | 45 ++--- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/arch_init.c b/arch_init.c index 8391375..cd6ebef 100644 --- a/arch_init.c +++ b/arch_init.c @@ -343,18 +343,21 @@ static unsigned long *migration_bitmap; static uint64_t migration_dirty_pages; static uint32_t last_version; -static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr, - ram_addr_t offset) +static inline +ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, + ram_addr_t start) { -bool ret; -int nr = (mr-ram_addr + offset) TARGET_PAGE_BITS; +unsigned long base = mr-ram_addr TARGET_PAGE_BITS; +unsigned long nr = base + (start TARGET_PAGE_BITS); +unsigned long size = base + (int128_get64(mr-size) TARGET_PAGE_BITS); -ret = test_and_clear_bit(nr, migration_bitmap); +unsigned long next = find_next_bit(migration_bitmap, size, nr); -if (ret) { +if (next size) { +clear_bit(next, migration_bitmap); migration_dirty_pages--; } -return ret; +return (next - base) TARGET_PAGE_BITS; } static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, @@ -423,6 +426,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage) { RAMBlock *block = last_seen_block; ram_addr_t offset = last_offset; +bool complete_round = false; int bytes_sent = -1; MemoryRegion *mr; ram_addr_t current_addr; @@ -430,9 +434,21 @@ static int ram_save_block(QEMUFile *f, bool last_stage) if (!block) block = QLIST_FIRST(ram_list.blocks); -do { +while(true) { mr = block-mr; -if (migration_bitmap_test_and_reset_dirty(mr, offset)) { +offset = migration_bitmap_find_and_reset_dirty(mr, offset); +if (complete_round block == last_seen_block +offset = last_offset) { +break; +} +if (offset = block-length) { +offset = 0; +block = QLIST_NEXT(block, next); +if (!block) { +block = QLIST_FIRST(ram_list.blocks); +complete_round = true; +} +} else { uint8_t *p; int cont = (block == last_sent_block) ? RAM_SAVE_FLAG_CONTINUE : 0; @@ -467,16 +483,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage) break; } } - -offset += TARGET_PAGE_SIZE; -if (offset = block-length) { -offset = 0; -block = QLIST_NEXT(block, next); -if (!block) -block = QLIST_FIRST(ram_list.blocks); -} -} while (block != last_seen_block || offset != last_offset); - +} last_seen_block = block; last_offset = offset; -- 1.7.11.7
[Qemu-devel] [PATCH 24/30] ram: rename last_block to last_seen_block
Signed-off-by: Juan Quintela quint...@redhat.com --- arch_init.c | 16 ++-- 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch_init.c b/arch_init.c index 1eefef8..8ac4c94 100644 --- a/arch_init.c +++ b/arch_init.c @@ -332,7 +332,10 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, return bytes_sent; } -static RAMBlock *last_block; + +/* This is the last block that we have visited serching for dirty pages + */ +static RAMBlock *last_seen_block; static ram_addr_t last_offset; static unsigned long *migration_bitmap; static uint64_t migration_dirty_pages; @@ -417,7 +420,7 @@ static void migration_bitmap_sync(void) static int ram_save_block(QEMUFile *f, bool last_stage) { -RAMBlock *block = last_block; +RAMBlock *block = last_seen_block; ram_addr_t offset = last_offset; int bytes_sent = -1; MemoryRegion *mr; @@ -430,7 +433,8 @@ static int ram_save_block(QEMUFile *f, bool last_stage) mr = block-mr; if (migration_bitmap_test_and_reset_dirty(mr, offset)) { uint8_t *p; -int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0; +int cont = (block == last_seen_block) ? +RAM_SAVE_FLAG_CONTINUE : 0; p = memory_region_get_ram_ptr(mr) + offset; @@ -469,9 +473,9 @@ static int ram_save_block(QEMUFile *f, bool last_stage) if (!block) block = QLIST_FIRST(ram_list.blocks); } -} while (block != last_block || offset != last_offset); +} while (block != last_seen_block || offset != last_offset); -last_block = block; +last_seen_block = block; last_offset = offset; return bytes_sent; @@ -555,7 +559,7 @@ static void ram_migration_cancel(void *opaque) static void reset_ram_globals(void) { -last_block = NULL; +last_seen_block = NULL; last_offset = 0; last_version = ram_list.version; sort_ram_list(); -- 1.7.11.7
Re: [Qemu-devel] [RFC V2 00/20] QCOW2 deduplication
Can you use glib GTree instead? Ok I am posting this patchset in order to have an early feedback regarding the design. Since it's still not working, it's hard to tell how effective it is in practice. I hope to have something working soon. Best regards Benoît
Re: [Qemu-devel] [PATCH 1/2] Ignore RX tail kicks when RX disabled.
Stefan, The real purpose of check_rxov it a bit confusing indeed, mainly because of unclear name (rename?), however it works as following: There are 2 possible when RDT == RDH for RX ring: 1. Device used all the buffers from ring, no empty buffers available 2. Driver fully refilled the ring and all buffers are empty and ready to use check_rxov is used to distinguish these 2 cases: 1. It must be 1 initially (init, reset, etc.) 2. It must be set to one when device uses buffer 3. It must be set to 0 when driver adds buffer to the ring check_rxov == 1 - ring is empty check_rxov == 0 - ring is full Indeed, RX init sequence doesn't look logical, however this is the way all Intel driver behave from e1000 and up to ixgbe. Also see some explanation here: http://permalink.gmane.org/gmane.linux.kernel/1375917 If we drop check_rxov and always treat RDH == RDT as empty ring we'll probably get correct behavior for current Linux driver's code (needs testing of course), however we have no idea how Windows drivers work. Also drivers tend to change... Dmitry. On Thu, Oct 18, 2012 at 10:09 AM, Stefan Hajnoczi stefa...@gmail.com wrote: On Wed, Oct 17, 2012 at 08:31:46PM +0200, Dmitry Fleytman wrote: Device RX initization from driver's side consists of following steps: 1. Initialize head and tail of RX ring to 0 2. Enable Rx (set bit in RCTL register) 3. Allocate buffers, fill descriptors 4. Write ring tail Forth operation signals hardware that RX buffers available and it may start packets indication. Current implementation treats first operation (write 0 to ring tail) as signal of buffers availability and starts data transfers as soon as RX enable indicaton arrives. This is not correct because there is a chance that ring is still empty (third action not performed yet) and then memory corruption occures. Any idea what the point of hw/e1000.c check_rxov is? I see nothing in the datasheet that requires these semantics. The Linux e1000 driver never enables the RXO (rx fifo overflow) interrupt, only RXDMT0 (receive descriptor minimum threshold). This means hw/e1000.c will not upset the Linux e1000 driver when e1000_receive() gets called with check_rxov == 1 and RDH == RDT == 0. BTW the Linux e1000 driver does not follow the sequence recommended in the datasheet 14.4 Receive Initialization, which would avoid the weird window of time where RDH == RDT == 0. If we get rid of check_rxov and always check rxbuf space then we have the correct behavior. I'm a little nervous of simply dropping it because its purpose is unclear to me :(. Stefan -- Dmitry Fleytman Technology Expert and Consultant, Daynix Computing Ltd. Cell: +972-54-2819481 Skype: dmitry.fleytman
Re: [Qemu-devel] [PATCH 21/30] migration: move exit condition to migration thread
Il 18/10/2012 09:30, Juan Quintela ha scritto: -if (s-migration_state-complete) { +qemu_mutex_lock_iothread(); So, was it a bug that we were accessing -complete without the BQL? +if (m-state != MIG_STATE_ACTIVE) { +DPRINTF(put_ready returning because of non-active state\n); The contents of the debug message obsolete. Besides, I would just put the two branches in the same if (m-state != MIG_STATE_ACTIVE || m-complete). +qemu_mutex_unlock_iothread(); break; } +if (m-complete) { +qemu_mutex_unlock_iothread(); +break; +} +qemu_mutex_unlock_iothread(); + Paolo
Re: [Qemu-devel] [PATCH 1/2] Ignore RX tail kicks when RX disabled.
Hello, Stefan The problem occurs between steps 2 and 3. Let's say packet arrives after step 2 is done by driver. Head and tail are 0 because of step 1 Check_rxov is 0 because of two reasons: 1. On startup it is 0 by default 2. It is zeroed by setting ring tail to 0 on first step Then first check ( __if (!(s-mac_reg[RCTL] E1000_RCTL_EN))__ ) passes because RX enabled on step 2. e1000_has_rxbufs() returs true because it treats equal head and tail as fully filled ring when check_rxov is 0: static bool e1000_has_rxbufs(E1000State *s, size_t total_size) { [...] if (total_size = s-rxbuf_size) { return s-mac_reg[RDH] != s-mac_reg[RDT] || !s-check_rxov; [...] } else if (s-mac_reg[RDH] s-mac_reg[RDT] || !s-check_rxov) { [...] } So QEMU reads uninitialized descriptor and tries to perform DMA to arbitrary address from descriptor. Depending on address value it corrupts guest memory or abort()'s here: void *qemu_get_ram_ptr(ram_addr_t addr) { [...] fprintf(stderr, Bad ram offset % PRIx64 \n, (uint64_t)addr); abort(); [...] } Thanks for review, Dmitry. On Thu, Oct 18, 2012 at 9:31 AM, Stefan Hajnoczi stefa...@gmail.com wrote: On Wed, Oct 17, 2012 at 08:31:46PM +0200, Dmitry Fleytman wrote: Device RX initization from driver's side consists of following steps: 1. Initialize head and tail of RX ring to 0 2. Enable Rx (set bit in RCTL register) 3. Allocate buffers, fill descriptors 4. Write ring tail Forth operation signals hardware that RX buffers available and it may start packets indication. Current implementation treats first operation (write 0 to ring tail) as signal of buffers availability and starts data transfers as soon as RX enable indicaton arrives. This is not correct because there is a chance that ring is still empty (third action not performed yet) and then memory corruption occures. The existing code tries to prevent this: e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size) { [...] if (!(s-mac_reg[RCTL] E1000_RCTL_EN)) return -1; [...] total_size = size + fcs_len(s); if (!e1000_has_rxbufs(s, total_size)) { set_ics(s, 0, E1000_ICS_RXO); return -1; } Why are these checks not enough? Which memory gets corrupted? Stefan -- Dmitry Fleytman Technology Expert and Consultant, Daynix Computing Ltd. Cell: +972-54-2819481 Skype: dmitry.fleytman
Re: [Qemu-devel] [PATCH 22/30] migration: unfold rest of migrate_fd_put_ready() into thread
Il 18/10/2012 09:30, Juan Quintela ha scritto: This will allow us finer control in next patches. Signed-off-by: Juan Quintela quint...@redhat.com --- migration.c | 95 ++--- 1 file changed, 41 insertions(+), 54 deletions(-) diff --git a/migration.c b/migration.c index 7206866..e6ff1f1 100644 --- a/migration.c +++ b/migration.c @@ -644,54 +644,6 @@ static int64_t buffered_get_rate_limit(void *opaque) return s-xfer_limit; } -static bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size) -{ -int ret; -uint64_t pending_size; -bool last_round = false; - -qemu_mutex_lock_iothread(); -DPRINTF(iterate\n); -pending_size = qemu_savevm_state_pending(s-file, max_size); -DPRINTF(pending size %lu max %lu\n, pending_size, max_size); -if (pending_size = max_size) { -ret = qemu_savevm_state_iterate(s-file); -if (ret 0) { -migrate_fd_error(s); -} -} else { -int old_vm_running = runstate_is_running(); -int64_t start_time, end_time; - -DPRINTF(done iterating\n); -start_time = qemu_get_clock_ms(rt_clock); -qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); -if (old_vm_running) { -vm_stop(RUN_STATE_FINISH_MIGRATE); -} else { -vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); -} - -if (qemu_savevm_state_complete(s-file) 0) { -migrate_fd_error(s); -} else { -migrate_fd_completed(s); -} -end_time = qemu_get_clock_ms(rt_clock); -s-total_time = end_time - s-total_time; -s-downtime = end_time - start_time; -if (s-state != MIG_STATE_COMPLETED) { -if (old_vm_running) { -vm_start(); -} -} -last_round = true; -} -qemu_mutex_unlock_iothread(); - -return last_round; -} - /* 100ms xfer_limit is the limit that we should write each 100ms */ #define BUFFER_DELAY 100 @@ -716,6 +668,7 @@ static void *buffered_file_thread(void *opaque) while (true) { int64_t current_time = qemu_get_clock_ms(rt_clock); +uint64_t pending_size; qemu_mutex_lock_iothread(); if (m-state != MIG_STATE_ACTIVE) { @@ -727,6 +680,46 @@ static void *buffered_file_thread(void *opaque) qemu_mutex_unlock_iothread(); break; } +if (s-bytes_xfer s-xfer_limit) { +DPRINTF(iterate\n); +pending_size = qemu_savevm_state_pending(m-file, max_size); +DPRINTF(pending size %lu max %lu\n, pending_size, max_size); +if (pending_size = max_size) { +ret = qemu_savevm_state_iterate(m-file); So RAM migration is still being run inside the BQL, isn't it? +if (ret 0) { +qemu_mutex_unlock_iothread(); +break; There's a lot of qemu_mutex_unlock_iothread(); break; in this function. Perhaps it is better if you make an invariant that the loop is entered and exited with the BQL taken, and it is only unlocked in the middle. It makes sense once you fold everything in migration.c. It can be a separate patch though. +} +} else { +int old_vm_running = runstate_is_running(); +int64_t start_time, end_time; + +DPRINTF(done iterating\n); +start_time = qemu_get_clock_ms(rt_clock); +qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); +if (old_vm_running) { +vm_stop(RUN_STATE_FINISH_MIGRATE); +} else { +vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); +} +ret = qemu_savevm_state_complete(m-file); +if (ret 0) { +qemu_mutex_unlock_iothread(); +break; +} else { +migrate_fd_completed(m); +} +end_time = qemu_get_clock_ms(rt_clock); +m-total_time = end_time - m-total_time; +m-downtime = end_time - start_time; +if (m-state != MIG_STATE_COMPLETED) { +if (old_vm_running) { +vm_start(); +} +} +last_round = true; +} +} qemu_mutex_unlock_iothread(); if (current_time = initial_time + BUFFER_DELAY) { @@ -747,12 +740,6 @@ static void *buffered_file_thread(void *opaque) usleep((initial_time + BUFFER_DELAY - current_time)*1000); } buffered_flush(s); - -DPRINTF(file is ready\n); -if (s-bytes_xfer s-xfer_limit) { -
[Qemu-devel] [PATCH 21/30] migration: move exit condition to migration thread
Signed-off-by: Juan Quintela quint...@redhat.com --- migration.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/migration.c b/migration.c index 8cacbc3..7206866 100644 --- a/migration.c +++ b/migration.c @@ -651,12 +651,6 @@ static bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size) bool last_round = false; qemu_mutex_lock_iothread(); -if (s-state != MIG_STATE_ACTIVE) { -DPRINTF(put_ready returning because of non-active state\n); -qemu_mutex_unlock_iothread(); -return false; -} - DPRINTF(iterate\n); pending_size = qemu_savevm_state_pending(s-file, max_size); DPRINTF(pending size %lu max %lu\n, pending_size, max_size); @@ -723,9 +717,18 @@ static void *buffered_file_thread(void *opaque) while (true) { int64_t current_time = qemu_get_clock_ms(rt_clock); -if (s-migration_state-complete) { +qemu_mutex_lock_iothread(); +if (m-state != MIG_STATE_ACTIVE) { +DPRINTF(put_ready returning because of non-active state\n); +qemu_mutex_unlock_iothread(); break; } +if (m-complete) { +qemu_mutex_unlock_iothread(); +break; +} +qemu_mutex_unlock_iothread(); + if (current_time = initial_time + BUFFER_DELAY) { uint64_t transferred_bytes = s-bytes_xfer; uint64_t time_spent = current_time - initial_time; -- 1.7.11.7
Re: [Qemu-devel] [PATCH 1/2] Ignore RX tail kicks when RX disabled.
On Wed, Oct 17, 2012 at 08:31:46PM +0200, Dmitry Fleytman wrote: Device RX initization from driver's side consists of following steps: 1. Initialize head and tail of RX ring to 0 2. Enable Rx (set bit in RCTL register) 3. Allocate buffers, fill descriptors 4. Write ring tail Forth operation signals hardware that RX buffers available and it may start packets indication. Current implementation treats first operation (write 0 to ring tail) as signal of buffers availability and starts data transfers as soon as RX enable indicaton arrives. This is not correct because there is a chance that ring is still empty (third action not performed yet) and then memory corruption occures. The existing code tries to prevent this: e1000_receive(NetClientState *nc, const uint8_t *buf, size_t size) { [...] if (!(s-mac_reg[RCTL] E1000_RCTL_EN)) return -1; [...] total_size = size + fcs_len(s); if (!e1000_has_rxbufs(s, total_size)) { set_ics(s, 0, E1000_ICS_RXO); return -1; } Why are these checks not enough? Which memory gets corrupted? Stefan
[Qemu-devel] [PATCH 26/30] memory: introduce memory_region_test_and_clear_dirty
This function avoids having to do two calls, one to test the dirty bit, and other to reset it. Signed-off-by: Juan Quintela quint...@redhat.com --- memory.c | 15 +++ memory.h | 17 + 2 files changed, 32 insertions(+) diff --git a/memory.c b/memory.c index 4f3ade0..126fb63 100644 --- a/memory.c +++ b/memory.c @@ -1080,6 +1080,21 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr, return cpu_physical_memory_set_dirty_range(mr-ram_addr + addr, size, -1); } +bool memory_region_test_and_clear_dirty(MemoryRegion *mr, +target_phys_addr_t addr, +target_phys_addr_t size, unsigned client) +{ +bool ret; +assert(mr-terminates); +ret = cpu_physical_memory_get_dirty(mr-ram_addr + addr, size, +1 client); +if (ret) { +cpu_physical_memory_set_dirty_range(mr-ram_addr + addr, size, -1); +} +return ret; +} + + void memory_region_sync_dirty_bitmap(MemoryRegion *mr) { FlatRange *fr; diff --git a/memory.h b/memory.h index 37ce151..08af012 100644 --- a/memory.h +++ b/memory.h @@ -434,6 +434,23 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr, target_phys_addr_t size); /** + * memory_region_test_and_clear_dirty: Check whether a range of bytes is dirty + * for a specified client. It clears them. + * + * Checks whether a range of bytes has been written to since the last + * call to memory_region_reset_dirty() with the same @client. Dirty logging + * must be enabled. + * + * @mr: the memory region being queried. + * @addr: the address (relative to the start of the region) being queried. + * @size: the size of the range being queried. + * @client: the user of the logging information; %DIRTY_MEMORY_MIGRATION or + * %DIRTY_MEMORY_VGA. + */ +bool memory_region_test_and_clear_dirty(MemoryRegion *mr, +target_phys_addr_t addr, +target_phys_addr_t size, unsigned client) +/** * memory_region_sync_dirty_bitmap: Synchronize a region's dirty bitmap with * any external TLBs (e.g. kvm) * -- 1.7.11.7
Re: [Qemu-devel] [PATCH 28/30] fix memory.c
Il 18/10/2012 09:30, Juan Quintela ha scritto: Signed-off-by: Juan Quintela quint...@redhat.com --- memory.c | 4 +++- memory.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/memory.c b/memory.c index 126fb63..4d0fa96 100644 --- a/memory.c +++ b/memory.c @@ -1089,7 +1089,9 @@ bool memory_region_test_and_clear_dirty(MemoryRegion *mr, ret = cpu_physical_memory_get_dirty(mr-ram_addr + addr, size, 1 client); if (ret) { -cpu_physical_memory_set_dirty_range(mr-ram_addr + addr, size, -1); +cpu_physical_memory_reset_dirty(mr-ram_addr + addr, +mr-ram_addr + addr + size, +1 client); } return ret; } diff --git a/memory.h b/memory.h index 08af012..0dcc0f4 100644 --- a/memory.h +++ b/memory.h @@ -449,7 +449,8 @@ void memory_region_set_dirty(MemoryRegion *mr, target_phys_addr_t addr, */ bool memory_region_test_and_clear_dirty(MemoryRegion *mr, target_phys_addr_t addr, -target_phys_addr_t size, unsigned client) +target_phys_addr_t size, +unsigned client); /** * memory_region_sync_dirty_bitmap: Synchronize a region's dirty bitmap with * any external TLBs (e.g. kvm) This should be squashed in patch 26. Paolo
[Qemu-devel] [PATCH 11/30] buffered_file: don't flush on put buffer
We call buffered_put_buffer with iothread held, and buffered_flush() does synchronous writes. We only want to do the synchronous writes outside. Signed-off-by: Juan Quintela quint...@redhat.com --- buffered_file.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/buffered_file.c b/buffered_file.c index 7692950..f508026 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -107,12 +107,6 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in buffered_append(s, buf, size); } -error = buffered_flush(s); -if (error 0) { -DPRINTF(buffered flush error. bailing: %s\n, strerror(-error)); -return error; -} - return size; } -- 1.7.11.7
[Qemu-devel] [PATCH 08/30] migration: remove unfreeze logic
Now that we have a thread, and blocking writes, we don't need it. Signed-off-by: Juan Quintela quint...@redhat.com --- buffered_file.c | 24 +--- migration.c | 23 --- migration.h | 1 - 3 files changed, 1 insertion(+), 47 deletions(-) diff --git a/buffered_file.c b/buffered_file.c index 6395b37..876cc8c 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -26,7 +26,6 @@ typedef struct QEMUFileBuffered { MigrationState *migration_state; QEMUFile *file; -int freeze_output; size_t bytes_xfer; size_t xfer_limit; uint8_t *buffer; @@ -70,13 +69,6 @@ static ssize_t buffered_flush(QEMUFileBuffered *s) ret = migrate_fd_put_buffer(s-migration_state, s-buffer + offset, s-buffer_size - offset); -if (ret == -EAGAIN) { -DPRINTF(backend not ready, freezing\n); -ret = 0; -s-freeze_output = 1; -break; -} - if (ret = 0) { DPRINTF(error flushing data, %zd\n, ret); break; @@ -110,9 +102,6 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in return error; } -DPRINTF(unfreezing output\n); -s-freeze_output = 0; - if (size 0) { DPRINTF(buffering %d bytes\n, size - offset); buffered_append(s, buf, size); @@ -126,7 +115,7 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in if (pos == 0 size == 0) { DPRINTF(file is ready\n); -if (!s-freeze_output s-bytes_xfer s-xfer_limit) { +if (s-bytes_xfer s-xfer_limit) { DPRINTF(notifying client\n); migrate_fd_put_ready(s-migration_state); } @@ -149,12 +138,6 @@ static int buffered_close(void *opaque) if (ret 0) { break; } -if (s-freeze_output) { -ret = migrate_fd_wait_for_unfreeze(s-migration_state); -if (ret 0) { -break; -} -} } ret2 = migrate_fd_close(s-migration_state); @@ -181,8 +164,6 @@ static int buffered_rate_limit(void *opaque) if (ret) { return ret; } -if (s-freeze_output) -return 1; if (s-bytes_xfer s-xfer_limit) return 1; @@ -227,9 +208,6 @@ static void *buffered_file_thread(void *opaque) if (s-migration_state-complete) { break; } -if (s-freeze_output) { -continue; -} if (current_time = expire_time) { s-bytes_xfer = 0; expire_time = current_time + BUFFER_DELAY; diff --git a/migration.c b/migration.c index a8b2f4a..29ee710 100644 --- a/migration.c +++ b/migration.c @@ -367,29 +367,6 @@ static void migrate_fd_cancel(MigrationState *s) migrate_fd_cleanup(s); } -int migrate_fd_wait_for_unfreeze(MigrationState *s) -{ -int ret; - -DPRINTF(wait for unfreeze\n); -if (s-state != MIG_STATE_ACTIVE) -return -EINVAL; - -do { -fd_set wfds; - -FD_ZERO(wfds); -FD_SET(s-fd, wfds); - -ret = select(s-fd + 1, NULL, wfds, NULL, NULL); -} while (ret == -1 (s-get_error(s)) == EINTR); - -if (ret == -1) { -return -s-get_error(s); -} -return 0; -} - int migrate_fd_close(MigrationState *s) { return s-close(s); diff --git a/migration.h b/migration.h index a63c5d5..505f792 100644 --- a/migration.h +++ b/migration.h @@ -82,7 +82,6 @@ void migrate_fd_connect(MigrationState *s); ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, size_t size); void migrate_fd_put_ready(MigrationState *s); -int migrate_fd_wait_for_unfreeze(MigrationState *s); int migrate_fd_close(MigrationState *s); void add_migration_state_change_notifier(Notifier *notify); -- 1.7.11.7
[Qemu-devel] [PATCH 04/30] buffered_file: Move from using a timer to use a thread
We still protect everything except the wait with the iothread lock. But we moved from a timer to a thread. Steps one by one. We also need to detect when we have finished with a variable complete. Signed-off-by: Juan Quintela quint...@redhat.com --- buffered_file.c | 58 +++-- 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/buffered_file.c b/buffered_file.c index ed92df1..4b90d54 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -18,6 +18,7 @@ #include qemu-timer.h #include qemu-char.h #include buffered_file.h +#include qemu-thread.h //#define DEBUG_BUFFERED_FILE @@ -31,7 +32,8 @@ typedef struct QEMUFileBuffered uint8_t *buffer; size_t buffer_size; size_t buffer_capacity; -QEMUTimer *timer; +QemuThread thread; +bool complete; } QEMUFileBuffered; #ifdef DEBUG_BUFFERED_FILE @@ -160,11 +162,8 @@ static int buffered_close(void *opaque) if (ret = 0) { ret = ret2; } -qemu_del_timer(s-timer); -qemu_free_timer(s-timer); -g_free(s-buffer); -g_free(s); - +ret = migrate_fd_close(s-migration_state); +s-complete = true; return ret; } @@ -215,23 +214,38 @@ static int64_t buffered_get_rate_limit(void *opaque) return s-xfer_limit; } -static void buffered_rate_tick(void *opaque) +/* 10ms xfer_limit is the limit that we should write each 10ms */ +#define BUFFER_DELAY 100 + +static void *buffered_file_thread(void *opaque) { QEMUFileBuffered *s = opaque; +int64_t expire_time = qemu_get_clock_ms(rt_clock) + BUFFER_DELAY; -if (qemu_file_get_error(s-file)) { -buffered_close(s); -return; -} - -qemu_mod_timer(s-timer, qemu_get_clock_ms(rt_clock) + 100); - -if (s-freeze_output) -return; - -s-bytes_xfer = 0; +while (true) { +int64_t current_time = qemu_get_clock_ms(rt_clock); -buffered_put_buffer(s, NULL, 0, 0); +if (s-complete) { +break; +} +if (s-freeze_output) { +continue; +} +if (current_time = expire_time) { +s-bytes_xfer = 0; +expire_time = current_time + BUFFER_DELAY; +} +if (s-bytes_xfer = s-xfer_limit) { +/* usleep expects microseconds */ +usleep((expire_time - current_time)*1000); +} +qemu_mutex_lock_iothread(); +buffered_put_buffer(s, NULL, 0, 0); +qemu_mutex_unlock_iothread(); +} +g_free(s-buffer); +g_free(s); +return NULL; } QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state) @@ -242,15 +256,15 @@ QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state) s-migration_state = migration_state; s-xfer_limit = migration_state-bandwidth_limit / 10; +s-complete = false; s-file = qemu_fopen_ops(s, buffered_put_buffer, NULL, buffered_close, buffered_rate_limit, buffered_set_rate_limit, buffered_get_rate_limit); -s-timer = qemu_new_timer_ms(rt_clock, buffered_rate_tick, s); - -qemu_mod_timer(s-timer, qemu_get_clock_ms(rt_clock) + 100); +qemu_thread_create(s-thread, buffered_file_thread, s, + QEMU_THREAD_DETACHED); return s-file; } -- 1.7.11.7
[Qemu-devel] [PATCH 05/30] migration: make qemu_fopen_ops_buffered() return void
We want the file assignment to happen before the thread is created to avoid locking, so we just do it before creating the thread. Signed-off-by: Juan Quintela quint...@redhat.com --- buffered_file.c | 13 ++--- buffered_file.h | 2 +- migration.c | 2 +- migration.h | 1 + 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/buffered_file.c b/buffered_file.c index 4b90d54..6395b37 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -33,7 +33,6 @@ typedef struct QEMUFileBuffered size_t buffer_size; size_t buffer_capacity; QemuThread thread; -bool complete; } QEMUFileBuffered; #ifdef DEBUG_BUFFERED_FILE @@ -163,7 +162,7 @@ static int buffered_close(void *opaque) ret = ret2; } ret = migrate_fd_close(s-migration_state); -s-complete = true; +s-migration_state-complete = true; return ret; } @@ -225,7 +224,7 @@ static void *buffered_file_thread(void *opaque) while (true) { int64_t current_time = qemu_get_clock_ms(rt_clock); -if (s-complete) { +if (s-migration_state-complete) { break; } if (s-freeze_output) { @@ -248,7 +247,7 @@ static void *buffered_file_thread(void *opaque) return NULL; } -QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state) +void qemu_fopen_ops_buffered(MigrationState *migration_state) { QEMUFileBuffered *s; @@ -256,15 +255,15 @@ QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state) s-migration_state = migration_state; s-xfer_limit = migration_state-bandwidth_limit / 10; -s-complete = false; +s-migration_state-complete = false; s-file = qemu_fopen_ops(s, buffered_put_buffer, NULL, buffered_close, buffered_rate_limit, buffered_set_rate_limit, buffered_get_rate_limit); +migration_state-file = s-file; + qemu_thread_create(s-thread, buffered_file_thread, s, QEMU_THREAD_DETACHED); - -return s-file; } diff --git a/buffered_file.h b/buffered_file.h index ef010fe..8a246fd 100644 --- a/buffered_file.h +++ b/buffered_file.h @@ -17,6 +17,6 @@ #include hw/hw.h #include migration.h -QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state); +void qemu_fopen_ops_buffered(MigrationState *migration_state); #endif diff --git a/migration.c b/migration.c index 62e0304..02f4ffa 100644 --- a/migration.c +++ b/migration.c @@ -431,7 +431,7 @@ void migrate_fd_connect(MigrationState *s) int ret; s-state = MIG_STATE_ACTIVE; -s-file = qemu_fopen_ops_buffered(s); +qemu_fopen_ops_buffered(s); DPRINTF(beginning savevm\n); ret = qemu_savevm_state_begin(s-file, s-params); diff --git a/migration.h b/migration.h index 1c3e9b7..a63c5d5 100644 --- a/migration.h +++ b/migration.h @@ -45,6 +45,7 @@ struct MigrationState int64_t dirty_pages_rate; bool enabled_capabilities[MIGRATION_CAPABILITY_MAX]; int64_t xbzrle_cache_size; +bool complete; }; void process_incoming_migration(QEMUFile *f); -- 1.7.11.7
[Qemu-devel] [PATCH 06/30] migration: stop all cpus correctly
You can only stop all cpus from the iothread or an vcpu. As we want to do it from the migration_thread, we need to do this dance with the botton handlers. This patch is a request for ideas. I can move this function to cpus.c, but wondered if there is an easy way of doing this? Signed-off-by: Juan Quintela quint...@redhat.com --- migration.c | 29 + 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/migration.c b/migration.c index 02f4ffa..05ef1a3 100644 --- a/migration.c +++ b/migration.c @@ -20,6 +20,7 @@ #include sysemu.h #include block.h #include qemu_socket.h +#include qemu-thread.h #include block-migration.h #include qmp-commands.h @@ -322,11 +323,22 @@ ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, void migrate_fd_put_ready(MigrationState *s) { int ret; +static bool first_time = true; if (s-state != MIG_STATE_ACTIVE) { DPRINTF(put_ready returning because of non-active state\n); return; } +if (first_time) { +first_time = false; +DPRINTF(beginning savevm\n); +ret = qemu_savevm_state_begin(s-file, s-params); +if (ret 0) { +DPRINTF(failed, %d\n, ret); +migrate_fd_error(s); +return; +} +} DPRINTF(iterate\n); ret = qemu_savevm_state_iterate(s-file); @@ -339,7 +351,11 @@ void migrate_fd_put_ready(MigrationState *s) DPRINTF(done iterating\n); start_time = qemu_get_clock_ms(rt_clock); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); -vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); +if (old_vm_running) { +vm_stop(RUN_STATE_FINISH_MIGRATE); +} else { +vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); +} if (qemu_savevm_state_complete(s-file) 0) { migrate_fd_error(s); @@ -428,19 +444,8 @@ bool migration_has_failed(MigrationState *s) void migrate_fd_connect(MigrationState *s) { -int ret; - s-state = MIG_STATE_ACTIVE; qemu_fopen_ops_buffered(s); - -DPRINTF(beginning savevm\n); -ret = qemu_savevm_state_begin(s-file, s-params); -if (ret 0) { -DPRINTF(failed, %d\n, ret); -migrate_fd_error(s); -return; -} -migrate_fd_put_ready(s); } static MigrationState *migrate_init(const MigrationParams *params) -- 1.7.11.7
[Qemu-devel] [PATCH 07/30] migration: make writes blocking
Move all the writes to the migration_thread, and make writings blocking. Notice that are still using the iothread for everything that we do. Signed-off-by: Juan Quintela quint...@redhat.com --- migration-exec.c | 2 -- migration-fd.c | 6 -- migration-tcp.c | 19 ++- migration-unix.c | 2 -- migration.c | 21 - qemu-file.h | 5 - savevm.c | 5 - 7 files changed, 2 insertions(+), 58 deletions(-) diff --git a/migration-exec.c b/migration-exec.c index 6c97db9..908f22e 100644 --- a/migration-exec.c +++ b/migration-exec.c @@ -76,8 +76,6 @@ int exec_start_outgoing_migration(MigrationState *s, const char *command) goto err_after_open; } -socket_set_nonblock(s-fd); - s-opaque = qemu_popen(f, w); s-close = exec_close; diff --git a/migration-fd.c b/migration-fd.c index 7335167..aba1be7 100644 --- a/migration-fd.c +++ b/migration-fd.c @@ -81,11 +81,6 @@ int fd_start_outgoing_migration(MigrationState *s, const char *fdname) goto err_after_get_fd; } -if (fcntl(s-fd, F_SETFL, O_NONBLOCK) == -1) { -DPRINTF(Unable to set nonblocking mode on file descriptor\n); -goto err_after_open; -} - s-get_error = fd_errno; s-write = fd_write; s-close = fd_close; @@ -93,7 +88,6 @@ int fd_start_outgoing_migration(MigrationState *s, const char *fdname) migrate_fd_connect(s); return 0; -err_after_open: close(s-fd); err_after_get_fd: return -1; diff --git a/migration-tcp.c b/migration-tcp.c index a15c2b8..812ae22 100644 --- a/migration-tcp.c +++ b/migration-tcp.c @@ -53,21 +53,6 @@ static int tcp_close(MigrationState *s) return r; } -static void tcp_wait_for_connect(int fd, void *opaque) -{ -MigrationState *s = opaque; - -if (fd 0) { -DPRINTF(migrate connect error\n); -s-fd = -1; -migrate_fd_error(s); -} else { -DPRINTF(migrate connect success\n); -s-fd = fd; -migrate_fd_connect(s); -} -} - int tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp) { @@ -75,12 +60,12 @@ int tcp_start_outgoing_migration(MigrationState *s, const char *host_port, s-write = socket_write; s-close = tcp_close; -s-fd = inet_nonblocking_connect(host_port, tcp_wait_for_connect, s, - errp); +s-fd = inet_connect(host_port, errp); if (error_is_set(errp)) { migrate_fd_error(s); return -1; } +migrate_fd_connect(s); return 0; } diff --git a/migration-unix.c b/migration-unix.c index 169de88..bb41bac 100644 --- a/migration-unix.c +++ b/migration-unix.c @@ -96,8 +96,6 @@ int unix_start_outgoing_migration(MigrationState *s, const char *path) return -errno; } -socket_set_nonblock(s-fd); - do { ret = connect(s-fd, (struct sockaddr *)addr, sizeof(addr)); if (ret == -1) { diff --git a/migration.c b/migration.c index 05ef1a3..a8b2f4a 100644 --- a/migration.c +++ b/migration.c @@ -247,10 +247,6 @@ static int migrate_fd_cleanup(MigrationState *s) { int ret = 0; -if (s-fd != -1) { -qemu_set_fd_handler2(s-fd, NULL, NULL, NULL, NULL); -} - if (s-file) { DPRINTF(closing file\n); ret = qemu_fclose(s-file); @@ -285,18 +281,6 @@ static void migrate_fd_completed(MigrationState *s) notifier_list_notify(migration_state_notifiers, s); } -static void migrate_fd_put_notify(void *opaque) -{ -MigrationState *s = opaque; -int ret; - -qemu_set_fd_handler2(s-fd, NULL, NULL, NULL, NULL); -ret = qemu_file_put_notify(s-file); -if (ret) { -migrate_fd_error(s); -} -} - ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, size_t size) { @@ -313,10 +297,6 @@ ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, if (ret == -1) ret = -(s-get_error(s)); -if (ret == -EAGAIN) { -qemu_set_fd_handler2(s-fd, NULL, NULL, migrate_fd_put_notify, s); -} - return ret; } @@ -412,7 +392,6 @@ int migrate_fd_wait_for_unfreeze(MigrationState *s) int migrate_fd_close(MigrationState *s) { -qemu_set_fd_handler2(s-fd, NULL, NULL, NULL, NULL); return s-close(s); } diff --git a/qemu-file.h b/qemu-file.h index 9c8985b..e88892c 100644 --- a/qemu-file.h +++ b/qemu-file.h @@ -104,11 +104,6 @@ int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_get_error(QEMUFile *f); -/* Try to send any outstanding data. This function is useful when output is - * halted due to rate limiting or EAGAIN errors occur as it can be used to - * resume output. */ -int qemu_file_put_notify(QEMUFile *f); - static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv) { qemu_put_be64(f, *pv); diff --git a/savevm.c
Re: [Qemu-devel] [PATCH 22/30] migration: unfold rest of migrate_fd_put_ready() into thread
Il 18/10/2012 10:39, Paolo Bonzini ha scritto: in this function. Perhaps it is better if you make an invariant that the loop is entered and exited with the BQL taken, and it is only unlocked in the middle. It makes sense once you fold everything in migration.c. i.e. this: diff --git a/migration.c b/migration.c index 554d79a..bd9fe2d 100644 --- a/migration.c +++ b/migration.c @@ -661,23 +661,18 @@ static void *buffered_file_thread(void *opaque) ret = qemu_savevm_state_begin(m-file, m-params); if (ret 0) { DPRINTF(failed, %d\n, ret); -qemu_mutex_unlock_iothread(); goto out; } -qemu_mutex_unlock_iothread(); while (true) { int64_t current_time = qemu_get_clock_ms(rt_clock); uint64_t pending_size; -qemu_mutex_lock_iothread(); if (m-state != MIG_STATE_ACTIVE) { DPRINTF(put_ready returning because of non-active state\n); -qemu_mutex_unlock_iothread(); break; } if (m-complete) { -qemu_mutex_unlock_iothread(); break; } if (s-bytes_xfer s-xfer_limit) { @@ -687,7 +682,6 @@ static void *buffered_file_thread(void *opaque) if (pending_size pending_size = max_size) { ret = qemu_savevm_state_iterate(m-file); if (ret 0) { -qemu_mutex_unlock_iothread(); break; } } else { @@ -708,7 +702,6 @@ static void *buffered_file_thread(void *opaque) printf(vm_stop 2 %ld\n, end_time - start_time); ret = qemu_savevm_state_complete(m-file); if (ret 0) { -qemu_mutex_unlock_iothread(); break; } else { end_time = qemu_get_clock_ms(rt_clock); @@ -729,8 +722,8 @@ static void *buffered_file_thread(void *opaque) last_round = true; } } -qemu_mutex_unlock_iothread(); +qemu_mutex_unlock_iothread(); if (current_time = initial_time + BUFFER_DELAY) { uint64_t transferred_bytes = s-bytes_xfer; uint64_t time_spent = current_time - initial_time; @@ -749,9 +742,11 @@ static void *buffered_file_thread(void *opaque) usleep((initial_time + BUFFER_DELAY - current_time)*1000); } buffered_flush(s); +qemu_mutex_lock_iothread(); } out: +qemu_mutex_unlock_iothread(); if (ret 0) { migrate_fd_error(m); } BTW the completion (basically the else starting at int old_vm_running = runstate_is_running() looks much nicer in a separate function, like: if (pending_size pending_size = max_size) { ret = qemu_savevm_state_iterate(m-file); } else { ret = migration_complete(m); last_round = true; } if (ret 0) { break; } Paolo
Re: [Qemu-devel] [PATCH 04/30] buffered_file: Move from using a timer to use a thread
Il 18/10/2012 09:30, Juan Quintela ha scritto: +} +if (s-bytes_xfer = s-xfer_limit) { +/* usleep expects microseconds */ +usleep((expire_time - current_time)*1000); +} g_usleep please. Paolo
[Qemu-devel] [PATCH 12/30] buffered_file: unfold buffered_append in buffered_put_buffer
It was the only user, and now buffered_put_buffer just do the append Signed-off-by: Juan Quintela quint...@redhat.com --- buffered_file.c | 33 ++--- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/buffered_file.c b/buffered_file.c index f508026..fcb3907 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -42,22 +42,6 @@ typedef struct QEMUFileBuffered do { } while (0) #endif -static void buffered_append(QEMUFileBuffered *s, -const uint8_t *buf, size_t size) -{ -if (size (s-buffer_capacity - s-buffer_size)) { -DPRINTF(increasing buffer capacity from %zu by %zu\n, -s-buffer_capacity, size + 1024); - -s-buffer_capacity += size + 1024; - -s-buffer = g_realloc(s-buffer, s-buffer_capacity); -} - -memcpy(s-buffer + s-buffer_size, buf, size); -s-buffer_size += size; -} - static ssize_t buffered_flush(QEMUFileBuffered *s) { size_t offset = 0; @@ -102,11 +86,22 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, in return error; } -if (size 0) { -DPRINTF(buffering %d bytes\n, size - offset); -buffered_append(s, buf, size); +if (size = 0) { +return size; } +if (size (s-buffer_capacity - s-buffer_size)) { +DPRINTF(increasing buffer capacity from %zu by %zu\n, +s-buffer_capacity, size + 1024); + +s-buffer_capacity += size + 1024; + +s-buffer = g_realloc(s-buffer, s-buffer_capacity); +} + +memcpy(s-buffer + s-buffer_size, buf, size); +s-buffer_size += size; + return size; } -- 1.7.11.7
Re: [Qemu-devel] [PATCH 16/30] migration: move buffered_file.c code into migration.c
Il 18/10/2012 09:30, Juan Quintela ha scritto: This only moves the code (also from buffered_file.h to migration.h). Fix whitespace until checkpatch is happy. While I agree with this patch, it is also a conflict magnet. My migration-in-a-coroutine cleanups will touch buffered_file.c, you're warned... Paolo Signed-off-by: Juan Quintela quint...@redhat.com --- Makefile.objs | 2 +- buffered_file.c | 244 buffered_file.h | 22 - migration.c | 218 +- migration.h | 1 + 5 files changed, 219 insertions(+), 268 deletions(-) delete mode 100644 buffered_file.c delete mode 100644 buffered_file.h diff --git a/Makefile.objs b/Makefile.objs index 74b3542..3de8f27 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -73,7 +73,7 @@ extra-obj-$(CONFIG_LINUX) += fsdev/ common-obj-y += tcg-runtime.o host-utils.o main-loop.o common-obj-y += input.o -common-obj-y += buffered_file.o migration.o migration-tcp.o +common-obj-y += migration.o migration-tcp.o common-obj-y += qemu-char.o #aio.o common-obj-y += block-migration.o iohandler.o common-obj-y += pflib.o diff --git a/buffered_file.c b/buffered_file.c deleted file mode 100644 index c21f847..000 --- a/buffered_file.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * QEMU buffered QEMUFile - * - * Copyright IBM, Corp. 2008 - * - * Authors: - * Anthony Liguori aligu...@us.ibm.com - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - * - * Contributions after 2012-01-13 are licensed under the terms of the - * GNU GPL, version 2 or (at your option) any later version. - */ - -#include qemu-common.h -#include hw/hw.h -#include qemu-timer.h -#include qemu-char.h -#include buffered_file.h -#include qemu-thread.h - -//#define DEBUG_BUFFERED_FILE - -typedef struct QEMUFileBuffered -{ -MigrationState *migration_state; -QEMUFile *file; -size_t bytes_xfer; -size_t xfer_limit; -uint8_t *buffer; -size_t buffer_size; -size_t buffer_capacity; -QemuThread thread; -} QEMUFileBuffered; - -#ifdef DEBUG_BUFFERED_FILE -#define DPRINTF(fmt, ...) \ -do { printf(buffered-file: fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) \ -do { } while (0) -#endif - -static ssize_t buffered_flush(QEMUFileBuffered *s) -{ -size_t offset = 0; -ssize_t ret = 0; - -DPRINTF(flushing %zu byte(s) of data\n, s-buffer_size); - -while (s-bytes_xfer s-xfer_limit offset s-buffer_size) { - -ret = migrate_fd_put_buffer(s-migration_state, s-buffer + offset, -s-buffer_size - offset); -if (ret = 0) { -DPRINTF(error flushing data, %zd\n, ret); -break; -} else { -DPRINTF(flushed %zd byte(s)\n, ret); -offset += ret; -s-bytes_xfer += ret; -} -} - -DPRINTF(flushed %zu of %zu byte(s)\n, offset, s-buffer_size); -memmove(s-buffer, s-buffer + offset, s-buffer_size - offset); -s-buffer_size -= offset; - -if (ret 0) { -return ret; -} -return offset; -} - -static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) -{ -QEMUFileBuffered *s = opaque; -ssize_t error; - -DPRINTF(putting %d bytes at % PRId64 \n, size, pos); - -error = qemu_file_get_error(s-file); -if (error) { -DPRINTF(flush when error, bailing: %s\n, strerror(-error)); -return error; -} - -if (size = 0) { -return size; -} - -if (size (s-buffer_capacity - s-buffer_size)) { -DPRINTF(increasing buffer capacity from %zu by %zu\n, -s-buffer_capacity, size + 1024); - -s-buffer_capacity += size + 1024; - -s-buffer = g_realloc(s-buffer, s-buffer_capacity); -} - -memcpy(s-buffer + s-buffer_size, buf, size); -s-buffer_size += size; - -return size; -} - -static int buffered_close(void *opaque) -{ -QEMUFileBuffered *s = opaque; -ssize_t ret = 0; -int ret2; - -DPRINTF(closing\n); - -s-xfer_limit = INT_MAX; -while (!qemu_file_get_error(s-file) s-buffer_size) { -ret = buffered_flush(s); -if (ret 0) { -break; -} -} - -ret2 = migrate_fd_close(s-migration_state); -if (ret = 0) { -ret = ret2; -} -ret = migrate_fd_close(s-migration_state); -s-migration_state-complete = true; -return ret; -} - -/* - * The meaning of the return values is: - * 0: We can continue sending - * 1: Time to stop - * negative: There has been an error - */ -static int buffered_rate_limit(void *opaque) -{ -QEMUFileBuffered *s = opaque; -int ret; - -ret
[Qemu-devel] [PATCH 02/30] add a version number to ram_list
From: Umesh Deshpande udesh...@redhat.com This will be used to detect if last_block might have become invalid across different calls to ram_save_live. Signed-off-by: Paolo Bonzini pbonz...@redhat.com Signed-off-by: Umesh Deshpande udesh...@redhat.com Signed-off-by: Juan Quintela quint...@redhat.com --- arch_init.c | 7 ++- cpu-all.h | 1 + exec.c | 5 - 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch_init.c b/arch_init.c index 4293557..b47313d 100644 --- a/arch_init.c +++ b/arch_init.c @@ -336,6 +336,7 @@ static RAMBlock *last_block; static ram_addr_t last_offset; static unsigned long *migration_bitmap; static uint64_t migration_dirty_pages; +static uint32_t last_version; static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr, ram_addr_t offset) @@ -406,7 +407,6 @@ static void migration_bitmap_sync(void) } } - /* * ram_save_block: Writes a page of memory to the stream f * @@ -558,6 +558,7 @@ static void reset_ram_globals(void) { last_block = NULL; last_offset = 0; +last_version = ram_list.version; sort_ram_list(); } @@ -613,6 +614,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) uint64_t expected_downtime; MigrationState *s = migrate_get_current(); +if (ram_list.version != last_version) { +reset_ram_globals(); +} + bytes_transferred_last = bytes_transferred; bwidth = qemu_get_clock_ns(rt_clock); diff --git a/cpu-all.h b/cpu-all.h index 6558a6f..e07c91c 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -500,6 +500,7 @@ typedef struct RAMBlock { typedef struct RAMList { uint8_t *phys_dirty; +uint32_t version; QLIST_HEAD(, RAMBlock) blocks_mru; QLIST_HEAD(, RAMBlock) blocks; } RAMList; diff --git a/exec.c b/exec.c index 718bbc2..1e04711 100644 --- a/exec.c +++ b/exec.c @@ -637,7 +637,6 @@ bool tcg_enabled(void) void cpu_exec_init_all(void) { #if !defined(CONFIG_USER_ONLY) -qemu_mutex_init(ram_list.mutex); memory_map_init(); io_mem_init(); #endif @@ -2575,6 +2574,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, QLIST_INSERT_HEAD(ram_list.blocks, new_block, next); QLIST_INSERT_HEAD(ram_list.blocks_mru, new_block, next_mru); +ram_list.version++; + ram_list.phys_dirty = g_realloc(ram_list.phys_dirty, last_ram_offset() TARGET_PAGE_BITS); memset(ram_list.phys_dirty + (new_block-offset TARGET_PAGE_BITS), @@ -2602,6 +2603,7 @@ void qemu_ram_free_from_ptr(ram_addr_t addr) if (addr == block-offset) { QLIST_REMOVE(block, next); QLIST_REMOVE(block, next_mru); +ram_list.version++; g_free(block); return; } @@ -2616,6 +2618,7 @@ void qemu_ram_free(ram_addr_t addr) if (addr == block-offset) { QLIST_REMOVE(block, next); QLIST_REMOVE(block, next_mru); +ram_list.version++; if (block-flags RAM_PREALLOC_MASK) { ; } else if (mem_path) { -- 1.7.11.7
Re: [Qemu-devel] [PATCH 00/30] Migration thread 20121017 edition
Il 18/10/2012 09:29, Juan Quintela ha scritto: v3: This is work in progress on top of the previous migration series just sent. - Introduces a thread for migration instead of using a timer and callback - remove the writting to the fd from the iothread lock - make the writes synchronous - Introduce a new pending method that returns how many bytes are pending for one save live section - last patch just shows printfs to see where the time is being spent on the migration complete phase. (yes it pollutes all uses of stop on the monitor) So far I have found that we spent a lot of time on bdrv_flush_all() It can take from 1ms to 600ms (yes, it is not a typo). That dwarfs the migration default downtime time (30ms). Stop all vcpus: - it works now (after the changes on qemu_cpu_is_vcpu on the previous series) caveat is that the time that brdv_flush_all() takes is unpredictable. Any silver bullets? You could reuse the block live migration item. In block_save_pending, start a bdrv_aio_flush() on all block devices that have already completed the previous one. But that's not a regression in the migration thread, isn't it? Paolo
[Qemu-devel] [PATCH 14/30] migration: include qemu-file.h
They don't use/know anything about buffered-file. Signed-off-by: Juan Quintela quint...@redhat.com --- migration-exec.c | 2 +- migration-fd.c | 2 +- migration-tcp.c | 2 +- migration-unix.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/migration-exec.c b/migration-exec.c index 908f22e..c451545 100644 --- a/migration-exec.c +++ b/migration-exec.c @@ -19,7 +19,7 @@ #include qemu_socket.h #include migration.h #include qemu-char.h -#include buffered_file.h +#include qemu-file.h #include block.h #include sys/types.h #include sys/wait.h diff --git a/migration-fd.c b/migration-fd.c index aba1be7..25f0245 100644 --- a/migration-fd.c +++ b/migration-fd.c @@ -18,7 +18,7 @@ #include migration.h #include monitor.h #include qemu-char.h -#include buffered_file.h +#include qemu-file.h #include block.h #include qemu_socket.h diff --git a/migration-tcp.c b/migration-tcp.c index 812ae22..fdf8b0f 100644 --- a/migration-tcp.c +++ b/migration-tcp.c @@ -17,7 +17,7 @@ #include qemu_socket.h #include migration.h #include qemu-char.h -#include buffered_file.h +#include qemu-file.h #include block.h //#define DEBUG_MIGRATION_TCP diff --git a/migration-unix.c b/migration-unix.c index bb41bac..64bc0c3 100644 --- a/migration-unix.c +++ b/migration-unix.c @@ -17,7 +17,7 @@ #include qemu_socket.h #include migration.h #include qemu-char.h -#include buffered_file.h +#include qemu-file.h #include block.h //#define DEBUG_MIGRATION_UNIX -- 1.7.11.7
Re: [Qemu-devel] [RFC V2 01/20] qcow2: Add deduplication to the qcow2 specification.
That's still too sparse for a formal documentation - what is the format of the deduplication table, and what do the bits in that table imply with regards to how the rest of the qcow2 file is used? I will add this in the next iteration. Best regards Benoît
Re: [Qemu-devel] [Patch]KVM: enabling per domain PLE
-Original Message- From: Avi Kivity [mailto:a...@redhat.com] Sent: Wednesday, October 17, 2012 6:12 PM To: Hu, Xuekun Cc: k...@vger.kernel.org; qemu-devel@nongnu.org; Zhang, Xiantao Subject: Re: [Patch]KVM: enabling per domain PLE On 10/17/2012 10:02 AM, Hu, Xuekun wrote: The problem with this is that it requires an administrator to understand the workload, not only of the guest, but also of other guests on the machine. With low overcommit, a high PLE window reduces unneeded exits, but with high overcommit we need those exits to reduce spinning. In addition, most kvm hosts don't have an administrator. They are controlled by a management system, which means we'll need some algorithm in userspace to control the PLE window. Taking the two together, we need a dynamic (for changing workloads) algorithm. There are threads discussing this dynamic algorithm, we are making slow progress because it's such a difficult problem, but I think this is much more useful than anything requiring user intervention. Avi, agreed that dynamic adaptive ple should be the best solution. However currently it is a difficult problem like you said. Our solution just gives user a choice who know how to set the two PLE values. So the solution is a compromise solution, which should be better than nothing, for now? :-) Let's see how the PLE thread works out. Yes the patches give the user control, but we need to make sure the user knows how to control it (in fact your patch doesn't even update the documentation). Just throwing out a new ioctl, even if it is documented, doesn't mean that userspace will begin to use it, or that users will exploit it. Do you have a specific use case in mind? Yes, some cloud vendors already knew that different PLE values has big performance impact on their applications. They want one interface for them to set. And I think the big cloud vendors should have administrators that have experience on PLE tuning. :-) -- error compiling committee.c: too many arguments to function
[Qemu-devel] [PATCH] qmp: handle stop/cont in INMIGRATE state
Right now, stop followed by an incoming migration will let the virtual machine start. cont before an incoming migration instead will fail. This is bad because the actual behavior is not predictable; it is racy with respect to the start of the incoming migration. That's because incoming migration is blocking, and thus will delay the processing of stop/cont until the end of the migration. In addition, there's nothing that really prevents the user from typing the block device's passwords before incoming migration is done, so we may as well allow that. Both things can be fixed by just toggling the autostart variable when stop/cont are called in INMIGRATE state. Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- qmp.c | 17 +++-- 1 file modificato, 11 inserzioni(+), 6 rimozioni(-) diff --git a/qmp.c b/qmp.c index 36c54c5..2c8d559 100644 --- a/qmp.c +++ b/qmp.c @@ -85,7 +85,11 @@ void qmp_quit(Error **err) void qmp_stop(Error **errp) { -vm_stop(RUN_STATE_PAUSED); +if (runstate_check(RUN_STATE_INMIGRATE)) { +autostart = 0; +} else { +vm_stop(RUN_STATE_PAUSED); +} } void qmp_system_reset(Error **errp) @@ -144,10 +148,7 @@ void qmp_cont(Error **errp) { Error *local_err = NULL; -if (runstate_check(RUN_STATE_INMIGRATE)) { -error_set(errp, QERR_MIGRATION_EXPECTED); -return; -} else if (runstate_check(RUN_STATE_INTERNAL_ERROR) || +if (runstate_check(RUN_STATE_INTERNAL_ERROR) || runstate_check(RUN_STATE_SHUTDOWN)) { error_set(errp, QERR_RESET_REQUIRED); return; @@ -162,7 +163,11 @@ void qmp_cont(Error **errp) return; } -vm_start(); +if (runstate_check(RUN_STATE_INMIGRATE)) { +autostart = 1; +} else { +vm_start(); +} } void qmp_system_wakeup(Error **errp) -- 1.7.12.1
[Qemu-devel] [PATCH] vnc-tls: Fix compilation with newer versions of GNU-TLS
In my installation of GNU-TLS (v3.0.23) the type gnutls_anon_server_credentials is marked deprecated, so -Werror breaks compilation. Simply replacing it with the newer ..._t version fixed the compilation on my machine (Slackware 14.0). I cannot tell how far back this new type goes, at least the header file in RHEL 5.0 (v1.4.1) seems to have it already. If someone finds a broken distribution, tell me and I insert some compat code. Signed-off-by: Andre Przywara andre.przyw...@amd.com --- ui/vnc-tls.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ui/vnc-tls.c b/ui/vnc-tls.c index a7f7d07..ba3827b 100644 --- a/ui/vnc-tls.c +++ b/ui/vnc-tls.c @@ -99,9 +99,9 @@ static ssize_t vnc_tls_pull(gnutls_transport_ptr_t transport, } -static gnutls_anon_server_credentials vnc_tls_initialize_anon_cred(void) +static gnutls_anon_server_credentials_t vnc_tls_initialize_anon_cred(void) { -gnutls_anon_server_credentials anon_cred; +gnutls_anon_server_credentials_t anon_cred; int ret; if ((ret = gnutls_anon_allocate_server_credentials(anon_cred)) 0) { @@ -382,7 +382,7 @@ int vnc_tls_client_setup(struct VncState *vs, } } else { -gnutls_anon_server_credentials anon_cred = vnc_tls_initialize_anon_cred(); +gnutls_anon_server_credentials_t anon_cred = vnc_tls_initialize_anon_cred(); if (!anon_cred) { gnutls_deinit(vs-tls.session); vs-tls.session = NULL; -- 1.7.12.1
Re: [Qemu-devel] [PATCH] vnc-tls: Fix compilation with newer versions of GNU-TLS
On 10/18/12 11:16, Andre Przywara wrote: In my installation of GNU-TLS (v3.0.23) the type gnutls_anon_server_credentials is marked deprecated, so -Werror breaks compilation. Simply replacing it with the newer ..._t version fixed the compilation on my machine (Slackware 14.0). I cannot tell how far back this new type goes, at least the header file in RHEL 5.0 (v1.4.1) seems to have it already. If someone finds a broken distribution, tell me and I insert some compat code. Acked-by: Gerd Hoffmann kra...@redhat.com cheers, Gerd
Re: [Qemu-devel] [PATCH 2/3] chardev: fix QemuOpts lifecycle
Hi, @@ -2864,6 +2868,7 @@ void qemu_chr_delete(CharDriverState *chr) chr-chr_close(chr); g_free(chr-filename); g_free(chr-label); +qemu_opts_del(chr-opts); g_free(chr); } Didn't consider this in my first review, but is chardev_init_func() fine with this? Basically, if we chardev_remove a device created by it (is this possible?) then the opts will be freed but it will remain inserted in its QemuOptsList. Hmm? Don't see your issue here. qemu_opts_del will also unlink from QemuOptsList ... cheers, Gerd
Re: [Qemu-devel] [RFC PATCH v3 06/19] Implement -dimm command line option
On Wed, Oct 17, 2012 at 12:03:51PM +0200, Avi Kivity wrote: On 10/17/2012 11:19 AM, Vasilis Liaskovitis wrote: I don't think so, but probably there's a limit of DIMMs that real controllers have, something like 8 max. In the case of i440fx specifically, do you mean that we should model the DRB (Dram row boundary registers in section 3.2.19 of the i440fx spec) ? The i440fx DRB registers only supports up to 8 DRAM rows (let's say 1 row maps 1-1 to a DimmDevice for this discussion) and only supports up to 2GB of memory afaict (bit 31 and above is ignored). I 'd rather not model this part of the i440fx - having only 8 DIMMs seems too restrictive. The rest of the patchset supports up to 255 DIMMs so it would be a waste imho to model an old pc memory controller that only supports 8 DIMMs. There was also an old discussion about i440fx modeling here: https://lists.nongnu.org/archive/html/qemu-devel/2011-07/msg02705.html the general direction was that i440fx is too old and we don't want to precisely emulate the DRB registers, since they lack flexibility. Possible solutions: 1) is there a newer and more flexible chipset that we could model? Look for q35 on this list. thanks, I 'll take a look. It sounds like the other options below are more straightforward now, but let me know if you prefer q35 integration as a priority. 2) model and document ^--- the critical bit a generic (non-existent) i440fx that would support more and larger DIMMs. E.g. support 255 DIMMs. If we want to use a description similar to the i440fx DRB registers, the registers would take up a lot of space. In i440fx there is one 8-bit DRB register per DIMM, and DRB[i] describes how many 8MB chunks are contained in DIMMs 0...i. So, the register values are cumulative (and total described memory cannot exceed 256x8MB = 2GB) Our i440fx has already been extended by support for pci and cpu hotplug, and I see no reason not to extend it for memory. We can allocate extra mmio space for registers if needed. Usually I'm against this sort of thing, but in this case we don't have much choice. ok We could for example model: - an 8-bit non-cumulative register for each DIMM, denoting how many 128MB chunks it contains. This allowes 32GB for each DIMM, and with 255 DIMMs we describe a bit less than 8TB. These registers require 255 bytes. - a 16-bit cumulative register for each DIMM again for 128MB chunks. This allows us to describe 8TB of memory (but the registers take up double the space, because they describe cumulative memory amounts) There is no reason to save space. Why not have two 64-bit registers per DIMM, one describing the size and the other the base address, both in bytes? Use a few low order bits for control. Do we want this generic scheme above to be tied into the i440fx/pc machine? Or have it as a separate generic memory bus / pmc usable by others (e.g. in hw/dimm.c)? The 64-bit values you describe are already part of DimmDevice properties, but they are not hardware registers described as part of a chipset. In terms of control bits, did you want to mimic some other chipset registers? - any examples would be useful. 3) let everything be handled/abstracted by dimmbus - the chipset DRB modelling is not done (at least for i440fx, other machines could). This is the least precise in terms of emulation. On the other hand, if we are not really trying to emulate the real (too restrictive) hardware, does it matter? We could emulate base memory using the chipset, and extra memory using the scheme above. This allows guests that are tied to the chipset to work, and guests that have more awareness (seabios) to use the extra features. But if we use the real i440fx pmc DRBs for base memory, this means base memory would be = 2GB, right? Sounds like we 'd need to change the DRBs anyway to describe useful amounts of base memory (e.g. 512MB chunks and check against address lines [36:29] can describe base memory up to 64GB, though that's still limiting for very large VMs). But we'd be diverting from the real hardware again. Then we can model base memory with tweaked i440fx pmc's DRB registers - we could only use DRB[0] (one DIMM describing all of base memory) or more. DIMMs would be allowed to be hotplugged in the generic mem-controller scheme only (unless it makes sense to allow hotplug in the remaining pmc DRBs and start using the generic scheme once we run out of emulated DRBs) thanks, - Vasilis
Re: [Qemu-devel] [PATCH v3] qemu-img: document 'info --backing-chain'
Am 18.10.2012 07:55, schrieb Kashyap Chamarthy: Signed-off-by: Kashyap Chamarthy kashyap...@gmail.com --- qemu-img-cmds.hx | 4 ++-- qemu-img.texi| 21 - 2 files changed, 22 insertions(+), 3 deletions(-) Thanks, applied to the block branch. Kevin
Re: [Qemu-devel] [PATCH] block: bdrv_create(): don't leak cco.filename on error
Am 17.10.2012 21:45, schrieb Luiz Capitulino: Signed-off-by: Luiz Capitulino lcapitul...@redhat.com Thanks, applied to the block branch. Kevin
[Qemu-devel] [PATCH V13 2/6] make path_has_protocol non static
We will use path_has_protocol outside block.c, so just make it public. Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com Reviewed-by: Michael Roth mdr...@linux.vnet.ibm.com --- block.c |2 +- block.h |1 + 2 files changed, 2 insertions(+), 1 deletions(-) diff --git a/block.c b/block.c index f639655..03ba485 100644 --- a/block.c +++ b/block.c @@ -198,7 +198,7 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs, } /* check if the path starts with protocol: */ -static int path_has_protocol(const char *path) +int path_has_protocol(const char *path) { const char *p; diff --git a/block.h b/block.h index 7842d85..364ba04 100644 --- a/block.h +++ b/block.h @@ -329,6 +329,7 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn); char *get_human_readable_size(char *buf, int buf_size, int64_t size); int path_is_absolute(const char *path); +int path_has_protocol(const char *path); void path_combine(char *dest, int dest_size, const char *base_path, const char *filename); -- 1.7.1
[Qemu-devel] [PATCH V13 0/6] add-cow file format
It will introduce a new file format: add-cow. The add-cow file format makes it possible to perform copy-on-write on top of a raw disk image. When we know that no backing file clusters remain visible (e.g. we have streamed the entire image and copied all data from the backing file), then it is possible to discard the add-cow file and use the raw image file directly. This feature adds the copy-on-write feature to raw files (which cannot support it natively) while allowing us to get full performance again later when we no longer need copy-on-write. add-cow can benefit from other available functions, such as path_has_protocol and qed_read_string, so we will make them public. snapshot_blkdev are not supported now for add-cow. Will add it in futher patches. These patches are using QemuOpts parser, former patches could be found here: http://patchwork.ozlabs.org/patch/191347/ v12-v13: 1) Use QemuOpts, not QEMUOptionParameter 2) cluster_size configuable 3) Refactor block-cache.c 4) Correct qemu-iotests script. 5) Other bug fix. v11-v12: 1) Removed un-used feature bit. 2) Share cache code with qcow2.c. 3) Remove snapshot_blkdev support, will add it in another patch. 5) COW Bitmap field in add-cow file will be multiple of 65536. 6) fix grammer and typo. Dong Xu Wang (6): docs: document for add-cow file format make path_has_protocol non static qed_read_string to bdrv_read_string rename qcow2-cache.c to block-cache.c add-cow file format core code. qemu-iotests: add add-cow iotests support. block.c | 29 ++- block.h |3 + block/Makefile.objs |4 +- block/add-cow.c | 693 ++ block/add-cow.h | 85 + block/block-cache.c | 321 +++ block/block-cache.h | 77 + block/qcow2-cache.c | 323 block/qcow2-cluster.c| 54 ++-- block/qcow2-refcount.c | 67 +++-- block/qcow2.c| 21 +- block/qcow2.h| 24 +-- block/qed.c | 34 +-- block_int.h |2 + docs/specs/add-cow.txt | 139 + tests/qemu-iotests/017 |2 +- tests/qemu-iotests/020 |2 +- tests/qemu-iotests/common|6 + tests/qemu-iotests/common.rc | 15 +- trace-events | 13 +- 20 files changed, 1465 insertions(+), 449 deletions(-) create mode 100644 block/add-cow.c create mode 100644 block/add-cow.h create mode 100644 block/block-cache.c create mode 100644 block/block-cache.h delete mode 100644 block/qcow2-cache.c create mode 100644 docs/specs/add-cow.txt
[Qemu-devel] [PATCH V13 1/6] docs: document for add-cow file format
Document for add-cow format, the usage and spec of add-cow are introduced. Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com --- docs/specs/add-cow.txt | 139 1 files changed, 139 insertions(+), 0 deletions(-) create mode 100644 docs/specs/add-cow.txt diff --git a/docs/specs/add-cow.txt b/docs/specs/add-cow.txt new file mode 100644 index 000..dc1e107 --- /dev/null +++ b/docs/specs/add-cow.txt @@ -0,0 +1,139 @@ +== General == + +The raw file format does not support backing files or copy on write feature. +The add-cow image format makes it possible to use backing files with raw +image by keeping a separate .add-cow metadata file. Once all sectors +have been written into the raw image it is safe to discard the .add-cow +and backing files, then we can use the raw image directly. + +An example usage of add-cow would look like:: +(ubuntu.img is a disk image which has been installed OS.) +1) Create a raw image with the same size of ubuntu.img +qemu-img create -f raw test.raw 8G +2) Create an add-cow image which will store dirty bitmap +qemu-img create -f add-cow test.add-cow \ +-o backing_file=ubuntu.img,image_file=test.raw +3) Run qemu with add-cow image +qemu -drive if=virtio,file=test.add-cow + +test.raw may be larger than ubuntu.img, in that case, the size of test.add-cow +will be calculated from the size of test.raw. + +=Specification= + +The file format looks like this: + + +---+-+-+ + | Header| Reserved |COW bitmap | + +---+-+-+ + +All numbers in add-cow are stored in Little Endian byte order. + +== Header == + +The Header is included in the first bytes: +(#define HEADER_SIZE (4096 * header_size)) +Byte0 - 7: magic +add-cow magic string (ADD_COW\xff). + +8 - 11:version +Version number (only valid value is 1 now). + +12 - 15:backing file name offset +Offset in the add-cow file at which the backing file +name is stored (NB: The string is not nul-terminated). +If backing file name does NOT exist, this field will be +0. Must be between 80 and [HEADER_SIZE - 2](a file name +must be at least 1 byte). + +16 - 19:backing file name size +Length of the backing file name in bytes. It will be 0 +if the backing file name offset is 0. If backing file +name offset is non-zero, then it must be non-zero. Must +be less than [HEADER_SIZE - 80] to fit in the reserved +part of the header. + +20 - 23:image file name offset +Offset in the add-cow file at which the image file name +is stored (NB: The string is not null terminated). It +must be between 80 and [HEADER_SIZE - 2]. + +24 - 27:image file name size +Length of the image file name in bytes. +Must be less than [HEADER_SIZE - 80] to fit in the reserved +part of the header. + +28 - 31:cluster bits +Number of bits that are used for addressing an offset +within a cluster (1 cluster_bits is the cluster size). +Must not be less than 9 (i.e. 512 byte clusters). + +Note: qemu as of today has an implementation limit of 2 MB +as the maximum cluster size and won't be able to open images +with larger cluster sizes. + +32 - 39:features +Bitmask of features. An implementation can safely ignore +any unknown bits that are set. + +Bit 0: All allocated bit. If this bit is set then +backing file and COW bitmap will not be used, +and can read from or write to image file directly. + +Bits 1-63: Reserved (set to 0) + +40 - 47:optional features +Not used now. Reserved for future use. It must be set to 0. +And must be ignored while reading. + +48 - 51:header size +The header field is variable-sized. This field indicates +how many 4096 bytes will be used to store add-cow header. +In add-cow v1, it is fixed to 1, so the header size will +be 4096 * 1 = 4096 bytes. + +52 - 67:backing file format +
[Qemu-devel] [PATCH V13 4/6] rename qcow2-cache.c to block-cache.c
We will re-use qcow2-cache as block layer common cache code, so change its name and made some changes, define a struct named BlockTableType, pass BlockTableType and table size parameters to block cache initialization function. Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com --- block/Makefile.objs|3 +- block/block-cache.c| 317 +++ block/block-cache.h| 76 +++ block/qcow2-cache.c| 323 block/qcow2-cluster.c | 54 + block/qcow2-refcount.c | 67 ++- block/qcow2.c | 21 ++-- block/qcow2.h | 24 +--- trace-events | 13 +- 9 files changed, 483 insertions(+), 415 deletions(-) create mode 100644 block/block-cache.c create mode 100644 block/block-cache.h delete mode 100644 block/qcow2-cache.c diff --git a/block/Makefile.objs b/block/Makefile.objs index 554f429..f128b78 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -1,5 +1,6 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o -block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o +block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o +block-obj-y += block-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o diff --git a/block/block-cache.c b/block/block-cache.c new file mode 100644 index 000..bf5c57c --- /dev/null +++ b/block/block-cache.c @@ -0,0 +1,317 @@ +/* + * QEMU Block Layer Cache + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Dong Xu Wang wdon...@linux.vnet.ibm.com + * + * This file is based on qcow2-cache.c, see its copyrights below: + * + * L2/refcount table cache for the QCOW2 format + * + * Copyright (c) 2010 Kevin Wolf kw...@redhat.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include block_int.h +#include qemu-common.h +#include trace.h +#include block-cache.h + +BlockCache *block_cache_create(BlockDriverState *bs, int num_tables, + size_t cluster_size, BlockTableType type) +{ +BlockCache *c; +int i; + +c = g_malloc0(sizeof(*c)); +c-size = num_tables; +c-entries = g_malloc0(sizeof(*c-entries) * num_tables); +c-table_type = type; +c-cluster_size = cluster_size; + +for (i = 0; i c-size; i++) { +c-entries[i].table = qemu_blockalign(bs, cluster_size); +} + +return c; +} + +int block_cache_destroy(BlockDriverState *bs, BlockCache *c) +{ +int i; + +for (i = 0; i c-size; i++) { +assert(c-entries[i].ref == 0); +qemu_vfree(c-entries[i].table); +} + +g_free(c-entries); +g_free(c); + +return 0; +} + +static int block_cache_flush_dependency(BlockDriverState *bs, BlockCache *c) +{ +int ret; + +ret = block_cache_flush(bs, c-depends); +if (ret 0) { +return ret; +} + +c-depends = NULL; +c-depends_on_flush = false; + +return 0; +} + +static int block_cache_entry_flush(BlockDriverState *bs, BlockCache *c, int i) +{ +int ret = 0; + +if (!c-entries[i].dirty || !c-entries[i].offset) { +return 0; +} + +trace_block_cache_entry_flush(qemu_coroutine_self(), c-table_type, i); + +if (c-depends) { +ret = block_cache_flush_dependency(bs, c); +} else if (c-depends_on_flush) { +ret = bdrv_flush(bs-file); +if (ret = 0) { +c-depends_on_flush = false; +} +} + +if (ret 0) { +return ret; +} + +if (c-table_type == BLOCK_TABLE_REF) { +BLKDBG_EVENT(bs-file, BLKDBG_REFBLOCK_UPDATE_PART); +} else if (c-table_type == BLOCK_TABLE_L2) { +BLKDBG_EVENT(bs-file, BLKDBG_L2_UPDATE); +} + +ret = bdrv_pwrite(bs-file, c-entries[i].offset, +
[Qemu-devel] [PATCH V13 6/6] qemu-iotests: add add-cow iotests support.
This patch will use qemu-iotests to test add-cow file format. Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com --- tests/qemu-iotests/017 |2 +- tests/qemu-iotests/020 |2 +- tests/qemu-iotests/common|6 ++ tests/qemu-iotests/common.rc | 15 ++- 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017 index 66951eb..d31432f 100755 --- a/tests/qemu-iotests/017 +++ b/tests/qemu-iotests/017 @@ -40,7 +40,7 @@ trap _cleanup; exit \$status 0 1 2 3 15 . ./common.pattern # Any format supporting backing files -_supported_fmt qcow qcow2 vmdk qed +_supported_fmt qcow qcow2 vmdk qed add-cow _supported_proto generic _supported_os Linux diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020 index 2fb0ff8..3dbb495 100755 --- a/tests/qemu-iotests/020 +++ b/tests/qemu-iotests/020 @@ -42,7 +42,7 @@ trap _cleanup; exit \$status 0 1 2 3 15 . ./common.pattern # Any format supporting backing files -_supported_fmt qcow qcow2 vmdk qed +_supported_fmt qcow qcow2 vmdk qed add-cow _supported_proto generic _supported_os Linux diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 1f6fdf5..4c06895 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -128,6 +128,7 @@ common options check options -rawtest raw (default) -cowtest cow +-add-cowtest add-cow -qcow test qcow -qcow2 test qcow2 -qedtest qed @@ -163,6 +164,11 @@ testlist options xpand=false ;; + -add-cow) +IMGFMT=add-cow +xpand=false +;; + -qcow) IMGFMT=qcow xpand=false diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index d534e94..f48b02a 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -97,6 +97,16 @@ _make_test_img() fi if [ \( $IMGFMT = qcow2 -o $IMGFMT = qed \) -a -n $CLUSTER_SIZE ]; then optstr=$(_optstr_add $optstr cluster_size=$CLUSTER_SIZE) +elif [ $IMGFMT = add-cow ]; then +local IMG=$TEST_IMG.raw +if [ $1 = -b ]; then +IMG=$IMG.b +$QEMU_IMG create -f raw $IMG $image_size/dev/null +extra_img_options=-o image_file=$IMG $extra_img_options +else +$QEMU_IMG create -f raw $IMG $image_size/dev/null +extra_img_options=-o image_file=$IMG +fi fi if [ -n $optstr ]; then @@ -114,7 +124,8 @@ _make_test_img() -e s# compat='[^']*'##g \ -e s# compat6=\\(on\\|off\\)##g \ -e s# static=\\(on\\|off\\)##g \ --e s# lazy_refcounts=\\(on\\|off\\)##g +-e s# lazy_refcounts=\\(on\\|off\\)##g \ +-e s# image_file='[^']*'##g } _cleanup_test_img() @@ -125,6 +136,8 @@ _cleanup_test_img() rm -f $TEST_DIR/t.$IMGFMT rm -f $TEST_DIR/t.$IMGFMT.orig rm -f $TEST_DIR/t.$IMGFMT.base +rm -f $TEST_DIR/t.$IMGFMT.raw +rm -f $TEST_DIR/t.$IMGFMT.raw.b ;; rbd) -- 1.7.1
[Qemu-devel] [PATCH V13 5/6] add-cow file format core code.
add-cow file format core code. It use block-cache.c as cache code. It lacks of snapshot_blkdev support. Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com --- block/Makefile.objs |1 + block/add-cow.c | 693 +++ block/add-cow.h | 85 +++ block/block-cache.c |4 + block/block-cache.h |1 + block_int.h |2 + 6 files changed, 786 insertions(+), 0 deletions(-) create mode 100644 block/add-cow.c create mode 100644 block/add-cow.h diff --git a/block/Makefile.objs b/block/Makefile.objs index f128b78..ed9222d 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -1,5 +1,6 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o +block-obj-y += add-cow.o block-obj-y += block-cache.o block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o diff --git a/block/add-cow.c b/block/add-cow.c new file mode 100644 index 000..15c86ab --- /dev/null +++ b/block/add-cow.c @@ -0,0 +1,693 @@ +/* + * QEMU ADD-COW Disk Format + * + * Copyright IBM, Corp. 2012 + * + * Authors: + * Dong Xu Wang wdon...@linux.vnet.ibm.com + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include qemu-common.h +#include block_int.h +#include module.h +#include add-cow.h + +static void add_cow_header_le_to_cpu(const AddCowHeader *le, AddCowHeader *cpu) +{ +cpu-magic = le64_to_cpu(le-magic); +cpu-version= le32_to_cpu(le-version); + +cpu-backing_filename_offset= le32_to_cpu(le-backing_filename_offset); +cpu-backing_filename_size = le32_to_cpu(le-backing_filename_size); + +cpu-image_filename_offset = le32_to_cpu(le-image_filename_offset); +cpu-image_filename_size= le32_to_cpu(le-image_filename_size); + +cpu-cluster_bits = le32_to_cpu(le-cluster_bits); +cpu-features = le64_to_cpu(le-features); +cpu-optional_features = le64_to_cpu(le-optional_features); +cpu-header_pages_size = le32_to_cpu(le-header_pages_size); + +memcpy(cpu-backing_fmt, le-backing_fmt, sizeof(cpu-backing_fmt)); +memcpy(cpu-image_fmt, le-image_fmt, sizeof(cpu-image_fmt)); +} + +static void add_cow_header_cpu_to_le(const AddCowHeader *cpu, AddCowHeader *le) +{ +le-magic = cpu_to_le64(cpu-magic); +le-version = cpu_to_le32(cpu-version); + +le-backing_filename_offset = cpu_to_le32(cpu-backing_filename_offset); +le-backing_filename_size = cpu_to_le32(cpu-backing_filename_size); + +le-image_filename_offset = cpu_to_le32(cpu-image_filename_offset); +le-image_filename_size = cpu_to_le32(cpu-image_filename_size); + +le-cluster_bits= cpu_to_le32(cpu-cluster_bits); +le-features= cpu_to_le64(cpu-features); +le-optional_features = cpu_to_le64(cpu-optional_features); +le-header_pages_size = cpu_to_le32(cpu-header_pages_size); +memcpy(le-backing_fmt, cpu-backing_fmt, sizeof(cpu-backing_fmt)); +memcpy(le-image_fmt, cpu-image_fmt, sizeof(cpu-image_fmt)); +} + +static int add_cow_probe(const uint8_t *buf, int buf_size, const char *filename) +{ +const AddCowHeader *header = (const AddCowHeader *)buf; + +if (le64_to_cpu(header-magic) == ADD_COW_MAGIC +le32_to_cpu(header-version) == ADD_COW_VERSION) { +return 100; +} else { +return 0; +} +} + +static int add_cow_create(const char *filename, QemuOpts *opts) +{ +AddCowHeader header = { +.magic = ADD_COW_MAGIC, +.version = ADD_COW_VERSION, +.features = 0, +.optional_features = 0, +.header_pages_size = ADD_COW_DEFAULT_PAGE_SIZE, +}; +AddCowHeader le_header; +int64_t image_len = 0; +const char *backing_filename = NULL; +const char *backing_fmt = NULL; +const char *image_filename = NULL; +const char *image_format = NULL; +BlockDriverState *bs, *image_bs = NULL, *backing_bs = NULL; +BlockDriver *drv = bdrv_find_format(add-cow); +BDRVAddCowState s; +size_t cluster_size; +int ret; + +image_len = qemu_opt_get_number(opts, BLOCK_OPT_SIZE, 0); +backing_filename = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); +backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); +image_filename = qemu_opt_get(opts, BLOCK_OPT_IMAGE_FILE); +image_format = qemu_opt_get(opts, BLOCK_OPT_IMAGE_FMT); +cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, + ADD_COW_CLUSTER_SIZE); + +header.cluster_bits = ffs(cluster_size) - 1; +if (header.cluster_bits MIN_CLUSTER_BITS || +header.cluster_bits
Re: [Qemu-devel] [PATCH 3/3] chardev: add hotplug support.
On 10/17/12 12:09, Gerd Hoffmann wrote: This patch adds chardev_add and chardev_remove monitor commands. They work similar to the netdev_{add,del} commands. The hmp version of chardev_add accepts like the -chardev command line option does. The qmp version expects the arguments being passed as named parameters. Trying another approach, see attached patch. This adds backend-specific qemu commands to add chardevs, with just the parameters needed for the specific backend. Starting with file and tty, both accepting a path. 'file' is nice for testing, 'tty' very useful when hotplugging serial devices on the host. Others can be added as needed. We probably don't need all of them. For example hotplugging the 'stdio' chardev doesn't make much sense. Advantage #1: Cleaner API. Advantage #2: No legacy syntax headache when qomifying chardevs. Comments? cheers, Gerd From 7f80af55530c9a448e4aed5a41c76a8e9514a27c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann kra...@redhat.com Date: Thu, 11 Oct 2012 14:53:00 +0200 Subject: [PATCH] chardev: add hotplug support. This patch adds chardev_add_file, chardev_add_tty and chardev_remove monitor commands. chardev_add_file and chardev_add_tty expect an id and a path, they create a file/tty chardev. chardev_del just takes an id argument and zaps the chardev specified. Signed-off-by: Gerd Hoffmann kra...@redhat.com --- hmp-commands.hx | 44 +++ hmp.c| 29 hmp.h|3 ++ qapi-schema.json | 43 ++ qemu-char.c | 41 + qemu-char.h |2 + qmp-commands.hx | 76 ++ 7 files changed, 238 insertions(+), 0 deletions(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index e0b537d..ecfc497 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -1404,6 +1404,50 @@ passed since 1970, i.e. unix epoch. ETEXI { +.name = chardev_add_file, +.args_type = id:s,path:s, +.params = id path , +.help = add file chardev, +.mhandler.cmd = hmp_chardev_add_file, +}, + +STEXI +@item chardev_add_file id path +@findex chardev_add_file + +ETEXI + +{ +.name = chardev_add_tty, +.args_type = id:s,path:s, +.params = id path , +.help = add tty chardev, +.mhandler.cmd = hmp_chardev_add_tty, +}, + +STEXI +@item chardev_add_tty id path +@findex chardev_add_tty + +ETEXI + +{ +.name = chardev_remove, +.args_type = id:s, +.params = id, +.help = remove chardev, +.mhandler.cmd = hmp_chardev_remove, +}, + +STEXI +@item chardev_remove id +@findex chardev_remove + +Removes the chardev @var{id}. + +ETEXI + +{ .name = info, .args_type = item:s?, .params = [subcommand], diff --git a/hmp.c b/hmp.c index 70bdec2..346dd29 100644 --- a/hmp.c +++ b/hmp.c @@ -1209,3 +1209,32 @@ void hmp_screen_dump(Monitor *mon, const QDict *qdict) qmp_screendump(filename, err); hmp_handle_error(mon, err); } + +static void hmp_chardev_add_path(Monitor *mon, const QDict *qdict, + const char *backend) +{ +const char *id = qdict_get_str(qdict, args); +const char *path = qdict_get_str(qdict, path); +Error *local_err = NULL; + +qmp_chardev_add_path(id, path, backend, local_err); +hmp_handle_error(mon, local_err); +} + +void hmp_chardev_add_file(Monitor *mon, const QDict *qdict) +{ +hmp_chardev_add_path(mon, qdict, file); +} + +void hmp_chardev_add_tty(Monitor *mon, const QDict *qdict) +{ +hmp_chardev_add_path(mon, qdict, tty); +} + +void hmp_chardev_remove(Monitor *mon, const QDict *qdict) +{ +Error *local_err = NULL; + +qmp_chardev_remove(qdict_get_str(qdict, id), local_err); +hmp_handle_error(mon, local_err); +} diff --git a/hmp.h b/hmp.h index 71ea384..107941d 100644 --- a/hmp.h +++ b/hmp.h @@ -75,5 +75,8 @@ void hmp_getfd(Monitor *mon, const QDict *qdict); void hmp_closefd(Monitor *mon, const QDict *qdict); void hmp_send_key(Monitor *mon, const QDict *qdict); void hmp_screen_dump(Monitor *mon, const QDict *qdict); +void hmp_chardev_add_file(Monitor *mon, const QDict *qdict); +void hmp_chardev_add_tty(Monitor *mon, const QDict *qdict); +void hmp_chardev_remove(Monitor *mon, const QDict *qdict); #endif diff --git a/qapi-schema.json b/qapi-schema.json index f9dbdae..76e765b 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -2796,3 +2796,46 @@ # Since: 0.14.0 ## { 'command': 'screendump', 'data': {'filename': 'str'} } + +## +# @chardev-add-file: +# +# Add a file chardev +# +# @id: the chardev's ID, must be unique +# @path: file path +# +# Returns: Nothing on success +# +# Since: 1.3.0 +## +{ 'command': 'chardev-add-file', 'data': {'id' : 'str', + 'path' : 'str'
[Qemu-devel] [PATCH V13 3/6] qed_read_string to bdrv_read_string
Make qed_read_string function to a common interface, so move it to block.c. Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com --- block.c | 27 +++ block.h |2 ++ block/qed.c | 34 -- 3 files changed, 33 insertions(+), 30 deletions(-) diff --git a/block.c b/block.c index 03ba485..9afb7b5 100644 --- a/block.c +++ b/block.c @@ -215,6 +215,33 @@ int path_has_protocol(const char *path) return *p == ':'; } +/** + * Read a string of known length from the image file + * + * @bs: Image file + * @offset: File offset to start of string, in bytes + * @n: String length in bytes + * @buf:Destination buffer + * @buflen: Destination buffer length in bytes + * @ret:0 on success, -errno on failure + * + * The string is NUL-terminated. + */ +int bdrv_read_string(BlockDriverState *bs, uint64_t offset, size_t n, + char *buf, size_t buflen) +{ +int ret; +if (n = buflen) { +return -EINVAL; +} +ret = bdrv_pread(bs, offset, buf, n); +if (ret 0) { +return ret; +} +buf[n] = '\0'; +return 0; +} + int path_is_absolute(const char *path) { #ifdef _WIN32 diff --git a/block.h b/block.h index 364ba04..166e00c 100644 --- a/block.h +++ b/block.h @@ -168,6 +168,8 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, const void *buf, int count); int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +int bdrv_read_string(BlockDriverState *bs, uint64_t offset, size_t n, + char *buf, size_t buflen); int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, diff --git a/block/qed.c b/block/qed.c index 0a9dbe8..096de21 100644 --- a/block/qed.c +++ b/block/qed.c @@ -217,33 +217,6 @@ static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size, } /** - * Read a string of known length from the image file - * - * @file: Image file - * @offset: File offset to start of string, in bytes - * @n: String length in bytes - * @buf:Destination buffer - * @buflen: Destination buffer length in bytes - * @ret:0 on success, -errno on failure - * - * The string is NUL-terminated. - */ -static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n, - char *buf, size_t buflen) -{ -int ret; -if (n = buflen) { -return -EINVAL; -} -ret = bdrv_pread(file, offset, buf, n); -if (ret 0) { -return ret; -} -buf[n] = '\0'; -return 0; -} - -/** * Allocate new clusters * * @s: QED state @@ -437,9 +410,10 @@ static int bdrv_qed_open(BlockDriverState *bs, int flags) return -EINVAL; } -ret = qed_read_string(bs-file, s-header.backing_filename_offset, - s-header.backing_filename_size, bs-backing_file, - sizeof(bs-backing_file)); +ret = bdrv_read_string(bs-file, s-header.backing_filename_offset, + s-header.backing_filename_size, + bs-backing_file, + sizeof(bs-backing_file)); if (ret 0) { return ret; } -- 1.7.1
[Qemu-devel] [PATCH 0/9] Incoming migration coroutine
Juan, here is the implementation of migration-in-a-coroutine, applying on top of your pull request. With these patches, the monitor (and in the future the NBD server) is responsive during migration. The first ten patches are just cleanups, generalizing some parts of QEMUFile and improving the way migration sockets are closed. The last two actually implement the feature. They are the opposite change of the nonblocking-blocking change that you implemented for the migration thread. However, the change is much simpler because we have no timers, and because of the use of coroutines. Without coroutines (and as in non-threaded migration), you have to proceed in two steps: first collect data in a buffer, then write it. This lets you handle EAGAIN only at precise points in buffered_flush/buffered_put_buffer, so that you can restart writing in migrate_fd_put_notify. This checkpointing is the reason why QEMUFileBuffered exists. With coroutines, you can just stop whenever you want with qemu_coroutine_yield. As soon as select tells you that you can read, you'll re-enter directly in qemu_get_buffer, read more data and pass it to the loading routines. Thanks, Paolo Paolo Bonzini (12): migration: unify stdio-based QEMUFile operations migration: consolidate QEMUFile methods in a single QEMUFileOps struct migration: add qemu_get_fd migration: replace qemu_stdio_fd with qemu_get_fd migration: clean up server sockets and handlers before invoking process_incoming_migration migration: use migrate_fd_close in migrate_fd_cleanup migration: use closesocket, not close migration: xxx_close will only be called once migration: close socket QEMUFile from socket_close migration: move qemu_fclose to process_incoming_migration migration: handle EAGAIN while reading QEMUFile migration: move process_incoming_migration to a coroutine buffered_file.c | 21 +-- migration-exec.c | 19 +++--- migration-fd.c | 36 +-- migration-tcp.c | 19 +++--- migration-unix.c | 17 +++-- migration.c | 47 +- qemu-file.h | 23 --- savevm.c | 188 --- 8 file modificati, 215 inserzioni(+), 155 rimozioni(-) -- 1.7.12.1
[Qemu-devel] [PATCH 02/12] migration: consolidate QEMUFile methods in a single QEMUFileOps struct
Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- buffered_file.c | 13 --- qemu-file.h | 16 + savevm.c| 108 +++- 3 file modificati, 79 inserzioni(+), 58 rimozioni(-) diff --git a/buffered_file.c b/buffered_file.c index ed92df1..a5c0b12 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -234,6 +234,14 @@ static void buffered_rate_tick(void *opaque) buffered_put_buffer(s, NULL, 0, 0); } +static const QEMUFileOps buffered_file_ops = { +.put_buffer = buffered_put_buffer, +.close = buffered_close, +.rate_limit = buffered_rate_limit, +.get_rate_limit = buffered_get_rate_limit, +.set_rate_limit = buffered_set_rate_limit, +}; + QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state) { QEMUFileBuffered *s; @@ -243,10 +251,7 @@ QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state) s-migration_state = migration_state; s-xfer_limit = migration_state-bandwidth_limit / 10; -s-file = qemu_fopen_ops(s, buffered_put_buffer, NULL, - buffered_close, buffered_rate_limit, - buffered_set_rate_limit, -buffered_get_rate_limit); +s-file = qemu_fopen_ops(s, buffered_file_ops); s-timer = qemu_new_timer_ms(rt_clock, buffered_rate_tick, s); diff --git a/qemu-file.h b/qemu-file.h index 9c8985b..c89e8e0 100644 --- a/qemu-file.h +++ b/qemu-file.h @@ -59,12 +59,16 @@ typedef int (QEMUFileRateLimit)(void *opaque); typedef int64_t (QEMUFileSetRateLimit)(void *opaque, int64_t new_rate); typedef int64_t (QEMUFileGetRateLimit)(void *opaque); -QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer, - QEMUFileGetBufferFunc *get_buffer, - QEMUFileCloseFunc *close, - QEMUFileRateLimit *rate_limit, - QEMUFileSetRateLimit *set_rate_limit, - QEMUFileGetRateLimit *get_rate_limit); +typedef struct QEMUFileOps { +QEMUFilePutBufferFunc *put_buffer; +QEMUFileGetBufferFunc *get_buffer; +QEMUFileCloseFunc *close; +QEMUFileRateLimit *rate_limit; +QEMUFileSetRateLimit *set_rate_limit; +QEMUFileGetRateLimit *get_rate_limit; +} QEMUFileOps; + +QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); QEMUFile *qemu_fopen(const char *filename, const char *mode); QEMUFile *qemu_fdopen(int fd, const char *mode); QEMUFile *qemu_fopen_socket(int fd); diff --git a/savevm.c b/savevm.c index 7068390..c3ce00a 100644 --- a/savevm.c +++ b/savevm.c @@ -162,12 +162,7 @@ void qemu_announce_self(void) #define IO_BUF_SIZE 32768 struct QEMUFile { -QEMUFilePutBufferFunc *put_buffer; -QEMUFileGetBufferFunc *get_buffer; -QEMUFileCloseFunc *close; -QEMUFileRateLimit *rate_limit; -QEMUFileSetRateLimit *set_rate_limit; -QEMUFileGetRateLimit *get_rate_limit; +const QEMUFileOps *ops; void *opaque; int is_write; @@ -256,6 +251,16 @@ static int stdio_fclose(void *opaque) return ret; } +static const QEMUFileOps stdio_pipe_read_ops = { +.get_buffer = stdio_get_buffer, +.close = stdio_pclose +}; + +static const QEMUFileOps stdio_pipe_write_ops = { +.put_buffer = stdio_put_buffer, +.close = stdio_pclose +}; + QEMUFile *qemu_popen(FILE *stdio_file, const char *mode) { QEMUFileStdio *s; @@ -270,11 +275,9 @@ QEMUFile *qemu_popen(FILE *stdio_file, const char *mode) s-stdio_file = stdio_file; if(mode[0] == 'r') { -s-file = qemu_fopen_ops(s, NULL, stdio_get_buffer, stdio_pclose, -NULL, NULL, NULL); +s-file = qemu_fopen_ops(s, stdio_pipe_read_ops); } else { -s-file = qemu_fopen_ops(s, stdio_put_buffer, NULL, stdio_pclose, -NULL, NULL, NULL); +s-file = qemu_fopen_ops(s, stdio_pipe_write_ops); } return s-file; } @@ -302,6 +305,16 @@ int qemu_stdio_fd(QEMUFile *f) return fd; } +static const QEMUFileOps stdio_file_read_ops = { +.get_buffer = stdio_get_buffer, +.close = stdio_fclose +}; + +static const QEMUFileOps stdio_file_write_ops = { +.put_buffer = stdio_put_buffer, +.close = stdio_fclose +}; + QEMUFile *qemu_fdopen(int fd, const char *mode) { QEMUFileStdio *s; @@ -319,11 +332,9 @@ QEMUFile *qemu_fdopen(int fd, const char *mode) goto fail; if(mode[0] == 'r') { -s-file = qemu_fopen_ops(s, NULL, stdio_get_buffer, stdio_fclose, -NULL, NULL, NULL); +s-file = qemu_fopen_ops(s, stdio_file_read_ops); } else { -s-file = qemu_fopen_ops(s, stdio_put_buffer, NULL, stdio_fclose, -NULL, NULL, NULL); +s-file = qemu_fopen_ops(s, stdio_file_write_ops); } return s-file; @@ -332,13 +343,17 @@
[Qemu-devel] [PATCH 07/12] migration: use closesocket, not close
Windows requires this. Migration does not quite work under Windows but let's be uniform across QEMU. Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- migration-tcp.c | 6 +++--- 1 file modificato, 3 inserzioni(+), 3 rimozioni(-) diff --git a/migration-tcp.c b/migration-tcp.c index 1bd8481..e797d23 100644 --- a/migration-tcp.c +++ b/migration-tcp.c @@ -45,7 +45,7 @@ static int tcp_close(MigrationState *s) int r = 0; DPRINTF(tcp_close\n); if (s-fd != -1) { -if (close(s-fd) 0) { +if (closesocket(s-fd) 0) { r = -errno; } s-fd = -1; @@ -97,7 +97,7 @@ static void tcp_accept_incoming_migration(void *opaque) c = qemu_accept(s, (struct sockaddr *)addr, addrlen); } while (c == -1 socket_error() == EINTR); qemu_set_fd_handler2(s, NULL, NULL, NULL, NULL); -close(s); +closesocket(s); DPRINTF(accepted migration\n); @@ -115,7 +115,7 @@ static void tcp_accept_incoming_migration(void *opaque) process_incoming_migration(f); qemu_fclose(f); out: -close(c); +closesocket(c); } int tcp_start_incoming_migration(const char *host_port, Error **errp) -- 1.7.12.1