On Tue, Apr 09, 2013 at 05:43:43PM +0200, Paolo Bonzini wrote: > Using qemu_memalign only leaves the RAM zero by chance, because libc > will usually use mmap to satisfy our huge requests. But memory will > not be zero when using MALLOC_PERTURB_ with a nonzero value. In the > case of incoming migration, this breaks a recently-introduced > invariant (commit f1c7279, migration: do not sent zero pages in > bulk stage, 2013-03-26). > > To fix this, use mmap ourselves to get a well-aligned, always zero > block for the RAM. Mmap-ed memory is easy to "trim" at the sides. > > This also removes the need to do something special on valgrind > (see commit c2a8238a, Support running QEMU on Valgrind, 2011-10-31), > thus effectively reverts that patch.
Hi Paolo, This patch introduced a regression bug in hot-removing device. (reproduce: 100%) (gdb) r --enable-kvm -mon chardev=qmp,mode=control,pretty=on -chardev socket,id=qmp,host=localhost,port=1234,server,nowait -vnc :0 -monitor stdio -vnc :0 -m 2000 /images/RHEL-Server-6.4-64-virtio.qcow2 -device virtio-net-pci,netdev=ndev1,id=id1 -netdev tap,id=ndev1 -device e1000,netdev=ndev2,id=id2 -netdev tap,id=ndev2 Starting program: /home/devel/qemu/x86_64-softmmu/qemu-system-x86_64 --enable-kvm -mon chardev=qmp,mode=control,pretty=on -chardev socket,id=qmp,host=localhost,port=1234,server,nowait -vnc :0 -monitor stdio -vnc :0 -m 2000 /images/RHEL-Server-6.4-64-virtio.qcow2 -device virtio-net-pci,netdev=ndev1,id=id1 -netdev tap,id=ndev1 -device e1000,netdev=ndev2,id=id2 -netdev tap,id=ndev2 (qemu) info network id1: index=0,type=nic,model=virtio-net-pci,macaddr=52:54:00:12:34:56 \ ndev1: index=0,type=tap,ifname=tap0,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown id2: index=0,type=nic,model=e1000,macaddr=52:54:00:12:34:57 \ ndev2: index=0,type=tap,ifname=tap1,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown (qemu) netdev_del ndev1 /etc/qemu-ifdown: could not launch network script (qemu) device_del id1 Program received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7ffff2adc700 (LWP 9766)] 0x00007ffff50cc6ec in free () from /lib64/libc.so.6 (gdb) bt full #0 0x00007ffff50cc6ec in free () from /lib64/libc.so.6 No symbol table info available. #1 0x00007ffff7efbb1b in qemu_vfree (ptr=0x7ffff1200000) at util/oslib-posix.c:135 No locals. #2 0x00007ffff7de7358 in qemu_ram_free (addr=2105868288) at /home/devel/qemu/exec.c:1161 block = 0x7ffff8ddc370 #3 0x00007ffff7e5270e in memory_region_destructor_ram (mr=0x7ffff8de3e00) at /home/devel/qemu/memory.c:763 No locals. #4 0x00007ffff7e53269 in memory_region_destroy (mr=0x7ffff8de3e00) at /home/devel/qemu/memory.c:1025 __PRETTY_FUNCTION__ = "memory_region_destroy" #5 0x00007ffff7cdcf35 in pci_del_option_rom (pdev=0x7ffff8de3830) at hw/pci/pci.c:1975 No locals. #6 0x00007ffff7cda333 in pci_unregister_device (dev=0x7ffff8de3830) at hw/pci/pci.c:901 pci_dev = 0x7ffff8de3830 pc = 0x7ffff8de3770 #7 0x00007ffff7c644a9 in device_unrealize (dev=0x7ffff8de3830, errp=0x7ffff2adb710) at hw/core/qdev.c:190 rc = 32767 dc = 0x7ffff8de3770 #8 0x00007ffff7c65dfb in device_set_realized (obj=0x7ffff8de3830, value=false, err=0x0) at hw/core/qdev.c:714 dev = 0x7ffff8de3830 dc = 0x7ffff8de3770 local_err = 0x0 #9 0x00007ffff7d8a807 in property_set_bool (obj=0x7ffff8de3830, v=0x7fffe4024240, opaque=0x7ffff8de36e0, name=0x7ffff7f2074e "realized", errp=0x0) at qom/object.c:1235 prop = 0x7ffff8de36e0 value = false local_err = 0x0 #10 0x00007ffff7d892a3 in object_property_set (obj=0x7ffff8de3830, v=0x7fffe4024240, name=0x7ffff7f2074e "realized", errp=0x0) at qom/object.c:715 prop = 0x7ffff8de60d0 #11 0x00007ffff7d8aac2 in object_property_set_qobject (obj=0x7ffff8de3830, value=0x7fffe40086e0, name=0x7ffff7f2074e "realized", errp=0x0) at qom/qom-qobject.c:24 mi = 0x7fffe4024240 #12 0x00007ffff7d895aa in object_property_set_bool (obj=0x7ffff8de3830, value=false, name=0x7ffff7f2074e "realized", errp=0x0) at qom/object.c:778 qbool = 0x7fffe40086e0 #13 0x00007ffff7c66133 in device_unparent (obj=0x7ffff8de3830) at hw/core/qdev.c:792 dev = 0x7ffff8de3830 bus = 0x7ffff7f1bbd0 event_data = 0x7ffff7d8886b have_realized = true #14 0x00007ffff7d88420 in object_unparent (obj=0x7ffff8de3830) at qom/object.c:367 No locals. #15 0x00007ffff7c648d9 in qdev_free (dev=0x7ffff8de3830) at hw/core/qdev.c:285 No locals. #16 0x00007ffff7c30f99 in acpi_piix_eject_slot (s=0x7ffff8dddd90, slots=8) at hw/acpi/piix4.c:306 qdev = 0x7ffff8de3830 dev = 0x7ffff8de3830 pc = 0x7ffff8de3770 kid = 0x7ffff8dee780 next = 0x7ffff8de11b0 bus = 0x7ffff8d6ab10 slot = 3 slot_free = true #17 0x00007ffff7c31b5c in pci_write (opaque=0x7ffff8dddd90, addr=8, data=8, size=4) at hw/acpi/piix4.c:569 No locals. #18 0x00007ffff7e500ff in memory_region_write_accessor (opaque=0x7ffff8dde598, addr=8, value=0x7ffff2adbb00, size=4, shift=0, mask=4294967295) at /home/devel/qemu/memory.c:334 mr = 0x7ffff8dde598 tmp = 8 #19 0x00007ffff7e501e1 in access_with_adjusted_size (addr=8, value=0x7ffff2adbb00, size=4, access_size_min=1, access_size_max=4, access= 0x7ffff7e50073 <memory_region_write_accessor>, opaque=0x7ffff8dde598) at /home/devel/qemu/memory.c:364 access_mask = 4294967295 access_size = 4 i = 0 #20 0x00007ffff7e50669 in memory_region_iorange_write (iorange=0x7ffff8dcf940, offset=8, width=4, data=8) at /home/devel/qemu/memory.c:439 mrio = 0x7ffff8dcf940 mr = 0x7ffff8dde598 __PRETTY_FUNCTION__ = "memory_region_iorange_write" #21 0x00007ffff7e48a18 in ioport_writel_thunk (opaque=0x7ffff8dcf940, addr=44552, data=8) at /home/devel/qemu/ioport.c:226 ioport = 0x7ffff8dcf940 #22 0x00007ffff7e482e3 in ioport_write (index=2, address=44552, data=8) at /home/devel/qemu/ioport.c:83 default_func = {0x7ffff7e48337 <default_ioport_writeb>, 0x7ffff7e483da <default_ioport_writew>, 0x7ffff7e48485 <default_ioport_writel>} func = 0x7ffff7e489be <ioport_writel_thunk> #23 0x00007ffff7e48f93 in cpu_outl (addr=44552, val=8) at /home/devel/qemu/ioport.c:303 No locals. #24 0x00007ffff7e4c9e7 in kvm_handle_io (port=44552, data=0x7ffff20d9000, direction=1, size=4, count=1) at /home/devel/qemu/kvm-all.c:1430 i = 0 ptr = 0x7ffff20d9000 "\b" #25 0x00007ffff7e4d001 in kvm_cpu_exec (env=0x7ffff8d4be80) at /home/devel/qemu/kvm-all.c:1575 cpu = 0x7ffff8d4bd70 run = 0x7ffff20d8000 ret = 0 run_ret = 0 ... > Reviewed-by: Juan Quintela <quint...@redhat.com> > Signed-off-by: Paolo Bonzini <pbonz...@redhat.com> > --- > v2->v3: use MAP_FAILED. You learn something every day. > > util/oslib-posix.c | 35 ++++++++++++++++++----------------- > 1 file changed, 18 insertions(+), 17 deletions(-) > > diff --git a/util/oslib-posix.c b/util/oslib-posix.c > index 4e4b819..bda62c0 100644 > --- a/util/oslib-posix.c > +++ b/util/oslib-posix.c > @@ -40,7 +40,6 @@ extern int daemon(int, int); > Valgrind does not support alignments larger than 1 MiB, > therefore we need special code which handles running on Valgrind. */ > # define QEMU_VMALLOC_ALIGN (512 * 4096) > -# define CONFIG_VALGRIND > #elif defined(__linux__) && defined(__s390x__) > /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */ > # define QEMU_VMALLOC_ALIGN (256 * 4096) > @@ -52,12 +51,8 @@ extern int daemon(int, int); > #include "sysemu/sysemu.h" > #include "trace.h" > #include "qemu/sockets.h" > +#include <sys/mman.h> > > -#if defined(CONFIG_VALGRIND) > -static int running_on_valgrind = -1; > -#else > -# define running_on_valgrind 0 > -#endif > #ifdef CONFIG_LINUX > #include <sys/syscall.h> > #endif > @@ -108,22 +103,28 @@ void *qemu_memalign(size_t alignment, size_t size) > /* alloc shared memory pages */ > void *qemu_vmalloc(size_t size) > { > - void *ptr; > size_t align = QEMU_VMALLOC_ALIGN; > + size_t total = size + align - getpagesize(); > + void *ptr = mmap(0, total, PROT_READ | PROT_WRITE, > + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); > + size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; > > -#if defined(CONFIG_VALGRIND) > - if (running_on_valgrind < 0) { > - /* First call, test whether we are running on Valgrind. > - This is a substitute for RUNNING_ON_VALGRIND from valgrind.h. */ > - const char *ld = getenv("LD_PRELOAD"); > - running_on_valgrind = (ld != NULL && strstr(ld, "vgpreload")); > + if (ptr == MAP_FAILED) { > + fprintf(stderr, "Failed to allocate %zu B: %s\n", > + size, strerror(errno)); > + abort(); > } > -#endif > > - if (size < align || running_on_valgrind) { > - align = getpagesize(); > + ptr += offset; > + total -= offset; > + > + if (offset > 0) { > + munmap(ptr - offset, offset); > } > - ptr = qemu_memalign(align, size); > + if (total > size) { > + munmap(ptr + size, total - size); > + } > + > trace_qemu_vmalloc(size, ptr); > return ptr; > } > -- > 1.8.1.4 > -- Amos.