Hello, while trying to migrate a VM from one host to another (a big VM with 96GB of RAM), I found that when the migration completes, the VM goes into a paused state and cannot be resumed. The libvirt/qemu log it gives is this:
2016-09-28T12:18:15.679176Z qemu-kvm: error while loading state section id 2(ram) 2016-09-28T12:18:15.680010Z qemu-kvm: load of migration failed: Input/output error 2016-09-28 12:18:15.872+0000: shutting down 2016-09-28 12:22:21.467+0000: starting up libvirt version: 1.2.17, package: 13.el7_2.5 (CentOS BuildSystem <http://bugs.centos.org>, 2016-06-23-14:23:27, worker1.bsys.centos.org), qemu version: 2.3.0 (qemu-kvm-ev-2.3.0-31.el7.16.1) LC_ALL=C PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin QEMU_AUDIO_DRV=spice /usr/libexec/qemu-kvm -name front04.billydomain.com -S -machine pc-i440fx-rhel7.2.0,accel=kvm,usb=off -cpu Haswell-noTSX -m size=100663296k,slots=16,maxmem=4294967296k -realtime mlock=off -smp 32,sockets=16,cores=1,threads=2 -numa node,nodeid=0,cpus=0-31,mem=98304 -uuid 4511d1c0-6607-418f-ae75-34f605b2ad68 -smbios type=1,manufacturer=oVirt,product=oVirt Node,version=7-2.1511.el7.centos.2.10,serial=4C4C4544-004A-3310-8054-B2C04F474432,uuid=4511d1c0-6607-418f-ae75-34f605b2ad68 -no-user-config -nodefaults -chardev socket,id=charmonitor,path=/var/lib/libvirt/qemu/ domain-front04.billydomain.com/monitor.sock,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=2016-09-28T14:22:21,driftfix=slew -global kvm-pit.lost_tick_policy=discard -no-hpet -no-shutdown -boot strict=on -device piix3-usb-uhci,id=usb,bus=pci.0,addr=0x1.0x2 -device virtio-scsi-pci,id=scsi0,bus=pci.0,addr=0x7 -device virtio-serial-pci,id=virtio-serial0,max_ports=16,bus=pci.0,addr=0x4 -drive if=none,id=drive-ide0-1-0,readonly=on,format=raw -device ide-cd,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0 -drive file=/rhev/data-center/00000001-0001-0001-0001-0000000003e3/ba2bd397-9222-424d-aecc-eb652c0169d9/images/b5b49d5c-2378-4639-9469-362e37ae7473/24fd0d3c-309b-458d-9818-4321023afacf,if=none,id=drive-virtio-disk0,format=qcow2,serial=b5b49d5c-2378-4639-9469-362e37ae7473,cache=none,werror=stop,rerror=stop,aio=threads -device 
virtio-blk-pci,scsi=off,bus=pci.0,addr=0x5,drive=drive-virtio-disk0,id=virtio-disk0,bootindex=1 -drive file=/rhev/data-center/00000001-0001-0001-0001-0000000003e3/ba2bd397-9222-424d-aecc-eb652c0169d9/images/f02ac1ce-52cd-4b81-8b29-f8006d0469e0/ff4e49c6-3084-4234-80a1-18a67615c527,if=none,id=drive-virtio-disk1,format=raw,serial=f02ac1ce-52cd-4b81-8b29-f8006d0469e0,cache=none,werror=stop,rerror=stop,aio=threads -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x8,drive=drive-virtio-disk1,id=virtio-disk1 -netdev tap,fd=30,id=hostnet0,vhost=on,vhostfd=31 -device virtio-net-pci,netdev=hostnet0,id=net0,mac=00:1a:4a:16:01:56,bus=pci.0,addr=0x3 -chardev socket,id=charchannel0,path=/var/lib/libvirt/qemu/channels/4511d1c0-6607-418f-ae75-34f605b2ad68.com.redhat.rhevm.vdsm,server,nowait -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=com.redhat.rhevm.vdsm -chardev socket,id=charchannel1,path=/var/lib/libvirt/qemu/channels/4511d1c0-6607-418f-ae75-34f605b2ad68.org.qemu.guest_agent.0,server,nowait -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,id=channel1,name=org.qemu.guest_agent.0 -chardev spicevmc,id=charchannel2,name=vdagent -device virtserialport,bus=virtio-serial0.0,nr=3,chardev=charchannel2,id=channel2,name=com.redhat.spice.0 -vnc 192.168.10.225:1,password -k es -spice tls-port=5902,addr=192.168.10.225,x509-dir=/etc/pki/vdsm/libvirt-spice,tls-channel=default,tls-channel=main,tls-channel=display,tls-channel=inputs,tls-channel=cursor,tls-channel=playback,tls-channel=record,tls-channel=smartcard,tls-channel=usbredir,seamless-migration=on -k es -device qxl-vga,id=video0,ram_size=67108864,vram_size=8388608,vgamem_mb=16,bus=pci.0,addr=0x2 -incoming tcp:0.0.0.0:49156 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x6 -msg timestamp=on Domain id=5 is tainted: hook-script red_dispatcher_loadvm_commands: KVM: entry failed, hardware error 0x8 RAX=00000000ffffffed RBX=ffff8817ba00c000 RCX=0100000000000000 
RDX=0000000000000000 RSI=0000000000000000 RDI=0000000000000046 RBP=ffff8817ba00fe98 RSP=ffff8817ba00fe98 R8 =0000000000000000 R9 =0000000000000000 R10=0000000000000000 R11=0000000000000000 R12=0000000000000006 R13=ffff8817ba00c000 R14=ffff8817ba00c000 R15=0000000000000000 RIP=ffffffff81058e96 RFL=00010286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 0000000000000000 ffffffff 00000000 CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA] SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS [-WA] DS =0000 0000000000000000 ffffffff 00000000 FS =0000 0000000000000000 ffffffff 00000000 GS =0000 ffff8817def80000 ffffffff 00000000 LDT=0000 0000000000000000 ffffffff 00000000 TR =0040 ffff8817def93b80 00002087 00008b00 DPL=0 TSS64-busy GDT= ffff8817def89000 0000007f IDT= ffffffffff529000 00000fff CR0=80050033 CR2=00000000ffffffff CR3=00000017b725b000 CR4=001406e0 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000d01 Code=89 e5 fb 5d c3 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 fb f4 <5d> c3 0f 1f 84 00 00 00 00 00 55 48 89 e5 f4 5d c3 66 0f 1f 84 00 00 00 00 00 55 49 89 ca KVM: entry failed, hardware error 0x8 RAX=00000000ffffffed RBX=ffff8817ba008000 RCX=0100000000000000 RDX=0000000000000000 RSI=0000000000000000 RDI=0000000000000046 RBP=ffff8817ba00be98 RSP=ffff8817ba00be98 R8 =0000000000000000 R9 =0000000000000000 R10=0000000000000000 R11=0000000000000000 R12=0000000000000005 R13=ffff8817ba008000 R14=ffff8817ba008000 R15=0000000000000000 RIP=ffffffff81058e96 RFL=00010286 [--S--P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 0000000000000000 ffffffff 00000000 CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA] SS =0018 0000000000000000 ffffffff 00c09300 DPL=0 DS [-WA] DS =0000 0000000000000000 ffffffff 00000000 FS =0000 0000000000000000 ffffffff 00000000 GS =0000 ffff8817def40000 ffffffff 00000000 LDT=0000 0000000000000000 ffffffff 00000000 TR =0040 ffff8817def53b80 
00002087 00008b00 DPL=0 TSS64-busy GDT= ffff8817def49000 0000007f IDT= ffffffffff529000 00000fff CR0=80050033 CR2=00000000ffffffff CR3=00000017b3c9a000 CR4=001406e0 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000d01 Code=89 e5 fb 5d c3 66 0f 1f 84 00 00 00 00 00 55 48 89 e5 fb f4 <5d> c3 0f 1f 84 00 00 00 00 00 55 48 89 e5 f4 5d c3 66 0f 1f 84 00 00 00 00 00 55 49 89 ca KVM: entry failed, hardware error 0x80000021 If you're running a guest on an Intel machine without unrestricted mode support, the failure can be most likely due to the guest entering an invalid state for Intel VT. For example, the guest maybe running in big real mode which is not supported on less recent Intel processors. EAX=ffffffed EBX=ba020000 ECX=00000000 EDX=00000000 ESI=00000000 EDI=00000046 EBP=ba023e98 ESP=ba023e98 EIP=81058e96 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 0000ffff 00009300 DPL=0 DS [-WA] CS =f000 ffff0000 0000ffff 00009b00 DPL=0 CS16 [-RA] SS =0000 00000000 0000ffff 00009300 DPL=0 DS [-WA] DS =0000 00000000 0000ffff 00009300 DPL=0 DS [-WA] FS =0000 00000000 0000ffff 00009300 DPL=0 DS [-WA] GS =0000 00000000 0000ffff 00009300 DPL=0 DS [-WA] LDT=0000 00000000 0000ffff 00008200 DPL=0 LDT TR =0000 00000000 0000ffff 00008b00 DPL=0 TSS64-busy GDT= 0000000000000000 0000ffff IDT= 0000000000000000 0000ffff CR0=80050033 CR2=00007fd826ac20a0 CR3=000000003516c000 CR4=00140060 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000d01 Code=?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? <??> ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? ?? Searching for errors like this I found some bug report about kernel issues but I don't think it's the case, other VMs spawned from the same image migrate without any issue. 
I have to say that the original host running the VM has a RAM problem (ECC multibit fault in one DIMM). Maybe that's the cause? How can I properly read this error log? Thanks -- Davide Ferrari Senior Systems Engineer
_______________________________________________ Users mailing list Users@ovirt.org http://lists.ovirt.org/mailman/listinfo/users