Package: libvirt-bin
Version: 0.9.12-11+deb7u1
Severity: normal
Tags: upstream patch

Hi,

Libvirtd occasionally segfaults like this (running under gdb without
arguments):

[...]
2013-07-25 18:23:28.420+0000: 7845: info : virSecurityDACRestoreSecurityFileLabel:144 : Restoring DAC user and group on '/dev/lant/elm'
2013-07-25 18:23:28.420+0000: 7845: info : virSecurityDACSetOwnership:100 : Setting DAC user and group on '/dev/dm-5' to '0:0'
2013-07-25 18:23:28.529+0000: 7842: info : virSecurityDACSetOwnership:100 : Setting DAC user and group on '/dev/lant/elm' to '114:124'
2013-07-25 18:23:28.979+0000: 7844: info : virSecurityDACRestoreSecurityFileLabel:144 : Restoring DAC user and group on '/dev/lant/elm'
2013-07-25 18:23:28.979+0000: 7844: info : virSecurityDACSetOwnership:100 : Setting DAC user and group on '/dev/dm-5' to '0:0'
2013-07-25 18:23:29.082+0000: 7842: info : virSecurityDACSetOwnership:100 : Setting DAC user and group on '/dev/lant/elm' to '114:124'
2013-07-25 18:23:29.559+0000: 7837: error : qemuMonitorIO:612 : internal error End of file from monitor
2013-07-25 18:23:29.559+0000: 7837: info : virSecurityDACRestoreSecurityFileLabel:144 : Restoring DAC user and group on '/dev/lant/elm'
2013-07-25 18:23:29.559+0000: 7837: info : virSecurityDACSetOwnership:100 : Setting DAC user and group on '/dev/dm-5' to '0:0'
2013-07-25 18:23:29.560+0000: 7845: error : virDomainDefFormatInternal:12402 : internal error unexpected domain type -602374752
2013-07-25 18:23:29.560+0000: 7845: warning : qemuDomainObjSaveJob:672 : Failed to save status on vm (null)

Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7ffff055b700 (LWP 7845)]
__pthread_mutex_lock (mutex=mutex@entry=0x203a203437343a73) at 
pthread_mutex_lock.c:50
50      pthread_mutex_lock.c: No such file or directory.
(gdb) thread apply all bt

Thread 11 (Thread 0x7fffedd56700 (LWP 7850)):
#0  pthread_cond_wait@@GLIBC_2.3.2 () at 
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1  0x00007ffff77c43a6 in virCondWait (c=c@entry=0x76f460, m=m@entry=0x76f3a8) 
at /build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:117
#2  0x00007ffff77c480b in virThreadPoolWorker (opaque=opaque@entry=0x778f70) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:103
#3  0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#4  0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#5  0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6  0x0000000000000000 in ?? ()

Thread 10 (Thread 0x7fffee557700 (LWP 7849)):
#0  pthread_cond_wait@@GLIBC_2.3.2 () at 
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1  0x00007ffff77c43a6 in virCondWait (c=c@entry=0x76f460, m=m@entry=0x76f3a8) 
at /build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:117
#2  0x00007ffff77c480b in virThreadPoolWorker (opaque=opaque@entry=0x779100) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:103
#3  0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#4  0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#5  0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6  0x0000000000000000 in ?? ()

Thread 9 (Thread 0x7fffeed58700 (LWP 7848)):
#0  __lll_lock_wait () at 
../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:136
#1  0x00007ffff508b339 in _L_lock_926 () from 
/lib/x86_64-linux-gnu/libpthread.so.0
#2  0x00007ffff508b15b in __pthread_mutex_lock (mutex=mutex@entry=0x7ea8f0) at 
pthread_mutex_lock.c:61
#3  0x00007ffff77c4335 in virMutexLock (m=m@entry=0x7ea8f0) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:85
#4  0x000000000048a005 in qemuDriverLock (driver=driver@entry=0x7ea8f0) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/qemu/qemu_conf.c:66
#5  0x000000000045b645 in qemudListDomains (conn=<optimized out>, ids=0x80b810, 
nids=2)
    at /build/libvirt-FsA54o/libvirt-0.9.12/./src/qemu/qemu_driver.c:1335
#6  0x00007ffff7837480 in virConnectListDomains (conn=0x7fffdc18d4e0, 
ids=0x80b810, maxids=2) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/libvirt.c:1883
#7  0x000000000043a8b6 in remoteDispatchListDomains (ret=0x80a4a0, 
args=0x82ae80, rerr=0x7fffeed57c90, client=<optimized out>, server=<optimized 
out>, 
    msg=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./daemon/remote_dispatch.h:7790
#8  remoteDispatchListDomainsHelper (server=<optimized out>, client=<optimized 
out>, msg=<optimized out>, rerr=0x7fffeed57c90, args=0x82ae80, ret=0x80a4a0)
    at /build/libvirt-FsA54o/libvirt-0.9.12/./daemon/remote_dispatch.h:7758
#9  0x00007ffff788b800 in virNetServerProgramDispatchCall (msg=0x7fffe40f7510, 
client=0x7fffe400ad00, server=0x76f280, prog=0x778fe0)
    at /build/libvirt-FsA54o/libvirt-0.9.12/./src/rpc/virnetserverprogram.c:423
#10 virNetServerProgramDispatch (prog=0x778fe0, server=server@entry=0x76f280, 
client=0x7fffe400ad00, msg=0x7fffe40f7510)
    at /build/libvirt-FsA54o/libvirt-0.9.12/./src/rpc/virnetserverprogram.c:296
#11 0x00007ffff7887581 in virNetServerHandleJob (jobOpaque=<optimized out>, 
opaque=0x76f280)
    at /build/libvirt-FsA54o/libvirt-0.9.12/./src/rpc/virnetserver.c:161
#12 0x00007ffff77c474e in virThreadPoolWorker (opaque=opaque@entry=0x778f70) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:144
#13 0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#14 0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#15 0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#16 0x0000000000000000 in ?? ()

Thread 8 (Thread 0x7fffef559700 (LWP 7847)):
#0  pthread_cond_wait@@GLIBC_2.3.2 () at 
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1  0x00007ffff77c43a6 in virCondWait (c=c@entry=0x76f460, m=m@entry=0x76f3a8) 
at /build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:117
#2  0x00007ffff77c480b in virThreadPoolWorker (opaque=opaque@entry=0x779100) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:103
#3  0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#4  0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#5  0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6  0x0000000000000000 in ?? ()

Thread 7 (Thread 0x7fffefd5a700 (LWP 7846)):
#0  pthread_cond_wait@@GLIBC_2.3.2 () at 
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1  0x00007ffff77c43a6 in virCondWait (c=c@entry=0x76f460, m=m@entry=0x76f3a8) 
at /build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:117
#2  0x00007ffff77c480b in virThreadPoolWorker (opaque=opaque@entry=0x778f70) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:103
#3  0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#4  0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#5  0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6  0x0000000000000000 in ?? ()

Thread 6 (Thread 0x7ffff055b700 (LWP 7845)):
#0  __pthread_mutex_lock (mutex=mutex@entry=0x203a203437343a73) at 
pthread_mutex_lock.c:50
#1  0x00007ffff77c4335 in virMutexLock (m=m@entry=0x203a203437343a73) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:85
#2  0x0000000000466fb5 in qemuAgentLock (mon=mon@entry=0x203a203437343a73) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/qemu/qemu_agent.c:134
#3  0x0000000000467c62 in qemuAgentClose (mon=0x203a203437343a73) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/qemu/qemu_agent.c:820
#4  0x0000000000491189 in qemuProcessStop (driver=driver@entry=0x7ea8f0, 
vm=vm@entry=0x7fffe4003530, migrated=migrated@entry=0, 
    reason=reason@entry=VIR_DOMAIN_SHUTOFF_DESTROYED) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/qemu/qemu_process.c:3944
#5  0x000000000045a43f in qemuDomainDestroyFlags (dom=<optimized out>, 
flags=<optimized out>)
    at /build/libvirt-FsA54o/libvirt-0.9.12/./src/qemu/qemu_driver.c:1861
#6  0x00007ffff7837f06 in virDomainDestroy (domain=domain@entry=0x897aa0) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/libvirt.c:2254
#7  0x000000000043c8ba in remoteDispatchDomainDestroy (args=<optimized out>, 
rerr=0x7ffff055ac90, client=0x7fffe400b490, server=<optimized out>, 
    msg=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./daemon/remote_dispatch.h:1063
#8  remoteDispatchDomainDestroyHelper (server=<optimized out>, 
client=0x7fffe400b490, msg=<optimized out>, rerr=0x7ffff055ac90, 
args=<optimized out>, 
    ret=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./daemon/remote_dispatch.h:1041
#9  0x00007ffff788b800 in virNetServerProgramDispatchCall (msg=0x7fffe4077430, 
client=0x7fffe400b490, server=0x76f280, prog=0x778fe0)
    at /build/libvirt-FsA54o/libvirt-0.9.12/./src/rpc/virnetserverprogram.c:423
#10 virNetServerProgramDispatch (prog=0x778fe0, server=server@entry=0x76f280, 
client=0x7fffe400b490, msg=0x7fffe4077430)
    at /build/libvirt-FsA54o/libvirt-0.9.12/./src/rpc/virnetserverprogram.c:296
#11 0x00007ffff7887581 in virNetServerHandleJob (jobOpaque=<optimized out>, 
opaque=0x76f280)
    at /build/libvirt-FsA54o/libvirt-0.9.12/./src/rpc/virnetserver.c:161
#12 0x00007ffff77c474e in virThreadPoolWorker (opaque=opaque@entry=0x779100) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:144
#13 0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#14 0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#15 0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#16 0x0000000000000000 in ?? ()

Thread 5 (Thread 0x7ffff0d5c700 (LWP 7844)):
#0  pthread_cond_wait@@GLIBC_2.3.2 () at 
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1  0x00007ffff77c43a6 in virCondWait (c=c@entry=0x76f3d0, m=m@entry=0x76f3a8) 
at /build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:117
#2  0x00007ffff77c47eb in virThreadPoolWorker (opaque=opaque@entry=0x778f70) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:103
#3  0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#4  0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#5  0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6  0x0000000000000000 in ?? ()

Thread 4 (Thread 0x7ffff155d700 (LWP 7843)):
#0  pthread_cond_wait@@GLIBC_2.3.2 () at 
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1  0x00007ffff77c43a6 in virCondWait (c=c@entry=0x76f3d0, m=m@entry=0x76f3a8) 
at /build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:117
#2  0x00007ffff77c47eb in virThreadPoolWorker (opaque=opaque@entry=0x779100) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:103
#3  0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#4  0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#5  0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6  0x0000000000000000 in ?? ()

Thread 3 (Thread 0x7ffff1d5e700 (LWP 7842)):
#0  pthread_cond_wait@@GLIBC_2.3.2 () at 
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1  0x00007ffff77c43a6 in virCondWait (c=c@entry=0x76f3d0, m=m@entry=0x76f3a8) 
at /build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:117
#2  0x00007ffff77c47eb in virThreadPoolWorker (opaque=opaque@entry=0x778f70) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:103
#3  0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#4  0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#5  0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6  0x0000000000000000 in ?? ()

Thread 2 (Thread 0x7ffff255f700 (LWP 7841)):
#0  pthread_cond_wait@@GLIBC_2.3.2 () at 
../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162
#1  0x00007ffff77c43a6 in virCondWait (c=c@entry=0x76f3d0, m=m@entry=0x76f3a8) 
at /build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:117
#2  0x00007ffff77c47eb in virThreadPoolWorker (opaque=opaque@entry=0x779100) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threadpool.c:103
#3  0x00007ffff77c41d9 in virThreadHelper (data=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/threads-pthread.c:161
#4  0x00007ffff5088b50 in start_thread (arg=<optimized out>) at 
pthread_create.c:304
#5  0x00007ffff49cba7d in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:112
#6  0x0000000000000000 in ?? ()

Thread 1 (Thread 0x7ffff7e2f820 (LWP 7837)):
#0  0x00007ffff49c0e33 in *__GI___poll (fds=<optimized out>, nfds=<optimized 
out>, nfds@entry=10, timeout=timeout@entry=4796)
    at ../sysdeps/unix/sysv/linux/poll.c:87
#1  0x00007ffff77b4a38 in virEventPollRunOnce () at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/event_poll.c:620
#2  0x00007ffff77b38b7 in virEventRunDefaultImpl () at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/util/event.c:247
#3  0x00007ffff7887d7d in virNetServerRun (srv=srv@entry=0x76f280) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./src/rpc/virnetserver.c:712
#4  0x0000000000423ab1 in main (argc=<optimized out>, argv=<optimized out>) at 
/build/libvirt-FsA54o/libvirt-0.9.12/./daemon/libvirtd.c:1138
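
By the way, the "mutex" argument in the crashing frame, 0x203a203437343a73,
consists entirely of printable ASCII bytes, which suggests the object this
thread was still holding a pointer to had already been freed and its memory
reused for string data.  A throwaway snippet (not libvirt code, just for
illustration) to decode it:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* the bogus "mutex" pointer from frame #0 of the crashing thread */
    uint64_t bogus = 0x203a203437343a73ULL;

    /* print its 8 bytes in the order they would sit in little-endian
     * memory; every one of them is printable text */
    for (int i = 0; i < 8; i++)
        putchar((int)((bogus >> (8 * i)) & 0xff));
    putchar('\n');
    return 0;
}

This prints "s:474 : ", which looks like the tail of a libvirt log message
(function:line format), consistent with a use-after-free.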

To provoke this, I started

$ while virsh --quiet -r -c qemu:///system list --uuid; do :; done

in one terminal, and

# while virsh --quiet create ../td/wh.xml; do virsh --quiet destroy wh; done

in another.  It may take several minutes, but eventually libvirtd either
segfaults or deadlocks.  However, applying the following upstream patches
seems to fix both issues (at least I could no longer reproduce either):

f1b4021b38f9485c50d386af6f682ecfc8025af5 (mainly to ease backporting the
real fix, but it sounds useful in its own right, too)
81621f3e6e45e8681cc18ae49404736a0e772a11 (with the obvious changes needed to backport it)
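
To make the race the second commit closes easier to see, here is a minimal
standalone sketch; all names in it (vm_t, monitor_eof(), destroy_domain())
are invented for illustration and are not libvirt code.  The idea is that
the destroy path marks the object as "being destroyed" before dropping the
lock around the slow process kill, so the monitor-EOF handler leaves cleanup
to the destroy thread instead of tearing the object down underneath it:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

typedef struct {
    pthread_mutex_t lock;
    bool beingDestroyed;   /* plays the role of priv->beingDestroyed */
    bool cleanedUp;        /* stands in for freeing agent/monitor/def */
} vm_t;

/* monitor-EOF side: without the flag it would clean up while the
 * destroy thread has temporarily dropped the lock */
static void *monitor_eof(void *opaque)
{
    vm_t *vm = opaque;

    pthread_mutex_lock(&vm->lock);
    if (!vm->beingDestroyed)
        vm->cleanedUp = true;     /* normal EOF cleanup */
    /* else: a destroy job owns the object, let it do the cleanup */
    pthread_mutex_unlock(&vm->lock);
    return NULL;
}

/* destroy side: set the flag *before* unlocking around the kill, which
 * is what the patch does before calling qemuProcessKill() */
static void destroy_domain(vm_t *vm)
{
    pthread_mutex_lock(&vm->lock);
    vm->beingDestroyed = true;
    pthread_mutex_unlock(&vm->lock);

    usleep(100 * 1000);           /* the slow, unlocked process kill */

    pthread_mutex_lock(&vm->lock);
    vm->cleanedUp = true;         /* safe: the EOF handler stayed out */
    pthread_mutex_unlock(&vm->lock);
}

int main(void)
{
    vm_t vm = { .beingDestroyed = false, .cleanedUp = false };
    pthread_t eof;

    pthread_mutex_init(&vm.lock, NULL);
    pthread_create(&eof, NULL, monitor_eof, &vm);
    destroy_domain(&vm);
    pthread_join(eof, NULL);
    pthread_mutex_destroy(&vm.lock);
    printf("cleaned up: %s\n", vm.cleanedUp ? "yes" : "no");
    return 0;
}

(Build with "cc -pthread sketch.c".)  Without the flag, the 0.9.12 code can
end up with both threads doing the cleanup, which would explain the garbage
pointers in the backtrace above.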

Please consider including something like the attached patches in the next
stable update.
-- 
Thanks,
Feri.
commit f1b4021b38f9485c50d386af6f682ecfc8025af5
Author: Daniel P. Berrange <berra...@redhat.com>
Date:   Thu Sep 27 10:07:03 2012 +0100

    Don't ignore return value of qemuProcessKill
    
    When calling qemuProcessKill from the virDomainDestroy impl
    in QEMU, do not ignore the return value. This ensures that
    if QEMU fails to respond to SIGKILL, the caller will know
    about the failure.
    
    Signed-off-by: Daniel P. Berrange <berra...@redhat.com>

Index: libvirt-0.9.12/src/qemu/qemu_driver.c
===================================================================
--- libvirt-0.9.12.orig/src/qemu/qemu_driver.c	2013-07-25 15:13:48.526055520 +0200
+++ libvirt-0.9.12/src/qemu/qemu_driver.c	2013-07-25 15:40:25.282132402 +0200
@@ -1839,7 +1839,11 @@
             goto cleanup;
         }
     } else {
-        ignore_value(qemuProcessKill(driver, vm, VIR_QEMU_PROCESS_KILL_FORCE));
+        if (qemuProcessKill(driver, vm, VIR_QEMU_PROCESS_KILL_FORCE) < 0) {
+            qemuReportError(VIR_ERR_OPERATION_FAILED, "%s",
+                            _("failed to kill qemu process with SIGTERM"));
+            goto cleanup;
+        }
     }
 
     /* We need to prevent monitor EOF callback from doing our work (and sending
commit 81621f3e6e45e8681cc18ae49404736a0e772a11
Author: Daniel P. Berrange <berra...@redhat.com>
Date:   Fri Jan 18 14:33:51 2013 +0000

    Fix race condition when destroying guests
    
    When running virDomainDestroy, we need to make sure that no other
    background thread cleans up the domain while we're doing our work.
    This can happen if we release the domain object while in the
    middle of work, because the monitor might detect EOF in this window.
    For this reason we have a 'beingDestroyed' flag to stop the monitor
    from doing its normal cleanup. Unfortunately this flag was only
    being used to protect qemuDomainBeginJob, and not qemuProcessKill
    
    This left open a race condition where either libvirtd could crash,
    or alternatively report bogus error messages about the domain already
    having been destroyed to the caller
    
    Signed-off-by: Daniel P. Berrange <berra...@redhat.com>

Index: libvirt-0.9.12/src/qemu/qemu_driver.c
===================================================================
--- libvirt-0.9.12.orig/src/qemu/qemu_driver.c	2013-07-25 15:15:31.000000000 +0200
+++ libvirt-0.9.12/src/qemu/qemu_driver.c	2013-07-25 15:18:28.902103192 +0200
@@ -1827,6 +1827,12 @@
 
     qemuDomainSetFakeReboot(driver, vm, false);
 
+
+    /* We need to prevent monitor EOF callback from doing our work (and sending
+     * misleading events) while the vm is unlocked inside BeginJob/ProcessKill API
+     */
+    priv->beingDestroyed = true;
+
     /* Although qemuProcessStop does this already, there may
      * be an outstanding job active. We want to make sure we
      * can kill the process even if a job is active. Killing
@@ -1834,23 +1840,20 @@
      */
     if (flags & VIR_DOMAIN_DESTROY_GRACEFUL) {
         if (qemuProcessKill(driver, vm, 0) < 0) {
+            priv->beingDestroyed = false;
             qemuReportError(VIR_ERR_OPERATION_FAILED, "%s",
                             _("failed to kill qemu process with SIGTERM"));
             goto cleanup;
         }
     } else {
         if (qemuProcessKill(driver, vm, VIR_QEMU_PROCESS_KILL_FORCE) < 0) {
+            priv->beingDestroyed = false;
             qemuReportError(VIR_ERR_OPERATION_FAILED, "%s",
                             _("failed to kill qemu process with SIGTERM"));
             goto cleanup;
         }
     }
 
-    /* We need to prevent monitor EOF callback from doing our work (and sending
-     * misleading events) while the vm is unlocked inside BeginJob API
-     */
-    priv->beingDestroyed = true;
-
     if (qemuDomainObjBeginJobWithDriver(driver, vm, QEMU_JOB_DESTROY) < 0)
         goto cleanup;
 
