Hi all, We were running OpenStack on Ubuntu with libvirt 0.9.10. We found that libvirt monitor commands were not working well. There were a lot of errors in libvirtd.log like this: 2013-02-07 06:07:39.000+0000: 18112: error : qemuDomainObjBeginJobInternal:773 : Timed out during operation: cannot acquire state change lock
We dug into libvirtd with strace and found that one of the threads was stuck in the following call: futex(0x7f69ac0ec0ec, FUTEX_WAIT_PRIVATE, 2717, NULL It seems this thread is waiting for a reply that never comes back, so the other threads end up waiting for it. We also saw there is a function called virCondWaitUntil(). Is it safe for us to modify the code from virCondWait() to virCondWaitUntil() to prevent such a deadlock scenario? Thanks. Following is the output of gdb -p 'libvirt.pid' followed by 'thread id' and 'bt full': #0 0x00007f69c8c1dd84 in pthread_cond_wait@@GLIBC_2.3.2 () from /lib/x86_64-linux-gnu/libpthread.so.0 No symbol table info available. #1 0x00007f69c9ee884a in virCondWait (c=<optimized out>, m=<optimized out>) at util/threads-pthread.c:117 ret = <optimized out> #2 0x000000000049c749 in qemuMonitorSend (mon=0x7f69ac0ec0c0, msg=<optimized out>) at qemu/qemu_monitor.c:826 ret = -1 __func__ = "qemuMonitorSend" __FUNCTION__ = "qemuMonitorSend" #3 0x00000000004ac8ed in qemuMonitorJSONCommandWithFd (mon=0x7f69ac0ec0c0, cmd=0x7f6998028280, scm_fd=-1, reply=0x7f69c57829f8) at qemu/qemu_monitor_json.c:230 ret = -1 msg = {txFD = -1, txBuffer = 0x7f69980e9b00 "{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}\r\n", txOffset = 49, txLength = 49, rxBuffer = 0x0, rxLength = 0, rxObject = 0x0, finished = false, passwordHandler = 0, passwordOpaque = 0x0} cmdstr = 0x7f69980ef2f0 "{\"execute\":\"query-balloon\",\"id\":\"libvirt-1359\"}" id = 0x7f69980b0a20 "libvirt-1359" exe = <optimized out> __FUNCTION__ = "qemuMonitorJSONCommandWithFd" __func__ = "qemuMonitorJSONCommandWithFd" #4 0x00000000004ae794 in qemuMonitorJSONGetBalloonInfo (mon=0x7f69ac0ec0c0, currmem=0x7f69c5782a48) at qemu/qemu_monitor_json.c:1190 ret = <optimized out> cmd = 0x7f6998028280 reply = 0x0 __FUNCTION__ = "qemuMonitorJSONGetBalloonInfo" #5 0x0000000000457451 in qemudDomainGetInfo (dom=<optimized out>, info=0x7f69c5782b50) at qemu/qemu_driver.c:2181 priv = 0x7f69a0093b00 driver = 0x7f69b80ca8e0 vm = 0x7f69a0093370 ret = -1 
err = <optimized out> balloon = <optimized out> __FUNCTION__ = "qemudDomainGetInfo" #6 0x00007f69c9f63eda in virDomainGetInfo (domain=0x7f69980e3650, info=0x7f69c5782b50) at libvirt.c:4230 ret = <optimized out> conn = <optimized out> __func__ = "virDomainGetInfo" __FUNCTION__ = "virDomainGetInfo" #7 0x0000000000439bca in remoteDispatchDomainGetInfo (ret=0x7f6998000c20, args=<optimized out>, rerr=0x7f69c5782c50, client=0x157e730, server=<optimized out>, msg=<optimized out>) at remote_dispatch.h:1640 rv = -1 tmp = {state = 1 '\001', maxMem = 2097152, memory = 0, nrVirtCpu = 0, cpuTime = 5981880000000} dom = 0x7f69980e3650 priv = <optimized out> #8 remoteDispatchDomainGetInfoHelper (server=<optimized out>, client=0x157e730, msg=<optimized out>, rerr=0x7f69c5782c50, args=<optimized out>, ret=0x7f6998000c20) at remote_dispatch.h:1616 __func__ = "remoteDispatchDomainGetInfoHelper" #9 0x00007f69c9fbb915 in virNetServerProgramDispatchCall (msg=0x1689cc0, client=0x157e730, server=0x1577c90, prog=0x15825d0) at rpc/virnetserverprogram.c:416 ret = 0x7f6998000c20 "" rv = -1 i = <optimized out> arg = 0x7f6998027950 "\360e\n\230i\177" dispatcher = 0x73de40 rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0, str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0} #10 virNetServerProgramDispatch (prog=0x15825d0, server=0x1577c90, client=0x157e730, msg=0x1689cc0) at rpc/virnetserverprogram.c:289 ret = -1 rerr = {code = 0, domain = 0, message = 0x0, level = 0, dom = 0x0, str1 = 0x0, str2 = 0x0, str3 = 0x0, int1 = 0, int2 = 0, net = 0x0} __func__ = "virNetServerProgramDispatch" __FUNCTION__ = "virNetServerProgramDispatch" #11 0x00007f69c9fb6461 in virNetServerHandleJob (jobOpaque=<optimized out>, opaque=0x1577c90) at rpc/virnetserver.c:164 srv = 0x1577c90 job = 0x155dfa0 __func__ = "virNetServerHandleJob" #12 0x00007f69c9ee8e3e in virThreadPoolWorker (opaque=<optimized out>) at util/threadpool.c:144 data = 0x0 pool = 0x1577d80 cond = 0x1577de0 priority = 
false job = 0x162dd20 #13 0x00007f69c9ee84e6 in virThreadHelper (data=<optimized out>) at util/threads-pthread.c:161 args = 0x0 local = {func = 0x7f69c9ee8d00 <virThreadPoolWorker>, opaque = 0x1559f90} #14 0x00007f69c8c19e9a in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0 No symbol table info available. #15 0x00007f69c89474bd in clone () from /lib/x86_64-linux-gnu/libc.so.6 No symbol table info available. #16 0x0000000000000000 in ?? () No symbol table info available. Regards, Chun-Hung
-- libvir-list mailing list libvir-list@redhat.com https://www.redhat.com/mailman/listinfo/libvir-list