As I was testing simple golang hello work app to see why it hangs (or does not terminate gracefully) as described by issue #850 I discovered number of threads waiting to be woken up from futex code calls. Here are the relevant thread stack traces (198, 199, 200 are the ones calling futex code):
(gdb) osv thread 12 (named ">init") (gdb) bt #0 0x00000000003e1aa6 in sched::thread::switch_to (this=0xffff800001417040, this@entry=0xbdbdd0) at arch/x64/arch-switch.hh:74 #1 0x00000000003e49c2 in sched::cpu::reschedule_from_interrupt (this=0xffff800000f2b040, called_from_yield=called_from_yield@entry=false, preempt_after=..., preempt_after@entry=...) at core/sched.cc:339 #2 0x00000000003e4e7c in sched::cpu::schedule () at core/sched.cc:228 #3 0x00000000003e5542 in sched::thread::wait (this=this@entry=0xffff800001612040) at core/sched.cc:1214 #4 0x000000000036e7a8 in sched::thread::do_wait_for<sched::thread::dummy_lock, sched::wait_object<osv::acpi_interrupt::process_interrupts()::{lambda()#1}>, sched::wait_object<osv::acpi_interrupt::process_interrupts()::{lambda()#2}> >(sched::thread::dummy_lock&, sched::wait_object<osv::acpi_interrupt::process_interrupts()::{lambda()#1}>&&, sched::wait_object<osv::acpi_interrupt::process_interrupts()::{lambda()#2}>&&) (mtx=<synthetic pointer>) at include/osv/sched.hh:1206 #5 sched::thread::wait_for<osv::acpi_interrupt::process_interrupts()::{lambda()#1}, osv::acpi_interrupt::process_interrupts()::{lambda()#2}>(osv::acpi_interrupt::process_interrupts()::{lambda()#1}&&, osv::acpi_interrupt::process_interrupts()::{lambda()#2}&&) () at include/osv/sched.hh:1230 #6 osv::acpi_interrupt::process_interrupts (this=0xffffa00001382380) at drivers/acpi.cc:244 #7 osv::acpi_interrupt::acpi_interrupt(unsigned int, unsigned int (*)(void*), void*)::{lambda()#1}::operator()() const (__closure=<optimized out>) at drivers/acpi.cc:228 #8 std::_Function_handler<void (), osv::acpi_interrupt::acpi_interrupt(unsigned int, unsigned int (*)(void*), void*)::{lambda()#1}>::_M_invoke(std::_Any_data const&) (__functor=...) at /usr/include/c++/5/functional:1871 #9 0x00000000003e7157 in sched::thread_main_c (t=0xffff800001612040) at arch/x64/arch-switch.hh:193 #10 0x000000000038ab23 in thread_main () at arch/x64/entry.S:113 (gdb) osv thread 13 (named ">init") (gdb) bt #0 0x00000000003e1aa6 in sched::thread::switch_to (this=0xffff800001417040, this@entry=0xffff800001d4d040) at arch/x64/arch-switch.hh:74 #1 0x00000000003e49c2 in sched::cpu::reschedule_from_interrupt (this=0xffff800000f2b040, called_from_yield=called_from_yield@entry=false, preempt_after=..., preempt_after@entry=...) at core/sched.cc:339 #2 0x00000000003e4e7c in sched::cpu::schedule () at core/sched.cc:228 #3 0x00000000003e6d72 in sched::thread::wait (this=0xffff800001629040) at core/sched.cc:1214 #4 sched::thread::do_wait_until<sched::noninterruptible, lockfree::mutex, sched::thread::reaper::reap()::<lambda()> > (pred=..., mtx=...) at include/osv/sched.hh:1068 #5 sched::thread::wait_until<sched::thread::reaper::reap()::<lambda()> > (pred=..., mtx=...) at include/osv/sched.hh:1086 #6 sched::thread::reaper::reap (this=0xffffa00001382b80) at core/sched.cc:1553 #7 0x00000000003e6eec in sched::thread::reaper::<lambda()>::operator() (__closure=<optimized out>) at core/sched.cc:1544 #8 std::_Function_handler<void(), sched::thread::reaper::reaper()::<lambda()> >::_M_invoke(const std::_Any_data &) (__functor=...) at /usr/include/c++/5/functional:1871 #9 0x00000000003e7157 in sched::thread_main_c (t=0xffff800001629040) at arch/x64/arch-switch.hh:193 #10 0x000000000038ab23 in thread_main () at arch/x64/entry.S:113 (gdb) osv thread 27 (named ">init") (gdb) bt #0 0x00000000003e1aa6 in sched::thread::switch_to (this=0xffff800001417040, this@entry=0xffff8000013c0040) at arch/x64/arch-switch.hh:74 #1 0x00000000003e49c2 in sched::cpu::reschedule_from_interrupt (this=0xffff800000f2b040, called_from_yield=called_from_yield@entry=false, preempt_after=..., preempt_after@entry=...) at core/sched.cc:339 #2 0x00000000003e4e7c in sched::cpu::schedule () at core/sched.cc:228 #3 0x00000000003e5542 in sched::thread::wait (this=this@entry=0xffff800001d4d040) at core/sched.cc:1214 #4 0x00000000004171f0 in sched::thread::do_wait_until<sched::noninterruptible, sched::thread::dummy_lock, osv::application::join()::<lambda()> > ( mtx=<synthetic pointer>, pred=...) at include/osv/sched.hh:1068 #5 sched::thread::wait_until<osv::application::join()::<lambda()> > (pred=...) at include/osv/sched.hh:1079 #6 osv::application::join (this=0xffffa00002530810) at core/app.cc:234 #7 0x0000000000213b08 in do_main_thread (_main_args=<optimized out>) at loader.cc:481 #8 0x00000000004485d6 in pthread_private::pthread::<lambda()>::operator() (__closure=0xffffa0000147ab00) at libc/pthread.cc:114 #9 std::_Function_handler<void(), pthread_private::pthread::pthread(void* (*)(void*), void*, sigset_t, const pthread_private::thread_attr*)::<lambda()> >::_M_invoke(const std::_Any_data &) (__functor=...) at /usr/include/c++/5/functional:1871 #10 0x00000000003e7157 in sched::thread_main_c (t=0xffff800001d4d040) at arch/x64/arch-switch.hh:193 #11 0x000000000038ab23 in thread_main () at arch/x64/entry.S:113 *(gdb) osv thread 198 // named ">>>init"* *(gdb) bt* *#0 0x00000000003e1aa6 in sched::thread::switch_to (this=this@entry=0xffff800001417040) at arch/x64/arch-switch.hh:74* *#1 0x00000000003e49c2 in sched::cpu::reschedule_from_interrupt (this=0xffff800000f2b040, called_from_yield=called_from_yield@entry=false, * * preempt_after=..., preempt_after@entry=...) at core/sched.cc:339* *#2 0x00000000003e4e7c in sched::cpu::schedule () at core/sched.cc:228* *#3 0x00000000003e5542 in sched::thread::wait (this=this@entry=0xffff800002a45040) at core/sched.cc:1214* *#4 0x000000000040f915 in sched::thread::do_wait_for<lockfree::mutex, sched::wait_object<waitqueue> >(lockfree::mutex&, sched::wait_object<waitqueue>&&) (mtx=...) at include/osv/sched.hh:1206* *#5 sched::thread::wait_for<waitqueue&> (mtx=...) at include/osv/sched.hh:1222* *#6 waitqueue::wait (this=this@entry=0xffffa00002983fd0, mtx=...) at core/waitqueue.cc:56* *#7 0x00000000003d7d93 in futex (uaddr=<optimized out>, op=<optimized out>, val=<optimized out>, timeout=0x0, uaddr2=<optimized out>, * * val3=<optimized out>) at linux.cc:95* *#8 0x00000000003d81e7 in syscall (number=number@entry=202) at linux.cc:368* *#9 0x00000000003d8c64 in syscall_wrapper (number=202, p1=17592206340496, p2=0, p3=0, p4=0, p5=0, p6=0) at linux.cc:429* *#10 0x000000000038ab98 in syscall_entry () at arch/x64/entry.S:261* *#11 0x00001000010a1ce3 in runtime.futex () at /home/wkozaczuk/tools/go/src/runtime/sys_linux_amd64.s:425* *Backtrace stopped: previous frame inner to this frame (corrupt stack?)* *(gdb) osv thread 199 // named ">>>>init"* *(gdb) bt* *#0 0x00000000003e1aa6 in sched::thread::switch_to (this=0xffff800001417040, this@entry=0xffff80000111f040) at arch/x64/arch-switch.hh:74* *#1 0x00000000003e49c2 in sched::cpu::reschedule_from_interrupt (this=0xffff800000f2b040, called_from_yield=called_from_yield@entry=false, * * preempt_after=..., preempt_after@entry=...) at core/sched.cc:339* *#2 0x00000000003e4e7c in sched::cpu::schedule () at core/sched.cc:228* *#3 0x00000000003e5542 in sched::thread::wait (this=this@entry=0xffff800002b42040) at core/sched.cc:1214* *#4 0x00000000003d7cdd in sched::thread::do_wait_for<lockfree::mutex, sched::wait_object<sched::timer>, sched::wait_object<waitqueue> >(lockfree::mutex&, sched::wait_object<sched::timer>&&, sched::wait_object<waitqueue>&&) (mtx=...) at include/osv/sched.hh:1206* *#5 sched::thread::wait_for<sched::timer&, waitqueue&> (mtx=...) at include/osv/sched.hh:1222* *#6 futex (uaddr=<optimized out>, op=<optimized out>, val=<optimized out>, timeout=<optimized out>, uaddr2=<optimized out>, val3=<optimized out>)* * at linux.cc:85* *#7 0x00000000003d81e7 in syscall (number=number@entry=202) at linux.cc:368* *#8 0x00000000003d8c64 in syscall_wrapper (number=202, p1=17592206338904, p2=0, p3=0, p4=35184376675864, p5=0, p6=0) at linux.cc:429* *#9 0x000000000038ab98 in syscall_entry () at arch/x64/entry.S:261* *#10 0x00001000010a1ce3 in runtime.futex () at /home/wkozaczuk/tools/go/src/runtime/sys_linux_amd64.s:425* *Backtrace stopped: previous frame inner to this frame (corrupt stack?)* *(gdb) osv thread 200 // named ">>>>init"* *(gdb) bt* *#0 0x00000000003e1aa6 in sched::thread::switch_to (this=0xffff800001417040, this@entry=0xffff800002a45040) at arch/x64/arch-switch.hh:74* *#1 0x00000000003e49c2 in sched::cpu::reschedule_from_interrupt (this=0xffff800000f2b040, called_from_yield=called_from_yield@entry=false, * * preempt_after=..., preempt_after@entry=...) at core/sched.cc:339* *#2 0x00000000003e4e7c in sched::cpu::schedule () at core/sched.cc:228* *#3 0x00000000003e5542 in sched::thread::wait (this=this@entry=0xffff800002c4d040) at core/sched.cc:1214* *#4 0x000000000040f915 in sched::thread::do_wait_for<lockfree::mutex, sched::wait_object<waitqueue> >(lockfree::mutex&, sched::wait_object<waitqueue>&&) (mtx=...) at include/osv/sched.hh:1206* *#5 sched::thread::wait_for<waitqueue&> (mtx=...) at include/osv/sched.hh:1222* *#6 waitqueue::wait (this=this@entry=0xffffa00002983e50, mtx=...) at core/waitqueue.cc:56* *#7 0x00000000003d7d93 in futex (uaddr=<optimized out>, op=<optimized out>, val=<optimized out>, timeout=0x0, uaddr2=<optimized out>, * * val3=<optimized out>) at linux.cc:95* *#8 0x00000000003d81e7 in syscall (number=number@entry=202) at linux.cc:368* *#9 0x00000000003d8c64 in syscall_wrapper (number=202, p1=842350602512, p2=0, p3=0, p4=0, p5=0, p6=0) at linux.cc:429* *#10 0x000000000038ab98 in syscall_entry () at arch/x64/entry.S:261* *#11 0x00001000010a1ce3 in runtime.futex () at /home/wkozaczuk/tools/go/src/runtime/sys_linux_amd64.s:425* *Backtrace stopped: previous frame inner to this frame (corrupt stack?)* Here is also the output from the app with some debug statements from futex calls: futex:WAIT from thread: 198, *uaddr=[0], val=[0] futex:WAKE from thread: 200, *uaddr=[1], val=[1] futex:WAIT from thread: 200, *uaddr=[0], val=[0] syscall(): unimplemented system call 267 futex:WAIT from thread: 197, *uaddr=[0], val=[0] futex:WAKE from thread: 199, *uaddr=[1], val=[1] futex:WAIT from thread: 198, *uaddr=[0], val=[0] futex:WAKE from thread: 200, *uaddr=[1], val=[1] futex:WAIT from thread: 200, *uaddr=[0], val=[0] futex:WAKE from thread: 197, *uaddr=[1], val=[1] futex:WAKE from thread: 200, *uaddr=[1], val=[1] futex:WAIT from thread: 197, *uaddr=[0], val=[0] futex:WAIT from thread: 200, *uaddr=[0], val=[0] futex:WAKE from thread: 198, *uaddr=[1], val=[1] futex:WAIT from thread: 198, *uaddr=[0], val=[0] Hello, 世界 futex:WAKE from thread: 199, *uaddr=[1], val=[1] futex:WAIT from thread: 198, *uaddr=[0], val=[0] Go version: go1.8.3 futex:WAKE from thread: 199, *uaddr=[1], val=[1] futex:WAIT from thread: 197, *uaddr=[0], val=[0] futex:WAKE from thread: 198, *uaddr=[1], val=[1] futex:WAIT from thread: 198, *uaddr=[0], val=[0] futex:WAKE from thread: 199, *uaddr=[1], val=[1] futex:WAIT from thread: 198, *uaddr=[0], val=[0] futex:WAIT from thread: 199, *uaddr=[0], val=[0] futex:WAIT from thread: 199, *uaddr=[0], val=[0] As you can see the thread 197 calling futex was actually terminated when I connected with gdb. Here is also the portion of code in linux.cc as it is a little different from master. I took it from @benoit go branch and added couple of debug statements: 50 // We don't expect applications to use the Linux futex() system call (it is 51 // normally only used to implement higher-level synchronization mechanisms), 52 // but unfortunately gcc's C++ runtime uses a subset of futex in the 53 // __cxa__guard_* functions, which safeguard the concurrent initialization 54 // of function-scope static objects. We only implement here this subset. 55 // The __cxa_guard_* functions only call futex in the rare case of contention, 56 // in fact so rarely that OSv existed for a year before anyone noticed futex 57 // was missing. So the performance of this implementation is not critical. 58 static std::unordered_map<void*, std::shared_ptr<waitqueue>> queues; 59 static mutex queues_mutex; 60 enum { 61 FUTEX_WAIT = 0, 62 FUTEX_WAKE = 1, 63 FUTEX_PRIVATE_FLAG = 128, 64 FUTEX_CLOCK_REALTIME = 256, 65 FUTEX_CMD_MASK = ~(FUTEX_PRIVATE_FLAG|FUTEX_CLOCK_REALTIME), 66 }; 67 68 int futex(int *uaddr, int op, int val, const struct timespec *timeout, 69 int *uaddr2, int val3) 70 { 71 switch (op & FUTEX_CMD_MASK) { 72 case FUTEX_WAIT: 73 debug_always("futex:WAIT from thread: %d, *uaddr=[%d], val=[%d]\n", sched::thread::current()->id(), *uaddr, val); 74 WITH_LOCK(queues_mutex) { 75 if (*uaddr == val) { 76 auto i = queues.find(uaddr); 77 if (i == queues.end()) { 78 queues[uaddr] = std::make_shared<waitqueue>(); 79 } 80 std::shared_ptr<waitqueue> q = queues[uaddr]; 81 if (timeout) { 82 sched::timer tmr(*sched::thread::current()); 83 tmr.set(std::chrono::seconds(timeout->tv_sec) + 84 std::chrono::nanoseconds(timeout->tv_nsec)); 85 sched::thread::wait_for(queues_mutex, tmr, *q); 86 // FIXME: testing if tmr was expired isn't quite right - 87 // we could have had both a wakeup and timer expiration 88 // racing. It would be more correct to check if we were 89 // waken by a FUTEX_WAKE. But how? 90 if (tmr.expired()) { 91 errno = ETIMEDOUT; 92 return -1; 93 } 94 } else { 95 q->wait(queues_mutex); 96 } 97 return 0; 98 } else { 99 errno = EWOULDBLOCK; 100 return -1; 101 } 102 } 103 case FUTEX_WAKE: 104 debug_always("futex:WAKE from thread: %d, *uaddr=[%d], val=[%d]\n", sched::thread::current()->id(), *uaddr, val); 105 if(val < 0) { 106 errno = EINVAL; 107 return -1; 108 } 109 110 WITH_LOCK(queues_mutex) { 111 auto i = queues.find(uaddr); 112 if (i != queues.end()) { 113 int waken = 0; 114 while( (val > waken) && !(i->second->empty()) ) { 115 i->second->wake_one(queues_mutex); 116 waken++; 117 } 118 if(i->second->empty()) { 119 queues.erase(i); 120 } 121 return waken; 122 } 123 } 124 return 0; 125 default: 126 abort("Unimplemented futex() operation %d\n", op); 127 } 128 } I cannot see any clear deadlock situation so I am wondering what might be going on. Anybody any ideas? -- You received this message because you are subscribed to the Google Groups "OSv Development" group. To unsubscribe from this group and stop receiving emails from it, send an email to osv-dev+unsubscr...@googlegroups.com. For more options, visit https://groups.google.com/d/optout.