Source: rheolef
Version: 6.7-5
Severity: serious

Build log: https://buildd.debian.org/status/package.php?p=rheolef&suite=sid
...
mpirun -np 1 ./form_mass_bdr_tst -app P2 -weight yz -I my_cube_TP-5-v2 left >/dev/null 2>/dev/null
mpirun -np 2 ./form_mass_bdr_tst -app P2 -weight yz -I my_cube_TP-5-v2 left >/dev/null 2>/dev/null
mpirun -np 3 ./form_mass_bdr_tst -app P2 -weight yz -I my_cube_TP-5-v2 left >/dev/null 2>/dev/null
mpirun -np 1 ./form_mass_bdr_tst -app P2 -weight yz -I my_cube_TP-5-v2 right >/dev/null 2>/dev/null
mpirun -np 2 ./form_mass_bdr_tst -app P2 -weight yz -I my_cube_TP-5-v2 right >/dev/null 2>/dev/null
E: Build killed with signal TERM after 150 minutes of inactivity

I've reproduced this on i386: two processes run forever at 100% CPU (I aborted them after 6 hours on a fast CPU).

Backtraces:

Thread 3 (Thread 0xf50ffb40 (LWP 29032)):
#0 0xf7ed6db9 in __kernel_vsyscall ()
#1 0xf70fabd3 in __GI___poll (fds=0xf47005d0, nfds=2, timeout=3600000) at ../sysdeps/unix/sysv/linux/poll.c:29
#2 0xf5caed4a in poll (__timeout=3600000, __nfds=2, __fds=0xf47005d0) at /usr/include/i386-linux-gnu/bits/poll2.h:46
#3 poll_dispatch (base=0x578eb9c0, tv=0xf50f9bfc) at poll.c:165
#4 0xf5ca59e9 in opal_libevent2022_event_base_loop (base=<optimized out>, flags=<optimized out>) at event.c:1630
#5 0xf5c6b3bd in progress_engine (obj=0x578eb950) at runtime/opal_progress_threads.c:105
#6 0xf5df6316 in start_thread (arg=0xf50ffb40) at pthread_create.c:465
#7 0xf7105296 in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:108

Thread 2 (Thread 0xf5ac5b40 (LWP 29031)):
#0 0xf7ed6db9 in __kernel_vsyscall ()
#1 0xf71053fa in __GI_epoll_pwait (epfd=7, events=0x578ea930, maxevents=32, timeout=-1, set=0x0) at ../sysdeps/unix/sysv/linux/epoll_pwait.c:42
#2 0xf710569a in epoll_wait (epfd=7, events=0x578ea930, maxevents=32, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
#3 0xf5ca199a in epoll_dispatch (base=0x578ea7a0, tv=0x0) at epoll.c:407
#4 0xf5ca59e9 in opal_libevent2022_event_base_loop (base=<optimized out>, flags=<optimized out>) at event.c:1630
#5 0xf5af23eb in progress_engine
(obj=0x578ea7a0) at src/util/progress_threads.c:52 #6 0xf5df6316 in start_thread (arg=0xf5ac5b40) at pthread_create.c:465 #7 0xf7105296 in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:108 Thread 1 (Thread 0xf5b4fe00 (LWP 29002)): #0 0xf7ed67f5 in ?? () #1 0xf7ed6b43 in __vdso_clock_gettime () #2 0xf7112961 in __GI___clock_gettime (clock_id=1, tp=0xffb74194) at ../sysdeps/unix/clock_gettime.c:115 #3 0xf5cc3297 in opal_timer_linux_get_usec_clock_gettime () at timer_linux_component.c:197 #4 0xf5c669c3 in opal_progress () at runtime/opal_progress.c:197 #5 0xf74b5e05 in sync_wait_st (sync=<optimized out>) at ../opal/threads/wait_sync.h:80 #6 ompi_request_default_wait_all (count=2, requests=0xffb742e4, statuses=0x0) at request/req_wait.c:221 #7 0xf750640d in ompi_coll_base_allreduce_intra_recursivedoubling (sbuf=0x57951030, rbuf=0x57a9b400, count=2, dtype=0xf7565140 <ompi_mpi_unsigned>, op=0xf7573e60 <ompi_mpi_op_sum>, comm=0xf7569520 <ompi_mpi_comm_world>, module=0x57976fa0) at base/coll_base_allreduce.c:225 #8 0xe991f640 in ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf=0x57951030, rbuf=0x57a9b400, count=2, dtype=0xf7565140 <ompi_mpi_unsigned>, op=0xf7573e60 <ompi_mpi_op_sum>, comm=0xf7569520 <ompi_mpi_comm_world>, module=0x57976fa0) at coll_tuned_decision_fixed.c:66 #9 0xf74c5b77 in PMPI_Allreduce (sendbuf=0x57951030, recvbuf=0x57a9b400, count=2, datatype=0xf7565140 <ompi_mpi_unsigned>, op=0xf7573e60 <ompi_mpi_op_sum>, comm=0xf7569520 <ompi_mpi_comm_world>) at pallreduce.c:107 #10 0xf7b476cf in boost::mpi::detail::all_reduce_impl<unsigned int, std::plus<unsigned int> > (comm=..., in_values=0x57951030, n=n@entry=2, out_values=0x57a9b400) at /usr/include/boost/mpi/collectives/all_reduce.hpp:36 #11 0xf7b58fc0 in boost::mpi::all_reduce<unsigned int, std::plus<unsigned int> > (out_values=<optimized out>, n=2, in_values=<optimized out>, comm=..., op=...) 
at /usr/include/boost/mpi/collectives/all_reduce.hpp:93 #12 rheolef::mpi_assembly_begin<std::multimap<unsigned int, unsigned int, std::less<unsigned int>, rheolef::heap_allocator<std::pair<unsigned int, unsigned int> > >, rheolef::disarray_rep<rheolef::index_set, rheolef::distributed, std::allocator<rheolef::index_set> >::message_type, rheolef::apply_iterator<std::_Rb_tree_iterator<std::pair<unsigned int const, unsigned int> >, rheolef::first_op<std::pair<unsigned int const, unsigned int> > > > (stash=..., first_stash_idx=..., last_stash_idx=..., ownership=..., receive=..., send=...) at ../../include/rheolef/mpi_assembly_begin.h:113 #13 0xf7b5a346 in rheolef::disarray_rep<rheolef::index_set, rheolef::distributed, std::allocator<rheolef::index_set> >::dis_entry_assembly_begin<rheolef::index_set_add_op<rheolef::index_set> > (this=0x57acab70, my_set_op=...) at ../../include/rheolef/disarray_mpi.icc:223 #14 rheolef::disarray<rheolef::index_set, rheolef::distributed, std::allocator<rheolef::index_set> >::dis_entry_assembly_begin (this=<optimized out>) at ../../include/rheolef/disarray.h:592 #15 rheolef::disarray<rheolef::index_set, rheolef::distributed, std::allocator<rheolef::index_set> >::dis_entry_assembly (this=<optimized out>) at ../../include/rheolef/disarray.h:594 #16 rheolef::geo_rep<double, rheolef::distributed>::set_element_side_index (this=<optimized out>, side_dim=<optimized out>) at geo_mpi_get.cc:461 #17 0xf7b5f25a in rheolef::geo_rep<double, rheolef::distributed>::get (this=<optimized out>, ips=...) at geo_mpi_get.cc:965 #18 0xf7b60a48 in rheolef::geo_rep<double, rheolef::distributed>::load (this=<optimized out>, filename=..., comm=...) at geo_mpi_get.cc:989 #19 0xf7b3030a in rheolef::geo_load<double, rheolef::distributed> (name=...) 
at geo.cc:172 #20 0x56592bf8 in rheolef::geo_basic<double, rheolef::distributed>::geo_basic (comm=..., name="\360\265\225W\030\000\000\000\030\000\000\000\227@YV\320<ZV\002\000\000\000\000\060\bn\003\000\000\000y+YV@K\267\377\000\000\000\000\000\360\035\367\000\000\000\000\000\000\000\000\203w\002\367\000\360\035\367\000\360\035\367\000\000\000\000\203w\002\367\003\000\000\000\324K\267\377\344K\267\377dK\267\377\003\000\000\000\324K\267\377\000\360\035\367\352w\356\367\001\000\000\000\000\000\000\000\000\360\035\367\000\000\000\000\000\000\000\000Tl\324\tEf\254c", '\000' <repeats 12 times>, "\320K\267\377\000\200\355\367\354\202\355\367\350\210\355\367\003\000\000\000\320<ZV\003\000\000\000\310@YV\000\000\000\000\371@YV`+YV\003\000\000\000\324K\267\377"..., this=0xffb74ac0) at ../../include/rheolef/geo.h:1460 #21 main (argc=<optimized out>, argv=<optimized out>) at space_tst.cc:26 Thread 3 (Thread 0xf51ffb40 (LWP 29033)): #0 0xf7fb7db9 in __kernel_vsyscall () #1 0xf71dbbd3 in __GI___poll (fds=0xf48005d0, nfds=2, timeout=3600000) at ../sysdeps/unix/sysv/linux/poll.c:29 #2 0xf5d8fd4a in poll (__timeout=3600000, __nfds=2, __fds=0xf48005d0) at /usr/include/i386-linux-gnu/bits/poll2.h:46 #3 poll_dispatch (base=0x57eef9c0, tv=0xf51f9bfc) at poll.c:165 #4 0xf5d869e9 in opal_libevent2022_event_base_loop (base=<optimized out>, flags=<optimized out>) at event.c:1630 #5 0xf5d4c3bd in progress_engine (obj=0x57eef950) at runtime/opal_progress_threads.c:105 #6 0xf5ed7316 in start_thread (arg=0xf51ffb40) at pthread_create.c:465 #7 0xf71e6296 in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:108 Thread 2 (Thread 0xf5ba6b40 (LWP 29030)): #0 0xf7fb7db9 in __kernel_vsyscall () #1 0xf71e63fa in __GI_epoll_pwait (epfd=7, events=0x57eee930, maxevents=32, timeout=-1, set=0x0) at ../sysdeps/unix/sysv/linux/epoll_pwait.c:42 #2 0xf71e669a in epoll_wait (epfd=7, events=0x57eee930, maxevents=32, timeout=-1) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30 #3 0xf5d8299a in epoll_dispatch 
(base=0x57eee7a0, tv=0x0) at epoll.c:407 #4 0xf5d869e9 in opal_libevent2022_event_base_loop (base=<optimized out>, flags=<optimized out>) at event.c:1630 #5 0xf5bd33eb in progress_engine (obj=0x57eee7a0) at src/util/progress_threads.c:52 #6 0xf5ed7316 in start_thread (arg=0xf5ba6b40) at pthread_create.c:465 #7 0xf71e6296 in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:108 Thread 1 (Thread 0xf5c30e00 (LWP 29003)): #0 0xf7fb77f5 in ?? () #1 0xf7fb7b43 in __vdso_clock_gettime () #2 0xf71f3961 in __GI___clock_gettime (clock_id=1, tp=0xffa8c4b4) at ../sysdeps/unix/clock_gettime.c:115 #3 0xf5da4297 in opal_timer_linux_get_usec_clock_gettime () at timer_linux_component.c:197 #4 0xf5d479c3 in opal_progress () at runtime/opal_progress.c:197 #5 0xf7596e05 in sync_wait_st (sync=<optimized out>) at ../opal/threads/wait_sync.h:80 #6 ompi_request_default_wait_all (count=2, requests=0xffa8c604, statuses=0x0) at request/req_wait.c:221 #7 0xf75e740d in ompi_coll_base_allreduce_intra_recursivedoubling (sbuf=0x5809b980, rbuf=0x580805b0, count=139, dtype=0xf7646140 <ompi_mpi_unsigned>, op=0xf7655660 <ompi_mpi_op_max>, comm=0xf764a520 <ompi_mpi_comm_world>, module=0x57f74810) at base/coll_base_allreduce.c:225 #8 0xf1a05640 in ompi_coll_tuned_allreduce_intra_dec_fixed (sbuf=0x5809b980, rbuf=0x580805b0, count=139, dtype=0xf7646140 <ompi_mpi_unsigned>, op=0xf7655660 <ompi_mpi_op_max>, comm=0xf764a520 <ompi_mpi_comm_world>, module=0x57f74810) at coll_tuned_decision_fixed.c:66 #9 0xf75a6b77 in PMPI_Allreduce (sendbuf=0x5809b980, recvbuf=0x580805b0, count=139, datatype=0xf7646140 <ompi_mpi_unsigned>, op=0xf7655660 <ompi_mpi_op_max>, comm=0xf764a520 <ompi_mpi_comm_world>) at pallreduce.c:107 #10 0xf7c2862f in boost::mpi::detail::all_reduce_impl<unsigned int, boost::mpi::maximum<unsigned int> > (comm=..., in_values=0x5809b980, n=139, out_values=0x580805b0) at /usr/include/boost/mpi/collectives/all_reduce.hpp:36 #11 0xf7c4019f in boost::mpi::all_reduce<unsigned int, 
boost::mpi::maximum<unsigned int> > (out_values=<optimized out>, n=<optimized out>, in_values=<optimized out>, comm=..., op=...) at /usr/include/boost/mpi/collectives/all_reduce.hpp:93 #12 rheolef::geo_rep<double, rheolef::distributed>::get (this=<optimized out>, ips=...) at geo_mpi_get.cc:942 #13 0xf7c41a48 in rheolef::geo_rep<double, rheolef::distributed>::load (this=<optimized out>, filename=..., comm=...) at geo_mpi_get.cc:989 #14 0xf7c1130a in rheolef::geo_load<double, rheolef::distributed> (name=...) at geo.cc:172 #15 0x5658abf8 in rheolef::geo_basic<double, rheolef::distributed>::geo_basic (comm=..., name="\000\327\365W\030\000\000\000\030\000\000\000\227\300XV\320\274YV\002\000\000\000\000\240x1\003\000\000\000y\253XV\360\313\250\377\000\000\000\000\000\000,\367\000\000\000\000\000\000\000\000\203\207\020\367\000\000,\367\000\000,\367\000\000\000\000\203\207\020\367\003\000\000\000\204\314\250\377\224\314\250\377\024\314\250\377\003\000\000\000\204\314\250\377\000\000,\367\352\207\374\367\001\000\000\000\000\000\000\000\000\000,\367\000\000\000\000\000\000\000\000\276\"\333\071\257HBI", '\000' <repeats 12 times>, "\200\314\250\377\000\220\373\367\354\222\373\367\350\230\373\367\003\000\000\000\320\274YV\003\000\000\000\310\300XV\000\000\000\000\371\300XV`\253XV\003\000\000\000\204\314\250\377"..., this=0xffa8cb70) at ../../include/rheolef/geo.h:1460 #16 main (argc=<optimized out>, argv=<optimized out>) at space_tst.cc:26 -- debian-science-maintainers mailing list debian-science-maintainers@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-science-maintainers