[ https://issues.apache.org/jira/browse/IMPALA-10276?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17223682#comment-17223682 ]
Qifan Chen edited comment on IMPALA-10276 at 10/30/20, 2:44 PM: ---------------------------------------------------------------- The following stack is seen with another impala-asf-master-core build. {code:java} Thread 428 (crashed) 0 libc-2.17.so + 0x351f7 rax = 0x0000000000000000 rdx = 0x0000000000000006 rcx = 0xffffffffffffffff rbx = 0x0000000000000004 rsi = 0x00000000000056f6 rdi = 0x0000000000000625 rbp = 0x00007f6ff3421250 rsp = 0x00007f6ff3420ed8 r8 = 0x0000000000000000 r9 = 0x00007f6ff3420d50 r10 = 0x0000000000000008 r11 = 0x0000000000000202 r12 = 0x00000000076fd5c0 r13 = 0x000000000000005b r14 = 0x00000000076fd5c4 r15 = 0x00000000076f5be0 rip = 0x00007f710c02f1f7 Found by: given as instruction pointer in context 1 impalad!google::LogMessage::Flush() + 0x1eb rbp = 0x00007f6ff3421330 rsp = 0x00007f6ff3421260 rip = 0x000000000523a4cb Found by: previous frame's frame pointer 2 impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9 rbx = 0x0000000000000001 rbp = 0x00007f6ff3421360 rsp = 0x00007f6ff3421300 r12 = 0x0000000007704af8 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x000000000523e0c9 Found by: call frame info 3 impalad!impala::RuntimeProfileBase::total_time_counter() const [runtime-profile.h : 181 + 0xc] rbx = 0x0000000000000001 rbp = 0x00007f6ff3421360 rsp = 0x00007f6ff3421310 r12 = 0x0000000007704af8 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x00000000022d6784 Found by: call frame info 4 impalad!impala::Coordinator::GetNext(impala::QueryResultSet*, int, bool*, long) [coordinator.cc : 870 + 0x16] rbx = 0x0000000000000000 rbp = 0x00007f6ff34215a0 rsp = 0x00007f6ff3421370 r12 = 0x0000000000000000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x0000000002d1411b Found by: call frame info 5 impalad!impala::ClientRequestState::FetchRowsInternal(int, impala::QueryResultSet*, long) [client-request-state.cc : 1090 + 0x31] rbx = 0x0000000000000000 
rbp = 0x00007f6ff34219e0 rsp = 0x00007f6ff34215b0 r12 = 0x000000003d60c000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x000000000254fa10 Found by: call frame info 6 impalad!impala::ClientRequestState::FetchRows(int, impala::QueryResultSet*, long) [client-request-state.cc : 938 + 0x1e] rbx = 0x0000000000000000 rbp = 0x00007f6ff3421a50 rsp = 0x00007f6ff34219f0 r12 = 0x000000003d60c000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x000000000254dda0 Found by: call frame info 7 impalad!impala::ImpalaServer::FetchInternal(impala::TUniqueId, bool, int, beeswax::Results*) [impala-beeswax-server.cc : 614 + 0x37] rbx = 0x0000000000000000 rbp = 0x00007f6ff3421c20 rsp = 0x00007f6ff3421a60 r12 = 0x000000003d60c000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x0000000002571f8d Found by: call frame info 8 impalad!impala::ImpalaServer::fetch(beeswax::Results&, beeswax::QueryHandle const&, bool, int) [impala-beeswax-server.cc : 191 + 0x32] rbx = 0x0000000000000000 rbp = 0x00007f6ff3421ea0 rsp = 0x00007f6ff3421c30 r12 = 0x0000000000000000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x000000000256c04d Found by: call frame info 9 impalad!beeswax::BeeswaxServiceProcessor::process_fetch(int, apache::thrift::protocol::TProtocol*, apache::thrift::protocol::TProtocol*, void*) [BeeswaxService.cpp : 3398 + 0x51] rbx = 0x0000000002b44ca4 rbp = 0x00007f6ff3422270 rsp = 0x00007f6ff3421eb0 r12 = 0x0000000000000000 r13 = 0x00000000327105c0 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x0000000002b7c129 Found by: call frame info 10 impalad!beeswax::BeeswaxServiceProcessor::dispatchCall(apache::thrift::protocol::TProtocol*, apache::thrift::protocol::TProtocol*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int, void*) [BeeswaxService.cpp : 3200 + 0x1d] rbx = 0x0000000002b44ca4 rbp = 0x00007f6ff3422370 
rsp = 0x00007f6ff3422280 r12 = 0x0000000000000000 r13 = 0x00000000327105c0 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x0000000002b7a6e2 Found by: call frame info 11 impalad!impala::ImpalaServiceProcessor::dispatchCall(apache::thrift::protocol::TProtocol*, apache::thrift::protocol::TProtocol*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int, void*) [ImpalaService.cpp : 1824 + 0x25] rbx = 0x0000000002b44ca4 rbp = 0x00007f6ff34223c0 rsp = 0x00007f6ff3422380 r12 = 0x0000000000000000 r13 = 0x00000000327105c0 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x0000000002b44d3b Found by: call frame info 12 impalad!apache::thrift::TDispatchProcessor::process(boost::shared_ptr<apache::thrift::protocol::TProtocol>, boost::shared_ptr<apache::thrift::protocol::TProtocol>, void*) [TDispatchProcessor.h : 121 + 0x4d] rbx = 0x0000000002b44ca4 rbp = 0x00007f6ff3422440 rsp = 0x00007f6ff34223d0 r12 = 0x0000000000000000 r13 = 0x00000000327105c0 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x0000000001cd76c8 Found by: call frame info 13 impalad!apache::thrift::server::TAcceptQueueServer::Task::run() [TAcceptQueueServer.cpp : 84 + 0x99] rbx = 0x0000000000000000 rbp = 0x00007f6ff3422640 rsp = 0x00007f6ff3422450 r12 = 0x0000000000000000 r13 = 0x0000000000000001 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x00000000021ef35d Found by: call frame info {code} The failure point is the DCHECK() at line 181. {code:java} 178 /// Returns the counter for the total elapsed time. 179 Counter* total_time_counter() const { 180 auto it = counter_map_.find(TOTAL_TIME_COUNTER_NAME); 181 DCHECK(it != counter_map_.end()); 182 return it->second; 183 } {code} was (Author: sql_forever): The following stack is seen with another impala-asf-master-core build. 
{code:java} Thread 428 (crashed) 0 libc-2.17.so + 0x351f7 rax = 0x0000000000000000 rdx = 0x0000000000000006 rcx = 0xffffffffffffffff rbx = 0x0000000000000004 rsi = 0x00000000000056f6 rdi = 0x0000000000000625 rbp = 0x00007f6ff3421250 rsp = 0x00007f6ff3420ed8 r8 = 0x0000000000000000 r9 = 0x00007f6ff3420d50 r10 = 0x0000000000000008 r11 = 0x0000000000000202 r12 = 0x00000000076fd5c0 r13 = 0x000000000000005b r14 = 0x00000000076fd5c4 r15 = 0x00000000076f5be0 rip = 0x00007f710c02f1f7 Found by: given as instruction pointer in context 1 impalad!google::LogMessage::Flush() + 0x1eb rbp = 0x00007f6ff3421330 rsp = 0x00007f6ff3421260 rip = 0x000000000523a4cb Found by: previous frame's frame pointer 2 impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9 rbx = 0x0000000000000001 rbp = 0x00007f6ff3421360 rsp = 0x00007f6ff3421300 r12 = 0x0000000007704af8 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x000000000523e0c9 Found by: call frame info 3 impalad!impala::RuntimeProfileBase::total_time_counter() const [runtime-profile.h : 181 + 0xc] rbx = 0x0000000000000001 rbp = 0x00007f6ff3421360 rsp = 0x00007f6ff3421310 r12 = 0x0000000007704af8 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x00000000022d6784 Found by: call frame info 4 impalad!impala::Coordinator::GetNext(impala::QueryResultSet*, int, bool*, long) [coordinator.cc : 870 + 0x16] rbx = 0x0000000000000000 rbp = 0x00007f6ff34215a0 rsp = 0x00007f6ff3421370 r12 = 0x0000000000000000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x0000000002d1411b Found by: call frame info 5 impalad!impala::ClientRequestState::FetchRowsInternal(int, impala::QueryResultSet*, long) [client-request-state.cc : 1090 + 0x31] rbx = 0x0000000000000000 rbp = 0x00007f6ff34219e0 rsp = 0x00007f6ff34215b0 r12 = 0x000000003d60c000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x000000000254fa10 Found by: call frame info 6 
impalad!impala::ClientRequestState::FetchRows(int, impala::QueryResultSet*, long) [client-request-state.cc : 938 + 0x1e] rbx = 0x0000000000000000 rbp = 0x00007f6ff3421a50 rsp = 0x00007f6ff34219f0 r12 = 0x000000003d60c000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x000000000254dda0 Found by: call frame info 7 impalad!impala::ImpalaServer::FetchInternal(impala::TUniqueId, bool, int, beeswax::Results*) [impala-beeswax-server.cc : 614 + 0x37] rbx = 0x0000000000000000 rbp = 0x00007f6ff3421c20 rsp = 0x00007f6ff3421a60 r12 = 0x000000003d60c000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x0000000002571f8d Found by: call frame info 8 impalad!impala::ImpalaServer::fetch(beeswax::Results&, beeswax::QueryHandle const&, bool, int) [impala-beeswax-server.cc : 191 + 0x32] rbx = 0x0000000000000000 rbp = 0x00007f6ff3421ea0 rsp = 0x00007f6ff3421c30 r12 = 0x0000000000000000 r13 = 0x0000000000000000 r14 = 0x0000000000000000 r15 = 0x00000000144743a0 rip = 0x000000000256c04d Found by: call frame info 9 impalad!beeswax::BeeswaxServiceProcessor::process_fetch(int, apache::thrift::protocol::TProtocol*, apache::thrift::protocol::TProtocol*, void*) [BeeswaxService.cpp : 3398 + 0x51] rbx = 0x0000000002b44ca4 rbp = 0x00007f6ff3422270 rsp = 0x00007f6ff3421eb0 r12 = 0x0000000000000000 r13 = 0x00000000327105c0 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x0000000002b7c129 Found by: call frame info 10 impalad!beeswax::BeeswaxServiceProcessor::dispatchCall(apache::thrift::protocol::TProtocol*, apache::thrift::protocol::TProtocol*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int, void*) [BeeswaxService.cpp : 3200 + 0x1d] rbx = 0x0000000002b44ca4 rbp = 0x00007f6ff3422370 rsp = 0x00007f6ff3422280 r12 = 0x0000000000000000 r13 = 0x00000000327105c0 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x0000000002b7a6e2 Found by: call frame info 11 
impalad!impala::ImpalaServiceProcessor::dispatchCall(apache::thrift::protocol::TProtocol*, apache::thrift::protocol::TProtocol*, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int, void*) [ImpalaService.cpp : 1824 + 0x25] rbx = 0x0000000002b44ca4 rbp = 0x00007f6ff34223c0 rsp = 0x00007f6ff3422380 r12 = 0x0000000000000000 r13 = 0x00000000327105c0 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x0000000002b44d3b Found by: call frame info 12 impalad!apache::thrift::TDispatchProcessor::process(boost::shared_ptr<apache::thrift::protocol::TProtocol>, boost::shared_ptr<apache::thrift::protocol::TProtocol>, void*) [TDispatchProcessor.h : 121 + 0x4d] rbx = 0x0000000002b44ca4 rbp = 0x00007f6ff3422440 rsp = 0x00007f6ff34223d0 r12 = 0x0000000000000000 r13 = 0x00000000327105c0 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x0000000001cd76c8 Found by: call frame info 13 impalad!apache::thrift::server::TAcceptQueueServer::Task::run() [TAcceptQueueServer.cpp : 84 + 0x99] rbx = 0x0000000000000000 rbp = 0x00007f6ff3422640 rsp = 0x00007f6ff3422450 r12 = 0x0000000000000000 r13 = 0x0000000000000001 r14 = 0x0000000000000001 r15 = 0x00000000144743a0 rip = 0x00000000021ef35d Found by: call frame info {code} > Release build sees SIGSEGV when updating the total time counter > --------------------------------------------------------------- > > Key: IMPALA-10276 > URL: https://issues.apache.org/jira/browse/IMPALA-10276 > Project: IMPALA > Issue Type: Bug > Components: Backend > Affects Versions: Impala 4.0 > Reporter: Joe McDonnell > Priority: Critical > Labels: broken-build, flaky > > A recent release build saw an Impalad crash with the following stack: > {noformat} > Crash reason: SIGSEGV > Crash address: 0x11 > #0 raise () from /lib64/libc.so.6 > #1 abort () from /lib64/libc.so.6 > #2 os::abort(bool) () from > /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so > #3 VMError::report_and_die() () from > 
/usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so > #4 JVM_handle_linux_signal () from > /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so > #5 signalHandler(int, siginfo*, void*) () from > /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so > #6 <signal handler called> > #7 UpdateCounter (this=0x7f93adc88030) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/util/runtime-profile-counters.h:933 > #8 impala::ScopedTimer<impala::MonotonicStopWatch>::~ScopedTimer > (this=0x7f93adc88030, __in_chrg=<optimized out>) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/util/runtime-profile-counters.h:954 > #9 impala::Coordinator::GetNext (this=this@entry=0x13ad09e00, > results=results@entry=0xa7e18460, max_rows=max_rows@entry=-1, > eos=eos@entry=0x7f93adc880ef, > block_on_wait_time_us=block_on_wait_time_us@entry=0) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/runtime/coordinator.cc:864 > #10 impala::ClientRequestState::FetchRowsInternal > (this=this@entry=0x11568000, max_rows=max_rows@entry=-1, > fetched_rows=fetched_rows@entry=0xa7e18460, > block_on_wait_time_us=block_on_wait_time_us@entry=0) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/service/client-request-state.cc:1090 > #11 impala::ClientRequestState::FetchRows (this=0x11568000, > max_rows=max_rows@entry=-1, fetched_rows=fetched_rows@entry=0xa7e18460, > block_on_wait_time_us=0) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/service/client-request-state.cc:938 > #12 impala::ImpalaServer::FetchInternal (this=this@entry=0xdd35b00, > query_id=..., start_over=start_over@entry=false, > fetch_size=fetch_size@entry=-1, > query_results=query_results@entry=0x7f93adc88498) at > 
/data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/service/impala-beeswax-server.cc:614 > #13 impala::ImpalaServer::fetch (this=0xdd35b00, query_results=..., > beeswax_handle=..., start_over=<optimized out>, fetch_size=-1) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/service/impala-beeswax-server.cc:191 > #14 beeswax::BeeswaxServiceProcessor::process_fetch (this=0x102b0c60, > seqid=0, iprot=<optimized out>, oprot=0xd45d0980, callContext=<optimized > out>) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/generated-sources/gen-cpp/BeeswaxService.cpp:3398 > #15 beeswax::BeeswaxServiceProcessor::dispatchCall (this=0x102b0c60, > iprot=0xd45d1e00, oprot=0xd45d0980, fname=..., seqid=0, > callContext=0x12ea9a80) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/generated-sources/gen-cpp/BeeswaxService.cpp:3200 > #16 apache::thrift::TDispatchProcessor::process (this=0x102b0c60, in=..., > out=..., connectionContext=0x12ea9a80) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/Impala-Toolchain/toolchain-packages-gcc7.5.0/thrift-0.9.3-p8/include/thrift/TDispatchProcessor.h:121 > #17 apache::thrift::server::TAcceptQueueServer::Task::run (this=0x11a92e80) > at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/rpc/TAcceptQueueServer.cpp:84 > #18 operator() (a2=<optimized out>, a1=..., p=<optimized out>, > this=<optimized out>) at > /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/Impala-Toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/bind/mem_fn_template.hpp:280{noformat} > The code is updating the total time counter in the runtime profile via a > ScopedTimer. 
> This has been seen only once so far. -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org For additional commands, e-mail: issues-all-h...@impala.apache.org