[ 
https://issues.apache.org/jira/browse/IMPALA-10276?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17223682#comment-17223682
 ] 

Qifan Chen edited comment on IMPALA-10276 at 10/30/20, 2:44 PM:
----------------------------------------------------------------

The following stack is seen with another impala-asf-master-core build.


{code:java}
Thread 428 (crashed)
 0  libc-2.17.so + 0x351f7
    rax = 0x0000000000000000   rdx = 0x0000000000000006
    rcx = 0xffffffffffffffff   rbx = 0x0000000000000004
    rsi = 0x00000000000056f6   rdi = 0x0000000000000625
    rbp = 0x00007f6ff3421250   rsp = 0x00007f6ff3420ed8
     r8 = 0x0000000000000000    r9 = 0x00007f6ff3420d50
    r10 = 0x0000000000000008   r11 = 0x0000000000000202
    r12 = 0x00000000076fd5c0   r13 = 0x000000000000005b
    r14 = 0x00000000076fd5c4   r15 = 0x00000000076f5be0
    rip = 0x00007f710c02f1f7
    Found by: given as instruction pointer in context
 1  impalad!google::LogMessage::Flush() + 0x1eb
    rbp = 0x00007f6ff3421330   rsp = 0x00007f6ff3421260
    rip = 0x000000000523a4cb
    Found by: previous frame's frame pointer
 2  impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9
    rbx = 0x0000000000000001   rbp = 0x00007f6ff3421360
    rsp = 0x00007f6ff3421300   r12 = 0x0000000007704af8
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x000000000523e0c9
    Found by: call frame info
 3  impalad!impala::RuntimeProfileBase::total_time_counter() const 
[runtime-profile.h : 181 + 0xc]
    rbx = 0x0000000000000001   rbp = 0x00007f6ff3421360
    rsp = 0x00007f6ff3421310   r12 = 0x0000000007704af8
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x00000000022d6784
    Found by: call frame info
 4  impalad!impala::Coordinator::GetNext(impala::QueryResultSet*, int, bool*, 
long) [coordinator.cc : 870 + 0x16]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff34215a0
    rsp = 0x00007f6ff3421370   r12 = 0x0000000000000000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x0000000002d1411b
    Found by: call frame info
 5  impalad!impala::ClientRequestState::FetchRowsInternal(int, 
impala::QueryResultSet*, long) [client-request-state.cc : 1090 + 0x31]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff34219e0
    rsp = 0x00007f6ff34215b0   r12 = 0x000000003d60c000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x000000000254fa10
    Found by: call frame info
 6  impalad!impala::ClientRequestState::FetchRows(int, impala::QueryResultSet*, 
long) [client-request-state.cc : 938 + 0x1e]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff3421a50
    rsp = 0x00007f6ff34219f0   r12 = 0x000000003d60c000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x000000000254dda0
    Found by: call frame info
 7  impalad!impala::ImpalaServer::FetchInternal(impala::TUniqueId, bool, int, 
beeswax::Results*) [impala-beeswax-server.cc : 614 + 0x37]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff3421c20
    rsp = 0x00007f6ff3421a60   r12 = 0x000000003d60c000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x0000000002571f8d
    Found by: call frame info
 8  impalad!impala::ImpalaServer::fetch(beeswax::Results&, beeswax::QueryHandle 
const&, bool, int) [impala-beeswax-server.cc : 191 + 0x32]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff3421ea0
    rsp = 0x00007f6ff3421c30   r12 = 0x0000000000000000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x000000000256c04d
    Found by: call frame info
 9  impalad!beeswax::BeeswaxServiceProcessor::process_fetch(int, 
apache::thrift::protocol::TProtocol*, apache::thrift::protocol::TProtocol*, 
void*) [BeeswaxService.cpp : 3398 + 0x51]
    rbx = 0x0000000002b44ca4   rbp = 0x00007f6ff3422270
    rsp = 0x00007f6ff3421eb0   r12 = 0x0000000000000000
    r13 = 0x00000000327105c0   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x0000000002b7c129
    Found by: call frame info
10  
impalad!beeswax::BeeswaxServiceProcessor::dispatchCall(apache::thrift::protocol::TProtocol*,
 apache::thrift::protocol::TProtocol*, std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const&, int, void*) 
[BeeswaxService.cpp : 3200 + 0x1d]
    rbx = 0x0000000002b44ca4   rbp = 0x00007f6ff3422370
    rsp = 0x00007f6ff3422280   r12 = 0x0000000000000000
    r13 = 0x00000000327105c0   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x0000000002b7a6e2
    Found by: call frame info
11  
impalad!impala::ImpalaServiceProcessor::dispatchCall(apache::thrift::protocol::TProtocol*,
 apache::thrift::protocol::TProtocol*, std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const&, int, void*) 
[ImpalaService.cpp : 1824 + 0x25]
    rbx = 0x0000000002b44ca4   rbp = 0x00007f6ff34223c0
    rsp = 0x00007f6ff3422380   r12 = 0x0000000000000000
    r13 = 0x00000000327105c0   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x0000000002b44d3b
    Found by: call frame info
12  
impalad!apache::thrift::TDispatchProcessor::process(boost::shared_ptr<apache::thrift::protocol::TProtocol>,
 boost::shared_ptr<apache::thrift::protocol::TProtocol>, void*) 
[TDispatchProcessor.h : 121 + 0x4d]
    rbx = 0x0000000002b44ca4   rbp = 0x00007f6ff3422440
    rsp = 0x00007f6ff34223d0   r12 = 0x0000000000000000
    r13 = 0x00000000327105c0   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x0000000001cd76c8
    Found by: call frame info
13  impalad!apache::thrift::server::TAcceptQueueServer::Task::run() 
[TAcceptQueueServer.cpp : 84 + 0x99]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff3422640
    rsp = 0x00007f6ff3422450   r12 = 0x0000000000000000
    r13 = 0x0000000000000001   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x00000000021ef35d
    Found by: call frame info
{code}

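The failure point is the DCHECK() at line 181 of runtime-profile.h (frame 3 in the stack above): the lookup of TOTAL_TIME_COUNTER_NAME in counter_map_ returned counter_map_.end().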

{code:java}
178   /// Returns the counter for the total elapsed time.
179   Counter* total_time_counter() const {
180     auto it = counter_map_.find(TOTAL_TIME_COUNTER_NAME);
181     DCHECK(it != counter_map_.end());
182     return it->second;
183   }
{code}




was (Author: sql_forever):
The following stack is seen with another impala-asf-master-core build.


{code:java}
Thread 428 (crashed)
 0  libc-2.17.so + 0x351f7
    rax = 0x0000000000000000   rdx = 0x0000000000000006
    rcx = 0xffffffffffffffff   rbx = 0x0000000000000004
    rsi = 0x00000000000056f6   rdi = 0x0000000000000625
    rbp = 0x00007f6ff3421250   rsp = 0x00007f6ff3420ed8
     r8 = 0x0000000000000000    r9 = 0x00007f6ff3420d50
    r10 = 0x0000000000000008   r11 = 0x0000000000000202
    r12 = 0x00000000076fd5c0   r13 = 0x000000000000005b
    r14 = 0x00000000076fd5c4   r15 = 0x00000000076f5be0
    rip = 0x00007f710c02f1f7
    Found by: given as instruction pointer in context
 1  impalad!google::LogMessage::Flush() + 0x1eb
    rbp = 0x00007f6ff3421330   rsp = 0x00007f6ff3421260
    rip = 0x000000000523a4cb
    Found by: previous frame's frame pointer
 2  impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9
    rbx = 0x0000000000000001   rbp = 0x00007f6ff3421360
    rsp = 0x00007f6ff3421300   r12 = 0x0000000007704af8
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x000000000523e0c9
    Found by: call frame info
 3  impalad!impala::RuntimeProfileBase::total_time_counter() const 
[runtime-profile.h : 181 + 0xc]
    rbx = 0x0000000000000001   rbp = 0x00007f6ff3421360
    rsp = 0x00007f6ff3421310   r12 = 0x0000000007704af8
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x00000000022d6784
    Found by: call frame info
 4  impalad!impala::Coordinator::GetNext(impala::QueryResultSet*, int, bool*, 
long) [coordinator.cc : 870 + 0x16]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff34215a0
    rsp = 0x00007f6ff3421370   r12 = 0x0000000000000000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x0000000002d1411b
    Found by: call frame info
 5  impalad!impala::ClientRequestState::FetchRowsInternal(int, 
impala::QueryResultSet*, long) [client-request-state.cc : 1090 + 0x31]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff34219e0
    rsp = 0x00007f6ff34215b0   r12 = 0x000000003d60c000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x000000000254fa10
    Found by: call frame info
 6  impalad!impala::ClientRequestState::FetchRows(int, impala::QueryResultSet*, 
long) [client-request-state.cc : 938 + 0x1e]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff3421a50
    rsp = 0x00007f6ff34219f0   r12 = 0x000000003d60c000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x000000000254dda0
    Found by: call frame info
 7  impalad!impala::ImpalaServer::FetchInternal(impala::TUniqueId, bool, int, 
beeswax::Results*) [impala-beeswax-server.cc : 614 + 0x37]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff3421c20
    rsp = 0x00007f6ff3421a60   r12 = 0x000000003d60c000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x0000000002571f8d
    Found by: call frame info
 8  impalad!impala::ImpalaServer::fetch(beeswax::Results&, beeswax::QueryHandle 
const&, bool, int) [impala-beeswax-server.cc : 191 + 0x32]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff3421ea0
    rsp = 0x00007f6ff3421c30   r12 = 0x0000000000000000
    r13 = 0x0000000000000000   r14 = 0x0000000000000000
    r15 = 0x00000000144743a0   rip = 0x000000000256c04d
    Found by: call frame info
 9  impalad!beeswax::BeeswaxServiceProcessor::process_fetch(int, 
apache::thrift::protocol::TProtocol*, apache::thrift::protocol::TProtocol*, 
void*) [BeeswaxService.cpp : 3398 + 0x51]
    rbx = 0x0000000002b44ca4   rbp = 0x00007f6ff3422270
    rsp = 0x00007f6ff3421eb0   r12 = 0x0000000000000000
    r13 = 0x00000000327105c0   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x0000000002b7c129
    Found by: call frame info
10  
impalad!beeswax::BeeswaxServiceProcessor::dispatchCall(apache::thrift::protocol::TProtocol*,
 apache::thrift::protocol::TProtocol*, std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const&, int, void*) 
[BeeswaxService.cpp : 3200 + 0x1d]
    rbx = 0x0000000002b44ca4   rbp = 0x00007f6ff3422370
    rsp = 0x00007f6ff3422280   r12 = 0x0000000000000000
    r13 = 0x00000000327105c0   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x0000000002b7a6e2
    Found by: call frame info
11  
impalad!impala::ImpalaServiceProcessor::dispatchCall(apache::thrift::protocol::TProtocol*,
 apache::thrift::protocol::TProtocol*, std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const&, int, void*) 
[ImpalaService.cpp : 1824 + 0x25]
    rbx = 0x0000000002b44ca4   rbp = 0x00007f6ff34223c0
    rsp = 0x00007f6ff3422380   r12 = 0x0000000000000000
    r13 = 0x00000000327105c0   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x0000000002b44d3b
    Found by: call frame info
12  
impalad!apache::thrift::TDispatchProcessor::process(boost::shared_ptr<apache::thrift::protocol::TProtocol>,
 boost::shared_ptr<apache::thrift::protocol::TProtocol>, void*) 
[TDispatchProcessor.h : 121 + 0x4d]
    rbx = 0x0000000002b44ca4   rbp = 0x00007f6ff3422440
    rsp = 0x00007f6ff34223d0   r12 = 0x0000000000000000
    r13 = 0x00000000327105c0   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x0000000001cd76c8
    Found by: call frame info
13  impalad!apache::thrift::server::TAcceptQueueServer::Task::run() 
[TAcceptQueueServer.cpp : 84 + 0x99]
    rbx = 0x0000000000000000   rbp = 0x00007f6ff3422640
    rsp = 0x00007f6ff3422450   r12 = 0x0000000000000000
    r13 = 0x0000000000000001   r14 = 0x0000000000000001
    r15 = 0x00000000144743a0   rip = 0x00000000021ef35d
    Found by: call frame info
{code}


> Release build sees SIGSEGV when updating the total time counter
> ---------------------------------------------------------------
>
>                 Key: IMPALA-10276
>                 URL: https://issues.apache.org/jira/browse/IMPALA-10276
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Backend
>    Affects Versions: Impala 4.0
>            Reporter: Joe McDonnell
>            Priority: Critical
>              Labels: broken-build, flaky
>
> A recent release build saw an Impalad crash with the following stack:
> {noformat}
> Crash reason:  SIGSEGV
> Crash address: 0x11
> #0 raise () from /lib64/libc.so.6
> #1 abort () from /lib64/libc.so.6
> #2 os::abort(bool) () from 
> /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
> #3 VMError::report_and_die() () from 
> /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
> #4 JVM_handle_linux_signal () from 
> /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
> #5 signalHandler(int, siginfo*, void*) () from 
> /usr/java/jdk1.8.0_144/jre/lib/amd64/server/libjvm.so
> #6  <signal handler called>
> #7 UpdateCounter (this=0x7f93adc88030) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/util/runtime-profile-counters.h:933
> #8 impala::ScopedTimer<impala::MonotonicStopWatch>::~ScopedTimer 
> (this=0x7f93adc88030, __in_chrg=<optimized out>) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/util/runtime-profile-counters.h:954
> #9 impala::Coordinator::GetNext (this=this@entry=0x13ad09e00, 
> results=results@entry=0xa7e18460, max_rows=max_rows@entry=-1, 
> eos=eos@entry=0x7f93adc880ef, 
> block_on_wait_time_us=block_on_wait_time_us@entry=0) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/runtime/coordinator.cc:864
> #10 impala::ClientRequestState::FetchRowsInternal 
> (this=this@entry=0x11568000, max_rows=max_rows@entry=-1, 
> fetched_rows=fetched_rows@entry=0xa7e18460, 
> block_on_wait_time_us=block_on_wait_time_us@entry=0) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/service/client-request-state.cc:1090
> #11 impala::ClientRequestState::FetchRows (this=0x11568000, 
> max_rows=max_rows@entry=-1, fetched_rows=fetched_rows@entry=0xa7e18460, 
> block_on_wait_time_us=0) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/service/client-request-state.cc:938
> #12 impala::ImpalaServer::FetchInternal (this=this@entry=0xdd35b00, 
> query_id=..., start_over=start_over@entry=false, 
> fetch_size=fetch_size@entry=-1, 
> query_results=query_results@entry=0x7f93adc88498) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/service/impala-beeswax-server.cc:614
> #13 impala::ImpalaServer::fetch (this=0xdd35b00, query_results=..., 
> beeswax_handle=..., start_over=<optimized out>, fetch_size=-1) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/service/impala-beeswax-server.cc:191
> #14 beeswax::BeeswaxServiceProcessor::process_fetch (this=0x102b0c60, 
> seqid=0, iprot=<optimized out>, oprot=0xd45d0980, callContext=<optimized 
> out>) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/generated-sources/gen-cpp/BeeswaxService.cpp:3398
> #15 beeswax::BeeswaxServiceProcessor::dispatchCall (this=0x102b0c60, 
> iprot=0xd45d1e00, oprot=0xd45d0980, fname=..., seqid=0, 
> callContext=0x12ea9a80) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/generated-sources/gen-cpp/BeeswaxService.cpp:3200
> #16 apache::thrift::TDispatchProcessor::process (this=0x102b0c60, in=..., 
> out=..., connectionContext=0x12ea9a80) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/Impala-Toolchain/toolchain-packages-gcc7.5.0/thrift-0.9.3-p8/include/thrift/TDispatchProcessor.h:121
> #17 apache::thrift::server::TAcceptQueueServer::Task::run (this=0x11a92e80) 
> at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/repos/Impala/be/src/rpc/TAcceptQueueServer.cpp:84
> #18 operator() (a2=<optimized out>, a1=..., p=<optimized out>, 
> this=<optimized out>) at 
> /data/jenkins/workspace/impala-cdpd-master-staging-exhaustive-release/Impala-Toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/bind/mem_fn_template.hpp:280{noformat}
> The code is updating the total time counter in the runtime profile via a 
> ScopedTimer. 
> This has been seen once.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org
For additional commands, e-mail: issues-all-h...@impala.apache.org

Reply via email to