[ 
https://issues.apache.org/jira/browse/ARROW-18351?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

ASF GitHub Bot updated ARROW-18351:
-----------------------------------
    Labels: pull-request-available  (was: )

> [C++][Flight] Crash in UcxErrorHandlingTest.TestDoExchange
> ----------------------------------------------------------
>
>                 Key: ARROW-18351
>                 URL: https://issues.apache.org/jira/browse/ARROW-18351
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: C++, FlightRPC
>            Reporter: Antoine Pitrou
>            Assignee: David Li
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 10m
>  Remaining Estimate: 0h
>
> I get a non-deterministic crash in the Flight UCX tests.
> {code}
> [----------] 3 tests from UcxErrorHandlingTest
> [ RUN      ] UcxErrorHandlingTest.TestGetFlightInfo
> [       OK ] UcxErrorHandlingTest.TestGetFlightInfo (24 ms)
> [ RUN      ] UcxErrorHandlingTest.TestDoPut
> [       OK ] UcxErrorHandlingTest.TestDoPut (15 ms)
> [ RUN      ] UcxErrorHandlingTest.TestDoExchange
> /arrow/cpp/src/arrow/util/future.cc:125:  Check failed: !IsFutureFinished(state_) Future already marked finished
> {code}
> Here is the GDB backtrace:
> {code}
> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
> #1  0x00007f18c49cd7f1 in __GI_abort () at abort.c:79
> #2  0x00007f18c5854e00 in arrow::util::CerrLog::~CerrLog 
> (this=0x7f18a81607b0, __in_chrg=<optimized out>) at 
> /arrow/cpp/src/arrow/util/logging.cc:72
> #3  0x00007f18c5854e1c in arrow::util::CerrLog::~CerrLog 
> (this=0x7f18a81607b0, __in_chrg=<optimized out>) at 
> /arrow/cpp/src/arrow/util/logging.cc:74
> #4  0x00007f18c5855181 in arrow::util::ArrowLog::~ArrowLog 
> (this=0x7f18c07fc380, __in_chrg=<optimized out>) at 
> /arrow/cpp/src/arrow/util/logging.cc:250
> #5  0x00007f18c5826f86 in arrow::ConcreteFutureImpl::DoMarkFinishedOrFailed 
> (this=0x7f18a815f030, state=arrow::FutureState::FAILURE)
>     at /arrow/cpp/src/arrow/util/future.cc:125
> #6  0x00007f18c58265af in arrow::ConcreteFutureImpl::DoMarkFailed 
> (this=0x7f18a815f030) at /arrow/cpp/src/arrow/util/future.cc:40
> #7  0x00007f18c5827660 in arrow::FutureImpl::MarkFailed (this=0x7f18a815f030) 
> at /arrow/cpp/src/arrow/util/future.cc:195
> #8  0x00007f18c80ff8d8 in 
> arrow::Future<std::shared_ptr<arrow::flight::transport::ucx::Frame> 
> >::DoMarkFinished (this=0x7f18a815efb0, res=...)
>     at /arrow/cpp/src/arrow/util/future.h:660
> #9  0x00007f18c80fb37d in 
> arrow::Future<std::shared_ptr<arrow::flight::transport::ucx::Frame> 
> >::MarkFinished (this=0x7f18a815efb0, res=...)
>     at /arrow/cpp/src/arrow/util/future.h:403
> #10 0x00007f18c80f5ae3 in 
> arrow::flight::transport::ucx::UcpCallDriver::Impl::Push 
> (this=0x7f18a804d2d0, status=...)
>     at /arrow/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc:780
> #11 0x00007f18c80f5c1f in 
> arrow::flight::transport::ucx::UcpCallDriver::Impl::RecvActiveMessage 
> (this=0x7f18a804d2d0, header=0x7f18c8081865, header_length=12, 
>     data=0x7f18c8081864, data_length=1, param=0x7f18c07fc680) at 
> /arrow/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc:791
> #12 0x00007f18c80f7d29 in 
> arrow::flight::transport::ucx::UcpCallDriver::RecvActiveMessage 
> (this=0x7f18b80017e0, header=0x7f18c8081865, header_length=12, 
>     data=0x7f18c8081864, data_length=1, param=0x7f18c07fc680) at 
> /arrow/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc:1082
> #13 0x00007f18c80e3ea4 in arrow::flight::transport::ucx::(anonymous 
> namespace)::UcxServerImpl::HandleIncomingActiveMessage (self=0x7f18a80259a0, 
>     header=0x7f18c8081865, header_length=12, data=0x7f18c8081864, 
> data_length=1, param=0x7f18c07fc680)
>     at /arrow/cpp/src/arrow/flight/transport/ucx/ucx_server.cc:586
> #14 0x00007f18c4661a09 in ucp_am_invoke_cb (recv_flags=<optimized out>, 
> reply_ep=<optimized out>, data_length=1, data=<optimized out>, 
>     user_hdr_length=<optimized out>, user_hdr=0x7f18c8081865, am_id=4132, 
> worker=<optimized out>) at core/ucp_am.c:1220
> #15 ucp_am_handler_common (name=<synthetic pointer>, recv_flags=<optimized 
> out>, am_flags=0, reply_ep=<optimized out>, total_length=<optimized out>, 
>     am_hdr=0x7f18c808185c, worker=<optimized out>) at core/ucp_am.c:1289
> #16 ucp_am_handler_reply (am_arg=<optimized out>, am_data=<optimized out>, 
> am_length=<optimized out>, am_flags=<optimized out>) at core/ucp_am.c:1327
> #17 0x00007f18c28e3f1c in uct_iface_invoke_am (flags=0, length=29, 
> data=0x7f18c808185c, id=<optimized out>, iface=0x7f18a8027e20)
>     at /usr/local/src/conda/ucx-1.13.1/src/uct/base/uct_iface.h:861
> #18 uct_mm_iface_invoke_am (flags=0, length=29, data=0x7f18c808185c, 
> am_id=<optimized out>, iface=0x7f18a8027e20) at sm/mm/base/mm_iface.h:256
> #19 uct_mm_iface_process_recv (iface=0x7f18a8027e20) at 
> sm/mm/base/mm_iface.c:256
> #20 uct_mm_iface_poll_fifo (iface=0x7f18a8027e20) at sm/mm/base/mm_iface.c:304
> #21 uct_mm_iface_progress (tl_iface=0x7f18a8027e20) at 
> sm/mm/base/mm_iface.c:357
> #22 0x00007f18c4686e22 in ucs_callbackq_dispatch (cbq=<optimized out>) at 
> /usr/local/src/conda/ucx-1.13.1/src/ucs/datastruct/callbackq.h:211
> #23 uct_worker_progress (worker=<optimized out>) at 
> /usr/local/src/conda/ucx-1.13.1/src/uct/api/uct.h:2638
> #24 ucp_worker_progress (worker=0x7f18a80008d0) at core/ucp_worker.c:2782
> #25 0x00007f18c80f586f in 
> arrow::flight::transport::ucx::UcpCallDriver::Impl::MakeProgress 
> (this=0x7f18a804d2d0)
>     at /arrow/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc:759
> #26 0x00007f18c80f2e40 in 
> arrow::flight::transport::ucx::UcpCallDriver::Impl::ReadNextFrame 
> (this=0x7f18a804d2d0)
>     at /arrow/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc:449
> #27 0x00007f18c80f7661 in 
> arrow::flight::transport::ucx::UcpCallDriver::ReadNextFrame 
> (this=0x7f18b80017e0)
>     at /arrow/cpp/src/arrow/flight/transport/ucx/ucx_internal.cc:1037
> #28 0x00007f18c80dc7cd in arrow::flight::transport::ucx::(anonymous 
> namespace)::PutServerStream::ReadImpl (this=0x7f18c07fcb60, 
> data=0x7f18c07fcaf0)
>     at /arrow/cpp/src/arrow/flight/transport/ucx/ucx_server.cc:140
> #29 0x00007f18c80dc525 in arrow::flight::transport::ucx::(anonymous 
> namespace)::PutServerStream::ReadData (this=0x7f18c07fcb60, 
> data=0x7f18c07fcaf0)
>     at /arrow/cpp/src/arrow/flight/transport/ucx/ucx_server.cc:120
> #30 0x00007f18c80e18c9 in arrow::flight::transport::ucx::(anonymous 
> namespace)::UcxServerImpl::HandleDoExchange (this=0x7f18b405c430, 
> driver=0x7f18b80017e0)
>     at /arrow/cpp/src/arrow/flight/transport/ucx/ucx_server.cc:414
> #31 0x00007f18c80e2008 in arrow::flight::transport::ucx::(anonymous 
> namespace)::UcxServerImpl::HandleOneCall (this=0x7f18b405c430, 
> driver=0x7f18b80017e0, 
>     frame=0x7f18a804df80) at 
> /arrow/cpp/src/arrow/flight/transport/ucx/ucx_server.cc:426
> #32 0x00007f18c80e2d21 in arrow::flight::transport::ucx::(anonymous 
> namespace)::UcxServerImpl::WorkerLoop (this=0x7f18b405c430, 
> request=0x7f18b80016d0)
>     at /arrow/cpp/src/arrow/flight/transport/ucx/ucx_server.cc:493
> #33 0x00007f18c80e335e in operator() (__closure=0x7f18b4050c30) at 
> /arrow/cpp/src/arrow/flight/transport/ucx/ucx_server.cc:520
> #34 0x00007f18c80ecb1a in 
> arrow::detail::ContinueFuture::operator()<arrow::flight::transport::ucx::(anonymous
>  
> namespace)::UcxServerImpl::DriveConnections()::<lambda()>&>(arrow::Future<arrow::internal::Empty>,
>  struct {...} &) const (this=0x7f18b4050c28, next=..., f=...) at 
> /arrow/cpp/src/arrow/util/future.h:133
> #35 0x00007f18c80ec9e1 in std::__invoke_impl<void, 
> arrow::detail::ContinueFuture&, arrow::Future<arrow::internal::Empty>&, 
> arrow::flight::transport::ucx::(anonymous 
> namespace)::UcxServerImpl::DriveConnections()::<lambda()>&>(std::__invoke_other,
>  arrow::detail::ContinueFuture &) (__f=...)
>     at 
> /opt/conda/envs/arrow/x86_64-conda-linux-gnu/include/c++/10.4.0/bits/invoke.h:60
> #36 0x00007f18c80ec883 in std::__invoke<arrow::detail::ContinueFuture&, 
> arrow::Future<arrow::internal::Empty>&, 
> arrow::flight::transport::ucx::(anonymous 
> namespace)::UcxServerImpl::DriveConnections()::<lambda()>&>(arrow::detail::ContinueFuture
>  &) (__fn=...)
>     at 
> /opt/conda/envs/arrow/x86_64-conda-linux-gnu/include/c++/10.4.0/bits/invoke.h:95
> #37 0x00007f18c80ec71c in 
> std::_Bind<arrow::detail::ContinueFuture(arrow::Future<arrow::internal::Empty>,
>  arrow::flight::transport::ucx::(anonymous 
> namespace)::UcxServerImpl::DriveConnections()::<lambda()>)>::__call<void, 0, 
> 1>(std::tuple<> &&, std::_Index_tuple<0, 1>) (this=0x7f18b4050c28, __args=...)
>     at 
> /opt/conda/envs/arrow/x86_64-conda-linux-gnu/include/c++/10.4.0/functional:416
> #38 0x00007f18c80ec5b9 in 
> std::_Bind<arrow::detail::ContinueFuture(arrow::Future<arrow::internal::Empty>,
>  arrow::flight::transport::ucx::(anonymous 
> namespace)::UcxServerImpl::DriveConnections()::<lambda()>)>::operator()<>(void) 
> (this=0x7f18b4050c28)
>     at 
> /opt/conda/envs/arrow/x86_64-conda-linux-gnu/include/c++/10.4.0/functional:499
> #39 0x00007f18c80ec440 in 
> arrow::internal::FnOnce<void()>::FnImpl<std::_Bind<arrow::detail::ContinueFuture(arrow::Future<arrow::internal::Empty>,
>  arrow::flight::transport::ucx::(anonymous 
> namespace)::UcxServerImpl::DriveConnections()::<lambda()>)> >::invoke(void) 
> (this=0x7f18b4050c20)
>     at /arrow/cpp/src/arrow/util/functional.h:152
> #40 0x00007f18c587331d in arrow::internal::FnOnce<void ()>::operator()() && 
> (this=0x7f18c07fd110) at /arrow/cpp/src/arrow/util/functional.h:140
> #41 0x00007f18c587130f in arrow::internal::WorkerLoop (state=..., it=...) at 
> /arrow/cpp/src/arrow/util/thread_pool.cc:262
> #42 0x00007f18c587227c in operator() (__closure=0x7f18b40517d8) at 
> /arrow/cpp/src/arrow/util/thread_pool.cc:423
> #43 0x00007f18c587b8b6 in std::__invoke_impl<void, 
> arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()> 
> >(std::__invoke_other, struct {...} &&)
>     (__f=...) at 
> /opt/conda/envs/arrow/x86_64-conda-linux-gnu/include/c++/10.4.0/bits/invoke.h:60
> #44 0x00007f18c587b86b in 
> std::__invoke<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()>
>  >(struct {...} &&) (__fn=...)
>     at 
> /opt/conda/envs/arrow/x86_64-conda-linux-gnu/include/c++/10.4.0/bits/invoke.h:95
> #45 0x00007f18c587b818 in 
> std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()>
>  > >::_M_invoke<0>(std::_Index_tuple<0>) (this=0x7f18b40517d8) at 
> /opt/conda/envs/arrow/x86_64-conda-linux-gnu/include/c++/10.4.0/thread:264
> #46 0x00007f18c587b65c in 
> std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()>
>  > >::operator()(void) (
>     this=0x7f18b40517d8) at 
> /opt/conda/envs/arrow/x86_64-conda-linux-gnu/include/c++/10.4.0/thread:271
> #47 0x00007f18c587b580 in 
> std::thread::_State_impl<std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()>
>  > > >::_M_run(void) (this=0x7f18b40517d0) at 
> /opt/conda/envs/arrow/x86_64-conda-linux-gnu/include/c++/10.4.0/thread:215
> #48 0x00007f18c4e4ba93 in std::execute_native_thread_routine (__p=<optimized 
> out>)
>     at 
> /home/conda/feedstock_root/build_artifacts/gcc_compilers_1666516830325/work/build/x86_64-conda-linux-gnu/libstdc++-v3/include/new_allocator.h:82
> #49 0x00007f18c47756db in start_thread (arg=0x7f18c07fe700) at 
> pthread_create.c:463
> #50 0x00007f18c4aae61f in clone () at 
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to