Amir Aliev created ARROW-17474:
----------------------------------

             Summary: Pandas read_parquet fails at the pyarrow level
                 Key: ARROW-17474
                 URL: https://issues.apache.org/jira/browse/ARROW-17474
             Project: Apache Arrow
          Issue Type: Bug
          Components: Parquet
         Environment: Fedora 33
            Reporter: Amir Aliev


When reading any of a set of parquet files in pandas with the pyarrow parquet 
engine, I get a "terminate called without an active exception | Aborted 
(core dumped)" error about 50% of the time. I ran the process under gdb and 
obtained the following backtrace:

 

#0  0x00007ffff7ad69d5 in raise () from /lib64/libc.so.6
#1  0x00007ffff7abf8a4 in abort () from /lib64/libc.so.6
#2  0x00007fffe6c68926 in __gnu_cxx::__verbose_terminate_handler() [clone 
.cold] () from /lib64/libstdc++.so.6
#3  0x00007fffe6c741ac in __cxxabiv1::__terminate(void (*)()) () from 
/lib64/libstdc++.so.6
#4  0x00007fffe6c74217 in std::terminate() () from /lib64/libstdc++.so.6
#5  0x00007fffe6c73bcc in __gxx_personality_v0 () from /lib64/libstdc++.so.6
#6  0x00007ffff497dd94 in _Unwind_ForcedUnwind_Phase2 () from 
/lib64/libgcc_s.so.1
#7  0x00007ffff497e482 in _Unwind_ForcedUnwind () from /lib64/libgcc_s.so.1
#8  0x00007ffff7a89c26 in __pthread_unwind () from /lib64/libpthread.so.0
#9  0x00007ffff7a816a2 in pthread_exit () from /lib64/libpthread.so.0
#10 0x00007ffff7b1f34a in pthread_exit () from /lib64/libc.so.6
#11 0x00007ffff7e3657b in PyThread_exit_thread () from 
/lib64/libpython3.9.so.1.0
#12 0x00007ffff7cc5a8d in take_gil.cold () from /lib64/libpython3.9.so.1.0
#13 0x00007ffff7d8f656 in PyEval_RestoreThread () from 
/lib64/libpython3.9.so.1.0
#14 0x00007ffff7e68480 in PyGILState_Ensure () from /lib64/libpython3.9.so.1.0
#15 0x00007fffe6f000e3 in std::_Sp_counted_ptr<arrow::py::PyBuffer*, 
(__gnu_cxx::_Lock_policy)2>::_M_dispose() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_python.so.900
#16 0x00007fffe7519c4a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#17 0x00007fffe751badd in arrow::Future<std::shared_ptr<arrow::Buffer> 
>::SetResult(arrow::Result<std::shared_ptr<arrow::Buffer> 
>)::\{lambda(void*)#1}::_FUN(void*) () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#18 0x00007fffe8085929 in arrow::ConcreteFutureImpl::~ConcreteFutureImpl() () 
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#19 0x00007fffe811da3a in arrow::io::internal::ReadRangeCache::Impl::~Impl() () 
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#20 0x00007fffe6643eda in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#21 0x00007fffe66ec05e in parquet::ParquetFileReader::~ParquetFileReader() () 
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#22 0x00007fffe676b98a in parquet::arrow::(anonymous 
namespace)::FileReaderImpl::~FileReaderImpl() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#23 0x00007fffe6643eda in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#24 0x00007fffe67c950a in 
std::_Function_handler<arrow::Future<std::function<arrow::Future<std::shared_ptr<arrow::RecordBatch>
 > ()> > (), parquet::arrow::RowGroupGenerator>::_M_manager(std::_Any_data&, 
std::_Any_data const&, std::_Manager_operation) ()
   from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#25 0x00007fffe67bbe8e in 
std::_Sp_counted_ptr_inplace<arrow::MergedGenerator<std::shared_ptr<arrow::RecordBatch>
 >::State, 
std::allocator<arrow::MergedGenerator<std::shared_ptr<arrow::RecordBatch> 
>::State>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
   from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#26 0x00007fffe6643eda in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#27 0x00007fffe67ba122 in 
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (), 
arrow::MergedGenerator<std::shared_ptr<arrow::RecordBatch> > 
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) 
()
   from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libparquet.so.900
#28 0x00007fffdf126209 in 
std::_Sp_counted_ptr_inplace<arrow::dataset::SlicingGenerator::State, 
std::allocator<arrow::dataset::SlicingGenerator::State>, 
(__gnu_cxx::_Lock_policy)2>::_M_dispose() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#29 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#30 0x00007fffdf127392 in 
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (), 
arrow::dataset::SlicingGenerator>::_M_manager(std::_Any_data&, std::_Any_data 
const&, std::_Manager_operation) () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#31 0x00007fffdf11b56a in 
std::_Sp_counted_ptr_inplace<arrow::SerialReadaheadGenerator<std::shared_ptr<arrow::RecordBatch>
 >::State, 
std::allocator<arrow::SerialReadaheadGenerator<std::shared_ptr<arrow::RecordBatch>
 >::State>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#32 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#33 0x00007fffdf126732 in 
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (), 
arrow::SerialReadaheadGenerator<std::shared_ptr<arrow::RecordBatch> > 
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) 
()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#34 0x00007fffdf128ae6 in 
arrow::Future<std::function<arrow::Future<std::shared_ptr<arrow::RecordBatch> > 
()> 
>::SetResult(arrow::Result<std::function<arrow::Future<std::shared_ptr<arrow::RecordBatch>
 > ()> >)::\{lambda(void*)#1}::_FUN(void*) ()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#35 0x00007fffe8085929 in arrow::ConcreteFutureImpl::~ConcreteFutureImpl() () 
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#36 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#37 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#38 0x00007fffdf1267e2 in 
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (), 
arrow::FutureFirstGenerator<std::shared_ptr<arrow::RecordBatch> > 
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) 
()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#39 0x00007fffdf1261d9 in 
std::_Sp_counted_ptr_inplace<arrow::DefaultIfEmptyGenerator<std::shared_ptr<arrow::RecordBatch>
 >::State, 
std::allocator<arrow::DefaultIfEmptyGenerator<std::shared_ptr<arrow::RecordBatch>
 >::State>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#40 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#41 0x00007fffdf127232 in 
std::_Function_handler<arrow::Future<std::shared_ptr<arrow::RecordBatch> > (), 
arrow::DefaultIfEmptyGenerator<std::shared_ptr<arrow::RecordBatch> > 
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) 
()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#42 0x00007fffdf1261a9 in 
std::_Sp_counted_ptr_inplace<arrow::EnumeratingGenerator<std::shared_ptr<arrow::RecordBatch>
 >::State, 
std::allocator<arrow::EnumeratingGenerator<std::shared_ptr<arrow::RecordBatch> 
>::State>, (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#43 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#44 0x00007fffdf127182 in 
std::_Function_handler<arrow::Future<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
 > > (), arrow::EnumeratingGenerator<std::shared_ptr<arrow::RecordBatch> > 
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) 
()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#45 0x00007fffdf128a26 in 
arrow::Future<std::function<arrow::Future<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
 > > ()> 
>::SetResult(arrow::Result<std::function<arrow::Future<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
 > > ()> >)::\{lambda(void*)#1}::_FUN(void*) ()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#46 0x00007fffe8085929 in arrow::ConcreteFutureImpl::~ConcreteFutureImpl() () 
from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#47 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#48 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#49 0x00007fffdf1270d2 in 
std::_Function_handler<arrow::Future<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
 > > (), 
arrow::FutureFirstGenerator<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
 > > >::_M_manager(std::_Any_data&, std::_Any_data const&, 
std::_Manager_operation) ()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#50 0x00007fffdf14a6dd in 
std::_Sp_counted_ptr_inplace<arrow::MappingGenerator<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
 >, arrow::dataset::EnumeratedRecordBatch>::State, 
std::allocator<arrow::MappingGenerator<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch>
 >, arrow::dataset::EnumeratedRecordBatch>::State>, 
(__gnu_cxx::_Lock_policy)2>::_M_dispose() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#51 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#52 0x00007fffdf127022 in 
std::_Function_handler<arrow::Future<arrow::dataset::EnumeratedRecordBatch> (), 
arrow::MappingGenerator<arrow::Enumerated<std::shared_ptr<arrow::RecordBatch> 
>, arrow::dataset::EnumeratedRecordBatch> >::_M_manager(std::_Any_data&, 
std::_Any_data const&, std::_Manager_operation) ()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#53 0x00007fffdf12cd1e in 
std::_Sp_counted_ptr_inplace<arrow::MergedGenerator<arrow::dataset::EnumeratedRecordBatch>::State,
 
std::allocator<arrow::MergedGenerator<arrow::dataset::EnumeratedRecordBatch>::State>,
 (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#54 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#55 0x00007fffdf1269f2 in 
std::_Function_handler<arrow::Future<arrow::dataset::EnumeratedRecordBatch> (), 
arrow::MergedGenerator<arrow::dataset::EnumeratedRecordBatch> 
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) 
()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#56 0x00007fffdf14a5e2 in 
std::_Sp_counted_ptr_inplace<arrow::ReadaheadGenerator<arrow::dataset::EnumeratedRecordBatch>::State,
 
std::allocator<arrow::ReadaheadGenerator<arrow::dataset::EnumeratedRecordBatch>::State>,
 (__gnu_cxx::_Lock_policy)2>::_M_dispose() ()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#57 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#58 0x00007fffdf126942 in 
std::_Function_handler<arrow::Future<arrow::dataset::EnumeratedRecordBatch> (), 
arrow::ReadaheadGenerator<arrow::dataset::EnumeratedRecordBatch> 
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) 
()
   from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#59 0x00007fffdf12ba28 in 
std::_Sp_counted_ptr_inplace<arrow::MappingGenerator<arrow::dataset::EnumeratedRecordBatch,
 nonstd::optional_lite::optional<arrow::compute::ExecBatch> >::State, 
std::allocator<arrow::MappingGenerator<arrow::dataset::EnumeratedRecordBatch, 
nonstd::optional_lite::optional<arrow::compute::ExecBatch> >::State>, 
(__gnu_cxx::_Lock_policy)2>::_M_dispose() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#60 0x00007fffdf26493a in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/_dataset.cpython-39-x86_64-linux-gnu.so
#61 0x00007fffdf126892 in 
std::_Function_handler<arrow::Future<nonstd::optional_lite::optional<arrow::compute::ExecBatch>
 > (), arrow::MappingGenerator<arrow::dataset::EnumeratedRecordBatch, 
nonstd::optional_lite::optional<arrow::compute::ExecBatch> > 
>::_M_manager(std::_Any_data&, std::_Any_data const&, std::_Manager_operation) 
() from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#62 0x00007fffe7e805db in arrow::compute::(anonymous 
namespace)::SourceNode::~SourceNode() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#63 0x00007fffe7f355e3 in arrow::compute::(anonymous 
namespace)::ExecPlanImpl::~ExecPlanImpl() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#64 0x00007fffe7f35a52 in std::_Sp_counted_ptr<arrow::compute::(anonymous 
namespace)::ExecPlanImpl*, (__gnu_cxx::_Lock_policy)2>::_M_dispose() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#65 0x00007fffdf0f0a32 in arrow::internal::FnOnce<void (arrow::FutureImpl 
const&)>::FnImpl<arrow::Future<arrow::internal::Empty>::WrapStatusyOnComplete::Callback<arrow::dataset::(anonymous
 
namespace)::AsyncScanner::ScanBatchesUnorderedAsync(arrow::internal::Executor*, 
bool, bool)::\{lambda(...)#1}::operator()(...) 
const::\{lambda()#1}::operator()() const::\{lambda(arrow::Status const&)#1}> 
>::~FnImpl() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow_dataset.so.900
#66 0x00007fffe808a416 in 
arrow::ConcreteFutureImpl::DoMarkFinishedOrFailed(arrow::FutureState) () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#67 0x00007fffe75b33ff in void 
arrow::Future<arrow::internal::Empty>::MarkFinished<arrow::internal::Empty, 
void>(arrow::Status) () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#68 0x00007fffe7f16f25 in arrow::internal::FnOnce<void (arrow::FutureImpl 
const&)>::FnImpl<arrow::Future<arrow::internal::Empty>::WrapStatusyOnComplete::Callback<arrow::compute::(anonymous
 namespace)::ExecPlanImpl::EndTaskGroup()::\{lambda(arrow::Status const&)#1}> 
>::invoke(arrow::FutureImpl const&) ()
   from /home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#69 0x00007fffe808a408 in 
arrow::ConcreteFutureImpl::DoMarkFinishedOrFailed(arrow::FutureState) () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#70 0x00007fffe75b33ff in void 
arrow::Future<arrow::internal::Empty>::MarkFinished<arrow::internal::Empty, 
void>(arrow::Status) () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#71 0x00007fffe80cf46b in arrow::internal::FnOnce<void (arrow::FutureImpl 
const&)>::FnImpl<arrow::Future<arrow::internal::Empty>::WrapStatusyOnComplete::Callback<arrow::util::AsyncTaskGroup::AddTaskUnlocked(arrow::Future<arrow::internal::Empty>
 const&, arrow::util::Mutex::Guard)::\{lambda(arrow::Status const&)#1}> 
>::invoke(arrow::FutureImpl const&) () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#72 0x00007fffe808a408 in 
arrow::ConcreteFutureImpl::DoMarkFinishedOrFailed(arrow::FutureState) () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#73 0x00007fffe75b33ff in void 
arrow::Future<arrow::internal::Empty>::MarkFinished<arrow::internal::Empty, 
void>(arrow::Status) () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#74 0x00007fffe7f3a5cd in arrow::internal::FnOnce<void 
()>::FnImpl<std::_Bind<arrow::detail::ContinueFuture 
(arrow::Future<arrow::internal::Empty>, std::function<arrow::Status ()>)> 
>::invoke() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#75 0x00007fffe8066e7b in 
std::thread::_State_impl<std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::\{lambda()#1}>
 > >::_M_run() () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#76 0x00007fffe8f7d5f0 in execute_native_thread_routine () from 
/home/inc/.local/lib/python3.9/site-packages/pyarrow/libarrow.so.900
#77 0x00007ffff7a803f9 in start_thread () from /lib64/libpthread.so.0
#78 0x00007ffff7b9ab53 in clone () from /lib64/libc.so.6



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to