pitrou commented on issue #38304:
URL: https://github.com/apache/arrow/issues/38304#issuecomment-1766066618
Looking at the Valgrind output in more detail, it seems there are two
different leaks, both from allocations made on a thread pool worker thread:
* one leak related to `OPENSSL_init_crypto`:
```
==9667==
==9667== 904 bytes in 1 blocks are definitely lost in loss record 59 of 69
==9667== at 0x4849724: malloc (vg_replace_malloc.c:431)
==9667== by 0x705B989: CRYPTO_zalloc (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x70114A9: ossl_err_get_state_int (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x7012189: ERR_set_mark (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x6F959EF: CONF_modules_load_file_ex (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x6F95DD5: ossl_config_int (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x705A82B: ossl_init_config_ossl_ (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x6C15EE7: __pthread_once_slow (pthread_once.c:116)
==9667== by 0x7068919: CRYPTO_THREAD_run_once (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x705B0B3: OPENSSL_init_crypto (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x700F75C: ossl_engine_table_select (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x71409D5: RAND_get_rand_method (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x7141319: RAND_bytes_ex (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x4CA9D3E: parquet::encryption::RandBytes(unsigned char*,
int) (encryption_internal.cc:649)
==9667== by 0x4CADFBB:
parquet::encryption::CryptoFactory::GetFileEncryptionProperties(parquet::encryption::KmsConnectionConfig
const&, parquet::encryption::EncryptionConfiguration const&,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
const&, std::shared_ptr<arrow::fs::FileSystem> const&) (crypto_factory.cc:75)
==9667== by 0x49A9145:
arrow::dataset::ParquetFileFormat::MakeWriter(std::shared_ptr<arrow::io::OutputStream>,
std::shared_ptr<arrow::Schema>,
std::shared_ptr<arrow::dataset::FileWriteOptions>, arrow::fs::FileLocator)
const (file_parquet.cc:707)
==9667== by 0x48F7FD4: arrow::dataset::internal::(anonymous
namespace)::OpenWriter(arrow::dataset::FileSystemDatasetWriteOptions const&,
std::shared_ptr<arrow::Schema>, std::__cxx11::basic_string<char,
std::char_traits<char>, std::allocator<char> > const&) (dataset_writer.cc:131)
==9667== by 0x48F8202: arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
const&)::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const
(dataset_writer.cc:149)
==9667== by 0x48F92E4:
std::enable_if<((!std::is_void<arrow::Status>::value)&&(!arrow::detail::is_future<arrow::Status>::value))&&((!arrow::Future<arrow::internal::Empty>::is_empty)||std::is_same<arrow::Status,
arrow::Status>::value), void>::type
arrow::detail::ContinueFuture::operator()<arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
const&)::{lambda()#1}::operator()() const::{lambda()#1}&, , arrow::Status,
arrow::Future<arrow::internal::Empty> >(arrow::Future<arrow::internal::Empty>,
arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
const&)::{lambda()#1}::operator()() const::{lambda()#1}&) const (future.h:150)
==9667== by 0x48F937C: void std::__invoke_impl<void,
arrow::detail::ContinueFuture&, arrow::Future<arrow::internal::Empty>&,
arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
const&)::{lambda()#1}::operator()() const::{lambda()#1}&>(std::__invoke_other,
arrow::detail::ContinueFuture&, arrow::Future<arrow::internal::Empty>&,
arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
const&)::{lambda()#1}::operator()() const::{lambda()#1}&) (invoke.h:61)
==9667== by 0x48F93D8: __invoke<arrow::detail::ContinueFuture&,
arrow::Future<arrow::internal::Empty>&, arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*,
const std::string&)::<lambda()>::<lambda()>&> (invoke.h:96)
==9667== by 0x48F93D8: __call<void, 0, 1> (functional:495)
==9667== by 0x48F93D8: operator()<> (functional:580)
==9667== by 0x48F93D8: arrow::internal::FnOnce<void
()>::FnImpl<std::_Bind<arrow::detail::ContinueFuture
(arrow::Future<arrow::internal::Empty>, arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*,
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
const&)::{lambda()#1}::operator()() const::{lambda()#1})> >::invoke()
(functional.h:152)
==9667== by 0x56D3C9F: arrow::internal::FnOnce<void ()>::operator()() &&
(functional.h:140)
==9667== by 0x56D6524:
arrow::internal::WorkerLoop(std::shared_ptr<arrow::internal::ThreadPool::State>,
std::_List_iterator<std::thread>) (thread_pool.cc:457)
==9667== by 0x56D67FD:
arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::{lambda()#1}::operator()()
const (thread_pool.cc:618)
==9667== by 0x56D6852: __invoke_impl<void,
arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()> >
(invoke.h:61)
==9667== by 0x56D6852:
__invoke<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()> >
(invoke.h:96)
==9667== by 0x56D6852: _M_invoke<0> (std_thread.h:279)
==9667== by 0x56D6852: operator() (std_thread.h:286)
==9667== by 0x56D6852:
std::thread::_State_impl<std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::{lambda()#1}>
> >::_M_run() (std_thread.h:231)
==9667== by 0x6A41E94: execute_native_thread_routine (thread.cc:104)
==9667== by 0x6C10AC2: start_thread (pthread_create.c:442)
==9667== by 0x6CA1BF3: clone (clone.S:100)
==9667==
```
* one leak related to `rand_new_drbg`:
```
==9667== 2,464 (40 direct, 2,424 indirect) bytes in 1 blocks are definitely
lost in loss record 67 of 69
==9667== at 0x4849724: malloc (vg_replace_malloc.c:431)
==9667== by 0x705B989: CRYPTO_zalloc (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x7039036: EVP_RAND_CTX_new (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x71401FC: rand_new_drbg (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x71412E0: RAND_get0_public (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x7141350: RAND_bytes_ex (in
/opt/conda/envs/arrow/lib/libcrypto.so.3)
==9667== by 0x4CAB6BB:
parquet::encryption::AesEncryptor::AesEncryptorImpl::Encrypt(unsigned char
const*, int, unsigned char const*, int, unsigned char const*, int, unsigned
char*) (encryption_internal.cc:170)
==9667== by 0x4CAB77E:
parquet::encryption::AesEncryptor::Encrypt(unsigned char const*, int, unsigned
char const*, int, unsigned char const*, int, unsigned char*)
(encryption_internal.cc:304)
==9667== by 0x4C2608C: parquet::Encryptor::Encrypt(unsigned char const*,
int, unsigned char*) (internal_file_encryptor.cc:37)
==9667== by 0x4BB9C05:
parquet::SerializedPageWriter::WriteDictionaryPage(parquet::DictionaryPage
const&) (column_writer.cc:291)
==9667== by 0x4BD09BE:
parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>
>::WriteDictionaryPage() (column_writer.cc:1338)
==9667== by 0x4BC70E4: parquet::ColumnWriterImpl::Close()
(column_writer.cc:1065)
==9667== by 0x4BC727E:
parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2>
>::Close() (column_writer.cc:1212)
==9667== by 0x4B5F519: parquet::arrow::(anonymous
namespace)::ArrowColumnWriterV2::Write(parquet::ArrowWriteContext*)
(writer.cc:160)
==9667== by 0x4B63138:
parquet::arrow::FileWriterImpl::WriteColumnChunk(std::shared_ptr<arrow::ChunkedArray>
const&, long, long) (writer.cc:346)
==9667== by 0x4B5EAAD:
parquet::arrow::FileWriterImpl::WriteTable(arrow::Table const&,
long)::{lambda(long, long)#1}::operator()(long, long) const (writer.cc:374)
==9667== by 0x4B61581:
parquet::arrow::FileWriterImpl::WriteTable(arrow::Table const&, long)
(writer.cc:387)
==9667== by 0x49A71CD:
arrow::dataset::ParquetFileWriter::Write(std::shared_ptr<arrow::RecordBatch>
const&) (file_parquet.cc:748)
==9667== by 0x48F926E: arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}::operator()()
const (dataset_writer.cc:236)
==9667== by 0x48F9404:
std::enable_if<((!std::is_void<arrow::Status>::value)&&(!arrow::detail::is_future<arrow::Status>::value))&&((!arrow::Future<arrow::internal::Empty>::is_empty)||std::is_same<arrow::Status,
arrow::Status>::value), void>::type
arrow::detail::ContinueFuture::operator()<arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}&,
, arrow::Status, arrow::Future<arrow::internal::Empty>
>(arrow::Future<arrow::internal::Empty>, arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}&)
const (future.h:150)
==9667== by 0x48F949C: void std::__invoke_impl<void,
arrow::detail::ContinueFuture&, arrow::Future<arrow::internal::Empty>&,
arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}&>(std::__invoke_other,
arrow::detail::ContinueFuture&, arrow::Future<arrow::internal::Empty>&,
arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}&)
(invoke.h:61)
==9667== by 0x48F94F8: __invoke<arrow::detail::ContinueFuture&,
arrow::Future<arrow::internal::Empty>&, arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::<lambda()>&>
(invoke.h:96)
==9667== by 0x48F94F8: __call<void, 0, 1> (functional:495)
==9667== by 0x48F94F8: operator()<> (functional:580)
==9667== by 0x48F94F8: arrow::internal::FnOnce<void
()>::FnImpl<std::_Bind<arrow::detail::ContinueFuture
(arrow::Future<arrow::internal::Empty>, arrow::dataset::internal::(anonymous
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1})>
>::invoke() (functional.h:152)
==9667== by 0x56D3C9F: arrow::internal::FnOnce<void ()>::operator()() &&
(functional.h:140)
==9667== by 0x56D6524:
arrow::internal::WorkerLoop(std::shared_ptr<arrow::internal::ThreadPool::State>,
std::_List_iterator<std::thread>) (thread_pool.cc:457)
==9667== by 0x56D67FD:
arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::{lambda()#1}::operator()()
const (thread_pool.cc:618)
==9667== by 0x56D6852: __invoke_impl<void,
arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()> >
(invoke.h:61)
==9667== by 0x56D6852:
__invoke<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()> >
(invoke.h:96)
==9667== by 0x56D6852: _M_invoke<0> (std_thread.h:279)
==9667== by 0x56D6852: operator() (std_thread.h:286)
==9667== by 0x56D6852:
std::thread::_State_impl<std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::{lambda()#1}>
> >::_M_run() (std_thread.h:231)
==9667== by 0x6A41E94: execute_native_thread_routine (thread.cc:104)
==9667== by 0x6C10AC2: start_thread (pthread_create.c:442)
==9667== by 0x6CA1BF3: clone (clone.S:100)
==9667==
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]