pitrou commented on issue #38304:
URL: https://github.com/apache/arrow/issues/38304#issuecomment-1766066618

   Looking at the Valgrind output in more detail, there appear to be two 
different leaks, both reported from a thread pool worker thread:
   * one leak related to `OPENSSL_init_crypto`:
   ```
   ==9667== 
   ==9667== 904 bytes in 1 blocks are definitely lost in loss record 59 of 69
   ==9667==    at 0x4849724: malloc (vg_replace_malloc.c:431)
   ==9667==    by 0x705B989: CRYPTO_zalloc (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x70114A9: ossl_err_get_state_int (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x7012189: ERR_set_mark (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x6F959EF: CONF_modules_load_file_ex (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x6F95DD5: ossl_config_int (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x705A82B: ossl_init_config_ossl_ (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x6C15EE7: __pthread_once_slow (pthread_once.c:116)
   ==9667==    by 0x7068919: CRYPTO_THREAD_run_once (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x705B0B3: OPENSSL_init_crypto (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x700F75C: ossl_engine_table_select (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x71409D5: RAND_get_rand_method (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x7141319: RAND_bytes_ex (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x4CA9D3E: parquet::encryption::RandBytes(unsigned char*, 
int) (encryption_internal.cc:649)
   ==9667==    by 0x4CADFBB: 
parquet::encryption::CryptoFactory::GetFileEncryptionProperties(parquet::encryption::KmsConnectionConfig
 const&, parquet::encryption::EncryptionConfiguration const&, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&, std::shared_ptr<arrow::fs::FileSystem> const&) (crypto_factory.cc:75)
   ==9667==    by 0x49A9145: 
arrow::dataset::ParquetFileFormat::MakeWriter(std::shared_ptr<arrow::io::OutputStream>,
 std::shared_ptr<arrow::Schema>, 
std::shared_ptr<arrow::dataset::FileWriteOptions>, arrow::fs::FileLocator) 
const (file_parquet.cc:707)
   ==9667==    by 0x48F7FD4: arrow::dataset::internal::(anonymous 
namespace)::OpenWriter(arrow::dataset::FileSystemDatasetWriteOptions const&, 
std::shared_ptr<arrow::Schema>, std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const&) (dataset_writer.cc:131)
   ==9667==    by 0x48F8202: arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&)::{lambda()#1}::operator()() const::{lambda()#1}::operator()() const 
(dataset_writer.cc:149)
   ==9667==    by 0x48F92E4: 
std::enable_if<((!std::is_void<arrow::Status>::value)&&(!arrow::detail::is_future<arrow::Status>::value))&&((!arrow::Future<arrow::internal::Empty>::is_empty)||std::is_same<arrow::Status,
 arrow::Status>::value), void>::type 
arrow::detail::ContinueFuture::operator()<arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&)::{lambda()#1}::operator()() const::{lambda()#1}&, , arrow::Status, 
arrow::Future<arrow::internal::Empty> >(arrow::Future<arrow::internal::Empty>, 
arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&)::{lambda()#1}::operator()() const::{lambda()#1}&) const (future.h:150)
   ==9667==    by 0x48F937C: void std::__invoke_impl<void, 
arrow::detail::ContinueFuture&, arrow::Future<arrow::internal::Empty>&, 
arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&)::{lambda()#1}::operator()() const::{lambda()#1}&>(std::__invoke_other, 
arrow::detail::ContinueFuture&, arrow::Future<arrow::internal::Empty>&, 
arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&)::{lambda()#1}::operator()() const::{lambda()#1}&) (invoke.h:61)
   ==9667==    by 0x48F93D8: __invoke<arrow::detail::ContinueFuture&, 
arrow::Future<arrow::internal::Empty>&, arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*, 
const std::string&)::<lambda()>::<lambda()>&> (invoke.h:96)
   ==9667==    by 0x48F93D8: __call<void, 0, 1> (functional:495)
   ==9667==    by 0x48F93D8: operator()<> (functional:580)
   ==9667==    by 0x48F93D8: arrow::internal::FnOnce<void 
()>::FnImpl<std::_Bind<arrow::detail::ContinueFuture 
(arrow::Future<arrow::internal::Empty>, arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::Start(arrow::util::AsyncTaskScheduler*, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&)::{lambda()#1}::operator()() const::{lambda()#1})> >::invoke() 
(functional.h:152)
   ==9667==    by 0x56D3C9F: arrow::internal::FnOnce<void ()>::operator()() && 
(functional.h:140)
   ==9667==    by 0x56D6524: 
arrow::internal::WorkerLoop(std::shared_ptr<arrow::internal::ThreadPool::State>,
 std::_List_iterator<std::thread>) (thread_pool.cc:457)
   ==9667==    by 0x56D67FD: 
arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::{lambda()#1}::operator()()
 const (thread_pool.cc:618)
   ==9667==    by 0x56D6852: __invoke_impl<void, 
arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()> > 
(invoke.h:61)
   ==9667==    by 0x56D6852: 
__invoke<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()> > 
(invoke.h:96)
   ==9667==    by 0x56D6852: _M_invoke<0> (std_thread.h:279)
   ==9667==    by 0x56D6852: operator() (std_thread.h:286)
   ==9667==    by 0x56D6852: 
std::thread::_State_impl<std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::{lambda()#1}>
 > >::_M_run() (std_thread.h:231)
   ==9667==    by 0x6A41E94: execute_native_thread_routine (thread.cc:104)
   ==9667==    by 0x6C10AC2: start_thread (pthread_create.c:442)
   ==9667==    by 0x6CA1BF3: clone (clone.S:100)
   ==9667== 
   ```
   
   * one leak related to `rand_new_drbg`:
   ```
   ==9667== 2,464 (40 direct, 2,424 indirect) bytes in 1 blocks are definitely 
lost in loss record 67 of 69
   ==9667==    at 0x4849724: malloc (vg_replace_malloc.c:431)
   ==9667==    by 0x705B989: CRYPTO_zalloc (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x7039036: EVP_RAND_CTX_new (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x71401FC: rand_new_drbg (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x71412E0: RAND_get0_public (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x7141350: RAND_bytes_ex (in 
/opt/conda/envs/arrow/lib/libcrypto.so.3)
   ==9667==    by 0x4CAB6BB: 
parquet::encryption::AesEncryptor::AesEncryptorImpl::Encrypt(unsigned char 
const*, int, unsigned char const*, int, unsigned char const*, int, unsigned 
char*) (encryption_internal.cc:170)
   ==9667==    by 0x4CAB77E: 
parquet::encryption::AesEncryptor::Encrypt(unsigned char const*, int, unsigned 
char const*, int, unsigned char const*, int, unsigned char*) 
(encryption_internal.cc:304)
   ==9667==    by 0x4C2608C: parquet::Encryptor::Encrypt(unsigned char const*, 
int, unsigned char*) (internal_file_encryptor.cc:37)
   ==9667==    by 0x4BB9C05: 
parquet::SerializedPageWriter::WriteDictionaryPage(parquet::DictionaryPage 
const&) (column_writer.cc:291)
   ==9667==    by 0x4BD09BE: 
parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2> 
>::WriteDictionaryPage() (column_writer.cc:1338)
   ==9667==    by 0x4BC70E4: parquet::ColumnWriterImpl::Close() 
(column_writer.cc:1065)
   ==9667==    by 0x4BC727E: 
parquet::TypedColumnWriterImpl<parquet::PhysicalType<(parquet::Type::type)2> 
>::Close() (column_writer.cc:1212)
   ==9667==    by 0x4B5F519: parquet::arrow::(anonymous 
namespace)::ArrowColumnWriterV2::Write(parquet::ArrowWriteContext*) 
(writer.cc:160)
   ==9667==    by 0x4B63138: 
parquet::arrow::FileWriterImpl::WriteColumnChunk(std::shared_ptr<arrow::ChunkedArray>
 const&, long, long) (writer.cc:346)
   ==9667==    by 0x4B5EAAD: 
parquet::arrow::FileWriterImpl::WriteTable(arrow::Table const&, 
long)::{lambda(long, long)#1}::operator()(long, long) const (writer.cc:374)
   ==9667==    by 0x4B61581: 
parquet::arrow::FileWriterImpl::WriteTable(arrow::Table const&, long) 
(writer.cc:387)
   ==9667==    by 0x49A71CD: 
arrow::dataset::ParquetFileWriter::Write(std::shared_ptr<arrow::RecordBatch> 
const&) (file_parquet.cc:748)
   ==9667==    by 0x48F926E: arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}::operator()()
 const (dataset_writer.cc:236)
   ==9667==    by 0x48F9404: 
std::enable_if<((!std::is_void<arrow::Status>::value)&&(!arrow::detail::is_future<arrow::Status>::value))&&((!arrow::Future<arrow::internal::Empty>::is_empty)||std::is_same<arrow::Status,
 arrow::Status>::value), void>::type 
arrow::detail::ContinueFuture::operator()<arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}&,
 , arrow::Status, arrow::Future<arrow::internal::Empty> 
>(arrow::Future<arrow::internal::Empty>, arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}&)
 const (future.h:150)
   ==9667==    by 0x48F949C: void std::__invoke_impl<void, 
arrow::detail::ContinueFuture&, arrow::Future<arrow::internal::Empty>&, 
arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}&>(std::__invoke_other,
 arrow::detail::ContinueFuture&, arrow::Future<arrow::internal::Empty>&, 
arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1}&)
 (invoke.h:61)
   ==9667==    by 0x48F94F8: __invoke<arrow::detail::ContinueFuture&, 
arrow::Future<arrow::internal::Empty>&, arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::<lambda()>&>
 (invoke.h:96)
   ==9667==    by 0x48F94F8: __call<void, 0, 1> (functional:495)
   ==9667==    by 0x48F94F8: operator()<> (functional:580)
   ==9667==    by 0x48F94F8: arrow::internal::FnOnce<void 
()>::FnImpl<std::_Bind<arrow::detail::ContinueFuture 
(arrow::Future<arrow::internal::Empty>, arrow::dataset::internal::(anonymous 
namespace)::DatasetWriterFileQueue::WriteNext(std::shared_ptr<arrow::RecordBatch>)::{lambda()#1})>
 >::invoke() (functional.h:152)
   ==9667==    by 0x56D3C9F: arrow::internal::FnOnce<void ()>::operator()() && 
(functional.h:140)
   ==9667==    by 0x56D6524: 
arrow::internal::WorkerLoop(std::shared_ptr<arrow::internal::ThreadPool::State>,
 std::_List_iterator<std::thread>) (thread_pool.cc:457)
   ==9667==    by 0x56D67FD: 
arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::{lambda()#1}::operator()()
 const (thread_pool.cc:618)
   ==9667==    by 0x56D6852: __invoke_impl<void, 
arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()> > 
(invoke.h:61)
   ==9667==    by 0x56D6852: 
__invoke<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::<lambda()> > 
(invoke.h:96)
   ==9667==    by 0x56D6852: _M_invoke<0> (std_thread.h:279)
   ==9667==    by 0x56D6852: operator() (std_thread.h:286)
   ==9667==    by 0x56D6852: 
std::thread::_State_impl<std::thread::_Invoker<std::tuple<arrow::internal::ThreadPool::LaunchWorkersUnlocked(int)::{lambda()#1}>
 > >::_M_run() (std_thread.h:231)
   ==9667==    by 0x6A41E94: execute_native_thread_routine (thread.cc:104)
   ==9667==    by 0x6C10AC2: start_thread (pthread_create.c:442)
   ==9667==    by 0x6CA1BF3: clone (clone.S:100)
   ==9667==
   ``` 
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to