This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new fd57b7ae1c GH-38304: [C++][Parquet] Fix Valgrind memory leak in 
arrow-dataset-file-parquet-encryption-test (#38306)
fd57b7ae1c is described below

commit fd57b7ae1cc29bad579dc0d2661f3417bada0334
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Oct 18 12:04:50 2023 +0200

    GH-38304: [C++][Parquet] Fix Valgrind memory leak in 
arrow-dataset-file-parquet-encryption-test (#38306)
    
    If OpenSSL initializes itself from a non-main thread, it can fail 
deallocating all memory at shutdown.
    
    This is really a benign leak, but we don't want any spurious CI errors.
    
    * Closes: #38304
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 .../arrow/dataset/file_parquet_encryption_test.cc  |  8 +++++
 cpp/src/parquet/CMakeLists.txt                     |  3 +-
 cpp/src/parquet/encryption/encryption_internal.cc  | 13 ++++++++-
 cpp/src/parquet/encryption/encryption_internal.h   |  8 +++++
 .../encryption/encryption_internal_nossl.cc        |  2 ++
 cpp/src/parquet/encryption/openssl_internal.cc     | 34 ++++++++++++++++++++++
 cpp/src/parquet/encryption/openssl_internal.h      | 28 ++++++++++++++++++
 7 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc 
b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
index 572f15e100..87028eb6e2 100644
--- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
@@ -34,6 +34,7 @@
 #include "parquet/arrow/reader.h"
 #include "parquet/encryption/crypto_factory.h"
 #include "parquet/encryption/encryption.h"
+#include "parquet/encryption/encryption_internal.h"
 #include "parquet/encryption/kms_client.h"
 #include "parquet/encryption/test_in_memory_kms.h"
 
@@ -58,6 +59,13 @@ class DatasetEncryptionTest : public ::testing::Test {
   // partitioning scheme. The function also checks if the written files exist 
in the file
   // system.
   static void SetUpTestSuite() {
+#ifdef ARROW_VALGRIND
+    // Not necessary otherwise, but prevents a Valgrind leak by making sure
+    // OpenSSL initialization is done from the main thread
+    // (see GH-38304 for analysis).
+    ::parquet::encryption::EnsureBackendInitialized();
+#endif
+
     // Creates a mock file system using the current time point.
     EXPECT_OK_AND_ASSIGN(file_system_, fs::internal::MockFileSystem::Make(
                                            std::chrono::system_clock::now(), 
{}));
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index 3cdd24dbe9..0d04ec3e30 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -234,7 +234,8 @@ if(ARROW_HAVE_RUNTIME_AVX2)
 endif()
 
 if(PARQUET_REQUIRE_ENCRYPTION)
-  set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc)
+  set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc
+                   encryption/openssl_internal.cc)
   # Encryption key management
   set(PARQUET_SRCS
       ${PARQUET_SRCS}
diff --git a/cpp/src/parquet/encryption/encryption_internal.cc 
b/cpp/src/parquet/encryption/encryption_internal.cc
index 6e66efeff6..b1770be533 100644
--- a/cpp/src/parquet/encryption/encryption_internal.cc
+++ b/cpp/src/parquet/encryption/encryption_internal.cc
@@ -16,6 +16,7 @@
 // under the License.
 
 #include "parquet/encryption/encryption_internal.h"
+
 #include <openssl/aes.h>
 #include <openssl/evp.h>
 #include <openssl/rand.h>
@@ -27,6 +28,7 @@
 #include <string>
 #include <vector>
 
+#include "parquet/encryption/openssl_internal.h"
 #include "parquet/exception.h"
 
 using parquet::ParquetException;
@@ -92,6 +94,8 @@ class AesEncryptor::AesEncryptorImpl {
 
 AesEncryptor::AesEncryptorImpl::AesEncryptorImpl(ParquetCipher::type alg_id, 
int key_len,
                                                  bool metadata, bool 
write_length) {
+  openssl::EnsureInitialized();
+
   ctx_ = nullptr;
 
   length_buffer_length_ = write_length ? kBufferSizeLength : 0;
@@ -358,6 +362,8 @@ AesDecryptor::~AesDecryptor() {}
 
 AesDecryptor::AesDecryptorImpl::AesDecryptorImpl(ParquetCipher::type alg_id, 
int key_len,
                                                  bool metadata, bool 
contains_length) {
+  openssl::EnsureInitialized();
+
   ctx_ = nullptr;
   length_buffer_length_ = contains_length ? kBufferSizeLength : 0;
   ciphertext_size_delta_ = length_buffer_length_ + kNonceLength;
@@ -646,6 +652,11 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, 
std::string* AAD) {
   std::memcpy(AAD->data() + AAD->length() - 2, page_ordinal_bytes.data(), 2);
 }
 
-void RandBytes(unsigned char* buf, int num) { RAND_bytes(buf, num); }
+void RandBytes(unsigned char* buf, int num) {
+  openssl::EnsureInitialized();
+  RAND_bytes(buf, num);
+}
+
+void EnsureBackendInitialized() { openssl::EnsureInitialized(); }
 
 }  // namespace parquet::encryption
diff --git a/cpp/src/parquet/encryption/encryption_internal.h 
b/cpp/src/parquet/encryption/encryption_internal.h
index 77921d8731..1bdf47c56f 100644
--- a/cpp/src/parquet/encryption/encryption_internal.h
+++ b/cpp/src/parquet/encryption/encryption_internal.h
@@ -130,4 +130,12 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, 
std::string* AAD);
 // Wraps OpenSSL RAND_bytes function
 void RandBytes(unsigned char* buf, int num);
 
+// Ensure OpenSSL is initialized.
+//
+// This is only necessary in specific situations since OpenSSL otherwise
+// initializes itself automatically. For example, under Valgrind, a memory
+// leak will be reported if OpenSSL is initialized for the first time from
+// a worker thread; calling this function from the main thread prevents this.
+void EnsureBackendInitialized();
+
 }  // namespace parquet::encryption
diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc 
b/cpp/src/parquet/encryption/encryption_internal_nossl.cc
index 0241923474..ead868643b 100644
--- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc
+++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc
@@ -114,4 +114,6 @@ void QuickUpdatePageAad(int32_t new_page_ordinal, 
std::string* AAD) {
 
 void RandBytes(unsigned char* buf, int num) { ThrowOpenSSLRequiredException(); 
}
 
+void EnsureBackendInitialized() {}
+
 }  // namespace parquet::encryption
diff --git a/cpp/src/parquet/encryption/openssl_internal.cc 
b/cpp/src/parquet/encryption/openssl_internal.cc
new file mode 100644
index 0000000000..05f2773532
--- /dev/null
+++ b/cpp/src/parquet/encryption/openssl_internal.cc
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/encryption/openssl_internal.h"
+
+#include <openssl/crypto.h>
+
+#include "parquet/exception.h"
+
+namespace parquet::encryption::openssl {
+
+void EnsureInitialized() {
+  // Initialize ciphers and random engines
+  if (!OPENSSL_init_crypto(OPENSSL_INIT_ENGINE_ALL_BUILTIN | 
OPENSSL_INIT_ADD_ALL_CIPHERS,
+                           NULL)) {
+    throw ParquetException("OpenSSL initialization failed");
+  }
+}
+
+}  // namespace parquet::encryption::openssl
diff --git a/cpp/src/parquet/encryption/openssl_internal.h 
b/cpp/src/parquet/encryption/openssl_internal.h
new file mode 100644
index 0000000000..096aaf9640
--- /dev/null
+++ b/cpp/src/parquet/encryption/openssl_internal.h
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace parquet::encryption::openssl {
+
+void EnsureInitialized();
+
+}  // namespace parquet::encryption::openssl

Reply via email to