This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new fd57b7ae1c GH-38304: [C++][Parquet] Fix Valgrind memory leak in
arrow-dataset-file-parquet-encryption-test (#38306)
fd57b7ae1c is described below
commit fd57b7ae1cc29bad579dc0d2661f3417bada0334
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Oct 18 12:04:50 2023 +0200
GH-38304: [C++][Parquet] Fix Valgrind memory leak in
arrow-dataset-file-parquet-encryption-test (#38306)
If OpenSSL initializes itself from a non-main thread, it can fail
deallocating all memory at shutdown.
This is really a benign leak, but we don't want any spurious CI errors.
* Closes: #38304
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
.../arrow/dataset/file_parquet_encryption_test.cc | 8 +++++
cpp/src/parquet/CMakeLists.txt | 3 +-
cpp/src/parquet/encryption/encryption_internal.cc | 13 ++++++++-
cpp/src/parquet/encryption/encryption_internal.h | 8 +++++
.../encryption/encryption_internal_nossl.cc | 2 ++
cpp/src/parquet/encryption/openssl_internal.cc | 34 ++++++++++++++++++++++
cpp/src/parquet/encryption/openssl_internal.h | 28 ++++++++++++++++++
7 files changed, 94 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
index 572f15e100..87028eb6e2 100644
--- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
+++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc
@@ -34,6 +34,7 @@
#include "parquet/arrow/reader.h"
#include "parquet/encryption/crypto_factory.h"
#include "parquet/encryption/encryption.h"
+#include "parquet/encryption/encryption_internal.h"
#include "parquet/encryption/kms_client.h"
#include "parquet/encryption/test_in_memory_kms.h"
@@ -58,6 +59,13 @@ class DatasetEncryptionTest : public ::testing::Test {
// partitioning scheme. The function also checks if the written files exist
in the file
// system.
static void SetUpTestSuite() {
+#ifdef ARROW_VALGRIND
+ // Not necessary otherwise, but prevents a Valgrind leak by making sure
+ // OpenSSL initialization is done from the main thread
+ // (see GH-38304 for analysis).
+ ::parquet::encryption::EnsureBackendInitialized();
+#endif
+
// Creates a mock file system using the current time point.
EXPECT_OK_AND_ASSIGN(file_system_, fs::internal::MockFileSystem::Make(
std::chrono::system_clock::now(),
{}));
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index 3cdd24dbe9..0d04ec3e30 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -234,7 +234,8 @@ if(ARROW_HAVE_RUNTIME_AVX2)
endif()
if(PARQUET_REQUIRE_ENCRYPTION)
- set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc)
+ set(PARQUET_SRCS ${PARQUET_SRCS} encryption/encryption_internal.cc
+ encryption/openssl_internal.cc)
# Encryption key management
set(PARQUET_SRCS
${PARQUET_SRCS}
diff --git a/cpp/src/parquet/encryption/encryption_internal.cc
b/cpp/src/parquet/encryption/encryption_internal.cc
index 6e66efeff6..b1770be533 100644
--- a/cpp/src/parquet/encryption/encryption_internal.cc
+++ b/cpp/src/parquet/encryption/encryption_internal.cc
@@ -16,6 +16,7 @@
// under the License.
#include "parquet/encryption/encryption_internal.h"
+
#include <openssl/aes.h>
#include <openssl/evp.h>
#include <openssl/rand.h>
@@ -27,6 +28,7 @@
#include <string>
#include <vector>
+#include "parquet/encryption/openssl_internal.h"
#include "parquet/exception.h"
using parquet::ParquetException;
@@ -92,6 +94,8 @@ class AesEncryptor::AesEncryptorImpl {
AesEncryptor::AesEncryptorImpl::AesEncryptorImpl(ParquetCipher::type alg_id,
int key_len,
bool metadata, bool
write_length) {
+ openssl::EnsureInitialized();
+
ctx_ = nullptr;
length_buffer_length_ = write_length ? kBufferSizeLength : 0;
@@ -358,6 +362,8 @@ AesDecryptor::~AesDecryptor() {}
AesDecryptor::AesDecryptorImpl::AesDecryptorImpl(ParquetCipher::type alg_id,
int key_len,
bool metadata, bool
contains_length) {
+ openssl::EnsureInitialized();
+
ctx_ = nullptr;
length_buffer_length_ = contains_length ? kBufferSizeLength : 0;
ciphertext_size_delta_ = length_buffer_length_ + kNonceLength;
@@ -646,6 +652,11 @@ void QuickUpdatePageAad(int32_t new_page_ordinal,
std::string* AAD) {
std::memcpy(AAD->data() + AAD->length() - 2, page_ordinal_bytes.data(), 2);
}
-void RandBytes(unsigned char* buf, int num) { RAND_bytes(buf, num); }
+void RandBytes(unsigned char* buf, int num) {
+ openssl::EnsureInitialized();
+ RAND_bytes(buf, num);
+}
+
+void EnsureBackendInitialized() { openssl::EnsureInitialized(); }
} // namespace parquet::encryption
diff --git a/cpp/src/parquet/encryption/encryption_internal.h
b/cpp/src/parquet/encryption/encryption_internal.h
index 77921d8731..1bdf47c56f 100644
--- a/cpp/src/parquet/encryption/encryption_internal.h
+++ b/cpp/src/parquet/encryption/encryption_internal.h
@@ -130,4 +130,12 @@ void QuickUpdatePageAad(int32_t new_page_ordinal,
std::string* AAD);
// Wraps OpenSSL RAND_bytes function
void RandBytes(unsigned char* buf, int num);
+// Ensure OpenSSL is initialized.
+//
+// This is only necessary in specific situations since OpenSSL otherwise
+// initializes itself automatically. For example, under Valgrind, a memory
+// leak will be reported if OpenSSL is initialized for the first time from
+// a worker thread; calling this function from the main thread prevents this.
+void EnsureBackendInitialized();
+
} // namespace parquet::encryption
diff --git a/cpp/src/parquet/encryption/encryption_internal_nossl.cc
b/cpp/src/parquet/encryption/encryption_internal_nossl.cc
index 0241923474..ead868643b 100644
--- a/cpp/src/parquet/encryption/encryption_internal_nossl.cc
+++ b/cpp/src/parquet/encryption/encryption_internal_nossl.cc
@@ -114,4 +114,6 @@ void QuickUpdatePageAad(int32_t new_page_ordinal,
std::string* AAD) {
void RandBytes(unsigned char* buf, int num) { ThrowOpenSSLRequiredException();
}
+void EnsureBackendInitialized() {}
+
} // namespace parquet::encryption
diff --git a/cpp/src/parquet/encryption/openssl_internal.cc
b/cpp/src/parquet/encryption/openssl_internal.cc
new file mode 100644
index 0000000000..05f2773532
--- /dev/null
+++ b/cpp/src/parquet/encryption/openssl_internal.cc
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "parquet/encryption/openssl_internal.h"
+
+#include <openssl/crypto.h>
+
+#include "parquet/exception.h"
+
+namespace parquet::encryption::openssl {
+
+void EnsureInitialized() {
+ // Initialize ciphers and random engines
+ if (!OPENSSL_init_crypto(OPENSSL_INIT_ENGINE_ALL_BUILTIN |
OPENSSL_INIT_ADD_ALL_CIPHERS,
+ NULL)) {
+ throw ParquetException("OpenSSL initialization failed");
+ }
+}
+
+} // namespace parquet::encryption::openssl
diff --git a/cpp/src/parquet/encryption/openssl_internal.h
b/cpp/src/parquet/encryption/openssl_internal.h
new file mode 100644
index 0000000000..096aaf9640
--- /dev/null
+++ b/cpp/src/parquet/encryption/openssl_internal.h
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace parquet::encryption::openssl {
+
+void EnsureInitialized();
+
+} // namespace parquet::encryption::openssl