This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d586ed1981 GH-49752: [C++][Gandiva] Fix potential buffer overrun in
Gandiva SSL function (#49780)
d586ed1981 is described below
commit d586ed1981e794b634be63e819855fc6ca86c8ed
Author: Logan Riggs <[email protected]>
AuthorDate: Mon Jun 1 18:18:19 2026 -0700
GH-49752: [C++][Gandiva] Fix potential buffer overrun in Gandiva SSL
function (#49780)
### Rationale for this change
Fixes security-related problems found in gdv_hash_using_openssl. These
problems were not deemed to be an actual security risk.
### What changes are included in this PR?
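**[hash_utils.h:41, hash_utils.cc:66]** Removed GANDIVA_EXPORT from
gdv_hash_using_openssl — it's an internal helper, not part of the public API.
Its declaration (and the openssl/evp.h include it required) is dropped from
hash_utils.h, and the definition now lives in an anonymous namespace in
hash_utils.cc.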
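**[hash_utils.cc:105]** Changed && → || in the validation condition. The
original only errored when both checks failed; now it errors when either
result_length != hash_digest_size or result_buf_size != (2 * hash_digest_size).
The return value of EVP_DigestFinal_ex is now checked as well, and a failure
there is reported through the same error path.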
**[hash_utils.cc:135]** Fixed the size argument passed to snprintf so that it
accounts for the bytes already written and prevents potential out-of-bounds
writes. The result buffer is now allocated with result_buf_size + 1 bytes; the
extra byte absorbs the final null terminator. snprintf is now passed
result_buf_size - result_buff_index + 1, which reflects the actual remaining
space (2 hex chars + 1 null = 3 bytes on the last call) and prevents any
overflow if the format ever changes.
### Are these changes tested?
Yes, unit tests.
### Are there any user-facing changes?
No.
* GitHub Issue: #49752
Authored-by: [email protected] <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/gandiva/hash_utils.cc | 91 +++++++++++++++++++++++--------------------
cpp/src/gandiva/hash_utils.h | 6 ---
2 files changed, 48 insertions(+), 49 deletions(-)
diff --git a/cpp/src/gandiva/hash_utils.cc b/cpp/src/gandiva/hash_utils.cc
index ad856e1e23..c5631cf93a 100644
--- a/cpp/src/gandiva/hash_utils.cc
+++ b/cpp/src/gandiva/hash_utils.cc
@@ -21,49 +21,12 @@
#include "gandiva/gdv_function_stubs.h"
#include "openssl/evp.h"
-namespace gandiva {
-
-/// Hashes a generic message using the SHA512 algorithm
-GANDIVA_EXPORT
-const char* gdv_sha512_hash(int64_t context, const void* message, size_t
message_length,
- int32_t* out_length) {
- constexpr int sha512_result_length = 128;
- return gdv_hash_using_openssl(context, message, message_length, EVP_sha512(),
- sha512_result_length, out_length);
-}
-
-/// Hashes a generic message using the SHA256 algorithm
-GANDIVA_EXPORT
-const char* gdv_sha256_hash(int64_t context, const void* message, size_t
message_length,
- int32_t* out_length) {
- constexpr int sha256_result_length = 64;
- return gdv_hash_using_openssl(context, message, message_length, EVP_sha256(),
- sha256_result_length, out_length);
-}
-
-/// Hashes a generic message using the SHA1 algorithm
-GANDIVA_EXPORT
-const char* gdv_sha1_hash(int64_t context, const void* message, size_t
message_length,
- int32_t* out_length) {
- constexpr int sha1_result_length = 40;
- return gdv_hash_using_openssl(context, message, message_length, EVP_sha1(),
- sha1_result_length, out_length);
-}
-
-GANDIVA_EXPORT
-const char* gdv_md5_hash(int64_t context, const void* message, size_t
message_length,
- int32_t* out_length) {
- constexpr int md5_result_length = 32;
- return gdv_hash_using_openssl(context, message, message_length, EVP_md5(),
- md5_result_length, out_length);
-}
-
-/// \brief Hashes a generic message using SHA algorithm.
+namespace {
+/// \brief Hashes a generic message using a hash/digest algorithm.
///
/// It uses the EVP API in the OpenSSL library to generate
/// the hash. The type of the hash is defined by the
/// \b hash_type \b parameter.
-GANDIVA_EXPORT
const char* gdv_hash_using_openssl(int64_t context, const void* message,
size_t message_length, const EVP_MD*
hash_type,
uint32_t result_buf_size, int32_t*
out_length) {
@@ -100,9 +63,10 @@ const char* gdv_hash_using_openssl(int64_t context, const
void* message,
}
unsigned int result_length;
- EVP_DigestFinal_ex(md_ctx, result, &result_length);
+ int evp_result = EVP_DigestFinal_ex(md_ctx, result, &result_length);
- if (result_length != hash_digest_size && result_buf_size != (2 *
hash_digest_size)) {
+ if (evp_result != evp_success_status || result_length != hash_digest_size ||
+ result_buf_size != (2 * hash_digest_size)) {
gdv_fn_context_set_error_msg(context,
"Could not obtain the hash for the defined
value");
EVP_MD_CTX_free(md_ctx);
@@ -112,8 +76,10 @@ const char* gdv_hash_using_openssl(int64_t context, const
void* message,
return "";
}
+ // Allocate one extra byte beyond result_buf_size so that the null terminator
+ // written by the final snprintf call does not land past the end of the
buffer.
auto result_buffer =
- reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
result_buf_size));
+ reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context,
result_buf_size + 1));
if (result_buffer == nullptr) {
gdv_fn_context_set_error_msg(context,
@@ -132,7 +98,8 @@ const char* gdv_hash_using_openssl(int64_t context, const
void* message,
unsigned char hex_number = result[j];
result_buff_index +=
- snprintf(result_buffer + result_buff_index, result_buf_size, "%02x",
hex_number);
+ snprintf(result_buffer + result_buff_index,
+ result_buf_size - result_buff_index + 1, "%02x", hex_number);
}
// Free the resources used by the EVP to avoid memory leaks
@@ -142,6 +109,44 @@ const char* gdv_hash_using_openssl(int64_t context, const
void* message,
*out_length = result_buf_size;
return result_buffer;
}
+} // namespace
+
+namespace gandiva {
+
+/// Hashes a generic message using the SHA512 algorithm
+GANDIVA_EXPORT
+const char* gdv_sha512_hash(int64_t context, const void* message, size_t
message_length,
+ int32_t* out_length) {
+ constexpr int sha512_result_length = 128;
+ return gdv_hash_using_openssl(context, message, message_length, EVP_sha512(),
+ sha512_result_length, out_length);
+}
+
+/// Hashes a generic message using the SHA256 algorithm
+GANDIVA_EXPORT
+const char* gdv_sha256_hash(int64_t context, const void* message, size_t
message_length,
+ int32_t* out_length) {
+ constexpr int sha256_result_length = 64;
+ return gdv_hash_using_openssl(context, message, message_length, EVP_sha256(),
+ sha256_result_length, out_length);
+}
+
+/// Hashes a generic message using the SHA1 algorithm
+GANDIVA_EXPORT
+const char* gdv_sha1_hash(int64_t context, const void* message, size_t
message_length,
+ int32_t* out_length) {
+ constexpr int sha1_result_length = 40;
+ return gdv_hash_using_openssl(context, message, message_length, EVP_sha1(),
+ sha1_result_length, out_length);
+}
+
+GANDIVA_EXPORT
+const char* gdv_md5_hash(int64_t context, const void* message, size_t
message_length,
+ int32_t* out_length) {
+ constexpr int md5_result_length = 32;
+ return gdv_hash_using_openssl(context, message, message_length, EVP_md5(),
+ md5_result_length, out_length);
+}
GANDIVA_EXPORT
uint64_t gdv_double_to_long(double value) {
diff --git a/cpp/src/gandiva/hash_utils.h b/cpp/src/gandiva/hash_utils.h
index 06e988496b..0636184b29 100644
--- a/cpp/src/gandiva/hash_utils.h
+++ b/cpp/src/gandiva/hash_utils.h
@@ -21,7 +21,6 @@
#include <cstdint>
#include <cstdlib>
#include "gandiva/visibility.h"
-#include "openssl/evp.h"
namespace gandiva {
@@ -37,11 +36,6 @@ GANDIVA_EXPORT
const char* gdv_sha1_hash(int64_t context, const void* message, size_t
message_length,
int32_t* out_length);
-GANDIVA_EXPORT
-const char* gdv_hash_using_openssl(int64_t context, const void* message,
- size_t message_length, const EVP_MD*
hash_type,
- uint32_t result_buf_size, int32_t*
out_length);
-
GANDIVA_EXPORT
const char* gdv_md5_hash(int64_t context, const void* message, size_t
message_length,
int32_t* out_length);