This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d586ed1981 GH-49752: [C++][Gandiva] Fix potential buffer overrun in 
Gandiva SSL function (#49780)
d586ed1981 is described below

commit d586ed1981e794b634be63e819855fc6ca86c8ed
Author: Logan Riggs <[email protected]>
AuthorDate: Mon Jun 1 18:18:19 2026 -0700

    GH-49752: [C++][Gandiva] Fix potential buffer overrun in Gandiva SSL 
function (#49780)
    
    ### Rationale for this change
    Fixes security-related problems found in gdv_hash_using_openssl. Those
    problems were not deemed to be an actual security risk.
    
    ### What changes are included in this PR?
    
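    **[hash_utils.h:41, hash_utils.cc:66]** Removed GANDIVA_EXPORT from
    gdv_hash_using_openssl — it's an internal helper, not part of the public API.
    Its declaration (and the openssl/evp.h include) is removed from hash_utils.h,
    and the definition now lives in an anonymous namespace in hash_utils.cc.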
    
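    **[hash_utils.cc:105]** Changed && → || in the validation condition. The
    original only errored when both checks failed; now it errors when either
    result_length != hash_digest_size or result_buf_size != (2 * hash_digest_size).
    The return value of EVP_DigestFinal_ex is now also checked, and a failure is
    reported through the same error path.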
    
    **[hash_utils.cc:135]** Fixed the snprintf buffer size so that it correctly
    accounts for the already-written bytes and prevents potential out-of-bounds
    writes. The result buffer is now allocated with result_buf_size + 1 bytes —
    the extra byte absorbs the final null terminator. snprintf is now passed
    result_buf_size - result_buff_index + 1, which reflects the actual remaining
    space (2 hex chars + 1 null = 3 bytes on the last call) and prevents any
    potential overflow if the format ever changes.
    
    ### Are these changes tested?
    Yes, unit tests.
    
    ### Are there any user-facing changes?
    No.
    * GitHub Issue: #49752
    
    Authored-by: [email protected] <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/gandiva/hash_utils.cc | 91 +++++++++++++++++++++++--------------------
 cpp/src/gandiva/hash_utils.h  |  6 ---
 2 files changed, 48 insertions(+), 49 deletions(-)

diff --git a/cpp/src/gandiva/hash_utils.cc b/cpp/src/gandiva/hash_utils.cc
index ad856e1e23..c5631cf93a 100644
--- a/cpp/src/gandiva/hash_utils.cc
+++ b/cpp/src/gandiva/hash_utils.cc
@@ -21,49 +21,12 @@
 #include "gandiva/gdv_function_stubs.h"
 #include "openssl/evp.h"
 
-namespace gandiva {
-
-/// Hashes a generic message using the SHA512 algorithm
-GANDIVA_EXPORT
-const char* gdv_sha512_hash(int64_t context, const void* message, size_t 
message_length,
-                            int32_t* out_length) {
-  constexpr int sha512_result_length = 128;
-  return gdv_hash_using_openssl(context, message, message_length, EVP_sha512(),
-                                sha512_result_length, out_length);
-}
-
-/// Hashes a generic message using the SHA256 algorithm
-GANDIVA_EXPORT
-const char* gdv_sha256_hash(int64_t context, const void* message, size_t 
message_length,
-                            int32_t* out_length) {
-  constexpr int sha256_result_length = 64;
-  return gdv_hash_using_openssl(context, message, message_length, EVP_sha256(),
-                                sha256_result_length, out_length);
-}
-
-/// Hashes a generic message using the SHA1 algorithm
-GANDIVA_EXPORT
-const char* gdv_sha1_hash(int64_t context, const void* message, size_t 
message_length,
-                          int32_t* out_length) {
-  constexpr int sha1_result_length = 40;
-  return gdv_hash_using_openssl(context, message, message_length, EVP_sha1(),
-                                sha1_result_length, out_length);
-}
-
-GANDIVA_EXPORT
-const char* gdv_md5_hash(int64_t context, const void* message, size_t 
message_length,
-                         int32_t* out_length) {
-  constexpr int md5_result_length = 32;
-  return gdv_hash_using_openssl(context, message, message_length, EVP_md5(),
-                                md5_result_length, out_length);
-}
-
-/// \brief Hashes a generic message using SHA algorithm.
+namespace {
+/// \brief Hashes a generic message using a hash/digest algorithm.
 ///
 /// It uses the EVP API in the OpenSSL library to generate
 /// the hash. The type of the hash is defined by the
 /// \b hash_type \b parameter.
-GANDIVA_EXPORT
 const char* gdv_hash_using_openssl(int64_t context, const void* message,
                                    size_t message_length, const EVP_MD* 
hash_type,
                                    uint32_t result_buf_size, int32_t* 
out_length) {
@@ -100,9 +63,10 @@ const char* gdv_hash_using_openssl(int64_t context, const 
void* message,
   }
 
   unsigned int result_length;
-  EVP_DigestFinal_ex(md_ctx, result, &result_length);
+  int evp_result = EVP_DigestFinal_ex(md_ctx, result, &result_length);
 
-  if (result_length != hash_digest_size && result_buf_size != (2 * 
hash_digest_size)) {
+  if (evp_result != evp_success_status || result_length != hash_digest_size ||
+      result_buf_size != (2 * hash_digest_size)) {
     gdv_fn_context_set_error_msg(context,
                                  "Could not obtain the hash for the defined 
value");
     EVP_MD_CTX_free(md_ctx);
@@ -112,8 +76,10 @@ const char* gdv_hash_using_openssl(int64_t context, const 
void* message,
     return "";
   }
 
+  // Allocate one extra byte beyond result_buf_size so that the null terminator
+  // written by the final snprintf call does not land past the end of the 
buffer.
   auto result_buffer =
-      reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
result_buf_size));
+      reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 
result_buf_size + 1));
 
   if (result_buffer == nullptr) {
     gdv_fn_context_set_error_msg(context,
@@ -132,7 +98,8 @@ const char* gdv_hash_using_openssl(int64_t context, const 
void* message,
 
     unsigned char hex_number = result[j];
     result_buff_index +=
-        snprintf(result_buffer + result_buff_index, result_buf_size, "%02x", 
hex_number);
+        snprintf(result_buffer + result_buff_index,
+                 result_buf_size - result_buff_index + 1, "%02x", hex_number);
   }
 
   // Free the resources used by the EVP to avoid memory leaks
@@ -142,6 +109,44 @@ const char* gdv_hash_using_openssl(int64_t context, const 
void* message,
   *out_length = result_buf_size;
   return result_buffer;
 }
+}  // namespace
+
+namespace gandiva {
+
+/// Hashes a generic message using the SHA512 algorithm
+GANDIVA_EXPORT
+const char* gdv_sha512_hash(int64_t context, const void* message, size_t 
message_length,
+                            int32_t* out_length) {
+  constexpr int sha512_result_length = 128;
+  return gdv_hash_using_openssl(context, message, message_length, EVP_sha512(),
+                                sha512_result_length, out_length);
+}
+
+/// Hashes a generic message using the SHA256 algorithm
+GANDIVA_EXPORT
+const char* gdv_sha256_hash(int64_t context, const void* message, size_t 
message_length,
+                            int32_t* out_length) {
+  constexpr int sha256_result_length = 64;
+  return gdv_hash_using_openssl(context, message, message_length, EVP_sha256(),
+                                sha256_result_length, out_length);
+}
+
+/// Hashes a generic message using the SHA1 algorithm
+GANDIVA_EXPORT
+const char* gdv_sha1_hash(int64_t context, const void* message, size_t 
message_length,
+                          int32_t* out_length) {
+  constexpr int sha1_result_length = 40;
+  return gdv_hash_using_openssl(context, message, message_length, EVP_sha1(),
+                                sha1_result_length, out_length);
+}
+
+GANDIVA_EXPORT
+const char* gdv_md5_hash(int64_t context, const void* message, size_t 
message_length,
+                         int32_t* out_length) {
+  constexpr int md5_result_length = 32;
+  return gdv_hash_using_openssl(context, message, message_length, EVP_md5(),
+                                md5_result_length, out_length);
+}
 
 GANDIVA_EXPORT
 uint64_t gdv_double_to_long(double value) {
diff --git a/cpp/src/gandiva/hash_utils.h b/cpp/src/gandiva/hash_utils.h
index 06e988496b..0636184b29 100644
--- a/cpp/src/gandiva/hash_utils.h
+++ b/cpp/src/gandiva/hash_utils.h
@@ -21,7 +21,6 @@
 #include <cstdint>
 #include <cstdlib>
 #include "gandiva/visibility.h"
-#include "openssl/evp.h"
 
 namespace gandiva {
 
@@ -37,11 +36,6 @@ GANDIVA_EXPORT
 const char* gdv_sha1_hash(int64_t context, const void* message, size_t 
message_length,
                           int32_t* out_length);
 
-GANDIVA_EXPORT
-const char* gdv_hash_using_openssl(int64_t context, const void* message,
-                                   size_t message_length, const EVP_MD* 
hash_type,
-                                   uint32_t result_buf_size, int32_t* 
out_length);
-
 GANDIVA_EXPORT
 const char* gdv_md5_hash(int64_t context, const void* message, size_t 
message_length,
                          int32_t* out_length);

Reply via email to