This is an automated email from the ASF dual-hosted git repository.

pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new ed536d18f7 GH-50149: [C++][Parquet] Avoid process abort when encoding fuzzer encounters OOM (#50150)
ed536d18f7 is described below

commit ed536d18f7afc2c2964211c3625ffd29bf3e8ded
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Jun 11 10:02:51 2026 +0200

    GH-50149: [C++][Parquet] Avoid process abort when encoding fuzzer encounters OOM (#50150)
    
    ### Rationale for this change
    
    In the Parquet encoding fuzzer, an out-of-memory (OOM) error raised while trying to roundtrip the encoding payload currently leads to a hard error (a process abort), which is reported as an issue on OSS-Fuzz. This should be converted to a soft error, i.e. a potential log message but not a process abort.
    
    ### Are these changes tested?
    
    Yes, by an additional fuzz regression file, as well as manually.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #50149
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/parquet/arrow/fuzz_encoding_internal.cc | 32 +++++++++++++++----------
 testing                                         |  2 +-
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/cpp/src/parquet/arrow/fuzz_encoding_internal.cc 
b/cpp/src/parquet/arrow/fuzz_encoding_internal.cc
index 4270eb5437..a007739116 100644
--- a/cpp/src/parquet/arrow/fuzz_encoding_internal.cc
+++ b/cpp/src/parquet/arrow/fuzz_encoding_internal.cc
@@ -290,23 +290,24 @@ struct TypedFuzzEncoding {
     }
 
     // Re-encode and re-decode using roundtrip encoding
-    {
-      auto compare_chunk = [&](int offset, std::span<const c_type> 
chunk_values) {
-        return CompareChunkAgainstReference(offset, chunk_values);
-      };
+    auto compare_chunk = [&](int offset, std::span<const c_type> chunk_values) 
{
+      return CompareChunkAgainstReference(offset, chunk_values);
+    };
+    auto do_roundtrip = [&]() -> Status {
       auto encoder = MakeEncoder(roundtrip_encoding_);
       BEGIN_PARQUET_CATCH_EXCEPTIONS
       if constexpr (arrow_supported()) {
         encoder->Put(*reference_array_);
         auto reencoded_buffer = encoder->FlushValues();
         auto reencoded_data = reencoded_buffer->template span_as<uint8_t>();
-        auto array = DecodeArrow(roundtrip_encoding_, 
reencoded_data).ValueOrDie();
-        ARROW_CHECK_OK(array->ValidateFull());
-        ARROW_CHECK_OK(CompareAgainstReference(array));
+        ARROW_ASSIGN_OR_RAISE(auto array,
+                              DecodeArrow(roundtrip_encoding_, 
reencoded_data));
+        RETURN_NOT_OK(array->ValidateFull());
+        RETURN_NOT_OK(CompareAgainstReference(array));
         // Compare with reading raw values
         for (const int chunk_size : chunk_sizes()) {
-          ARROW_CHECK_OK(RunOnDecodedChunks(roundtrip_encoding_, 
reencoded_data,
-                                            chunk_size, compare_chunk));
+          RETURN_NOT_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data,
+                                           chunk_size, compare_chunk));
         }
       } else {
         encoder->Put(reference_values_.data(),
@@ -315,14 +316,19 @@ struct TypedFuzzEncoding {
         auto reencoded_data = reencoded_buffer->template span_as<uint8_t>();
         // Vary chunk sizes
         for (const int chunk_size : chunk_sizes()) {
-          ARROW_CHECK_OK(RunOnDecodedChunks(roundtrip_encoding_, 
reencoded_data,
-                                            chunk_size, compare_chunk));
+          RETURN_NOT_OK(RunOnDecodedChunks(roundtrip_encoding_, reencoded_data,
+                                           chunk_size, compare_chunk));
         }
       }
       END_PARQUET_CATCH_EXCEPTIONS
+      return Status::OK();
+    };
+    Status roundtrip_status = do_roundtrip();
+    // OOM when attempting to roundtrip is not a hard failure, any other error 
is.
+    if (!roundtrip_status.IsOutOfMemory()) {
+      ARROW_CHECK_OK(roundtrip_status);
     }
-
-    return Status::OK();
+    return roundtrip_status;
   }
 
  protected:
diff --git a/testing b/testing
index 9cfebfef89..1d74fce2b6 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 9cfebfef8982fb8612e0a2c59059752bd32321a3
+Subproject commit 1d74fce2b6bb30158f254fc292252f4a87fc67a3

Reply via email to