This is an automated email from the ASF dual-hosted git repository.

pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new adecb17f39 GH-49803: [C++][CI] Avoid aborting when fuzz-mutated IPC 
file has less batches (#49804)
adecb17f39 is described below

commit adecb17f390aded6a2af8f5333ee10bd7a09f07f
Author: Antoine Pitrou <[email protected]>
AuthorDate: Tue Apr 21 09:01:05 2026 +0200

    GH-49803: [C++][CI] Avoid aborting when fuzz-mutated IPC file has less 
batches (#49804)
    
    ### Rationale for this change
    
    The IPC file fuzzer can remove batches from the IPC file footer while the 
batches do exist physically in the embedded IPC stream.
    
    This causes a problem in differential fuzzing, because comparing the results 
of the IPC stream reader against those of the IPC file reader then fails.
    
    This issue was found by OSS-Fuzz: 
https://issues.oss-fuzz.com/issues/503357759
    
    ### What changes are included in this PR?
    
    Skip differential fuzzing against the IPC stream reader if the number of 
batches read via the IPC file footer differs from the number of batches read 
from the IPC stream.
    
    ### Are these changes tested?
    
    Yes, by an existing fuzz regression file.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #49803
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/ipc/reader.cc | 27 +++++++++++++++++----------
 testing                     |  2 +-
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 5800813843..f7b9c779ab 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -2867,19 +2867,26 @@ Status FuzzIpcFile(const uint8_t* data, int64_t size) {
     auto maybe_stream_result = do_stream_read();
     final_status &= maybe_stream_result.status();
     if (maybe_stream_result.ok()) {
-      if (maybe_read_result->schema->Equals(maybe_stream_result->schema,
-                                            /*check_metadata=*/true)) {
-        // XXX: in some rare cases, an IPC file might read unequal to the 
enclosed
-        // IPC stream, for example if the footer skips some batches or orders 
the
-        // batches differently. We should revisit this if the fuzzer generates 
such
-        // files.
-        compare_result(*maybe_stream_result);
-      } else {
-        // The fuzzer might have mutated the schema definition that is 
duplicated
-        // in the IPC file footer, in which case the comparison above would 
fail.
+      if (!maybe_read_result->schema->Equals(maybe_stream_result->schema,
+                                             /*check_metadata=*/true)) {
+        // The fuzzer may have mutated the schema definition that is duplicated
+        // in the IPC file footer, in which case the comparison would fail.
         final_status &= Status::TypeError(
             "Schema mismatch between IPC stream and IPC file footer, skipping "
             "comparison");
+      } else if (maybe_read_result->batches.size() !=
+                 maybe_stream_result->batches.size()) {
+        // The footer of a fuzzer-mutated IPC file might have added or removed 
some
+        // batches that are physically present in the IPC stream. In this case 
we
+        // don't want to abort with a comparison failure.
+        // XXX There might be more elaborate cases where the fuzzer reorders
+        // batches in the IPC file without adding or removing any, which is 
going
+        // to be considerably more difficult to detect.
+        final_status &= Status::Invalid(
+            "Different number of batches between IPC stream and IPC file 
footer, "
+            "skipping comparison");
+      } else {
+        compare_result(*maybe_stream_result);
       }
     }
   }
diff --git a/testing b/testing
index 249079a810..190638e1b1 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 249079a810caedda6898464003c7ef8a47efeeae
+Subproject commit 190638e1b14af926601dbe0a95caa1940dafedd8

Reply via email to