This is an automated email from the ASF dual-hosted git repository.

etseidl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new f790721187 fix(parquet): validate INT96 column metadata statistics (#10003)
f790721187 is described below

commit f7907211873fef5f80ae22b1c3779dd24041e940
Author: Minh Vu <[email protected]>
AuthorDate: Thu May 21 18:03:54 2026 +0200

    fix(parquet): validate INT96 column metadata statistics (#10003)
    
    # Which issue does this PR close?
    
    Closes #10002.
    
    # Rationale for this change
    
    Malformed Parquet footer metadata can contain INT96 statistics whose
    encoded min or max value is longer than 12 bytes. The footer metadata
    conversion path checked that INT96 statistics were at least 12 bytes,
    but then asserted they were exactly 12 bytes. That allowed malformed
    input to panic instead of returning an error.
    
    The page-statistics path already returns an error for non-12-byte INT96
    statistics, so this change makes the footer metadata path behave
    consistently.
    
    # What changes are included in this PR?
    
    This PR replaces the INT96 min/max length assertions in footer metadata
    statistics conversion with explicit `ParquetError` returns.
    
    It also adds a regression test covering overlong INT96 min and max
    values in column metadata statistics.
    
    # Are these changes tested?
    
    Yes. I ran:
    
    - `cargo fmt --all`
    - `cargo +stable fmt --all -- --check`
    - `cargo fmt -p parquet -- --check --config skip_children=true $(find
    ./parquet -name "*.rs" ! -name format.rs)`
    - `cargo test -p parquet --lib
    file::metadata::thrift::tests::test_convert_stats_returns_error_for_overlong_int96_statistics`
    - `cargo test -p parquet --lib file::metadata::thrift::tests`
    - `cargo test -p parquet`
    - `cargo check -p parquet --all-targets`
    - `cargo clippy -p parquet --all-targets --all-features -- -D warnings`
    
    # Are there any user-facing changes?
    
    Malformed INT96 column metadata statistics now return an error instead
    of panicking.
---
 parquet/src/file/metadata/thrift/mod.rs | 48 +++++++++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/parquet/src/file/metadata/thrift/mod.rs b/parquet/src/file/metadata/thrift/mod.rs
index 9be697c0fe..d5a0112a5e 100644
--- a/parquet/src/file/metadata/thrift/mod.rs
+++ b/parquet/src/file/metadata/thrift/mod.rs
@@ -274,13 +274,17 @@ fn convert_stats(
                 Type::INT96 => {
                     // INT96 statistics may not be correct, because comparison is signed
                     let min = if let Some(data) = min {
-                        assert_eq!(data.len(), 12);
+                        if data.len() != 12 {
+                            return Err(general_err!("Incorrect Int96 min statistics"));
+                        }
                         Some(Int96::try_from_le_slice(data)?)
                     } else {
                         None
                     };
                     let max = if let Some(data) = max {
-                        assert_eq!(data.len(), 12);
+                        if data.len() != 12 {
+                            return Err(general_err!("Incorrect Int96 max statistics"));
+                        }
                         Some(Int96::try_from_le_slice(data)?)
                     } else {
                         None
@@ -1916,6 +1920,46 @@ pub(crate) mod tests {
         assert_eq!(decoded_zero.null_count_opt(), Some(0));
     }
 
+    #[test]
+    fn test_convert_stats_returns_error_for_overlong_int96_statistics() {
+        let primitive =
+            crate::schema::types::Type::primitive_type_builder("col", PhysicalType::INT96)
+                .build()
+                .unwrap();
+        let column_descr = Arc::new(ColumnDescriptor::new(
+            Arc::new(primitive),
+            0,
+            0,
+            ColumnPath::new(vec![]),
+        ));
+        let invalid = (0..13).collect::<Vec<_>>();
+
+        let make_stats = |min, max| super::Statistics {
+            max,
+            min,
+            null_count: Some(0),
+            distinct_count: None,
+            max_value: None,
+            min_value: None,
+            is_max_value_exact: None,
+            is_min_value_exact: None,
+        };
+
+        let err = super::convert_stats(&column_descr, Some(make_stats(Some(&invalid), None)))
+            .unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parquet error: Incorrect Int96 min statistics"
+        );
+
+        let err = super::convert_stats(&column_descr, Some(make_stats(None, Some(&invalid))))
+            .unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parquet error: Incorrect Int96 max statistics"
+        );
+    }
+
     #[test]
     fn malformed_bool_field_returns_error_not_panic() {
         let page_header = PageHeader {

Reply via email to