This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 4e4a430fd3 fix(ipc): Avoid panic on malformed compressed buffer prefix (#9802)
4e4a430fd3 is described below

commit 4e4a430fd380bbc2a5ac55d62759e70f0da07fe6
Author: pchintar <[email protected]>
AuthorDate: Sat Apr 25 11:21:01 2026 -0400

    fix(ipc): Avoid panic on malformed compressed buffer prefix (#9802)
    
    # Which issue does this PR close?
    
    - Closes #9801.
    
    # Rationale for this change
    
    The IPC format specifies that compressed buffers are encoded as:
    
    > `[8 bytes uncompressed length] + compressed data`
    
    The current implementation assumes this invariant when reading the
    prefix during decompression.
    
    However, in the reader path, buffers are constructed from metadata
    (`offset`, `length`) and passed to the decompression logic without
    validating that they contain at least the required 8-byte prefix.
    
    In contrast, Parquet defensively validates input before reading
    fixed-size prefixes, e.g.:
    
    ```rust
    while input_len >= PREFIX_LEN { ... }
    if input_len < expected_compressed_size as usize { return Err(...) }
    ```
    
    This ensures malformed or truncated input does not cause panics.
    
    In IPC, however, the prefix is read without a prior length check, so
    malformed/truncated input (buffer < 8 bytes) can lead to a panic instead
    of returning an error.
    
    # What changes are included in this PR?
    
    * Add a bounds check in `read_uncompressed_size` to ensure
    `buffer.len() >= 8`
    * Return `ArrowError::IpcError` when the prefix is missing
    * Propagate the error in `decompress_to_buffer`
    
    # Are these changes tested?
    
    Yes.
    
    * Added `test_read_uncompressed_size_rejects_short_prefix`
    
    * Verifies that a buffer shorter than 8 bytes returns an error instead
    of panicking
    * Existing compression tests remain unchanged and pass
    
    # Are there any user-facing changes?
    
    No.
---
 arrow-ipc/src/compression.rs | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/arrow-ipc/src/compression.rs b/arrow-ipc/src/compression.rs
index a4cf99cb86..ff6e83dfdd 100644
--- a/arrow-ipc/src/compression.rs
+++ b/arrow-ipc/src/compression.rs
@@ -183,7 +183,7 @@ impl CompressionCodec {
     ) -> Result<Buffer, ArrowError> {
         // read the first 8 bytes to determine if the data is
         // compressed
-        let decompressed_length = read_uncompressed_size(input);
+        let decompressed_length = read_uncompressed_size(input)?;
         let buffer = if decompressed_length == 0 {
             // empty
             Buffer::from([])
@@ -326,11 +326,16 @@ fn decompress_zstd(
 ///   LENGTH_NO_COMPRESSED_DATA: indicate that the data that follows is not compressed
 ///    0: indicate that there is no data
 ///   positive number: indicate the uncompressed length for the following data
+/// Returns an error if the input buffer is shorter than 8 bytes
 #[inline]
-fn read_uncompressed_size(buffer: &[u8]) -> i64 {
-    let len_buffer = &buffer[0..8];
-    // 64-bit little-endian signed integer
-    i64::from_le_bytes(len_buffer.try_into().unwrap())
+fn read_uncompressed_size(buffer: &[u8]) -> Result<i64, ArrowError> {
+    let len_buffer = buffer.get(..LENGTH_OF_PREFIX_DATA as usize).ok_or_else(|| {
+        ArrowError::IpcError(format!(
+            "Compressed IPC buffer is too short: expected at least {LENGTH_OF_PREFIX_DATA} bytes, got {}",
+            buffer.len()
+        ))
+    })?;
+    Ok(i64::from_le_bytes(len_buffer.try_into().unwrap()))
 }
 
 #[cfg(test)]
@@ -372,4 +377,16 @@ mod tests {
             .unwrap();
         assert_eq!(input_bytes, result.as_slice());
     }
+
+    #[test]
+    fn test_read_uncompressed_size_rejects_short_prefix() {
+        let err = super::read_uncompressed_size(&[1, 2, 3, 4, 5, 6, 7])
+            .expect_err("short compressed IPC prefix should return an error");
+
+        assert!(
+            err.to_string()
+                .contains("Compressed IPC buffer is too short"),
+            "unexpected error: {err}"
+        );
+    }
 }

Reply via email to