This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 4e4a430fd3 fix(ipc): Avoid panic on malformed compressed buffer prefix
(#9802)
4e4a430fd3 is described below
commit 4e4a430fd380bbc2a5ac55d62759e70f0da07fe6
Author: pchintar <[email protected]>
AuthorDate: Sat Apr 25 11:21:01 2026 -0400
fix(ipc): Avoid panic on malformed compressed buffer prefix (#9802)
# Which issue does this PR close?
- Closes #9801.
# Rationale for this change
The IPC format specifies that compressed buffers are encoded as:
> `[8 bytes uncompressed length] + compressed data`
The current implementation assumes this invariant when reading the
prefix during decompression.
However, in the reader path, buffers are constructed from metadata
(`offset`, `length`) and passed to the decompression logic without
validating that they contain at least the required 8-byte prefix.
In contrast, Parquet defensively validates input before reading
fixed-size prefixes, e.g.:
```rust
while input_len >= PREFIX_LEN { ... }
if input_len < expected_compressed_size as usize { return Err(...) }
```
This ensures malformed or truncated input does not cause panics.
In IPC, however, the prefix is read without a prior length check, so
malformed/truncated input (buffer < 8 bytes) can lead to a panic instead
of returning an error.
# What changes are included in this PR?
* Add a bounds check in `read_uncompressed_size` to ensure
`buffer.len() >= 8`
* Return `ArrowError::IpcError` when the prefix is missing or truncated
* Propagate the error in `decompress_to_buffer`
# Are these changes tested?
Yes.
* Added `test_read_uncompressed_size_rejects_short_prefix`
* Verifies that a buffer shorter than 8 bytes returns an error instead
of panicking
* Existing compression tests remain unchanged and pass
# Are there any user-facing changes?
No.
---
arrow-ipc/src/compression.rs | 27 ++++++++++++++++++++++-----
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/arrow-ipc/src/compression.rs b/arrow-ipc/src/compression.rs
index a4cf99cb86..ff6e83dfdd 100644
--- a/arrow-ipc/src/compression.rs
+++ b/arrow-ipc/src/compression.rs
@@ -183,7 +183,7 @@ impl CompressionCodec {
) -> Result<Buffer, ArrowError> {
// read the first 8 bytes to determine if the data is
// compressed
- let decompressed_length = read_uncompressed_size(input);
+ let decompressed_length = read_uncompressed_size(input)?;
let buffer = if decompressed_length == 0 {
// empty
Buffer::from([])
@@ -326,11 +326,16 @@ fn decompress_zstd(
/// LENGTH_NO_COMPRESSED_DATA: indicate that the data that follows is not
compressed
/// 0: indicate that there is no data
/// positive number: indicate the uncompressed length for the following data
+/// Returns an error if the input buffer is shorter than 8 bytes
#[inline]
-fn read_uncompressed_size(buffer: &[u8]) -> i64 {
- let len_buffer = &buffer[0..8];
- // 64-bit little-endian signed integer
- i64::from_le_bytes(len_buffer.try_into().unwrap())
+fn read_uncompressed_size(buffer: &[u8]) -> Result<i64, ArrowError> {
+ let len_buffer = buffer.get(..LENGTH_OF_PREFIX_DATA as
usize).ok_or_else(|| {
+ ArrowError::IpcError(format!(
+ "Compressed IPC buffer is too short: expected at least
{LENGTH_OF_PREFIX_DATA} bytes, got {}",
+ buffer.len()
+ ))
+ })?;
+ Ok(i64::from_le_bytes(len_buffer.try_into().unwrap()))
}
#[cfg(test)]
@@ -372,4 +377,16 @@ mod tests {
.unwrap();
assert_eq!(input_bytes, result.as_slice());
}
+
+ #[test]
+ fn test_read_uncompressed_size_rejects_short_prefix() {
+ let err = super::read_uncompressed_size(&[1, 2, 3, 4, 5, 6, 7])
+ .expect_err("short compressed IPC prefix should return an error");
+
+ assert!(
+ err.to_string()
+ .contains("Compressed IPC buffer is too short"),
+ "unexpected error: {err}"
+ );
+ }
}