This is an automated email from the ASF dual-hosted git repository.
CTTY pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git
The following commit(s) were added to refs/heads/main by this push:
new 3d8ab5e03 fix(puffin): add missing magic number validation for puffins
(#2416)
3d8ab5e03 is described below
commit 3d8ab5e03fa8d269600a00eac7e5d01deaf1a3a6
Author: dentiny <[email protected]>
AuthorDate: Fri May 15 16:34:18 2026 -0700
fix(puffin): add missing magic number validation for puffins (#2416)
## What changes are included in this PR?
We have two functions to read puffin metadata: `read` and
`read_with_prefetch`. In theory these two functions should be similarly
the same apart from IO-related logic, but `read_with_prefetch` seems to
miss magic number validation:
https://github.com/apache/iceberg-rust/blob/4f21e503afc5363c10fafac917f0febe136c464e/crates/iceberg/src/puffin/metadata.rs#L284
In terms of implementation, I thought about extracting validation+footer
parse into a separate function, but the code is very simple here, don't
think it necessary.
## Are these changes tested?
Yes, the newly added regression test fails for the current codebase
---
crates/iceberg/src/puffin/metadata.rs | 39 ++++++++++++++++++++++++++++++++++-
1 file changed, 38 insertions(+), 1 deletion(-)
diff --git a/crates/iceberg/src/puffin/metadata.rs
b/crates/iceberg/src/puffin/metadata.rs
index e2dfc10c2..10cfd265d 100644
--- a/crates/iceberg/src/puffin/metadata.rs
+++ b/crates/iceberg/src/puffin/metadata.rs
@@ -324,7 +324,11 @@ impl FileMetadata {
return FileMetadata::read(input_file).await;
}
- // Read footer based on prefetchi hint
+ // Validate file header magic
+ let first_four_bytes =
file_read.read(0..FileMetadata::MAGIC_LENGTH.into()).await?;
+ FileMetadata::check_magic(&first_four_bytes)?;
+
+ // Read footer based on prefetch hint
let start = input_file_length - prefetch_hint as u64;
let end = input_file_length;
let footer_bytes = file_read.read(start..end).await?;
@@ -388,6 +392,7 @@ mod tests {
use bytes::Bytes;
use tempfile::TempDir;
+ use crate::ErrorKind;
use crate::io::{FileIO, InputFile};
use crate::puffin::metadata::{BlobMetadata, CompressionCodec,
FileMetadata};
use crate::puffin::test_utils::{
@@ -958,6 +963,38 @@ mod tests {
assert_eq!(file_metadata, zstd_compressed_metric_file_metadata());
}
+ #[tokio::test]
+ async fn test_read_with_incorrect_header_magic() {
+ let temp_dir = TempDir::new().unwrap();
+
+ let prefetch_hint: u8 = 64;
+ let mut bytes = vec![];
+ // Invalid header magic
+ bytes.extend([0x00, 0x00, 0x00, 0x00]);
+ // Intentionally keep file size larger than prefetch_hint.
+ bytes.extend(vec![0u8; prefetch_hint as usize]);
+ // Valid footer: magic + payload + footer struct
+ bytes.extend(FileMetadata::MAGIC);
+ bytes.extend(empty_footer_payload_bytes());
+ bytes.extend(empty_footer_payload_bytes_length_bytes());
+ bytes.extend(vec![0, 0, 0, 0]); // flags
+ bytes.extend(FileMetadata::MAGIC);
+
+ let input_file = input_file_with_bytes(&temp_dir, &bytes).await;
+
+ assert_eq!(
+ FileMetadata::read(&input_file).await.unwrap_err().kind(),
+ ErrorKind::DataInvalid,
+ );
+ assert_eq!(
+ FileMetadata::read_with_prefetch(&input_file, prefetch_hint)
+ .await
+ .unwrap_err()
+ .kind(),
+ ErrorKind::DataInvalid,
+ );
+ }
+
#[tokio::test]
async fn test_gzip_compression_allowed_in_metadata() {
let temp_dir = TempDir::new().unwrap();