This is an automated email from the ASF dual-hosted git repository.

CTTY pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 3d8ab5e03 fix(puffin): add missing magic number validation for puffins 
(#2416)
3d8ab5e03 is described below

commit 3d8ab5e03fa8d269600a00eac7e5d01deaf1a3a6
Author: dentiny <[email protected]>
AuthorDate: Fri May 15 16:34:18 2026 -0700

    fix(puffin): add missing magic number validation for puffins (#2416)
    
    ## What changes are included in this PR?
    
    We have two functions to read puffin metadata: `read` and
    `read_with_prefetch`. In theory these two functions should be similarly
    the same apart from IO-related logic, but `read_with_prefetch` seems to
    miss magic number validation:
    
https://github.com/apache/iceberg-rust/blob/4f21e503afc5363c10fafac917f0febe136c464e/crates/iceberg/src/puffin/metadata.rs#L284
    
    In terms of implementation, I thought about extracting validation+footer
    parse into a separate function, but the code is very simple here, don't
    think it necessary.
    
    ## Are these changes tested?
    
    Yes, the newly added regression test fails for the current codebase
---
 crates/iceberg/src/puffin/metadata.rs | 39 ++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/crates/iceberg/src/puffin/metadata.rs 
b/crates/iceberg/src/puffin/metadata.rs
index e2dfc10c2..10cfd265d 100644
--- a/crates/iceberg/src/puffin/metadata.rs
+++ b/crates/iceberg/src/puffin/metadata.rs
@@ -324,7 +324,11 @@ impl FileMetadata {
                 return FileMetadata::read(input_file).await;
             }
 
-            // Read footer based on prefetchi hint
+            // Validate file header magic
+            let first_four_bytes = 
file_read.read(0..FileMetadata::MAGIC_LENGTH.into()).await?;
+            FileMetadata::check_magic(&first_four_bytes)?;
+
+            // Read footer based on prefetch hint
             let start = input_file_length - prefetch_hint as u64;
             let end = input_file_length;
             let footer_bytes = file_read.read(start..end).await?;
@@ -388,6 +392,7 @@ mod tests {
     use bytes::Bytes;
     use tempfile::TempDir;
 
+    use crate::ErrorKind;
     use crate::io::{FileIO, InputFile};
     use crate::puffin::metadata::{BlobMetadata, CompressionCodec, 
FileMetadata};
     use crate::puffin::test_utils::{
@@ -958,6 +963,38 @@ mod tests {
         assert_eq!(file_metadata, zstd_compressed_metric_file_metadata());
     }
 
+    #[tokio::test]
+    async fn test_read_with_incorrect_header_magic() {
+        let temp_dir = TempDir::new().unwrap();
+
+        let prefetch_hint: u8 = 64;
+        let mut bytes = vec![];
+        // Invalid header magic
+        bytes.extend([0x00, 0x00, 0x00, 0x00]);
+        // Intentionally keep file size larger than prefetch_hint.
+        bytes.extend(vec![0u8; prefetch_hint as usize]);
+        // Valid footer: magic + payload + footer struct
+        bytes.extend(FileMetadata::MAGIC);
+        bytes.extend(empty_footer_payload_bytes());
+        bytes.extend(empty_footer_payload_bytes_length_bytes());
+        bytes.extend(vec![0, 0, 0, 0]); // flags
+        bytes.extend(FileMetadata::MAGIC);
+
+        let input_file = input_file_with_bytes(&temp_dir, &bytes).await;
+
+        assert_eq!(
+            FileMetadata::read(&input_file).await.unwrap_err().kind(),
+            ErrorKind::DataInvalid,
+        );
+        assert_eq!(
+            FileMetadata::read_with_prefetch(&input_file, prefetch_hint)
+                .await
+                .unwrap_err()
+                .kind(),
+            ErrorKind::DataInvalid,
+        );
+    }
+
     #[tokio::test]
     async fn test_gzip_compression_allowed_in_metadata() {
         let temp_dir = TempDir::new().unwrap();

Reply via email to