This is an automated email from the ASF dual-hosted git repository.

liurenjie1024 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new 8dd4a221 Read ManifestList V1 with V2 projection. (#1482)
8dd4a221 is described below

commit 8dd4a2216728c6fcad42a4d48ea7e39bef30517d
Author: Alex Stephen <[email protected]>
AuthorDate: Mon Jul 14 02:55:57 2025 -0700

    Read ManifestList V1 with V2 projection. (#1482)
    
    ## Which issue does this PR close?
    
    - Closes #1471
    
    ## What changes are included in this PR?
    
    
    On ManifestList data files in v1, this sets the default content-type to
    DATA (0).
    
    ## Are these changes tested?
---
 crates/iceberg/src/avro/schema.rs          | 13 ++++++---
 crates/iceberg/src/spec/manifest/_serde.rs | 46 ++++++++++++++++++++++++++----
 crates/iceberg/src/spec/manifest/entry.rs  | 14 ++++-----
 3 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs
index 2411c318..b08a6730 100644
--- a/crates/iceberg/src/avro/schema.rs
+++ b/crates/iceberg/src/avro/schema.rs
@@ -81,6 +81,14 @@ impl SchemaVisitor for SchemaToAvroSchema {
             field_schema = avro_optional(field_schema)?;
         }
 
+        let default = if let Some(literal) = &field.initial_default {
+            Some(literal.clone().try_into_json(&field.field_type)?)
+        } else if !field.required {
+            Some(Value::Null)
+        } else {
+            None
+        };
+
         let mut avro_record_field = AvroRecordField {
             name: field.name.clone(),
             schema: field_schema,
@@ -88,13 +96,10 @@ impl SchemaVisitor for SchemaToAvroSchema {
             position: 0,
             doc: field.doc.clone(),
             aliases: None,
-            default: None,
+            default,
             custom_attributes: Default::default(),
         };
 
-        if !field.required {
-            avro_record_field.default = Some(Value::Null);
-        }
         avro_record_field.custom_attributes.insert(
             FIELD_ID_PROP.to_string(),
             Value::Number(Number::from(field.id)),
diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs
index 97923c7a..fd7bc2e6 100644
--- a/crates/iceberg/src/spec/manifest/_serde.rs
+++ b/crates/iceberg/src/spec/manifest/_serde.rs
@@ -330,9 +330,8 @@ mod tests {
         assert_eq!(ret, expected_ret, "Negative i64 entry should be ignored!");
     }
 
-    #[tokio::test]
-    async fn test_data_file_serialize_deserialize() {
-        let schema = Arc::new(
+    fn schema() -> Arc<Schema> {
+        Arc::new(
             Schema::builder()
                 .with_fields(vec![
                     Arc::new(NestedField::optional(
@@ -353,8 +352,11 @@ mod tests {
                 ])
                 .build()
                 .unwrap(),
-        );
-        let data_files = vec![DataFile {
+        )
+    }
+
+    fn data_files() -> Vec<DataFile> {
+        vec![DataFile {
             content: DataContentType::Data,
             file_path: "s3://testbucket/iceberg_data/iceberg_ctl/iceberg_db/iceberg_tbl/data/00000-7-45268d71-54eb-476c-b42c-942d880c04a1-00001.parquet".to_string(),
             file_format: DataFileFormat::Parquet,
@@ -376,7 +378,13 @@ mod tests {
             referenced_data_file: None,
             content_offset: None,
             content_size_in_bytes: None,
-        }];
+        }]
+    }
+
+    #[tokio::test]
+    async fn test_data_file_serialize_deserialize() {
+        let schema = schema();
+        let data_files = data_files();
 
         let mut buffer = Vec::new();
         let _ = write_data_files_to_avro(
@@ -398,4 +406,30 @@ mod tests {
 
         assert_eq!(data_files, actual_data_file);
     }
+
+    #[tokio::test]
+    async fn test_data_file_serialize_deserialize_v1_data_on_v2_reader() {
+        let schema = schema();
+        let data_files = data_files();
+
+        let mut buffer = Vec::new();
+        let _ = write_data_files_to_avro(
+            &mut buffer,
+            data_files.clone().into_iter(),
+            &StructType::new(vec![]),
+            FormatVersion::V1,
+        )
+        .unwrap();
+
+        let actual_data_file = read_data_files_from_avro(
+            &mut Cursor::new(buffer),
+            &schema,
+            0,
+            &StructType::new(vec![]),
+            FormatVersion::V2,
+        )
+        .unwrap();
+
+        assert_eq!(actual_data_file[0].content, DataContentType::Data)
+    }
 }
diff --git a/crates/iceberg/src/spec/manifest/entry.rs b/crates/iceberg/src/spec/manifest/entry.rs
index 7d2f982d..7ba9efb3 100644
--- a/crates/iceberg/src/spec/manifest/entry.rs
+++ b/crates/iceberg/src/spec/manifest/entry.rs
@@ -24,8 +24,8 @@ use typed_builder::TypedBuilder;
 use crate::avro::schema_to_avro_schema;
 use crate::error::Result;
 use crate::spec::{
-    DataContentType, DataFile, INITIAL_SEQUENCE_NUMBER, ListType, ManifestFile, MapType,
-    NestedField, NestedFieldRef, PrimitiveType, Schema, StructType, Type,
+    DataContentType, DataFile, INITIAL_SEQUENCE_NUMBER, ListType, Literal, ManifestFile, MapType,
+    NestedField, NestedFieldRef, PrimitiveLiteral, PrimitiveType, Schema, StructType, Type,
 };
 use crate::{Error, ErrorKind};
 
@@ -232,11 +232,11 @@ static FILE_SEQUENCE_NUMBER: Lazy<NestedFieldRef> = {
 
 static CONTENT: Lazy<NestedFieldRef> = {
     Lazy::new(|| {
-        Arc::new(NestedField::required(
-            134,
-            "content",
-            Type::Primitive(PrimitiveType::Int),
-        ))
+        Arc::new(
+            NestedField::required(134, "content", Type::Primitive(PrimitiveType::Int))
+                // 0 refers to DataContentType::DATA
+                .with_initial_default(Literal::Primitive(PrimitiveLiteral::Int(0))),
+        )
     })
 };
 

Reply via email to