liurenjie1024 commented on code in PR #797:
URL: https://github.com/apache/iceberg-rust/pull/797#discussion_r1899889761
##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -656,6 +656,38 @@ mod _const_schema {
})
};
+ fn data_file_fields_v2(partition_type: StructType) -> Vec<NestedFieldRef> {
Review Comment:
```suggestion
fn data_file_fields_v2(partition_type: &StructType) ->
Vec<NestedFieldRef> {
```
##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -656,6 +656,38 @@ mod _const_schema {
})
};
+ fn data_file_fields_v2(partition_type: StructType) -> Vec<NestedFieldRef> {
+ vec![
+ CONTENT.clone(),
+ FILE_PATH.clone(),
+ FILE_FORMAT.clone(),
+ Arc::new(NestedField::required(
+ 102,
+ "partition",
+ Type::Struct(partition_type),
+ )),
+ RECORD_COUNT.clone(),
+ FILE_SIZE_IN_BYTES.clone(),
+ COLUMN_SIZES.clone(),
+ VALUE_COUNTS.clone(),
+ NULL_VALUE_COUNTS.clone(),
+ NAN_VALUE_COUNTS.clone(),
+ LOWER_BOUNDS.clone(),
+ UPPER_BOUNDS.clone(),
+ KEY_METADATA.clone(),
+ SPLIT_OFFSETS.clone(),
+ EQUALITY_IDS.clone(),
+ SORT_ORDER_ID.clone(),
+ ]
+ }
+
+ pub(super) fn data_file_schema_v2(partition_type: StructType) ->
Result<AvroSchema, Error> {
Review Comment:
```suggestion
pub(super) fn data_file_schema_v2(partition_type: &StructType) ->
Result<AvroSchema, Error> {
```
##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -1189,6 +1211,47 @@ impl DataFile {
self.sort_order_id
}
}
+
+/// Convert data files to avro bytes.
+pub fn data_files_to_avro(
Review Comment:
To maintain api consistency, I would suggest to have sth like following:
```rust
/// comments
pub struct DataFileWriter<W> {
writer: W,
version: FormatVersion,
partition_type: &StructType
}
impl<W> DataFileWriter<W> {
pub fn v1_writer(w: W, partition_type: &StructType) {
}
}
impl<W: Write> DataFileWriter {
pub fn append(data_file: &DataFile) { ... }
pub fn fluse(mut self) {...}
}
```
It would be better to have similar data structures for reader.
##########
crates/iceberg/src/spec/manifest.rs:
##########
@@ -665,62 +697,52 @@ mod _const_schema {
Arc::new(NestedField::required(
2,
"data_file",
- Type::Struct(StructType::new(vec![
- CONTENT.clone(),
- FILE_PATH.clone(),
- FILE_FORMAT.clone(),
- Arc::new(NestedField::required(
- 102,
- "partition",
- Type::Struct(partition_type),
- )),
- RECORD_COUNT.clone(),
- FILE_SIZE_IN_BYTES.clone(),
- COLUMN_SIZES.clone(),
- VALUE_COUNTS.clone(),
- NULL_VALUE_COUNTS.clone(),
- NAN_VALUE_COUNTS.clone(),
- LOWER_BOUNDS.clone(),
- UPPER_BOUNDS.clone(),
- KEY_METADATA.clone(),
- SPLIT_OFFSETS.clone(),
- EQUALITY_IDS.clone(),
- SORT_ORDER_ID.clone(),
- ])),
+
Type::Struct(StructType::new(data_file_fields_v2(partition_type))),
)),
];
let schema = Schema::builder().with_fields(fields).build()?;
schema_to_avro_schema("manifest_entry", &schema)
}
+ fn data_file_fields_v1(partition_type: StructType) -> Vec<NestedFieldRef> {
Review Comment:
```suggestion
fn data_file_fields_v1(partition_type: &StructType) ->
Vec<NestedFieldRef> {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]