Fokko commented on PR #742:
URL: https://github.com/apache/iceberg-rust/pull/742#issuecomment-2515564242
Did some checks:
First `metadata.json`:
```json
{
"format-version" : 2,
"table-uuid" : "eb83b77f-c2c3-473c-a138-444a3de61213",
"location" :
"s3://icebergdata/demo/iceberg/rust/append_partition_data_file_test",
"last-sequence-number" : 0,
"last-updated-ms" : 1733259665987,
"last-column-id" : 3,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"identifier-field-ids" : [ 2 ],
"fields" : [ {
"id" : 1,
"name" : "foo",
"required" : false,
"type" : "string"
}, {
"id" : 2,
"name" : "bar",
"required" : true,
"type" : "int"
}, {
"id" : 3,
"name" : "baz",
"required" : false,
"type" : "boolean"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ {
"name" : "id",
"transform" : "identity",
"source-id" : 2,
"field-id" : 1000
} ]
} ],
"last-partition-id" : 1000,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"write.parquet.compression-codec" : "zstd"
},
"current-snapshot-id" : -1,
"refs" : { },
"snapshots" : [ ],
"statistics" : [ ],
"partition-statistics" : [ ],
"snapshot-log" : [ ],
"metadata-log" : [ ]
}
```
Without a commit, the `current-snapshot-id` should be `null` instead of `-1`.
After the commit:
```json
{
"format-version" : 2,
"table-uuid" : "eb83b77f-c2c3-473c-a138-444a3de61213",
"location" :
"s3://icebergdata/demo/iceberg/rust/append_partition_data_file_test",
"last-sequence-number" : 1,
"last-updated-ms" : 1733259666572,
"last-column-id" : 3,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"identifier-field-ids" : [ 2 ],
"fields" : [ {
"id" : 1,
"name" : "foo",
"required" : false,
"type" : "string"
}, {
"id" : 2,
"name" : "bar",
"required" : true,
"type" : "int"
}, {
"id" : 3,
"name" : "baz",
"required" : false,
"type" : "boolean"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ {
"name" : "id",
"transform" : "identity",
"source-id" : 2,
"field-id" : 1000
} ]
} ],
"last-partition-id" : 1000,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"write.parquet.compression-codec" : "zstd"
},
"current-snapshot-id" : 8826880672679595429,
"refs" : {
"main" : {
"snapshot-id" : 8826880672679595429,
"type" : "branch"
}
},
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 8826880672679595429,
"timestamp-ms" : 1733259666572,
"summary" : {
"operation" : "append"
},
"manifest-list" :
"s3://icebergdata/demo/iceberg/rust/append_partition_data_file_test/metadata/snap-8826880672679595429-0-01938e53-a487-7ee2-a75e-c061dea0853c.avro",
"schema-id" : 0
} ],
"statistics" : [ ],
"partition-statistics" : [ ],
"snapshot-log" : [ {
"timestamp-ms" : 1733259666572,
"snapshot-id" : 8826880672679595429
} ],
"metadata-log" : [ {
"timestamp-ms" : 1733259665987,
"metadata-file" :
"s3://icebergdata/demo/iceberg/rust/append_partition_data_file_test/metadata/00000-647ca34f-8a7b-4a44-8d28-775bc62ef650.metadata.json"
} ]
}
```
Which looks good. The manifest list entry:
```json
{
"manifest_path":
"s3://icebergdata/demo/iceberg/rust/append_partition_data_file_test/metadata/01938e53-a487-7ee2-a75e-c061dea0853c-m0.avro",
"manifest_length": 3391,
"partition_spec_id": 0,
"content": 0,
"sequence_number": 1,
"min_sequence_number": 1,
"added_snapshot_id": 8826880672679595000,
"added_files_count": 1,
"existing_files_count": 0,
"deleted_files_count": 0,
"added_rows_count": 2,
"existing_rows_count": 0,
"deleted_rows_count": 0,
"partitions": {
"array": [
{
"contains_null": false,
"contains_nan": null,
"lower_bound": {
"bytes": "d\u0000\u0000\u0000"
},
"upper_bound": {
"bytes": "d\u0000\u0000\u0000"
}
}
]
},
"key_metadata": null
}
```
Which also looks good. The manifest entry:
```json
{
"status": 1,
"snapshot_id": null,
"sequence_number": null,
"file_sequence_number": null,
"data_file": {
"content": 0,
"file_path":
"s3://icebergdata/demo/iceberg/rust/append_partition_data_file_test/data/test-00000.parquet",
"file_format": "PARQUET",
"partition": {
"id": {
"int": 100
}
},
"record_count": 2,
"file_size_in_bytes": 1160,
"column_sizes": {
"array": [
{
"key": 3,
"value": 36
},
{
"key": 1,
"value": 74
},
{
"key": 2,
"value": 55
}
]
},
"value_counts": {
"array": [
{
"key": 1,
"value": 2
},
{
"key": 3,
"value": 2
},
{
"key": 2,
"value": 2
}
]
},
"null_value_counts": {
"array": [
{
"key": 2,
"value": 0
},
{
"key": 3,
"value": 0
},
{
"key": 1,
"value": 0
}
]
},
"nan_value_counts": {
"array": []
},
"lower_bounds": {
"array": [
{
"key": 2,
"value": "d\u0000\u0000\u0000"
},
{
"key": 3,
"value": "\u0000"
},
{
"key": 1,
"value": "foo1"
}
]
},
"upper_bounds": {
"array": [
{
"key": 1,
"value": "foo2"
},
{
"key": 2,
"value": "d\u0000\u0000\u0000"
},
{
"key": 3,
"value": "\u0001"
}
]
},
"key_metadata": {
"bytes": ""
},
"split_offsets": {
"array": [
4
]
},
"equality_ids": {
"array": []
},
"sort_order_id": null
}
}
```
We probably just want to set `key_metadata` to `null` instead of empty
bytes.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]