a-agmon commented on issue #338:
URL: https://github.com/apache/iceberg-rust/issues/338#issuecomment-2092089365
> Can you share the metadata JSON? I don't think the field ID resolution is
> being applied, described in issue #353. `added_data_files_count` is the old
> name since in V2 it also included delete files. The name should not matter and
> should be corrected as in #354.
Thanks @Fokko and @zeodtr
Here is the metadata.json (I had to anonymize a few things).
It was generated by Trino (via its DBT connector). The ManifestList entry
that it references follows after the metadata.
```
{
"format-version" : 2,
"table-uuid" : "aa3b9ef5-c067-4a08-8e9e-8a061d6c64e1",
"location" :
"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8",
"last-sequence-number" : 1,
"last-updated-ms" : 1714657784414,
"last-column-id" : 12,
"current-schema-id" : 0,
"schemas" : [ {
"type" : "struct",
"schema-id" : 0,
"fields" : [ {
"id" : 1,
"name" : "row_id",
"required" : false,
"type" : "string"
}, {
"id" : 2,
"name" : "dt",
"required" : false,
"type" : "date"
}, {
"id" : 3,
"name" : "sub_acc_id",
"required" : false,
"type" : "string"
}, {
"id" : 4,
"name" : "master_acc_id",
"required" : false,
"type" : "string"
}
........... shortened
, {
"id" : 12,
"name" : "total_quantity",
"required" : false,
"type" : "long"
} ]
} ],
"default-spec-id" : 0,
"partition-specs" : [ {
"spec-id" : 0,
"fields" : [ {
"name" : "dt_month",
"transform" : "month",
"source-id" : 2,
"field-id" : 1000
} ]
} ],
"last-partition-id" : 1000,
"default-sort-order-id" : 0,
"sort-orders" : [ {
"order-id" : 0,
"fields" : [ ]
} ],
"properties" : {
"write.format.default" : "PARQUET",
"write.parquet.compression-codec" : "zstd"
},
"current-snapshot-id" : 6685531058123427778,
"refs" : {
"main" : {
"snapshot-id" : 6685531058123427778,
"type" : "branch"
}
},
"snapshots" : [ {
"sequence-number" : 1,
"snapshot-id" : 6685531058123427778,
"timestamp-ms" : 1714657783993,
"summary" : {
"operation" : "append",
"trino_query_id" : "20240502_134523_18961_n5bhn",
"added-data-files" : "503",
"added-records" : "176720085",
"added-files-size" : "3221301527",
"changed-partition-count" : "32",
"total-records" : "176720085",
"total-files-size" : "3221301527",
"total-data-files" : "503",
"total-delete-files" : "0",
"total-position-deletes" : "0",
"total-equality-deletes" : "0"
},
"manifest-list" :
"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/snap-6685531058123427778-1-2584bfc0-abb3-4257-a61c-0498a8e29dc4.avro",
"schema-id" : 0
} ],
"statistics" : [ {
"snapshot-id" : 6685531058123427778,
"statistics-path" :
"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/20240502_134523_18961_n5bhn-748b8bf4-ef54-4882-8d7f-2f6b9edbe85b.stats",
"file-size-in-bytes" : 294179,
"file-footer-size-in-bytes" : 2418,
"blob-metadata" : [ {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 1 ],
"properties" : {
"ndv" : "172900302"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 2 ],
"properties" : {
"ndv" : "944"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 3 ],
"properties" : {
"ndv" : "47786"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 4 ],
"properties" : {
"ndv" : "47513"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 5 ],
"properties" : {
"ndv" : "195336"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 6 ],
"properties" : {
"ndv" : "171901"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 7 ],
"properties" : {
"ndv" : "7237440"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 8 ],
"properties" : {
"ndv" : "4"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 9 ],
"properties" : {
"ndv" : "42995"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 10 ],
"properties" : {
"ndv" : "47504"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 11 ],
"properties" : {
"ndv" : "10"
}
}, {
"type" : "apache-datasketches-theta-v1",
"snapshot-id" : 6685531058123427778,
"sequence-number" : 1,
"fields" : [ 12 ],
"properties" : {
"ndv" : "583133"
}
} ]
} ],
"partition-statistics" : [ ],
"snapshot-log" : [ {
"timestamp-ms" : 1714657783993,
"snapshot-id" : 6685531058123427778
} ],
"metadata-log" : [ {
"timestamp-ms" : 1714657783993,
"metadata-file" :
"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/00000-52cc6f6a-259c-4ca8-ba89-cd3a99b9eedb.metadata.json"
} ]
}
```
The referenced ManifestList entry (decoded from the snapshot's Avro manifest list):
```
{
"manifest_path":"s3://*********/dbt/***.db/*********-e562cf0876494c9f9d61e4f044f16ce8/metadata/2584bfc0-abb3-4257-a61c-0498a8e29dc4-m0.avro",
"manifest_length":98246,
"partition_spec_id":0,
"content":0,
"sequence_number":1,
"min_sequence_number":1,
"added_snapshot_id":6685531058123427778,
"added_files_count":503,
"existing_files_count":0,
"deleted_files_count":0,
"added_rows_count":176720085,
"existing_rows_count":0,
"deleted_rows_count":0,
"partitions":{
"array":[
{
"contains_null":false,
"contains_nan":{
"boolean":false
},
"lower_bound":{
"bytes":"m\u0002\u0000\u0000"
},
"upper_bound":{
"bytes":"\u0002\u0000\u0000"
}
}
]
}
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]