kevinjqliu commented on code in PR #392:
URL: https://github.com/apache/iceberg-python/pull/392#discussion_r1482123800
##########
tests/integration/test_rest_schema.py:
##########
@@ -2497,3 +2500,32 @@ def
test_two_add_schemas_in_a_single_transaction(catalog: Catalog) -> None:
assert "Updates in a single commit need to be unique, duplicate: <class
'pyiceberg.table.AddSchemaUpdate'>" in str(
exc_info.value
)
+
+
+@pytest.mark.integration
+def test_create_table_integrity_after_fresh_assignment(catalog: Catalog) ->
None:
+ schema = Schema(
+ NestedField(field_id=5, name="col_uuid", field_type=UUIDType(),
required=False),
+ NestedField(field_id=4, name="col_fixed", field_type=FixedType(25),
required=False),
+ )
+ partition_spec = PartitionSpec(
+ PartitionField(source_id=5, field_id=1000,
transform=IdentityTransform(), name="col_uuid"), spec_id=0
+ )
+ sort_order = SortOrder(SortField(source_id=4,
transform=IdentityTransform()))
+ tbl_name = "default.test_create_integrity"
+ try:
+ catalog.drop_table(tbl_name)
+ except NoSuchTableError:
+ pass
+ tbl = catalog.create_table(identifier=tbl_name, schema=schema,
partition_spec=partition_spec, sort_order=sort_order)
+ expected_schema = Schema(
+ NestedField(field_id=1, name="col_uuid", field_type=UUIDType(),
required=False),
+ NestedField(field_id=2, name="col_fixed", field_type=FixedType(25),
required=False),
+ )
+ expected_spec = PartitionSpec(
+ PartitionField(source_id=1, field_id=1000,
transform=IdentityTransform(), name="col_uuid"), spec_id=0
+ )
+ expected_sort_order = SortOrder(SortField(source_id=2,
transform=IdentityTransform()))
+ assert tbl.schema() == expected_schema
+ assert tbl.spec() == expected_spec
+ assert tbl.sort_order() == expected_sort_order
Review Comment:
`SortOrder` doesn't seem to define an `__eq__` method:
https://github.com/apache/iceberg-python/blob/cec051f230edfb584f1267e505ee218305389c11/pyiceberg/table/sorting.py#L127-L164
What does this equality comparison actually check?
##########
tests/integration/test_rest_schema.py:
##########
@@ -2497,3 +2500,32 @@ def
test_two_add_schemas_in_a_single_transaction(catalog: Catalog) -> None:
assert "Updates in a single commit need to be unique, duplicate: <class
'pyiceberg.table.AddSchemaUpdate'>" in str(
exc_info.value
)
+
+
+@pytest.mark.integration
+def test_create_table_integrity_after_fresh_assignment(catalog: Catalog) ->
None:
+ schema = Schema(
+ NestedField(field_id=5, name="col_uuid", field_type=UUIDType(),
required=False),
+ NestedField(field_id=4, name="col_fixed", field_type=FixedType(25),
required=False),
+ )
+ partition_spec = PartitionSpec(
+ PartitionField(source_id=5, field_id=1000,
transform=IdentityTransform(), name="col_uuid"), spec_id=0
+ )
+ sort_order = SortOrder(SortField(source_id=4,
transform=IdentityTransform()))
+ tbl_name = "default.test_create_integrity"
+ try:
+ catalog.drop_table(tbl_name)
+ except NoSuchTableError:
+ pass
+ tbl = catalog.create_table(identifier=tbl_name, schema=schema,
partition_spec=partition_spec, sort_order=sort_order)
+ expected_schema = Schema(
+ NestedField(field_id=1, name="col_uuid", field_type=UUIDType(),
required=False),
+ NestedField(field_id=2, name="col_fixed", field_type=FixedType(25),
required=False),
+ )
+ expected_spec = PartitionSpec(
+ PartitionField(source_id=1, field_id=1000,
transform=IdentityTransform(), name="col_uuid"), spec_id=0
+ )
+ expected_sort_order = SortOrder(SortField(source_id=2,
transform=IdentityTransform()))
+ assert tbl.schema() == expected_schema
Review Comment:
nit: should we also check `tbl.schema().schema_id`, since `Schema`'s `__eq__`
doesn't compare it?
https://github.com/apache/iceberg-python/blob/cec051f230edfb584f1267e505ee218305389c11/pyiceberg/schema.py#L104-L118
##########
tests/integration/test_rest_schema.py:
##########
@@ -2497,3 +2500,32 @@ def
test_two_add_schemas_in_a_single_transaction(catalog: Catalog) -> None:
assert "Updates in a single commit need to be unique, duplicate: <class
'pyiceberg.table.AddSchemaUpdate'>" in str(
exc_info.value
)
+
+
+@pytest.mark.integration
+def test_create_table_integrity_after_fresh_assignment(catalog: Catalog) ->
None:
+ schema = Schema(
+ NestedField(field_id=5, name="col_uuid", field_type=UUIDType(),
required=False),
+ NestedField(field_id=4, name="col_fixed", field_type=FixedType(25),
required=False),
+ )
+ partition_spec = PartitionSpec(
+ PartitionField(source_id=5, field_id=1000,
transform=IdentityTransform(), name="col_uuid"), spec_id=0
+ )
+ sort_order = SortOrder(SortField(source_id=4,
transform=IdentityTransform()))
+ tbl_name = "default.test_create_integrity"
+ try:
+ catalog.drop_table(tbl_name)
+ except NoSuchTableError:
+ pass
+ tbl = catalog.create_table(identifier=tbl_name, schema=schema,
partition_spec=partition_spec, sort_order=sort_order)
+ expected_schema = Schema(
+ NestedField(field_id=1, name="col_uuid", field_type=UUIDType(),
required=False),
+ NestedField(field_id=2, name="col_fixed", field_type=FixedType(25),
required=False),
+ )
+ expected_spec = PartitionSpec(
+ PartitionField(source_id=1, field_id=1000,
transform=IdentityTransform(), name="col_uuid"), spec_id=0
+ )
+ expected_sort_order = SortOrder(SortField(source_id=2,
transform=IdentityTransform()))
+ assert tbl.schema() == expected_schema
+ assert tbl.spec() == expected_spec
Review Comment:
Note: `PartitionSpec`'s `__eq__` also compares the `spec_id`.
https://github.com/apache/iceberg-python/blob/cec051f230edfb584f1267e505ee218305389c11/pyiceberg/partitioning.py#L110-L119
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]