This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 41258189 fix: Add check table UUID to detect table replacement (#2890)
41258189 is described below
commit 41258189173f94c9099fa55f149c7bc11c2033a1
Author: geruh <[email protected]>
AuthorDate: Thu Jan 15 16:12:08 2026 -0800
fix: Add check table UUID to detect table replacement (#2890)
# Rationale for this change
This PR adds table UUID validation on refresh and commit to detect when
a table has been replaced. For example, if a table is dropped and
recreated with the same name, this prevents accidentally operating on a
different table than expected.
Modeled after the Java implementation:
https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/BaseMetastoreTableOperations.java#L202-L209
Python was missing this check.
## Are these changes tested?
Yes. Added tests at the table level (tests/table/test_init.py) and the catalog level (tests/catalog/test_rest.py).
## Are there any user-facing changes?
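No API changes. Note that `Table.refresh()` and commits now raise `ValueError` when the table UUID returned by the catalog differs from the UUID of the table already loaded.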
---------
Co-authored-by: Kevin Liu <[email protected]>
---
pyiceberg/table/__init__.py | 13 +++++++
tests/catalog/test_rest.py | 88 +++++++++++++++++++++++++++++++++++++++++++++
tests/table/test_init.py | 18 ++++++++++
3 files changed, 119 insertions(+)
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index ae5eb400..9fdf5a70 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -1133,6 +1133,7 @@ class Table:
An updated instance of the same Iceberg table
"""
fresh = self.catalog.load_table(self._identifier)
+ self._check_uuid(self.metadata, fresh.metadata)
self.metadata = fresh.metadata
self.io = fresh.io
self.metadata_location = fresh.metadata_location
@@ -1513,9 +1514,21 @@ class Table:
"""Return the snapshot references in the table."""
return self.metadata.refs
+ @staticmethod
+ def _check_uuid(current_metadata: TableMetadata, new_metadata: TableMetadata) -> None:
+ """Validate that the table UUID matches after refresh."""
+ current = current_metadata.table_uuid
+ refreshed = new_metadata.table_uuid
+
+ if current != refreshed:
+ raise ValueError(f"Table UUID does not match: current={current} != refreshed={refreshed}")
+
def _do_commit(self, updates: tuple[TableUpdate, ...], requirements: tuple[TableRequirement, ...]) -> None:
response = self.catalog.commit_table(self, requirements, updates)
+ # Ensure table uuid has not changed
+ self._check_uuid(self.metadata, response.metadata)
+
# https://github.com/apache/iceberg/blob/f6faa58/core/src/main/java/org/apache/iceberg/CatalogUtil.java#L527
# delete old metadata if METADATA_DELETE_AFTER_COMMIT_ENABLED is set to true and uses
# TableProperties.METADATA_PREVIOUS_VERSIONS_MAX to determine how many previous versions to keep -
diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py
index 8b9cbd89..37c373cc 100644
--- a/tests/catalog/test_rest.py
+++ b/tests/catalog/test_rest.py
@@ -57,6 +57,7 @@ from pyiceberg.table.metadata import TableMetadataV1
from pyiceberg.table.sorting import SortField, SortOrder
from pyiceberg.transforms import IdentityTransform, TruncateTransform
from pyiceberg.typedef import RecursiveDict
+from pyiceberg.types import StringType
from pyiceberg.utils.config import Config
TEST_URI = "https://iceberg-test-catalog/"
@@ -1165,6 +1166,9 @@ def test_create_staged_table_200(
example_table_metadata_with_no_location: dict[str, Any],
example_table_metadata_no_snapshot_v1_rest_json: dict[str, Any],
) -> None:
+ expected_table_uuid = example_table_metadata_with_no_location["metadata"]["table-uuid"]
+ example_table_metadata_no_snapshot_v1_rest_json["metadata"]["table-uuid"] = expected_table_uuid
+
rest_mock.post(
f"{TEST_URI}v1/namespaces/fokko/tables",
json=example_table_metadata_with_no_location,
@@ -2226,3 +2230,87 @@ class TestRestCatalogClose:
# View endpoints should be supported when enabled
catalog._check_endpoint(Capability.V1_LIST_VIEWS)
catalog._check_endpoint(Capability.V1_DELETE_VIEW)
+
+
+def test_table_uuid_check_on_commit(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
+ """Test that UUID mismatch is detected on commit response (matches Java RESTTableOperations behavior)."""
+ original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
+ different_uuid = "550e8400-e29b-41d4-a716-446655440000"
+ metadata_location = "s3://warehouse/database/table/metadata.json"
+
+ rest_mock.get(
+ f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
+ json={
+ "metadata-location": metadata_location,
+ "metadata": example_table_metadata_v2,
+ "config": {},
+ },
+ status_code=200,
+ request_headers=TEST_HEADERS,
+ )
+
+ catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
+ table = catalog.load_table(("namespace", "table_name"))
+
+ assert str(table.metadata.table_uuid) == original_uuid
+
+ metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
+
+ rest_mock.post(
+ f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
+ json={
+ "metadata-location": metadata_location,
+ "metadata": metadata_with_different_uuid,
+ },
+ status_code=200,
+ request_headers=TEST_HEADERS,
+ )
+
+ with pytest.raises(ValueError) as exc_info:
+ table.update_schema().add_column("new_col", StringType()).commit()
+
+ assert "Table UUID does not match" in str(exc_info.value)
+ assert f"current={original_uuid}" in str(exc_info.value)
+ assert f"refreshed={different_uuid}" in str(exc_info.value)
+
+
+def test_table_uuid_check_on_refresh(rest_mock: Mocker, example_table_metadata_v2: dict[str, Any]) -> None:
+ original_uuid = "9c12d441-03fe-4693-9a96-a0705ddf69c1"
+ different_uuid = "550e8400-e29b-41d4-a716-446655440000"
+ metadata_location = "s3://warehouse/database/table/metadata.json"
+
+ rest_mock.get(
+ f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
+ json={
+ "metadata-location": metadata_location,
+ "metadata": example_table_metadata_v2,
+ "config": {},
+ },
+ status_code=200,
+ request_headers=TEST_HEADERS,
+ )
+
+ catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
+ table = catalog.load_table(("namespace", "table_name"))
+
+ assert str(table.metadata.table_uuid) == original_uuid
+
+ metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
+
+ rest_mock.get(
+ f"{TEST_URI}v1/namespaces/namespace/tables/table_name",
+ json={
+ "metadata-location": metadata_location,
+ "metadata": metadata_with_different_uuid,
+ "config": {},
+ },
+ status_code=200,
+ request_headers=TEST_HEADERS,
+ )
+
+ with pytest.raises(ValueError) as exc_info:
+ table.refresh()
+
+ assert "Table UUID does not match" in str(exc_info.value)
+ assert f"current={original_uuid}" in str(exc_info.value)
+ assert f"refreshed={different_uuid}" in str(exc_info.value)
diff --git a/tests/table/test_init.py b/tests/table/test_init.py
index e40513fe..ff5fbbf3 100644
--- a/tests/table/test_init.py
+++ b/tests/table/test_init.py
@@ -1639,3 +1639,21 @@ def model_roundtrips(model: BaseModel) -> bool:
if model != type(model).model_validate(model_data):
pytest.fail(f"model {type(model)} did not roundtrip successfully")
return True
+
+
+def test_check_uuid_raises_when_mismatch(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
+ different_uuid = "550e8400-e29b-41d4-a716-446655440000"
+ metadata_with_different_uuid = {**example_table_metadata_v2, "table-uuid": different_uuid}
+ new_metadata = TableMetadataV2(**metadata_with_different_uuid)
+
+ with pytest.raises(ValueError) as exc_info:
+ Table._check_uuid(table_v2.metadata, new_metadata)
+
+ assert "Table UUID does not match" in str(exc_info.value)
+ assert different_uuid in str(exc_info.value)
+
+
+def test_check_uuid_passes_when_match(table_v2: Table, example_table_metadata_v2: dict[str, Any]) -> None:
+ new_metadata = TableMetadataV2(**example_table_metadata_v2)
+ # Should not raise with same uuid
+ Table._check_uuid(table_v2.metadata, new_metadata)