This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new fb789047 maint: common catalog integration test suite (#2090)
fb789047 is described below

commit fb78904714e47e8ca1d281bc4a4796347faa3310
Author: Jayce Slesar <[email protected]>
AuthorDate: Sun Jul 20 15:52:52 2025 -0400

    maint: common catalog integration test suite (#2090)
    
    In pursuit of https://github.com/apache/iceberg-python/issues/813
    
    ---------
    
    Co-authored-by: Kevin Liu <[email protected]>
---
 dev/docker-compose-integration.yml           |   1 +
 pyiceberg/catalog/hive.py                    |   2 +-
 pyiceberg/catalog/rest/__init__.py           |   2 +-
 tests/catalog/test_hive.py                   |   2 +-
 tests/conftest.py                            |   2 +-
 tests/integration/test_catalog.py            | 316 +++++++++++++++++++++++++++
 tests/integration/test_writes/test_writes.py |   2 +-
 7 files changed, 322 insertions(+), 5 deletions(-)

diff --git a/dev/docker-compose-integration.yml b/dev/docker-compose-integration.yml
index 500a042e..c901b2ee 100644
--- a/dev/docker-compose-integration.yml
+++ b/dev/docker-compose-integration.yml
@@ -53,6 +53,7 @@ services:
       - CATALOG_WAREHOUSE=s3://warehouse/
       - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
       - CATALOG_S3_ENDPOINT=http://minio:9000
+      - CATALOG_JDBC_STRICT__MODE=true
   minio:
     image: minio/minio
     container_name: pyiceberg-minio
diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py
index cc9cd028..eef6bbad 100644
--- a/pyiceberg/catalog/hive.py
+++ b/pyiceberg/catalog/hive.py
@@ -800,7 +800,7 @@ class HiveCatalog(MetastoreCatalog):
             if removals:
                 for key in removals:
                     if key in parameters:
-                        parameters[key] = None
+                        parameters.pop(key)
                         removed.add(key)
             if updates:
                 for key, value in updates.items():
diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py
index 6215d17a..0972d779 100644
--- a/pyiceberg/catalog/rest/__init__.py
+++ b/pyiceberg/catalog/rest/__init__.py
@@ -505,7 +505,7 @@ class RestCatalog(Catalog):
         try:
             response.raise_for_status()
         except HTTPError as exc:
-            _handle_non_200_response(exc, {409: TableAlreadyExistsError})
+            _handle_non_200_response(exc, {409: TableAlreadyExistsError, 404: NoSuchNamespaceError})
         return TableResponse.model_validate_json(response.text)
 
     @retry(**_RETRY_ARGS)
diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py
index a36425eb..1edb4f72 100644
--- a/tests/catalog/test_hive.py
+++ b/tests/catalog/test_hive.py
@@ -1156,7 +1156,7 @@ def test_update_namespace_properties(hive_database: HiveDatabase) -> None:
             name="default",
             description=None,
             locationUri=hive_database.locationUri,
-            parameters={"test": None, "label": "core"},
+            parameters={"label": "core"},
             privileges=None,
             ownerName=None,
             ownerType=1,
diff --git a/tests/conftest.py b/tests/conftest.py
index 7b5256f3..584b6c63 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2324,7 +2324,7 @@ def clean_up(test_catalog: Catalog) -> None:
         database_name = database_tuple[0]
         if "my_iceberg_database-" in database_name:
             for identifier in test_catalog.list_tables(database_name):
-                test_catalog.purge_table(identifier)
+                test_catalog.drop_table(identifier)
             test_catalog.drop_namespace(database_name)
 
 
diff --git a/tests/integration/test_catalog.py b/tests/integration/test_catalog.py
new file mode 100644
index 00000000..123aca1b
--- /dev/null
+++ b/tests/integration/test_catalog.py
@@ -0,0 +1,316 @@
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+from pathlib import Path, PosixPath
+from typing import Generator, List
+
+import pytest
+
+from pyiceberg.catalog import Catalog, MetastoreCatalog
+from pyiceberg.catalog.hive import HiveCatalog
+from pyiceberg.catalog.memory import InMemoryCatalog
+from pyiceberg.catalog.rest import RestCatalog
+from pyiceberg.catalog.sql import SqlCatalog
+from pyiceberg.exceptions import (
+    NamespaceAlreadyExistsError,
+    NamespaceNotEmptyError,
+    NoSuchNamespaceError,
+    NoSuchTableError,
+    TableAlreadyExistsError,
+)
+from pyiceberg.io import WAREHOUSE
+from pyiceberg.schema import Schema
+from tests.conftest import clean_up
+
+
[email protected](scope="function")
+def memory_catalog(tmp_path: PosixPath) -> Generator[Catalog, None, None]:
+    test_catalog = InMemoryCatalog(
+        "test.in_memory.catalog", **{WAREHOUSE: tmp_path.absolute().as_posix(), "test.key": "test.value"}
+    )
+    yield test_catalog
+
+    clean_up(test_catalog)
+
+
[email protected](scope="function")
+def sqlite_catalog_memory(warehouse: Path) -> Generator[Catalog, None, None]:
+    test_catalog = SqlCatalog("sqlitememory", uri="sqlite:///:memory:", warehouse=f"file://{warehouse}")
+
+    yield test_catalog
+
+    clean_up(test_catalog)
+
+
[email protected](scope="function")
+def sqlite_catalog_file(warehouse: Path) -> Generator[Catalog, None, None]:
+    test_catalog = SqlCatalog("sqlitefile", uri=f"sqlite:////{warehouse}/sql-catalog.db", warehouse=f"file://{warehouse}")
+
+    yield test_catalog
+
+    clean_up(test_catalog)
+
+
[email protected](scope="function")
+def rest_catalog() -> Generator[Catalog, None, None]:
+    test_catalog = RestCatalog("rest", uri="http://localhost:8181")
+
+    yield test_catalog
+
+    clean_up(test_catalog)
+
+
[email protected](scope="function")
+def hive_catalog() -> Generator[Catalog, None, None]:
+    test_catalog = HiveCatalog(
+        "test_hive_catalog",
+        **{
+            "uri": "http://localhost:9083",
+            "s3.endpoint": "http://localhost:9000",
+            "s3.access-key-id": "admin",
+            "s3.secret-access-key": "password",
+        },
+    )
+    yield test_catalog
+    clean_up(test_catalog)
+
+
+CATALOGS = [
+    pytest.lazy_fixture("memory_catalog"),
+    pytest.lazy_fixture("sqlite_catalog_memory"),
+    pytest.lazy_fixture("sqlite_catalog_file"),
+    pytest.lazy_fixture("rest_catalog"),
+    pytest.lazy_fixture("hive_catalog"),
+]
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_create_table_with_default_location(
+    test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str
+) -> None:
+    identifier = (database_name, table_name)
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_table(identifier, table_schema_nested)
+    table = test_catalog.load_table(identifier)
+    assert table.name() == identifier
+    assert MetastoreCatalog._parse_metadata_version(table.metadata_location) == 0
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_create_table_with_invalid_database(test_catalog: Catalog, table_schema_nested: Schema, table_name: str) -> None:
+    identifier = ("invalid", table_name)
+    with pytest.raises(NoSuchNamespaceError):
+        test_catalog.create_table(identifier, table_schema_nested)
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_create_duplicated_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_table((database_name, table_name), table_schema_nested)
+    with pytest.raises(TableAlreadyExistsError):
+        test_catalog.create_table((database_name, table_name), table_schema_nested)
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_create_table_if_not_exists_duplicated_table(
+    test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str
+) -> None:
+    test_catalog.create_namespace(database_name)
+    table1 = test_catalog.create_table((database_name, table_name), table_schema_nested)
+    table2 = test_catalog.create_table_if_not_exists((database_name, table_name), table_schema_nested)
+    assert table1.name() == table2.name()
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_load_table(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
+    identifier = (database_name, table_name)
+    test_catalog.create_namespace(database_name)
+    table = test_catalog.create_table(identifier, table_schema_nested)
+    loaded_table = test_catalog.load_table(identifier)
+    assert table.name() == loaded_table.name()
+    assert table.metadata_location == loaded_table.metadata_location
+    assert table.metadata == loaded_table.metadata
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_list_tables(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_list: List[str]) -> None:
+    test_catalog.create_namespace(database_name)
+    for table_name in table_list:
+        test_catalog.create_table((database_name, table_name), table_schema_nested)
+    identifier_list = test_catalog.list_tables(database_name)
+    assert len(identifier_list) == len(table_list)
+    for table_name in table_list:
+        assert (database_name, table_name) in identifier_list
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_rename_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
+    new_database_name = f"{database_name}_new"
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_namespace(new_database_name)
+    new_table_name = f"rename-{table_name}"
+    identifier = (database_name, table_name)
+    table = test_catalog.create_table(identifier, table_schema_nested)
+    assert table.name() == identifier
+    new_identifier = (new_database_name, new_table_name)
+    test_catalog.rename_table(identifier, new_identifier)
+    new_table = test_catalog.load_table(new_identifier)
+    assert new_table.name() == new_identifier
+    assert new_table.metadata_location == table.metadata_location
+    with pytest.raises(NoSuchTableError):
+        test_catalog.load_table(identifier)
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
+    identifier = (database_name, table_name)
+    test_catalog.create_namespace(database_name)
+    table = test_catalog.create_table(identifier, table_schema_nested)
+    assert table.name() == identifier
+    test_catalog.drop_table(identifier)
+    with pytest.raises(NoSuchTableError):
+        test_catalog.load_table(identifier)
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_purge_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
+    if isinstance(test_catalog, HiveCatalog):
+        pytest.skip("HiveCatalog does not support purge_table operation yet")
+
+    identifier = (database_name, table_name)
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_table(identifier, table_schema_nested)
+    table = test_catalog.load_table(identifier)
+    assert table.name() == identifier
+    test_catalog.purge_table(identifier)
+    with pytest.raises(NoSuchTableError):
+        test_catalog.load_table(identifier)
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_table_exists(test_catalog: Catalog, table_schema_nested: Schema, database_name: str, table_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_table((database_name, table_name), table_schema_nested)
+    assert test_catalog.table_exists((database_name, table_name)) is True
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_create_namespace(test_catalog: Catalog, database_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    assert (database_name,) in test_catalog.list_namespaces()
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_create_duplicate_namespace(test_catalog: Catalog, database_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    with pytest.raises(NamespaceAlreadyExistsError):
+        test_catalog.create_namespace(database_name)
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_create_namepsace_if_not_exists(test_catalog: Catalog, database_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    test_catalog.create_namespace_if_not_exists(database_name)
+    assert (database_name,) in test_catalog.list_namespaces()
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_create_namespace_with_comment(test_catalog: Catalog, database_name: str) -> None:
+    test_properties = {
+        "comment": "this is a test description",
+    }
+    test_catalog.create_namespace(namespace=database_name, properties=test_properties)
+    loaded_database_list = test_catalog.list_namespaces()
+    assert (database_name,) in loaded_database_list
+    properties = test_catalog.load_namespace_properties(database_name)
+    assert properties["comment"] == "this is a test description"
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_list_namespaces(test_catalog: Catalog, database_list: List[str]) -> None:
+    for database_name in database_list:
+        test_catalog.create_namespace(database_name)
+    db_list = test_catalog.list_namespaces()
+    for database_name in database_list:
+        assert (database_name,) in db_list
+    assert len(test_catalog.list_namespaces(list(database_list)[0])) == 0
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_drop_namespace(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
+    test_catalog.create_namespace(database_name)
+    assert (database_name,) in test_catalog.list_namespaces()
+    test_catalog.create_table((database_name, table_name), table_schema_nested)
+    with pytest.raises(NamespaceNotEmptyError):
+        test_catalog.drop_namespace(database_name)
+    test_catalog.drop_table((database_name, table_name))
+    test_catalog.drop_namespace(database_name)
+    assert (database_name,) not in test_catalog.list_namespaces()
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_load_namespace_properties(test_catalog: Catalog, database_name: str) -> None:
+    test_properties = {
+        "comment": "this is a test description",
+        "test_property1": "1",
+        "test_property2": "2",
+        "test_property3": "3",
+    }
+    test_catalog.create_namespace(database_name, test_properties)
+    listed_properties = test_catalog.load_namespace_properties(database_name)
+    for k, v in test_properties.items():
+        assert v == listed_properties[k]
+
+
[email protected]
[email protected]("test_catalog", CATALOGS)
+def test_update_namespace_properties(test_catalog: Catalog, database_name: str) -> None:
+    test_properties = {
+        "comment": "this is a test description",
+        "test_property1": "1",
+        "test_property2": "2",
+        "test_property3": "3",
+    }
+    removals = {"test_property1", "test_property2", "test_property3", "should_not_removed"}
+    updates = {"test_property4": "4", "test_property5": "5", "comment": "updated test description"}
+    test_catalog.create_namespace(database_name, test_properties)
+    update_report = test_catalog.update_namespace_properties(database_name, removals, updates)
+    for k in updates.keys():
+        assert k in update_report.updated
+    for k in removals:
+        if k == "should_not_removed":
+            assert k in update_report.missing
+        else:
+            assert k in update_report.removed
+    assert "updated test description" == test_catalog.load_namespace_properties(database_name)["comment"]
diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py
index 624bf0d8..e63883c1 100644
--- a/tests/integration/test_writes/test_writes.py
+++ b/tests/integration/test_writes/test_writes.py
@@ -1540,7 +1540,7 @@ def test_rest_catalog_with_empty_catalog_name_append_data(session_catalog: Catal
 
 @pytest.mark.integration
 def test_table_v1_with_null_nested_namespace(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
-    identifier = "default.lower.table_v1_with_null_nested_namespace"
+    identifier = "default.table_v1_with_null_nested_namespace"
     tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [arrow_table_with_null])
     assert tbl.format_version == 1, f"Expected v1, got: v{tbl.format_version}"
 

Reply via email to