This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 4ba9a8c2 Add catalog properties for Catalog Tests (#2982)
4ba9a8c2 is described below
commit 4ba9a8c2649cf26c868d2f7a45962d7b72146e76
Author: Alex Stephen <[email protected]>
AuthorDate: Sun Feb 15 13:13:12 2026 -0800
Add catalog properties for Catalog Tests (#2982)
<!--
Thanks for opening a pull request!
-->
<!-- In the case this PR will resolve an issue, please replace
${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
<!-- Closes #${GITHUB_ISSUE_ID} -->
# Rationale for this change
I've added a set of catalog-level feature checks (the `does_support_*`
helpers in `tests/conftest.py`) to help the catalog tests. The goal is for the
catalog tests to work off of "features" instead of special-casing individual
catalog classes with `isinstance` checks.
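As part of this, I discovered a discrepancy: when dropping a nonexistent
namespace, HiveCatalog raised NoSuchObjectException instead of the
NoSuchNamespaceError the integration test expects from every catalog.
HiveCatalog now maps NoSuchObjectException to NoSuchNamespaceError. I'm happy
to change it back.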
## Are these changes tested?
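Mostly a test change. The HiveCatalog error-mapping change is exercised by
`test_drop_nonexistent_namespace`, which no longer skips HiveCatalog.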
## Are there any user-facing changes?
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
pyiceberg/catalog/__init__.py | 5 +++-
pyiceberg/catalog/hive.py | 2 +-
pyiceberg/catalog/noop.py | 3 +++
tests/conftest.py | 54 +++++++++++++++++++++++++++++++++++++++
tests/integration/test_catalog.py | 42 ++++++++++++++++--------------
5 files changed, 85 insertions(+), 21 deletions(-)
diff --git a/pyiceberg/catalog/__init__.py b/pyiceberg/catalog/__init__.py
index 31552155..b4ca9a2e 100644
--- a/pyiceberg/catalog/__init__.py
+++ b/pyiceberg/catalog/__init__.py
@@ -733,9 +733,9 @@ class Catalog(ABC):
return ".".join(segment.strip() for segment in tuple_identifier)
+ @abstractmethod
def supports_server_side_planning(self) -> bool:
"""Check if the catalog supports server-side scan planning."""
- return False
@staticmethod
def identifier_to_database(
@@ -836,6 +836,9 @@ class MetastoreCatalog(Catalog, ABC):
def __init__(self, name: str, **properties: str):
super().__init__(name, **properties)
+ def supports_server_side_planning(self) -> bool:
+ return False
+
def create_table_transaction(
self,
identifier: str | Identifier,
diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py
index 1bec186c..486f2dd0 100644
--- a/pyiceberg/catalog/hive.py
+++ b/pyiceberg/catalog/hive.py
@@ -729,7 +729,7 @@ class HiveCatalog(MetastoreCatalog):
open_client.drop_database(database_name, deleteData=False,
cascade=False)
except InvalidOperationException as e:
raise NamespaceNotEmptyError(f"Database {database_name} is not
empty") from e
- except MetaException as e:
+ except (MetaException, NoSuchObjectException) as e:
raise NoSuchNamespaceError(f"Database does not exists:
{database_name}") from e
def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
diff --git a/pyiceberg/catalog/noop.py b/pyiceberg/catalog/noop.py
index 0dc6fdb7..62f8552a 100644
--- a/pyiceberg/catalog/noop.py
+++ b/pyiceberg/catalog/noop.py
@@ -85,6 +85,9 @@ class NoopCatalog(Catalog):
def drop_table(self, identifier: str | Identifier) -> None:
raise NotImplementedError
+ def supports_server_side_planning(self) -> bool:
+ raise NotImplementedError
+
def purge_table(self, identifier: str | Identifier) -> None:
raise NotImplementedError
diff --git a/tests/conftest.py b/tests/conftest.py
index e042924b..5c85f49a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -49,8 +49,13 @@ from pydantic_core import to_json
from pytest_lazy_fixtures import lf
from pyiceberg.catalog import Catalog, load_catalog
+from pyiceberg.catalog.bigquery_metastore import BigQueryMetastoreCatalog
+from pyiceberg.catalog.dynamodb import DynamoDbCatalog
+from pyiceberg.catalog.glue import GlueCatalog
+from pyiceberg.catalog.hive import HiveCatalog
from pyiceberg.catalog.memory import InMemoryCatalog
from pyiceberg.catalog.noop import NoopCatalog
+from pyiceberg.catalog.rest import RestCatalog
from pyiceberg.catalog.sql import SqlCatalog
from pyiceberg.expressions import BoundReference
from pyiceberg.io import (
@@ -98,6 +103,7 @@ from pyiceberg.types import (
UUIDType,
)
from pyiceberg.utils.datetime import datetime_to_millis
+from pyiceberg.utils.properties import property_as_bool
if TYPE_CHECKING:
import pyarrow as pa
@@ -3143,3 +3149,51 @@ def test_table_properties() -> dict[str, str]:
"key1": "value1",
"key2": "value2",
}
+
+
+def does_support_purge_table(catalog: Catalog) -> bool:
+ if isinstance(catalog, RestCatalog):
+ return property_as_bool(catalog.properties, "supports_purge_table",
True)
+ if isinstance(catalog, (HiveCatalog, NoopCatalog)):
+ return False
+ return True
+
+
+def does_support_atomic_concurrent_updates(catalog: Catalog) -> bool:
+ if isinstance(catalog, RestCatalog):
+ return property_as_bool(catalog.properties,
"supports_atomic_concurrent_updates", True)
+ if isinstance(catalog, (HiveCatalog, NoopCatalog)):
+ return False
+ return True
+
+
+def does_support_nested_namespaces(catalog: Catalog) -> bool:
+ if isinstance(catalog, RestCatalog):
+ return property_as_bool(catalog.properties,
"supports_nested_namespaces", True)
+ if isinstance(catalog, (HiveCatalog, NoopCatalog, GlueCatalog,
BigQueryMetastoreCatalog, DynamoDbCatalog)):
+ return False
+ return True
+
+
+def does_support_schema_evolution(catalog: Catalog) -> bool:
+ if isinstance(catalog, RestCatalog):
+ return property_as_bool(catalog.properties,
"supports_schema_evolution", True)
+ if isinstance(catalog, (HiveCatalog, NoopCatalog)):
+ return False
+ return True
+
+
+def does_support_slash_in_identifier(catalog: Catalog) -> bool:
+ if isinstance(catalog, RestCatalog):
+ return property_as_bool(catalog.properties,
"supports_slash_in_identifier", True)
+ if isinstance(catalog, (HiveCatalog, NoopCatalog, SqlCatalog)):
+ return False
+ return True
+
+
+def does_support_dot_in_identifier(catalog: Catalog) -> bool:
+ if isinstance(catalog, RestCatalog):
+ return property_as_bool(catalog.properties,
"supports_dot_in_identifier", True)
+ if isinstance(catalog, (HiveCatalog, NoopCatalog, SqlCatalog)):
+ return False
+ return True
diff --git a/tests/integration/test_catalog.py
b/tests/integration/test_catalog.py
index 130d5cd7..590c9019 100644
--- a/tests/integration/test_catalog.py
+++ b/tests/integration/test_catalog.py
@@ -44,7 +44,15 @@ from pyiceberg.table.metadata import INITIAL_SPEC_ID
from pyiceberg.table.sorting import INITIAL_SORT_ORDER_ID, SortField, SortOrder
from pyiceberg.transforms import BucketTransform, DayTransform,
IdentityTransform
from pyiceberg.types import IntegerType, LongType, NestedField, TimestampType,
UUIDType
-from tests.conftest import clean_up
+from tests.conftest import (
+ clean_up,
+ does_support_atomic_concurrent_updates,
+ does_support_dot_in_identifier,
+ does_support_nested_namespaces,
+ does_support_purge_table,
+ does_support_schema_evolution,
+ does_support_slash_in_identifier,
+)
@pytest.fixture(scope="function")
@@ -247,8 +255,8 @@ def test_drop_table(test_catalog: Catalog,
table_schema_nested: Schema, table_na
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_purge_table(test_catalog: Catalog, table_schema_nested: Schema,
table_name: str, database_name: str) -> None:
- if isinstance(test_catalog, HiveCatalog):
- pytest.skip("HiveCatalog does not support purge_table operation yet")
+ if not does_support_purge_table(test_catalog):
+ pytest.skip("Catalog does not support purge_table operation")
identifier = (database_name, table_name)
test_catalog.create_namespace(database_name)
@@ -300,8 +308,8 @@ def test_update_table_transaction(test_catalog: Catalog,
test_schema: Schema, ta
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_update_schema_conflict(test_catalog: Catalog, test_schema: Schema,
table_name: str, database_name: str) -> None:
- if isinstance(test_catalog, HiveCatalog):
- pytest.skip("HiveCatalog fails in this test, need to investigate")
+ if not does_support_atomic_concurrent_updates(test_catalog):
+ pytest.skip("Catalog does not support atomic concurrent updates")
identifier = (database_name, table_name)
@@ -647,8 +655,8 @@ def test_rest_custom_namespace_separator(rest_catalog:
RestCatalog, table_schema
def test_incompatible_partitioned_schema_evolution(
test_catalog: Catalog, test_schema: Schema, test_partition_spec:
PartitionSpec, database_name: str, table_name: str
) -> None:
- if isinstance(test_catalog, HiveCatalog):
- pytest.skip("HiveCatalog does not support schema evolution")
+ if not does_support_schema_evolution(test_catalog):
+ pytest.skip(f"{type(test_catalog).__name__} does not support schema
evolution")
identifier = (database_name, table_name)
test_catalog.create_namespace(database_name)
@@ -676,7 +684,7 @@ def test_incompatible_partitioned_schema_evolution(
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_namespace_with_slash(test_catalog: Catalog) -> None:
- if isinstance(test_catalog, HiveCatalog):
+ if not does_support_slash_in_identifier(test_catalog):
pytest.skip(f"{type(test_catalog).__name__} does not support slash in
namespace")
namespace = ("new/db",)
@@ -701,8 +709,8 @@ def test_namespace_with_slash(test_catalog: Catalog) ->
None:
def test_incompatible_sorted_schema_evolution(
test_catalog: Catalog, test_schema: Schema, test_sort_order: SortOrder,
database_name: str, table_name: str
) -> None:
- if isinstance(test_catalog, HiveCatalog):
- pytest.skip("HiveCatalog does not support schema evolution")
+ if not does_support_schema_evolution(test_catalog):
+ pytest.skip(f"{type(test_catalog).__name__} does not support schema
evolution")
identifier = (database_name, table_name)
test_catalog.create_namespace(database_name)
@@ -721,7 +729,7 @@ def test_incompatible_sorted_schema_evolution(
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_namespace_with_dot(test_catalog: Catalog) -> None:
- if isinstance(test_catalog, (HiveCatalog, SqlCatalog)):
+ if not does_support_dot_in_identifier(test_catalog):
pytest.skip(f"{type(test_catalog).__name__} does not support dot in
namespace")
namespace = ("new.db",)
@@ -734,9 +742,8 @@ def test_namespace_with_dot(test_catalog: Catalog) -> None:
test_catalog.create_namespace(namespace)
assert test_catalog.namespace_exists(namespace)
- # REST Catalog fixture treats this as a hierarchical namespace.
- # Calling list namespaces will get `new`, not `new.db`.
- if isinstance(test_catalog, RestCatalog):
+ # Hierarchical catalogs might treat this as multiple levels.
+ if does_support_nested_namespaces(test_catalog):
namespaces = test_catalog.list_namespaces()
assert ("new",) in namespaces or ("new.db",) in namespaces
else:
@@ -752,7 +759,7 @@ def test_namespace_with_dot(test_catalog: Catalog) -> None:
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_table_name_with_slash(test_catalog: Catalog, table_schema_simple:
Schema) -> None:
- if isinstance(test_catalog, (HiveCatalog, SqlCatalog)):
+ if not does_support_slash_in_identifier(test_catalog):
pytest.skip(f"{type(test_catalog).__name__} does not support slash in
table name")
namespace = ("ns_slash",)
@@ -779,7 +786,7 @@ def test_table_name_with_slash(test_catalog: Catalog,
table_schema_simple: Schem
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_table_name_with_dot(test_catalog: Catalog, table_schema_simple:
Schema) -> None:
- if isinstance(test_catalog, (HiveCatalog, SqlCatalog)):
+ if not does_support_dot_in_identifier(test_catalog):
pytest.skip(f"{type(test_catalog).__name__} does not support dot in
table name")
namespace = ("ns_dot",)
@@ -818,9 +825,6 @@ def test_drop_missing_table(test_catalog: Catalog,
database_name: str) -> None:
@pytest.mark.integration
@pytest.mark.parametrize("test_catalog", CATALOGS)
def test_drop_nonexistent_namespace(test_catalog: Catalog) -> None:
- if isinstance(test_catalog, HiveCatalog):
- pytest.skip("HiveCatalog raises NoSuchObjectException instead of
NoSuchNamespaceError")
-
namespace = ("non_existent_namespace",)
with pytest.raises(NoSuchNamespaceError):
test_catalog.drop_namespace(namespace)