kevinjqliu commented on code in PR #1469:
URL: https://github.com/apache/iceberg-python/pull/1469#discussion_r1902181377
##########
tests/integration/test_rest_catalog.py:
##########
@@ -16,34 +16,788 @@
# under the License.
# pylint:disable=redefined-outer-name
+
+from typing import Any, Dict
+
import pytest
+from pyiceberg.catalog import PropertiesUpdateSummary
from pyiceberg.catalog.rest import RestCatalog
+from pyiceberg.exceptions import (
+ BadRequestError,
+ NamespaceAlreadyExistsError,
+ NoSuchIdentifierError,
+ NoSuchNamespaceError,
+ NoSuchTableError,
+ NoSuchViewError,
+ OAuthError,
+ TableAlreadyExistsError,
+)
+from pyiceberg.io import load_file_io
+from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.schema import Schema
+from pyiceberg.table import Table
+from pyiceberg.table.metadata import TableMetadataV2
+from pyiceberg.table.sorting import NullOrder, SortDirection, SortField,
SortOrder
+from pyiceberg.transforms import IdentityTransform, TruncateTransform
+
+TEST_NAMESPACE_IDENTIFIER = ("rest_integration_ns",)
+TEST_TABLE_IDENTIFIER = ("rest_integration_ns", "rest_integration_tbl")
+TEST_TABLE_IDENTIFIER_RENAME = ("rest_integration_ns",
"renamed_rest_integration_tbl")
+TEST_URI = "http://localhost:8181"
+TEST_CREDENTIALS = "client:secret"
+
+EXAMPLE_table_metadata_no_snapshot_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [{"spec-id": 0, "fields": ()}],
+ "default-spec-id": 0,
+ "last-partition-id": 999,
+ "default-sort-order-id": 0,
+ "sort-orders": [{"order-id": 0, "fields": []}],
+ "properties": {
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+EXAMPLE_table_metadata_no_snapshot_partitioned_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [
+ {"spec-id": 0, "fields": ({"source-id": 1, "field-id": 1000,
"transform": "truncate[3]", "name": "id"},)}
+ ],
+ "default-spec-id": 0,
+ "last-partition-id": 1000,
+ "default-sort-order-id": 1,
+ "sort-orders": [
+ {
+ "order-id": 1,
+ "fields": [
+ {"source-id": 2, "transform": "identity", "direction":
SortDirection.ASC, "null-order": NullOrder.NULLS_FIRST}
+ ],
+ }
+ ],
+ "properties": {
+ "owner": "fokko",
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+
[email protected]
+def table_metadata_no_snapshot_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_v2
+
-TEST_NAMESPACE_IDENTIFIER = "TEST NS"
[email protected]
+def table_metadata_no_snapshot_partitioned_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_partitioned_v2
+
+
[email protected]
+def
rest_integration_example_metadata_partitioned_v2(table_metadata_no_snapshot_partitioned_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_partitioned_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def example_table_metadata_with_no_location(table_metadata_no_snapshot_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def table_metadata_no_snapshot_rest(table_metadata_no_snapshot_v2: Dict[str,
Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
@pytest.mark.integration
[email protected]("catalog", [pytest.lazy_fixture("session_catalog")])
-def test_namespace_exists(catalog: RestCatalog) -> None:
- if not catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
[email protected](scope="function")
[email protected]("catalog", [pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up")])
Review Comment:
nit: you can use an autouse fixture like the one below to automatically run
cleanup after every test:
https://stackoverflow.com/questions/22627659/run-code-before-and-after-each-test-in-py-test
```
@pytest.fixture(autouse=True)
def cleanup():
    # Code that will run before your test, for example:
    yield
    # Code that will run after your test, for example:
```
##########
tests/integration/test_rest_catalog.py:
##########
@@ -16,34 +16,788 @@
# under the License.
# pylint:disable=redefined-outer-name
+
+from typing import Any, Dict
+
import pytest
+from pyiceberg.catalog import PropertiesUpdateSummary
from pyiceberg.catalog.rest import RestCatalog
+from pyiceberg.exceptions import (
+ BadRequestError,
+ NamespaceAlreadyExistsError,
+ NoSuchIdentifierError,
+ NoSuchNamespaceError,
+ NoSuchTableError,
+ NoSuchViewError,
+ OAuthError,
+ TableAlreadyExistsError,
+)
+from pyiceberg.io import load_file_io
+from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.schema import Schema
+from pyiceberg.table import Table
+from pyiceberg.table.metadata import TableMetadataV2
+from pyiceberg.table.sorting import NullOrder, SortDirection, SortField,
SortOrder
+from pyiceberg.transforms import IdentityTransform, TruncateTransform
+
+TEST_NAMESPACE_IDENTIFIER = ("rest_integration_ns",)
+TEST_TABLE_IDENTIFIER = ("rest_integration_ns", "rest_integration_tbl")
+TEST_TABLE_IDENTIFIER_RENAME = ("rest_integration_ns",
"renamed_rest_integration_tbl")
+TEST_URI = "http://localhost:8181"
+TEST_CREDENTIALS = "client:secret"
+
+EXAMPLE_table_metadata_no_snapshot_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [{"spec-id": 0, "fields": ()}],
+ "default-spec-id": 0,
+ "last-partition-id": 999,
+ "default-sort-order-id": 0,
+ "sort-orders": [{"order-id": 0, "fields": []}],
+ "properties": {
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+EXAMPLE_table_metadata_no_snapshot_partitioned_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [
+ {"spec-id": 0, "fields": ({"source-id": 1, "field-id": 1000,
"transform": "truncate[3]", "name": "id"},)}
+ ],
+ "default-spec-id": 0,
+ "last-partition-id": 1000,
+ "default-sort-order-id": 1,
+ "sort-orders": [
+ {
+ "order-id": 1,
+ "fields": [
+ {"source-id": 2, "transform": "identity", "direction":
SortDirection.ASC, "null-order": NullOrder.NULLS_FIRST}
+ ],
+ }
+ ],
+ "properties": {
+ "owner": "fokko",
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+
[email protected]
+def table_metadata_no_snapshot_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_v2
+
-TEST_NAMESPACE_IDENTIFIER = "TEST NS"
[email protected]
+def table_metadata_no_snapshot_partitioned_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_partitioned_v2
+
+
[email protected]
+def
rest_integration_example_metadata_partitioned_v2(table_metadata_no_snapshot_partitioned_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_partitioned_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def example_table_metadata_with_no_location(table_metadata_no_snapshot_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def table_metadata_no_snapshot_rest(table_metadata_no_snapshot_v2: Dict[str,
Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
@pytest.mark.integration
[email protected]("catalog", [pytest.lazy_fixture("session_catalog")])
-def test_namespace_exists(catalog: RestCatalog) -> None:
- if not catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
[email protected](scope="function")
[email protected]("catalog", [pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up")])
+def test_clean_up(catalog: RestCatalog) -> None:
+ for namespaces_tuple in catalog.list_namespaces():
+ namespace_name = namespaces_tuple[0]
+ if TEST_NAMESPACE_IDENTIFIER[0] in namespace_name:
+ for identifier in catalog.list_tables(namespace_name):
+ catalog.purge_table(identifier)
+ if catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
+ catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER)
Review Comment:
since we only have 2 tables and 1 namespace:
```
TEST_NAMESPACE_IDENTIFIER = ("rest_integration_ns",)
TEST_TABLE_IDENTIFIER = ("rest_integration_ns", "rest_integration_tbl")
TEST_TABLE_IDENTIFIER_RENAME = ("rest_integration_ns",
"renamed_rest_integration_tbl")
```
we can drop all tables in the `TEST_NAMESPACE_IDENTIFIER` namespace, and
then remove the namespace itself.
##########
tests/integration/test_rest_catalog.py:
##########
@@ -16,34 +16,788 @@
# under the License.
# pylint:disable=redefined-outer-name
+
+from typing import Any, Dict
+
import pytest
+from pyiceberg.catalog import PropertiesUpdateSummary
from pyiceberg.catalog.rest import RestCatalog
+from pyiceberg.exceptions import (
+ BadRequestError,
+ NamespaceAlreadyExistsError,
+ NoSuchIdentifierError,
+ NoSuchNamespaceError,
+ NoSuchTableError,
+ NoSuchViewError,
+ OAuthError,
+ TableAlreadyExistsError,
+)
+from pyiceberg.io import load_file_io
+from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.schema import Schema
+from pyiceberg.table import Table
+from pyiceberg.table.metadata import TableMetadataV2
+from pyiceberg.table.sorting import NullOrder, SortDirection, SortField,
SortOrder
+from pyiceberg.transforms import IdentityTransform, TruncateTransform
+
+TEST_NAMESPACE_IDENTIFIER = ("rest_integration_ns",)
+TEST_TABLE_IDENTIFIER = ("rest_integration_ns", "rest_integration_tbl")
+TEST_TABLE_IDENTIFIER_RENAME = ("rest_integration_ns",
"renamed_rest_integration_tbl")
+TEST_URI = "http://localhost:8181"
+TEST_CREDENTIALS = "client:secret"
+
+EXAMPLE_table_metadata_no_snapshot_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [{"spec-id": 0, "fields": ()}],
+ "default-spec-id": 0,
+ "last-partition-id": 999,
+ "default-sort-order-id": 0,
+ "sort-orders": [{"order-id": 0, "fields": []}],
+ "properties": {
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+EXAMPLE_table_metadata_no_snapshot_partitioned_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [
+ {"spec-id": 0, "fields": ({"source-id": 1, "field-id": 1000,
"transform": "truncate[3]", "name": "id"},)}
+ ],
+ "default-spec-id": 0,
+ "last-partition-id": 1000,
+ "default-sort-order-id": 1,
+ "sort-orders": [
+ {
+ "order-id": 1,
+ "fields": [
+ {"source-id": 2, "transform": "identity", "direction":
SortDirection.ASC, "null-order": NullOrder.NULLS_FIRST}
+ ],
+ }
+ ],
+ "properties": {
+ "owner": "fokko",
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+
[email protected]
+def table_metadata_no_snapshot_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_v2
+
-TEST_NAMESPACE_IDENTIFIER = "TEST NS"
[email protected]
+def table_metadata_no_snapshot_partitioned_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_partitioned_v2
+
+
[email protected]
+def
rest_integration_example_metadata_partitioned_v2(table_metadata_no_snapshot_partitioned_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_partitioned_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def example_table_metadata_with_no_location(table_metadata_no_snapshot_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def table_metadata_no_snapshot_rest(table_metadata_no_snapshot_v2: Dict[str,
Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
@pytest.mark.integration
[email protected]("catalog", [pytest.lazy_fixture("session_catalog")])
-def test_namespace_exists(catalog: RestCatalog) -> None:
- if not catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
[email protected](scope="function")
[email protected]("catalog", [pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up")])
+def test_clean_up(catalog: RestCatalog) -> None:
+ for namespaces_tuple in catalog.list_namespaces():
+ namespace_name = namespaces_tuple[0]
+ if TEST_NAMESPACE_IDENTIFIER[0] in namespace_name:
+ for identifier in catalog.list_tables(namespace_name):
+ catalog.purge_table(identifier)
+ if catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
+ catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER)
+
+
[email protected]
[email protected]("catalog,clean_up",
[(pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up"))])
+def test_create_namespace_200(catalog: RestCatalog, clean_up: Any) -> None:
Review Comment:
nit: the naming is confusing since we don't assert the 200 status code.
How about just `test_create_namespace`?
##########
tests/integration/test_rest_catalog.py:
##########
@@ -16,34 +16,788 @@
# under the License.
# pylint:disable=redefined-outer-name
+
+from typing import Any, Dict
+
import pytest
+from pyiceberg.catalog import PropertiesUpdateSummary
from pyiceberg.catalog.rest import RestCatalog
+from pyiceberg.exceptions import (
+ BadRequestError,
+ NamespaceAlreadyExistsError,
+ NoSuchIdentifierError,
+ NoSuchNamespaceError,
+ NoSuchTableError,
+ NoSuchViewError,
+ OAuthError,
+ TableAlreadyExistsError,
+)
+from pyiceberg.io import load_file_io
+from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.schema import Schema
+from pyiceberg.table import Table
+from pyiceberg.table.metadata import TableMetadataV2
+from pyiceberg.table.sorting import NullOrder, SortDirection, SortField,
SortOrder
+from pyiceberg.transforms import IdentityTransform, TruncateTransform
+
+TEST_NAMESPACE_IDENTIFIER = ("rest_integration_ns",)
+TEST_TABLE_IDENTIFIER = ("rest_integration_ns", "rest_integration_tbl")
+TEST_TABLE_IDENTIFIER_RENAME = ("rest_integration_ns",
"renamed_rest_integration_tbl")
+TEST_URI = "http://localhost:8181"
+TEST_CREDENTIALS = "client:secret"
+
+EXAMPLE_table_metadata_no_snapshot_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [{"spec-id": 0, "fields": ()}],
+ "default-spec-id": 0,
+ "last-partition-id": 999,
+ "default-sort-order-id": 0,
+ "sort-orders": [{"order-id": 0, "fields": []}],
+ "properties": {
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+EXAMPLE_table_metadata_no_snapshot_partitioned_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [
+ {"spec-id": 0, "fields": ({"source-id": 1, "field-id": 1000,
"transform": "truncate[3]", "name": "id"},)}
+ ],
+ "default-spec-id": 0,
+ "last-partition-id": 1000,
+ "default-sort-order-id": 1,
+ "sort-orders": [
+ {
+ "order-id": 1,
+ "fields": [
+ {"source-id": 2, "transform": "identity", "direction":
SortDirection.ASC, "null-order": NullOrder.NULLS_FIRST}
+ ],
+ }
+ ],
+ "properties": {
+ "owner": "fokko",
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+
[email protected]
+def table_metadata_no_snapshot_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_v2
+
-TEST_NAMESPACE_IDENTIFIER = "TEST NS"
[email protected]
+def table_metadata_no_snapshot_partitioned_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_partitioned_v2
+
+
[email protected]
+def
rest_integration_example_metadata_partitioned_v2(table_metadata_no_snapshot_partitioned_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_partitioned_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def example_table_metadata_with_no_location(table_metadata_no_snapshot_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def table_metadata_no_snapshot_rest(table_metadata_no_snapshot_v2: Dict[str,
Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
@pytest.mark.integration
[email protected]("catalog", [pytest.lazy_fixture("session_catalog")])
-def test_namespace_exists(catalog: RestCatalog) -> None:
- if not catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
[email protected](scope="function")
[email protected]("catalog", [pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up")])
+def test_clean_up(catalog: RestCatalog) -> None:
+ for namespaces_tuple in catalog.list_namespaces():
+ namespace_name = namespaces_tuple[0]
+ if TEST_NAMESPACE_IDENTIFIER[0] in namespace_name:
+ for identifier in catalog.list_tables(namespace_name):
+ catalog.purge_table(identifier)
+ if catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
+ catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER)
+
+
[email protected]
[email protected]("catalog,clean_up",
[(pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up"))])
+def test_create_namespace_200(catalog: RestCatalog, clean_up: Any) -> None:
+ catalog.create_namespace(TEST_NAMESPACE_IDENTIFIER)
+ assert TEST_NAMESPACE_IDENTIFIER in catalog.list_namespaces()
+
+
[email protected]
[email protected]("catalog,clean_up",
[(pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up"))])
+def test_create_namespace_if_exists_409(catalog: RestCatalog, clean_up: Any)
-> None:
+ catalog.create_namespace(TEST_NAMESPACE_IDENTIFIER)
+ catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)
+ assert TEST_NAMESPACE_IDENTIFIER in catalog.list_namespaces()
+
+
[email protected]
[email protected]("catalog,clean_up",
[(pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up"))])
+def test_list_namespaces_200(catalog: RestCatalog, clean_up: Any) -> None:
+ catalog.create_namespace(TEST_NAMESPACE_IDENTIFIER)
+ assert catalog.list_namespaces() == [("default",),
TEST_NAMESPACE_IDENTIFIER]
Review Comment:
nit: assert `TEST_NAMESPACE_IDENTIFIER in catalog.list_namespaces()` instead,
so we don't depend on which other namespaces exist
##########
tests/integration/test_rest_catalog.py:
##########
@@ -16,34 +16,788 @@
# under the License.
# pylint:disable=redefined-outer-name
+
+from typing import Any, Dict
+
import pytest
+from pyiceberg.catalog import PropertiesUpdateSummary
from pyiceberg.catalog.rest import RestCatalog
+from pyiceberg.exceptions import (
+ BadRequestError,
+ NamespaceAlreadyExistsError,
+ NoSuchIdentifierError,
+ NoSuchNamespaceError,
+ NoSuchTableError,
+ NoSuchViewError,
+ OAuthError,
+ TableAlreadyExistsError,
+)
+from pyiceberg.io import load_file_io
+from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.schema import Schema
+from pyiceberg.table import Table
+from pyiceberg.table.metadata import TableMetadataV2
+from pyiceberg.table.sorting import NullOrder, SortDirection, SortField,
SortOrder
+from pyiceberg.transforms import IdentityTransform, TruncateTransform
+
+TEST_NAMESPACE_IDENTIFIER = ("rest_integration_ns",)
+TEST_TABLE_IDENTIFIER = ("rest_integration_ns", "rest_integration_tbl")
+TEST_TABLE_IDENTIFIER_RENAME = ("rest_integration_ns",
"renamed_rest_integration_tbl")
+TEST_URI = "http://localhost:8181"
+TEST_CREDENTIALS = "client:secret"
+
+EXAMPLE_table_metadata_no_snapshot_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [{"spec-id": 0, "fields": ()}],
+ "default-spec-id": 0,
+ "last-partition-id": 999,
+ "default-sort-order-id": 0,
+ "sort-orders": [{"order-id": 0, "fields": []}],
+ "properties": {
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+EXAMPLE_table_metadata_no_snapshot_partitioned_v2 = {
+ "format-version": 2,
+ "table-uuid": "bf289591-dcc0-4234-ad4f-5c3eed811a29",
+ "location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "last-updated-ms": 1657810967051,
+ "last-column-id": 3,
+ "schema": {
+ "type": "struct",
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ "fields": [
+ {"id": 1, "name": "foo", "required": False, "type": "string"},
+ {"id": 2, "name": "bar", "required": True, "type": "int"},
+ {"id": 3, "name": "baz", "required": False, "type": "boolean"},
+ ],
+ },
+ "current-schema-id": 0,
+ "schemas": [
+ {
+ "type": "struct",
+ "fields": (
+ {"id": 1, "name": "foo", "type": "string", "required": False},
+ {"id": 2, "name": "bar", "type": "int", "required": True},
+ {"id": 3, "name": "baz", "type": "boolean", "required": False},
+ ),
+ "schema-id": 0,
+ "identifier-field-ids": [2],
+ }
+ ],
+ "partition-specs": [
+ {"spec-id": 0, "fields": ({"source-id": 1, "field-id": 1000,
"transform": "truncate[3]", "name": "id"},)}
+ ],
+ "default-spec-id": 0,
+ "last-partition-id": 1000,
+ "default-sort-order-id": 1,
+ "sort-orders": [
+ {
+ "order-id": 1,
+ "fields": [
+ {"source-id": 2, "transform": "identity", "direction":
SortDirection.ASC, "null-order": NullOrder.NULLS_FIRST}
+ ],
+ }
+ ],
+ "properties": {
+ "owner": "fokko",
+ "write.parquet.compression-codec": "zstd",
+ },
+ "refs": {},
+ "snapshots": [],
+ "snapshot-log": [],
+ "metadata-log": [],
+}
+
+
[email protected]
+def table_metadata_no_snapshot_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_v2
+
-TEST_NAMESPACE_IDENTIFIER = "TEST NS"
[email protected]
+def table_metadata_no_snapshot_partitioned_v2() -> Dict[str, Any]:
+ return EXAMPLE_table_metadata_no_snapshot_partitioned_v2
+
+
[email protected]
+def
rest_integration_example_metadata_partitioned_v2(table_metadata_no_snapshot_partitioned_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_partitioned_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def example_table_metadata_with_no_location(table_metadata_no_snapshot_v2:
Dict[str, Any]) -> Dict[str, Any]:
+ return {
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
+
+
[email protected]
+def table_metadata_no_snapshot_rest(table_metadata_no_snapshot_v2: Dict[str,
Any]) -> Dict[str, Any]:
+ return {
+ "metadata-location":
f"s3://warehouse/{TEST_TABLE_IDENTIFIER[0]}/{TEST_TABLE_IDENTIFIER[1]}",
+ "metadata": table_metadata_no_snapshot_v2,
+ "config": {
+ "client.factory":
"io.tabular.iceberg.catalog.TabularAwsClientFactory",
+ "region": "us-west-2",
+ },
+ }
@pytest.mark.integration
[email protected]("catalog", [pytest.lazy_fixture("session_catalog")])
-def test_namespace_exists(catalog: RestCatalog) -> None:
- if not catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
[email protected](scope="function")
[email protected]("catalog", [pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up")])
+def test_clean_up(catalog: RestCatalog) -> None:
+ for namespaces_tuple in catalog.list_namespaces():
+ namespace_name = namespaces_tuple[0]
+ if TEST_NAMESPACE_IDENTIFIER[0] in namespace_name:
+ for identifier in catalog.list_tables(namespace_name):
+ catalog.purge_table(identifier)
+ if catalog.namespace_exists(TEST_NAMESPACE_IDENTIFIER):
+ catalog.drop_namespace(TEST_NAMESPACE_IDENTIFIER)
+
+
[email protected]
[email protected]("catalog,clean_up",
[(pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up"))])
+def test_create_namespace_200(catalog: RestCatalog, clean_up: Any) -> None:
+ catalog.create_namespace(TEST_NAMESPACE_IDENTIFIER)
+ assert TEST_NAMESPACE_IDENTIFIER in catalog.list_namespaces()
+
+
[email protected]
[email protected]("catalog,clean_up",
[(pytest.lazy_fixture("session_catalog"),
pytest.lazy_fixture("test_clean_up"))])
+def test_create_namespace_if_exists_409(catalog: RestCatalog, clean_up: Any)
-> None:
+ catalog.create_namespace(TEST_NAMESPACE_IDENTIFIER)
+ catalog.create_namespace_if_not_exists(TEST_NAMESPACE_IDENTIFIER)
+ assert TEST_NAMESPACE_IDENTIFIER in catalog.list_namespaces()
Review Comment:
nit: similar to the naming comment above, the test name can be more
descriptive. I think here we're testing that `create_namespace_if_not_exists`
does not throw `NamespaceAlreadyExistsError` when the namespace already exists
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]