This is an automated email from the ASF dual-hosted git repository.

kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new fe0a2378 Make REST catalog namespace separator configurable (#2826)
fe0a2378 is described below

commit fe0a2378933973f440851c56a9df7c10bc4c3148
Author: Alex Stephen <[email protected]>
AuthorDate: Thu Jan 22 09:11:37 2026 -0800

    Make REST catalog namespace separator configurable (#2826)
    
    <!--
    Thanks for opening a pull request!
    -->
    
    <!-- In the case this PR will resolve an issue, please replace
    ${GITHUB_ISSUE_ID} below with the actual Github issue id. -->
    Closes #1183
    
    This adds a configurable namespace separator in the REST Catalog.
    
    # Rationale for this change
    Certain implementations expect a different namespace separator.
    
    ## Are these changes tested?
    Tests included.
    
    ## Are there any user-facing changes?
    
    <!-- In the case of user-facing changes, please add the changelog label.
    -->
---
 pyiceberg/catalog/rest/__init__.py | 46 +++++++++++++++++++++++++++++---------
 tests/catalog/test_rest.py         | 25 +++++++++++++++++++++
 tests/integration/test_catalog.py  | 34 ++++++++++++++++++++++++++++
 3 files changed, 95 insertions(+), 10 deletions(-)

diff --git a/pyiceberg/catalog/rest/__init__.py 
b/pyiceberg/catalog/rest/__init__.py
index d7ef6ec8..e3cc09ed 100644
--- a/pyiceberg/catalog/rest/__init__.py
+++ b/pyiceberg/catalog/rest/__init__.py
@@ -21,6 +21,7 @@ from typing import (
     Any,
     Union,
 )
+from urllib.parse import quote, unquote
 
 from pydantic import ConfigDict, Field, TypeAdapter, field_validator
 from requests import HTTPError, Session
@@ -234,7 +235,8 @@ REST_SCAN_PLANNING_ENABLED_DEFAULT = False
 VIEW_ENDPOINTS_SUPPORTED = "view-endpoints-supported"
 VIEW_ENDPOINTS_SUPPORTED_DEFAULT = False
 
-NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
+NAMESPACE_SEPARATOR_PROPERTY = "namespace-separator"
+DEFAULT_NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)
 
 
 def _retry_hook(retry_state: RetryCallState) -> None:
@@ -330,6 +332,7 @@ class RestCatalog(Catalog):
     _session: Session
     _auth_manager: AuthManager | None
     _supported_endpoints: set[Endpoint]
+    _namespace_separator: str
 
     def __init__(self, name: str, **properties: str):
         """Rest Catalog.
@@ -596,6 +599,16 @@ class RestCatalog(Catalog):
 
         return optional_oauth_param
 
+    def _encode_namespace_path(self, namespace: Identifier) -> str:
+        """
+        Encode a namespace for use as a path parameter in a URL.
+
+        Each part of the namespace is URL-encoded using `urllib.parse.quote`
+        (ensuring characters like '/' are encoded) and then joined by the
+        configured namespace separator.
+        """
+        return self._namespace_separator.join(quote(part, safe="") for part in 
namespace)
+
     def _fetch_config(self) -> None:
         params = {}
         if warehouse_location := self.properties.get(WAREHOUSE_LOCATION):
@@ -628,6 +641,11 @@ class RestCatalog(Catalog):
             if property_as_bool(self.properties, VIEW_ENDPOINTS_SUPPORTED, 
VIEW_ENDPOINTS_SUPPORTED_DEFAULT):
                 self._supported_endpoints.update(VIEW_ENDPOINTS)
 
+        separator_from_properties = 
self.properties.get(NAMESPACE_SEPARATOR_PROPERTY, DEFAULT_NAMESPACE_SEPARATOR)
+        if not separator_from_properties:
+            raise ValueError("Namespace separator cannot be an empty string")
+        self._namespace_separator = unquote(separator_from_properties)
+
     def _identifier_to_validated_tuple(self, identifier: str | Identifier) -> 
Identifier:
         identifier_tuple = self.identifier_to_tuple(identifier)
         if len(identifier_tuple) <= 1:
@@ -638,10 +656,17 @@ class RestCatalog(Catalog):
         self, identifier: str | Identifier | TableIdentifier, kind: 
IdentifierKind = IdentifierKind.TABLE
     ) -> Properties:
         if isinstance(identifier, TableIdentifier):
-            return {"namespace": 
NAMESPACE_SEPARATOR.join(identifier.namespace.root), kind.value: 
identifier.name}
+            return {
+                "namespace": 
self._encode_namespace_path(tuple(identifier.namespace.root)),
+                kind.value: quote(identifier.name, safe=""),
+            }
         identifier_tuple = self._identifier_to_validated_tuple(identifier)
 
-        return {"namespace": NAMESPACE_SEPARATOR.join(identifier_tuple[:-1]), 
kind.value: identifier_tuple[-1]}
+        # Use quote to ensure that '/' aren't treated as path separators.
+        return {
+            "namespace": self._encode_namespace_path(identifier_tuple[:-1]),
+            kind.value: quote(identifier_tuple[-1], safe=""),
+        }
 
     def _split_identifier_for_json(self, identifier: str | Identifier) -> 
dict[str, Identifier | str]:
         identifier_tuple = self._identifier_to_validated_tuple(identifier)
@@ -864,7 +889,7 @@ class RestCatalog(Catalog):
     def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
         self._check_endpoint(Capability.V1_LIST_TABLES)
         namespace_tuple = self._check_valid_namespace_identifier(namespace)
-        namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
+        namespace_concat = self._encode_namespace_path(namespace_tuple)
         response = self._session.get(self.url(Endpoints.list_tables, 
namespace=namespace_concat))
         try:
             response.raise_for_status()
@@ -950,7 +975,7 @@ class RestCatalog(Catalog):
         if Capability.V1_LIST_VIEWS not in self._supported_endpoints:
             return []
         namespace_tuple = self._check_valid_namespace_identifier(namespace)
-        namespace_concat = NAMESPACE_SEPARATOR.join(namespace_tuple)
+        namespace_concat = self._encode_namespace_path(namespace_tuple)
         response = self._session.get(self.url(Endpoints.list_views, 
namespace=namespace_concat))
         try:
             response.raise_for_status()
@@ -1020,7 +1045,7 @@ class RestCatalog(Catalog):
     def drop_namespace(self, namespace: str | Identifier) -> None:
         self._check_endpoint(Capability.V1_DELETE_NAMESPACE)
         namespace_tuple = self._check_valid_namespace_identifier(namespace)
-        namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
+        namespace = self._encode_namespace_path(namespace_tuple)
         response = self._session.delete(self.url(Endpoints.drop_namespace, 
namespace=namespace))
         try:
             response.raise_for_status()
@@ -1033,7 +1058,7 @@ class RestCatalog(Catalog):
         namespace_tuple = self.identifier_to_tuple(namespace)
         response = self._session.get(
             self.url(
-                
f"{Endpoints.list_namespaces}?parent={NAMESPACE_SEPARATOR.join(namespace_tuple)}"
+                
f"{Endpoints.list_namespaces}?parent={self._encode_namespace_path(namespace_tuple)}"
                 if namespace_tuple
                 else Endpoints.list_namespaces
             ),
@@ -1049,7 +1074,7 @@ class RestCatalog(Catalog):
     def load_namespace_properties(self, namespace: str | Identifier) -> 
Properties:
         self._check_endpoint(Capability.V1_LOAD_NAMESPACE)
         namespace_tuple = self._check_valid_namespace_identifier(namespace)
-        namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
+        namespace = self._encode_namespace_path(namespace_tuple)
         response = 
self._session.get(self.url(Endpoints.load_namespace_metadata, 
namespace=namespace))
         try:
             response.raise_for_status()
@@ -1064,7 +1089,7 @@ class RestCatalog(Catalog):
     ) -> PropertiesUpdateSummary:
         self._check_endpoint(Capability.V1_UPDATE_NAMESPACE)
         namespace_tuple = self._check_valid_namespace_identifier(namespace)
-        namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
+        namespace = self._encode_namespace_path(namespace_tuple)
         payload = {"removals": list(removals or []), "updates": updates}
         response = 
self._session.post(self.url(Endpoints.update_namespace_properties, 
namespace=namespace), json=payload)
         try:
@@ -1081,7 +1106,8 @@ class RestCatalog(Catalog):
     @retry(**_RETRY_ARGS)
     def namespace_exists(self, namespace: str | Identifier) -> bool:
         namespace_tuple = self._check_valid_namespace_identifier(namespace)
-        namespace = NAMESPACE_SEPARATOR.join(namespace_tuple)
+        namespace = self._encode_namespace_path(namespace_tuple)
+
         # fallback in order to work with older rest catalog implementations
         if Capability.V1_NAMESPACE_EXISTS not in self._supported_endpoints:
             try:
diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py
index 2e3b0d50..71341c8b 100644
--- a/tests/catalog/test_rest.py
+++ b/tests/catalog/test_rest.py
@@ -1991,6 +1991,31 @@ def test_rest_catalog_with_google_credentials_path(
     assert actual_headers["Authorization"] == expected_auth_header
 
 
+def test_custom_namespace_separator(rest_mock: Mocker) -> None:
+    custom_separator = "-"
+    namespace_part1 = "some"
+    namespace_part2 = "namespace"
+    # The expected URL path segment should use the literal custom_separator
+    expected_url_path_segment = 
f"{namespace_part1}{custom_separator}{namespace_part2}"
+
+    rest_mock.get(
+        f"{TEST_URI}v1/config",
+        json={"defaults": {}, "overrides": {}},
+        status_code=200,
+    )
+    rest_mock.get(
+        f"{TEST_URI}v1/namespaces/{expected_url_path_segment}",
+        json={"namespace": [namespace_part1, namespace_part2], "properties": 
{"prop": "yes"}},
+        status_code=200,
+        request_headers=TEST_HEADERS,
+    )
+
+    catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, 
**{"namespace-separator": custom_separator})
+    catalog.load_namespace_properties((namespace_part1, namespace_part2))
+
+    assert rest_mock.last_request.url == 
f"{TEST_URI}v1/namespaces/{expected_url_path_segment}"
+
+
 @pytest.mark.filterwarnings(
     "ignore:Deprecated in 0.8.0, will be removed in 1.0.0. Iceberg REST client 
is missing the OAuth2 server URI:DeprecationWarning"
 )
diff --git a/tests/integration/test_catalog.py 
b/tests/integration/test_catalog.py
index 0c776665..fb702625 100644
--- a/tests/integration/test_catalog.py
+++ b/tests/integration/test_catalog.py
@@ -16,6 +16,7 @@
 #  under the License.
 
 import os
+import uuid
 from collections.abc import Generator
 from pathlib import Path, PosixPath
 
@@ -601,3 +602,36 @@ def test_register_table_existing(test_catalog: Catalog, 
table_schema_nested: Sch
     # Assert that registering the table again raises TableAlreadyExistsError
     with pytest.raises(TableAlreadyExistsError):
         test_catalog.register_table(identifier, 
metadata_location=table.metadata_location)
+
+
[email protected]
+def test_rest_custom_namespace_separator(rest_catalog: RestCatalog, 
table_schema_simple: Schema) -> None:
+    """
+    Tests that the REST catalog correctly picks up the namespace-separator 
from the config endpoint.
+    The REST Catalog is configured with a '.' namespace separator.
+    """
+    assert rest_catalog._namespace_separator == "."
+
+    unique_id = uuid.uuid4().hex
+    parent_namespace = (f"test_parent_{unique_id}",)
+    child_namespace_part = "child"
+    full_namespace_tuple = (*parent_namespace, child_namespace_part)
+
+    table_name = "my_table"
+    full_table_identifier_tuple = (*full_namespace_tuple, table_name)
+
+    rest_catalog.create_namespace(namespace=parent_namespace)
+    rest_catalog.create_namespace(namespace=full_namespace_tuple)
+
+    namespaces = rest_catalog.list_namespaces(parent_namespace)
+    assert full_namespace_tuple in namespaces
+
+    # Test with a table
+    table = rest_catalog.create_table(identifier=full_table_identifier_tuple, 
schema=table_schema_simple)
+    assert table.name() == full_table_identifier_tuple
+
+    tables = rest_catalog.list_tables(full_namespace_tuple)
+    assert full_table_identifier_tuple in tables
+
+    loaded_table = 
rest_catalog.load_table(identifier=full_table_identifier_tuple)
+    assert loaded_table.name() == full_table_identifier_tuple

Reply via email to