This is an automated email from the ASF dual-hosted git repository.

mengw15 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new bfa79a7c69 refactor(amber): stop hardcoding S3 in REST catalog init 
(#4988)
bfa79a7c69 is described below

commit bfa79a7c69e43ac274553468a6cc07dfca5b7a6e
Author: Meng Wang <[email protected]>
AuthorDate: Sat May 16 10:39:03 2026 -0700

    refactor(amber): stop hardcoding S3 in REST catalog init (#4988)
    
    ### What changes were proposed in this PR?
    
    Stop hardcoding `s3.endpoint`, `s3.region`, `s3.path-style-access`,
    `s3.access-key-id` and `s3.secret-access-key` at REST-catalog init in
    both `IcebergUtil.createRestCatalog` (Scala) and
    `iceberg_utils.create_rest_catalog` (Python). Both helpers now pass only
    `warehouse` + catalog `uri` (and on the Scala side the `FileIO` impl
    hint).
    
    **Why:** When a Lakekeeper warehouse is created, its S3 settings
    (endpoint, region, credentials, path-style) are registered against that
    warehouse on the server. At catalog init the client only needs
    `warehouse` + `uri` — Lakekeeper resolves the S3 config from the
    warehouse record and serves it back. The hardcoded `StorageConfig.s3*`
    values on the client were redundant, and forcing them everywhere also
    pinned every warehouse to the single system bucket. Removing them lets
    each warehouse own its own storage settings.
    
    `StorageConfig.s3*` itself is kept —
    `pytexera/storage/large_binary_manager.py` still uses it for the
    non-Iceberg `texera-large-binaries` bucket (R UDF large-binary support),
    which is out of scope.
    
    ### Any related issues, documentation, discussions?
    
    Closes #4987
    
    ### How was this PR tested?
    
    - `sbt "WorkflowCore/compile"` — passes; verifies no other Scala caller
    depends on the removed properties.
    - Python edits parse cleanly via `ast.parse`; the only caller
    (`iceberg_catalog_instance.py`) is updated to match the new
    `create_rest_catalog` signature.
    
    End-to-end verification (warehouse with its own S3 settings → REST
    catalog opened with only `warehouse` + `uri` → table round-trip)
    requires a running Lakekeeper, which CI doesn't have today. #4276
    (draft) wires Lakekeeper into CI; once that lands I'll add the
    integration test on top of it.
    
    ### Was this PR authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Opus 4.7)
    
    ---------
    
    Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
---
 .../core/storage/iceberg/iceberg_catalog_instance.py    |  4 ----
 .../main/python/core/storage/iceberg/iceberg_utils.py   | 17 ++---------------
 .../iceberg/test_iceberg_rest_catalog_integration.py    |  4 ----
 .../org/apache/texera/amber/util/IcebergUtil.scala      | 17 +++++------------
 .../org/apache/texera/amber/util/IcebergUtilSpec.scala  | 10 ++++++++++
 5 files changed, 17 insertions(+), 35 deletions(-)

diff --git 
a/amber/src/main/python/core/storage/iceberg/iceberg_catalog_instance.py 
b/amber/src/main/python/core/storage/iceberg/iceberg_catalog_instance.py
index 0059808f9f..65a4f6beec 100644
--- a/amber/src/main/python/core/storage/iceberg/iceberg_catalog_instance.py
+++ b/amber/src/main/python/core/storage/iceberg/iceberg_catalog_instance.py
@@ -60,10 +60,6 @@ class IcebergCatalogInstance:
                     "texera_iceberg",
                     StorageConfig.ICEBERG_REST_CATALOG_WAREHOUSE_NAME,
                     StorageConfig.ICEBERG_REST_CATALOG_URI,
-                    StorageConfig.S3_ENDPOINT,
-                    StorageConfig.S3_REGION,
-                    StorageConfig.S3_AUTH_USERNAME,
-                    StorageConfig.S3_AUTH_PASSWORD,
                 )
             else:
                 raise ValueError(f"Unsupported catalog type: {catalog_type}")
diff --git a/amber/src/main/python/core/storage/iceberg/iceberg_utils.py 
b/amber/src/main/python/core/storage/iceberg/iceberg_utils.py
index 844ef3e00f..c66ba74ba2 100644
--- a/amber/src/main/python/core/storage/iceberg/iceberg_utils.py
+++ b/amber/src/main/python/core/storage/iceberg/iceberg_utils.py
@@ -157,23 +157,15 @@ def create_rest_catalog(
     catalog_name: str,
     warehouse_name: str,
     rest_uri: str,
-    s3_endpoint: str,
-    s3_region: str,
-    s3_username: str,
-    s3_password: str,
 ) -> Catalog:
     """
     Creates a REST catalog instance by connecting to a REST endpoint.
-    - Configures the catalog to interact with a REST endpoint.
     - The warehouse_name parameter specifies the warehouse identifier.
-    - Configures S3FileIO for MinIO/S3 storage backend.
+    - S3 settings (endpoint, region, credentials) are supplied by the REST
+      catalog server at runtime.
     :param catalog_name: the name of the catalog.
     :param warehouse_name: the warehouse identifier.
     :param rest_uri: the URI of the REST catalog endpoint.
-    :param s3_endpoint: the S3 endpoint URL.
-    :param s3_region: the S3 region.
-    :param s3_username: the S3 access key ID.
-    :param s3_password: the S3 secret access key.
     :return: a Catalog instance (REST catalog).
     """
     return load_catalog(
@@ -182,11 +174,6 @@ def create_rest_catalog(
             "type": "rest",
             "uri": rest_uri,
             "warehouse": warehouse_name,
-            "s3.endpoint": s3_endpoint,
-            "s3.access-key-id": s3_username,
-            "s3.secret-access-key": s3_password,
-            "s3.region": s3_region,
-            "s3.path-style-access": "true",
         },
     )
 
diff --git 
a/amber/src/test/python/core/storage/iceberg/test_iceberg_rest_catalog_integration.py
 
b/amber/src/test/python/core/storage/iceberg/test_iceberg_rest_catalog_integration.py
index 642fbb08e5..f023bd70d5 100644
--- 
a/amber/src/test/python/core/storage/iceberg/test_iceberg_rest_catalog_integration.py
+++ 
b/amber/src/test/python/core/storage/iceberg/test_iceberg_rest_catalog_integration.py
@@ -33,10 +33,6 @@ def rest_catalog():
         catalog_name="rest_integration_test",
         warehouse_name="texera",
         rest_uri="http://localhost:8181/catalog/";,
-        s3_endpoint="http://localhost:9000";,
-        s3_region="us-west-2",
-        s3_username="texera_minio",
-        s3_password="password",
     )
 
 
diff --git 
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/IcebergUtil.scala
 
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/IcebergUtil.scala
index d6e406a53f..cee293a758 100644
--- 
a/common/workflow-core/src/main/scala/org/apache/texera/amber/util/IcebergUtil.scala
+++ 
b/common/workflow-core/src/main/scala/org/apache/texera/amber/util/IcebergUtil.scala
@@ -108,19 +108,12 @@ object IcebergUtil {
   ): RESTCatalog = {
     val catalog = new RESTCatalog()
 
-    // Build base properties map
-    var properties = Map(
+    // S3 settings (endpoint, region, credentials) are supplied by the REST
+    // catalog server at runtime.
+    val properties = Map(
       "warehouse" -> warehouse,
-      CatalogProperties.URI -> StorageConfig.icebergRESTCatalogUri
-    )
-
-    properties = properties ++ Map(
-      CatalogProperties.FILE_IO_IMPL -> classOf[S3FileIO].getName,
-      "s3.endpoint" -> StorageConfig.s3Endpoint,
-      "s3.access-key-id" -> StorageConfig.s3Username,
-      "s3.secret-access-key" -> StorageConfig.s3Password,
-      "s3.region" -> StorageConfig.s3Region,
-      "s3.path-style-access" -> "true"
+      CatalogProperties.URI -> StorageConfig.icebergRESTCatalogUri,
+      CatalogProperties.FILE_IO_IMPL -> classOf[S3FileIO].getName
     )
 
     catalog.initialize(catalogName, properties.asJava)
diff --git 
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/IcebergUtilSpec.scala
 
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/IcebergUtilSpec.scala
index da15a8060d..75a997cbb5 100644
--- 
a/common/workflow-core/src/test/scala/org/apache/texera/amber/util/IcebergUtilSpec.scala
+++ 
b/common/workflow-core/src/test/scala/org/apache/texera/amber/util/IcebergUtilSpec.scala
@@ -22,6 +22,7 @@ package org.apache.texera.amber.util
 import org.apache.texera.amber.core.tuple.{AttributeType, LargeBinary, Schema, 
Tuple}
 import org.apache.texera.amber.util.IcebergUtil.toIcebergSchema
 import org.apache.iceberg.data.GenericRecord
+import org.apache.iceberg.exceptions.RESTException
 import org.apache.iceberg.types.Types
 import org.apache.iceberg.{Schema => IcebergSchema}
 import org.scalatest.flatspec.AnyFlatSpec
@@ -298,4 +299,13 @@ class IcebergUtilSpec extends AnyFlatSpec {
 
     assert(IcebergUtil.fromRecord(record, schema) == tuple)
   }
+
+  it should "surface RESTException when createRestCatalog cannot reach the 
REST endpoint" in {
+    // Property Map is built before any network call. With or without
+    // Lakekeeper reachable, .initialize surfaces a RESTException — the
+    // failure is on the server side, not from Map composition.
+    intercept[RESTException] {
+      IcebergUtil.createRestCatalog("test", "non-existent-warehouse")
+    }
+  }
 }

Reply via email to