This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 2e73a415 Support `s3.signer.endpoint` for the REST catalog (#1029)
2e73a415 is described below

commit 2e73a415bfd3d200a81c956dd0f34db008a8c80b
Author: Guilherme Torres Castro <1149991+guitcas...@users.noreply.github.com>
AuthorDate: Mon Aug 12 12:09:27 2024 -0300

    Support `s3.signer.endpoint` for the REST catalog (#1029)
    
    * s3_signer_endpoint
    
    * prune any trailing whitespaces
    
    Co-authored-by: Fokko Driesprong <fo...@apache.org>
    
    * fallback to default value instead of "endpoint" property
    
    Co-authored-by: Fokko Driesprong <fo...@apache.org>
    
    * fix test_s3v4_rest_signer_endpoint
    
    * Fix missing backtick
    
    Co-authored-by: Fokko Driesprong <fo...@apache.org>
    
    * rename S3_SIGNER_ENDPOINT_DEFAULT_VALUE to S3_SIGNER_ENDPOINT_DEFAULT
    
    * fix s3.signer.endpoint docs
    
    * fk typo in signer
    
    * fix fmt
    
    ---------
    
    Co-authored-by: guilhermecastro <guilherme.cas...@protonmail.com>
    Co-authored-by: Fokko Driesprong <fo...@apache.org>
---
 mkdocs/docs/configuration.md |  3 ++-
 pyiceberg/io/__init__.py     |  2 ++
 pyiceberg/io/fsspec.py       | 12 ++++++----
 tests/io/test_fsspec.py      | 53 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md
index af139b00..dc67b790 100644
--- a/mkdocs/docs/configuration.md
+++ b/mkdocs/docs/configuration.md
@@ -82,7 +82,8 @@ For the FileIO there are several configuration options 
available:
 | s3.secret-access-key | password                 | Configure the static 
secret access key used to access the FileIO.                                    
                                                                                
                                                                     |
 | s3.session-token     | AQoDYXdzEJr...           | Configure the static 
session token used to access the FileIO.                                        
                                                                                
                                                                     |
 | s3.signer            | bearer                   | Configure the signature 
version of the FileIO.                                                          
                                                                                
                                                                  |
-| s3.signer.uri        | http://my.signer:8080/s3 | Configure the remote 
signing uri if it differs from the catalog uri. Remote signing is only 
implemented for `FsspecFileIO`. The final request is sent to 
`<s3.singer.uri>/v1/aws/s3/sign`.                                               
                 |
+| s3.signer.uri        | http://my.signer:8080/s3 | Configure the remote 
signing uri if it differs from the catalog uri. Remote signing is only 
implemented for `FsspecFileIO`. The final request is sent to 
`<s3.signer.uri>/<s3.signer.endpoint>`.                                         
                 |
+| s3.signer.endpoint   | v1/main/s3-sign          | Configure the remote 
signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The 
final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. (default : 
v1/aws/s3/sign).                                                            |
 | s3.region            | us-west-2                | Sets the region of the 
bucket                                                                          
                                                                                
                                                                   |
 | s3.proxy-uri         | http://my.proxy.com:8080 | Configure the proxy server 
to be used by the FileIO.                                                       
                                                                                
                                                               |
 | s3.connect-timeout   | 60.0                     | Configure socket 
connection timeout, in seconds.                                                 
                                                                                
                                                                         |
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py
index 0567af2d..d5f26a17 100644
--- a/pyiceberg/io/__init__.py
+++ b/pyiceberg/io/__init__.py
@@ -58,6 +58,8 @@ S3_REGION = "s3.region"
 S3_PROXY_URI = "s3.proxy-uri"
 S3_CONNECT_TIMEOUT = "s3.connect-timeout"
 S3_SIGNER_URI = "s3.signer.uri"
+S3_SIGNER_ENDPOINT = "s3.signer.endpoint"
+S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign"
 HDFS_HOST = "hdfs.host"
 HDFS_PORT = "hdfs.port"
 HDFS_USER = "hdfs.user"
diff --git a/pyiceberg/io/fsspec.py b/pyiceberg/io/fsspec.py
index d6e4a32a..cbe5d5b6 100644
--- a/pyiceberg/io/fsspec.py
+++ b/pyiceberg/io/fsspec.py
@@ -67,6 +67,8 @@ from pyiceberg.io import (
     S3_REGION,
     S3_SECRET_ACCESS_KEY,
     S3_SESSION_TOKEN,
+    S3_SIGNER_ENDPOINT,
+    S3_SIGNER_ENDPOINT_DEFAULT,
     S3_SIGNER_URI,
     ADLFS_ClIENT_SECRET,
     FileIO,
@@ -86,6 +88,8 @@ def s3v4_rest_signer(properties: Properties, request: 
AWSRequest, **_: Any) -> A
         raise SignError("Signer set, but token is not available")
 
     signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/")
+    signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, 
S3_SIGNER_ENDPOINT_DEFAULT)
+
     signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"}
     signer_body = {
         "method": request.method,
@@ -94,7 +98,7 @@ def s3v4_rest_signer(properties: Properties, request: 
AWSRequest, **_: Any) -> A
         "headers": {key: [val] for key, val in request.headers.items()},
     }
 
-    response = requests.post(f"{signer_url}/v1/aws/s3/sign", 
headers=signer_headers, json=signer_body)
+    response = requests.post(f"{signer_url}/{signer_endpoint.strip()}", 
headers=signer_headers, json=signer_body)
     try:
         response.raise_for_status()
         response_json = response.json()
@@ -131,9 +135,9 @@ def _s3(properties: Properties) -> AbstractFileSystem:
 
     if signer := properties.get("s3.signer"):
         logger.info("Loading signer %s", signer)
-        if singer_func := SIGNERS.get(signer):
-            singer_func_with_properties = partial(singer_func, properties)
-            register_events["before-sign.s3"] = singer_func_with_properties
+        if signer_func := SIGNERS.get(signer):
+            signer_func_with_properties = partial(signer_func, properties)
+            register_events["before-sign.s3"] = signer_func_with_properties
 
             # Disable the AWS Signer
             config_kwargs["signature_version"] = UNSIGNED
diff --git a/tests/io/test_fsspec.py b/tests/io/test_fsspec.py
index 3dd79e18..fbb18491 100644
--- a/tests/io/test_fsspec.py
+++ b/tests/io/test_fsspec.py
@@ -727,6 +727,59 @@ def test_s3v4_rest_signer(requests_mock: Mocker) -> None:
     }
 
 
+def test_s3v4_rest_signer_endpoint(requests_mock: Mocker) -> None:
+    new_uri = 
"https://other-bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro"
+    endpoint = "v1/main/s3-sign/foo.bar?e=e&b=b&k=k=k&s=s&w=w"
+    requests_mock.post(
+        f"{TEST_URI}/{endpoint}",
+        json={
+            "uri": new_uri,
+            "headers": {
+                "Authorization": [
+                    "AWS4-HMAC-SHA256 
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, 
SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, 
Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02"
+                ],
+                "Host": ["bucket.s3.us-west-2.amazonaws.com"],
+                "User-Agent": ["Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"],
+                "x-amz-content-sha256": ["UNSIGNED-PAYLOAD"],
+                "X-Amz-Date": ["20221017T102940Z"],
+                "X-Amz-Security-Token": [
+                    
"YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQ
 [...]
+                ],
+            },
+            "extensions": {},
+        },
+        status_code=200,
+    )
+
+    request = AWSRequest(
+        method="HEAD",
+        
url="https://bucket/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro",
+        headers={"User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"},
+        data=b"",
+        params={},
+        
auth_path="/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro",
+    )
+    request.context = {
+        "client_region": "us-west-2",
+        "has_streaming_input": False,
+        "auth_type": None,
+        "signing": {"bucket": "bucket"},
+        "retries": {"attempt": 1, "invocation-id": 
"75d143fb-0219-439b-872c-18213d1c8d54"},
+    }
+
+    signed_request = s3v4_rest_signer({"token": "abc", "uri": TEST_URI, 
"s3.signer.endpoint": endpoint}, request)
+
+    assert signed_request.url == new_uri
+    assert dict(signed_request.headers) == {
+        "Authorization": "AWS4-HMAC-SHA256 
Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, 
SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, 
Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02",
+        "Host": "bucket.s3.us-west-2.amazonaws.com",
+        "User-Agent": "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0",
+        "X-Amz-Date": "20221017T102940Z",
+        "X-Amz-Security-Token": 
"YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0
 [...]
+        "x-amz-content-sha256": "UNSIGNED-PAYLOAD",
+    }
+
+
 def test_s3v4_rest_signer_forbidden(requests_mock: Mocker) -> None:
     requests_mock.post(
         f"{TEST_URI}/v1/aws/s3/sign",

Reply via email to