Fokko commented on code in PR #5969:
URL: https://github.com/apache/iceberg/pull/5969#discussion_r997542599


##########
python/pyiceberg/io/fsspec.py:
##########
@@ -15,28 +15,78 @@
 # specific language governing permissions and limitations
 # under the License.
 """FileIO implementation for reading and writing table files that uses fsspec 
compatible filesystems"""
-
-from functools import lru_cache
-from typing import Callable, Union
+import logging
+from functools import lru_cache, partial
+from typing import Callable, Dict, Union
 from urllib.parse import urlparse
 
+import requests
+from botocore import UNSIGNED
+from botocore.awsrequest import AWSRequest
 from fsspec import AbstractFileSystem
+from requests import HTTPError
 from s3fs import S3FileSystem
 
+from pyiceberg.exceptions import SignError
 from pyiceberg.io import FileIO, InputFile, OutputFile
 from pyiceberg.typedef import Properties
 
+logger = logging.getLogger(__name__)
+
+
+def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_) -> 
AWSRequest:
+    signer_url = properties["uri"].rstrip("/")
+    signer_headers = {"Authorization": f"Bearer {properties['token']}"}
+    signer_body = {
+        "method": request.method,
+        "region": request.context["client_region"],
+        "uri": request.url,
+        "headers": {key: [val] for key, val in request.headers.items()},
+    }
+    try:
+        response = requests.post(f"{signer_url}/v1/aws/s3/sign", 
headers=signer_headers, json=signer_body)
+        response.raise_for_status()
+        response_json = response.json()
+    except HTTPError as e:
+        raise SignError(f"Failed to sign request {response.status_code}: 
{signer_body}") from e
+
+    for key, value in response_json["headers"].items():
+        request.headers.add_header(key, ", ".join(value))
+
+    request.url = response_json["uri"]
+
+    return request
+
+
+SIGNERS: Dict[str, Callable[[Properties, AWSRequest], AWSRequest]] = 
{"S3V4RestSigner": s3v4_rest_signer}
+
 
 def _s3(properties: Properties) -> AbstractFileSystem:
     client_kwargs = {
         "endpoint_url": properties.get("s3.endpoint"),
         "aws_access_key_id": properties.get("s3.access-key-id"),
         "aws_secret_access_key": properties.get("s3.secret-access-key"),
     }
+    config_kwargs = {}
+    register_events: Dict[str, Callable] = {}
+
+    if signer := properties.get("s3.signer"):
+        logger.info("Loading signer %s", signer)
+        if singer_func := SIGNERS.get(signer):
+            singer_func_with_properties = partial(singer_func, properties)
+            register_events["before-sign.s3"] = singer_func_with_properties
+
+            # Disable the AWS Signer
+            config_kwargs["signature_version"] = UNSIGNED
+        else:
+            raise ValueError(f"Signer not available: {signer}")
+
+    fs = S3FileSystem(client_kwargs=client_kwargs, config_kwargs=config_kwargs)
 
-    config_kwargs = {"signature_version": properties.get("s3.signer")}
+    for event_name, event_function in register_events.items():
+        fs.s3.meta.events.register_last(event_name, event_function, 
unique_id=1925)

Review Comment:
   Setting a unique ID here is a temporary fix to avoid having multiple signers 
registered at the same time. The problem is that the hooks/events in botocore 
are global, so if you create a new s3fs instance, the signer would be 
registered twice if we omitted the `unique_id`. 
   
   The drawback of this approach is that if you have multiple signers, each 
using a different token, only the first one will be registered. My preferred 
way of fixing this is to use 
[`BearerAuth`](https://github.com/boto/botocore/blob/develop/botocore/auth.py#L948-L963),
 to which we can attach the token, and then have just a single global event 
that takes care of the signing. This requires aiobotocore to pick up a newer 
version of botocore; see https://github.com/aio-libs/aiobotocore/pull/968 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to