This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new ed799b47a5 Python: Inline the Filesystem imports (#8548)
ed799b47a5 is described below

commit ed799b47a5b5899c9e40a46fc85d7882d0065c37
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Sep 12 10:30:01 2023 +0200

    Python: Inline the Filesystem imports (#8548)
    
    * Python: Inline the Filesystem imports
    
    It can be that certain build flags are turned off:
    
    ```
    -DARROW_GCS=ON: Build Arrow with GCS support (requires the GCloud SDK for C++)
    -DARROW_HDFS=ON: Arrow integration with libhdfs for accessing the Hadoop Filesystem
    ```
    From: https://arrow.apache.org/docs/dev/developers/cpp/building.html#optional-components
    
    This will cause an ImportError when importing `pyarrow.py`,
    even if you don't intend to use the missing filesystem.
    Therefore it is better to inline the imports.
    
    * Move imports to the top
---
 python/pyiceberg/io/pyarrow.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/python/pyiceberg/io/pyarrow.py b/python/pyiceberg/io/pyarrow.py
index 3453b18e44..e43541f03e 100644
--- a/python/pyiceberg/io/pyarrow.py
+++ b/python/pyiceberg/io/pyarrow.py
@@ -58,11 +58,6 @@ from pyarrow.fs import (
     FileSystem,
     FileType,
     FSSpecHandler,
-    GcsFileSystem,
-    HadoopFileSystem,
-    LocalFileSystem,
-    PyFileSystem,
-    S3FileSystem,
 )
 from sortedcontainers import SortedList
 
@@ -306,6 +301,8 @@ class PyArrowFileIO(FileIO):
 
     def _get_fs(self, scheme: str) -> FileSystem:
         if scheme in {"s3", "s3a", "s3n"}:
+            from pyarrow.fs import S3FileSystem
+
             client_kwargs = {
                 "endpoint_override": self.properties.get(S3_ENDPOINT),
                 "access_key": self.properties.get(S3_ACCESS_KEY_ID),
@@ -319,6 +316,8 @@ class PyArrowFileIO(FileIO):
 
             return S3FileSystem(**client_kwargs)
         elif scheme == "hdfs":
+            from pyarrow.fs import HadoopFileSystem
+
             hdfs_kwargs: Dict[str, Any] = {}
             if host := self.properties.get(HDFS_HOST):
                 hdfs_kwargs["host"] = host
@@ -329,8 +328,11 @@ class PyArrowFileIO(FileIO):
                 hdfs_kwargs["user"] = user
             if kerb_ticket := self.properties.get(HDFS_KERB_TICKET):
                 hdfs_kwargs["kerb_ticket"] = kerb_ticket
+
             return HadoopFileSystem(**hdfs_kwargs)
         elif scheme in {"gs", "gcs"}:
+            from pyarrow.fs import GcsFileSystem
+
             gcs_kwargs: Dict[str, Any] = {}
             if access_token := self.properties.get(GCS_TOKEN):
                 gcs_kwargs["access_token"] = access_token
@@ -342,8 +344,11 @@ class PyArrowFileIO(FileIO):
                 url_parts = urlparse(endpoint)
                 gcs_kwargs["scheme"] = url_parts.scheme
                 gcs_kwargs["endpoint_override"] = url_parts.netloc
+
             return GcsFileSystem(**gcs_kwargs)
         elif scheme == "file":
+            from pyarrow.fs import LocalFileSystem
+
             return LocalFileSystem()
         else:
             raise ValueError(f"Unrecognized filesystem type in URI: {scheme}")
@@ -899,6 +904,8 @@ def project_table(
             from pyiceberg.io.fsspec import FsspecFileIO
 
             if isinstance(table.io, FsspecFileIO):
+                from pyarrow.fs import PyFileSystem
+
                 fs = PyFileSystem(FSSpecHandler(table.io.get_fs(scheme)))
             else:
                 raise ValueError(f"Expected PyArrowFileIO or FsspecFileIO, got: {table.io}")

Reply via email to