rdblue commented on code in PR #5588:
URL: https://github.com/apache/iceberg/pull/5588#discussion_r950887899


##########
python/pyiceberg/io/__init__.py:
##########
@@ -218,11 +234,53 @@ def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
         """
 
 
-def load_file_io(_: Properties) -> FileIO:
-    # To be implemented in a different PR.
-    # - If py-file-io is present, load the right Python class
-    #   - When the property is missing, map from Java's filo-io to an appropriate FileIO
-    # - Extend the FileIO structure with a initialize that pass in properties (could also be the constructor?)
+ARROW_FILE_IO = "pyiceberg.io.pyarrow.PyArrowFileIO"
+
+# Mappings from the Java FileIO impl to a Python one. The list is ordered by preference.
+# If a implementation isn't installed, it will fall back to the next one.
+JAVA_FILE_IO_MAPPINGS: Dict[str, List[str]] = {
+    "org.apache.iceberg.dell.ecs.EcsFileIO": [ARROW_FILE_IO],
+    "org.apache.iceberg.gcp.gcs.GCSFileIO": [ARROW_FILE_IO],
+    "org.apache.iceberg.hadoop.HadoopFileIO": [ARROW_FILE_IO],
+    "org.apache.iceberg.aliyun.oss.OSSFileIO": [ARROW_FILE_IO],
+    "org.apache.iceberg.io.ResolvingFileIO": [ARROW_FILE_IO],
+    "org.apache.iceberg.aws.s3.S3FileIO": [ARROW_FILE_IO],
+}
+
+
+def _import_file_io(io_impl: str, properties: Properties) -> Optional[FileIO]:
+    try:
+        path_parts = io_impl.split(".")
+        if len(path_parts) < 2:
+            raise ValueError(f"py-io-impl should be full path (module.CustomFileIO), got: {io_impl}")
+        module_name, class_name = ".".join(path_parts[:-1]), path_parts[-1]
+        module = importlib.import_module(module_name)
+        class_ = getattr(module, class_name)
+        return class_(properties)
+    except ImportError:
+        logger.exception("Could not initialize FileIO: %s", io_impl)
+        return None
+
+
+PY_IO_IMPL = "py-io-impl"
+IO_IMPL = "io-impl"
+
+
+def load_file_io(properties: Properties) -> FileIO:

Review Comment:
   I'm not following what you mean. Why would it matter if a string argument 
contained slashes?
   
   The reason why I would add `location` as an argument is because it isn't 
part of `properties`. It is tracked separately as warehouse or table location.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to