rdblue commented on code in PR #5588:
URL: https://github.com/apache/iceberg/pull/5588#discussion_r950593852
##########
python/pyiceberg/io/__init__.py:
##########
@@ -218,11 +234,53 @@ def delete(self, location: Union[str, InputFile,
OutputFile]) -> None:
"""
-def load_file_io(_: Properties) -> FileIO:
- # To be implemented in a different PR.
- # - If py-file-io is present, load the right Python class
- # - When the property is missing, map from Java's filo-io to an
appropriate FileIO
- # - Extend the FileIO structure with a initialize that pass in properties
(could also be the constructor?)
+ARROW_FILE_IO = "pyiceberg.io.pyarrow.PyArrowFileIO"
+
+# Mappings from the Java FileIO impl to a Python one. The list is ordered by
preference.
+# If a implementation isn't installed, it will fall back to the next one.
+JAVA_FILE_IO_MAPPINGS: Dict[str, List[str]] = {
+ "org.apache.iceberg.dell.ecs.EcsFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.gcp.gcs.GCSFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.hadoop.HadoopFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.aliyun.oss.OSSFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.io.ResolvingFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.aws.s3.S3FileIO": [ARROW_FILE_IO],
+}
+
+
+def _import_file_io(io_impl: str, properties: Properties) -> Optional[FileIO]:
+ try:
+ path_parts = io_impl.split(".")
+ if len(path_parts) < 2:
+ raise ValueError(f"py-io-impl should be full path
(module.CustomFileIO), got: {io_impl}")
+ module_name, class_name = ".".join(path_parts[:-1]), path_parts[-1]
+ module = importlib.import_module(module_name)
+ class_ = getattr(module, class_name)
+ return class_(properties)
+ except ImportError:
+ logger.exception("Could not initialize FileIO: %s", io_impl)
+ return None
+
+
+PY_IO_IMPL = "py-io-impl"
+IO_IMPL = "io-impl"
+
+
+def load_file_io(properties: Properties) -> FileIO:
Review Comment:
If we were to use location to determine this automatically, then we'd
probably want to pass it here as an optional argument.
##########
python/pyiceberg/io/__init__.py:
##########
@@ -218,11 +234,53 @@ def delete(self, location: Union[str, InputFile,
OutputFile]) -> None:
"""
-def load_file_io(_: Properties) -> FileIO:
- # To be implemented in a different PR.
- # - If py-file-io is present, load the right Python class
- # - When the property is missing, map from Java's filo-io to an
appropriate FileIO
- # - Extend the FileIO structure with a initialize that pass in properties
(could also be the constructor?)
+ARROW_FILE_IO = "pyiceberg.io.pyarrow.PyArrowFileIO"
+
+# Mappings from the Java FileIO impl to a Python one. The list is ordered by
preference.
+# If a implementation isn't installed, it will fall back to the next one.
+JAVA_FILE_IO_MAPPINGS: Dict[str, List[str]] = {
+ "org.apache.iceberg.dell.ecs.EcsFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.gcp.gcs.GCSFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.hadoop.HadoopFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.aliyun.oss.OSSFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.io.ResolvingFileIO": [ARROW_FILE_IO],
+ "org.apache.iceberg.aws.s3.S3FileIO": [ARROW_FILE_IO],
+}
+
+
+def _import_file_io(io_impl: str, properties: Properties) -> Optional[FileIO]:
Review Comment:
This looks good to me.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]