This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new b71dfd9d13 Python: Move io package base.py classes to __init__.py (#5456)
b71dfd9d13 is described below
commit b71dfd9d1375724d2bc34085fe38c961d0b41d16
Author: Samuel Redai <[email protected]>
AuthorDate: Sun Aug 7 16:08:46 2022 -0400
Python: Move io package base.py classes to __init__.py (#5456)
---
python/pyiceberg/avro/decoder.py | 2 +-
python/pyiceberg/avro/file.py | 2 +-
python/pyiceberg/io/__init__.py | 200 ++++++++++++++++++++++
python/pyiceberg/io/base.py | 216 ------------------------
python/pyiceberg/io/memory.py | 2 +-
python/pyiceberg/io/pyarrow.py | 2 +-
python/pyiceberg/manifest.py | 2 +-
python/pyiceberg/serializers.py | 2 +-
python/tests/avro/test_decoder.py | 2 +-
python/tests/avro/test_reader.py | 2 +-
python/tests/conftest.py | 4 +-
python/tests/io/{test_io_base.py => test_io.py} | 2 +-
python/tests/io/test_pyarrow.py | 2 +-
python/tests/utils/test_manifest.py | 2 +-
14 files changed, 213 insertions(+), 229 deletions(-)
diff --git a/python/pyiceberg/avro/decoder.py b/python/pyiceberg/avro/decoder.py
index 3a05cfeec3..776c8bae31 100644
--- a/python/pyiceberg/avro/decoder.py
+++ b/python/pyiceberg/avro/decoder.py
@@ -19,7 +19,7 @@ import struct
from datetime import date, datetime, time
from io import SEEK_CUR
-from pyiceberg.io.base import InputStream
+from pyiceberg.io import InputStream
from pyiceberg.utils.datetime import (
days_to_date,
micros_to_time,
diff --git a/python/pyiceberg/avro/file.py b/python/pyiceberg/avro/file.py
index 3b518a5184..cce705382f 100644
--- a/python/pyiceberg/avro/file.py
+++ b/python/pyiceberg/avro/file.py
@@ -28,7 +28,7 @@ from pyiceberg.avro.codecs import KNOWN_CODECS, Codec
from pyiceberg.avro.decoder import BinaryDecoder
from pyiceberg.avro.reader import AvroStruct, ConstructReader, StructReader
from pyiceberg.avro.resolver import resolve
-from pyiceberg.io.base import InputFile, InputStream
+from pyiceberg.io import InputFile, InputStream
from pyiceberg.io.memory import MemoryInputStream
from pyiceberg.schema import Schema, visit
from pyiceberg.types import (
diff --git a/python/pyiceberg/io/__init__.py b/python/pyiceberg/io/__init__.py
index 13a83393a9..6169dcdfd5 100644
--- a/python/pyiceberg/io/__init__.py
+++ b/python/pyiceberg/io/__init__.py
@@ -14,3 +14,203 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+"""Base FileIO classes for implementing reading and writing table files
+
+The FileIO abstraction includes a subset of full filesystem implementations. Specifically,
+Iceberg needs to read or write a file at a given location (as a seekable stream), as well
+as check if a file exists. An implementation of the FileIO abstract base class is responsible
+for returning an InputFile instance, an OutputFile instance, and deleting a file given
+its location.
+"""
+from abc import ABC, abstractmethod
+from io import SEEK_SET
+from typing import Protocol, Union, runtime_checkable
+
+
+@runtime_checkable
+class InputStream(Protocol):
+ """A protocol for the file-like object returned by InputFile.open(...)
+
+ This outlines the minimally required methods for a seekable input stream returned from an InputFile
+ implementation's `open(...)` method. These methods are a subset of IOBase/RawIOBase.
+ """
+
+ @abstractmethod
+ def read(self, size: int = 0) -> bytes:
+ ...
+
+ @abstractmethod
+ def seek(self, offset: int, whence: int = SEEK_SET) -> int:
+ ...
+
+ @abstractmethod
+ def tell(self) -> int:
+ ...
+
+ @abstractmethod
+ def close(self) -> None:
+ ...
+
+ @abstractmethod
+ def __enter__(self):
+ ...
+
+ @abstractmethod
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ ...
+
+
+@runtime_checkable
+class OutputStream(Protocol): # pragma: no cover
+ """A protocol for the file-like object returned by OutputFile.create(...)
+
+ This outlines the minimally required methods for a writable output stream returned from an OutputFile
+ implementation's `create(...)` method. These methods are a subset of IOBase/RawIOBase.
+ """
+
+ @abstractmethod
+ def write(self, b: bytes) -> int:
+ ...
+
+ @abstractmethod
+ def close(self) -> None:
+ ...
+
+ @abstractmethod
+ def __enter__(self):
+ ...
+
+ @abstractmethod
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ ...
+
+
+class InputFile(ABC):
+ """A base class for InputFile implementations
+
+ Args:
+ location(str): A URI or a path to a local file
+
+ Attributes:
+ location(str): The URI or path to a local file for an InputFile instance
+ exists(bool): Whether the file exists or not
+ """
+
+ def __init__(self, location: str):
+ self._location = location
+
+ @abstractmethod
+ def __len__(self) -> int:
+ """Returns the total length of the file, in bytes"""
+
+ @property
+ def location(self) -> str:
+ """The fully-qualified location of the input file"""
+ return self._location
+
+ @abstractmethod
+ def exists(self) -> bool:
+ """Checks whether the location exists
+
+
+ Raises:
+ PermissionError: If the file at self.location cannot be accessed due to a permission error
+ """
+
+ @abstractmethod
+ def open(self) -> InputStream:
+ """This method should return an object that matches the InputStream protocol
+
+ Returns:
+ InputStream: An object that matches the InputStream protocol
+
+ Raises:
+ PermissionError: If the file at self.location cannot be accessed due to a permission error
+ FileNotFoundError: If the file at self.location does not exist
+ """
+
+
+class OutputFile(ABC):
+ """A base class for OutputFile implementations
+
+ Args:
+ location(str): A URI or a path to a local file
+
+ Attributes:
+ location(str): The URI or path to a local file for an OutputFile instance
+ exists(bool): Whether the file exists or not
+ """
+
+ def __init__(self, location: str):
+ self._location = location
+
+ @abstractmethod
+ def __len__(self) -> int:
+ """Returns the total length of the file, in bytes"""
+
+ @property
+ def location(self) -> str:
+ """The fully-qualified location of the output file"""
+ return self._location
+
+ @abstractmethod
+ def exists(self) -> bool:
+ """Checks whether the location exists
+
+
+ Raises:
+ PermissionError: If the file at self.location cannot be accessed due to a permission error
+ """
+
+ @abstractmethod
+ def to_input_file(self) -> InputFile:
+ """Returns an InputFile for the location of this output file"""
+
+ @abstractmethod
+ def create(self, overwrite: bool = False) -> OutputStream:
+ """This method should return an object that matches the OutputStream protocol.
+
+ Args:
+ overwrite(bool): If the file already exists at `self.location`
+ and `overwrite` is False a FileExistsError should be raised
+
+ Returns:
+ OutputStream: An object that matches the OutputStream protocol
+
+ Raises:
+ PermissionError: If the file at self.location cannot be accessed due to a permission error
+ FileExistsError: If the file at self.location already exists and `overwrite=False`
+ """
+
+
+class FileIO(ABC):
+ """A base class for FileIO implementations"""
+
+ @abstractmethod
+ def new_input(self, location: str) -> InputFile:
+ """Get an InputFile instance to read bytes from the file at the given location
+
+ Args:
+ location(str): A URI or a path to a local file
+ """
+
+ @abstractmethod
+ def new_output(self, location: str) -> OutputFile:
+ """Get an OutputFile instance to write bytes to the file at the given location
+
+ Args:
+ location(str): A URI or a path to a local file
+ """
+
+ @abstractmethod
+ def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
+ """Delete the file at the given path
+
+ Args:
+ location(str, InputFile, OutputFile): A URI or a path to a local file--if an InputFile instance or
+ an OutputFile instance is provided, the location attribute for that instance is used as the URI to delete
+
+ Raises:
+ PermissionError: If the file at location cannot be accessed due to a permission error
+ FileNotFoundError: When the file at the provided location does not exist
+ """
diff --git a/python/pyiceberg/io/base.py b/python/pyiceberg/io/base.py
deleted file mode 100644
index 6169dcdfd5..0000000000
--- a/python/pyiceberg/io/base.py
+++ /dev/null
@@ -1,216 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""Base FileIO classes for implementing reading and writing table files
-
-The FileIO abstraction includes a subset of full filesystem implementations. Specifically,
-Iceberg needs to read or write a file at a given location (as a seekable stream), as well
-as check if a file exists. An implementation of the FileIO abstract base class is responsible
-for returning an InputFile instance, an OutputFile instance, and deleting a file given
-its location.
-"""
-from abc import ABC, abstractmethod
-from io import SEEK_SET
-from typing import Protocol, Union, runtime_checkable
-
-
-@runtime_checkable
-class InputStream(Protocol):
- """A protocol for the file-like object returned by InputFile.open(...)
-
- This outlines the minimally required methods for a seekable input stream returned from an InputFile
- implementation's `open(...)` method. These methods are a subset of IOBase/RawIOBase.
- """
-
- @abstractmethod
- def read(self, size: int = 0) -> bytes:
- ...
-
- @abstractmethod
- def seek(self, offset: int, whence: int = SEEK_SET) -> int:
- ...
-
- @abstractmethod
- def tell(self) -> int:
- ...
-
- @abstractmethod
- def close(self) -> None:
- ...
-
- @abstractmethod
- def __enter__(self):
- ...
-
- @abstractmethod
- def __exit__(self, exc_type, exc_val, exc_tb):
- ...
-
-
-@runtime_checkable
-class OutputStream(Protocol): # pragma: no cover
- """A protocol for the file-like object returned by OutputFile.create(...)
-
- This outlines the minimally required methods for a writable output stream returned from an OutputFile
- implementation's `create(...)` method. These methods are a subset of IOBase/RawIOBase.
- """
-
- @abstractmethod
- def write(self, b: bytes) -> int:
- ...
-
- @abstractmethod
- def close(self) -> None:
- ...
-
- @abstractmethod
- def __enter__(self):
- ...
-
- @abstractmethod
- def __exit__(self, exc_type, exc_val, exc_tb):
- ...
-
-
-class InputFile(ABC):
- """A base class for InputFile implementations
-
- Args:
- location(str): A URI or a path to a local file
-
- Attributes:
- location(str): The URI or path to a local file for an InputFile instance
- exists(bool): Whether the file exists or not
- """
-
- def __init__(self, location: str):
- self._location = location
-
- @abstractmethod
- def __len__(self) -> int:
- """Returns the total length of the file, in bytes"""
-
- @property
- def location(self) -> str:
- """The fully-qualified location of the input file"""
- return self._location
-
- @abstractmethod
- def exists(self) -> bool:
- """Checks whether the location exists
-
-
- Raises:
- PermissionError: If the file at self.location cannot be accessed due to a permission error
- """
-
- @abstractmethod
- def open(self) -> InputStream:
- """This method should return an object that matches the InputStream protocol
-
- Returns:
- InputStream: An object that matches the InputStream protocol
-
- Raises:
- PermissionError: If the file at self.location cannot be accessed due to a permission error
- FileNotFoundError: If the file at self.location does not exist
- """
-
-
-class OutputFile(ABC):
- """A base class for OutputFile implementations
-
- Args:
- location(str): A URI or a path to a local file
-
- Attributes:
- location(str): The URI or path to a local file for an OutputFile instance
- exists(bool): Whether the file exists or not
- """
-
- def __init__(self, location: str):
- self._location = location
-
- @abstractmethod
- def __len__(self) -> int:
- """Returns the total length of the file, in bytes"""
-
- @property
- def location(self) -> str:
- """The fully-qualified location of the output file"""
- return self._location
-
- @abstractmethod
- def exists(self) -> bool:
- """Checks whether the location exists
-
-
- Raises:
- PermissionError: If the file at self.location cannot be accessed due to a permission error
- """
-
- @abstractmethod
- def to_input_file(self) -> InputFile:
- """Returns an InputFile for the location of this output file"""
-
- @abstractmethod
- def create(self, overwrite: bool = False) -> OutputStream:
- """This method should return an object that matches the OutputStream protocol.
-
- Args:
- overwrite(bool): If the file already exists at `self.location`
- and `overwrite` is False a FileExistsError should be raised
-
- Returns:
- OutputStream: An object that matches the OutputStream protocol
-
- Raises:
- PermissionError: If the file at self.location cannot be accessed due to a permission error
- FileExistsError: If the file at self.location already exists and `overwrite=False`
- """
-
-
-class FileIO(ABC):
- """A base class for FileIO implementations"""
-
- @abstractmethod
- def new_input(self, location: str) -> InputFile:
- """Get an InputFile instance to read bytes from the file at the given location
-
- Args:
- location(str): A URI or a path to a local file
- """
-
- @abstractmethod
- def new_output(self, location: str) -> OutputFile:
- """Get an OutputFile instance to write bytes to the file at the given location
-
- Args:
- location(str): A URI or a path to a local file
- """
-
- @abstractmethod
- def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
- """Delete the file at the given path
-
- Args:
- location(str, InputFile, OutputFile): A URI or a path to a local file--if an InputFile instance or
- an OutputFile instance is provided, the location attribute for that instance is used as the URI to delete
-
- Raises:
- PermissionError: If the file at location cannot be accessed due to a permission error
- FileNotFoundError: When the file at the provided location does not exist
- """
diff --git a/python/pyiceberg/io/memory.py b/python/pyiceberg/io/memory.py
index 28db68c1f0..1ea17feac1 100644
--- a/python/pyiceberg/io/memory.py
+++ b/python/pyiceberg/io/memory.py
@@ -17,7 +17,7 @@
from io import SEEK_CUR, SEEK_END, SEEK_SET
-from pyiceberg.io.base import InputStream
+from pyiceberg.io import InputStream
class MemoryInputStream(InputStream):
diff --git a/python/pyiceberg/io/pyarrow.py b/python/pyiceberg/io/pyarrow.py
index 8268cdc7f0..c07a1118aa 100644
--- a/python/pyiceberg/io/pyarrow.py
+++ b/python/pyiceberg/io/pyarrow.py
@@ -28,7 +28,7 @@ from urllib.parse import urlparse
from pyarrow.fs import FileInfo, FileSystem, FileType
-from pyiceberg.io.base import (
+from pyiceberg.io import (
FileIO,
InputFile,
InputStream,
diff --git a/python/pyiceberg/manifest.py b/python/pyiceberg/manifest.py
index b1349390d9..ffb1c3b910 100644
--- a/python/pyiceberg/manifest.py
+++ b/python/pyiceberg/manifest.py
@@ -29,7 +29,7 @@ from pydantic import Field
from pyiceberg.avro.file import AvroFile
from pyiceberg.avro.reader import AvroStruct
-from pyiceberg.io.base import InputFile
+from pyiceberg.io import InputFile
from pyiceberg.schema import Schema
from pyiceberg.types import (
IcebergType,
diff --git a/python/pyiceberg/serializers.py b/python/pyiceberg/serializers.py
index d922cf7944..5906cb435d 100644
--- a/python/pyiceberg/serializers.py
+++ b/python/pyiceberg/serializers.py
@@ -19,7 +19,7 @@ import codecs
import json
from typing import Union
-from pyiceberg.io.base import InputFile, InputStream, OutputFile
+from pyiceberg.io import InputFile, InputStream, OutputFile
from pyiceberg.table.metadata import TableMetadata, TableMetadataV1, TableMetadataV2
diff --git a/python/tests/avro/test_decoder.py b/python/tests/avro/test_decoder.py
index 15b3029ec1..73aba1a1be 100644
--- a/python/tests/avro/test_decoder.py
+++ b/python/tests/avro/test_decoder.py
@@ -22,7 +22,7 @@ import pytest
from pyiceberg.avro.decoder import BinaryDecoder
from pyiceberg.avro.resolver import promote
-from pyiceberg.io.base import InputStream
+from pyiceberg.io import InputStream
from pyiceberg.io.memory import MemoryInputStream
from pyiceberg.types import DoubleType, FloatType
diff --git a/python/tests/avro/test_reader.py b/python/tests/avro/test_reader.py
index 9790590fd4..e4b0cb665e 100644
--- a/python/tests/avro/test_reader.py
+++ b/python/tests/avro/test_reader.py
@@ -53,7 +53,7 @@ from pyiceberg.types import (
TimestamptzType,
TimeType,
)
-from tests.io.test_io_base import LocalInputFile
+from tests.io.test_io import LocalInputFile
def test_read_header(generated_manifest_entry_file: str, iceberg_manifest_entry_schema: Schema):
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index 7aa9290390..e2072790f1 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -32,7 +32,7 @@ from urllib.parse import urlparse
import pytest
from pyiceberg import schema
-from pyiceberg.io.base import (
+from pyiceberg.io import (
FileIO,
InputFile,
OutputFile,
@@ -53,7 +53,7 @@ from pyiceberg.types import (
StructType,
)
from tests.catalog.test_base import InMemoryCatalog
-from tests.io.test_io_base import LocalInputFile
+from tests.io.test_io import LocalInputFile
class FooStruct:
diff --git a/python/tests/io/test_io_base.py b/python/tests/io/test_io.py
similarity index 99%
rename from python/tests/io/test_io_base.py
rename to python/tests/io/test_io.py
index 3165c4099a..c9bc60cd22 100644
--- a/python/tests/io/test_io_base.py
+++ b/python/tests/io/test_io.py
@@ -22,7 +22,7 @@ from urllib.parse import ParseResult, urlparse
import pytest
-from pyiceberg.io.base import (
+from pyiceberg.io import (
FileIO,
InputFile,
InputStream,
diff --git a/python/tests/io/test_pyarrow.py b/python/tests/io/test_pyarrow.py
index 67bdee4427..43dcd22a8d 100644
--- a/python/tests/io/test_pyarrow.py
+++ b/python/tests/io/test_pyarrow.py
@@ -23,7 +23,7 @@ from unittest.mock import MagicMock, patch
import pytest
from pyarrow.fs import FileType
-from pyiceberg.io.base import InputStream, OutputStream
+from pyiceberg.io import InputStream, OutputStream
from pyiceberg.io.pyarrow import PyArrowFile, PyArrowFileIO
diff --git a/python/tests/utils/test_manifest.py b/python/tests/utils/test_manifest.py
index 289989bed3..66b70a2dc0 100644
--- a/python/tests/utils/test_manifest.py
+++ b/python/tests/utils/test_manifest.py
@@ -23,7 +23,7 @@ from pyiceberg.manifest import (
read_manifest_entry,
read_manifest_list,
)
-from tests.io.test_io_base import LocalInputFile
+from tests.io.test_io import LocalInputFile
def test_read_manifest_entry(generated_manifest_entry_file: str):