This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 78615d2b Remove 0.11.0 deprecated methods (#2983)
78615d2b is described below
commit 78615d2b80dbaf412ddd132105039c4f879490c0
Author: Alex Stephen <[email protected]>
AuthorDate: Thu Jan 29 20:38:24 2026 -0800
Remove 0.11.0 deprecated methods (#2983)
# Rationale for this change
We have a couple of features that were deprecated and scheduled for removal in 0.11.0. This change removes
them.
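As an illustration (not part of this commit), here is a minimal sketch of how a reader that previously set the removed `pyarrow.use-large-types-on-read` property could downcast large Arrow string columns explicitly after scanning; the catalog and table names below are hypothetical.

```python
import pyarrow as pa
from pyiceberg.catalog import load_catalog

# Hypothetical catalog and table names, for illustration only.
catalog = load_catalog("default")
tbl = catalog.load_table("default.example")

# Previously, tbl.io.properties["pyarrow.use-large-types-on-read"] = "False"
# forced small Arrow types on read; with the property removed, cast explicitly.
result = tbl.scan().to_arrow()
small_fields = [
    pa.field(f.name, pa.string()) if pa.types.is_large_string(f.type) else f
    for f in result.schema
]
result = result.cast(pa.schema(small_fields))
```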
## Are these changes tested?
Covered by the existing test suite; the integration test for the removed `pyarrow.use-large-types-on-read` behavior is deleted along with it.
## Are there any user-facing changes?
Yes: the deprecated `pyarrow.use-large-types-on-read` file IO property, the `use_large_types` argument of `ArrowProjectionVisitor`, and the `truncate_full_table` parameter of `update_snapshot_summaries` are removed.
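For reference, a hedged sketch of the call shape for `update_snapshot_summaries` after the `truncate_full_table` parameter is dropped; the summary values are illustrative only.

```python
from pyiceberg.table.snapshots import Operation, Summary, update_snapshot_summaries

# Illustrative values only: previous totals are merged into the new summary,
# and there is no longer a truncate_full_table flag to reset them.
merged = update_snapshot_summaries(
    summary=Summary(operation=Operation.APPEND),
    previous_summary={"total-data-files": "1", "total-records": "100"},
)
```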
---------
Co-authored-by: Kevin Liu <[email protected]>
---
pyiceberg/io/__init__.py | 1 -
pyiceberg/io/pyarrow.py | 22 -------------------
pyiceberg/table/snapshots.py | 47 +----------------------------------------
tests/integration/test_reads.py | 44 --------------------------------------
4 files changed, 1 insertion(+), 113 deletions(-)
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py
index 3df22be7..5592bc99 100644
--- a/pyiceberg/io/__init__.py
+++ b/pyiceberg/io/__init__.py
@@ -99,7 +99,6 @@ GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
GCS_VERSION_AWARE = "gcs.version-aware"
HF_ENDPOINT = "hf.endpoint"
HF_TOKEN = "hf.token"
-PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
@runtime_checkable
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 6a50e24d..a120c3b7 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -100,7 +100,6 @@ from pyiceberg.io import (
HDFS_KERB_TICKET,
HDFS_PORT,
HDFS_USER,
- PYARROW_USE_LARGE_TYPES_ON_READ,
S3_ACCESS_KEY_ID,
S3_ANONYMOUS,
S3_CONNECT_TIMEOUT,
@@ -179,7 +178,6 @@ from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.config import Config
from pyiceberg.utils.datetime import millis_to_datetime
from pyiceberg.utils.decimal import unscaled_to_decimal
-from pyiceberg.utils.deprecated import deprecation_message
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
from pyiceberg.utils.singleton import Singleton
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -1756,14 +1754,6 @@ class ArrowScan:
(pa.Table.from_batches([batch]) for batch in itertools.chain([first_batch], batches)), promote_options="permissive"
)
- if property_as_bool(self._io.properties, PYARROW_USE_LARGE_TYPES_ON_READ, False):
- deprecation_message(
- deprecated_in="0.10.0",
- removed_in="0.11.0",
- help_message=f"Property `{PYARROW_USE_LARGE_TYPES_ON_READ}`
will be removed.",
- )
- result = result.cast(arrow_schema)
-
return result
def to_record_batches(self, tasks: Iterable[FileScanTask]) -> Iterator[pa.RecordBatch]:
@@ -1872,7 +1862,6 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, pa.Array | None]
_file_schema: Schema
_include_field_ids: bool
_downcast_ns_timestamp_to_us: bool
- _use_large_types: bool | None
_projected_missing_fields: dict[int, Any]
_allow_timestamp_tz_mismatch: bool
@@ -1881,26 +1870,17 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, pa.Array | None]
file_schema: Schema,
downcast_ns_timestamp_to_us: bool = False,
include_field_ids: bool = False,
- use_large_types: bool | None = None,
projected_missing_fields: dict[int, Any] = EMPTY_DICT,
allow_timestamp_tz_mismatch: bool = False,
) -> None:
self._file_schema = file_schema
self._include_field_ids = include_field_ids
self._downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us
- self._use_large_types = use_large_types
self._projected_missing_fields = projected_missing_fields
# When True, allows projecting timestamptz (UTC) to timestamp (no tz).
# Allowed for reading (aligns with Spark); disallowed for writing to enforce Iceberg spec's strict typing.
self._allow_timestamp_tz_mismatch = allow_timestamp_tz_mismatch
- if use_large_types is not None:
- deprecation_message(
- deprecated_in="0.10.0",
- removed_in="0.11.0",
- help_message="Argument `use_large_types` will be removed from
ArrowProjectionVisitor",
- )
-
def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
file_field = self._file_schema.find_field(field.field_id)
@@ -1949,8 +1929,6 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, pa.Array | None]
target_schema = schema_to_pyarrow(
promote(file_field.field_type, field.field_type), include_field_ids=self._include_field_ids
)
- if self._use_large_types is False:
- target_schema = _pyarrow_schema_ensure_small_types(target_schema)
return values.cast(target_schema)
return values
diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
index a64cf7bd..7e4c6eb1 100644
--- a/pyiceberg/table/snapshots.py
+++ b/pyiceberg/table/snapshots.py
@@ -29,7 +29,6 @@ from pyiceberg.io import FileIO
from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
from pyiceberg.schema import Schema
-from pyiceberg.utils.deprecated import deprecation_message
if TYPE_CHECKING:
from pyiceberg.table.metadata import TableMetadata
@@ -344,54 +343,10 @@ class SnapshotSummaryCollector:
return ",".join([f"{prop}={val}" for prop, val in
update_metrics.to_dict().items()])
-def _truncate_table_summary(summary: Summary, previous_summary: Mapping[str, str]) -> Summary:
- for prop in {
- TOTAL_DATA_FILES,
- TOTAL_DELETE_FILES,
- TOTAL_RECORDS,
- TOTAL_FILE_SIZE,
- TOTAL_POSITION_DELETES,
- TOTAL_EQUALITY_DELETES,
- }:
- summary[prop] = "0"
-
- def get_prop(prop: str) -> int:
- value = previous_summary.get(prop) or "0"
- try:
- return int(value)
- except ValueError as e:
- raise ValueError(f"Could not parse summary property {prop} to an
int: {value}") from e
-
- if value := get_prop(TOTAL_DATA_FILES):
- summary[DELETED_DATA_FILES] = str(value)
- if value := get_prop(TOTAL_DELETE_FILES):
- summary[REMOVED_DELETE_FILES] = str(value)
- if value := get_prop(TOTAL_RECORDS):
- summary[DELETED_RECORDS] = str(value)
- if value := get_prop(TOTAL_FILE_SIZE):
- summary[REMOVED_FILE_SIZE] = str(value)
- if value := get_prop(TOTAL_POSITION_DELETES):
- summary[REMOVED_POSITION_DELETES] = str(value)
- if value := get_prop(TOTAL_EQUALITY_DELETES):
- summary[REMOVED_EQUALITY_DELETES] = str(value)
-
- return summary
-
-
-def update_snapshot_summaries(
- summary: Summary, previous_summary: Mapping[str, str] | None = None, truncate_full_table: bool = False
-) -> Summary:
+def update_snapshot_summaries(summary: Summary, previous_summary: Mapping[str, str] | None = None) -> Summary:
if summary.operation not in {Operation.APPEND, Operation.OVERWRITE, Operation.DELETE}:
raise ValueError(f"Operation not implemented: {summary.operation}")
- if truncate_full_table and summary.operation == Operation.OVERWRITE and previous_summary is not None:
- deprecation_message(
- deprecated_in="0.10.0",
- removed_in="0.11.0",
- help_message="The truncate-full-table shouldn't be used.",
- )
- summary = _truncate_table_summary(summary, previous_summary)
-
if not previous_summary:
previous_summary = {
TOTAL_DATA_FILES: "0",
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index c123ddb7..fb63bee4 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -45,7 +45,6 @@ from pyiceberg.expressions import (
NotNaN,
NotNull,
)
-from pyiceberg.io import PYARROW_USE_LARGE_TYPES_ON_READ
from pyiceberg.io.pyarrow import (
pyarrow_to_schema,
)
@@ -1125,49 +1124,6 @@ def test_table_scan_keep_types(catalog: Catalog) -> None:
assert result_table.schema.equals(expected_schema)
[email protected]
[email protected]("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
-def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
- identifier = "default.test_table_scan_override_with_small_types"
- arrow_table = pa.Table.from_arrays(
- [
- pa.array(["a", "b", "c"]),
- pa.array(["a", "b", "c"]),
- pa.array([b"a", b"b", b"c"]),
- pa.array([["a", "b"], ["c", "d"], ["e", "f"]]),
- ],
- names=["string", "string-to-binary", "binary", "list"],
- )
-
- try:
- catalog.drop_table(identifier)
- except NoSuchTableError:
- pass
-
- tbl = catalog.create_table(
- identifier,
- schema=arrow_table.schema,
- )
-
- tbl.append(arrow_table)
-
- with tbl.update_schema() as update_schema:
- update_schema.update_column("string-to-binary", BinaryType())
-
- tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "False"
- result_table = tbl.scan().to_arrow()
-
- expected_schema = pa.schema(
- [
- pa.field("string", pa.string()),
- pa.field("string-to-binary", pa.large_binary()),
- pa.field("binary", pa.binary()),
- pa.field("list", pa.list_(pa.string())),
- ]
- )
- assert result_table.schema.equals(expected_schema)
-
-
@pytest.mark.integration
@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
def test_empty_scan_ordered_str(catalog: Catalog) -> None: