This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 78615d2b Remove 0.11.0 deprecated methods (#2983)
78615d2b is described below
commit 78615d2b80dbaf412ddd132105039c4f879490c0
Author: Alex Stephen <[email protected]>
AuthorDate: Thu Jan 29 20:38:24 2026 -0800
Remove 0.11.0 deprecated methods (#2983)
# Rationale for this change
We have a couple of features that were deprecated and scheduled for removal in 0.11.0. This change removes
them.
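As an illustration (not part of this commit), here is a minimal sketch of how a reader that previously set the removed `pyarrow.use-large-types-on-read` property could downcast large Arrow string columns explicitly after scanning; the catalog and table names below are hypothetical.

```python
import pyarrow as pa
from pyiceberg.catalog import load_catalog

# Hypothetical catalog and table names, for illustration only.
catalog = load_catalog("default")
tbl = catalog.load_table("default.example")

# Previously, tbl.io.properties["pyarrow.use-large-types-on-read"] = "False"
# forced small Arrow types on read; with the property removed, cast explicitly.
result = tbl.scan().to_arrow()
small_fields = [
    pa.field(f.name, pa.string()) if pa.types.is_large_string(f.type) else f
    for f in result.schema
]
result = result.cast(pa.schema(small_fields))
```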
## Are these changes tested?
Covered by the existing test suite; the integration test for the removed `pyarrow.use-large-types-on-read` behavior is deleted along with it.
## Are there any user-facing changes?
Yes: the deprecated `pyarrow.use-large-types-on-read` file IO property, the `use_large_types` argument of `ArrowProjectionVisitor`, and the `truncate_full_table` parameter of `update_snapshot_summaries` are removed.
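For reference, a hedged sketch of the call shape for `update_snapshot_summaries` after the `truncate_full_table` parameter is dropped; the summary values are illustrative only.

```python
from pyiceberg.table.snapshots import Operation, Summary, update_snapshot_summaries

# Illustrative values only: previous totals are merged into the new summary,
# and there is no longer a truncate_full_table flag to reset them.
merged = update_snapshot_summaries(
    summary=Summary(operation=Operation.APPEND),
    previous_summary={"total-data-files": "1", "total-records": "100"},
)
```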
---------
Co-authored-by: Kevin Liu <[email protected]>
---
pyiceberg/io/__init__.py | 1 -
pyiceberg/io/pyarrow.py | 22 -------------------
pyiceberg/table/snapshots.py | 47 +----------------------------------------
tests/integration/test_reads.py | 44 --------------------------------------
4 files changed, 1 insertion(+), 113 deletions(-)
diff --git a/pyiceberg/io/__init__.py b/pyiceberg/io/__init__.py
index 3df22be7..5592bc99 100644
--- a/pyiceberg/io/__init__.py
+++ b/pyiceberg/io/__init__.py
@@ -99,7 +99,6 @@ GCS_DEFAULT_LOCATION = "gcs.default-bucket-location"
GCS_VERSION_AWARE = "gcs.version-aware"
HF_ENDPOINT = "hf.endpoint"
HF_TOKEN = "hf.token"
-PYARROW_USE_LARGE_TYPES_ON_READ = "pyarrow.use-large-types-on-read"
@runtime_checkable
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
index 6a50e24d..a120c3b7 100644
--- a/pyiceberg/io/pyarrow.py
+++ b/pyiceberg/io/pyarrow.py
@@ -100,7 +100,6 @@ from pyiceberg.io import (
HDFS_KERB_TICKET,
HDFS_PORT,
HDFS_USER,
- PYARROW_USE_LARGE_TYPES_ON_READ,
S3_ACCESS_KEY_ID,
S3_ANONYMOUS,
S3_CONNECT_TIMEOUT,
@@ -179,7 +178,6 @@ from pyiceberg.utils.concurrent import ExecutorFactory
from pyiceberg.utils.config import Config
from pyiceberg.utils.datetime import millis_to_datetime
from pyiceberg.utils.decimal import unscaled_to_decimal
-from pyiceberg.utils.deprecated import deprecation_message
from pyiceberg.utils.properties import get_first_property_value, property_as_bool, property_as_int
from pyiceberg.utils.singleton import Singleton
from pyiceberg.utils.truncate import truncate_upper_bound_binary_string, truncate_upper_bound_text_string
@@ -1756,14 +1754,6 @@ class ArrowScan:
(pa.Table.from_batches([batch]) for batch in itertools.chain([first_batch], batches)), promote_options="permissive"
)
- if property_as_bool(self._io.properties, PYARROW_USE_LARGE_TYPES_ON_READ, False):
- deprecation_message(
- deprecated_in="0.10.0",
- removed_in="0.11.0",
- help_message=f"Property `{PYARROW_USE_LARGE_TYPES_ON_READ}`
will be removed.",
- )
- result = result.cast(arrow_schema)
-
return result
def to_record_batches(self, tasks: Iterable[FileScanTask]) -> Iterator[pa.RecordBatch]:
@@ -1872,7 +1862,6 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, pa.Array | None]
_file_schema: Schema
_include_field_ids: bool
_downcast_ns_timestamp_to_us: bool
- _use_large_types: bool | None
_projected_missing_fields: dict[int, Any]
_allow_timestamp_tz_mismatch: bool
@@ -1881,26 +1870,17 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, pa.Array | None]
file_schema: Schema,
downcast_ns_timestamp_to_us: bool = False,
include_field_ids: bool = False,
- use_large_types: bool | None = None,
projected_missing_fields: dict[int, Any] = EMPTY_DICT,
allow_timestamp_tz_mismatch: bool = False,
) -> None:
self._file_schema = file_schema
self._include_field_ids = include_field_ids
self._downcast_ns_timestamp_to_us = downcast_ns_timestamp_to_us
- self._use_large_types = use_large_types
self._projected_missing_fields = projected_missing_fields
# When True, allows projecting timestamptz (UTC) to timestamp (no tz).
# Allowed for reading (aligns with Spark); disallowed for writing to enforce Iceberg spec's strict typing.
self._allow_timestamp_tz_mismatch = allow_timestamp_tz_mismatch
- if use_large_types is not None:
- deprecation_message(
- deprecated_in="0.10.0",
- removed_in="0.11.0",
- help_message="Argument `use_large_types` will be removed from
ArrowProjectionVisitor",
- )
-
def _cast_if_needed(self, field: NestedField, values: pa.Array) -> pa.Array:
file_field = self._file_schema.find_field(field.field_id)
@@ -1949,8 +1929,6 @@ class ArrowProjectionVisitor(SchemaWithPartnerVisitor[pa.Array, pa.Array | None]
target_schema = schema_to_pyarrow(
promote(file_field.field_type, field.field_type), include_field_ids=self._include_field_ids
)
- if self._use_large_types is False:
- target_schema = _pyarrow_schema_ensure_small_types(target_schema)
return values.cast(target_schema)
return values
diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
index a64cf7bd..7e4c6eb1 100644
--- a/pyiceberg/table/snapshots.py
+++ b/pyiceberg/table/snapshots.py
@@ -29,7 +29,6 @@ from pyiceberg.io import FileIO
from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
from pyiceberg.schema import Schema
-from pyiceberg.utils.deprecated import deprecation_message
if TYPE_CHECKING:
from pyiceberg.table.metadata import TableMetadata
@@ -344,54 +343,10 @@ class SnapshotSummaryCollector:
return ",".join([f"{prop}={val}" for prop, val in
update_metrics.to_dict().items()])
-def _truncate_table_summary(summary: Summary, previous_summary: Mapping[str, str]) -> Summary:
- for prop in {
- TOTAL_DATA_FILES,
- TOTAL_DELETE_FILES,
- TOTAL_RECORDS,
- TOTAL_FILE_SIZE,
- TOTAL_POSITION_DELETES,
- TOTAL_EQUALITY_DELETES,
- }:
- summary[prop] = "0"
-
- def get_prop(prop: str) -> int:
- value = previous_summary.get(prop) or "0"
- try:
- return int(value)
- except ValueError as e:
- raise ValueError(f"Could not parse summary property {prop} to an
int: {value}") from e
-
- if value := get_prop(TOTAL_DATA_FILES):
- summary[DELETED_DATA_FILES] = str(value)
- if value := get_prop(TOTAL_DELETE_FILES):
- summary[REMOVED_DELETE_FILES] = str(value)
- if value := get_prop(TOTAL_RECORDS):
- summary[DELETED_RECORDS] = str(value)
- if value := get_prop(TOTAL_FILE_SIZE):
- summary[REMOVED_FILE_SIZE] = str(value)
- if value := get_prop(TOTAL_POSITION_DELETES):
- summary[REMOVED_POSITION_DELETES] = str(value)
- if value := get_prop(TOTAL_EQUALITY_DELETES):
- summary[REMOVED_EQUALITY_DELETES] = str(value)
-
- return summary
-
-
-def update_snapshot_summaries(
- summary: Summary, previous_summary: Mapping[str, str] | None = None, truncate_full_table: bool = False
-) -> Summary:
+def update_snapshot_summaries(summary: Summary, previous_summary: Mapping[str, str] | None = None) -> Summary:
if summary.operation not in {Operation.APPEND, Operation.OVERWRITE, Operation.DELETE}:
raise ValueError(f"Operation not implemented: {summary.operation}")
- if truncate_full_table and summary.operation == Operation.OVERWRITE and previous_summary is not None:
- deprecation_message(
- deprecated_in="0.10.0",
- removed_in="0.11.0",
- help_message="The truncate-full-table shouldn't be used.",
- )
- summary = _truncate_table_summary(summary, previous_summary)
-
if not previous_summary:
previous_summary = {
TOTAL_DATA_FILES: "0",
diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py
index c123ddb7..fb63bee4 100644
--- a/tests/integration/test_reads.py
+++ b/tests/integration/test_reads.py
@@ -45,7 +45,6 @@ from pyiceberg.expressions import (
NotNaN,
NotNull,
)
-from pyiceberg.io import PYARROW_USE_LARGE_TYPES_ON_READ
from pyiceberg.io.pyarrow import (
pyarrow_to_schema,
)
@@ -1125,49 +1124,6 @@ def test_table_scan_keep_types(catalog: Catalog) -> None:
assert result_table.schema.equals(expected_schema)
[email protected]
[email protected]("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
-def test_table_scan_override_with_small_types(catalog: Catalog) -> None:
- identifier = "default.test_table_scan_override_with_small_types"
- arrow_table = pa.Table.from_arrays(
- [
- pa.array(["a", "b", "c"]),
- pa.array(["a", "b", "c"]),
- pa.array([b"a", b"b", b"c"]),
- pa.array([["a", "b"], ["c", "d"], ["e", "f"]]),
- ],
- names=["string", "string-to-binary", "binary", "list"],
- )
-
- try:
- catalog.drop_table(identifier)
- except NoSuchTableError:
- pass
-
- tbl = catalog.create_table(
- identifier,
- schema=arrow_table.schema,
- )
-
- tbl.append(arrow_table)
-
- with tbl.update_schema() as update_schema:
- update_schema.update_column("string-to-binary", BinaryType())
-
- tbl.io.properties[PYARROW_USE_LARGE_TYPES_ON_READ] = "False"
- result_table = tbl.scan().to_arrow()
-
- expected_schema = pa.schema(
- [
- pa.field("string", pa.string()),
- pa.field("string-to-binary", pa.large_binary()),
- pa.field("binary", pa.binary()),
- pa.field("list", pa.list_(pa.string())),
- ]
- )
- assert result_table.schema.equals(expected_schema)
-
-
@pytest.mark.integration
@pytest.mark.parametrize("catalog",
[pytest.lazy_fixture("session_catalog_hive"),
pytest.lazy_fixture("session_catalog")])
def test_empty_scan_ordered_str(catalog: Catalog) -> None: