This is an automated email from the ASF dual-hosted git repository.

Fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 75cd77fda Fix precision loss in large integral string conversions (#3405)
75cd77fda is described below

commit 75cd77fda94a6bdd242c8d892d2bae5db595653c
Author: Minh Vu <[email protected]>
AuthorDate: Mon May 25 22:03:47 2026 +0200

    Fix precision loss in large integral string conversions (#3405)
    
    ## Summary
    
    Fixes precision loss when converting large integral strings in two
    runtime paths:
    
    - `StringLiteral.to(IntegerType/LongType)`
    - `partition_to_py(...)` for integral and time-based partition values
    backed by integers
    
    ## Root cause
    
    Both paths were converting through `float` before converting to `int`,
    which loses precision for values outside the IEEE-754 exact integer
    range.
    
    That caused valid 64-bit integers like `LongType.max` and
    `9007199254740993` to be corrupted.
    
    ## What changed
    
    - Replaced `int(float(...))` with exact integer parsing in
    `partition_to_py`
    - For `StringLiteral.to(IntegerType/LongType)`, exact integral strings
    now use exact integer parsing while fractional numeric strings retain
    the existing truncation behavior
    - Added regression tests for `LongType.max` and `9007199254740993`
    
    ## Validation
    
    - `uv run pytest tests/expressions/test_literals.py
    tests/test_conversions.py`
    
    Closes #3404.
---
 pyiceberg/conversions.py           |  2 +-
 pyiceberg/expressions/literals.py  | 19 +++++++++++++------
 tests/expressions/test_literals.py | 33 +++++++++++++++++++++++++++++++++
 tests/test_conversions.py          |  3 +++
 4 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/pyiceberg/conversions.py b/pyiceberg/conversions.py
index 42d996f75..268cbb93e 100644
--- a/pyiceberg/conversions.py
+++ b/pyiceberg/conversions.py
@@ -143,7 +143,7 @@ def _(primitive_type: PrimitiveType, value_str: str) -> int:
     _, _, exponent = Decimal(value_str).as_tuple()
     if exponent != 0:  # Raise if there are digits to the right of the decimal
         raise ValueError(f"Cannot convert partition value, value cannot have fractional digits for {primitive_type} partition")
-    return int(float(value_str))
+    return int(value_str)
 
 
 @partition_to_py.register(FloatType)
diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py
index 5bf70990b..50c6d2d61 100644
--- a/pyiceberg/expressions/literals.py
+++ b/pyiceberg/expressions/literals.py
@@ -68,6 +68,13 @@ from pyiceberg.utils.singleton import Singleton
 UUID_BYTES_LENGTH = 16
 
 
+def _parse_numeric_string(value: str) -> Decimal:
+    number = Decimal(value)
+    if not number.is_finite():
+        raise ValueError(f"Cannot convert non-finite numeric string: {value}")
+    return number
+
+
 class Literal(IcebergRootModel[L], Generic[L], ABC):  # type: ignore
     """Literal which has a value and can be converted between types."""
 
@@ -555,27 +562,27 @@ class StringLiteral(Literal[str]):
     @to.register(IntegerType)
     def _(self, type_var: IntegerType) -> Literal[int]:
         try:
-            number = int(float(self.value))
+            number = _parse_numeric_string(self.value)
 
             if IntegerType.max < number:
                 return IntAboveMax()
             elif IntegerType.min > number:
                 return IntBelowMin()
-            return LongLiteral(number)
-        except ValueError as e:
+            return LongLiteral(int(number))
+        except (ArithmeticError, OverflowError, ValueError) as e:
             raise ValueError(f"Could not convert {self.value} into a {type_var}") from e
 
     @to.register(LongType)
     def _(self, type_var: LongType) -> Literal[int]:
         try:
-            long_value = int(float(self.value))
+            long_value = _parse_numeric_string(self.value)
             if LongType.max < long_value:
                 return LongAboveMax()
             elif LongType.min > long_value:
                 return LongBelowMin()
             else:
-                return LongLiteral(long_value)
-        except (TypeError, ValueError) as e:
+                return LongLiteral(int(long_value))
+        except (ArithmeticError, OverflowError, TypeError, ValueError) as e:
             raise ValueError(f"Could not convert {self.value} into a {type_var}") from e
 
     @to.register(DateType)
diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py
index c3ace5d36..7a129685b 100644
--- a/tests/expressions/test_literals.py
+++ b/tests/expressions/test_literals.py
@@ -39,6 +39,7 @@ from pyiceberg.expressions.literals import (
     IntAboveMax,
     IntBelowMin,
     Literal,
+    LongAboveMax,
     LongLiteral,
     StringLiteral,
     TimeLiteral,
@@ -845,6 +846,38 @@ def test_string_to_int_min_value() -> None:
     assert isinstance(literal(str(IntegerType.min - 1)).to(IntegerType()), IntBelowMin)
 
 
+def test_string_to_long_max_value_without_precision_loss() -> None:
+    assert literal(str(LongType.max)).to(LongType()) == literal(LongType.max)
+
+
+def test_string_to_long_large_integer_without_precision_loss() -> None:
+    assert literal("9007199254740993").to(LongType()) == literal(9007199254740993)
+
+
+def test_string_to_long_decimal_like_integer_without_precision_loss() -> None:
+    assert literal("9007199254740993.0").to(LongType()) == literal(9007199254740993)
+
+
+def test_string_to_long_scientific_notation_integer_without_precision_loss() -> None:
+    assert literal("9007199254740993e0").to(LongType()) == literal(9007199254740993)
+
+
+def test_string_to_long_max_decimal_like_integer_without_precision_loss() -> None:
+    assert literal(f"{LongType.max}.0").to(LongType()) == literal(LongType.max)
+
+
+def test_string_to_integer_scientific_notation_without_regression() -> None:
+    assert literal("1e3").to(IntegerType()) == literal(1000)
+
+
+def test_string_to_integer_large_scientific_notation_above_max() -> None:
+    assert isinstance(literal("1e1000000").to(IntegerType()), IntAboveMax)
+
+
+def test_string_to_long_large_scientific_notation_above_max() -> None:
+    assert isinstance(literal("1e1000000").to(LongType()), LongAboveMax)
+
+
 def test_string_to_integer_type_invalid_value() -> None:
     with pytest.raises(ValueError) as e:
         _ = literal("abc").to(IntegerType())
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index e38bdbd6f..9b73b2db8 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -160,8 +160,11 @@ def test_unscaled_to_decimal(unscaled: int, scale: int, expected_result: Decimal
         (IntegerType(), "1", 1),
         (IntegerType(), "9999", 9999),
         (LongType(), "123456789", 123456789),
+        (LongType(), "9007199254740993", 9007199254740993),
+        (LongType(), str(LongType.max), LongType.max),
         (FloatType(), "1.1", 1.1),
         (DoubleType(), "99999.9", 99999.9),
+        (TimestampNanoType(), "9007199254740993", 9007199254740993),
         (DecimalType(5, 2), "123.45", Decimal("123.45")),
         (StringType(), "foo", "foo"),
         (UUIDType(), "f79c3e09-677c-4bbd-a479-3f349cb785e7", uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")),

Reply via email to