This is an automated email from the ASF dual-hosted git repository.
kevinjqliu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new ce28e004 Make Literal Pydantic serializeable (#2575)
ce28e004 is described below
commit ce28e0045a9949304d0a80fa0c518509a61ab158
Author: Fokko Driesprong <[email protected]>
AuthorDate: Tue Oct 7 03:45:01 2025 +0200
Make Literal Pydantic serializeable (#2575)
# Rationale for this change
Resolves #2572
## Are these changes tested?
## Are there any user-facing changes?
<!-- In the case of user-facing changes, please add the changelog label.
-->
---
pyiceberg/expressions/__init__.py | 2 +-
pyiceberg/expressions/literals.py | 49 ++++++++++++++++++++++++++++---------
tests/expressions/test_evaluator.py | 4 +--
tests/expressions/test_literals.py | 28 ++++++++++++++++-----
4 files changed, 63 insertions(+), 20 deletions(-)
diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py
index c3b5ae74..a8c0fdf4 100644
--- a/pyiceberg/expressions/__init__.py
+++ b/pyiceberg/expressions/__init__.py
@@ -696,7 +696,7 @@ class In(SetPredicate[L]):
if count == 0:
return AlwaysFalse()
elif count == 1:
- return EqualTo(term, next(iter(literals))) # type: ignore
+ return EqualTo(term, next(iter(literals)))
else:
return super().__new__(cls)
diff --git a/pyiceberg/expressions/literals.py b/pyiceberg/expressions/literals.py
index 921e24e2..0847f19c 100644
--- a/pyiceberg/expressions/literals.py
+++ b/pyiceberg/expressions/literals.py
@@ -30,7 +30,9 @@ from math import isnan
from typing import Any, Generic, Type
from uuid import UUID
-from pyiceberg.typedef import L
+from pydantic import Field, model_serializer
+
+from pyiceberg.typedef import IcebergRootModel, L
from pyiceberg.types import (
BinaryType,
BooleanType,
@@ -52,7 +54,9 @@ from pyiceberg.utils.datetime import (
date_str_to_days,
date_to_days,
datetime_to_micros,
+ days_to_date,
micros_to_days,
+ micros_to_timestamp,
time_str_to_micros,
time_to_micros,
timestamp_to_micros,
@@ -64,21 +68,24 @@ from pyiceberg.utils.singleton import Singleton
UUID_BYTES_LENGTH = 16
-class Literal(Generic[L], ABC):
+class Literal(IcebergRootModel[L], Generic[L], ABC): # type: ignore
"""Literal which has a value and can be converted between types."""
- _value: L
+ root: L = Field()
+
+ def __init__(self, value: L, value_type: Type[L], /, **data): # type: ignore
+ if value is None:
+ raise TypeError("Invalid literal value: None")
- def __init__(self, value: L, value_type: Type[L]):
+ super().__init__(value)
if value is None or not isinstance(value, value_type):
raise TypeError(f"Invalid literal value: {value!r} (not a {value_type})")
if isinstance(value, float) and isnan(value):
raise ValueError("Cannot create expression literal from NaN.")
- self._value = value
@property
def value(self) -> L:
- return self._value
+ return self.root
@singledispatchmethod
@abstractmethod
@@ -136,7 +143,7 @@ def literal(value: L) -> Literal[L]:
LongLiteral(123)
"""
if isinstance(value, float):
- return DoubleLiteral(value) # type: ignore
+ return DoubleLiteral(value)
elif isinstance(value, bool):
return BooleanLiteral(value)
elif isinstance(value, int):
@@ -144,17 +151,17 @@ def literal(value: L) -> Literal[L]:
elif isinstance(value, str):
return StringLiteral(value)
elif isinstance(value, UUID):
- return UUIDLiteral(value.bytes) # type: ignore
+ return UUIDLiteral(value.bytes)
elif isinstance(value, bytes):
return BinaryLiteral(value)
elif isinstance(value, Decimal):
return DecimalLiteral(value)
elif isinstance(value, datetime):
- return TimestampLiteral(datetime_to_micros(value)) # type: ignore
+ return TimestampLiteral(datetime_to_micros(value))
elif isinstance(value, date):
- return DateLiteral(date_to_days(value)) # type: ignore
+ return DateLiteral(date_to_days(value))
elif isinstance(value, time):
- return TimeLiteral(time_to_micros(value)) # type: ignore
+ return TimeLiteral(time_to_micros(value))
else:
raise TypeError(f"Invalid literal value: {repr(value)}")
@@ -411,6 +418,10 @@ class DateLiteral(Literal[int]):
def __init__(self, value: int) -> None:
super().__init__(value, int)
+ @model_serializer
+ def ser_model(self) -> date:
+ return days_to_date(self.root)
+
def increment(self) -> Literal[int]:
return DateLiteral(self.value + 1)
@@ -443,6 +454,10 @@ class TimestampLiteral(Literal[int]):
def __init__(self, value: int) -> None:
super().__init__(value, int)
+ @model_serializer
+ def ser_model(self) -> str:
+ return micros_to_timestamp(self.root).isoformat()
+
def increment(self) -> Literal[int]:
return TimestampLiteral(self.value + 1)
@@ -635,6 +650,10 @@ class UUIDLiteral(Literal[bytes]):
def __init__(self, value: bytes) -> None:
super().__init__(value, bytes)
+ @model_serializer
+ def ser_model(self) -> UUID:
+ return UUID(bytes=self.root)
+
@singledispatchmethod
def to(self, type_var: IcebergType) -> Literal: # type: ignore
raise TypeError(f"Cannot convert UUIDLiteral into {type_var}")
@@ -661,6 +680,10 @@ class FixedLiteral(Literal[bytes]):
def __init__(self, value: bytes) -> None:
super().__init__(value, bytes)
+ @model_serializer
+ def ser_model(self) -> str:
+ return self.root.hex()
+
@singledispatchmethod
def to(self, type_var: IcebergType) -> Literal: # type: ignore
raise TypeError(f"Cannot convert FixedLiteral into {type_var}")
@@ -692,6 +715,10 @@ class BinaryLiteral(Literal[bytes]):
def __init__(self, value: bytes) -> None:
super().__init__(value, bytes)
+ @model_serializer
+ def ser_model(self) -> str:
+ return self.root.hex()
+
@singledispatchmethod
def to(self, type_var: IcebergType) -> Literal: # type: ignore
raise TypeError(f"Cannot convert BinaryLiteral into {type_var}")
diff --git a/tests/expressions/test_evaluator.py b/tests/expressions/test_evaluator.py
index 7b150991..cfc32d9b 100644
--- a/tests/expressions/test_evaluator.py
+++ b/tests/expressions/test_evaluator.py
@@ -683,7 +683,7 @@ def data_file_nan() -> DataFile:
def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_file_nan: Schema, data_file_nan: DataFile) -> None:
- for operator in [LessThan, LessThanOrEqual]:
+ for operator in [LessThan, LessThanOrEqual]: # type: ignore
should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: all nan column doesn't contain number"
@@ -711,7 +711,7 @@ def test_inclusive_metrics_evaluator_less_than_and_less_than_equal(schema_data_f
def test_inclusive_metrics_evaluator_greater_than_and_greater_than_equal(
schema_data_file_nan: Schema, data_file_nan: DataFile
) -> None:
- for operator in [GreaterThan, GreaterThanOrEqual]:
+ for operator in [GreaterThan, GreaterThanOrEqual]: # type: ignore
should_read = _InclusiveMetricsEvaluator(schema_data_file_nan, operator("all_nan", 1)).eval(data_file_nan) # type: ignore[arg-type]
assert not should_read, "Should not match: all nan column doesn't contain number"
diff --git a/tests/expressions/test_literals.py b/tests/expressions/test_literals.py
index 4d8f5557..2137681e 100644
--- a/tests/expressions/test_literals.py
+++ b/tests/expressions/test_literals.py
@@ -319,8 +319,8 @@ def test_string_to_time_literal() -> None:
avro_val = 51661919000
- assert isinstance(time_lit, TimeLiteral) # type: ignore
- assert avro_val == time_lit.value # type: ignore
+ assert isinstance(time_lit, TimeLiteral)
+ assert avro_val == time_lit.value
def test_string_to_timestamp_literal() -> None:
@@ -428,8 +428,8 @@ def test_python_date_conversion() -> None:
from_str_lit = literal(one_day_str).to(DateType())
- assert isinstance(from_str_lit, DateLiteral) # type: ignore
- assert from_str_lit.value == 19079 # type: ignore
+ assert isinstance(from_str_lit, DateLiteral)
+ assert from_str_lit.value == 19079
@pytest.mark.parametrize(
@@ -911,7 +911,7 @@ def test_uuid_to_fixed() -> None:
with pytest.raises(TypeError) as e:
uuid_literal.to(FixedType(15))
assert "Cannot convert UUIDLiteral into fixed[15], different length: 15 <> 16" in str(e.value)
- assert isinstance(fixed_literal, FixedLiteral) # type: ignore
+ assert isinstance(fixed_literal, FixedLiteral)
def test_uuid_to_binary() -> None:
@@ -919,7 +919,7 @@ def test_uuid_to_binary() -> None:
uuid_literal = literal(test_uuid)
binary_literal = uuid_literal.to(BinaryType())
assert test_uuid.bytes == binary_literal.value
- assert isinstance(binary_literal, BinaryLiteral) # type: ignore
+ assert isinstance(binary_literal, BinaryLiteral)
def test_literal_from_datetime() -> None:
@@ -930,6 +930,22 @@ def test_literal_from_date() -> None:
assert isinstance(literal(datetime.date.today()), DateLiteral)
+def test_to_json() -> None:
+ assert literal(True).model_dump_json() == "true"
+ assert literal(float(123)).model_dump_json() == "123.0"
+ assert literal(123).model_dump_json() == "123"
+ assert literal("vo").model_dump_json() == '"vo"'
+ assert (
+ literal(uuid.UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7")).model_dump_json() == '"f79c3e09-677c-4bbd-a479-3f349cb785e7"'
+ )
+ assert literal(bytes([0x01, 0x02, 0x03])).model_dump_json() == '"010203"'
+ assert literal(Decimal("19.25")).model_dump_json() == '"19.25"'
+ assert literal(datetime.date.fromisoformat("2022-03-28")).model_dump_json() == '"2022-03-28"'
+ assert (
+ literal(datetime.datetime.fromisoformat("1970-11-22T00:00:00.000000+00:00")).model_dump_json() == '"1970-11-22T00:00:00"'
+ )
+
+
# __ __ ___
# | \/ |_ _| _ \_ _
# | |\/| | || | _/ || |