This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 517b930d Make SetPredicate and Subclasses JSON Serializable with 
Pydantic (#2557)
517b930d is described below

commit 517b930da76c68021f849b4d6ff5dcafe4e6e15e
Author: Aniket <[email protected]>
AuthorDate: Thu Oct 9 23:19:13 2025 +0530

    Make SetPredicate and Subclasses JSON Serializable with Pydantic (#2557)
    
    #2524
    
    This PR addresses the issue by making the `SetPredicate` class and its
    subclasses (In, NotIn) JSON serializable using Pydantic.
    - Added tests to verify JSON serialization of `In` and `NotIn`
    predicates.
    
    Please let me know if my approach or fix needs any improvements. I’m
    open to feedback and happy to make changes based on suggestions.
    Thank you!
    
    ---------
    
    Co-authored-by: Fokko Driesprong <[email protected]>
---
 pyiceberg/expressions/__init__.py     | 22 +++++++++++++++++-----
 tests/expressions/test_expressions.py | 10 ++++++++++
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/pyiceberg/expressions/__init__.py 
b/pyiceberg/expressions/__init__.py
index a8c0fdf4..d0824cc3 100644
--- a/pyiceberg/expressions/__init__.py
+++ b/pyiceberg/expressions/__init__.py
@@ -31,6 +31,7 @@ from typing import (
     TypeVar,
     Union,
 )
+from typing import Literal as TypingLiteral
 
 from pydantic import Field
 
@@ -41,10 +42,15 @@ from pyiceberg.expressions.literals import (
     literal,
 )
 from pyiceberg.schema import Accessor, Schema
-from pyiceberg.typedef import IcebergRootModel, L, StructProtocol
+from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel, L, 
StructProtocol
 from pyiceberg.types import DoubleType, FloatType, NestedField
 from pyiceberg.utils.singleton import Singleton
 
+try:
+    from pydantic import ConfigDict
+except ImportError:
+    ConfigDict = dict
+
 
 def _to_unbound_term(term: Union[str, UnboundTerm[Any]]) -> UnboundTerm[Any]:
     return Reference(term) if isinstance(term, str) else term
@@ -571,12 +577,14 @@ class NotNaN(UnaryPredicate):
         return BoundNotNaN[L]
 
 
-class SetPredicate(UnboundPredicate[L], ABC):
-    literals: Set[Literal[L]]
+class SetPredicate(IcebergBaseModel, UnboundPredicate[L], ABC):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    type: TypingLiteral["in", "not-in"] = Field(default="in")
+    literals: Set[Literal[L]] = Field(alias="items")
 
     def __init__(self, term: Union[str, UnboundTerm[Any]], literals: 
Union[Iterable[L], Iterable[Literal[L]]]):
-        super().__init__(term)
-        self.literals = _to_literal_set(literals)
+        super().__init__(term=_to_unbound_term(term), 
items=_to_literal_set(literals))  # type: ignore
 
     def bind(self, schema: Schema, case_sensitive: bool = True) -> 
BoundSetPredicate[L]:
         bound_term = self.term.bind(schema, case_sensitive)
@@ -688,6 +696,8 @@ class BoundNotIn(BoundSetPredicate[L]):
 
 
 class In(SetPredicate[L]):
+    type: TypingLiteral["in"] = Field(default="in", alias="type")
+
     def __new__(  # type: ignore  # pylint: disable=W0221
         cls, term: Union[str, UnboundTerm[Any]], literals: Union[Iterable[L], 
Iterable[Literal[L]]]
     ) -> BooleanExpression:
@@ -710,6 +720,8 @@ class In(SetPredicate[L]):
 
 
 class NotIn(SetPredicate[L], ABC):
+    type: TypingLiteral["not-in"] = Field(default="not-in", alias="type")
+
     def __new__(  # type: ignore  # pylint: disable=W0221
         cls, term: Union[str, UnboundTerm[Any]], literals: Union[Iterable[L], 
Iterable[Literal[L]]]
     ) -> BooleanExpression:
diff --git a/tests/expressions/test_expressions.py 
b/tests/expressions/test_expressions.py
index bcbf25a1..5a0c8c92 100644
--- a/tests/expressions/test_expressions.py
+++ b/tests/expressions/test_expressions.py
@@ -873,6 +873,16 @@ def test_not_in() -> None:
     assert not_in == pickle.loads(pickle.dumps(not_in))
 
 
+def test_serialize_in() -> None:
+    pred = In(term="foo", literals=[1, 2, 3])
+    assert pred.model_dump_json() == 
'{"term":"foo","type":"in","items":[1,2,3]}'
+
+
+def test_serialize_not_in() -> None:
+    pred = NotIn(term="foo", literals=[1, 2, 3])
+    assert pred.model_dump_json() == 
'{"term":"foo","type":"not-in","items":[1,2,3]}'
+
+
 def test_bound_equal_to(term: BoundReference[Any]) -> None:
     bound_equal_to = BoundEqualTo(term, literal("a"))
     assert str(bound_equal_to) == f"BoundEqualTo(term={str(term)}, 
literal=literal('a'))"

Reply via email to