rdblue commented on code in PR #5362:
URL: https://github.com/apache/iceberg/pull/5362#discussion_r932479543
##########
python/pyiceberg/expressions/base.py:
##########
@@ -342,151 +313,197 @@ def __str__(self) -> str:
@dataclass(frozen=True)
-class AlwaysTrue(BooleanExpression, ABC, Singleton):
+class AlwaysTrue(BooleanExpression, Singleton):
"""TRUE expression"""
def __invert__(self) -> AlwaysFalse:
return AlwaysFalse()
@dataclass(frozen=True)
-class AlwaysFalse(BooleanExpression, ABC, Singleton):
+class AlwaysFalse(BooleanExpression, Singleton):
"""FALSE expression"""
def __invert__(self) -> AlwaysTrue:
return AlwaysTrue()
-class IsNull(UnboundPredicate[T]):
- def __invert__(self) -> NotNull:
- return NotNull(self.term)
+@dataclass(frozen=True)
+class UnaryPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+ as_bound: ClassVar[type]
+ term: UnboundTerm[T]
- def _validate_literals(self): # pylint: disable=W0238
- if self.literals is not None:
- raise AttributeError("Null is a unary predicate and takes no Literals.")
+ @abstractmethod
+ def __invert__(self) -> UnaryPredicate:
+ ...
- def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNull[T]:
- bound_ref = self.term.bind(schema, case_sensitive)
- return BoundIsNull(bound_ref)
+ def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+ bound_term = self.term.bind(schema, case_sensitive)
+ return self.as_bound(bound_term)
-class BoundIsNull(BoundPredicate[T]):
- def __invert__(self) -> BoundNotNull:
- return BoundNotNull(self.term)
+@dataclass(frozen=True)
+class BoundUnaryPredicate(Bound[T], BooleanExpression, ABC):
+ term: BoundTerm[T]
- def _validate_literals(self): # pylint: disable=W0238
- if self.literals:
- raise AttributeError("Null is a unary predicate and takes no Literals.")
+ @abstractmethod
+ def __invert__(self) -> BoundUnaryPredicate:
+ ...
-class NotNull(UnboundPredicate[T]):
- def __invert__(self) -> IsNull:
- return IsNull(self.term)
+class BoundIsNull(BoundUnaryPredicate[T]):
+ def __new__(cls, term: BoundTerm[T]):
+ if term.ref().field.required:
+ return AlwaysFalse()
+ return super().__new__(cls)
- def _validate_literals(self): # pylint: disable=W0238
- if self.literals:
- raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+ def __invert__(self) -> BoundNotNull:
+ return BoundNotNull(self.term)
- def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNull[T]:
- bound_ref = self.term.bind(schema, case_sensitive)
- return BoundNotNull(bound_ref)
+class BoundNotNull(BoundUnaryPredicate[T]):
+ def __new__(cls, term: BoundTerm[T]):
+ if term.ref().field.required:
+ return AlwaysTrue()
+ return super().__new__(cls)
-class BoundNotNull(BoundPredicate[T]):
def __invert__(self) -> BoundIsNull:
return BoundIsNull(self.term)
- def _validate_literals(self): # pylint: disable=W0238
- if self.literals:
- raise AttributeError("NotNull is a unary predicate and takes no Literals.")
+class IsNull(UnaryPredicate[T]):
+ as_bound = BoundIsNull
-class IsNaN(UnboundPredicate[T]):
- def __invert__(self) -> NotNaN:
- return NotNaN(self.term)
+ def __invert__(self) -> NotNull:
+ return NotNull(self.term)
- def _validate_literals(self): # pylint: disable=W0238
- if self.literals:
- raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
- def bind(self, schema: Schema, case_sensitive: bool) -> BoundIsNaN[T]:
- bound_ref = self.term.bind(schema, case_sensitive)
- return BoundIsNaN(bound_ref)
+class NotNull(UnaryPredicate[T]):
+ as_bound = BoundNotNull
+
+ def __invert__(self) -> IsNull:
+ return IsNull(self.term)
+
+class BoundIsNaN(BoundUnaryPredicate[T]):
+ def __new__(cls, term: BoundTerm[T]):
+ bound_type = term.ref().field.field_type
+ if type(bound_type) in {FloatType, DoubleType}:
+ return super().__new__(cls)
+ return AlwaysFalse()
-class BoundIsNaN(BoundPredicate[T]):
def __invert__(self) -> BoundNotNaN:
return BoundNotNaN(self.term)
- def _validate_literals(self): # pylint: disable=W0238
- if self.literals:
- raise AttributeError("IsNaN is a unary predicate and takes no Literals.")
+
+class BoundNotNaN(BoundUnaryPredicate[T]):
+ def __new__(cls, term: BoundTerm[T]):
+ bound_type = term.ref().field.field_type
+ if type(bound_type) in {FloatType, DoubleType}:
+ return super().__new__(cls)
+ return AlwaysTrue()
+
+ def __invert__(self) -> BoundIsNaN:
+ return BoundIsNaN(self.term)
-class NotNaN(UnboundPredicate[T]):
+class IsNaN(UnaryPredicate[T]):
+ as_bound = BoundIsNaN
+
+ def __invert__(self) -> NotNaN:
+ return NotNaN(self.term)
+
+
+class NotNaN(UnaryPredicate[T]):
+ as_bound = BoundNotNaN
+
def __invert__(self) -> IsNaN:
return IsNaN(self.term)
- def _validate_literals(self): # pylint: disable=W0238
- if self.literals:
- raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
- def bind(self, schema: Schema, case_sensitive: bool) -> BoundNotNaN[T]:
- bound_ref = self.term.bind(schema, case_sensitive)
- return BoundNotNaN(bound_ref)
+@dataclass(frozen=True)
+class SetPredicate(Unbound[T, BooleanExpression], BooleanExpression, ABC):
+ as_bound: ClassVar[type]
+ term: UnboundTerm[T]
+ literals: tuple[Literal[T], ...]
+
+ @abstractmethod
+ def __invert__(self) -> BooleanExpression:
+ ...
+ def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression:
+ bound_term = self.term.bind(schema, case_sensitive)
+ return self.as_bound(bound_term, {lit.to(bound_term.ref().field.field_type) for lit in self.literals})
-class BoundNotNaN(BoundPredicate[T]):
- def __invert__(self) -> BoundIsNaN:
- return BoundIsNaN(self.term)
- def _validate_literals(self): # pylint: disable=W0238
- if self.literals:
- raise AttributeError("NotNaN is a unary predicate and takes no Literals.")
+@dataclass(frozen=True)
+class BoundSetPredicate(Bound[T], BooleanExpression, ABC):
+ term: BoundTerm[T]
+ literals: set[Literal[T]]
+ @abstractmethod
+ def __invert__(self) -> BooleanExpression:
+ ...
-class BoundIn(BoundPredicate[T]):
- def _validate_literals(self): # pylint: disable=W0238
- if not self.literals:
- raise AttributeError("BoundIn must contain at least 1 literal.")
- def __invert__(self) -> BoundNotIn[T]:
- return BoundNotIn(self.term, *self.literals)
+class BoundIn(BoundSetPredicate[T]):
+ def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+ count = len(literals)
+ if count == 0:
+ return AlwaysFalse()
+ if count == 1:
+ return BoundEq(term, literals.pop())
+ else:
+ return super().__new__(cls)
+ def __invert__(self) -> BooleanExpression:
+ return BoundNotIn(self.term, self.literals)
-class In(UnboundPredicate[T]):
- def _validate_literals(self): # pylint: disable=W0238
- if not self.literals:
- raise AttributeError("In must contain at least 1 literal.")
- def __invert__(self) -> NotIn[T]:
- return NotIn(self.term, *self.literals)
+class BoundNotIn(BoundSetPredicate[T]):
+ def __new__(cls, term: BoundTerm[T], literals: set[Literal[T]]) -> BooleanExpression:
+ count = len(literals)
+ if count == 0:
+ return AlwaysTrue()
+ if count == 1:
+ return BoundNotEq(term, literals.pop())
+ else:
+ return super().__new__(cls)
- def bind(self, schema: Schema, case_sensitive: bool) -> BoundIn[T]:
- bound_ref = self.term.bind(schema, case_sensitive)
- return BoundIn(bound_ref, *tuple(lit.to(bound_ref.field.field_type) for lit in self.literals)) # type: ignore
+ def __invert__(self) -> BooleanExpression:
+ return BoundIn(self.term, self.literals)
-class BoundNotIn(BoundPredicate[T]):
- def _validate_literals(self): # pylint: disable=W0238
- if not self.literals:
- raise AttributeError("BoundNotIn must contain at least 1 literal.")
+class In(SetPredicate[T]):
+ as_bound = BoundIn
- def __invert__(self) -> BoundIn[T]:
- return BoundIn(self.term, *self.literals)
+ def __new__(cls, term: UnboundTerm[T], literals: tuple[Literal[T], ...]) -> BooleanExpression:
Review Comment:
I wanted to override `__new__` like this, but I didn't know how to make it
work with `@dataclass`. I looked it up and saw that you also have to implement
`__init__`. But maybe we don't want to use `@dataclass` anyway? It also seems
to cause strange mypy errors:
```
327: error: Only concrete class can be given where "Type[UnaryPredicate[T]]" is expected
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]