alamb commented on code in PR #17813:
URL: https://github.com/apache/datafusion/pull/17813#discussion_r2414843209
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -647,6 +679,246 @@ impl ExprSchemable for Expr {
}
}
+/// Represents the possible values for SQL's three valued logic.
+/// `Option<bool>` is not used for this since `None` is used to represent
+/// inconclusive answers already.
+enum TriStateBool {
+ True,
+ False,
+ Uncertain,
+}
+
+impl TryFrom<&ScalarValue> for TriStateBool {
+ type Error = DataFusionError;
+
+ fn try_from(value: &ScalarValue) -> std::result::Result<Self, Self::Error>
{
+ match value {
+ ScalarValue::Null => Ok(TriStateBool::Uncertain),
+ ScalarValue::Boolean(b) => Ok(match b {
+ None => TriStateBool::Uncertain,
+ Some(true) => TriStateBool::True,
+ Some(false) => TriStateBool::False,
+ }),
+ _ => Self::try_from(&value.cast_to(&DataType::Boolean)?),
+ }
+ }
+}
+
+struct WhenThenConstEvaluator<'a> {
+ then_expr: &'a Expr,
+ input_schema: &'a dyn ExprSchema,
+}
+
+impl WhenThenConstEvaluator<'_> {
+ /// Attempts to const evaluate the given predicate.
+ /// Returns a `Some` value containing the const result if so; otherwise
returns `None`.
+ fn const_eval_predicate(&self, predicate: &Expr) -> Option<TriStateBool> {
+ match predicate {
+ // Literal null is equivalent to boolean uncertain
+ Expr::Literal(scalar, _) => TriStateBool::try_from(scalar).ok(),
Review Comment:
I always worry about ignoring errors as creating a DataFusionError is quite
expensive (it allocates a String and we have seen it show up in traces before)
However, I changed this to the following locally and all the tests still pass
```rust
Expr::Literal(scalar, _) =>
Some(TriStateBool::try_from(scalar).unwrap()),
```
So I think this would only discard errors if the `Expr `was not correctly
type coerced.
Thus I think this is ok
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +1102,137 @@ mod tests {
assert!(expr.nullable(&get_schema(false)).unwrap());
}
+ fn assert_nullability(expr: &Expr, schema: &dyn ExprSchema, expected:
bool) {
+ assert_eq!(
+ expr.nullable(schema).unwrap(),
+ expected,
+ "Nullability of '{expr}' should be {expected}"
+ );
+ }
+
+ fn assert_not_nullable(expr: &Expr, schema: &dyn ExprSchema) {
+ assert_nullability(expr, schema, false);
+ }
+
+ fn assert_nullable(expr: &Expr, schema: &dyn ExprSchema) {
+ assert_nullability(expr, schema, true);
+ }
+
+ #[test]
+ fn test_case_expression_nullability() -> Result<()> {
Review Comment:
this is an impressive list of tests
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -647,6 +679,246 @@ impl ExprSchemable for Expr {
}
}
+/// Represents the possible values for SQL's three valued logic.
+/// `Option<bool>` is not used for this since `None` is used to represent
+/// inconclusive answers already.
+enum TriStateBool {
+ True,
+ False,
+ Uncertain,
+}
+
+impl TryFrom<&ScalarValue> for TriStateBool {
+ type Error = DataFusionError;
+
+ fn try_from(value: &ScalarValue) -> std::result::Result<Self, Self::Error>
{
+ match value {
+ ScalarValue::Null => Ok(TriStateBool::Uncertain),
+ ScalarValue::Boolean(b) => Ok(match b {
+ None => TriStateBool::Uncertain,
+ Some(true) => TriStateBool::True,
+ Some(false) => TriStateBool::False,
+ }),
+ _ => Self::try_from(&value.cast_to(&DataType::Boolean)?),
+ }
+ }
+}
+
+struct WhenThenConstEvaluator<'a> {
+ then_expr: &'a Expr,
+ input_schema: &'a dyn ExprSchema,
+}
+
+impl WhenThenConstEvaluator<'_> {
+ /// Attempts to const evaluate the given predicate.
+ /// Returns a `Some` value containing the const result if so; otherwise
returns `None`.
+ fn const_eval_predicate(&self, predicate: &Expr) -> Option<TriStateBool> {
+ match predicate {
+ // Literal null is equivalent to boolean uncertain
+ Expr::Literal(scalar, _) => TriStateBool::try_from(scalar).ok(),
+ Expr::IsNotNull(e) => {
+ if let Ok(false) = e.nullable(self.input_schema) {
+ // If `e` is not nullable, then `e IS NOT NULL` is always
true
+ return Some(TriStateBool::True);
+ }
+
+ match e.get_type(self.input_schema) {
+ Ok(DataType::Boolean) => match
self.const_eval_predicate(e) {
+ Some(TriStateBool::True) | Some(TriStateBool::False)
=> {
+ Some(TriStateBool::True)
+ }
+ Some(TriStateBool::Uncertain) =>
Some(TriStateBool::False),
+ None => None,
+ },
+ Ok(_) => match self.is_null(e) {
+ Some(true) => Some(TriStateBool::False),
+ Some(false) => Some(TriStateBool::True),
+ None => None,
+ },
+ Err(_) => None,
+ }
+ }
+ Expr::IsNull(e) => {
+ if let Ok(false) = e.nullable(self.input_schema) {
+ // If `e` is not nullable, then `e IS NULL` is always false
+ return Some(TriStateBool::False);
+ }
+
+ match e.get_type(self.input_schema) {
+ Ok(DataType::Boolean) => match
self.const_eval_predicate(e) {
+ Some(TriStateBool::True) | Some(TriStateBool::False)
=> {
+ Some(TriStateBool::False)
+ }
+ Some(TriStateBool::Uncertain) =>
Some(TriStateBool::True),
+ None => None,
+ },
+ Ok(_) => match self.is_null(e) {
+ Some(true) => Some(TriStateBool::True),
+ Some(false) => Some(TriStateBool::False),
+ None => None,
+ },
+ Err(_) => None,
+ }
+ }
+ Expr::IsTrue(e) => match self.const_eval_predicate(e) {
Review Comment:
Unrelated to this PR -- I can't help but feel `IS TRUE`, `IS NOT TRUE`, etc
is pretty redundant and we could probably have had a simpler `Expr`
representation for this. No action required, just a musing
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]