kosiew commented on code in PR #22948:
URL: https://github.com/apache/datafusion/pull/22948#discussion_r3426647959


##########
datafusion/sql/src/expr/mod.rs:
##########
@@ -55,6 +55,99 @@ mod unary_op;
 mod value;
 
 impl<S: ContextProvider> SqlToRel<'_, S> {
+    pub(crate) fn warn_on_null_equality_predicate(&self, predicate: &SQLExpr) {
+        fn null_value_span(expr: &SQLExpr) -> Option<Span> {
+            match expr {
+                SQLExpr::Value(ValueWithSpan {
+                    value: Value::Null,
+                    span,
+                }) => Span::try_from_sqlparser_span(*span),
+                _ => None,
+            }
+        }
+
+        fn null_equality_warning(expr: &SQLExpr) -> Option<Diagnostic> {
+            let SQLExpr::BinaryOp { left, op, right } = expr else {
+                return None;
+            };
+
+            let (message, help) = match op {
+                BinaryOperator::Eq => (
+                    "comparison with NULL using `=` always evaluates to NULL",
+                    "use `IS NULL` to check for NULL values",
+                ),
+                BinaryOperator::NotEq => (
+                    "comparison with NULL using `<>` always evaluates to NULL",
+                    "use `IS NOT NULL` to check for non-NULL values",
+                ),
+                _ => return None,
+            };
+
+            let null_span = null_value_span(left).or_else(|| 
null_value_span(right));
+            null_span.map(|null_span| {
+                Diagnostic::new_warning(
+                    message,
+                    Span::try_from_sqlparser_span(expr.span()),
+                )
+                .with_help(help, Some(null_span))
+            })
+        }
+

Review Comment:
   `null_value_span` currently returns `None` in two different cases: when the 
operand is not NULL, and when it is NULL but the sqlparser span is not 
available.
   
   Because `null_equality_warning` only emits through `null_span.map(...)`, a 
planner caller that provides a spanless SQL AST can silently miss the warning. 
I think spans should add location detail to diagnostics, not decide whether the 
warning exists.
   
   Could we split NULL detection from the optional span, and add regression 
coverage for an AST without locations?



##########
datafusion/sql/src/expr/mod.rs:
##########
@@ -55,6 +55,99 @@ mod unary_op;
 mod value;
 
 impl<S: ContextProvider> SqlToRel<'_, S> {
+    pub(crate) fn warn_on_null_equality_predicate(&self, predicate: &SQLExpr) {
+        fn null_value_span(expr: &SQLExpr) -> Option<Span> {
+            match expr {
+                SQLExpr::Value(ValueWithSpan {
+                    value: Value::Null,
+                    span,
+                }) => Span::try_from_sqlparser_span(*span),
+                _ => None,
+            }
+        }
+
+        fn null_equality_warning(expr: &SQLExpr) -> Option<Diagnostic> {
+            let SQLExpr::BinaryOp { left, op, right } = expr else {
+                return None;
+            };
+
+            let (message, help) = match op {
+                BinaryOperator::Eq => (
+                    "comparison with NULL using `=` always evaluates to NULL",
+                    "use `IS NULL` to check for NULL values",
+                ),
+                BinaryOperator::NotEq => (
+                    "comparison with NULL using `<>` always evaluates to NULL",
+                    "use `IS NOT NULL` to check for non-NULL values",
+                ),
+                _ => return None,
+            };
+
+            let null_span = null_value_span(left).or_else(|| 
null_value_span(right));
+            null_span.map(|null_span| {
+                Diagnostic::new_warning(
+                    message,
+                    Span::try_from_sqlparser_span(expr.span()),
+                )
+                .with_help(help, Some(null_span))
+            })
+        }
+
+        fn collect_null_equality_warnings(
+            expr: &SQLExpr,
+            warnings: &mut Vec<Diagnostic>,
+        ) {
+            if let Some(warning) = null_equality_warning(expr) {
+                warnings.push(warning);
+            }
+
+            match expr {
+                SQLExpr::BinaryOp { left, right, .. }
+                | SQLExpr::IsDistinctFrom(left, right)
+                | SQLExpr::IsNotDistinctFrom(left, right) => {
+                    collect_null_equality_warnings(left, warnings);
+                    collect_null_equality_warnings(right, warnings);
+                }
+                SQLExpr::Nested(expr)
+                | SQLExpr::UnaryOp { expr, .. }
+                | SQLExpr::IsFalse(expr)
+                | SQLExpr::IsNotFalse(expr)
+                | SQLExpr::IsTrue(expr)
+                | SQLExpr::IsNotTrue(expr)
+                | SQLExpr::IsUnknown(expr)
+                | SQLExpr::IsNotUnknown(expr)
+                | SQLExpr::OuterJoin(expr)
+                | SQLExpr::Prior(expr) => {
+                    collect_null_equality_warnings(expr, warnings);
+                }
+                SQLExpr::Case {
+                    operand,
+                    conditions,
+                    else_result,
+                    ..
+                } => {
+                    if let Some(operand) = operand {
+                        collect_null_equality_warnings(operand, warnings);
+                    }
+                    for condition in conditions {
+                        collect_null_equality_warnings(&condition.condition, 
warnings);
+                        collect_null_equality_warnings(&condition.result, 
warnings);
+                    }
+                    if let Some(else_result) = else_result {
+                        collect_null_equality_warnings(else_result, warnings);
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        let mut warnings = vec![];
+        collect_null_equality_warnings(predicate, &mut warnings);
+        for warning in warnings {
+            self.add_warning(warning);
+        }
+    }

Review Comment:
   The recursive scan looks like it only visits a small subset of SQL 
expressions that can contain children. That means comparisons in predicate 
context can be missed under wrappers such as `IS NULL`, `IS NOT NULL`, `CAST`, 
function arguments, `BETWEEN`, `IN`, `LIKE`, and similar expressions.
   
   For example, `WHERE (first_name = NULL) IS NULL` or `WHERE 
coalesce(first_name = NULL, false)` still contains an `= NULL` comparison 
inside a `WHERE` predicate, but it looks like no warning would be produced.
   
   Could we use a more complete AST traversal or visitor here, or otherwise 
cover all expression children that can appear under `WHERE`, `JOIN ON`, and 
`HAVING`? A regression test for at least one missed wrapper would be helpful 
too.



##########
datafusion/sql/src/expr/mod.rs:
##########
@@ -55,6 +55,99 @@ mod unary_op;
 mod value;
 
 impl<S: ContextProvider> SqlToRel<'_, S> {
+    pub(crate) fn warn_on_null_equality_predicate(&self, predicate: &SQLExpr) {
+        fn null_value_span(expr: &SQLExpr) -> Option<Span> {
+            match expr {
+                SQLExpr::Value(ValueWithSpan {
+                    value: Value::Null,
+                    span,
+                }) => Span::try_from_sqlparser_span(*span),
+                _ => None,
+            }
+        }
+
+        fn null_equality_warning(expr: &SQLExpr) -> Option<Diagnostic> {
+            let SQLExpr::BinaryOp { left, op, right } = expr else {
+                return None;
+            };
+
+            let (message, help) = match op {
+                BinaryOperator::Eq => (
+                    "comparison with NULL using `=` always evaluates to NULL",
+                    "use `IS NULL` to check for NULL values",
+                ),
+                BinaryOperator::NotEq => (
+                    "comparison with NULL using `<>` always evaluates to NULL",
+                    "use `IS NOT NULL` to check for non-NULL values",
+                ),
+                _ => return None,
+            };
+
+            let null_span = null_value_span(left).or_else(|| 
null_value_span(right));
+            null_span.map(|null_span| {
+                Diagnostic::new_warning(
+                    message,
+                    Span::try_from_sqlparser_span(expr.span()),
+                )
+                .with_help(help, Some(null_span))
+            })
+        }
+
+        fn collect_null_equality_warnings(
+            expr: &SQLExpr,
+            warnings: &mut Vec<Diagnostic>,
+        ) {
+            if let Some(warning) = null_equality_warning(expr) {
+                warnings.push(warning);
+            }
+
+            match expr {
+                SQLExpr::BinaryOp { left, right, .. }
+                | SQLExpr::IsDistinctFrom(left, right)
+                | SQLExpr::IsNotDistinctFrom(left, right) => {
+                    collect_null_equality_warnings(left, warnings);
+                    collect_null_equality_warnings(right, warnings);
+                }
+                SQLExpr::Nested(expr)
+                | SQLExpr::UnaryOp { expr, .. }
+                | SQLExpr::IsFalse(expr)
+                | SQLExpr::IsNotFalse(expr)
+                | SQLExpr::IsTrue(expr)
+                | SQLExpr::IsNotTrue(expr)
+                | SQLExpr::IsUnknown(expr)
+                | SQLExpr::IsNotUnknown(expr)
+                | SQLExpr::OuterJoin(expr)
+                | SQLExpr::Prior(expr) => {
+                    collect_null_equality_warnings(expr, warnings);
+                }
+                SQLExpr::Case {
+                    operand,
+                    conditions,
+                    else_result,
+                    ..
+                } => {
+                    if let Some(operand) = operand {
+                        collect_null_equality_warnings(operand, warnings);
+                    }
+                    for condition in conditions {
+                        collect_null_equality_warnings(&condition.condition, 
warnings);
+                        collect_null_equality_warnings(&condition.result, 
warnings);
+                    }
+                    if let Some(else_result) = else_result {
+                        collect_null_equality_warnings(else_result, warnings);
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        let mut warnings = vec![];
+        collect_null_equality_warnings(predicate, &mut warnings);
+        for warning in warnings {
+            self.add_warning(warning);
+        }
+    }
+
     pub(crate) fn sql_expr_to_logical_expr_with_alias(
         &self,
         sql: SQLExprWithAlias,

Review Comment:
   Small suggestion: it might be cleaner to pass an emission callback, or make 
the traversal a method, so warnings are emitted during traversal instead of 
collected into a temporary `Vec` and drained afterward. That would make the 
helper's side effect more explicit and avoid the collect-then-drain loop.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to