kosiew commented on code in PR #22948:
URL: https://github.com/apache/datafusion/pull/22948#discussion_r3426647959
##########
datafusion/sql/src/expr/mod.rs:
##########
@@ -55,6 +55,99 @@ mod unary_op;
mod value;
impl<S: ContextProvider> SqlToRel<'_, S> {
+ pub(crate) fn warn_on_null_equality_predicate(&self, predicate: &SQLExpr) {
+ fn null_value_span(expr: &SQLExpr) -> Option<Span> {
+ match expr {
+ SQLExpr::Value(ValueWithSpan {
+ value: Value::Null,
+ span,
+ }) => Span::try_from_sqlparser_span(*span),
+ _ => None,
+ }
+ }
+
+ fn null_equality_warning(expr: &SQLExpr) -> Option<Diagnostic> {
+ let SQLExpr::BinaryOp { left, op, right } = expr else {
+ return None;
+ };
+
+ let (message, help) = match op {
+ BinaryOperator::Eq => (
+ "comparison with NULL using `=` always evaluates to NULL",
+ "use `IS NULL` to check for NULL values",
+ ),
+ BinaryOperator::NotEq => (
+ "comparison with NULL using `<>` always evaluates to NULL",
+ "use `IS NOT NULL` to check for non-NULL values",
+ ),
+ _ => return None,
+ };
+
+ let null_span = null_value_span(left).or_else(||
null_value_span(right));
+ null_span.map(|null_span| {
+ Diagnostic::new_warning(
+ message,
+ Span::try_from_sqlparser_span(expr.span()),
+ )
+ .with_help(help, Some(null_span))
+ })
+ }
+
Review Comment:
`null_value_span` currently returns `None` in two different cases: when the
operand is not NULL, and when it is NULL but the sqlparser span is not
available.
Because `null_equality_warning` only emits through `null_span.map(...)`, a
planner caller that provides a spanless SQL AST can silently miss the warning.
I think spans should add location detail to diagnostics, not decide whether the
warning exists.
Could we split NULL detection from the optional span, and add regression
coverage for an AST without locations?
##########
datafusion/sql/src/expr/mod.rs:
##########
@@ -55,6 +55,99 @@ mod unary_op;
mod value;
impl<S: ContextProvider> SqlToRel<'_, S> {
+ pub(crate) fn warn_on_null_equality_predicate(&self, predicate: &SQLExpr) {
+ fn null_value_span(expr: &SQLExpr) -> Option<Span> {
+ match expr {
+ SQLExpr::Value(ValueWithSpan {
+ value: Value::Null,
+ span,
+ }) => Span::try_from_sqlparser_span(*span),
+ _ => None,
+ }
+ }
+
+ fn null_equality_warning(expr: &SQLExpr) -> Option<Diagnostic> {
+ let SQLExpr::BinaryOp { left, op, right } = expr else {
+ return None;
+ };
+
+ let (message, help) = match op {
+ BinaryOperator::Eq => (
+ "comparison with NULL using `=` always evaluates to NULL",
+ "use `IS NULL` to check for NULL values",
+ ),
+ BinaryOperator::NotEq => (
+ "comparison with NULL using `<>` always evaluates to NULL",
+ "use `IS NOT NULL` to check for non-NULL values",
+ ),
+ _ => return None,
+ };
+
+ let null_span = null_value_span(left).or_else(||
null_value_span(right));
+ null_span.map(|null_span| {
+ Diagnostic::new_warning(
+ message,
+ Span::try_from_sqlparser_span(expr.span()),
+ )
+ .with_help(help, Some(null_span))
+ })
+ }
+
+ fn collect_null_equality_warnings(
+ expr: &SQLExpr,
+ warnings: &mut Vec<Diagnostic>,
+ ) {
+ if let Some(warning) = null_equality_warning(expr) {
+ warnings.push(warning);
+ }
+
+ match expr {
+ SQLExpr::BinaryOp { left, right, .. }
+ | SQLExpr::IsDistinctFrom(left, right)
+ | SQLExpr::IsNotDistinctFrom(left, right) => {
+ collect_null_equality_warnings(left, warnings);
+ collect_null_equality_warnings(right, warnings);
+ }
+ SQLExpr::Nested(expr)
+ | SQLExpr::UnaryOp { expr, .. }
+ | SQLExpr::IsFalse(expr)
+ | SQLExpr::IsNotFalse(expr)
+ | SQLExpr::IsTrue(expr)
+ | SQLExpr::IsNotTrue(expr)
+ | SQLExpr::IsUnknown(expr)
+ | SQLExpr::IsNotUnknown(expr)
+ | SQLExpr::OuterJoin(expr)
+ | SQLExpr::Prior(expr) => {
+ collect_null_equality_warnings(expr, warnings);
+ }
+ SQLExpr::Case {
+ operand,
+ conditions,
+ else_result,
+ ..
+ } => {
+ if let Some(operand) = operand {
+ collect_null_equality_warnings(operand, warnings);
+ }
+ for condition in conditions {
+ collect_null_equality_warnings(&condition.condition,
warnings);
+ collect_null_equality_warnings(&condition.result,
warnings);
+ }
+ if let Some(else_result) = else_result {
+ collect_null_equality_warnings(else_result, warnings);
+ }
+ }
+ _ => {}
+ }
+ }
+
+ let mut warnings = vec![];
+ collect_null_equality_warnings(predicate, &mut warnings);
+ for warning in warnings {
+ self.add_warning(warning);
+ }
+ }
Review Comment:
The recursive scan looks like it only visits a small subset of SQL
expressions that can contain children. That means comparisons in predicate
context can be missed under wrappers such as `IS NULL`, `IS NOT NULL`, `CAST`,
function arguments, `BETWEEN`, `IN`, `LIKE`, and similar expressions.
For example, `WHERE (first_name = NULL) IS NULL` or `WHERE
coalesce(first_name = NULL, false)` still contains an `= NULL` comparison
inside a `WHERE` predicate, but it looks like no warning would be produced.
Could we use a more complete AST traversal or visitor here, or otherwise
cover all expression children that can appear under `WHERE`, `JOIN ON`, and
`HAVING`? A regression test for at least one missed wrapper would be helpful
too.
##########
datafusion/sql/src/expr/mod.rs:
##########
@@ -55,6 +55,99 @@ mod unary_op;
mod value;
impl<S: ContextProvider> SqlToRel<'_, S> {
+ pub(crate) fn warn_on_null_equality_predicate(&self, predicate: &SQLExpr) {
+ fn null_value_span(expr: &SQLExpr) -> Option<Span> {
+ match expr {
+ SQLExpr::Value(ValueWithSpan {
+ value: Value::Null,
+ span,
+ }) => Span::try_from_sqlparser_span(*span),
+ _ => None,
+ }
+ }
+
+ fn null_equality_warning(expr: &SQLExpr) -> Option<Diagnostic> {
+ let SQLExpr::BinaryOp { left, op, right } = expr else {
+ return None;
+ };
+
+ let (message, help) = match op {
+ BinaryOperator::Eq => (
+ "comparison with NULL using `=` always evaluates to NULL",
+ "use `IS NULL` to check for NULL values",
+ ),
+ BinaryOperator::NotEq => (
+ "comparison with NULL using `<>` always evaluates to NULL",
+ "use `IS NOT NULL` to check for non-NULL values",
+ ),
+ _ => return None,
+ };
+
+ let null_span = null_value_span(left).or_else(||
null_value_span(right));
+ null_span.map(|null_span| {
+ Diagnostic::new_warning(
+ message,
+ Span::try_from_sqlparser_span(expr.span()),
+ )
+ .with_help(help, Some(null_span))
+ })
+ }
+
+ fn collect_null_equality_warnings(
+ expr: &SQLExpr,
+ warnings: &mut Vec<Diagnostic>,
+ ) {
+ if let Some(warning) = null_equality_warning(expr) {
+ warnings.push(warning);
+ }
+
+ match expr {
+ SQLExpr::BinaryOp { left, right, .. }
+ | SQLExpr::IsDistinctFrom(left, right)
+ | SQLExpr::IsNotDistinctFrom(left, right) => {
+ collect_null_equality_warnings(left, warnings);
+ collect_null_equality_warnings(right, warnings);
+ }
+ SQLExpr::Nested(expr)
+ | SQLExpr::UnaryOp { expr, .. }
+ | SQLExpr::IsFalse(expr)
+ | SQLExpr::IsNotFalse(expr)
+ | SQLExpr::IsTrue(expr)
+ | SQLExpr::IsNotTrue(expr)
+ | SQLExpr::IsUnknown(expr)
+ | SQLExpr::IsNotUnknown(expr)
+ | SQLExpr::OuterJoin(expr)
+ | SQLExpr::Prior(expr) => {
+ collect_null_equality_warnings(expr, warnings);
+ }
+ SQLExpr::Case {
+ operand,
+ conditions,
+ else_result,
+ ..
+ } => {
+ if let Some(operand) = operand {
+ collect_null_equality_warnings(operand, warnings);
+ }
+ for condition in conditions {
+ collect_null_equality_warnings(&condition.condition,
warnings);
+ collect_null_equality_warnings(&condition.result,
warnings);
+ }
+ if let Some(else_result) = else_result {
+ collect_null_equality_warnings(else_result, warnings);
+ }
+ }
+ _ => {}
+ }
+ }
+
+ let mut warnings = vec![];
+ collect_null_equality_warnings(predicate, &mut warnings);
+ for warning in warnings {
+ self.add_warning(warning);
+ }
+ }
+
pub(crate) fn sql_expr_to_logical_expr_with_alias(
&self,
sql: SQLExprWithAlias,
Review Comment:
Small suggestion: it might be cleaner to pass an emission callback, or make
the traversal a method, so warnings are emitted during traversal instead of
collected into a temporary `Vec` and drained afterward. That would make the
helper's side effect more explicit and avoid the collect-then-drain loop.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]