alamb commented on code in PR #17813:
URL: https://github.com/apache/datafusion/pull/17813#discussion_r2388181962
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
assert!(expr.nullable(&get_schema(false)).unwrap());
}
+ fn check_nullability(
+ expr: Expr,
+ nullable: bool,
+ get_schema: fn(bool) -> MockExprSchema,
+ ) -> Result<()> {
+ assert_eq!(
+ expr.nullable(&get_schema(true))?,
+ nullable,
+ "Nullability of '{expr}' should be {nullable} when column is
nullable"
+ );
+ assert!(
+ !expr.nullable(&get_schema(false))?,
+ "Nullability of '{expr}' should be false when column is not
nullable"
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_case_expression_nullability() -> Result<()> {
+ let get_schema = |nullable| {
+ MockExprSchema::new()
+ .with_data_type(DataType::Int32)
+ .with_nullable(nullable)
+ };
+
+ check_nullability(
+ when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+ .otherwise(lit(0))?,
Review Comment:
Minor: You can probably make this more concise using the `eq` method,
something like this:
```suggestion
when(col("foo").eq(lit(5))), col("foo")).otherwise(lit(0))?,
```
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
assert!(expr.nullable(&get_schema(false)).unwrap());
}
+ fn check_nullability(
+ expr: Expr,
+ nullable: bool,
+ get_schema: fn(bool) -> MockExprSchema,
+ ) -> Result<()> {
+ assert_eq!(
+ expr.nullable(&get_schema(true))?,
+ nullable,
+ "Nullability of '{expr}' should be {nullable} when column is
nullable"
+ );
+ assert!(
+ !expr.nullable(&get_schema(false))?,
+ "Nullability of '{expr}' should be false when column is not
nullable"
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_case_expression_nullability() -> Result<()> {
+ let get_schema = |nullable| {
+ MockExprSchema::new()
+ .with_data_type(DataType::Int32)
+ .with_nullable(nullable)
+ };
+
+ check_nullability(
+ when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+ .otherwise(lit(0))?,
+ true,
Review Comment:
technically this could also be reported as `false`, given that if `foo` is
null, then the expr resolves to `0` (non null)
```sql
> create table t(foo int) as values (0), (NULL), (5);
0 row(s) fetched.
Elapsed 0.001 seconds.
> select foo, CASE WHEN foo=5 THEN foo ELSE 0 END from t;
+------+---------------------------------------------------------+
| foo | CASE WHEN t.foo = Int64(5) THEN t.foo ELSE Int64(0) END |
+------+---------------------------------------------------------+
| 0 | 0 |
| NULL | 0 |
| 5 | 5 |
+------+---------------------------------------------------------+
3 row(s) fetched.
Elapsed 0.002 seconds.
```
However, maybe we can improve that in a follow on PR
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
assert!(expr.nullable(&get_schema(false)).unwrap());
}
+ fn check_nullability(
+ expr: Expr,
+ nullable: bool,
+ get_schema: fn(bool) -> MockExprSchema,
+ ) -> Result<()> {
+ assert_eq!(
+ expr.nullable(&get_schema(true))?,
+ nullable,
+ "Nullability of '{expr}' should be {nullable} when column is
nullable"
+ );
+ assert!(
+ !expr.nullable(&get_schema(false))?,
+ "Nullability of '{expr}' should be false when column is not
nullable"
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_case_expression_nullability() -> Result<()> {
+ let get_schema = |nullable| {
+ MockExprSchema::new()
+ .with_data_type(DataType::Int32)
+ .with_nullable(nullable)
+ };
+
+ check_nullability(
+ when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+ .otherwise(lit(0))?,
+ true,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ and(
+ is_not_null(col("foo")),
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ and(
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ is_not_null(col("foo")),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ or(
+ is_not_null(col("foo")),
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
Review Comment:
as above, I don't think this expression can everr be true so this overall
expression is still non nullable
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
assert!(expr.nullable(&get_schema(false)).unwrap());
}
+ fn check_nullability(
+ expr: Expr,
+ nullable: bool,
+ get_schema: fn(bool) -> MockExprSchema,
+ ) -> Result<()> {
+ assert_eq!(
+ expr.nullable(&get_schema(true))?,
+ nullable,
+ "Nullability of '{expr}' should be {nullable} when column is
nullable"
+ );
+ assert!(
+ !expr.nullable(&get_schema(false))?,
+ "Nullability of '{expr}' should be false when column is not
nullable"
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_case_expression_nullability() -> Result<()> {
+ let get_schema = |nullable| {
+ MockExprSchema::new()
+ .with_data_type(DataType::Int32)
+ .with_nullable(nullable)
+ };
+
+ check_nullability(
+ when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+ .otherwise(lit(0))?,
+ true,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ and(
+ is_not_null(col("foo")),
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ and(
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ is_not_null(col("foo")),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ or(
+ is_not_null(col("foo")),
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ true,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ or(
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ is_not_null(col("foo")),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ true,
Review Comment:
Here too -- this expression is not nullabile
```sql
> select foo, CASE WHEN foo=5 OR foo IS NOT NULL THEN foo ELSE 0 END from t;
+------+------------------------------------------------------------------------------+
| foo | CASE WHEN t.foo = Int64(5) OR t.foo IS NOT NULL THEN t.foo ELSE
Int64(0) END |
+------+------------------------------------------------------------------------------+
| 0 | 0
|
| NULL | 0
|
| 5 | 5
|
+------+------------------------------------------------------------------------------+
3 row(s) fetched.
Elapsed 0.002 seconds.
```
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
assert!(expr.nullable(&get_schema(false)).unwrap());
}
+ fn check_nullability(
+ expr: Expr,
+ nullable: bool,
+ get_schema: fn(bool) -> MockExprSchema,
+ ) -> Result<()> {
+ assert_eq!(
+ expr.nullable(&get_schema(true))?,
+ nullable,
+ "Nullability of '{expr}' should be {nullable} when column is
nullable"
+ );
+ assert!(
+ !expr.nullable(&get_schema(false))?,
+ "Nullability of '{expr}' should be false when column is not
nullable"
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_case_expression_nullability() -> Result<()> {
+ let get_schema = |nullable| {
+ MockExprSchema::new()
+ .with_data_type(DataType::Int32)
+ .with_nullable(nullable)
+ };
+
+ check_nullability(
+ when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+ .otherwise(lit(0))?,
Review Comment:
likewise there is `Expr::and` for ands that could be used as well below
However, the current setup of using `and` as a prefix is pretty clear too,
so maybe what you have here is actually more readable.
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
assert!(expr.nullable(&get_schema(false)).unwrap());
}
+ fn check_nullability(
Review Comment:
I found this a little confusing at first, because it makes an explicit
assumption that `expr`'s will never introduce nulls (in order for
`!expr.nullable(&get_schema(false))?,` to be true). So for example, it wouldn't
do the right thing with the `NULLIF` function `NULLIF(foo, 25)` or something
Maybe some comments would help
```suggestion
/// Verifies that `expr` has `nullable` nullability when the 'foo'
column is
/// null.
/// Also assumes and verifies that `expr` is NOT nullable when 'foo' is
NOT null
fn check_nullability(
```
##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
assert!(expr.nullable(&get_schema(false)).unwrap());
}
+ fn check_nullability(
+ expr: Expr,
+ nullable: bool,
+ get_schema: fn(bool) -> MockExprSchema,
+ ) -> Result<()> {
+ assert_eq!(
+ expr.nullable(&get_schema(true))?,
+ nullable,
+ "Nullability of '{expr}' should be {nullable} when column is
nullable"
+ );
+ assert!(
+ !expr.nullable(&get_schema(false))?,
+ "Nullability of '{expr}' should be false when column is not
nullable"
+ );
+ Ok(())
+ }
+
+ #[test]
+ fn test_case_expression_nullability() -> Result<()> {
+ let get_schema = |nullable| {
+ MockExprSchema::new()
+ .with_data_type(DataType::Int32)
+ .with_nullable(nullable)
+ };
+
+ check_nullability(
+ when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+ .otherwise(lit(0))?,
+ true,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ and(
+ is_not_null(col("foo")),
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ and(
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ is_not_null(col("foo")),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ false,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ or(
+ is_not_null(col("foo")),
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ true,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ or(
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ is_not_null(col("foo")),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ true,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ or(
+ is_not_null(col("foo")),
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ true,
+ get_schema,
+ )?;
+
+ check_nullability(
+ when(
+ or(
+ binary_expr(col("foo"), Operator::Eq, lit(5)),
+ is_not_null(col("foo")),
+ ),
+ col("foo"),
+ )
+ .otherwise(lit(0))?,
+ true,
+ get_schema,
+ )?;
Review Comment:
Can you also please add a check with `is_null` in the `OR` clause (which
should be null)
Something like the equivalent to
```sql
> select foo, CASE WHEN foo=5 OR foo IS NULL THEN foo ELSE 0 END from t;
+------+--------------------------------------------------------------------------+
| foo | CASE WHEN t.foo = Int64(5) OR t.foo IS NULL THEN t.foo ELSE
Int64(0) END |
+------+--------------------------------------------------------------------------+
| 0 | 0
|
| NULL | NULL
|
| 5 | 5
|
+------+--------------------------------------------------------------------------+
3 row(s) fetched.
Elapsed 0.000 seconds.
```
Like
```rust
check_nullability(
when(
or(
binary_expr(col("foo"), Operator::Eq, lit(5)),
is_null(col("foo")),
),
col("foo"),
)
.otherwise(lit(0))?,
true,
get_schema,
)?;
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]