alamb commented on code in PR #17813:
URL: https://github.com/apache/datafusion/pull/17813#discussion_r2388181962


##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
         assert!(expr.nullable(&get_schema(false)).unwrap());
     }
 
+    fn check_nullability(
+        expr: Expr,
+        nullable: bool,
+        get_schema: fn(bool) -> MockExprSchema,
+    ) -> Result<()> {
+        assert_eq!(
+            expr.nullable(&get_schema(true))?,
+            nullable,
+            "Nullability of '{expr}' should be {nullable} when column is 
nullable"
+        );
+        assert!(
+            !expr.nullable(&get_schema(false))?,
+            "Nullability of '{expr}' should be false when column is not 
nullable"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_case_expression_nullability() -> Result<()> {
+        let get_schema = |nullable| {
+            MockExprSchema::new()
+                .with_data_type(DataType::Int32)
+                .with_nullable(nullable)
+        };
+
+        check_nullability(
+            when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+                .otherwise(lit(0))?,

Review Comment:
   Minor: You can probably make this more concise using the `eq` method, 
something like this:
   
   ```suggestion
               when(col("foo").eq(lit(5))), col("foo")).otherwise(lit(0))?,
   ```
   
   



##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
         assert!(expr.nullable(&get_schema(false)).unwrap());
     }
 
+    fn check_nullability(
+        expr: Expr,
+        nullable: bool,
+        get_schema: fn(bool) -> MockExprSchema,
+    ) -> Result<()> {
+        assert_eq!(
+            expr.nullable(&get_schema(true))?,
+            nullable,
+            "Nullability of '{expr}' should be {nullable} when column is 
nullable"
+        );
+        assert!(
+            !expr.nullable(&get_schema(false))?,
+            "Nullability of '{expr}' should be false when column is not 
nullable"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_case_expression_nullability() -> Result<()> {
+        let get_schema = |nullable| {
+            MockExprSchema::new()
+                .with_data_type(DataType::Int32)
+                .with_nullable(nullable)
+        };
+
+        check_nullability(
+            when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+                .otherwise(lit(0))?,
+            true,

Review Comment:
   technically this could also be reported as `false`, given that if `foo` is 
null, then the expr resolves to `0` (non null)
   
   ```sql
   > create table t(foo int) as values (0), (NULL), (5);
   0 row(s) fetched.
   Elapsed 0.001 seconds.
   
   > select foo, CASE WHEN foo=5 THEN foo ELSE 0 END from t;
   +------+---------------------------------------------------------+
   | foo  | CASE WHEN t.foo = Int64(5) THEN t.foo ELSE Int64(0) END |
   +------+---------------------------------------------------------+
   | 0    | 0                                                       |
   | NULL | 0                                                       |
   | 5    | 5                                                       |
   +------+---------------------------------------------------------+
   3 row(s) fetched.
   Elapsed 0.002 seconds.
   ```
   
   However, maybe we can improve that in a follow on PR



##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
         assert!(expr.nullable(&get_schema(false)).unwrap());
     }
 
+    fn check_nullability(
+        expr: Expr,
+        nullable: bool,
+        get_schema: fn(bool) -> MockExprSchema,
+    ) -> Result<()> {
+        assert_eq!(
+            expr.nullable(&get_schema(true))?,
+            nullable,
+            "Nullability of '{expr}' should be {nullable} when column is 
nullable"
+        );
+        assert!(
+            !expr.nullable(&get_schema(false))?,
+            "Nullability of '{expr}' should be false when column is not 
nullable"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_case_expression_nullability() -> Result<()> {
+        let get_schema = |nullable| {
+            MockExprSchema::new()
+                .with_data_type(DataType::Int32)
+                .with_nullable(nullable)
+        };
+
+        check_nullability(
+            when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+                .otherwise(lit(0))?,
+            true,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                and(
+                    is_not_null(col("foo")),
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                and(
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                    is_not_null(col("foo")),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                or(
+                    is_not_null(col("foo")),
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),

Review Comment:
   as above, I don't think this expression can everr be true so this overall 
expression is still non nullable



##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
         assert!(expr.nullable(&get_schema(false)).unwrap());
     }
 
+    fn check_nullability(
+        expr: Expr,
+        nullable: bool,
+        get_schema: fn(bool) -> MockExprSchema,
+    ) -> Result<()> {
+        assert_eq!(
+            expr.nullable(&get_schema(true))?,
+            nullable,
+            "Nullability of '{expr}' should be {nullable} when column is 
nullable"
+        );
+        assert!(
+            !expr.nullable(&get_schema(false))?,
+            "Nullability of '{expr}' should be false when column is not 
nullable"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_case_expression_nullability() -> Result<()> {
+        let get_schema = |nullable| {
+            MockExprSchema::new()
+                .with_data_type(DataType::Int32)
+                .with_nullable(nullable)
+        };
+
+        check_nullability(
+            when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+                .otherwise(lit(0))?,
+            true,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                and(
+                    is_not_null(col("foo")),
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                and(
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                    is_not_null(col("foo")),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                or(
+                    is_not_null(col("foo")),
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            true,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                or(
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                    is_not_null(col("foo")),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            true,

Review Comment:
   Here too -- this expression is not nullabile 
   
   ```sql
   > select foo, CASE WHEN foo=5 OR foo IS NOT NULL THEN foo ELSE 0 END from t;
   
+------+------------------------------------------------------------------------------+
   | foo  | CASE WHEN t.foo = Int64(5) OR t.foo IS NOT NULL THEN t.foo ELSE 
Int64(0) END |
   
+------+------------------------------------------------------------------------------+
   | 0    | 0                                                                   
         |
   | NULL | 0                                                                   
         |
   | 5    | 5                                                                   
         |
   
+------+------------------------------------------------------------------------------+
   3 row(s) fetched.
   Elapsed 0.002 seconds.
   ```



##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
         assert!(expr.nullable(&get_schema(false)).unwrap());
     }
 
+    fn check_nullability(
+        expr: Expr,
+        nullable: bool,
+        get_schema: fn(bool) -> MockExprSchema,
+    ) -> Result<()> {
+        assert_eq!(
+            expr.nullable(&get_schema(true))?,
+            nullable,
+            "Nullability of '{expr}' should be {nullable} when column is 
nullable"
+        );
+        assert!(
+            !expr.nullable(&get_schema(false))?,
+            "Nullability of '{expr}' should be false when column is not 
nullable"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_case_expression_nullability() -> Result<()> {
+        let get_schema = |nullable| {
+            MockExprSchema::new()
+                .with_data_type(DataType::Int32)
+                .with_nullable(nullable)
+        };
+
+        check_nullability(
+            when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+                .otherwise(lit(0))?,

Review Comment:
   likewise there is `Expr::and` for ands that could be used as well below
   
   However, the current setup of using `and` as a prefix is pretty clear too, 
so maybe what you have here is actually more readable. 



##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
         assert!(expr.nullable(&get_schema(false)).unwrap());
     }
 
+    fn check_nullability(

Review Comment:
   I found this a little confusing at first, because it makes an explicit 
assumption that `expr`'s will never introduce nulls (in order for 
`!expr.nullable(&get_schema(false))?,` to be true). So for example, it wouldn't 
do the right thing with the `NULLIF` function `NULLIF(foo, 25)` or something
   
   Maybe some comments would help
   
   ```suggestion
       /// Verifies that `expr` has `nullable` nullability when the 'foo' 
column is 
       /// null. 
       /// Also assumes and verifies that `expr` is NOT nullable when 'foo' is 
NOT null
       fn check_nullability(
   ```



##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -830,6 +883,150 @@ mod tests {
         assert!(expr.nullable(&get_schema(false)).unwrap());
     }
 
+    fn check_nullability(
+        expr: Expr,
+        nullable: bool,
+        get_schema: fn(bool) -> MockExprSchema,
+    ) -> Result<()> {
+        assert_eq!(
+            expr.nullable(&get_schema(true))?,
+            nullable,
+            "Nullability of '{expr}' should be {nullable} when column is 
nullable"
+        );
+        assert!(
+            !expr.nullable(&get_schema(false))?,
+            "Nullability of '{expr}' should be false when column is not 
nullable"
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn test_case_expression_nullability() -> Result<()> {
+        let get_schema = |nullable| {
+            MockExprSchema::new()
+                .with_data_type(DataType::Int32)
+                .with_nullable(nullable)
+        };
+
+        check_nullability(
+            when(is_not_null(col("foo")), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(not(is_null(col("foo"))), col("foo")).otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(binary_expr(col("foo"), Operator::Eq, lit(5)), col("foo"))
+                .otherwise(lit(0))?,
+            true,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                and(
+                    is_not_null(col("foo")),
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                and(
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                    is_not_null(col("foo")),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            false,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                or(
+                    is_not_null(col("foo")),
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            true,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                or(
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                    is_not_null(col("foo")),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            true,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                or(
+                    is_not_null(col("foo")),
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            true,
+            get_schema,
+        )?;
+
+        check_nullability(
+            when(
+                or(
+                    binary_expr(col("foo"), Operator::Eq, lit(5)),
+                    is_not_null(col("foo")),
+                ),
+                col("foo"),
+            )
+            .otherwise(lit(0))?,
+            true,
+            get_schema,
+        )?;

Review Comment:
   Can you also please add a check with `is_null` in the `OR` clause (which 
should be null)
   
   Something like the equivalent to 
   
   ```sql
   > select foo, CASE WHEN foo=5 OR foo IS NULL THEN foo ELSE 0 END from t;
   
+------+--------------------------------------------------------------------------+
   | foo  | CASE WHEN t.foo = Int64(5) OR t.foo IS NULL THEN t.foo ELSE 
Int64(0) END |
   
+------+--------------------------------------------------------------------------+
   | 0    | 0                                                                   
     |
   | NULL | NULL                                                                
     |
   | 5    | 5                                                                   
     |
   
+------+--------------------------------------------------------------------------+
   3 row(s) fetched.
   Elapsed 0.000 seconds.
   ```
   
   Like
   ```rust
         check_nullability(
               when(
                   or(
                       binary_expr(col("foo"), Operator::Eq, lit(5)),
                       is_null(col("foo")),
                   ),
                   col("foo"),
               )
               .otherwise(lit(0))?,
               true,
               get_schema,
           )?;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to