Re: [PR] Support literal-only predicates in PruningPredicate [datafusion]

via GitHub Mon, 08 Dec 2025 05:34:02 -0800


alamb commented on code in PR #19129:
URL: https://github.com/apache/datafusion/pull/19129#discussion_r2598652472



##########
datafusion/pruning/src/pruning_predicate.rs:
##########
@@ -2774,6 +2811,66 @@ mod tests {
         Ok(())
     }
 
+    // Test that literal-to-literal comparisons are correctly evaluated.
+    // When both sides are constants, the expression should be evaluated 
directly
+    // and if it's false, all containers should be pruned.
+    #[test]
+    fn row_group_predicate_literal_false() {
+        // lit(1) = lit(2) is always false, so all containers should be pruned
+        let schema = Arc::new(Schema::new(vec![Field::new("c1", 
DataType::Int32, true)]));
+        let statistics = TestStatistics::new()
+            .with("c1", ContainerStats::new_i32(vec![Some(0)], 
vec![Some(10)]));
+        let expected_ret = &[false];
+        prune_with_expr(lit(1).eq(lit(2)), &schema, &statistics, expected_ret);
+    }
+
+    // Test that always-true literal predicates don't prune any containers
+    #[test]
+    fn row_group_predicate_literal_true() {
+        // lit(1) = lit(1) is always true, so no containers should be pruned
+        let schema = Arc::new(Schema::new(vec![Field::new("c1", 
DataType::Int32, true)]));
+        let statistics = TestStatistics::new()
+            .with("c1", ContainerStats::new_i32(vec![Some(0)], 
vec![Some(10)]));
+        let expected_ret = &[true];
+        prune_with_expr(lit(1).eq(lit(1)), &schema, &statistics, expected_ret);
+    }
+
+    // Test nested/complex literal expression trees
+    #[test]
+    fn row_group_predicate_complex_literals() {
+        let schema = Arc::new(Schema::new(vec![Field::new("c1", 
DataType::Int32, true)]));
+        let statistics = TestStatistics::new()
+            .with("c1", ContainerStats::new_i32(vec![Some(0)], 
vec![Some(10)]));
+
+        // (1 + 2) > 0 is always true
+        prune_with_expr((lit(1) + lit(2)).gt(lit(0)), &schema, &statistics, 
&[true]);
+
+        // (1 + 2) < 0 is always false
+        prune_with_expr((lit(1) + lit(2)).lt(lit(0)), &schema, &statistics, 
&[false]);
+
+        // Nested AND of literals: true AND false = false
+        prune_with_expr(lit(true).and(lit(false)), &schema, &statistics, 
&[false]);
+
+        // Nested OR of literals: true OR false = true
+        prune_with_expr(lit(true).or(lit(false)), &schema, &statistics, 
&[true]);
+
+        // Complex nested: (1 < 2) AND (3 > 1) = true AND true = true
+        prune_with_expr(

Review Comment:
   Can you also please add an error test for pruning with a non boolean (e.g. 
`lit(1i32)`) -- and just verify that it errors resonably (rather than gives the 
wrong answer)



##########
datafusion/pruning/src/pruning_predicate.rs:
##########
@@ -2774,6 +2811,66 @@ mod tests {
         Ok(())
     }
 
+    // Test that literal-to-literal comparisons are correctly evaluated.
+    // When both sides are constants, the expression should be evaluated 
directly
+    // and if it's false, all containers should be pruned.
+    #[test]
+    fn row_group_predicate_literal_false() {
+        // lit(1) = lit(2) is always false, so all containers should be pruned
+        let schema = Arc::new(Schema::new(vec![Field::new("c1", 
DataType::Int32, true)]));
+        let statistics = TestStatistics::new()
+            .with("c1", ContainerStats::new_i32(vec![Some(0)], 
vec![Some(10)]));
+        let expected_ret = &[false];
+        prune_with_expr(lit(1).eq(lit(2)), &schema, &statistics, expected_ret);
+    }
+
+    // Test that always-true literal predicates don't prune any containers
+    #[test]
+    fn row_group_predicate_literal_true() {

Review Comment:
   Can we please also add a test for literal (boolean) null?



##########
datafusion/pruning/src/pruning_predicate.rs:
##########
@@ -1529,8 +1537,37 @@ fn build_predicate_expression(
         return expr;
     }
 
-    let expr_builder =
-        PruningExpressionBuilder::try_new(&left, &right, op, schema, 
required_columns);
+    // Collect columns once to avoid redundant traversal
+    let left_columns = collect_columns(&left);
+    let right_columns = collect_columns(&right);
+
+    // Handle literal-to-literal comparisons (no columns on either side)
+    // e.g., lit(1) = lit(2) should evaluate to false and prune all containers
+    if left_columns.is_empty() && right_columns.is_empty() {

Review Comment:
   This seems redundant with the constant folding introduced in  
https://github.com/apache/datafusion/pull/19130
   
   Why do we need both? Maybe we just need to constant fold the expression 
after applying the physical expr adapter 🤔 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] Support literal-only predicates in PruningPredicate [datafusion]

Reply via email to