alamb commented on code in PR #19129:
URL: https://github.com/apache/datafusion/pull/19129#discussion_r2598652472
##########
datafusion/pruning/src/pruning_predicate.rs:
##########
@@ -2774,6 +2811,66 @@ mod tests {
Ok(())
}
+ // Test that literal-to-literal comparisons are correctly evaluated.
+ // When both sides are constants, the expression should be evaluated
directly
+ // and if it's false, all containers should be pruned.
+ #[test]
+ fn row_group_predicate_literal_false() {
+ // lit(1) = lit(2) is always false, so all containers should be pruned
+ let schema = Arc::new(Schema::new(vec![Field::new("c1",
DataType::Int32, true)]));
+ let statistics = TestStatistics::new()
+ .with("c1", ContainerStats::new_i32(vec![Some(0)],
vec![Some(10)]));
+ let expected_ret = &[false];
+ prune_with_expr(lit(1).eq(lit(2)), &schema, &statistics, expected_ret);
+ }
+
+ // Test that always-true literal predicates don't prune any containers
+ #[test]
+ fn row_group_predicate_literal_true() {
+ // lit(1) = lit(1) is always true, so no containers should be pruned
+ let schema = Arc::new(Schema::new(vec![Field::new("c1",
DataType::Int32, true)]));
+ let statistics = TestStatistics::new()
+ .with("c1", ContainerStats::new_i32(vec![Some(0)],
vec![Some(10)]));
+ let expected_ret = &[true];
+ prune_with_expr(lit(1).eq(lit(1)), &schema, &statistics, expected_ret);
+ }
+
+ // Test nested/complex literal expression trees
+ #[test]
+ fn row_group_predicate_complex_literals() {
+ let schema = Arc::new(Schema::new(vec![Field::new("c1",
DataType::Int32, true)]));
+ let statistics = TestStatistics::new()
+ .with("c1", ContainerStats::new_i32(vec![Some(0)],
vec![Some(10)]));
+
+ // (1 + 2) > 0 is always true
+ prune_with_expr((lit(1) + lit(2)).gt(lit(0)), &schema, &statistics,
&[true]);
+
+ // (1 + 2) < 0 is always false
+ prune_with_expr((lit(1) + lit(2)).lt(lit(0)), &schema, &statistics,
&[false]);
+
+ // Nested AND of literals: true AND false = false
+ prune_with_expr(lit(true).and(lit(false)), &schema, &statistics,
&[false]);
+
+ // Nested OR of literals: true OR false = true
+ prune_with_expr(lit(true).or(lit(false)), &schema, &statistics,
&[true]);
+
+ // Complex nested: (1 < 2) AND (3 > 1) = true AND true = true
+ prune_with_expr(
Review Comment:
Can you also please add an error test for pruning with a non boolean (e.g.
`lit(1i32)`) -- and just verify that it errors resonably (rather than gives the
wrong answer)
##########
datafusion/pruning/src/pruning_predicate.rs:
##########
@@ -2774,6 +2811,66 @@ mod tests {
Ok(())
}
+ // Test that literal-to-literal comparisons are correctly evaluated.
+ // When both sides are constants, the expression should be evaluated
directly
+ // and if it's false, all containers should be pruned.
+ #[test]
+ fn row_group_predicate_literal_false() {
+ // lit(1) = lit(2) is always false, so all containers should be pruned
+ let schema = Arc::new(Schema::new(vec![Field::new("c1",
DataType::Int32, true)]));
+ let statistics = TestStatistics::new()
+ .with("c1", ContainerStats::new_i32(vec![Some(0)],
vec![Some(10)]));
+ let expected_ret = &[false];
+ prune_with_expr(lit(1).eq(lit(2)), &schema, &statistics, expected_ret);
+ }
+
+ // Test that always-true literal predicates don't prune any containers
+ #[test]
+ fn row_group_predicate_literal_true() {
Review Comment:
Can we please also add a test for literal (boolean) null?
##########
datafusion/pruning/src/pruning_predicate.rs:
##########
@@ -1529,8 +1537,37 @@ fn build_predicate_expression(
return expr;
}
- let expr_builder =
- PruningExpressionBuilder::try_new(&left, &right, op, schema,
required_columns);
+ // Collect columns once to avoid redundant traversal
+ let left_columns = collect_columns(&left);
+ let right_columns = collect_columns(&right);
+
+ // Handle literal-to-literal comparisons (no columns on either side)
+ // e.g., lit(1) = lit(2) should evaluate to false and prune all containers
+ if left_columns.is_empty() && right_columns.is_empty() {
Review Comment:
This seems redundant with the constant folding introduced in
https://github.com/apache/datafusion/pull/19130
Why do we need both? Maybe we just need to constant fold the expression
after applying the physical expr adapter 🤔
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]