AdamGS commented on issue #20078:
URL: https://github.com/apache/datafusion/issues/20078#issuecomment-3834930297
Including all changes (included at the bottom), the results in that
benchmark are really good (almost 90% improvement on my machine).
There are now three tests failing:
- test_nested_expression_simplification
- test_complex_nested_expression
- test_unwrap_cast_preserves_non_comparison_operators
```
tpc-ds/q76/cs/16 time: [111.92 µs 112.85 µs 114.16 µs]
change: [−88.093% −88.019% −87.946%] (p = 0.00 <
0.05)
Performance has improved.
Found 2 outliers among 100 measurements (2.00%)
1 (1.00%) low mild
1 (1.00%) high severe
tpc-ds/q76/ws/16 time: [111.10 µs 111.92 µs 112.85 µs]
change: [−88.209% −88.136% −88.058%] (p = 0.00 <
0.05)
Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
2 (2.00%) low mild
1 (1.00%) high mild
3 (3.00%) high severe
tpc-ds/q76/cs/128 time: [837.58 µs 840.52 µs 843.69 µs]
change: [−87.751% −87.699% −87.651%] (p = 0.00 <
0.05)
Performance has improved.
Found 7 outliers among 100 measurements (7.00%)
1 (1.00%) low mild
4 (4.00%) high mild
2 (2.00%) high severe
tpc-ds/q76/ws/128 time: [847.83 µs 852.18 µs 856.13 µs]
change: [−87.768% −87.713% −87.656%] (p = 0.00 <
0.05)
Performance has improved.
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild
```
```diff
diff --git a/datafusion/physical-expr/src/simplifier/const_evaluator.rs
b/datafusion/physical-expr/src/simplifier/const_evaluator.rs
index 8a2368c40..ac4326285 100644
--- a/datafusion/physical-expr/src/simplifier/const_evaluator.rs
+++ b/datafusion/physical-expr/src/simplifier/const_evaluator.rs
@@ -40,10 +40,13 @@ use crate::expressions::{Column, Literal};
/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal)
/// - `'hello' || ' world'` -> `'hello world'`
pub fn simplify_const_expr(
- expr: &Arc<dyn PhysicalExpr>,
+ expr: Arc<dyn PhysicalExpr>,
) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
- if !can_evaluate_as_constant(expr) {
- return Ok(Transformed::no(Arc::clone(expr)));
+ if expr.as_any().downcast_ref::<Literal>().is_some()
+ || expr.as_any().downcast_ref::<Column>().is_some()
+ || expr.is_volatile_node()
+ {
+ return Ok(Transformed::no(expr));
}
// Create a 1-row dummy batch for evaluation
@@ -61,13 +64,13 @@ pub fn simplify_const_expr(
}
Ok(_) => {
// Unexpected result - keep original expression
- Ok(Transformed::no(Arc::clone(expr)))
+ Ok(Transformed::no(expr))
}
Err(_) => {
// On error, keep original expression
// The expression might succeed at runtime due to short-circuit
evaluation
// or other runtime conditions
- Ok(Transformed::no(Arc::clone(expr)))
+ Ok(Transformed::no(expr))
}
}
}
diff --git a/datafusion/physical-expr/src/simplifier/mod.rs
b/datafusion/physical-expr/src/simplifier/mod.rs
index 3bd4683c1..44bd8242a 100644
--- a/datafusion/physical-expr/src/simplifier/mod.rs
+++ b/datafusion/physical-expr/src/simplifier/mod.rs
@@ -58,11 +58,11 @@ impl<'a> PhysicalExprSimplifier<'a> {
// Apply NOT expression simplification first, then unwrap
cast optimization,
// then constant expression evaluation
- let rewritten = simplify_not_expr(&node, schema)?
+ let rewritten = simplify_not_expr(node, schema)?
.transform_data(|node| {
unwrap_cast::unwrap_cast_in_comparison(node, schema)
})?
- .transform_data(|node|
const_evaluator::simplify_const_expr(&node))?;
+ .transform_data(|node|
const_evaluator::simplify_const_expr(node))?;
#[cfg(debug_assertions)]
assert_eq!(
diff --git a/datafusion/physical-expr/src/simplifier/not.rs
b/datafusion/physical-expr/src/simplifier/not.rs
index 9b65d5cba..ea5467d0a 100644
--- a/datafusion/physical-expr/src/simplifier/not.rs
+++ b/datafusion/physical-expr/src/simplifier/not.rs
@@ -44,13 +44,13 @@ use crate::expressions::{BinaryExpr, InListExpr,
Literal, NotExpr, in_list, lit}
/// TreeNodeRewriter, multiple passes will automatically be applied until
no more
/// transformations are possible.
pub fn simplify_not_expr(
- expr: &Arc<dyn PhysicalExpr>,
+ expr: Arc<dyn PhysicalExpr>,
schema: &Schema,
) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
// Check if this is a NOT expression
let not_expr = match expr.as_any().downcast_ref::<NotExpr>() {
Some(not_expr) => not_expr,
- None => return Ok(Transformed::no(Arc::clone(expr))),
+ None => return Ok(Transformed::no(expr)),
};
let inner_expr = not_expr.arg();
@@ -120,5 +120,5 @@ pub fn simplify_not_expr(
}
// If no simplification possible, return the original expression
- Ok(Transformed::no(Arc::clone(expr)))
+ Ok(Transformed::no(expr))
}
diff --git a/datafusion/physical-expr/src/simplifier/unwrap_cast.rs
b/datafusion/physical-expr/src/simplifier/unwrap_cast.rs
index ae6da9c5e..0752e9e31 100644
--- a/datafusion/physical-expr/src/simplifier/unwrap_cast.rs
+++ b/datafusion/physical-expr/src/simplifier/unwrap_cast.rs
@@ -49,14 +49,12 @@ pub(crate) fn unwrap_cast_in_comparison(
expr: Arc<dyn PhysicalExpr>,
schema: &Schema,
) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
- expr.transform_down(|e| {
- if let Some(binary) = e.as_any().downcast_ref::<BinaryExpr>()
- && let Some(unwrapped) = try_unwrap_cast_binary(binary, schema)?
- {
- return Ok(Transformed::yes(unwrapped));
- }
- Ok(Transformed::no(e))
- })
+ if let Some(binary) = expr.as_any().downcast_ref::<BinaryExpr>()
+ && let Some(unwrapped) = try_unwrap_cast_binary(binary, schema)?
+ {
+ return Ok(Transformed::yes(unwrapped));
+ }
+ Ok(Transformed::no(expr))
}
/// Try to unwrap casts in binary expressions
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]