AdamGS commented on issue #20078:
URL: https://github.com/apache/datafusion/issues/20078#issuecomment-3834930297

   Including all changes (included at the bottom), the results in that 
benchmark are really good (almost 90% improvement on my machine).
   
   There are now three tests failing:
   - test_nested_expression_simplification
   - test_complex_nested_expression
   - test_unwrap_cast_preserves_non_comparison_operators
   
   ```
   tpc-ds/q76/cs/16        time:   [111.92 µs 112.85 µs 114.16 µs]
                           change: [−88.093% −88.019% −87.946%] (p = 0.00 < 
0.05)
                           Performance has improved.
   Found 2 outliers among 100 measurements (2.00%)
     1 (1.00%) low mild
     1 (1.00%) high severe
   
   tpc-ds/q76/ws/16        time:   [111.10 µs 111.92 µs 112.85 µs]
                           change: [−88.209% −88.136% −88.058%] (p = 0.00 < 
0.05)
                           Performance has improved.
   Found 6 outliers among 100 measurements (6.00%)
     2 (2.00%) low mild
     1 (1.00%) high mild
     3 (3.00%) high severe
   
   tpc-ds/q76/cs/128       time:   [837.58 µs 840.52 µs 843.69 µs]
                           change: [−87.751% −87.699% −87.651%] (p = 0.00 < 
0.05)
                           Performance has improved.
   Found 7 outliers among 100 measurements (7.00%)
     1 (1.00%) low mild
     4 (4.00%) high mild
     2 (2.00%) high severe
   
   tpc-ds/q76/ws/128       time:   [847.83 µs 852.18 µs 856.13 µs]
                           change: [−87.768% −87.713% −87.656%] (p = 0.00 < 
0.05)
                           Performance has improved.
   Found 2 outliers among 100 measurements (2.00%)
     2 (2.00%) high mild
   ```
   
   ```diff
   diff --git a/datafusion/physical-expr/src/simplifier/const_evaluator.rs 
b/datafusion/physical-expr/src/simplifier/const_evaluator.rs
   index 8a2368c40..ac4326285 100644
   --- a/datafusion/physical-expr/src/simplifier/const_evaluator.rs
   +++ b/datafusion/physical-expr/src/simplifier/const_evaluator.rs
   @@ -40,10 +40,13 @@ use crate::expressions::{Column, Literal};
    /// - `(1 + 2) * 3` -> `9` (with bottom-up traversal)
    /// - `'hello' || ' world'` -> `'hello world'`
    pub fn simplify_const_expr(
   -    expr: &Arc<dyn PhysicalExpr>,
   +    expr: Arc<dyn PhysicalExpr>,
    ) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
   -    if !can_evaluate_as_constant(expr) {
   -        return Ok(Transformed::no(Arc::clone(expr)));
   +    if expr.as_any().downcast_ref::<Literal>().is_some()
   +        || expr.as_any().downcast_ref::<Column>().is_some()
   +        || expr.is_volatile_node()
   +    {
   +        return Ok(Transformed::no(expr));
        }
    
        // Create a 1-row dummy batch for evaluation
   @@ -61,13 +64,13 @@ pub fn simplify_const_expr(
            }
            Ok(_) => {
                // Unexpected result - keep original expression
   -            Ok(Transformed::no(Arc::clone(expr)))
   +            Ok(Transformed::no(expr))
            }
            Err(_) => {
                // On error, keep original expression
                // The expression might succeed at runtime due to short-circuit 
evaluation
                // or other runtime conditions
   -            Ok(Transformed::no(Arc::clone(expr)))
   +            Ok(Transformed::no(expr))
            }
        }
    }
   diff --git a/datafusion/physical-expr/src/simplifier/mod.rs 
b/datafusion/physical-expr/src/simplifier/mod.rs
   index 3bd4683c1..44bd8242a 100644
   --- a/datafusion/physical-expr/src/simplifier/mod.rs
   +++ b/datafusion/physical-expr/src/simplifier/mod.rs
   @@ -58,11 +58,11 @@ impl<'a> PhysicalExprSimplifier<'a> {
    
                    // Apply NOT expression simplification first, then unwrap 
cast optimization,
                    // then constant expression evaluation
   -                let rewritten = simplify_not_expr(&node, schema)?
   +                let rewritten = simplify_not_expr(node, schema)?
                        .transform_data(|node| {
                            unwrap_cast::unwrap_cast_in_comparison(node, schema)
                        })?
   -                    .transform_data(|node| 
const_evaluator::simplify_const_expr(&node))?;
   +                    .transform_data(|node| 
const_evaluator::simplify_const_expr(node))?;
    
                    #[cfg(debug_assertions)]
                    assert_eq!(
   diff --git a/datafusion/physical-expr/src/simplifier/not.rs 
b/datafusion/physical-expr/src/simplifier/not.rs
   index 9b65d5cba..ea5467d0a 100644
   --- a/datafusion/physical-expr/src/simplifier/not.rs
   +++ b/datafusion/physical-expr/src/simplifier/not.rs
   @@ -44,13 +44,13 @@ use crate::expressions::{BinaryExpr, InListExpr, 
Literal, NotExpr, in_list, lit}
    /// TreeNodeRewriter, multiple passes will automatically be applied until 
no more
    /// transformations are possible.
    pub fn simplify_not_expr(
   -    expr: &Arc<dyn PhysicalExpr>,
   +    expr: Arc<dyn PhysicalExpr>,
        schema: &Schema,
    ) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
        // Check if this is a NOT expression
        let not_expr = match expr.as_any().downcast_ref::<NotExpr>() {
            Some(not_expr) => not_expr,
   -        None => return Ok(Transformed::no(Arc::clone(expr))),
   +        None => return Ok(Transformed::no(expr)),
        };
    
        let inner_expr = not_expr.arg();
   @@ -120,5 +120,5 @@ pub fn simplify_not_expr(
        }
    
        // If no simplification possible, return the original expression
   -    Ok(Transformed::no(Arc::clone(expr)))
   +    Ok(Transformed::no(expr))
    }
   diff --git a/datafusion/physical-expr/src/simplifier/unwrap_cast.rs 
b/datafusion/physical-expr/src/simplifier/unwrap_cast.rs
   index ae6da9c5e..0752e9e31 100644
   --- a/datafusion/physical-expr/src/simplifier/unwrap_cast.rs
   +++ b/datafusion/physical-expr/src/simplifier/unwrap_cast.rs
   @@ -49,14 +49,12 @@ pub(crate) fn unwrap_cast_in_comparison(
        expr: Arc<dyn PhysicalExpr>,
        schema: &Schema,
    ) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
   -    expr.transform_down(|e| {
   -        if let Some(binary) = e.as_any().downcast_ref::<BinaryExpr>()
   -            && let Some(unwrapped) = try_unwrap_cast_binary(binary, schema)?
   -        {
   -            return Ok(Transformed::yes(unwrapped));
   -        }
   -        Ok(Transformed::no(e))
   -    })
   +    if let Some(binary) = expr.as_any().downcast_ref::<BinaryExpr>()
   +        && let Some(unwrapped) = try_unwrap_cast_binary(binary, schema)?
   +    {
   +        return Ok(Transformed::yes(unwrapped));
   +    }
   +    Ok(Transformed::no(expr))
    }
    
    /// Try to unwrap casts in binary expressions
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to