joroKr21 commented on code in PR #22389:
URL: https://github.com/apache/datafusion/pull/22389#discussion_r3274379979


##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -1014,55 +1011,70 @@ pub fn update_join_filter(
     })
 }
 
-/// Unifies `projection` with its input (which is also a [`ProjectionExec`]).
-fn try_unifying_projections(
-    projection: &ProjectionExec,
-    child: &ProjectionExec,
+/// Collapse a chain of consecutive [`ProjectionExec`]s into one. Returns
+/// `None` if nothing could be merged.
+fn try_collapse_projection_chain(
+    outer: &ProjectionExec,
 ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
-    let mut projected_exprs = vec![];
+    let mut current_exprs: Vec<ProjectionExpr> = outer.expr().to_vec();
+    let mut current_input: Arc<dyn ExecutionPlan> = Arc::clone(outer.input());
     let mut column_ref_map: HashMap<Column, usize> = HashMap::new();
+    let mut collapsed_any = false;
+
+    'outer: while let Some(inner_proj) = 
current_input.downcast_ref::<ProjectionExec>() {
+        // Collect the column references usage in the outer projection.
+        column_ref_map.clear();
+        for proj_expr in &current_exprs {
+            proj_expr.expr.apply(|expr| {
+                if let Some(column) = expr.downcast_ref::<Column>() {
+                    *column_ref_map.entry(column.clone()).or_default() += 1;
+                }
+                Ok(TreeNodeRecursion::Continue)
+            })?;
+        }
+        let inner_exprs = inner_proj.expr();
+        // Merging these projections is not beneficial, e.g
+        // If an expression is not trivial (KeepInPlace) and it is referred 
more than 1, unifies projections will be
+        // beneficial as caching mechanism for non-trivial computations.
+        // See discussion in: https://github.com/apache/datafusion/issues/8296
+        let blocked = column_ref_map.iter().any(|(column, count)| {
+            *count > 1
+                && !inner_exprs[column.index()]
+                    .expr
+                    .placement()
+                    .should_push_to_leaves()
+        });
+        if blocked {
+            break;
+        }
 
-    // Collect the column references usage in the outer projection.
-    projection.expr().iter().for_each(|proj_expr| {
-        proj_expr
-            .expr
-            .apply(|expr| {
-                Ok({
-                    if let Some(column) = expr.downcast_ref::<Column>() {
-                        *column_ref_map.entry(column.clone()).or_default() += 
1;
-                    }
-                    TreeNodeRecursion::Continue
-                })
-            })
-            .unwrap();
-    });
-    // Merging these projections is not beneficial, e.g
-    // If an expression is not trivial (KeepInPlace) and it is referred more 
than 1, unifies projections will be
-    // beneficial as caching mechanism for non-trivial computations.
-    // See discussion in: https://github.com/apache/datafusion/issues/8296
-    if column_ref_map.iter().any(|(column, count)| {
-        *count > 1
-            && !child.expr()[column.index()]
-                .expr
-                .placement()
-                .should_push_to_leaves()
-    }) {
-        return Ok(None);
+        let mut new_exprs = Vec::with_capacity(current_exprs.len());
+        for proj_expr in &current_exprs {
+            // If there is no match in the input projection, we cannot unify 
these
+            // projections. This case will arise if the projection expression 
contains
+            // a `PhysicalExpr` variant `update_expr` doesn't support.
+            let Some(expr) = update_expr(&proj_expr.expr, inner_exprs, true)? 
else {
+                break 'outer;
+            };
+            new_exprs.push(ProjectionExpr {

Review Comment:
   Could we modify `current_exprs` in place instead of pushing to a new vector?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to