adriangb commented on code in PR #17337:
URL: https://github.com/apache/datafusion/pull/17337#discussion_r2429380969


##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -1388,24 +1375,6 @@ impl PushDownFilter {
     }
 }
 
-/// replaces columns by its name on the projection.
-pub fn replace_cols_by_name(

Review Comment:
   Moved to `utils.rs` so we can re-use in sort pushdown



##########
datafusion/optimizer/src/push_down_filter.rs:
##########
@@ -836,17 +837,10 @@ impl OptimizerRule for PushDownFilter {
                 insert_below(LogicalPlan::Sort(sort), new_filter)
             }
             LogicalPlan::SubqueryAlias(subquery_alias) => {
-                let mut replace_map = HashMap::new();
-                for (i, (qualifier, field)) in
-                    subquery_alias.input.schema().iter().enumerate()
-                {
-                    let (sub_qualifier, sub_field) =
-                        subquery_alias.schema.qualified_field(i);
-                    replace_map.insert(
-                        qualified_name(sub_qualifier, sub_field.name()),
-                        Expr::Column(Column::new(qualifier.cloned(), 
field.name())),
-                    );
-                }

Review Comment:
   Unified into helper function in `utils.rs`



##########
datafusion/optimizer/src/utils.rs:
##########
@@ -151,6 +154,73 @@ fn coerce(expr: Expr, schema: &DFSchema) -> Result<Expr> {
     expr.rewrite(&mut expr_rewrite).data()
 }
 
+/// Replaces columns by their name in the provided expression.
+///
+/// Replaces all column references in the expression tree by looking up their 
names
+/// in the provided HashMap and replacing them with the corresponding 
expression.
+///
+/// # Arguments
+///
+/// * `e` - The expression to transform
+/// * `replace_map` - A map from column names (flat_name) to replacement 
expressions
+///
+/// # Returns
+///
+/// The transformed expression with columns replaced according to the map
+pub(crate) fn replace_cols_by_name(
+    e: Expr,
+    replace_map: &HashMap<String, Expr>,
+) -> Result<Expr> {
+    e.transform_up(|expr| {
+        Ok(if let Expr::Column(c) = &expr {
+            match replace_map.get(&c.flat_name()) {
+                Some(new_c) => Transformed::yes(new_c.clone()),
+                None => Transformed::no(expr),
+            }
+        } else {
+            Transformed::no(expr)
+        })
+    })
+    .data()
+}
+
+/// Builds a replace map for rewriting column qualifiers from an output schema 
to an input schema.
+///
+/// This function creates a mapping from qualified column names in the output 
schema
+/// to their corresponding Column expressions in the input schema. This is 
useful when
+/// pushing expressions through operators like SubqueryAlias or Union that 
change
+/// column qualifiers.
+///
+/// # Arguments
+///
+/// * `output_schema` - The schema with output qualifiers (e.g., alias or 
union name)
+/// * `input_schema` - The schema with input qualifiers (e.g., underlying 
table name)
+///
+/// # Returns
+///
+/// A HashMap mapping qualified output column names to input Column expressions
+///
+/// # Example
+///
+/// For a SubqueryAlias "subquery" over table "test":
+/// - Input: "test.a" (from input_schema)
+/// - Output: "subquery.a" (from output_schema)
+/// - Map: {"subquery.a" -> Column("test", "a")}
+pub(crate) fn build_schema_remapping(

Review Comment:
   Avoiding a public symbol until someone asks for this



##########
datafusion/sqllogictest/test_files/spark/aggregate/avg.slt:
##########
@@ -53,4 +53,4 @@ FROM (VALUES (0::INT), (0::INT)) AS t(a)
 GROUP BY a
 ORDER BY a;
 ----
-0 0

Review Comment:
   Ugh I'm going to have to go through these one by one, I just ran the update 
on all of them but it seems that's too eager



##########
datafusion/expr/src/logical_plan/tree_node.rs:
##########
@@ -868,3 +870,99 @@ impl LogicalPlan {
         })
     }
 }
+
+/// A node context object beneficial for writing optimizer rules.
+/// This context encapsulates a [`LogicalPlan`] node with a payload.
+///
+/// Since each wrapped node has its children within both the 
[`LogicalPlanContext.plan.inputs()`],
+/// as well as separately within the [`LogicalPlanContext.children`] (which 
are child nodes wrapped in the context),
+/// it's important to keep these child plans in sync when performing mutations.
+///
+/// Since there are two ways to access child plans directly — it's recommended
+/// to perform mutable operations via [`Self::update_plan_from_children`].
+/// After mutating the `LogicalPlanContext.children`, or after creating the 
`LogicalPlanContext`,
+/// call `update_plan_from_children` to sync.
+#[derive(Debug, Clone)]

Review Comment:
   Happy to break this out into it's own PR



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to