Dandandan commented on a change in pull request #8836:
URL: https://github.com/apache/arrow/pull/8836#discussion_r536397617



##########
File path: rust/datafusion/src/sql/utils.rs
##########
@@ -0,0 +1,305 @@
+use crate::error::{DataFusionError, Result};
+use crate::logical_plan::{Expr, LogicalPlan};
+use arrow::datatypes::Schema;
+
+/// Resolves an `Expr::Wildcard` to a collection of `Expr::Column`'s.
+pub(crate) fn expand_wildcard(expr: &Expr, schema: &Schema) -> Vec<Expr> {
+    match expr {
+        Expr::Wildcard => schema
+            .fields()
+            .iter()
+            .map(|f| Expr::Column(f.name().to_string()))
+            .collect::<Vec<Expr>>(),
+        _ => vec![expr.clone()],
+    }
+}
+
+/// Collect all deeply nested `Expr::AggregateFunction` and
+/// `Expr::AggregateUDF`. They are returned in order of occurrence (depth
+/// first), with duplicates omitted.
+pub(crate) fn find_aggregate_exprs(exprs: &Vec<Expr>) -> Vec<Expr> {
+    find_exprs_in_exprs(exprs, &|nested_expr| match nested_expr {
+        Expr::AggregateFunction { .. } | Expr::AggregateUDF { .. } => true,
+        _ => false,
+    })
+}
+
+/// Collect all deeply nested `Expr::Column`'s. They are returned in order of
+/// appearance (depth first), with duplicates omitted.
+pub(crate) fn find_column_exprs(exprs: &Vec<Expr>) -> Vec<Expr> {
+    find_exprs_in_exprs(exprs, &|nested_expr| match nested_expr {
+        Expr::Column(_) => true,
+        _ => false,
+    })
+}
+
+/// Search the provided `Expr`'s, and all of their nested `Expr`, for any that
+/// pass the provided test. The returned `Expr`'s are deduplicated and returned
+/// in order of appearance (depth first).
+fn find_exprs_in_exprs<F>(exprs: &Vec<Expr>, test_fn: &F) -> Vec<Expr>
+where
+    F: Fn(&Expr) -> bool,
+{
+    exprs
+        .iter()
+        .flat_map(|expr| find_exprs_in_expr(expr, test_fn))
+        .fold(vec![], |mut acc, expr| {
+            if !acc.contains(&expr) {
+                acc.push(expr)
+            }
+            acc
+        })
+}
+
+/// Search an `Expr`, and all of its nested `Expr`'s, for any that pass the
+/// provided test. The returned `Expr`'s are deduplicated and returned in order
+/// of appearance (depth first).
+fn find_exprs_in_expr<F>(expr: &Expr, test_fn: &F) -> Vec<Expr>
+where
+    F: Fn(&Expr) -> bool,
+{
+    let matched_exprs = if test_fn(expr) {
+        vec![expr.clone()]
+    } else {
+        match expr {
+            Expr::AggregateFunction { args, .. } => find_exprs_in_exprs(&args, 
test_fn),
+            Expr::AggregateUDF { args, .. } => find_exprs_in_exprs(&args, 
test_fn),
+            Expr::Alias(nested_expr, _) => {
+                find_exprs_in_expr(nested_expr.as_ref(), test_fn)
+            }
+            Expr::BinaryExpr { left, right, .. } => {
+                let mut matches = vec![];
+                matches.extend(find_exprs_in_expr(left.as_ref(), test_fn));
+                matches.extend(find_exprs_in_expr(right.as_ref(), test_fn));
+                matches
+            }
+            Expr::Case {
+                expr: case_expr_opt,
+                when_then_expr,
+                else_expr: else_expr_opt,
+            } => {
+                let mut matches = vec![];
+
+                if let Some(case_expr) = case_expr_opt {
+                    matches.extend(find_exprs_in_expr(case_expr.as_ref(), 
test_fn));
+                }
+
+                matches.extend(
+                    when_then_expr
+                        .iter()
+                        .flat_map(|(a, b)| vec![a, b])
+                        .flat_map(|expr| find_exprs_in_expr(expr.as_ref(), 
test_fn))
+                        .collect::<Vec<Expr>>(),
+                );
+
+                if let Some(else_expr) = else_expr_opt {
+                    matches.extend(find_exprs_in_expr(else_expr.as_ref(), 
test_fn));
+                }
+
+                matches
+            }
+            Expr::Cast {
+                expr: nested_expr, ..
+            } => find_exprs_in_expr(nested_expr.as_ref(), test_fn),
+            Expr::IsNotNull(nested_expr) => {
+                find_exprs_in_expr(nested_expr.as_ref(), test_fn)
+            }
+            Expr::IsNull(nested_expr) => {
+                find_exprs_in_expr(nested_expr.as_ref(), test_fn)
+            }
+            Expr::Not(nested_expr) => find_exprs_in_expr(nested_expr.as_ref(), 
test_fn),
+            Expr::ScalarFunction { args, .. } => find_exprs_in_exprs(&args, 
test_fn),
+            Expr::ScalarUDF { args, .. } => find_exprs_in_exprs(&args, 
test_fn),
+            Expr::Sort {
+                expr: nested_expr, ..
+            } => find_exprs_in_expr(nested_expr.as_ref(), test_fn),
+
+            // These expressions don't nest other expressions.
+            Expr::Column(_)
+            | Expr::Literal(_)
+            | Expr::ScalarVariable(_)
+            | Expr::Wildcard => vec![],
+        }
+    };
+
+    matched_exprs.into_iter().fold(vec![], |mut acc, expr| {
+        if !acc.contains(&expr) {

Review comment:
       Acc might be better as hashset?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to