This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/master by this push:
new 71ba457b0 MINOR: Rewrite imports in optimizer moduler (#2667)
71ba457b0 is described below
commit 71ba457b0121c099562ee2dc06f5f6889b140f7b
Author: Andy Grove <[email protected]>
AuthorDate: Wed Jun 1 13:49:24 2022 -0600
MINOR: Rewrite imports in optimizer moduler (#2667)
* initial refactor
* fix regression
* improve
* rewrite imports for common_subexpr_eliminate.rs
* rewrite imports for common_subexpr_eliminate.rs
* rewrite imports for eliminate_filter.rs
* rewrite imports for eliminate_limit.rs
* rewrite imports for filter_push_down.rs
* rewrite imports for limit_push_down.rs
* rewrite imports for optimizer.rs
* rewrite imports for projection_push_down.rs
* rewrite imports for simplify_expressions.rs
* rewrite imports for single_distinct_to_groupby.rs
* rewrite imports for subquery_filter_to_join.rs
* rewrite imports for utils.rs
* clippy
---
.../core/src/optimizer/common_subexpr_eliminate.rs | 28 +++++++-------
datafusion/core/src/optimizer/eliminate_filter.rs | 19 ++++-----
datafusion/core/src/optimizer/eliminate_limit.rs | 15 ++++----
datafusion/core/src/optimizer/filter_push_down.rs | 14 +++----
datafusion/core/src/optimizer/limit_push_down.rs | 27 +++++++------
datafusion/core/src/optimizer/optimizer.rs | 8 ++--
.../core/src/optimizer/projection_push_down.rs | 37 ++++++++++--------
.../core/src/optimizer/simplify_expressions.rs | 45 +++++++++++-----------
.../src/optimizer/single_distinct_to_groupby.rs | 19 +++++----
.../core/src/optimizer/subquery_filter_to_join.rs | 28 +++++++-------
datafusion/core/src/optimizer/utils.rs | 23 ++++++-----
11 files changed, 131 insertions(+), 132 deletions(-)
diff --git a/datafusion/core/src/optimizer/common_subexpr_eliminate.rs
b/datafusion/core/src/optimizer/common_subexpr_eliminate.rs
index fa99db2e9..916e99713 100644
--- a/datafusion/core/src/optimizer/common_subexpr_eliminate.rs
+++ b/datafusion/core/src/optimizer/common_subexpr_eliminate.rs
@@ -17,19 +17,18 @@
//! Eliminate common sub-expression.
-use crate::error::Result;
-use crate::logical_plan::plan::{Filter, Projection, Window};
-use crate::logical_plan::{
+use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use arrow::datatypes::DataType;
+use datafusion_common::{DFField, DFSchema, Result};
+use datafusion_expr::{
col,
- plan::{Aggregate, Sort},
- DFField, DFSchema, Expr, ExprRewritable, ExprRewriter, ExprSchemable,
ExprVisitable,
- ExpressionVisitor, LogicalPlan, Recursion, RewriteRecursion,
+ expr::GroupingSet,
+ expr_rewriter::{ExprRewritable, ExprRewriter, RewriteRecursion},
+ expr_visitor::{ExprVisitable, ExpressionVisitor, Recursion},
+ logical_plan::{Aggregate, Filter, LogicalPlan, Projection, Sort, Window},
+ utils::from_plan,
+ Expr, ExprSchemable,
};
-use crate::optimizer::optimizer::OptimizerConfig;
-use crate::optimizer::optimizer::OptimizerRule;
-use arrow::datatypes::DataType;
-use datafusion_expr::expr::GroupingSet;
-use datafusion_expr::utils::from_plan;
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
@@ -696,10 +695,11 @@ fn replace_common_expr(
#[cfg(test)]
mod test {
use super::*;
- use crate::logical_plan::{
- avg, binary_expr, col, lit, sum, LogicalPlanBuilder, Operator,
- };
use crate::test::*;
+ use datafusion_expr::{
+ avg, binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder,
sum,
+ Operator,
+ };
use std::iter;
fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
diff --git a/datafusion/core/src/optimizer/eliminate_filter.rs
b/datafusion/core/src/optimizer/eliminate_filter.rs
index a3c3e0341..4bbc2c401 100644
--- a/datafusion/core/src/optimizer/eliminate_filter.rs
+++ b/datafusion/core/src/optimizer/eliminate_filter.rs
@@ -18,16 +18,14 @@
//! Optimizer rule to replace `where false` on a plan with an empty relation.
//! This saves time in planning and executing the query.
//! Note that this rule should be applied after simplify expressions optimizer
rule.
-use datafusion_common::ScalarValue;
-use datafusion_expr::utils::from_plan;
-use datafusion_expr::Expr;
+use datafusion_common::{Result, ScalarValue};
+use datafusion_expr::{
+ logical_plan::{EmptyRelation, Filter, LogicalPlan},
+ utils::from_plan,
+ Expr,
+};
-use crate::error::Result;
-use crate::logical_plan::plan::Filter;
-use crate::logical_plan::{EmptyRelation, LogicalPlan};
-use crate::optimizer::optimizer::OptimizerRule;
-
-use crate::optimizer::optimizer::OptimizerConfig;
+use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
/// Optimization rule that elimanate the scalar value (true/false) filter with
an [LogicalPlan::EmptyRelation]
#[derive(Default)]
@@ -81,9 +79,8 @@ impl OptimizerRule for EliminateFilter {
#[cfg(test)]
mod tests {
use super::*;
- use crate::logical_plan::LogicalPlanBuilder;
- use crate::logical_plan::{col, sum};
use crate::test::*;
+ use datafusion_expr::{col, logical_plan::builder::LogicalPlanBuilder, sum};
fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
let rule = EliminateFilter::new();
diff --git a/datafusion/core/src/optimizer/eliminate_limit.rs
b/datafusion/core/src/optimizer/eliminate_limit.rs
index b07f6f224..27e5fab17 100644
--- a/datafusion/core/src/optimizer/eliminate_limit.rs
+++ b/datafusion/core/src/optimizer/eliminate_limit.rs
@@ -17,12 +17,12 @@
//! Optimizer rule to replace `LIMIT 0` on a plan with an empty relation.
//! This saves time in planning and executing the query.
-use crate::error::Result;
-use crate::logical_plan::{EmptyRelation, Limit, LogicalPlan};
-use crate::optimizer::optimizer::OptimizerRule;
-use datafusion_expr::utils::from_plan;
-
-use crate::optimizer::optimizer::OptimizerConfig;
+use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use datafusion_common::Result;
+use datafusion_expr::{
+ logical_plan::{EmptyRelation, Limit, LogicalPlan},
+ utils::from_plan,
+};
/// Optimization rule that replaces LIMIT 0 with an
[LogicalPlan::EmptyRelation]
#[derive(Default)]
@@ -72,9 +72,8 @@ impl OptimizerRule for EliminateLimit {
#[cfg(test)]
mod tests {
use super::*;
- use crate::logical_plan::LogicalPlanBuilder;
- use crate::logical_plan::{col, sum};
use crate::test::*;
+ use datafusion_expr::{col, logical_plan::builder::LogicalPlanBuilder, sum};
fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
let rule = EliminateLimit::new();
diff --git a/datafusion/core/src/optimizer/filter_push_down.rs
b/datafusion/core/src/optimizer/filter_push_down.rs
index a07155c2f..9641c9ae5 100644
--- a/datafusion/core/src/optimizer/filter_push_down.rs
+++ b/datafusion/core/src/optimizer/filter_push_down.rs
@@ -14,8 +14,10 @@
//! Filter Push Down optimizer rule ensures that filters are applied as early
as possible in the plan
-use crate::optimizer::optimizer::OptimizerConfig;
-use crate::optimizer::{optimizer::OptimizerRule, utils};
+use crate::optimizer::{
+ optimizer::{OptimizerConfig, OptimizerRule},
+ utils,
+};
use datafusion_common::{Column, DFSchema, Result};
use datafusion_expr::{
col,
@@ -560,19 +562,17 @@ fn rewrite(expr: &Expr, projection: &HashMap<String,
Expr>) -> Result<Expr> {
#[cfg(test)]
mod tests {
- use std::sync::Arc;
-
use super::*;
use crate::test::*;
+ use arrow::datatypes::SchemaRef;
+ use async_trait::async_trait;
use datafusion_common::DFSchema;
use datafusion_expr::{
and, col, lit,
logical_plan::{builder::union_with_alias, JoinType},
sum, Expr, LogicalPlanBuilder, Operator, TableSource, TableType,
};
-
- use arrow::datatypes::SchemaRef;
- use async_trait::async_trait;
+ use std::sync::Arc;
fn optimize_plan(plan: &LogicalPlan) -> LogicalPlan {
let rule = FilterPushDown::new();
diff --git a/datafusion/core/src/optimizer/limit_push_down.rs
b/datafusion/core/src/optimizer/limit_push_down.rs
index 19578b2bc..990182c38 100644
--- a/datafusion/core/src/optimizer/limit_push_down.rs
+++ b/datafusion/core/src/optimizer/limit_push_down.rs
@@ -17,15 +17,14 @@
//! Optimizer rule to push down LIMIT in the query plan
//! It will push down through projection, limits (taking the smaller limit)
-use crate::error::Result;
-use crate::logical_plan::plan::Projection;
-use crate::logical_plan::{Limit, TableScan};
-use crate::logical_plan::{LogicalPlan, Union};
-use crate::optimizer::optimizer::OptimizerConfig;
-use crate::optimizer::optimizer::OptimizerRule;
-use datafusion_common::DataFusionError;
-use datafusion_expr::logical_plan::{Join, JoinType, Offset};
-use datafusion_expr::utils::from_plan;
+use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::{
+ logical_plan::{
+ Join, JoinType, Limit, LogicalPlan, Offset, Projection, TableScan,
Union,
+ },
+ utils::from_plan,
+};
use std::sync::Arc;
/// Optimization rule that tries pushes down LIMIT n
@@ -270,12 +269,12 @@ impl OptimizerRule for LimitPushDown {
#[cfg(test)]
mod test {
use super::*;
- use crate::{
- logical_plan::{col, max, LogicalPlan, LogicalPlanBuilder},
- test::*,
+ use crate::test::*;
+ use datafusion_expr::{
+ col, exists,
+ logical_plan::{builder::LogicalPlanBuilder, JoinType, LogicalPlan},
+ max,
};
- use datafusion_expr::exists;
- use datafusion_expr::logical_plan::JoinType;
fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
let rule = LimitPushDown::new();
diff --git a/datafusion/core/src/optimizer/optimizer.rs
b/datafusion/core/src/optimizer/optimizer.rs
index 8b602f423..67f0cf8b6 100644
--- a/datafusion/core/src/optimizer/optimizer.rs
+++ b/datafusion/core/src/optimizer/optimizer.rs
@@ -18,12 +18,10 @@
//! Query optimizer traits
use chrono::{DateTime, Utc};
-use std::sync::Arc;
-
+use datafusion_common::Result;
+use datafusion_expr::logical_plan::LogicalPlan;
use log::{debug, trace};
-
-use crate::error::Result;
-use crate::logical_plan::LogicalPlan;
+use std::sync::Arc;
/// `OptimizerRule` transforms one ['LogicalPlan'] into another which
/// computes the same results, but in a potentially more efficient
diff --git a/datafusion/core/src/optimizer/projection_push_down.rs
b/datafusion/core/src/optimizer/projection_push_down.rs
index 977a34b53..b99b81f52 100644
--- a/datafusion/core/src/optimizer/projection_push_down.rs
+++ b/datafusion/core/src/optimizer/projection_push_down.rs
@@ -18,22 +18,21 @@
//! Projection Push Down optimizer rule ensures that only referenced columns
are
//! loaded into memory
-use crate::error::{DataFusionError, Result};
-use crate::logical_plan::plan::{
- Aggregate, Analyze, Join, Projection, SubqueryAlias, TableScan, Window,
-};
-use crate::logical_plan::{
- build_join_schema, Column, DFField, DFSchema, DFSchemaRef, LogicalPlan,
- LogicalPlanBuilder, ToDFSchema, Union,
-};
-use crate::optimizer::optimizer::OptimizerConfig;
-use crate::optimizer::optimizer::OptimizerRule;
+use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use arrow::datatypes::{Field, Schema};
use arrow::error::Result as ArrowResult;
-use datafusion_expr::utils::{
- expr_to_columns, exprlist_to_columns, find_sort_exprs, from_plan,
+use datafusion_common::{
+ Column, DFField, DFSchema, DFSchemaRef, DataFusionError, Result,
ToDFSchema,
+};
+use datafusion_expr::{
+ logical_plan::{
+ builder::{build_join_schema, LogicalPlanBuilder},
+ Aggregate, Analyze, Join, LogicalPlan, Projection, SubqueryAlias,
TableScan,
+ Union, Window,
+ },
+ utils::{expr_to_columns, exprlist_to_columns, find_sort_exprs, from_plan},
+ Expr,
};
-use datafusion_expr::Expr;
use std::{
collections::{BTreeSet, HashSet},
sync::Arc,
@@ -529,14 +528,18 @@ fn optimize_plan(
#[cfg(test)]
mod tests {
- use std::collections::HashMap;
-
use super::*;
- use crate::logical_plan::{col, lit, max, min, Expr, JoinType,
LogicalPlanBuilder};
use crate::test::*;
use crate::test_util::scan_empty;
use arrow::datatypes::DataType;
- use datafusion_expr::utils::exprlist_to_fields;
+ use datafusion_expr::{
+ col, lit,
+ logical_plan::{builder::LogicalPlanBuilder, JoinType},
+ max, min,
+ utils::exprlist_to_fields,
+ Expr,
+ };
+ use std::collections::HashMap;
#[test]
fn aggregate_no_group_by() -> Result<()> {
diff --git a/datafusion/core/src/optimizer/simplify_expressions.rs
b/datafusion/core/src/optimizer/simplify_expressions.rs
index 49a5a1295..b85377df3 100644
--- a/datafusion/core/src/optimizer/simplify_expressions.rs
+++ b/datafusion/core/src/optimizer/simplify_expressions.rs
@@ -17,23 +17,22 @@
//! Simplify expressions optimizer rule
-use crate::error::DataFusionError;
use crate::execution::context::ExecutionProps;
-use crate::logical_plan::ExprSchemable;
-use crate::logical_plan::{
- lit, DFSchema, DFSchemaRef, Expr, ExprRewritable, ExprRewriter,
ExprSimplifiable,
- LogicalPlan, RewriteRecursion, SimplifyInfo,
-};
-use crate::optimizer::optimizer::OptimizerConfig;
-use crate::optimizer::optimizer::OptimizerRule;
+use crate::logical_plan::{ExprSimplifiable, SimplifyInfo};
+use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
use crate::physical_plan::planner::create_physical_expr;
-use crate::scalar::ScalarValue;
-use crate::{error::Result, logical_plan::Operator};
use arrow::array::new_null_array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
-use datafusion_expr::utils::from_plan;
-use datafusion_expr::Volatility;
+use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result,
ScalarValue};
+use datafusion_expr::{
+ expr_rewriter::RewriteRecursion,
+ expr_rewriter::{ExprRewritable, ExprRewriter},
+ lit,
+ logical_plan::LogicalPlan,
+ utils::from_plan,
+ Expr, ExprSchemable, Operator, Volatility,
+};
/// Provides simplification information based on schema and properties
pub(crate) struct SimplifyContext<'a, 'b> {
@@ -748,22 +747,22 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a,
S> {
#[cfg(test)]
mod tests {
- use std::collections::HashMap;
- use std::sync::Arc;
-
- use arrow::array::{ArrayRef, Int32Array};
- use chrono::{DateTime, TimeZone, Utc};
- use datafusion_expr::{BuiltinScalarFunction, ExprSchemable};
-
use super::*;
use crate::assert_contains;
- use crate::logical_plan::{
- and, binary_expr, call_fn, col, create_udf, lit, lit_timestamp_nano,
DFField,
- Expr, LogicalPlanBuilder,
- };
+ use crate::logical_plan::{call_fn, create_udf};
use crate::physical_plan::functions::make_scalar_function;
use crate::physical_plan::udf::ScalarUDF;
use crate::test_util::scan_empty;
+ use arrow::array::{ArrayRef, Int32Array};
+ use chrono::{DateTime, TimeZone, Utc};
+ use datafusion_common::DFField;
+ use datafusion_expr::{
+ and, binary_expr, col, lit, lit_timestamp_nano,
+ logical_plan::builder::LogicalPlanBuilder, BuiltinScalarFunction, Expr,
+ ExprSchemable,
+ };
+ use std::collections::HashMap;
+ use std::sync::Arc;
#[test]
fn test_simplify_or_true() {
diff --git a/datafusion/core/src/optimizer/single_distinct_to_groupby.rs
b/datafusion/core/src/optimizer/single_distinct_to_groupby.rs
index 65458c4db..d29a2477b 100644
--- a/datafusion/core/src/optimizer/single_distinct_to_groupby.rs
+++ b/datafusion/core/src/optimizer/single_distinct_to_groupby.rs
@@ -17,13 +17,14 @@
//! single distinct to group by optimizer rule
-use crate::error::Result;
-use crate::logical_plan::plan::{Aggregate, Projection};
-use crate::logical_plan::ExprSchemable;
-use crate::logical_plan::{col, DFSchema, Expr, LogicalPlan};
-use crate::optimizer::optimizer::OptimizerConfig;
-use crate::optimizer::optimizer::OptimizerRule;
-use datafusion_expr::utils::{columnize_expr, from_plan};
+use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use datafusion_common::{DFSchema, Result};
+use datafusion_expr::{
+ col,
+ logical_plan::{Aggregate, LogicalPlan, Projection},
+ utils::{columnize_expr, from_plan},
+ Expr, ExprSchemable,
+};
use hashbrown::HashSet;
use std::sync::Arc;
@@ -200,9 +201,11 @@ impl OptimizerRule for SingleDistinctToGroupBy {
#[cfg(test)]
mod tests {
use super::*;
- use crate::logical_plan::{col, count, count_distinct, lit, max,
LogicalPlanBuilder};
use crate::physical_plan::aggregates;
use crate::test::*;
+ use datafusion_expr::{
+ col, count, count_distinct, lit,
logical_plan::builder::LogicalPlanBuilder, max,
+ };
fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
let rule = SingleDistinctToGroupBy::new();
diff --git a/datafusion/core/src/optimizer/subquery_filter_to_join.rs
b/datafusion/core/src/optimizer/subquery_filter_to_join.rs
index add03c72d..bcbd9ae8a 100644
--- a/datafusion/core/src/optimizer/subquery_filter_to_join.rs
+++ b/datafusion/core/src/optimizer/subquery_filter_to_join.rs
@@ -26,16 +26,18 @@
//! WHERE t1.f IN (SELECT f FROM t2) OR t2.f = 'x'
//! ```
//! won't
-use std::sync::Arc;
-
-use crate::error::{DataFusionError, Result};
-use crate::logical_plan::plan::{Filter, Join};
-use crate::logical_plan::{
- build_join_schema, Expr, JoinConstraint, JoinType, LogicalPlan,
+use crate::optimizer::{
+ optimizer::{OptimizerConfig, OptimizerRule},
+ utils,
+};
+use datafusion_common::{DataFusionError, Result};
+use datafusion_expr::{
+ logical_plan::{
+ builder::build_join_schema, Filter, Join, JoinConstraint, JoinType,
LogicalPlan,
+ },
+ Expr,
};
-use crate::optimizer::optimizer::OptimizerConfig;
-use crate::optimizer::optimizer::OptimizerRule;
-use crate::optimizer::utils;
+use std::sync::Arc;
/// Optimizer rule for rewriting subquery filters to joins
#[derive(Default)]
@@ -192,11 +194,11 @@ fn extract_subquery_filters(expression: &Expr, extracted:
&mut Vec<Expr>) -> Res
#[cfg(test)]
mod tests {
use super::*;
- use crate::logical_plan::{
- and, binary_expr, col, in_subquery, lit, not_in_subquery, or,
LogicalPlanBuilder,
- Operator,
- };
use crate::test::*;
+ use datafusion_expr::{
+ and, binary_expr, col, in_subquery, lit,
logical_plan::LogicalPlanBuilder,
+ not_in_subquery, or, Operator,
+ };
fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) {
let rule = SubqueryFilterToJoin::new();
diff --git a/datafusion/core/src/optimizer/utils.rs
b/datafusion/core/src/optimizer/utils.rs
index 006613328..863536972 100644
--- a/datafusion/core/src/optimizer/utils.rs
+++ b/datafusion/core/src/optimizer/utils.rs
@@ -17,16 +17,16 @@
//! Collection of utility functions that are leveraged by the query optimizer
rules
-use super::optimizer::OptimizerRule;
-use crate::optimizer::optimizer::OptimizerConfig;
-use datafusion_expr::logical_plan::Filter;
-
-use crate::error::{DataFusionError, Result};
-use crate::logical_plan::{and, Expr, LogicalPlan, Operator};
-use crate::prelude::lit;
-use crate::scalar::ScalarValue;
-use datafusion_expr::expr::GroupingSet;
-use datafusion_expr::utils::from_plan;
+use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule};
+use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_expr::{
+ and,
+ expr::GroupingSet,
+ lit,
+ logical_plan::{Filter, LogicalPlan},
+ utils::from_plan,
+ Expr, Operator,
+};
use std::sync::Arc;
const CASE_EXPR_MARKER: &str = "__DATAFUSION_CASE_EXPR__";
@@ -363,10 +363,9 @@ pub fn add_filter(plan: LogicalPlan, predicates: &[&Expr])
-> LogicalPlan {
#[cfg(test)]
mod tests {
use super::*;
- use crate::logical_plan::col;
use arrow::datatypes::DataType;
use datafusion_common::Column;
- use datafusion_expr::utils::expr_to_columns;
+ use datafusion_expr::{col, utils::expr_to_columns};
use std::collections::HashSet;
#[test]