From 71ba457b0121c099562ee2dc06f5f6889b140f7b Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 1 Jun 2022 13:49:24 -0600 Subject: [PATCH] MINOR: Rewrite imports in optimizer moduler (#2667) * initial refactor * fix regression * improve * rewrite imports for common_subexpr_eliminate.rs * rewrite imports for common_subexpr_eliminate.rs * rewrite imports for eliminate_filter.rs * rewrite imports for eliminate_limit.rs * rewrite imports for filter_push_down.rs * rewrite imports for limit_push_down.rs * rewrite imports for optimizer.rs * rewrite imports for projection_push_down.rs * rewrite imports for simplify_expressions.rs * rewrite imports for single_distinct_to_groupby.rs * rewrite imports for subquery_filter_to_join.rs * rewrite imports for utils.rs * clippy --- .../src/optimizer/common_subexpr_eliminate.rs | 28 ++++++------ .../core/src/optimizer/eliminate_filter.rs | 19 ++++---- .../core/src/optimizer/eliminate_limit.rs | 15 +++---- .../core/src/optimizer/filter_push_down.rs | 14 +++--- .../core/src/optimizer/limit_push_down.rs | 27 ++++++----- datafusion/core/src/optimizer/optimizer.rs | 8 ++-- .../src/optimizer/projection_push_down.rs | 37 ++++++++------- .../src/optimizer/simplify_expressions.rs | 45 +++++++++---------- .../optimizer/single_distinct_to_groupby.rs | 19 ++++---- .../src/optimizer/subquery_filter_to_join.rs | 28 ++++++------ datafusion/core/src/optimizer/utils.rs | 23 +++++----- 11 files changed, 131 insertions(+), 132 deletions(-) diff --git a/datafusion/core/src/optimizer/common_subexpr_eliminate.rs b/datafusion/core/src/optimizer/common_subexpr_eliminate.rs index fa99db2e9513..916e99713d4a 100644 --- a/datafusion/core/src/optimizer/common_subexpr_eliminate.rs +++ b/datafusion/core/src/optimizer/common_subexpr_eliminate.rs @@ -17,19 +17,18 @@ //! Eliminate common sub-expression. -use crate::error::Result; -use crate::logical_plan::plan::{Filter, Projection, Window}; -use crate::logical_plan::{ +use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule}; +use arrow::datatypes::DataType; +use datafusion_common::{DFField, DFSchema, Result}; +use datafusion_expr::{ col, - plan::{Aggregate, Sort}, - DFField, DFSchema, Expr, ExprRewritable, ExprRewriter, ExprSchemable, ExprVisitable, - ExpressionVisitor, LogicalPlan, Recursion, RewriteRecursion, + expr::GroupingSet, + expr_rewriter::{ExprRewritable, ExprRewriter, RewriteRecursion}, + expr_visitor::{ExprVisitable, ExpressionVisitor, Recursion}, + logical_plan::{Aggregate, Filter, LogicalPlan, Projection, Sort, Window}, + utils::from_plan, + Expr, ExprSchemable, }; -use crate::optimizer::optimizer::OptimizerConfig; -use crate::optimizer::optimizer::OptimizerRule; -use arrow::datatypes::DataType; -use datafusion_expr::expr::GroupingSet; -use datafusion_expr::utils::from_plan; use std::collections::{HashMap, HashSet}; use std::sync::Arc; @@ -696,10 +695,11 @@ fn replace_common_expr( #[cfg(test)] mod test { use super::*; - use crate::logical_plan::{ - avg, binary_expr, col, lit, sum, LogicalPlanBuilder, Operator, - }; use crate::test::*; + use datafusion_expr::{ + avg, binary_expr, col, lit, logical_plan::builder::LogicalPlanBuilder, sum, + Operator, + }; use std::iter; fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { diff --git a/datafusion/core/src/optimizer/eliminate_filter.rs b/datafusion/core/src/optimizer/eliminate_filter.rs index a3c3e03412f8..4bbc2c4019e4 100644 --- a/datafusion/core/src/optimizer/eliminate_filter.rs +++ b/datafusion/core/src/optimizer/eliminate_filter.rs @@ -18,16 +18,14 @@ //! Optimizer rule to replace `where false` on a plan with an empty relation. //! This saves time in planning and executing the query. //! Note that this rule should be applied after simplify expressions optimizer rule. -use datafusion_common::ScalarValue; -use datafusion_expr::utils::from_plan; -use datafusion_expr::Expr; +use datafusion_common::{Result, ScalarValue}; +use datafusion_expr::{ + logical_plan::{EmptyRelation, Filter, LogicalPlan}, + utils::from_plan, + Expr, +}; -use crate::error::Result; -use crate::logical_plan::plan::Filter; -use crate::logical_plan::{EmptyRelation, LogicalPlan}; -use crate::optimizer::optimizer::OptimizerRule; - -use crate::optimizer::optimizer::OptimizerConfig; +use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule}; /// Optimization rule that elimanate the scalar value (true/false) filter with an [LogicalPlan::EmptyRelation] #[derive(Default)] @@ -81,9 +79,8 @@ impl OptimizerRule for EliminateFilter { #[cfg(test)] mod tests { use super::*; - use crate::logical_plan::LogicalPlanBuilder; - use crate::logical_plan::{col, sum}; use crate::test::*; + use datafusion_expr::{col, logical_plan::builder::LogicalPlanBuilder, sum}; fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { let rule = EliminateFilter::new(); diff --git a/datafusion/core/src/optimizer/eliminate_limit.rs b/datafusion/core/src/optimizer/eliminate_limit.rs index b07f6f224a94..27e5fab1720d 100644 --- a/datafusion/core/src/optimizer/eliminate_limit.rs +++ b/datafusion/core/src/optimizer/eliminate_limit.rs @@ -17,12 +17,12 @@ //! Optimizer rule to replace `LIMIT 0` on a plan with an empty relation. //! This saves time in planning and executing the query. -use crate::error::Result; -use crate::logical_plan::{EmptyRelation, Limit, LogicalPlan}; -use crate::optimizer::optimizer::OptimizerRule; -use datafusion_expr::utils::from_plan; - -use crate::optimizer::optimizer::OptimizerConfig; +use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule}; +use datafusion_common::Result; +use datafusion_expr::{ + logical_plan::{EmptyRelation, Limit, LogicalPlan}, + utils::from_plan, +}; /// Optimization rule that replaces LIMIT 0 with an [LogicalPlan::EmptyRelation] #[derive(Default)] @@ -72,9 +72,8 @@ impl OptimizerRule for EliminateLimit { #[cfg(test)] mod tests { use super::*; - use crate::logical_plan::LogicalPlanBuilder; - use crate::logical_plan::{col, sum}; use crate::test::*; + use datafusion_expr::{col, logical_plan::builder::LogicalPlanBuilder, sum}; fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { let rule = EliminateLimit::new(); diff --git a/datafusion/core/src/optimizer/filter_push_down.rs b/datafusion/core/src/optimizer/filter_push_down.rs index a07155c2f0bb..9641c9ae5bd0 100644 --- a/datafusion/core/src/optimizer/filter_push_down.rs +++ b/datafusion/core/src/optimizer/filter_push_down.rs @@ -14,8 +14,10 @@ //! Filter Push Down optimizer rule ensures that filters are applied as early as possible in the plan -use crate::optimizer::optimizer::OptimizerConfig; -use crate::optimizer::{optimizer::OptimizerRule, utils}; +use crate::optimizer::{ + optimizer::{OptimizerConfig, OptimizerRule}, + utils, +}; use datafusion_common::{Column, DFSchema, Result}; use datafusion_expr::{ col, @@ -560,19 +562,17 @@ fn rewrite(expr: &Expr, projection: &HashMap) -> Result { #[cfg(test)] mod tests { - use std::sync::Arc; - use super::*; use crate::test::*; + use arrow::datatypes::SchemaRef; + use async_trait::async_trait; use datafusion_common::DFSchema; use datafusion_expr::{ and, col, lit, logical_plan::{builder::union_with_alias, JoinType}, sum, Expr, LogicalPlanBuilder, Operator, TableSource, TableType, }; - - use arrow::datatypes::SchemaRef; - use async_trait::async_trait; + use std::sync::Arc; fn optimize_plan(plan: &LogicalPlan) -> LogicalPlan { let rule = FilterPushDown::new(); diff --git a/datafusion/core/src/optimizer/limit_push_down.rs b/datafusion/core/src/optimizer/limit_push_down.rs index 19578b2bc5d7..990182c38738 100644 --- a/datafusion/core/src/optimizer/limit_push_down.rs +++ b/datafusion/core/src/optimizer/limit_push_down.rs @@ -17,15 +17,14 @@ //! Optimizer rule to push down LIMIT in the query plan //! It will push down through projection, limits (taking the smaller limit) -use crate::error::Result; -use crate::logical_plan::plan::Projection; -use crate::logical_plan::{Limit, TableScan}; -use crate::logical_plan::{LogicalPlan, Union}; -use crate::optimizer::optimizer::OptimizerConfig; -use crate::optimizer::optimizer::OptimizerRule; -use datafusion_common::DataFusionError; -use datafusion_expr::logical_plan::{Join, JoinType, Offset}; -use datafusion_expr::utils::from_plan; +use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule}; +use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::{ + logical_plan::{ + Join, JoinType, Limit, LogicalPlan, Offset, Projection, TableScan, Union, + }, + utils::from_plan, +}; use std::sync::Arc; /// Optimization rule that tries pushes down LIMIT n @@ -270,12 +269,12 @@ impl OptimizerRule for LimitPushDown { #[cfg(test)] mod test { use super::*; - use crate::{ - logical_plan::{col, max, LogicalPlan, LogicalPlanBuilder}, - test::*, + use crate::test::*; + use datafusion_expr::{ + col, exists, + logical_plan::{builder::LogicalPlanBuilder, JoinType, LogicalPlan}, + max, }; - use datafusion_expr::exists; - use datafusion_expr::logical_plan::JoinType; fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { let rule = LimitPushDown::new(); diff --git a/datafusion/core/src/optimizer/optimizer.rs b/datafusion/core/src/optimizer/optimizer.rs index 8b602f4231b4..67f0cf8b6e41 100644 --- a/datafusion/core/src/optimizer/optimizer.rs +++ b/datafusion/core/src/optimizer/optimizer.rs @@ -18,12 +18,10 @@ //! Query optimizer traits use chrono::{DateTime, Utc}; -use std::sync::Arc; - +use datafusion_common::Result; +use datafusion_expr::logical_plan::LogicalPlan; use log::{debug, trace}; - -use crate::error::Result; -use crate::logical_plan::LogicalPlan; +use std::sync::Arc; /// `OptimizerRule` transforms one ['LogicalPlan'] into another which /// computes the same results, but in a potentially more efficient diff --git a/datafusion/core/src/optimizer/projection_push_down.rs b/datafusion/core/src/optimizer/projection_push_down.rs index 977a34b53488..b99b81f5259f 100644 --- a/datafusion/core/src/optimizer/projection_push_down.rs +++ b/datafusion/core/src/optimizer/projection_push_down.rs @@ -18,22 +18,21 @@ //! Projection Push Down optimizer rule ensures that only referenced columns are //! loaded into memory -use crate::error::{DataFusionError, Result}; -use crate::logical_plan::plan::{ - Aggregate, Analyze, Join, Projection, SubqueryAlias, TableScan, Window, -}; -use crate::logical_plan::{ - build_join_schema, Column, DFField, DFSchema, DFSchemaRef, LogicalPlan, - LogicalPlanBuilder, ToDFSchema, Union, -}; -use crate::optimizer::optimizer::OptimizerConfig; -use crate::optimizer::optimizer::OptimizerRule; +use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule}; use arrow::datatypes::{Field, Schema}; use arrow::error::Result as ArrowResult; -use datafusion_expr::utils::{ - expr_to_columns, exprlist_to_columns, find_sort_exprs, from_plan, +use datafusion_common::{ + Column, DFField, DFSchema, DFSchemaRef, DataFusionError, Result, ToDFSchema, +}; +use datafusion_expr::{ + logical_plan::{ + builder::{build_join_schema, LogicalPlanBuilder}, + Aggregate, Analyze, Join, LogicalPlan, Projection, SubqueryAlias, TableScan, + Union, Window, + }, + utils::{expr_to_columns, exprlist_to_columns, find_sort_exprs, from_plan}, + Expr, }; -use datafusion_expr::Expr; use std::{ collections::{BTreeSet, HashSet}, sync::Arc, @@ -529,14 +528,18 @@ fn optimize_plan( #[cfg(test)] mod tests { - use std::collections::HashMap; - use super::*; - use crate::logical_plan::{col, lit, max, min, Expr, JoinType, LogicalPlanBuilder}; use crate::test::*; use crate::test_util::scan_empty; use arrow::datatypes::DataType; - use datafusion_expr::utils::exprlist_to_fields; + use datafusion_expr::{ + col, lit, + logical_plan::{builder::LogicalPlanBuilder, JoinType}, + max, min, + utils::exprlist_to_fields, + Expr, + }; + use std::collections::HashMap; #[test] fn aggregate_no_group_by() -> Result<()> { diff --git a/datafusion/core/src/optimizer/simplify_expressions.rs b/datafusion/core/src/optimizer/simplify_expressions.rs index 49a5a1295f23..b85377df33fd 100644 --- a/datafusion/core/src/optimizer/simplify_expressions.rs +++ b/datafusion/core/src/optimizer/simplify_expressions.rs @@ -17,23 +17,22 @@ //! Simplify expressions optimizer rule -use crate::error::DataFusionError; use crate::execution::context::ExecutionProps; -use crate::logical_plan::ExprSchemable; -use crate::logical_plan::{ - lit, DFSchema, DFSchemaRef, Expr, ExprRewritable, ExprRewriter, ExprSimplifiable, - LogicalPlan, RewriteRecursion, SimplifyInfo, -}; -use crate::optimizer::optimizer::OptimizerConfig; -use crate::optimizer::optimizer::OptimizerRule; +use crate::logical_plan::{ExprSimplifiable, SimplifyInfo}; +use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule}; use crate::physical_plan::planner::create_physical_expr; -use crate::scalar::ScalarValue; -use crate::{error::Result, logical_plan::Operator}; use arrow::array::new_null_array; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; -use datafusion_expr::utils::from_plan; -use datafusion_expr::Volatility; +use datafusion_common::{DFSchema, DFSchemaRef, DataFusionError, Result, ScalarValue}; +use datafusion_expr::{ + expr_rewriter::RewriteRecursion, + expr_rewriter::{ExprRewritable, ExprRewriter}, + lit, + logical_plan::LogicalPlan, + utils::from_plan, + Expr, ExprSchemable, Operator, Volatility, +}; /// Provides simplification information based on schema and properties pub(crate) struct SimplifyContext<'a, 'b> { @@ -748,22 +747,22 @@ impl<'a, S: SimplifyInfo> ExprRewriter for Simplifier<'a, S> { #[cfg(test)] mod tests { - use std::collections::HashMap; - use std::sync::Arc; - - use arrow::array::{ArrayRef, Int32Array}; - use chrono::{DateTime, TimeZone, Utc}; - use datafusion_expr::{BuiltinScalarFunction, ExprSchemable}; - use super::*; use crate::assert_contains; - use crate::logical_plan::{ - and, binary_expr, call_fn, col, create_udf, lit, lit_timestamp_nano, DFField, - Expr, LogicalPlanBuilder, - }; + use crate::logical_plan::{call_fn, create_udf}; use crate::physical_plan::functions::make_scalar_function; use crate::physical_plan::udf::ScalarUDF; use crate::test_util::scan_empty; + use arrow::array::{ArrayRef, Int32Array}; + use chrono::{DateTime, TimeZone, Utc}; + use datafusion_common::DFField; + use datafusion_expr::{ + and, binary_expr, col, lit, lit_timestamp_nano, + logical_plan::builder::LogicalPlanBuilder, BuiltinScalarFunction, Expr, + ExprSchemable, + }; + use std::collections::HashMap; + use std::sync::Arc; #[test] fn test_simplify_or_true() { diff --git a/datafusion/core/src/optimizer/single_distinct_to_groupby.rs b/datafusion/core/src/optimizer/single_distinct_to_groupby.rs index 65458c4dba88..d29a2477b125 100644 --- a/datafusion/core/src/optimizer/single_distinct_to_groupby.rs +++ b/datafusion/core/src/optimizer/single_distinct_to_groupby.rs @@ -17,13 +17,14 @@ //! single distinct to group by optimizer rule -use crate::error::Result; -use crate::logical_plan::plan::{Aggregate, Projection}; -use crate::logical_plan::ExprSchemable; -use crate::logical_plan::{col, DFSchema, Expr, LogicalPlan}; -use crate::optimizer::optimizer::OptimizerConfig; -use crate::optimizer::optimizer::OptimizerRule; -use datafusion_expr::utils::{columnize_expr, from_plan}; +use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule}; +use datafusion_common::{DFSchema, Result}; +use datafusion_expr::{ + col, + logical_plan::{Aggregate, LogicalPlan, Projection}, + utils::{columnize_expr, from_plan}, + Expr, ExprSchemable, +}; use hashbrown::HashSet; use std::sync::Arc; @@ -200,9 +201,11 @@ impl OptimizerRule for SingleDistinctToGroupBy { #[cfg(test)] mod tests { use super::*; - use crate::logical_plan::{col, count, count_distinct, lit, max, LogicalPlanBuilder}; use crate::physical_plan::aggregates; use crate::test::*; + use datafusion_expr::{ + col, count, count_distinct, lit, logical_plan::builder::LogicalPlanBuilder, max, + }; fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { let rule = SingleDistinctToGroupBy::new(); diff --git a/datafusion/core/src/optimizer/subquery_filter_to_join.rs b/datafusion/core/src/optimizer/subquery_filter_to_join.rs index add03c72d588..bcbd9ae8a73a 100644 --- a/datafusion/core/src/optimizer/subquery_filter_to_join.rs +++ b/datafusion/core/src/optimizer/subquery_filter_to_join.rs @@ -26,16 +26,18 @@ //! WHERE t1.f IN (SELECT f FROM t2) OR t2.f = 'x' //! ``` //! won't -use std::sync::Arc; - -use crate::error::{DataFusionError, Result}; -use crate::logical_plan::plan::{Filter, Join}; -use crate::logical_plan::{ - build_join_schema, Expr, JoinConstraint, JoinType, LogicalPlan, +use crate::optimizer::{ + optimizer::{OptimizerConfig, OptimizerRule}, + utils, +}; +use datafusion_common::{DataFusionError, Result}; +use datafusion_expr::{ + logical_plan::{ + builder::build_join_schema, Filter, Join, JoinConstraint, JoinType, LogicalPlan, + }, + Expr, }; -use crate::optimizer::optimizer::OptimizerConfig; -use crate::optimizer::optimizer::OptimizerRule; -use crate::optimizer::utils; +use std::sync::Arc; /// Optimizer rule for rewriting subquery filters to joins #[derive(Default)] @@ -192,11 +194,11 @@ fn extract_subquery_filters(expression: &Expr, extracted: &mut Vec) -> Res #[cfg(test)] mod tests { use super::*; - use crate::logical_plan::{ - and, binary_expr, col, in_subquery, lit, not_in_subquery, or, LogicalPlanBuilder, - Operator, - }; use crate::test::*; + use datafusion_expr::{ + and, binary_expr, col, in_subquery, lit, logical_plan::LogicalPlanBuilder, + not_in_subquery, or, Operator, + }; fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { let rule = SubqueryFilterToJoin::new(); diff --git a/datafusion/core/src/optimizer/utils.rs b/datafusion/core/src/optimizer/utils.rs index 006613328674..863536972f73 100644 --- a/datafusion/core/src/optimizer/utils.rs +++ b/datafusion/core/src/optimizer/utils.rs @@ -17,16 +17,16 @@ //! Collection of utility functions that are leveraged by the query optimizer rules -use super::optimizer::OptimizerRule; -use crate::optimizer::optimizer::OptimizerConfig; -use datafusion_expr::logical_plan::Filter; - -use crate::error::{DataFusionError, Result}; -use crate::logical_plan::{and, Expr, LogicalPlan, Operator}; -use crate::prelude::lit; -use crate::scalar::ScalarValue; -use datafusion_expr::expr::GroupingSet; -use datafusion_expr::utils::from_plan; +use crate::optimizer::optimizer::{OptimizerConfig, OptimizerRule}; +use datafusion_common::{DataFusionError, Result, ScalarValue}; +use datafusion_expr::{ + and, + expr::GroupingSet, + lit, + logical_plan::{Filter, LogicalPlan}, + utils::from_plan, + Expr, Operator, +}; use std::sync::Arc; const CASE_EXPR_MARKER: &str = "__DATAFUSION_CASE_EXPR__"; @@ -363,10 +363,9 @@ pub fn add_filter(plan: LogicalPlan, predicates: &[&Expr]) -> LogicalPlan { #[cfg(test)] mod tests { use super::*; - use crate::logical_plan::col; use arrow::datatypes::DataType; use datafusion_common::Column; - use datafusion_expr::utils::expr_to_columns; + use datafusion_expr::{col, utils::expr_to_columns}; use std::collections::HashSet; #[test]