This is an automated email from the ASF dual-hosted git repository. mikeb pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit fe47b2352ea9fc13f2d51da8fa9d0f7c70037779 Author: Paul Rogers <prog...@cloudera.com> AuthorDate: Thu Nov 8 15:12:11 2018 -0800 IMPALA-7841 (Part 1): Refactor SelectStmt for easier debugging Builds on IMPALA-7808 with several additional refactorings: * IMPALA-7808 minimized code changes. This change cleans up the new functions, removing if's and merging the "aggregation" function with the body of the new analyze() function. * Removed an unneeded analyzer argument. This is all refactoring: there is no functional change. Testing: Reran existing tests to ensure that functionality remained unchanged. Change-Id: I4f4fe3d3a1ab3170e294714dab066d40d169eff1 Reviewed-on: http://gerrit.cloudera.org:8080/11915 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- .../java/org/apache/impala/analysis/Analyzer.java | 8 +- .../apache/impala/analysis/CollectionTableRef.java | 2 +- .../org/apache/impala/analysis/InlineViewRef.java | 2 +- .../java/org/apache/impala/analysis/QueryStmt.java | 2 +- .../org/apache/impala/analysis/SelectStmt.java | 176 ++++++++++----------- .../java/org/apache/impala/analysis/Subquery.java | 2 +- .../org/apache/impala/analysis/WithClause.java | 2 +- 7 files changed, 94 insertions(+), 100 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index fbe4135..2d39b43 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -156,7 +156,7 @@ public class Analyzer { private boolean isSubquery_ = false; // Flag indicating whether this analyzer belongs to a WITH clause view. - private boolean isWithClause_ = false; + private boolean hasWithClause_ = false; // If set, when masked privilege requests are registered they will use this error // error message. @@ -183,8 +183,8 @@ public class Analyzer { } public boolean setHasPlanHints() { return globalState_.hasPlanHints = true; } public boolean hasPlanHints() { return globalState_.hasPlanHints; } - public void setIsWithClause() { isWithClause_ = true; } - public boolean isWithClause() { return isWithClause_; } + public void setHasWithClause() { hasWithClause_ = true; } + public boolean hasWithClause() { return hasWithClause_; } // State shared between all objects of an Analyzer tree. We use LinkedHashMap and // LinkedHashSet where applicable to preserve the iteration order and make the class @@ -403,7 +403,7 @@ public class Analyzer { authErrorMsg_ = parentAnalyzer.authErrorMsg_; maskPrivChecks_ = parentAnalyzer.maskPrivChecks_; enablePrivChecks_ = parentAnalyzer.enablePrivChecks_; - isWithClause_ = parentAnalyzer.isWithClause_; + hasWithClause_ = parentAnalyzer.hasWithClause_; } /** diff --git a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java index a6f66ab..87f38b7 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java +++ b/fe/src/main/java/org/apache/impala/analysis/CollectionTableRef.java @@ -81,7 +81,7 @@ public class CollectionTableRef extends TableRef { public void analyze(Analyzer analyzer) throws AnalysisException { if (isAnalyzed_) return; desc_ = analyzer.registerTableRef(this); - if (isRelative() && !analyzer.isWithClause()) { + if (isRelative() && !analyzer.hasWithClause()) { SlotDescriptor parentSlotDesc = analyzer.registerSlotRef(resolvedPath_); parentSlotDesc.setItemTupleDesc(desc_); collectionExpr_ = new SlotRef(parentSlotDesc); diff --git a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java index 5bc5675..5993235 100644 --- a/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java +++ b/fe/src/main/java/org/apache/impala/analysis/InlineViewRef.java @@ -164,7 +164,7 @@ public class InlineViewRef extends TableRef { inlineViewAnalyzer_.setUseHiveColLabels( isCatalogView ? true : analyzer.useHiveColLabels()); queryStmt_.analyze(inlineViewAnalyzer_); - correlatedTupleIds_.addAll(queryStmt_.getCorrelatedTupleIds(inlineViewAnalyzer_)); + correlatedTupleIds_.addAll(queryStmt_.getCorrelatedTupleIds()); if (explicitColLabels_ != null) { Preconditions.checkState( explicitColLabels_.size() == queryStmt_.getColLabels().size()); diff --git a/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java b/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java index 4426026..a80608b 100644 --- a/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/QueryStmt.java @@ -171,7 +171,7 @@ public abstract class QueryStmt extends StatementBase { * (3) a mix of correlated table refs and table refs rooted at those refs * (the statement is 'self-contained' with respect to correlation) */ - public List<TupleId> getCorrelatedTupleIds(Analyzer analyzer) + public List<TupleId> getCorrelatedTupleIds() throws AnalysisException { // Correlated tuple ids of this stmt. List<TupleId> correlatedTupleIds = new ArrayList<>(); diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java index 8c99714..d568429 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java @@ -223,9 +223,23 @@ public class SelectStmt extends QueryStmt { analyzeSelectClause(); verifyResultExprs(); analyzeWhereClause(); - createSortInfo(analyzer_); - analyzeAggregation(); + + // Analyze aggregation-relevant components of the select block (Group By + // clause, select list, Order By clause), substitute AVG with SUM/COUNT, + // create the AggregationInfo, including the agg output tuple, and transform + // all post-agg exprs given AggregationInfo's smap. + analyzeHavingClause(); + if (checkForAggregates()) { + verifyAggSemantics(); + analyzeGroupingExprs(); + collectAggExprs(); + rewriteCountDistinct(); + buildAggregateExprs(); + buildResultExprs(); + verifyAggregation(); + } + createAnalyticInfo(); if (evaluateOrderBy_) createSortTupleInfo(analyzer_); @@ -243,24 +257,12 @@ public class SelectStmt extends QueryStmt { buildColumnLineageGraph(); } - private void buildColumnLineageGraph() { - ColumnLineageGraph graph = analyzer_.getColumnLineageGraph(); - if (multiAggInfo_ != null && multiAggInfo_.hasAggregateExprs()) { - graph.addDependencyPredicates(multiAggInfo_.getGroupingExprs()); - } - if (sortInfo_ != null && hasLimit()) { - // When there is a LIMIT clause in conjunction with an ORDER BY, the - // ordering exprs must be added in the column lineage graph. - graph.addDependencyPredicates(sortInfo_.getSortExprs()); - } - } - private void analyzeSelectClause() throws AnalysisException { // Generate !empty() predicates to filter out empty collections. // Skip this step when analyzing a WITH-clause because CollectionTableRefs // do not register collection slots in their parent in that context // (see CollectionTableRef.analyze()). - if (!analyzer_.isWithClause()) registerIsNotEmptyPredicates(); + if (!analyzer_.hasWithClause()) registerIsNotEmptyPredicates(); // analyze plan hints from select list selectList_.analyzePlanHints(analyzer_); @@ -340,20 +342,19 @@ public class SelectStmt extends QueryStmt { } private void analyzeWhereClause() throws AnalysisException { - if (whereClause_ != null) { - whereClause_.analyze(analyzer_); - if (whereClause_.contains(Expr.isAggregatePredicate())) { - throw new AnalysisException( - "aggregate function not allowed in WHERE clause"); - } - whereClause_.checkReturnsBool("WHERE clause", false); - Expr e = whereClause_.findFirstOf(AnalyticExpr.class); - if (e != null) { - throw new AnalysisException( - "WHERE clause must not contain analytic expressions: " + e.toSql()); - } - analyzer_.registerConjuncts(whereClause_, false); + if (whereClause_ == null) return; + whereClause_.analyze(analyzer_); + if (whereClause_.contains(Expr.isAggregatePredicate())) { + throw new AnalysisException( + "aggregate function not allowed in WHERE clause"); } + whereClause_.checkReturnsBool("WHERE clause", false); + Expr e = whereClause_.findFirstOf(AnalyticExpr.class); + if (e != null) { + throw new AnalysisException( + "WHERE clause must not contain analytic expressions: " + e.toSql()); + } + analyzer_.registerConjuncts(whereClause_, false); } /** @@ -551,26 +552,6 @@ public class SelectStmt extends QueryStmt { } /** - * Analyze aggregation-relevant components of the select block (Group By clause, - * select list, Order By clause), substitute AVG with SUM/COUNT, create the - * AggregationInfo, including the agg output tuple, and transform all post-agg exprs - * given AggregationInfo's smap. - */ - private void analyzeAggregation() throws AnalysisException { - analyzeHavingClause(); - if (!checkForAggregates()) { - return; - } - verifyAggSemantics(); - analyzeGroupingExprs(); - collectAggExprs(); - rewriteCountDistinct(); - buildAggregateExprs(); - buildResultExprs(); - verifyAggregation(); - } - - /** * Analyze the HAVING clause. The HAVING clause is a predicate, not a list * (like GROUP BY or ORDER BY) and so cannot contain ordinals. * @@ -660,28 +641,28 @@ public class SelectStmt extends QueryStmt { } private void analyzeGroupingExprs() throws AnalysisException { - // analyze grouping exprs - groupingExprsCopy_ = new ArrayList<>(); - if (groupingExprs_ != null) { - // make a deep copy here, we don't want to modify the original - // exprs during analysis (in case we need to print them later) - groupingExprsCopy_ = Expr.cloneList(groupingExprs_); - substituteOrdinalsAndAliases(groupingExprsCopy_, "GROUP BY", analyzer_); - - for (int i = 0; i < groupingExprsCopy_.size(); ++i) { - groupingExprsCopy_.get(i).analyze(analyzer_); - if (groupingExprsCopy_.get(i).contains(Expr.isAggregatePredicate())) { - // reference the original expr in the error msg - throw new AnalysisException( - "GROUP BY expression must not contain aggregate functions: " - + groupingExprs_.get(i).toSql()); - } - if (groupingExprsCopy_.get(i).contains(AnalyticExpr.class)) { - // reference the original expr in the error msg - throw new AnalysisException( - "GROUP BY expression must not contain analytic expressions: " - + groupingExprsCopy_.get(i).toSql()); - } + if (groupingExprs_ == null) { + groupingExprsCopy_ = new ArrayList<>(); + return; + } + // make a deep copy here, we don't want to modify the original + // exprs during analysis (in case we need to print them later) + groupingExprsCopy_ = Expr.cloneList(groupingExprs_); + substituteOrdinalsAndAliases(groupingExprsCopy_, "GROUP BY", analyzer_); + + for (int i = 0; i < groupingExprsCopy_.size(); ++i) { + groupingExprsCopy_.get(i).analyze(analyzer_); + if (groupingExprsCopy_.get(i).contains(Expr.isAggregatePredicate())) { + // reference the original expr in the error msg + throw new AnalysisException( + "GROUP BY expression must not contain aggregate functions: " + + groupingExprs_.get(i).toSql()); + } + if (groupingExprsCopy_.get(i).contains(AnalyticExpr.class)) { + // reference the original expr in the error msg + throw new AnalysisException( + "GROUP BY expression must not contain analytic expressions: " + + groupingExprsCopy_.get(i).toSql()); } } } @@ -703,28 +684,29 @@ public class SelectStmt extends QueryStmt { private void rewriteCountDistinct() { // Optionally rewrite all count(distinct <expr>) into equivalent NDV() calls. - if (analyzer_.getQueryCtx().client_request.query_options.appx_count_distinct) { - ndvSmap_ = new ExprSubstitutionMap(); - for (FunctionCallExpr aggExpr: aggExprs_) { - if (!aggExpr.isDistinct() - || !aggExpr.getFnName().getFunction().equals("count") - || aggExpr.getParams().size() != 1) { - continue; - } - FunctionCallExpr ndvFnCall = - new FunctionCallExpr("ndv", aggExpr.getParams().exprs()); - ndvFnCall.analyzeNoThrow(analyzer_); - Preconditions.checkState(ndvFnCall.getType().equals(aggExpr.getType())); - ndvSmap_.put(aggExpr, ndvFnCall); - } - // Replace all count(distinct <expr>) with NDV(<expr>). - List<Expr> substAggExprs = Expr.substituteList(aggExprs_, - ndvSmap_, analyzer_, false); - aggExprs_.clear(); - for (Expr aggExpr: substAggExprs) { - Preconditions.checkState(aggExpr instanceof FunctionCallExpr); - aggExprs_.add((FunctionCallExpr) aggExpr); + if (!analyzer_.getQueryCtx().client_request.query_options.appx_count_distinct) { + return; + } + ndvSmap_ = new ExprSubstitutionMap(); + for (FunctionCallExpr aggExpr: aggExprs_) { + if (!aggExpr.isDistinct() + || !aggExpr.getFnName().getFunction().equals("count") + || aggExpr.getParams().size() != 1) { + continue; } + FunctionCallExpr ndvFnCall = + new FunctionCallExpr("ndv", aggExpr.getParams().exprs()); + ndvFnCall.analyzeNoThrow(analyzer_); + Preconditions.checkState(ndvFnCall.getType().equals(aggExpr.getType())); + ndvSmap_.put(aggExpr, ndvFnCall); + } + // Replace all count(distinct <expr>) with NDV(<expr>). + List<Expr> substAggExprs = Expr.substituteList(aggExprs_, + ndvSmap_, analyzer_, false); + aggExprs_.clear(); + for (Expr aggExpr: substAggExprs) { + Preconditions.checkState(aggExpr instanceof FunctionCallExpr); + aggExprs_.add((FunctionCallExpr) aggExpr); } } @@ -939,6 +921,18 @@ public class SelectStmt extends QueryStmt { } } } + + private void buildColumnLineageGraph() { + ColumnLineageGraph graph = analyzer_.getColumnLineageGraph(); + if (multiAggInfo_ != null && multiAggInfo_.hasAggregateExprs()) { + graph.addDependencyPredicates(multiAggInfo_.getGroupingExprs()); + } + if (sortInfo_ != null && hasLimit()) { + // When there is a LIMIT clause in conjunction with an ORDER BY, the + // ordering exprs must be added in the column lineage graph. + graph.addDependencyPredicates(sortInfo_.getSortExprs()); + } + } } /** diff --git a/fe/src/main/java/org/apache/impala/analysis/Subquery.java b/fe/src/main/java/org/apache/impala/analysis/Subquery.java index 17ce30b..68d0355 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Subquery.java +++ b/fe/src/main/java/org/apache/impala/analysis/Subquery.java @@ -79,7 +79,7 @@ public class Subquery extends Expr { analyzer_.setIsSubquery(); stmt_.analyze(analyzer_); // Check whether the stmt_ contains an illegal mix of un/correlated table refs. - stmt_.getCorrelatedTupleIds(analyzer_); + stmt_.getCorrelatedTupleIds(); // Set the subquery type based on the types of the exprs in the // result list of the associated SelectStmt. diff --git a/fe/src/main/java/org/apache/impala/analysis/WithClause.java b/fe/src/main/java/org/apache/impala/analysis/WithClause.java index 6c7cfd0..90a13d4 100644 --- a/fe/src/main/java/org/apache/impala/analysis/WithClause.java +++ b/fe/src/main/java/org/apache/impala/analysis/WithClause.java @@ -75,7 +75,7 @@ public class WithClause extends StmtNode { // during analysis of the WITH clause. withClauseAnalyzer is a child of 'analyzer' so // that local views registered in parent blocks are visible here. Analyzer withClauseAnalyzer = Analyzer.createWithNewGlobalState(analyzer); - withClauseAnalyzer.setIsWithClause(); + withClauseAnalyzer.setHasWithClause(); if (analyzer.isExplain()) withClauseAnalyzer.setIsExplain(); for (View view: views_) { Analyzer viewAnalyzer = new Analyzer(withClauseAnalyzer);