HIVE-11211 : Reset the fields in JoinStatsRule in StatsRulesProcFactory (Pengcheng Xiong, reviewed by Laljo John Pullokkaran)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/42326958 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/42326958 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/42326958 Branch: refs/heads/beeline-cli Commit: 42326958148c2558be9c3d4dfe44c9e735704617 Parents: 4d984bd Author: Hari Subramaniyan <harisan...@apache.org> Authored: Wed Jul 15 13:15:34 2015 -0700 Committer: Hari Subramaniyan <harisan...@apache.org> Committed: Wed Jul 15 13:15:34 2015 -0700 ---------------------------------------------------------------------- .../stats/annotation/StatsRulesProcFactory.java | 42 ++++++++++---------- 1 file changed, 22 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/42326958/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 0982059..376d42c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -1013,17 +1013,14 @@ public class StatsRulesProcFactory { */ public static class JoinStatsRule extends DefaultStatsRule implements NodeProcessor { - private boolean pkfkInferred = false; - private long newNumRows = 0; - private List<Operator<? extends OperatorDesc>> parents; - private CommonJoinOperator<? extends JoinDesc> jop; - private int numAttr = 1; @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { - jop = (CommonJoinOperator<? extends JoinDesc>) nd; - parents = jop.getParentOperators(); + long newNumRows = 0; + CommonJoinOperator<? extends JoinDesc> jop = (CommonJoinOperator<? extends JoinDesc>) nd; + List<Operator<? extends OperatorDesc>> parents = jop.getParentOperators(); + int numAttr = 1; AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; HiveConf conf = aspCtx.getConf(); boolean allStatsAvail = true; @@ -1062,7 +1059,7 @@ public class StatsRulesProcFactory { numAttr = keyExprs.size(); // infer PK-FK relationship in single attribute join case - inferPKFKRelationship(); + long inferredRowCount = inferPKFKRelationship(numAttr, parents, jop); // get the join keys from parent ReduceSink operators for (int pos = 0; pos < parents.size(); pos++) { ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); @@ -1149,7 +1146,7 @@ public class StatsRulesProcFactory { // update join statistics stats.setColumnStats(outColStats); - long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom); + long newRowCount = inferredRowCount !=-1 ? inferredRowCount : computeNewRowCount(rowCounts, denom); updateStatsForJoinType(stats, newRowCount, jop, rowCountParents); jop.setStatistics(stats); @@ -1180,7 +1177,7 @@ public class StatsRulesProcFactory { } long maxDataSize = parentSizes.get(maxRowIdx); - long newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor); + newNumRows = StatsUtils.safeMult(StatsUtils.safeMult(maxRowCount, (numParents - 1)), joinFactor); long newDataSize = StatsUtils.safeMult(StatsUtils.safeMult(maxDataSize, (numParents - 1)), joinFactor); Statistics wcStats = new Statistics(); wcStats.setNumRows(newNumRows); @@ -1195,15 +1192,17 @@ public class StatsRulesProcFactory { return null; } - private void inferPKFKRelationship() { + private long inferPKFKRelationship(int numAttr, List<Operator<? extends OperatorDesc>> parents, + CommonJoinOperator<? extends JoinDesc> jop) { + long newNumRows = -1; if (numAttr == 1) { // If numAttr is 1, this means we join on one single key column. Map<Integer, ColStatistics> parentsWithPK = getPrimaryKeyCandidates(parents); // We only allow one single PK. if (parentsWithPK.size() != 1) { - LOG.debug("STATS-" + jop.toString() + ": detects multiple PK parents."); - return; + LOG.debug("STATS-" + jop.toString() + ": detects none/multiple PK parents."); + return newNumRows; } Integer pkPos = parentsWithPK.keySet().iterator().next(); ColStatistics csPK = parentsWithPK.values().iterator().next(); @@ -1215,7 +1214,7 @@ public class StatsRulesProcFactory { // csfKs.size() + 1 == parents.size() means we have a single PK and all // the rest ops are FKs. if (csFKs.size() + 1 == parents.size()) { - getSelectivity(parents, pkPos, csPK, csFKs); + newNumRows = getCardinality(parents, pkPos, csPK, csFKs, jop); // some debug information if (isDebugEnabled) { @@ -1236,16 +1235,17 @@ public class StatsRulesProcFactory { } } } + return newNumRows; } /** - * Get selectivity of reduce sink operators. + * Get cardinality of reduce sink operators. * @param csPK - ColStatistics for a single primary key * @param csFKs - ColStatistics for multiple foreign keys */ - private void getSelectivity(List<Operator<? extends OperatorDesc>> ops, Integer pkPos, ColStatistics csPK, - Map<Integer, ColStatistics> csFKs) { - this.pkfkInferred = true; + private long getCardinality(List<Operator<? extends OperatorDesc>> ops, Integer pkPos, + ColStatistics csPK, Map<Integer, ColStatistics> csFKs, + CommonJoinOperator<? extends JoinDesc> jop) { double pkfkSelectivity = Double.MAX_VALUE; int fkInd = -1; // 1. We iterate through all the operators that have candidate FKs and @@ -1290,13 +1290,15 @@ public class StatsRulesProcFactory { distinctVals.add(csFK.getCountDistint()); } } + long newNumRows; if (csFKs.size() == 1) { // there is only one FK - this.newNumRows = newrows; + newNumRows = newrows; } else { // there is more than one FK - this.newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals)); + newNumRows = this.computeNewRowCount(rowCounts, getDenominator(distinctVals)); } + return newNumRows; } private float getSelectivitySimpleTree(Operator<? extends OperatorDesc> op) {