http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 44269f0..571c050 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -337,10 +337,9 @@ public class StatsRulesProcFactory { // can be boolean column in which case return true count ExprNodeColumnDesc encd = (ExprNodeColumnDesc) pred; String colName = encd.getColumn(); - String tabAlias = encd.getTabAlias(); String colType = encd.getTypeString(); if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { - ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { return cs.getNumTrues(); } @@ -393,10 +392,9 @@ public class StatsRulesProcFactory { // NOT on boolean columns is possible. in which case return false count. ExprNodeColumnDesc encd = (ExprNodeColumnDesc) leaf; String colName = encd.getColumn(); - String tabAlias = encd.getTabAlias(); String colType = encd.getTypeString(); if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) { - ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { return cs.getNumFalses(); } @@ -423,8 +421,7 @@ public class StatsRulesProcFactory { if (leaf instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; String colName = colDesc.getColumn(); - String tabAlias = colDesc.getTabAlias(); - ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { return cs.getNumNulls(); } @@ -450,7 +447,6 @@ public class StatsRulesProcFactory { if (udf instanceof GenericUDFOPEqual || udf instanceof GenericUDFOPEqualNS) { String colName = null; - String tabAlias = null; boolean isConst = false; Object prevConst = null; @@ -483,7 +479,7 @@ public class StatsRulesProcFactory { return numRows; } - ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { long dvs = cs.getCountDistint(); numRows = dvs == 0 ? numRows / 2 : numRows / dvs; @@ -492,7 +488,6 @@ public class StatsRulesProcFactory { } else if (leaf instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf; colName = colDesc.getColumn(); - tabAlias = colDesc.getTabAlias(); // if const is first argument then evaluate the result if (isConst) { @@ -504,7 +499,7 @@ public class StatsRulesProcFactory { return numRows; } - ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName); + ColStatistics cs = stats.getColumnStatisticsFromColName(colName); if (cs != null) { long dvs = cs.getCountDistint(); numRows = dvs == 0 ? numRows / 2 : numRows / dvs; @@ -753,11 +748,11 @@ public class StatsRulesProcFactory { GroupByOperator mGop = OperatorUtils.findSingleOperatorUpstream(parent, GroupByOperator.class); if (mGop != null) { containsGroupingSet = mGop.getConf().isGroupingSetsPresent(); - sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size(); } if (containsGroupingSet) { // Case 8: column stats, grouping sets + sizeOfGroupingSet = mGop.getConf().getListGroupingSets().size(); cardinality = Math.min(parentNumRows, StatsUtils.safeMult(ndvProduct, sizeOfGroupingSet)); if (isDebugEnabled) { @@ -826,9 +821,8 @@ public class StatsRulesProcFactory { // for those newly added columns if (!colExprMap.containsKey(ci.getInternalName())) { String colName = ci.getInternalName(); - String tabAlias = ci.getTabAlias(); String colType = ci.getTypeName(); - ColStatistics cs = new ColStatistics(tabAlias, colName, colType); + ColStatistics cs = new ColStatistics(colName, colType); cs.setCountDistint(stats.getNumRows()); cs.setNumNulls(0); cs.setAvgColLen(StatsUtils.getAvgColLenOfFixedLengthTypes(colType)); @@ -1053,54 +1047,37 @@ public class StatsRulesProcFactory { // statistics object that is combination of statistics from all // relations involved in JOIN Statistics stats = new Statistics(); - Map<String, Long> rowCountParents = new HashMap<String, Long>(); List<Long> distinctVals = Lists.newArrayList(); int numParent = parents.size(); - Map<String, ColStatistics> joinedColStats = Maps.newHashMap(); + Map<Integer, Long> rowCountParents = Maps.newHashMap(); + Map<Integer, Statistics> joinStats = Maps.newHashMap(); Map<Integer, List<String>> joinKeys = Maps.newHashMap(); List<Long> rowCounts = Lists.newArrayList(); // detect if there are multiple attributes in join key ReduceSinkOperator rsOp = (ReduceSinkOperator) jop.getParentOperators().get(0); - List<String> keyExprs = rsOp.getConf().getOutputKeyColumnNames(); + List<String> keyExprs = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf() + .getOutputKeyColumnNames()); numAttr = keyExprs.size(); // infer PK-FK relationship in single attribute join case pkfkInferred = false; inferPKFKRelationship(); - // get the join keys from parent ReduceSink operators for (int pos = 0; pos < parents.size(); pos++) { ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); - Statistics parentStats = parent.getStatistics(); - keyExprs = parent.getConf().getOutputKeyColumnNames(); - - // Parent RS may have column statistics from multiple parents. - // Populate table alias to row count map, this will be used later to - // scale down/up column statistics based on new row count - // NOTE: JOIN with UNION as parent of RS will not have table alias - // propagated properly. UNION operator does not propagate the table - // alias of subqueries properly to expression nodes. Hence union20.q - // will have wrong number of rows. - Set<String> tableAliases = StatsUtils.getAllTableAlias(parent.getColumnExprMap()); - for (String tabAlias : tableAliases) { - rowCountParents.put(tabAlias, parentStats.getNumRows()); - } + keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf() + .getOutputKeyColumnNames()); + + rowCountParents.put(pos, parentStats.getNumRows()); rowCounts.add(parentStats.getNumRows()); - // compute fully qualified join key column names. this name will be - // used to quickly look-up for column statistics of join key. - // TODO: expressions in join condition will be ignored. assign // internal name for expressions and estimate column statistics for expression. - List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keyExprs, - parent.getColumnExprMap()); - joinKeys.put(pos, fqCols); + joinKeys.put(pos, keyExprs); // get column statistics for all output columns - for (ColStatistics cs : parentStats.getColumnStats()) { - joinedColStats.put(cs.getFullyQualifiedColName(), cs); - } + joinStats.put(pos, parentStats); // since new statistics is derived from all relations involved in // JOIN, we need to update the state information accordingly @@ -1116,12 +1093,11 @@ public class StatsRulesProcFactory { for (int idx = 0; idx < numAttr; idx++) { for (Integer i : joinKeys.keySet()) { String col = joinKeys.get(i).get(idx); - ColStatistics cs = joinedColStats.get(col); + ColStatistics cs = joinStats.get(i).getColumnStatisticsFromColName(col); if (cs != null) { perAttrDVs.add(cs.getCountDistint()); } } - distinctVals.add(getDenominator(perAttrDVs)); perAttrDVs.clear(); } @@ -1136,9 +1112,10 @@ public class StatsRulesProcFactory { } } } else { - for (List<String> jkeys : joinKeys.values()) { - for (String jk : jkeys) { - ColStatistics cs = joinedColStats.get(jk); + if (numAttr == 1) { + for (Integer i : joinKeys.keySet()) { + String col = joinKeys.get(i).get(0); + ColStatistics cs = joinStats.get(i).getColumnStatisticsFromColName(col); if (cs != null) { distinctVals.add(cs.getCountDistint()); } @@ -1148,28 +1125,23 @@ public class StatsRulesProcFactory { } // Update NDV of joined columns to be min(V(R,y), V(S,y)) - updateJoinColumnsNDV(joinKeys, joinedColStats, numAttr); + updateJoinColumnsNDV(joinKeys, joinStats, numAttr); - // column statistics from different sources are put together and rename - // fully qualified column names based on output schema of join operator + // column statistics from different sources are put together and + // rename based on output schema of join operator Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap(); RowSchema rs = jop.getSchema(); List<ColStatistics> outColStats = Lists.newArrayList(); - Map<String, String> outInTabAlias = new HashMap<String, String>(); for (ColumnInfo ci : rs.getSignature()) { String key = ci.getInternalName(); ExprNodeDesc end = colExprMap.get(key); if (end instanceof ExprNodeColumnDesc) { String colName = ((ExprNodeColumnDesc) end).getColumn(); - String tabAlias = ((ExprNodeColumnDesc) end).getTabAlias(); - String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName); - ColStatistics cs = joinedColStats.get(fqColName); + int pos = jop.getConf().getReversedExprs().get(key); + ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(colName); String outColName = key; - String outTabAlias = ci.getTabAlias(); - outInTabAlias.put(outTabAlias, tabAlias); if (cs != null) { cs.setColumnName(outColName); - cs.setTableAlias(outTabAlias); } outColStats.add(cs); } @@ -1178,7 +1150,7 @@ public class StatsRulesProcFactory { // update join statistics stats.setColumnStats(outColStats); long newRowCount = pkfkInferred ? newNumRows : computeNewRowCount(rowCounts, denom); - updateStatsForJoinType(stats, newRowCount, jop, rowCountParents,outInTabAlias); + updateStatsForJoinType(stats, newRowCount, jop, rowCountParents); jop.setStatistics(stats); if (isDebugEnabled) { @@ -1364,13 +1336,11 @@ public class StatsRulesProcFactory { Operator<? extends OperatorDesc> op = ops.get(i); if (op != null && op instanceof ReduceSinkOperator) { ReduceSinkOperator rsOp = (ReduceSinkOperator) op; - List<String> keys = rsOp.getConf().getOutputKeyColumnNames(); - List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys, - rsOp.getColumnExprMap()); - if (fqCols.size() == 1) { - String joinCol = fqCols.get(0); + List<String> keys = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf().getOutputKeyColumnNames()); + if (keys.size() == 1) { + String joinCol = keys.get(0); if (rsOp.getStatistics() != null) { - ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromFQColName(joinCol); + ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol); if (cs != null && !cs.isPrimaryKey()) { if (StatsUtils.inferForeignKey(csPK, cs)) { result.add(i); @@ -1395,13 +1365,11 @@ public class StatsRulesProcFactory { Operator<? extends OperatorDesc> op = ops.get(i); if (op instanceof ReduceSinkOperator) { ReduceSinkOperator rsOp = (ReduceSinkOperator) op; - List<String> keys = rsOp.getConf().getOutputKeyColumnNames(); - List<String> fqCols = StatsUtils.getFullyQualifedReducerKeyNames(keys, - rsOp.getColumnExprMap()); - if (fqCols.size() == 1) { - String joinCol = fqCols.get(0); + List<String> keys = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf().getOutputKeyColumnNames()); + if (keys.size() == 1) { + String joinCol = keys.get(0); if (rsOp.getStatistics() != null) { - ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromFQColName(joinCol); + ColStatistics cs = rsOp.getStatistics().getColumnStatisticsFromColName(joinCol); if (cs != null && cs.isPrimaryKey()) { result.add(i); } @@ -1429,13 +1397,17 @@ public class StatsRulesProcFactory { private void updateStatsForJoinType(Statistics stats, long newNumRows, CommonJoinOperator<? extends JoinDesc> jop, - Map<String, Long> rowCountParents, - Map<String, String> outInTabAlias) { + Map<Integer, Long> rowCountParents) { if (newNumRows < 0) { LOG.info("STATS-" + jop.toString() + ": Overflow in number of rows." + newNumRows + " rows will be set to Long.MAX_VALUE"); } + if (newNumRows == 0) { + LOG.info("STATS-" + jop.toString() + ": Equals 0 in number of rows." + + newNumRows + " rows will be set to 1"); + newNumRows = 1; + } newNumRows = StatsUtils.getMaxIfOverflow(newNumRows); stats.setNumRows(newNumRows); @@ -1447,7 +1419,8 @@ public class StatsRulesProcFactory { // and stats for columns from 2nd parent should be scaled down by 200x List<ColStatistics> colStats = stats.getColumnStats(); for (ColStatistics cs : colStats) { - long oldRowCount = rowCountParents.get(outInTabAlias.get(cs.getTableAlias())); + int pos = jop.getConf().getReversedExprs().get(cs.getColumnName()); + long oldRowCount = rowCountParents.get(pos); double ratio = (double) newNumRows / (double) oldRowCount; long oldDV = cs.getCountDistint(); long newDV = oldDV; @@ -1499,15 +1472,16 @@ public class StatsRulesProcFactory { } private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys, - Map<String, ColStatistics> joinedColStats, int numAttr) { + Map<Integer, Statistics> joinStats, int numAttr) { int joinColIdx = 0; while (numAttr > 0) { long minNDV = Long.MAX_VALUE; // find min NDV for joining columns for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) { + int pos = entry.getKey(); String key = entry.getValue().get(joinColIdx); - ColStatistics cs = joinedColStats.get(key); + ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(key); if (cs != null && cs.getCountDistint() < minNDV) { minNDV = cs.getCountDistint(); } @@ -1516,8 +1490,9 @@ public class StatsRulesProcFactory { // set min NDV value to both columns involved in join if (minNDV != Long.MAX_VALUE) { for (Map.Entry<Integer, List<String>> entry : joinKeys.entrySet()) { + int pos = entry.getKey(); String key = entry.getValue().get(joinColIdx); - ColStatistics cs = joinedColStats.get(key); + ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(key); if (cs != null) { cs.setCountDistint(minNDV); } @@ -1810,9 +1785,14 @@ public class StatsRulesProcFactory { if (newNumRows < 0) { LOG.info("STATS-" + op.toString() + ": Overflow in number of rows." + newNumRows + " rows will be set to Long.MAX_VALUE"); + newNumRows = StatsUtils.getMaxIfOverflow(newNumRows); + } + if (newNumRows == 0) { + LOG.info("STATS-" + op.toString() + ": Equals 0 in number of rows." + + newNumRows + " rows will be set to 1"); + newNumRows = 1; } - newNumRows = StatsUtils.getMaxIfOverflow(newNumRows); long oldRowCount = stats.getNumRows(); double ratio = (double) newNumRows / (double) oldRowCount; stats.setNumRows(newNumRows);
http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java index 0a83440..bc34710 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java @@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.plan.Explain.Level; public class AbstractOperatorDesc implements OperatorDesc { protected boolean vectorMode = false; - protected transient Statistics statistics; + protected Statistics statistics; protected transient OpTraits opTraits; protected transient Map<String, String> opProps; http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java index c420190..41a1c7a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColStatistics.java @@ -23,10 +23,8 @@ import org.apache.hadoop.hive.ql.stats.StatsUtils; public class ColStatistics { - private String tabAlias; private String colName; private String colType; - private String fqColName; private long countDistint; private long numNulls; private double avgColLen; @@ -35,16 +33,14 @@ public class ColStatistics { private Range range; private boolean isPrimaryKey; - public ColStatistics(String tabAlias, String colName, String colType) { - this.setTableAlias(tabAlias); + public ColStatistics(String colName, String colType) { this.setColumnName(colName); this.setColumnType(colType); - this.setFullyQualifiedColName(StatsUtils.getFullyQualifiedColumnName(tabAlias, colName)); this.setPrimaryKey(false); } public ColStatistics() { - this(null, null, null); + this(null, null); } public String getColumnName() { @@ -53,7 +49,6 @@ public class ColStatistics { public void setColumnName(String colName) { this.colName = colName; - this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName); } public String getColumnType() { @@ -88,23 +83,6 @@ public class ColStatistics { this.avgColLen = avgColLen; } - public String getFullyQualifiedColName() { - return fqColName; - } - - public void setFullyQualifiedColName(String fqColName) { - this.fqColName = fqColName; - } - - public String getTableAlias() { - return tabAlias; - } - - public void setTableAlias(String tabName) { - this.tabAlias = tabName; - this.fqColName = StatsUtils.getFullyQualifiedColumnName(tabName, colName); - } - public long getNumTrues() { return numTrues; } @@ -136,8 +114,6 @@ public class ColStatistics { @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append(" fqColName: "); - sb.append(fqColName); sb.append(" colName: "); sb.append(colName); sb.append(" colType: "); @@ -163,8 +139,7 @@ public class ColStatistics { @Override public ColStatistics clone() throws CloneNotSupportedException { - ColStatistics clone = new ColStatistics(tabAlias, colName, colType); - clone.setFullyQualifiedColName(fqColName); + ColStatistics clone = new ColStatistics(colName, colType); clone.setAvgColLen(avgColLen); clone.setCountDistint(countDistint); clone.setNumNulls(numNulls); @@ -189,7 +164,7 @@ public class ColStatistics { public final Number minValue; public final Number maxValue; - Range(Number minValue, Number maxValue) { + public Range(Number minValue, Number maxValue) { super(); this.minValue = minValue; this.maxValue = maxValue; http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index f66279f..4e52bac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -176,7 +176,7 @@ public class Statistics implements Serializable { ColStatistics updatedCS = null; if (cs != null) { - String key = cs.getFullyQualifiedColName(); + String key = cs.getColumnName(); // if column statistics for a column is already found then merge the statistics if (columnStats.containsKey(key) && columnStats.get(key) != null) { updatedCS = columnStats.get(key); @@ -230,13 +230,6 @@ public class Statistics implements Serializable { return dataSize; } - public ColStatistics getColumnStatisticsFromFQColName(String fqColName) { - if (columnStats != null) { - return columnStats.get(fqColName); - } - return null; - } - public ColStatistics getColumnStatisticsFromColName(String colName) { if (columnStats == null) { return null; @@ -249,16 +242,10 @@ public class Statistics implements Serializable { return null; } - public ColStatistics getColumnStatisticsForColumn(String tabAlias, String colName) { - String fqColName = StatsUtils.getFullyQualifiedColumnName(tabAlias, colName); - return getColumnStatisticsFromFQColName(fqColName); - } - public List<ColStatistics> getColumnStats() { if (columnStats != null) { return Lists.newArrayList(columnStats.values()); } return null; } - } http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 10871e4..4cd9120 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; @@ -170,6 +171,9 @@ public class StatsUtils { nr = ds / avgRowSize; } } + if (nr == 0) { + nr = 1; + } stats.setNumRows(nr); stats.setDataSize(ds); @@ -226,6 +230,9 @@ public class StatsUtils { nr = ds / avgRowSize; } } + if (nr == 0) { + nr = 1; + } stats.addToNumRows(nr); stats.addToDataSize(ds); @@ -239,8 +246,7 @@ public class StatsUtils { for (Partition part : partList.getNotDeniedPartns()) { partNames.add(part.getName()); } - Map<String, String> colToTabAlias = new HashMap<String, String>(); - neededColumns = processNeededColumns(schema, neededColumns, colToTabAlias); + neededColumns = processNeededColumns(schema, neededColumns); AggrStats aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(), neededColumns, partNames); if (null == aggrStats) { @@ -261,8 +267,7 @@ public class StatsUtils { LOG.debug("Column stats requested for : " + neededColumns.size() + " columns. Able to" + " retrieve for " + colStats.size() + " columns"); } - List<ColStatistics> columnStats = convertColStats(colStats, table.getTableName(), - colToTabAlias); + List<ColStatistics> columnStats = convertColStats(colStats, table.getTableName()); addParitionColumnStats(conf, neededColumns, referencedColumns, schema, table, partList, columnStats); @@ -354,13 +359,15 @@ public class StatsUtils { // currently metastore does not store column stats for // partition column, so we calculate the NDV from pruned // partition list - ColStatistics partCS = new ColStatistics(table.getTableName(), - ci.getInternalName(), ci.getType().getTypeName()); + ColStatistics partCS = new ColStatistics(ci.getInternalName(), ci.getType() + .getTypeName()); long numPartitions = getNDVPartitionColumn(partList.getPartitions(), ci.getInternalName()); partCS.setCountDistint(numPartitions); partCS.setAvgColLen(StatsUtils.getAvgColLenOfVariableLengthTypes(conf, ci.getObjectInspector(), partCS.getColumnType())); + partCS.setRange(getRangePartitionColumn(partList.getPartitions(), ci.getInternalName(), + ci.getType().getTypeName())); colStats.add(partCS); } } @@ -376,6 +383,47 @@ public class StatsUtils { return distinctVals.size(); } + public static Range getRangePartitionColumn(Set<Partition> partitions, String partColName, + String colType) { + Range range = null; + if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.SMALLINT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.INT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + long min = Long.MAX_VALUE; + long max = Long.MIN_VALUE; + for (Partition partition : partitions) { + long value = Long.parseLong(partition.getSpec().get(partColName)); + min = Math.min(min, value); + max = Math.max(max, value); + } + range = new Range(min, max); + } else if (colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME) + || colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { + double min = Double.MAX_VALUE; + double max = Double.MIN_VALUE; + for (Partition partition : partitions) { + double value = Double.parseDouble(partition.getSpec().get(partColName)); + min = Math.min(min, value); + max = Math.max(max, value); + } + range = new Range(min, max); + } else if (colType.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { + double min = Double.MAX_VALUE; + double max = Double.MIN_VALUE; + for (Partition partition : partitions) { + double value = new BigDecimal(partition.getSpec().get(partColName)).doubleValue(); + min = Math.min(min, value); + max = Math.max(max, value); + } + range = new Range(min, max); + } else { + // Columns statistics for complex datatypes are not supported yet + return null; + } + return range; + } + private static void setUnknownRcDsToAverage( List<Long> rowCounts, List<Long> dataSizes, int avgRowSize) { if (LOG.isDebugEnabled()) { @@ -531,7 +579,7 @@ public class StatsUtils { */ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tabName, String colName) { - ColStatistics cs = new ColStatistics(tabName, colName, cso.getColType()); + ColStatistics cs = new ColStatistics(colName, cso.getColType()); String colType = cso.getColType(); ColumnStatisticsData csd = cso.getStatsData(); if (colType.equalsIgnoreCase(serdeConstants.TINYINT_TYPE_NAME) @@ -612,13 +660,12 @@ public class StatsUtils { Table table, List<ColumnInfo> schema, List<String> neededColumns) { String dbName = table.getDbName(); String tabName = table.getTableName(); - Map<String, String> colToTabAlias = new HashMap<String, String>(schema.size()); - List<String> neededColsInTable = processNeededColumns(schema, neededColumns, colToTabAlias); + List<String> neededColsInTable = processNeededColumns(schema, neededColumns); List<ColStatistics> stats = null; try { List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics( dbName, tabName, neededColsInTable); - stats = convertColStats(colStat, tabName, colToTabAlias); + stats = convertColStats(colStat, tabName); } catch (HiveException e) { LOG.error("Failed to retrieve table statistics: ", e); stats = null; @@ -626,35 +673,29 @@ public class StatsUtils { return stats; } - private static List<ColStatistics> convertColStats(List<ColumnStatisticsObj> colStats, String tabName, - Map<String,String> colToTabAlias) { + private static List<ColStatistics> convertColStats(List<ColumnStatisticsObj> colStats, String tabName) { List<ColStatistics> stats = new ArrayList<ColStatistics>(colStats.size()); for (ColumnStatisticsObj statObj : colStats) { ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName()); - cs.setTableAlias(colToTabAlias.get(cs.getColumnName())); stats.add(cs); } return stats; } private static List<String> processNeededColumns(List<ColumnInfo> schema, - List<String> neededColumns, Map<String, String> colToTabAlias) { - for (ColumnInfo col : schema) { - if (col.isHiddenVirtualCol()) continue; - colToTabAlias.put(col.getInternalName(), col.getTabAlias()); - } + List<String> neededColumns) { // Remove hidden virtual columns, as well as needed columns that are not // part of the table. TODO: the latter case should not really happen... List<String> neededColsInTable = null; int limit = neededColumns.size(); for (int i = 0; i < limit; ++i) { - if (colToTabAlias.containsKey(neededColumns.get(i))) continue; if (neededColsInTable == null) { neededColsInTable = Lists.newArrayList(neededColumns); } neededColsInTable.remove(i--); --limit; } - return (neededColsInTable == null) ? neededColumns : neededColsInTable; + return (neededColsInTable == null || neededColsInTable.size() == 0) ? neededColumns + : neededColsInTable; } /** @@ -1012,12 +1053,10 @@ public class StatsUtils { if (colExprMap != null && rowSchema != null) { for (ColumnInfo ci : rowSchema.getSignature()) { String outColName = ci.getInternalName(); - String outTabAlias = ci.getTabAlias(); ExprNodeDesc end = colExprMap.get(outColName); ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end); if (colStat != null) { colStat.setColumnName(outColName); - colStat.setTableAlias(outTabAlias); cs.add(colStat); } } @@ -1058,10 +1097,6 @@ public class StatsUtils { colStat = null; } if (colStat != null) { - ColumnInfo ci = rowSchema.getColumnInfo(colStat.getColumnName()); - if (ci != null) { - colStat.setTableAlias(ci.getTabAlias()); - } cs.add(colStat); } } @@ -1093,13 +1128,11 @@ public class StatsUtils { long numNulls = 0; ObjectInspector oi = null; long numRows = parentStats.getNumRows(); - String tabAlias = null; if (end instanceof ExprNodeColumnDesc) { // column projection ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; colName = encd.getColumn(); - tabAlias = encd.getTabAlias(); if (encd.getIsPartitionColOrVirtualCol()) { @@ -1116,7 +1149,7 @@ public class StatsUtils { } else { // clone the column stats and return - ColStatistics result = parentStats.getColumnStatisticsForColumn(tabAlias, colName); + ColStatistics result = parentStats.getColumnStatisticsFromColName(colName); if (result != null) { try { return result.clone(); @@ -1181,7 +1214,7 @@ public class StatsUtils { avgColSize = getAvgColLenOfFixedLengthTypes(colType); } - ColStatistics colStats = new ColStatistics(tabAlias, colName, colType); + ColStatistics colStats = new ColStatistics(colName, colType); colStats.setAvgColLen(avgColSize); colStats.setCountDistint(countDistincts); colStats.setNumNulls(numNulls); @@ -1316,40 +1349,6 @@ public class StatsUtils { return result; } - /** - * Returns fully qualified name of column - * @param tabName - * @param colName - * @return - */ - public static String getFullyQualifiedColumnName(String tabName, String colName) { - return getFullyQualifiedName(null, tabName, colName); - } - - /** - * Returns fully qualified name of column - * @param dbName - * @param tabName - * @param colName - * @return - */ - public static String getFullyQualifiedColumnName(String dbName, String tabName, String colName) { - return getFullyQualifiedName(dbName, tabName, colName); - } - - /** - * Returns fully qualified name of column - * @param dbName - * @param tabName - * @param partName - * @param colName - * @return - */ - public static String getFullyQualifiedColumnName(String dbName, String tabName, String partName, - String colName) { - return getFullyQualifiedName(dbName, tabName, partName, colName); - } - public static String getFullyQualifiedTableName(String dbName, String tabName) { return getFullyQualifiedName(dbName, tabName); } @@ -1365,80 +1364,21 @@ public class StatsUtils { } /** - * Get fully qualified column name from output key column names and column expression map + * Get qualified column name from output key column names * @param keyExprs * - output key names - * @param map - * - column expression map - * @return list of fully qualified names + * @return list of qualified names */ - public static List<String> getFullyQualifedReducerKeyNames(List<String> keyExprs, - Map<String, ExprNodeDesc> map) { + public static List<String> getQualifedReducerKeyNames(List<String> keyExprs) { List<String> result = Lists.newArrayList(); if (keyExprs != null) { for (String key : keyExprs) { - String colName = key; - ExprNodeDesc end = map.get(colName); - // if we couldn't get expression try prepending "KEY." prefix to reducer key column names - if (end == null) { - colName = Utilities.ReduceField.KEY.toString() + "." + key; - end = map.get(colName); - if (end == null) { - continue; - } - } - if (end instanceof ExprNodeColumnDesc) { - ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end; - String tabAlias = encd.getTabAlias(); - result.add(getFullyQualifiedColumnName(tabAlias, colName)); - } else if (end instanceof ExprNodeGenericFuncDesc) { - ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end; - String tabAlias = ""; - for (ExprNodeDesc childEnd : enf.getChildren()) { - if (childEnd instanceof ExprNodeColumnDesc) { - tabAlias = ((ExprNodeColumnDesc) childEnd).getTabAlias(); - break; - } - } - result.add(getFullyQualifiedColumnName(tabAlias, colName)); - } else if (end instanceof ExprNodeConstantDesc) { - ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end; - result.add(encd.getValue().toString()); - } + result.add(Utilities.ReduceField.KEY.toString() + "." + key); } } return result; } - /** - * Returns all table aliases from expression nodes - * @param columnExprMap - column expression map - * @return - */ - public static Set<String> getAllTableAlias( - Map<String, ExprNodeDesc> columnExprMap) { - Set<String> result = new HashSet<String>(); - if (columnExprMap != null) { - for (ExprNodeDesc end : columnExprMap.values()) { - getTableAliasFromExprNode(end, result); - } - } - return result; - } - - private static void getTableAliasFromExprNode(ExprNodeDesc end, - Set<String> output) { - - if (end instanceof ExprNodeColumnDesc) { - output.add(((ExprNodeColumnDesc) end).getTabAlias()); - } else if (end instanceof ExprNodeGenericFuncDesc) { - for (ExprNodeDesc child : end.getChildren()) { - getTableAliasFromExprNode(child, output); - } - } - - } - public static long getAvailableMemory(Configuration conf) { int memory = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVETEZCONTAINERSIZE); if (memory <= 0) { http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_filter.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out index aa66bc6..492e302 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -439,14 +439,14 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: false (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -563,14 +563,14 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: false (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -601,14 +601,14 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: false (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -639,14 +639,14 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: false (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_limit.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out index 5f8b6f8..7300ea0 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_limit.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_limit.q.out @@ -160,6 +160,6 @@ STAGE PLANS: Statistics: Num rows: 8 Data size: 804 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE ListSink http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_part.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out index 241192b..fc65ef7 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out @@ -56,11 +56,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL ListSink PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging @@ -287,14 +287,14 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL Filter Operator predicate: ((year = '2001') and (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL ListSink PREHOOK: query: -- partition level partial column statistics http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_select.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_select.q.out b/ql/src/test/results/clientpositive/annotate_stats_select.q.out index 753ab4e..306b870 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_select.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_select.q.out @@ -1143,14 +1143,14 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not bo1) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: bo1 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/annotate_stats_table.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/annotate_stats_table.q.out b/ql/src/test/results/clientpositive/annotate_stats_table.q.out index 9bf82ac..9221ba8 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_table.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_table.q.out @@ -44,11 +44,11 @@ STAGE PLANS: Processor Tree: TableScan alias: emp_orc - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: lastname (type: string), deptid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE ListSink PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join30.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join30.q.out b/ql/src/test/results/clientpositive/auto_join30.q.out index b068493..5437b7f 100644 --- a/ql/src/test/results/clientpositive/auto_join30.q.out +++ b/ql/src/test/results/clientpositive/auto_join30.q.out @@ -84,10 +84,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -144,10 +146,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -330,10 +334,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -535,10 +541,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -767,10 +775,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -840,10 +850,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -891,10 +903,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1143,10 +1157,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1216,10 +1232,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1460,10 +1478,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1726,10 +1746,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1992,10 +2014,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join31.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join31.q.out b/ql/src/test/results/clientpositive/auto_join31.q.out index 1e19dd0..0b25134 100644 --- a/ql/src/test/results/clientpositive/auto_join31.q.out +++ b/ql/src/test/results/clientpositive/auto_join31.q.out @@ -101,10 +101,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -174,10 +176,12 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col2,_col3)) mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join32.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out index bfc8be8..f862870 100644 --- a/ql/src/test/results/clientpositive/auto_join32.q.out +++ b/ql/src/test/results/clientpositive/auto_join32.q.out @@ -42,10 +42,10 @@ STAGE PLANS: s TableScan alias: s - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE HashTable Sink Operator keys: 0 name (type: string) @@ -56,10 +56,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -67,18 +67,18 @@ STAGE PLANS: 0 name (type: string) 1 name (type: string) outputColumnNames: _col0, _col8 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator aggregations: count(DISTINCT _col8) keys: _col0 (type: string), _col8 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -87,10 +87,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -156,10 +156,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -263,10 +263,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: s - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 @@ -394,14 +394,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: v - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: ((p = 'bar') and name is not null) (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: name (type: string), registration (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_stats.q.out b/ql/src/test/results/clientpositive/auto_join_stats.q.out index 9100762..d75d6c4 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats.q.out @@ -57,8 +57,10 @@ STAGE PLANS: src2 TableScan alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) @@ -69,8 +71,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -78,8 +82,10 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -144,8 +150,10 @@ STAGE PLANS: src1 TableScan alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) @@ -156,8 +164,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -165,8 +175,10 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -291,8 +303,10 @@ STAGE PLANS: src2 TableScan alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) @@ -303,8 +317,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -312,8 +328,10 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -347,10 +365,10 @@ STAGE PLANS: smalltable2 TableScan alias: smalltable2 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 (_col0 + _col5) (type: double) @@ -403,8 +421,10 @@ STAGE PLANS: src1 TableScan alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) @@ -415,8 +435,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 @@ -424,8 +446,10 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) outputColumnNames: _col0, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col0 + _col5) is not null (type: boolean) + Statistics: Num rows: 138 Data size: 1465 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: http://git-wip-us.apache.org/repos/asf/hive/blob/d823fc80/ql/src/test/results/clientpositive/auto_join_stats2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/auto_join_stats2.q.out b/ql/src/test/results/clientpositive/auto_join_stats2.q.out index ed09875..a0aefa3 100644 --- a/ql/src/test/results/clientpositive/auto_join_stats2.q.out +++ b/ql/src/test/results/clientpositive/auto_join_stats2.q.out @@ -43,10 +43,10 @@ STAGE PLANS: smalltable TableScan alias: smalltable - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 (_col0 + _col5) (type: double) @@ -184,10 +184,10 @@ STAGE PLANS: smalltable2 TableScan alias: smalltable2 - Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 (_col0 + _col5) (type: double)