Author: hashutosh
Date: Mon Apr 13 18:10:37 2015
New Revision: 1673250

URL: http://svn.apache.org/r1673250
Log:
HIVE-10315 : CBO (Calcite Return Path): HiveRelSize accessing columns without available stats [CBO branch] (Jesus Camacho Rodriguez)

Modified:
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java?rev=1673250&r1=1673249&r2=1673250&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java Mon Apr 13 18:10:37 2015
@@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.stats.S
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
 
 public class RelOptHiveTable extends RelOptAbstractTable {
   private final Table hiveTblMetadata;
@@ -259,7 +260,7 @@ public class RelOptHiveTable extends Rel
     }
   }
 
-  private void updateColStats(Set<Integer> projIndxLst) {
+  private void updateColStats(Set<Integer> projIndxLst, boolean allowNullColumnForMissingStats) {
     List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
     List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
     List<String> partColNamesThatRqrStats = new ArrayList<String>();
@@ -372,9 +373,13 @@ public class RelOptHiveTable extends Rel
     if (!colNamesFailedStats.isEmpty()) {
       String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", Columns: "
           + getColNamesForLogging(colNamesFailedStats);
-      LOG.error(logMsg);
       noColsMissingStats.getAndAdd(colNamesFailedStats.size());
-      throw new RuntimeException(logMsg);
+      if (allowNullColumnForMissingStats) {
+        LOG.warn(logMsg);
+      } else {
+        LOG.error(logMsg);
+        throw new RuntimeException(logMsg);
+      }
     }
   }
 
@@ -387,10 +392,14 @@ public class RelOptHiveTable extends Rel
   }
 
   public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
-    ImmutableList.Builder<ColStatistics> colStatsBldr = ImmutableList.<ColStatistics> builder();
+    return getColStat(projIndxLst, false);
+  }
+
+  public List<ColStatistics> getColStat(List<Integer> projIndxLst, boolean allowNullColumnForMissingStats) {
+    List<ColStatistics> colStatsBldr = Lists.newArrayList();
 
     if (projIndxLst != null) {
-      updateColStats(new HashSet<Integer>(projIndxLst));
+      updateColStats(new HashSet<Integer>(projIndxLst), allowNullColumnForMissingStats);
       for (Integer i : projIndxLst) {
         colStatsBldr.add(hiveColStatsMap.get(i));
       }
@@ -399,13 +408,13 @@ public class RelOptHiveTable extends Rel
       for (Integer i = 0; i < noOfNonVirtualCols; i++) {
         pILst.add(i);
       }
-      updateColStats(new HashSet<Integer>(pILst));
+      updateColStats(new HashSet<Integer>(pILst), allowNullColumnForMissingStats);
       for (Integer pi : pILst) {
         colStatsBldr.add(hiveColStatsMap.get(pi));
       }
     }
 
-    return colStatsBldr.build();
+    return colStatsBldr;
   }
 
   /*

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java?rev=1673250&r1=1673249&r2=1673250&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java Mon Apr 13 18:10:37 2015
@@ -17,9 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
 
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
@@ -54,20 +52,19 @@ public class HiveRelMdSize extends RelMd
 
   public List<Double> averageColumnSizes(HiveTableScan scan) {
     List<Integer> neededcolsLst = scan.getNeededColIndxsFrmReloptHT();
-    Set<Integer> needColsSet = new HashSet<Integer>(neededcolsLst);
     List<ColStatistics> columnStatistics = ((RelOptHiveTable) scan.getTable())
-        .getColStat(neededcolsLst);
+        .getColStat(neededcolsLst, true);
 
     // Obtain list of col stats, or use default if they are not available
     final ImmutableList.Builder<Double> list = ImmutableList.builder();
     int indxRqdCol = 0;
     int nFields = scan.getRowType().getFieldCount();
     for (int i = 0; i < nFields; i++) {
-      if (needColsSet.contains(i)) {
+      if (neededcolsLst.contains(i)) {
         ColStatistics columnStatistic = columnStatistics.get(indxRqdCol);
         indxRqdCol++;
         if (columnStatistic == null) {
-          RelDataTypeField field = scan.getPrunedRowType().getFieldList().get(i);
+          RelDataTypeField field = scan.getRowType().getFieldList().get(i);
           list.add(averageTypeValueSize(field.getType()));
         } else {
           list.add(columnStatistic.getAvgColLen());