Author: hashutosh
Date: Mon Apr 13 18:10:37 2015
New Revision: 1673250

URL: http://svn.apache.org/r1673250
Log:
HIVE-10315 : CBO (Calcite Return Path): HiveRelSize accessing columns without available stats [CBO branch] (Jesus Camacho Rodriguez)

Modified:
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java

Modified: 
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
URL: 
http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java?rev=1673250&r1=1673249&r2=1673250&view=diff
==============================================================================
--- 
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
 (original)
+++ 
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
 Mon Apr 13 18:10:37 2015
@@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.stats.S
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
 
 public class RelOptHiveTable extends RelOptAbstractTable {
   private final Table                             hiveTblMetadata;
@@ -259,7 +260,7 @@ public class RelOptHiveTable extends Rel
     }
   }
 
-  private void updateColStats(Set<Integer> projIndxLst) {
+  private void updateColStats(Set<Integer> projIndxLst, boolean 
allowNullColumnForMissingStats) {
     List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
     List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
     List<String> partColNamesThatRqrStats = new ArrayList<String>();
@@ -372,9 +373,13 @@ public class RelOptHiveTable extends Rel
     if (!colNamesFailedStats.isEmpty()) {
       String logMsg = "No Stats for " + hiveTblMetadata.getCompleteName() + ", 
Columns: "
           + getColNamesForLogging(colNamesFailedStats);
-      LOG.error(logMsg);
       noColsMissingStats.getAndAdd(colNamesFailedStats.size());
-      throw new RuntimeException(logMsg);
+      if (allowNullColumnForMissingStats) {
+        LOG.warn(logMsg);
+      } else {
+        LOG.error(logMsg);
+        throw new RuntimeException(logMsg);
+      }
     }
   }
 
@@ -387,10 +392,14 @@ public class RelOptHiveTable extends Rel
   }
 
   public List<ColStatistics> getColStat(List<Integer> projIndxLst) {
-    ImmutableList.Builder<ColStatistics> colStatsBldr = 
ImmutableList.<ColStatistics> builder();
+    return getColStat(projIndxLst, false);
+  }
+
+  public List<ColStatistics> getColStat(List<Integer> projIndxLst, boolean 
allowNullColumnForMissingStats) {
+    List<ColStatistics> colStatsBldr = Lists.newArrayList();
 
     if (projIndxLst != null) {
-      updateColStats(new HashSet<Integer>(projIndxLst));
+      updateColStats(new HashSet<Integer>(projIndxLst), 
allowNullColumnForMissingStats);
       for (Integer i : projIndxLst) {
         colStatsBldr.add(hiveColStatsMap.get(i));
       }
@@ -399,13 +408,13 @@ public class RelOptHiveTable extends Rel
       for (Integer i = 0; i < noOfNonVirtualCols; i++) {
         pILst.add(i);
       }
-      updateColStats(new HashSet<Integer>(pILst));
+      updateColStats(new HashSet<Integer>(pILst), 
allowNullColumnForMissingStats);
       for (Integer pi : pILst) {
         colStatsBldr.add(hiveColStatsMap.get(pi));
       }
     }
 
-    return colStatsBldr.build();
+    return colStatsBldr;
   }
 
   /*

Modified: 
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
URL: 
http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java?rev=1673250&r1=1673249&r2=1673250&view=diff
==============================================================================
--- 
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
 (original)
+++ 
hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSize.java
 Mon Apr 13 18:10:37 2015
@@ -17,9 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
 
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
@@ -54,20 +52,19 @@ public class HiveRelMdSize extends RelMd
 
   public List<Double> averageColumnSizes(HiveTableScan scan) {
     List<Integer> neededcolsLst = scan.getNeededColIndxsFrmReloptHT();
-    Set<Integer> needColsSet = new HashSet<Integer>(neededcolsLst);
     List<ColStatistics> columnStatistics = ((RelOptHiveTable) scan.getTable())
-        .getColStat(neededcolsLst);
+        .getColStat(neededcolsLst, true);
 
     // Obtain list of col stats, or use default if they are not available
     final ImmutableList.Builder<Double> list = ImmutableList.builder();
     int indxRqdCol = 0;
     int nFields = scan.getRowType().getFieldCount();
     for (int i = 0; i < nFields; i++) {
-      if (needColsSet.contains(i)) {
+      if (neededcolsLst.contains(i)) {
         ColStatistics columnStatistic = columnStatistics.get(indxRqdCol);
         indxRqdCol++;
         if (columnStatistic == null) {
-          RelDataTypeField field = 
scan.getPrunedRowType().getFieldList().get(i);
+          RelDataTypeField field = scan.getRowType().getFieldList().get(i);
           list.add(averageTypeValueSize(field.getType()));
         } else {
           list.add(columnStatistic.getAvgColLen());


Reply via email to