Repository: hive
Updated Branches:
  refs/heads/master 47cac2d0e -> 233884620


PPD: Handle FLOAT boxing differently for single/double precision constants 
(Gopal V, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/23388462
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/23388462
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/23388462

Branch: refs/heads/master
Commit: 233884620af67e6af72b60629f799a69f5823eb2
Parents: 47cac2d
Author: Gopal V <gop...@apache.org>
Authored: Sun Feb 11 23:02:46 2018 -0800
Committer: Gopal V <gop...@apache.org>
Committed: Sun Feb 11 23:02:46 2018 -0800

----------------------------------------------------------------------
 .../hive/ql/io/sarg/ConvertAstToSearchArg.java  | 107 ++++++++-----
 .../test/queries/clientpositive/orc_ppd_basic.q |  17 +++
 .../clientpositive/llap/orc_ppd_basic.q.out     | 153 +++++++++++++++++++
 3 files changed, 240 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/23388462/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
index 51b1ac6..27fe828 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java
@@ -23,8 +23,6 @@ import java.sql.Timestamp;
 import java.util.List;
 import java.util.concurrent.ExecutionException;
 
-import com.google.common.cache.Cache;
-import com.google.common.cache.CacheBuilder;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.type.HiveChar;
@@ -60,12 +58,35 @@ import org.slf4j.LoggerFactory;
 
 import com.esotericsoftware.kryo.Kryo;
 import com.esotericsoftware.kryo.io.Input;
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
 
 public class ConvertAstToSearchArg {
   private static final Logger LOG = LoggerFactory.getLogger(ConvertAstToSearchArg.class);
   private final SearchArgument.Builder builder;
   private final Configuration conf;
 
+  /*
+   * Create a new type for handling precision conversions from Decimal -> Double/Float
+   * 
+   * The type is only relevant to boxLiteral and all other functions treat it identically.
+   */
+  private static enum BoxType {
+    LONG(PredicateLeaf.Type.LONG),      // all of the integer types
+    FLOAT(PredicateLeaf.Type.FLOAT),   // float
+    DOUBLE(PredicateLeaf.Type.FLOAT),   // double
+    STRING(PredicateLeaf.Type.STRING),  // string, char, varchar
+    DATE(PredicateLeaf.Type.DATE),
+    DECIMAL(PredicateLeaf.Type.DECIMAL),
+    TIMESTAMP(PredicateLeaf.Type.TIMESTAMP),
+    BOOLEAN(PredicateLeaf.Type.BOOLEAN);
+
+    public final PredicateLeaf.Type type;
+    BoxType(PredicateLeaf.Type type) {
+      this.type = type;
+    }
+  }
+
   /**
    * Builds the expression and leaf list from the original predicate.
    * @param expression the expression to translate.
@@ -89,7 +110,7 @@ public class ConvertAstToSearchArg {
    * @param expr the expression to get the type of
    * @return int, string, or float or null if we don't know the type
    */
-  private static PredicateLeaf.Type getType(ExprNodeDesc expr) {
+  private static BoxType getType(ExprNodeDesc expr) {
     TypeInfo type = expr.getTypeInfo();
     if (type.getCategory() == ObjectInspector.Category.PRIMITIVE) {
       switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
@@ -97,22 +118,23 @@ public class ConvertAstToSearchArg {
         case SHORT:
         case INT:
         case LONG:
-          return PredicateLeaf.Type.LONG;
+          return BoxType.LONG;
         case CHAR:
         case VARCHAR:
         case STRING:
-          return PredicateLeaf.Type.STRING;
+          return BoxType.STRING;
         case FLOAT:
+          return BoxType.FLOAT;
         case DOUBLE:
-          return PredicateLeaf.Type.FLOAT;
+          return BoxType.DOUBLE;
         case DATE:
-          return PredicateLeaf.Type.DATE;
+          return BoxType.DATE;
         case TIMESTAMP:
-          return PredicateLeaf.Type.TIMESTAMP;
+          return BoxType.TIMESTAMP;
         case DECIMAL:
-          return PredicateLeaf.Type.DECIMAL;
+          return BoxType.DECIMAL;
         case BOOLEAN:
-          return PredicateLeaf.Type.BOOLEAN;
+          return BoxType.BOOLEAN;
         default:
       }
     }
@@ -140,12 +162,12 @@ public class ConvertAstToSearchArg {
   }
 
   private static Object boxLiteral(ExprNodeConstantDesc constantDesc,
-                                   PredicateLeaf.Type type) {
+                                   BoxType boxType) {
     Object lit = constantDesc.getValue();
     if (lit == null) {
       return null;
     }
-    switch (type) {
+    switch (boxType) {
       case LONG:
         if (lit instanceof HiveDecimal) {
           HiveDecimal dec = (HiveDecimal) lit;
@@ -163,13 +185,24 @@ public class ConvertAstToSearchArg {
         } else {
           return lit.toString();
         }
+      case DOUBLE:
+        final Number dbl;
+        if (lit instanceof HiveDecimal) {
+          // HiveDecimal -> Number -> Double
+          dbl = ((HiveDecimal) lit).doubleValue();
+        } else {
+          dbl = ((Number) lit);
+        }
+        return dbl.doubleValue();
       case FLOAT:
+        final Number fl;
         if (lit instanceof HiveDecimal) {
-          // HiveDecimal -> Float -> Number -> Double
-          return ((Number)((HiveDecimal) lit).floatValue()).doubleValue();
+          // HiveDecimal -> Float -> Number
+          fl = ((Number)((HiveDecimal) lit).floatValue());
         } else {
-          return ((Number) lit).doubleValue();
+          fl = ((Number) lit);
         }
+        return fl.doubleValue();
       case TIMESTAMP:
         return Timestamp.valueOf(lit.toString());
       case DATE:
@@ -179,25 +212,25 @@ public class ConvertAstToSearchArg {
       case BOOLEAN:
         return lit;
       default:
-        throw new IllegalArgumentException("Unknown literal " + type);
+        throw new IllegalArgumentException("Unknown literal " + boxType);
     }
   }
 
   /**
    * Find the child that is the literal.
    * @param expr the parent node to check
-   * @param type the type of the expression
+   * @param boxType the type of the expression
    * @return the literal boxed if found or null
    */
   private static Object findLiteral(Configuration conf, ExprNodeGenericFuncDesc expr,
-                                    PredicateLeaf.Type type) {
+                                    final BoxType boxType) {
     List<ExprNodeDesc> children = expr.getChildren();
     if (children.size() != 2) {
       return null;
     }
     Object result = null;
     for(ExprNodeDesc child: children) {
-      Object currentResult = getLiteral(conf, child, type);
+      Object currentResult = getLiteral(conf, child, boxType);
       if (currentResult != null) {
         // Both children in the expression should not be literal
         if (result != null) {
@@ -209,9 +242,9 @@ public class ConvertAstToSearchArg {
     return result;
   }
 
-  private static Object getLiteral(Configuration conf, ExprNodeDesc child, PredicateLeaf.Type type) {
+  private static Object getLiteral(Configuration conf, ExprNodeDesc child, BoxType boxType) {
     if (child instanceof ExprNodeConstantDesc) {
-      return boxLiteral((ExprNodeConstantDesc) child, type);
+      return boxLiteral((ExprNodeConstantDesc) child, boxType);
     } else if (child instanceof ExprNodeDynamicValueDesc) {
       LiteralDelegate value = ((ExprNodeDynamicValueDesc) child).getDynamicValue();
       value.setConf(conf);
@@ -228,15 +261,15 @@ public class ConvertAstToSearchArg {
    * @return the boxed literal if found otherwise null
    */
   private static Object getLiteral(Configuration conf, ExprNodeGenericFuncDesc expr,
-                                   PredicateLeaf.Type type,
+                                   BoxType boxType,
                                    int position) {
     List<ExprNodeDesc> children = expr.getChildren();
     ExprNodeDesc child = children.get(position);
-    return getLiteral(conf, child, type);
+    return getLiteral(conf, child, boxType);
   }
 
   private static Object[] getLiteralList(ExprNodeGenericFuncDesc expr,
-                                         PredicateLeaf.Type type,
+                                         BoxType boxType,
                                          int start) {
     List<ExprNodeDesc> children = expr.getChildren();
     Object[] result = new Object[children.size() - start];
@@ -245,7 +278,7 @@ public class ConvertAstToSearchArg {
     int posn = 0;
     for(ExprNodeDesc child: children.subList(start, children.size())) {
       if (child instanceof ExprNodeConstantDesc) {
-        result[posn++] = boxLiteral((ExprNodeConstantDesc) child, type);
+        result[posn++] = boxLiteral((ExprNodeConstantDesc) child, boxType);
       } else {
         // if we get some non-literals, we need to punt
         return null;
@@ -262,8 +295,8 @@ public class ConvertAstToSearchArg {
       builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
       return;
     }
-    PredicateLeaf.Type type = getType(expression.getChildren().get(variable));
-    if (type == null) {
+    BoxType boxType = getType(expression.getChildren().get(variable));
+    if (boxType == null) {
       builder.literal(SearchArgument.TruthValue.YES_NO_NULL);
       return;
     }
@@ -286,28 +319,28 @@ public class ConvertAstToSearchArg {
     try {
       switch (operator) {
         case IS_NULL:
-          builder.isNull(columnName, type);
+          builder.isNull(columnName, boxType.type);
           break;
         case EQUALS:
-          builder.equals(columnName, type, findLiteral(conf, expression, type));
+          builder.equals(columnName, boxType.type, findLiteral(conf, expression, boxType));
           break;
         case NULL_SAFE_EQUALS:
-          builder.nullSafeEquals(columnName, type, findLiteral(conf, expression, type));
+          builder.nullSafeEquals(columnName, boxType.type, findLiteral(conf, expression, boxType));
           break;
         case LESS_THAN:
-          builder.lessThan(columnName, type, findLiteral(conf, expression, type));
+          builder.lessThan(columnName, boxType.type, findLiteral(conf, expression, boxType));
           break;
         case LESS_THAN_EQUALS:
-          builder.lessThanEquals(columnName, type, findLiteral(conf, expression, type));
+          builder.lessThanEquals(columnName, boxType.type, findLiteral(conf, expression, boxType));
           break;
         case IN:
-          builder.in(columnName, type,
-              getLiteralList(expression, type, variable + 1));
+          builder.in(columnName, boxType.type,
+              getLiteralList(expression, boxType, variable + 1));
           break;
         case BETWEEN:
-          builder.between(columnName, type,
-              getLiteral(conf, expression, type, variable + 1),
-              getLiteral(conf, expression, type, variable + 2));
+          builder.between(columnName, boxType.type,
+              getLiteral(conf, expression, boxType, variable + 1),
+              getLiteral(conf, expression, boxType, variable + 2));
           break;
       }
     } catch (Exception e) {

http://git-wip-us.apache.org/repos/asf/hive/blob/23388462/ql/src/test/queries/clientpositive/orc_ppd_basic.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/orc_ppd_basic.q b/ql/src/test/queries/clientpositive/orc_ppd_basic.q
index b483914..1c9e4dd 100644
--- a/ql/src/test/queries/clientpositive/orc_ppd_basic.q
+++ b/ql/src/test/queries/clientpositive/orc_ppd_basic.q
@@ -178,8 +178,10 @@ select count(*) from orc_ppd where s = "wendy king" and t < 0;
 -- INPUT_RECORDS: 100
 select count(*) from orc_ppd where s = "wendy king" and t > 100;
 
+set hive.cbo.enable=false;
 set hive.optimize.index.filter=false;
 -- when cbo is disabled constant gets converted to HiveDecimal
+--  74.72f + 0.0 = 74.72000122070312
 select count(*) from orc_ppd where f=74.72;
 set hive.optimize.index.filter=true;
 select count(*) from orc_ppd where f=74.72;
@@ -190,6 +192,21 @@ select count(*) from orc_ppd where f=74.72;
 set hive.optimize.index.filter=true;
 select count(*) from orc_ppd where f=74.72;
 
+-- 42.47f + 0.0 == 42.470001220703125
+create temporary table orc_ppd_1 stored as orc as select * from orc_ppd_staging where d = 42.47;
+
+set hive.cbo.enable=false;
+set hive.optimize.index.filter=false;
+-- when cbo is disabled constant gets converted to HiveDecimal
+select count(*) from orc_ppd_1 where d=42.47;
+set hive.optimize.index.filter=true;
+select count(*) from orc_ppd_1 where d=42.47;
+
+set hive.cbo.enable=true;
+set hive.optimize.index.filter=false;
+select count(*) from orc_ppd_1 where d=42.47;
+set hive.optimize.index.filter=true;
+select count(*) from orc_ppd_1 where d=42.47;
 
 RESET;
 set hive.compute.query.using.stats=false;

http://git-wip-us.apache.org/repos/asf/hive/blob/23388462/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
index c5302c3..590437c 100644
--- a/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_ppd_basic.q.out
@@ -1514,6 +1514,159 @@ Stage-1 INPUT COUNTERS:
    INPUT_FILES_Map_1: 1
    RAW_INPUT_SPLITS_Map_1: 1
 2
+PREHOOK: query: create temporary table orc_ppd_1 stored as orc as select * from orc_ppd_staging where d = 42.47
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@orc_ppd_staging
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_ppd_1
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 10129
+   HDFS_BYTES_WRITTEN: 1415
+   HDFS_READ_OPS: 5
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 3
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 98
+   RECORDS_OUT_1_default.orc_ppd_1: 2
+Stage-1 LLAP IO COUNTERS:
+   ALLOCATED_BYTES: 2359296
+   ALLOCATED_USED_BYTES: 44166
+   CACHE_HIT_BYTES: 30897
+   CACHE_MISS_BYTES: 10129
+   METADATA_CACHE_HIT: 2
+   NUM_DECODED_BATCHES: 1
+   NUM_VECTOR_BATCHES: 1
+   ROWS_EMITTED: 98
+   SELECTED_ROWGROUPS: 1
+Stage-1 INPUT COUNTERS:
+   GROUPED_INPUT_SPLITS_Map_1: 1
+   INPUT_DIRECTORIES_Map_1: 1
+   INPUT_FILES_Map_1: 1
+   RAW_INPUT_SPLITS_Map_1: 1
+PREHOOK: query: select count(*) from orc_ppd_1 where d=42.47
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd_1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 1539
+   HDFS_BYTES_WRITTEN: 101
+   HDFS_READ_OPS: 6
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 2
+   RECORDS_OUT_0: 1
+   RECORDS_OUT_INTERMEDIATE_Map_1: 2
+Stage-1 LLAP IO COUNTERS:
+   ALLOCATED_BYTES: 786432
+   ALLOCATED_USED_BYTES: 65
+   CACHE_HIT_BYTES: 0
+   CACHE_MISS_BYTES: 62
+   METADATA_CACHE_MISS: 2
+   NUM_DECODED_BATCHES: 1
+   NUM_VECTOR_BATCHES: 1
+   ROWS_EMITTED: 2
+   SELECTED_ROWGROUPS: 1
+Stage-1 INPUT COUNTERS:
+   GROUPED_INPUT_SPLITS_Map_1: 1
+   INPUT_DIRECTORIES_Map_1: 1
+   INPUT_FILES_Map_1: 1
+   RAW_INPUT_SPLITS_Map_1: 1
+2
+PREHOOK: query: select count(*) from orc_ppd_1 where d=42.47
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd_1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 0
+   HDFS_BYTES_WRITTEN: 101
+   HDFS_READ_OPS: 2
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 2
+   RECORDS_OUT_0: 1
+   RECORDS_OUT_INTERMEDIATE_Map_1: 2
+Stage-1 LLAP IO COUNTERS:
+   CACHE_HIT_BYTES: 62
+   CACHE_MISS_BYTES: 0
+   METADATA_CACHE_HIT: 2
+   NUM_DECODED_BATCHES: 1
+   NUM_VECTOR_BATCHES: 1
+   ROWS_EMITTED: 2
+   SELECTED_ROWGROUPS: 1
+Stage-1 INPUT COUNTERS:
+   GROUPED_INPUT_SPLITS_Map_1: 1
+   INPUT_DIRECTORIES_Map_1: 1
+   INPUT_FILES_Map_1: 1
+   RAW_INPUT_SPLITS_Map_1: 1
+2
+PREHOOK: query: select count(*) from orc_ppd_1 where d=42.47
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd_1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 0
+   HDFS_BYTES_WRITTEN: 101
+   HDFS_READ_OPS: 2
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 2
+   RECORDS_OUT_0: 1
+   RECORDS_OUT_INTERMEDIATE_Map_1: 2
+Stage-1 LLAP IO COUNTERS:
+   CACHE_HIT_BYTES: 62
+   CACHE_MISS_BYTES: 0
+   METADATA_CACHE_HIT: 2
+   NUM_DECODED_BATCHES: 1
+   NUM_VECTOR_BATCHES: 1
+   ROWS_EMITTED: 2
+   SELECTED_ROWGROUPS: 1
+Stage-1 INPUT COUNTERS:
+   GROUPED_INPUT_SPLITS_Map_1: 1
+   INPUT_DIRECTORIES_Map_1: 1
+   INPUT_FILES_Map_1: 1
+   RAW_INPUT_SPLITS_Map_1: 1
+2
+PREHOOK: query: select count(*) from orc_ppd_1 where d=42.47
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_ppd_1
+PREHOOK: Output: hdfs://### HDFS PATH ###
+Stage-1 FILE SYSTEM COUNTERS:
+   HDFS_BYTES_READ: 0
+   HDFS_BYTES_WRITTEN: 101
+   HDFS_READ_OPS: 2
+   HDFS_LARGE_READ_OPS: 0
+   HDFS_WRITE_OPS: 2
+Stage-1 HIVE COUNTERS:
+   CREATED_FILES: 1
+   DESERIALIZE_ERRORS: 0
+   RECORDS_IN_Map_1: 2
+   RECORDS_OUT_0: 1
+   RECORDS_OUT_INTERMEDIATE_Map_1: 2
+Stage-1 LLAP IO COUNTERS:
+   CACHE_HIT_BYTES: 62
+   CACHE_MISS_BYTES: 0
+   METADATA_CACHE_HIT: 2
+   NUM_DECODED_BATCHES: 1
+   NUM_VECTOR_BATCHES: 1
+   ROWS_EMITTED: 2
+   SELECTED_ROWGROUPS: 1
+Stage-1 INPUT COUNTERS:
+   GROUPED_INPUT_SPLITS_Map_1: 1
+   INPUT_DIRECTORIES_Map_1: 1
+   INPUT_FILES_Map_1: 1
+   RAW_INPUT_SPLITS_Map_1: 1
+2
 PREHOOK: query: create temporary table tmp_orcppd
                     stored as orc
                     as select ctinyint, csmallint, cint , cbigint, cfloat, 
cdouble,

Reply via email to