HIVE-15884: Optimize not between for vectorization (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5f533bce
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5f533bce
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5f533bce

Branch: refs/heads/branch-2.2
Commit: 5f533bcebfdfe0d79a2b41de7fc72bd8c1b366c4
Parents: fb1c9fd
Author: Pengcheng Xiong <pxi...@apache.org>
Authored: Thu Mar 2 11:16:34 2017 -0800
Committer: Owen O'Malley <omal...@apache.org>
Committed: Tue Mar 28 15:27:56 2017 -0700

----------------------------------------------------------------------
 .../ql/exec/vector/VectorizationContext.java    | 24 +++++++++++++++++++-
 .../clientpositive/llap/vector_between_in.q.out |  6 ++---
 .../spark/vector_between_in.q.out               | 12 +++++-----
 .../clientpositive/tez/vector_between_in.q.out  |  6 ++---
 .../clientpositive/vector_between_in.q.out      |  6 ++---
 5 files changed, 38 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5f533bce/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index f81a0fb..d8387bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -102,7 +102,6 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDynamicValueDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.udf.SettableUDF;
 import org.apache.hadoop.hive.ql.udf.*;
 import org.apache.hadoop.hive.ql.udf.generic.*;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
@@ -582,6 +581,29 @@ public class VectorizationContext {
       ve = getColumnVectorExpression((ExprNodeColumnDesc) exprDesc, mode);
     } else if (exprDesc instanceof ExprNodeGenericFuncDesc) {
       ExprNodeGenericFuncDesc expr = (ExprNodeGenericFuncDesc) exprDesc;
+      // push not through between...
+      if ("not".equals(expr.getFuncText())) {
+        if (expr.getChildren() != null && expr.getChildren().size() == 1) {
+          ExprNodeDesc child = expr.getChildren().get(0);
+          if (child instanceof ExprNodeGenericFuncDesc) {
+            ExprNodeGenericFuncDesc childExpr = (ExprNodeGenericFuncDesc) child;
+            if ("between".equals(childExpr.getFuncText())) {
+              ExprNodeConstantDesc flag = (ExprNodeConstantDesc) childExpr.getChildren().get(0);
+              List<ExprNodeDesc> newChildren = new ArrayList<>();
+              if (Boolean.TRUE.equals(flag.getValue())) {
+                newChildren.add(new ExprNodeConstantDesc(Boolean.FALSE));
+              } else {
+                newChildren.add(new ExprNodeConstantDesc(Boolean.TRUE));
+              }
+              newChildren
+                  .addAll(childExpr.getChildren().subList(1, childExpr.getChildren().size()));
+              expr.setTypeInfo(childExpr.getTypeInfo());
+              expr.setGenericUDF(childExpr.getGenericUDF());
+              expr.setChildren(newChildren);
+            }
+          }
+        }
+      }
       if (isCustomUDF(expr)) {
         ve = getCustomUDFExpression(expr, mode);
       } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/5f533bce/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out 
b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
index 88d97f5..7f9067d 100644
--- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out
@@ -317,7 +317,7 @@ STAGE PLANS:
                   alias: decimal_date_test
                   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (not cdate BETWEEN 1968-05-01 AND 1971-09-01) 
(type: boolean)
+                    predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 
(type: boolean)
                     Statistics: Num rows: 10923 Data size: 2193503 Basic 
stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdate (type: date)
@@ -427,7 +427,7 @@ STAGE PLANS:
                   alias: decimal_date_test
                   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (not cdecimal1 BETWEEN -2000 AND 
4390.1351351351) (type: boolean)
+                    predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 
(type: boolean)
                     Statistics: Num rows: 10923 Data size: 2193503 Basic 
stats: COMPLETE Column stats: NONE
                     Select Operator
                       Statistics: Num rows: 10923 Data size: 2193503 Basic 
stats: COMPLETE Column stats: NONE
@@ -917,7 +917,7 @@ STAGE PLANS:
                   alias: decimal_date_test
                   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: (not cdecimal1 BETWEEN -2000 AND 
4390.1351351351) (type: boolean)
+                    expressions: cdecimal1 NOT BETWEEN -2000 AND 
4390.1351351351 (type: boolean)
                     outputColumnNames: _col0
                     Statistics: Num rows: 12288 Data size: 2467616 Basic 
stats: COMPLETE Column stats: NONE
                     Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/5f533bce/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out 
b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
index 689707f..efbca8c 100644
--- a/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_between_in.q.out
@@ -311,8 +311,8 @@ STAGE PLANS:
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsFalse(col 
4)(children: VectorUDFAdaptor(cdate BETWEEN 1968-05-01 AND 1971-09-01) -> 
4:boolean) -> boolean
-                    predicate: (not cdate BETWEEN 1968-05-01 AND 1971-09-01) 
(type: boolean)
+                        predicateExpression: FilterLongColumnNotBetween(col 3, 
left -610, right 608) -> boolean
+                    predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 
(type: boolean)
                     Statistics: Num rows: 10923 Data size: 2193503 Basic 
stats: COMPLETE Column stats: NONE
 >>>>>>> eb1da30... HIVE-15388: HiveParser spends lots of time in parsing 
 >>>>>>> queries with lots of '(' (Pengcheng Xiong, reviewed by Ashutosh 
 >>>>>>> Chauhan, Gunther Hagleitner)
                     Select Operator
@@ -332,7 +332,7 @@ STAGE PLANS:
                 groupByVectorOutput: true
                 inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 allNative: false
-                usesVectorUDFAdaptor: true
+                usesVectorUDFAdaptor: false
                 vectorized: true
 >>>>>>> eb1da30... HIVE-15388: HiveParser spends lots of time in parsing 
 >>>>>>> queries with lots of '(' (Pengcheng Xiong, reviewed by Ashutosh 
 >>>>>>> Chauhan, Gunther Hagleitner)
         Reducer 2 
@@ -437,8 +437,8 @@ STAGE PLANS:
                     Filter Vectorization:
                         className: VectorFilterOperator
                         native: true
-                        predicateExpression: SelectColumnIsFalse(col 
4)(children: VectorUDFAdaptor(cdecimal1 BETWEEN -2000 AND 4390.1351351351) -> 
4:boolean) -> boolean
-                    predicate: (not cdecimal1 BETWEEN -2000 AND 
4390.1351351351) (type: boolean)
+                        predicateExpression: FilterDecimalColumnNotBetween(col 
1, left -2000, right 4390.1351351351) -> boolean
+                    predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 
(type: boolean)
                     Statistics: Num rows: 10923 Data size: 2193503 Basic 
stats: COMPLETE Column stats: NONE
 >>>>>>> eb1da30... HIVE-15388: HiveParser spends lots of time in parsing 
 >>>>>>> queries with lots of '(' (Pengcheng Xiong, reviewed by Ashutosh 
 >>>>>>> Chauhan, Gunther Hagleitner)
                     Select Operator
@@ -461,7 +461,7 @@ STAGE PLANS:
                 groupByVectorOutput: true
                 inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 allNative: false
-                usesVectorUDFAdaptor: true
+                usesVectorUDFAdaptor: false
                 vectorized: true
 >>>>>>> eb1da30... HIVE-15388: HiveParser spends lots of time in parsing 
 >>>>>>> queries with lots of '(' (Pengcheng Xiong, reviewed by Ashutosh 
 >>>>>>> Chauhan, Gunther Hagleitner)
         Reducer 2 

http://git-wip-us.apache.org/repos/asf/hive/blob/5f533bce/ql/src/test/results/clientpositive/tez/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_between_in.q.out 
b/ql/src/test/results/clientpositive/tez/vector_between_in.q.out
index 8903337..57e6d1a 100644
--- a/ql/src/test/results/clientpositive/tez/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/tez/vector_between_in.q.out
@@ -310,7 +310,7 @@ STAGE PLANS:
                   alias: decimal_date_test
                   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (not cdate BETWEEN 1968-05-01 AND 1971-09-01) 
(type: boolean)
+                    predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 
(type: boolean)
                     Statistics: Num rows: 10923 Data size: 2193503 Basic 
stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdate (type: date)
@@ -418,7 +418,7 @@ STAGE PLANS:
                   alias: decimal_date_test
                   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (not cdecimal1 BETWEEN -2000 AND 
4390.1351351351) (type: boolean)
+                    predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 
(type: boolean)
                     Statistics: Num rows: 10923 Data size: 2193503 Basic 
stats: COMPLETE Column stats: NONE
                     Select Operator
                       Statistics: Num rows: 10923 Data size: 2193503 Basic 
stats: COMPLETE Column stats: NONE
@@ -903,7 +903,7 @@ STAGE PLANS:
                   alias: decimal_date_test
                   Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: (not cdecimal1 BETWEEN -2000 AND 
4390.1351351351) (type: boolean)
+                    expressions: cdecimal1 NOT BETWEEN -2000 AND 
4390.1351351351 (type: boolean)
                     outputColumnNames: _col0
                     Statistics: Num rows: 12288 Data size: 2467616 Basic 
stats: COMPLETE Column stats: NONE
                     Group By Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/5f533bce/ql/src/test/results/clientpositive/vector_between_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_between_in.q.out 
b/ql/src/test/results/clientpositive/vector_between_in.q.out
index 270de4b..cad29dd 100644
--- a/ql/src/test/results/clientpositive/vector_between_in.q.out
+++ b/ql/src/test/results/clientpositive/vector_between_in.q.out
@@ -264,7 +264,7 @@ STAGE PLANS:
             alias: decimal_date_test
             Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (not cdate BETWEEN 1968-05-01 AND 1971-09-01) (type: 
boolean)
+              predicate: cdate NOT BETWEEN 1968-05-01 AND 1971-09-01 (type: 
boolean)
               Statistics: Num rows: 10923 Data size: 2193503 Basic stats: 
COMPLETE Column stats: NONE
               Select Operator
                 expressions: cdate (type: date)
@@ -356,7 +356,7 @@ STAGE PLANS:
             alias: decimal_date_test
             Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (not cdecimal1 BETWEEN -2000 AND 4390.1351351351) 
(type: boolean)
+              predicate: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 
(type: boolean)
               Statistics: Num rows: 10923 Data size: 2193503 Basic stats: 
COMPLETE Column stats: NONE
               Select Operator
                 Statistics: Num rows: 10923 Data size: 2193503 Basic stats: 
COMPLETE Column stats: NONE
@@ -809,7 +809,7 @@ STAGE PLANS:
             alias: decimal_date_test
             Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
             Select Operator
-              expressions: (not cdecimal1 BETWEEN -2000 AND 4390.1351351351) 
(type: boolean)
+              expressions: cdecimal1 NOT BETWEEN -2000 AND 4390.1351351351 
(type: boolean)
               outputColumnNames: _col0
               Statistics: Num rows: 12288 Data size: 2467616 Basic stats: 
COMPLETE Column stats: NONE
               Group By Operator

Reply via email to