hive git commit: HIVE-18573 : Use proper Calcite operator instead of UDFs (Slim Bouguerra via Ashutosh Chauhan)

hashutosh Wed, 21 Feb 2018 15:46:15 -0800

Repository: hive
Updated Branches:
  refs/heads/master ad87176c7 -> dcb3817d6



HIVE-18573 : Use proper Calcite operator instead of UDFs (Slim Bouguerra via 
Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dcb3817d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dcb3817d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dcb3817d

Branch: refs/heads/master
Commit: dcb3817d6d1360b816e8687bbae8d7aa62dc2b20
Parents: ad87176
Author: Slim Bouguerra <[email protected]>
Authored: Mon Jan 29 13:49:00 2018 -0800
Committer: Ashutosh Chauhan <[email protected]>
Committed: Wed Feb 21 15:44:46 2018 -0800

----------------------------------------------------------------------
 .../calcite/reloperators/HiveConcat.java        |  35 +++++
 .../calcite/reloperators/HiveExtractDate.java   |   3 +-
 .../translator/SqlFunctionConverter.java        |  41 +++++
 .../llap/bucket_map_join_tez_empty.q.out        |   4 +-
 .../clientpositive/llap/subquery_in.q.out       |   4 +-
 .../clientpositive/llap/subquery_notin.q.out    |   2 +-
 .../clientpositive/llap/subquery_scalar.q.out   | 149 ++++++++-----------
 .../clientpositive/spark/subquery_in.q.out      |   4 +-
 .../clientpositive/spark/subquery_notin.q.out   |   2 +-
 9 files changed, 148 insertions(+), 96 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
new file mode 100644
index 0000000..36c34cc
--- /dev/null
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSpecialOperator;
+import org.apache.calcite.sql.type.InferTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+
+public class HiveConcat extends SqlSpecialOperator {
+  public static final SqlSpecialOperator INSTANCE = new HiveConcat();
+
+  private HiveConcat() {
+    super("||", SqlKind.OTHER_FUNCTION, 30, true, ReturnTypes.VARCHAR_2000,
+        InferTypes.RETURN_TYPE, null
+    );
+  }
+}
+

http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
index 4099733..a43f406 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java
@@ -22,6 +22,7 @@ import java.util.Set;
 import org.apache.calcite.sql.SqlFunction;
 import org.apache.calcite.sql.SqlFunctionCategory;
 import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlExtractFunction;
 import org.apache.calcite.sql.type.OperandTypes;
 import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SqlTypeTransforms;
@@ -43,7 +44,7 @@ public class HiveExtractDate extends SqlFunction {
           Sets.newHashSet(YEAR, QUARTER, MONTH, WEEK, DAY, HOUR, MINUTE, 
SECOND);
 
   private HiveExtractDate(String name) {
-    super(name, SqlKind.EXTRACT, 
+    super(name, SqlKind.EXTRACT,
        ReturnTypes.cascade(ReturnTypes.INTEGER, 
SqlTypeTransforms.FORCE_NULLABLE), null,
        OperandTypes.INTERVALINTERVAL_INTERVALDATETIME,
        SqlFunctionCategory.SYSTEM);

http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 3f2eaef..cb0c2b1 100644
--- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ 
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -50,6 +50,7 @@ import 
org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunc
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat;
 import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
@@ -235,6 +236,8 @@ public class SqlFunctionConverter {
         case CASE:
         case EXTRACT:
         case FLOOR:
+        case CEIL:
+        case LIKE:
         case OTHER_FUNCTION:
           node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, 
"TOK_FUNCTION");
           node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, 
hToken.text));
@@ -398,6 +401,44 @@ public class SqlFunctionConverter {
           hToken(HiveParser.Identifier, "floor_minute"));
       registerFunction("floor_second", HiveFloorDate.SECOND,
           hToken(HiveParser.Identifier, "floor_second"));
+      registerFunction("power", SqlStdOperatorTable.POWER, 
hToken(HiveParser.Identifier, "power"));
+      registerDuplicateFunction("pow", SqlStdOperatorTable.POWER,
+          hToken(HiveParser.Identifier, "power")
+      );
+      registerFunction("ceil", SqlStdOperatorTable.CEIL, 
hToken(HiveParser.Identifier, "ceil"));
+      registerDuplicateFunction("ceiling", SqlStdOperatorTable.CEIL,
+          hToken(HiveParser.Identifier, "ceil")
+      );
+      registerFunction("floor", SqlStdOperatorTable.FLOOR, 
hToken(HiveParser.Identifier, "floor"));
+      registerFunction("log10", SqlStdOperatorTable.LOG10, 
hToken(HiveParser.Identifier, "log10"));
+      registerFunction("ln", SqlStdOperatorTable.LN, 
hToken(HiveParser.Identifier, "ln"));
+      registerFunction("cos", SqlStdOperatorTable.COS, 
hToken(HiveParser.Identifier, "cos"));
+      registerFunction("sin", SqlStdOperatorTable.SIN, 
hToken(HiveParser.Identifier, "sin"));
+      registerFunction("tan", SqlStdOperatorTable.TAN, 
hToken(HiveParser.Identifier, "tan"));
+      registerFunction("concat", HiveConcat.INSTANCE,
+          hToken(HiveParser.Identifier, "concat")
+      );
+      registerFunction("substring", SqlStdOperatorTable.SUBSTRING,
+          hToken(HiveParser.Identifier, "substring")
+      );
+      registerFunction("like", SqlStdOperatorTable.LIKE, 
hToken(HiveParser.Identifier, "like"));
+      registerFunction("exp", SqlStdOperatorTable.EXP, 
hToken(HiveParser.Identifier, "exp"));
+      registerFunction("div", SqlStdOperatorTable.DIVIDE_INTEGER,
+          hToken(HiveParser.DIV, "div")
+      );
+      registerFunction("sqrt", SqlStdOperatorTable.SQRT, 
hToken(HiveParser.Identifier, "sqrt"));
+      registerFunction("lower", SqlStdOperatorTable.LOWER, 
hToken(HiveParser.Identifier, "lower"));
+      registerFunction("upper", SqlStdOperatorTable.UPPER, 
hToken(HiveParser.Identifier, "upper"));
+      registerFunction("abs", SqlStdOperatorTable.ABS, 
hToken(HiveParser.Identifier, "abs"));
+      registerFunction("char_length", SqlStdOperatorTable.CHAR_LENGTH,
+          hToken(HiveParser.Identifier, "char_length")
+      );
+      registerDuplicateFunction("character_length", 
SqlStdOperatorTable.CHAR_LENGTH,
+          hToken(HiveParser.Identifier, "char_length")
+      );
+      registerFunction("length", SqlStdOperatorTable.CHARACTER_LENGTH,
+          hToken(HiveParser.Identifier, "length")
+      );
     }
 
     private void registerFunction(String name, SqlOperator calciteFn, 
HiveToken hiveToken) {

http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out
----------------------------------------------------------------------
diff --git 
a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out 
b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out
index 33825da..08df574 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out
@@ -71,10 +71,10 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col2, _col3
                         input vertices:
                           1 Map 2
-                        Statistics: Num rows: 2 Data size: 364 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 3 Data size: 546 Basic stats: 
COMPLETE Column stats: COMPLETE
                         File Output Operator
                           compressed: false
-                          Statistics: Num rows: 2 Data size: 364 Basic stats: 
COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 3 Data size: 546 Basic stats: 
COMPLETE Column stats: COMPLETE
                           table:
                               input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out 
b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index d1ee21b..b5f9641 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -1920,7 +1920,7 @@ STAGE PLANS:
                   alias: part
                   Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: floor(p_retailprice) is not null (type: boolean)
+                    predicate: p_retailprice is not null (type: boolean)
                     Statistics: Num rows: 26 Data size: 16094 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: p_partkey (type: int), p_name (type: 
string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), 
p_size (type: int), p_container (type: string), p_retailprice (type: double), 
p_comment (type: string)
@@ -1989,7 +1989,7 @@ STAGE PLANS:
                   outputColumnNames: _col1
                   Statistics: Num rows: 13 Data size: 104 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: floor(_col1) is not null (type: boolean)
+                    predicate: _col1 is not null (type: boolean)
                     Statistics: Num rows: 13 Data size: 104 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: floor(_col1) (type: bigint)

http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out 
b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index e894a44..50c18c8 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -2509,7 +2509,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col12
                 Statistics: Num rows: 26 Data size: 16538 Basic stats: 
COMPLETE Column stats: COMPLETE
                 Filter Operator
-                  predicate: ((_col12 is null and floor(_col7) is not null and 
(_col10 >= _col9)) or (_col9 = 0)) (type: boolean)
+                  predicate: ((_col12 is null and _col7 is not null and 
(_col10 >= _col9)) or (_col9 = 0)) (type: boolean)
                   Statistics: Num rows: 26 Data size: 16538 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), _col7 (type: double), _col8 (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out 
b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index 15535f5..cec3daa 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -3161,26 +3161,24 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col2 (type: int)
                   1 _col2 (type: int)
                 outputColumnNames: _col0, _col1, _col3, _col4
-                Statistics: Num rows: 26 Data size: 6634 Basic stats: COMPLETE 
Column stats: COMPLETE
-                Filter Operator
-                  predicate: (_col1 like CASE WHEN (_col4 is null) THEN (null) 
ELSE (_col3) END) (type: boolean)
-                  Statistics: Num rows: 13 Data size: 3317 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: _col0 (type: int)
-                    outputColumnNames: _col0
-                    Statistics: Num rows: 13 Data size: 52 Basic stats: 
COMPLETE Column stats: COMPLETE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 13 Data size: 52 Basic stats: 
COMPLETE Column stats: COMPLETE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                residual filter predicates: {(_col1 like CASE WHEN (_col4 is 
null) THEN (null) ELSE (_col3) END)}
+                Statistics: Num rows: 8 Data size: 2504 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: int)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 8 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 4 
             Execution mode: llap
             Reduce Operator Tree:
@@ -3258,8 +3256,7 @@ POSTHOOK: Input: default@part
 85768
 86428
 90681
-Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in 
Stage 'Reducer 2' is a cross product
 PREHOOK: query: explain select * from part_null where p_name NOT LIKE (select 
min(p_name) from part_null) AND p_brand NOT IN (select p_name from part)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from part_null where p_name NOT LIKE (select 
min(p_name) from part_null) AND p_brand NOT IN (select p_name from part)
@@ -3273,12 +3270,11 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 8 (XPROD_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE)
-        Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE)
-        Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
-        Reducer 9 <- Map 7 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 7 
(XPROD_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
+        Reducer 8 <- Map 6 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -3296,7 +3292,7 @@ STAGE PLANS:
                       value expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: string), _col3 (type: string), _col4 (type: string), 
_col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: 
string)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 5 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: part_null
@@ -3316,7 +3312,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 7 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: part
@@ -3351,41 +3347,22 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
-                keys:
-                  0 
-                  1 
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
-                Statistics: Num rows: 1 Data size: 1489 Basic stats: COMPLETE 
Column stats: NONE
-                Filter Operator
-                  predicate: (not (_col1 like _col9)) (type: boolean)
-                  Statistics: Num rows: 1 Data size: 1489 Basic stats: 
COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
-                    Statistics: Num rows: 1 Data size: 1489 Basic stats: 
COMPLETE Column stats: NONE
-                    Reduce Output Operator
-                      sort order: 
-                      Statistics: Num rows: 1 Data size: 1489 Basic stats: 
COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: string), _col3 (type: string), _col4 (type: string), 
_col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: 
string)
-        Reducer 3 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
                      Inner Join 0 to 1
+                     Inner Join 0 to 2
                 keys:
                   0 
                   1 
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col10, _col11
-                Statistics: Num rows: 1 Data size: 1506 Basic stats: COMPLETE 
Column stats: NONE
+                  2 
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11
+                residual filter predicates: {(not (_col1 like _col9))}
+                Statistics: Num rows: 1 Data size: 1505 Basic stats: COMPLETE 
Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col3 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col3 (type: string)
-                  Statistics: Num rows: 1 Data size: 1506 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 1505 Basic stats: 
COMPLETE Column stats: NONE
                   value expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: 
string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), 
_col11 (type: bigint)
-        Reducer 4 
+        Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Merge Join Operator
@@ -3410,7 +3387,7 @@ STAGE PLANS:
                           input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                           serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 6 
+        Reducer 5 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -3422,7 +3399,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE 
Column stats: NONE
                   value expressions: _col0 (type: string)
-        Reducer 8 
+        Reducer 7 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -3434,7 +3411,7 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE
                   value expressions: _col0 (type: bigint), _col1 (type: bigint)
-        Reducer 9 
+        Reducer 8 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -3459,8 +3436,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
-Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 3' is a cross product
+Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in 
Stage 'Reducer 2' is a cross product
 PREHOOK: query: select * from part_null where p_name NOT LIKE (select 
min(p_name) from part_null) AND p_brand NOT IN (select p_name from part)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part
@@ -3496,7 +3472,7 @@ POSTHOOK: Input: default@part_null
 85768  almond antique chartreuse lavender yellow       Manufacturer#1  
Brand#12        LARGE BRUSHED STEEL     34      SM BAG  1753.76 refull
 86428  almond aquamarine burnished black steel Manufacturer#1  Brand#12        
STANDARD ANODIZED STEEL 28      WRAP BAG        1414.42 arefully 
 90681  almond antique chartreuse khaki white   Manufacturer#3  Brand#31        
MEDIUM BURNISHED TIN    17      SM CASE 1671.68 are slyly after the sl
-Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
 PREHOOK: query: explain select * from part_null where p_brand NOT IN (select 
p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp 
where part_null.p_type = pp.p_type)
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from part_null where p_brand NOT IN (select 
p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp 
where part_null.p_type = pp.p_type)
@@ -3631,26 +3607,24 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col4 (type: string)
                   1 _col2 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col13, _col14
-                Statistics: Num rows: 7 Data size: 982 Basic stats: COMPLETE 
Column stats: NONE
-                Filter Operator
-                  predicate: (not (_col1 like CASE WHEN (_col14 is null) THEN 
(null) ELSE (_col13) END)) (type: boolean)
+                residual filter predicates: {(not (_col1 like CASE WHEN 
(_col14 is null) THEN (null) ELSE (_col13) END))}
+                Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), _col2 
(type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), 
_col6 (type: string), _col7 (type: double), _col8 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
                   Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE 
Column stats: NONE
-                  Select Operator
-                    expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8
+                  File Output Operator
+                    compressed: false
                     Statistics: Num rows: 4 Data size: 561 Basic stats: 
COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 4 Data size: 561 Basic stats: 
COMPLETE Column stats: NONE
-                      table:
-                          input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                          serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 6 
             Execution mode: llap
             Reduce Operator Tree:
@@ -3707,7 +3681,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Reducer 2' is a cross product
 PREHOOK: query: select * from part_null where p_brand NOT IN (select p_name 
from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where 
part_null.p_type = pp.p_type)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@part
@@ -4339,7 +4313,7 @@ STAGE PLANS:
                   alias: part
                   Statistics: Num rows: 26 Data size: 8242 Basic stats: 
COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: (p_name is not null and p_type is not null) 
(type: boolean)
+                    predicate: p_type is not null (type: boolean)
                     Statistics: Num rows: 26 Data size: 8242 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: p_name (type: string), p_brand (type: 
string), p_type (type: string)
@@ -4385,10 +4359,10 @@ STAGE PLANS:
                   0 _col1 (type: string), _col4 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8
-                Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE 
Column stats: NONE
+                Statistics: Num rows: 3 Data size: 742 Basic stats: COMPLETE 
Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6 Data size: 1485 Basic stats: 
COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 742 Basic stats: COMPLETE 
Column stats: NONE
                   table:
                       input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -4398,29 +4372,30 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Left Outer Join 0 to 1
+                     Inner Join 0 to 1
                 keys:
                   0 _col2 (type: string)
                   1 _col2 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 26 Data size: 11062 Basic stats: 
COMPLETE Column stats: COMPLETE
-                Filter Operator
-                  predicate: (not (_col1 like CASE WHEN (_col4 is null) THEN 
(null) ELSE (_col3) END)) (type: boolean)
-                  Statistics: Num rows: 13 Data size: 5625 Basic stats: 
COMPLETE Column stats: COMPLETE
-                  Select Operator
-                    expressions: _col0 (type: string), _col2 (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 13 Data size: 2925 Basic stats: 
COMPLETE Column stats: COMPLETE
+                residual filter predicates: {(not (_col1 like CASE WHEN (_col4 
is null) THEN (null) ELSE (_col3) END))}
+                Statistics: Num rows: 7 Data size: 3535 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 7 Data size: 1575 Basic stats: 
COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (_col0 is not null and _col1 is not null) 
(type: boolean)
+                    Statistics: Num rows: 7 Data size: 1575 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Group By Operator
                       keys: _col0 (type: string), _col1 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 6 Data size: 1350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 3 Data size: 675 Basic stats: 
COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: 
string)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string)
-                        Statistics: Num rows: 6 Data size: 1350 Basic stats: 
COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 3 Data size: 675 Basic stats: 
COMPLETE Column stats: COMPLETE
         Reducer 6 
             Execution mode: llap
             Reduce Operator Tree:

http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/spark/subquery_in.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
index f89c146..5e48a5c 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out
@@ -1827,7 +1827,7 @@ STAGE PLANS:
                   alias: part
                   Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: floor(p_retailprice) is not null (type: boolean)
+                    predicate: p_retailprice is not null (type: boolean)
                     Statistics: Num rows: 26 Data size: 3147 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: p_partkey (type: int), p_name (type: 
string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), 
p_size (type: int), p_container (type: string), p_retailprice (type: double), 
p_comment (type: string)
@@ -1890,7 +1890,7 @@ STAGE PLANS:
                   outputColumnNames: _col1
                   Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: floor(_col1) is not null (type: boolean)
+                    predicate: _col1 is not null (type: boolean)
                     Statistics: Num rows: 13 Data size: 1573 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: floor(_col1) (type: bigint)

http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out 
b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
index b2a1972..e2f26a9 100644
--- a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out
@@ -2496,7 +2496,7 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col12
                 Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE 
Column stats: NONE
                 Filter Operator
-                  predicate: ((_col12 is null and floor(_col7) is not null and 
(_col10 >= _col9)) or (_col9 = 0)) (type: boolean)
+                  predicate: ((_col12 is null and _col7 is not null and 
(_col10 >= _col9)) or (_col9 = 0)) (type: boolean)
                   Statistics: Num rows: 18 Data size: 2537 Basic stats: 
COMPLETE Column stats: NONE
                   Select Operator
                     expressions: _col0 (type: int), _col1 (type: string), 
_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: 
int), _col6 (type: string), _col7 (type: double), _col8 (type: string)

hive git commit: HIVE-18573 : Use proper Calcite operator instead of UDFs (Slim Bouguerra via Ashutosh Chauhan)

Reply via email to