Repository: hive Updated Branches: refs/heads/master ad87176c7 -> dcb3817d6
HIVE-18573 : Use proper Calcite operator instead of UDFs (Slim Bouguerra via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/dcb3817d Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/dcb3817d Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/dcb3817d Branch: refs/heads/master Commit: dcb3817d6d1360b816e8687bbae8d7aa62dc2b20 Parents: ad87176 Author: Slim Bouguerra <[email protected]> Authored: Mon Jan 29 13:49:00 2018 -0800 Committer: Ashutosh Chauhan <[email protected]> Committed: Wed Feb 21 15:44:46 2018 -0800 ---------------------------------------------------------------------- .../calcite/reloperators/HiveConcat.java | 35 +++++ .../calcite/reloperators/HiveExtractDate.java | 3 +- .../translator/SqlFunctionConverter.java | 41 +++++ .../llap/bucket_map_join_tez_empty.q.out | 4 +- .../clientpositive/llap/subquery_in.q.out | 4 +- .../clientpositive/llap/subquery_notin.q.out | 2 +- .../clientpositive/llap/subquery_scalar.q.out | 149 ++++++++----------- .../clientpositive/spark/subquery_in.q.out | 4 +- .../clientpositive/spark/subquery_notin.q.out | 2 +- 9 files changed, 148 insertions(+), 96 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java new file mode 100644 index 0000000..36c34cc --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveConcat.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators; + +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlSpecialOperator; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.ReturnTypes; + +public class HiveConcat extends SqlSpecialOperator { + public static final SqlSpecialOperator INSTANCE = new HiveConcat(); + + private HiveConcat() { + super("||", SqlKind.OTHER_FUNCTION, 30, true, ReturnTypes.VARCHAR_2000, + InferTypes.RETURN_TYPE, null + ); + } +} + http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java index 4099733..a43f406 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveExtractDate.java @@ -22,6 +22,7 @@ import java.util.Set; import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlExtractFunction; import org.apache.calcite.sql.type.OperandTypes; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlTypeTransforms; @@ -43,7 +44,7 @@ public class HiveExtractDate extends SqlFunction { Sets.newHashSet(YEAR, QUARTER, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND); private HiveExtractDate(String name) { - super(name, SqlKind.EXTRACT, + super(name, SqlKind.EXTRACT, ReturnTypes.cascade(ReturnTypes.INTEGER, SqlTypeTransforms.FORCE_NULLABLE), null, OperandTypes.INTERVALINTERVAL_INTERVALDATETIME, SqlFunctionCategory.SYSTEM); http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 3f2eaef..cb0c2b1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunc import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction; import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn; @@ -235,6 +236,8 @@ public class SqlFunctionConverter { case CASE: case EXTRACT: case FLOOR: + case CEIL: + case LIKE: case OTHER_FUNCTION: node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text)); @@ -398,6 +401,44 @@ public class SqlFunctionConverter { hToken(HiveParser.Identifier, "floor_minute")); registerFunction("floor_second", HiveFloorDate.SECOND, hToken(HiveParser.Identifier, "floor_second")); + registerFunction("power", SqlStdOperatorTable.POWER, hToken(HiveParser.Identifier, "power")); + registerDuplicateFunction("pow", SqlStdOperatorTable.POWER, + hToken(HiveParser.Identifier, "power") + ); + registerFunction("ceil", SqlStdOperatorTable.CEIL, hToken(HiveParser.Identifier, "ceil")); + registerDuplicateFunction("ceiling", SqlStdOperatorTable.CEIL, + hToken(HiveParser.Identifier, "ceil") + ); + registerFunction("floor", SqlStdOperatorTable.FLOOR, hToken(HiveParser.Identifier, "floor")); + registerFunction("log10", SqlStdOperatorTable.LOG10, hToken(HiveParser.Identifier, "log10")); + registerFunction("ln", SqlStdOperatorTable.LN, hToken(HiveParser.Identifier, "ln")); + registerFunction("cos", SqlStdOperatorTable.COS, hToken(HiveParser.Identifier, "cos")); + registerFunction("sin", SqlStdOperatorTable.SIN, hToken(HiveParser.Identifier, "sin")); + registerFunction("tan", SqlStdOperatorTable.TAN, hToken(HiveParser.Identifier, "tan")); + registerFunction("concat", HiveConcat.INSTANCE, + hToken(HiveParser.Identifier, "concat") + ); + registerFunction("substring", SqlStdOperatorTable.SUBSTRING, + hToken(HiveParser.Identifier, "substring") + ); + registerFunction("like", SqlStdOperatorTable.LIKE, hToken(HiveParser.Identifier, "like")); + registerFunction("exp", SqlStdOperatorTable.EXP, hToken(HiveParser.Identifier, "exp")); + registerFunction("div", SqlStdOperatorTable.DIVIDE_INTEGER, + hToken(HiveParser.DIV, "div") + ); + registerFunction("sqrt", SqlStdOperatorTable.SQRT, hToken(HiveParser.Identifier, "sqrt")); + registerFunction("lower", SqlStdOperatorTable.LOWER, hToken(HiveParser.Identifier, "lower")); + registerFunction("upper", SqlStdOperatorTable.UPPER, hToken(HiveParser.Identifier, "upper")); + registerFunction("abs", SqlStdOperatorTable.ABS, hToken(HiveParser.Identifier, "abs")); + registerFunction("char_length", SqlStdOperatorTable.CHAR_LENGTH, + hToken(HiveParser.Identifier, "char_length") + ); + registerDuplicateFunction("character_length", SqlStdOperatorTable.CHAR_LENGTH, + hToken(HiveParser.Identifier, "char_length") + ); + registerFunction("length", SqlStdOperatorTable.CHARACTER_LENGTH, + hToken(HiveParser.Identifier, "length") + ); } private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) { http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out index 33825da..08df574 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez_empty.q.out @@ -71,10 +71,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 546 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/subquery_in.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index d1ee21b..b5f9641 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -1920,7 +1920,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: floor(p_retailprice) is not null (type: boolean) + predicate: p_retailprice is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) @@ -1989,7 +1989,7 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: floor(_col1) is not null (type: boolean) + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 13 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: floor(_col1) (type: bigint) http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/subquery_notin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index e894a44..50c18c8 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -2509,7 +2509,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 26 Data size: 16538 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((_col12 is null and floor(_col7) is not null and (_col10 >= _col9)) or (_col9 = 0)) (type: boolean) + predicate: ((_col12 is null and _col7 is not null and (_col10 >= _col9)) or (_col9 = 0)) (type: boolean) Statistics: Num rows: 26 Data size: 16538 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 15535f5..cec3daa 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -3161,26 +3161,24 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col2 (type: int) 1 _col2 (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 26 Data size: 6634 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END) (type: boolean) - Statistics: Num rows: 13 Data size: 3317 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + residual filter predicates: {(_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END)} + Statistics: Num rows: 8 Data size: 2504 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -3258,8 +3256,7 @@ POSTHOOK: Input: default@part 85768 86428 90681 -Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) @@ -3273,12 +3270,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 8 (XPROD_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (ONE_TO_ONE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Map 7 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (XPROD_EDGE), Reducer 5 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3296,7 +3292,7 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: part_null @@ -3316,7 +3312,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -3351,41 +3347,22 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 1489 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (not (_col1 like _col9)) (type: boolean) - Statistics: Num rows: 1 Data size: 1489 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1489 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 1489 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 - Statistics: Num rows: 1 Data size: 1506 Basic stats: COMPLETE Column stats: NONE + 2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(not (_col1 like _col9))} + Statistics: Num rows: 1 Data size: 1505 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 1 Data size: 1506 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1505 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) - Reducer 4 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -3410,7 +3387,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3422,7 +3399,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 368 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 8 + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3434,7 +3411,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 9 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -3459,8 +3436,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[36][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -Warning: Shuffle Join MERGEJOIN[37][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[32][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -3496,7 +3472,7 @@ POSTHOOK: Input: default@part_null 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: explain select * from part_null where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where part_null.p_type = pp.p_type) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where part_null.p_type = pp.p_type) @@ -3631,26 +3607,24 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col4 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14 - Statistics: Num rows: 7 Data size: 982 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (not (_col1 like CASE WHEN (_col14 is null) THEN (null) ELSE (_col13) END)) (type: boolean) + residual filter predicates: {(not (_col1 like CASE WHEN (_col14 is null) THEN (null) ELSE (_col13) END))} + Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 561 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -3707,7 +3681,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +Warning: Shuffle Join MERGEJOIN[42][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from part_null where p_brand NOT IN (select p_name from part) AND p_name NOT LIKE (select min(p_name) from part_null pp where part_null.p_type = pp.p_type) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -4339,7 +4313,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_name is not null and p_type is not null) (type: boolean) + predicate: p_type is not null (type: boolean) Statistics: Num rows: 26 Data size: 8242 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string), p_brand (type: string), p_type (type: string) @@ -4385,10 +4359,10 @@ STAGE PLANS: 0 _col1 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 742 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 1485 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 742 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4398,29 +4372,30 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 26 Data size: 11062 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (not (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END)) (type: boolean) - Statistics: Num rows: 13 Data size: 5625 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 2925 Basic stats: COMPLETE Column stats: COMPLETE + residual filter predicates: {(not (_col1 like CASE WHEN (_col4 is null) THEN (null) ELSE (_col3) END))} + Statistics: Num rows: 7 Data size: 3535 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7 Data size: 1575 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col0 is not null and _col1 is not null) (type: boolean) + Statistics: Num rows: 7 Data size: 1575 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 6 Data size: 1350 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 675 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/spark/subquery_in.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index f89c146..5e48a5c 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -1827,7 +1827,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: floor(p_retailprice) is not null (type: boolean) + predicate: p_retailprice is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) @@ -1890,7 +1890,7 @@ STAGE PLANS: outputColumnNames: _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: floor(_col1) is not null (type: boolean) + predicate: _col1 is not null (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: floor(_col1) (type: bigint) http://git-wip-us.apache.org/repos/asf/hive/blob/dcb3817d/ql/src/test/results/clientpositive/spark/subquery_notin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out index b2a1972..e2f26a9 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -2496,7 +2496,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col12 is null and floor(_col7) is not null and (_col10 >= _col9)) or (_col9 = 0)) (type: boolean) + predicate: ((_col12 is null and _col7 is not null and (_col10 >= _col9)) or (_col9 = 0)) (type: boolean) Statistics: Num rows: 18 Data size: 2537 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
