HIVE-17475: Disable mapjoin using hint (Deepak Jaiswal reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a74107ef Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a74107ef Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a74107ef Branch: refs/heads/hive-14535 Commit: a74107ef1dfacd88921dbac1c694641302af6044 Parents: 5e57a2a Author: Jason Dere <jd...@hortonworks.com> Authored: Fri Sep 8 14:03:27 2017 -0700 Committer: Jason Dere <jd...@hortonworks.com> Committed: Fri Sep 8 14:03:27 2017 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../hive/ql/optimizer/ConvertJoinMapJoin.java | 4 +- .../hadoop/hive/ql/parse/ParseContext.java | 10 + .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 27 + .../test/queries/clientpositive/mapjoin_hint.q | 69 ++ .../clientpositive/llap/mapjoin_hint.q.out | 643 +++++++++++++++++++ 6 files changed, 753 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 663c95b..d9e760f 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -542,6 +542,7 @@ minillaplocal.query.files=\ load_dyn_part5.q,\ lvj_mapjoin.q,\ mapjoin_decimal.q,\ + mapjoin_hint.q,\ mapjoin_emit_interval.q,\ mergejoin_3way.q,\ mrr.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index a2414f3..9412e5f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -103,7 +103,9 @@ public class ConvertJoinMapJoin implements NodeProcessor { joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo); TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf); - if (!context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) { + boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & + !context.parseContext.getDisableMapJoin(); + if (!hiveConvertJoin) { // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx, maxSize); http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java index adf7f84..9ab42f2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java @@ -133,6 +133,8 @@ public class ParseContext { new HashMap<>(); private Map<String, List<SemiJoinHint>> semiJoinHints; + private boolean disableMapJoin; + public ParseContext() { } @@ -705,4 +707,12 @@ public class ParseContext { public Map<String, List<SemiJoinHint>> getSemiJoinHints() { return semiJoinHints; } + + public void setDisableMapJoin(boolean disableMapJoin) { + this.disableMapJoin = disableMapJoin; + } + + public boolean getDisableMapJoin() { + return disableMapJoin; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index cebb0af..1c74779 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -9118,6 +9118,31 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } /** + * disableMapJoinWithHint + * @param hints + * @return true if hint to disable hint is provided, else false + * @throws SemanticException + */ + private boolean disableMapJoinWithHint(List<ASTNode> hints) throws SemanticException { + if (hints == null || hints.size() == 0) return false; + for (ASTNode hintNode : hints) { + for (Node node : hintNode.getChildren()) { + ASTNode hint = (ASTNode) node; + if (hint.getChild(0).getType() != HintParser.TOK_MAPJOIN) continue; + Tree args = hint.getChild(1); + if (args.getChildCount() == 1) { + String text = args.getChild(0).getText(); + if (text.equalsIgnoreCase("None")) { + // Hint to disable mapjoin. + return true; + } + } + } + } + return false; + } + + /** * Merges node to target */ private void mergeJoins(QB qb, QBJoinTree node, QBJoinTree target, int pos, int[] tgtToNodeExprMap) { @@ -11377,6 +11402,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // Set the semijoin hints in parse context pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList())); + // Set the mapjoin hint if it needs to be disabled. + pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList())); // 5. Take care of view creation if (createVwDesc != null) { http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/test/queries/clientpositive/mapjoin_hint.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/mapjoin_hint.q b/ql/src/test/queries/clientpositive/mapjoin_hint.q new file mode 100644 index 0000000..75bebfa --- /dev/null +++ b/ql/src/test/queries/clientpositive/mapjoin_hint.q @@ -0,0 +1,69 @@ +set hive.compute.query.using.stats=false; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=true; +set hive.tez.dynamic.partition.pruning=true; +set hive.tez.dynamic.semijoin.reduction=true; +set hive.optimize.metadataonly=false; +set hive.optimize.index.filter=true; +set hive.stats.autogather=true; +set hive.tez.bigtable.minsize.semijoin.reduction=1; +set hive.tez.min.bloom.filter.entries=1; +set hive.stats.fetch.column.stats=true; + +-- Create Tables +create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC; +CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC; + +-- Add Partitions +alter table srcpart_date add partition (ds = "2008-04-08"); +alter table srcpart_date add partition (ds = "2008-04-09"); + +alter table srcpart_small add partition (ds = "2008-04-08"); +alter table srcpart_small add partition (ds = "2008-04-09"); + +-- Load +insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08"; +insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09"; +insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20; + + +analyze table srcpart_date compute statistics for columns; +analyze table srcpart_small compute statistics for columns; + +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=100000000000; + +--HIVE-17475 +EXPLAIN select /*+ mapjoin(None)*/ count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); +EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1); + + +-- Ensure that hint works even with CBO on, on a query with subquery. +create table tnull(i int, c char(2)); +insert into tnull values(NULL, NULL), (NULL, NULL); + +create table tempty(c char(2)); + +CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +; + +LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null; + +insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith'); + +explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name from part_null where p_name is null); +explain select * from part where p_name = (select p_name from part_null where p_name is null); http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out new file mode 100644 index 0000000..6a43f0d --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out @@ -0,0 +1,643 @@ +PREHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string ) stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_date +PREHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@srcpart_small +PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date +POSTHOOK: Output: default@srcpart_date@ds=2008-04-08 +PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_date +POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_date +POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 +PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small +POSTHOOK: Output: default@srcpart_small@ds=2008-04-08 +PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09") +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@srcpart_small +POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09") +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@srcpart_small +POSTHOOK: Output: default@srcpart_small@ds=2008-04-09 +PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@srcpart_date@ds=2008-04-08 +POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from srcpart where ds = "2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_date@ds=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@srcpart_small@ds=2008-04-09 +POSTHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from srcpart where ds = "2008-04-09" limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@srcpart_small@ds=2008-04-09 +POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: analyze table srcpart_date compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_date +PREHOOK: Input: default@srcpart_date@ds=2008-04-08 +PREHOOK: Input: default@srcpart_date@ds=2008-04-09 +PREHOOK: Output: default@srcpart_date +PREHOOK: Output: default@srcpart_date@ds=2008-04-08 +PREHOOK: Output: default@srcpart_date@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_date compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_date +POSTHOOK: Input: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_date@ds=2008-04-09 +POSTHOOK: Output: default@srcpart_date +POSTHOOK: Output: default@srcpart_date@ds=2008-04-08 +POSTHOOK: Output: default@srcpart_date@ds=2008-04-09 +#### A masked pattern was here #### +PREHOOK: query: analyze table srcpart_small compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart_small +PREHOOK: Input: default@srcpart_small@ds=2008-04-08 +PREHOOK: Input: default@srcpart_small@ds=2008-04-09 +PREHOOK: Output: default@srcpart_small +PREHOOK: Output: default@srcpart_small@ds=2008-04-08 +PREHOOK: Output: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +POSTHOOK: query: analyze table srcpart_small compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart_small +POSTHOOK: Input: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Input: default@srcpart_small@ds=2008-04-09 +POSTHOOK: Output: default@srcpart_small +POSTHOOK: Output: default@srcpart_small@ds=2008-04-08 +POSTHOOK: Output: default@srcpart_small@ds=2008-04-09 +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN select /*+ mapjoin(None)*/ count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select /*+ mapjoin(None)*/ count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min) AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter)))) (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart_date + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + input vertices: + 1 Map 3 + Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: srcpart_small + filterExpr: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Filter Operator + predicate: key1 is not null (type: boolean) + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: key1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table tnull(i int, c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tnull +POSTHOOK: query: create table tnull(i int, c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tnull +PREHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@tnull +POSTHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@tnull +POSTHOOK: Lineage: tnull.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: tnull.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table tempty(c char(2)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tempty +POSTHOOK: query: create table tempty(c char(2)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tempty +PREHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@part_null +POSTHOOK: query: CREATE TABLE part_null( +p_partkey INT, +p_name STRING, +p_mfgr STRING, +p_brand STRING, +p_type STRING, +p_size INT, +p_container STRING, +p_retailprice DOUBLE, +p_comment STRING +) +ROW FORMAT DELIMITED FIELDS TERMINATED BY "," +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@part_null +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@part_null +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@part_null +PREHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +PREHOOK: type: QUERY +PREHOOK: Output: default@part_null +POSTHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@part_null +POSTHOOK: Lineage: part_null.p_brand SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_comment SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_container SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_mfgr SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_partkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_retailprice EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_size EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: part_null.p_type SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name from part_null where p_name is null) +PREHOOK: type: QUERY +POSTHOOK: query: explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name from part_null where p_name is null) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + filterExpr: (p_name = null) (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_name = null) (type: boolean) + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: part_null + filterExpr: p_name is null (type: boolean) + Statistics: Num rows: 32 Data size: 5888 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Reducer 3' is a cross product +PREHOOK: query: explain select * from part where p_name = (select p_name from part_null where p_name is null) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from part where p_name = (select p_name from part_null where p_name is null) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: part + filterExpr: (p_name = null) (type: boolean) + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (p_name = null) (type: boolean) + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: p_partkey (type: int), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan + alias: part_null + filterExpr: p_name is null (type: boolean) + Statistics: Num rows: 32 Data size: 5888 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: part_null + filterExpr: p_name is null (type: boolean) + Statistics: Num rows: 32 Data size: 5888 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_name is null (type: boolean) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (sq_count_check(_col0) <= 1) (type: boolean) + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 + 1 + 2 + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + input vertices: + 0 Map 1 + 2 Map 4 + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +