Repository: hive Updated Branches: refs/heads/branch-2.0 237729430 -> 55c629691
HIVE-12738: subquery with NOT IN failing due to ClassCastException (Matt McCline via Gunther Hagleitner) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/55c62969 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/55c62969 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/55c62969 Branch: refs/heads/branch-2.0 Commit: 55c6296914c63d290a7db332eb4c55ab3397691a Parents: 2377294 Author: Gunther Hagleitner <gunt...@apache.org> Authored: Thu Dec 24 13:32:17 2015 -0800 Committer: Gunther Hagleitner <gunt...@apache.org> Committed: Thu Dec 24 14:14:11 2015 -0800 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../hive/ql/optimizer/physical/Vectorizer.java | 111 ++++++++---- .../clientpositive/vector_groupby_mapjoin.q | 22 +++ .../tez/vector_groupby_mapjoin.q.out | 125 ++++++++++++++ .../clientpositive/vector_groupby_mapjoin.q.out | 167 +++++++++++++++++++ 5 files changed, 389 insertions(+), 37 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 1e7dce3..fd48cf4 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -264,6 +264,7 @@ minitez.query.files.shared=acid_globallimit.q,\ vector_distinct_2.q,\ vector_elt.q,\ vector_groupby_3.q,\ + vector_groupby_mapjoin.q,\ vector_groupby_reduce.q,\ vector_grouping_sets.q,\ vector_if_expr.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index a842649..1629a5d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -338,6 +338,8 @@ public class Vectorizer implements PhysicalPlanResolver { String[] scratchTypeNameArray; + Set<Operator<? extends OperatorDesc>> nonVectorizedOps; + VectorTaskColumnInfo() { partitionColumnCount = 0; } @@ -355,6 +357,14 @@ public class Vectorizer implements PhysicalPlanResolver { this.scratchTypeNameArray = scratchTypeNameArray; } + public void setNonVectorizedOps(Set<Operator<? extends OperatorDesc>> nonVectorizedOps) { + this.nonVectorizedOps = nonVectorizedOps; + } + + public Set<Operator<? extends OperatorDesc>> getNonVectorizedOps() { + return nonVectorizedOps; + } + public void transferToBaseWork(BaseWork baseWork) { String[] columnNameArray = columnNames.toArray(new String[0]); @@ -701,6 +711,7 @@ public class Vectorizer implements PhysicalPlanResolver { } } } + vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps()); return true; } @@ -819,6 +830,7 @@ public class Vectorizer implements PhysicalPlanResolver { } } } + vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps()); return true; } @@ -863,6 +875,14 @@ public class Vectorizer implements PhysicalPlanResolver { private final MapWork mapWork; private final boolean isTez; + // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batchs. + protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps = + new HashSet<Operator<? extends OperatorDesc>>(); + + public Set<Operator<? 
extends OperatorDesc>> getNonVectorizedOps() { + return nonVectorizedOps; + } + public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) { this.mapWork = mapWork; this.isTez = isTez; @@ -873,7 +893,7 @@ public class Vectorizer implements PhysicalPlanResolver { Object... nodeOutputs) throws SemanticException { for (Node n : stack) { Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n; - if (nonVectorizableChildOfGroupBy(op)) { + if (nonVectorizedOps.contains(op)) { return new Boolean(true); } boolean ret; @@ -886,6 +906,12 @@ public class Vectorizer implements PhysicalPlanResolver { LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized."); return new Boolean(false); } + // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't + // vectorize the operators below it. + if (isVectorizedGroupByThatOutputsRows(op)) { + addOperatorChildrenToSet(op, nonVectorizedOps); + return new Boolean(true); + } } return new Boolean(true); } @@ -893,12 +919,24 @@ public class Vectorizer implements PhysicalPlanResolver { class ReduceWorkValidationNodeProcessor implements NodeProcessor { + // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batchs. + protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps = + new HashSet<Operator<? extends OperatorDesc>>(); + + public Set<Operator<? extends OperatorDesc>> getNonVectorizeOps() { + return nonVectorizedOps; + } + + public Set<Operator<? extends OperatorDesc>> getNonVectorizedOps() { + return nonVectorizedOps; + } + @Override public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { for (Node n : stack) { Operator<? extends OperatorDesc> op = (Operator<? 
extends OperatorDesc>) n; - if (nonVectorizableChildOfGroupBy(op)) { + if (nonVectorizedOps.contains(op)) { return new Boolean(true); } boolean ret = validateReduceWorkOperator(op); @@ -906,6 +944,12 @@ public class Vectorizer implements PhysicalPlanResolver { LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized."); return new Boolean(false); } + // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't + // vectorize the operators below it. + if (isVectorizedGroupByThatOutputsRows(op)) { + addOperatorChildrenToSet(op, nonVectorizedOps); + return new Boolean(true); + } } return new Boolean(true); } @@ -918,7 +962,10 @@ public class Vectorizer implements PhysicalPlanResolver { // The vectorization context for the Map or Reduce task. protected VectorizationContext taskVectorizationContext; - VectorizationNodeProcessor() { + protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps; + + VectorizationNodeProcessor(Set<Operator<? extends OperatorDesc>> nonVectorizedOps) { + this.nonVectorizedOps = nonVectorizedOps; } public String[] getVectorScratchColumnTypeNames() { @@ -997,7 +1044,7 @@ public class Vectorizer implements PhysicalPlanResolver { public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez, VectorTaskColumnInfo vectorTaskColumnInfo) { - super(); + super(vectorTaskColumnInfo.getNonVectorizedOps()); this.mWork = mWork; this.vectorTaskColumnInfo = vectorTaskColumnInfo; this.isTez = isTez; @@ -1008,6 +1055,9 @@ public class Vectorizer implements PhysicalPlanResolver { Object... nodeOutputs) throws SemanticException { Operator<? extends OperatorDesc> op = (Operator<? 
extends OperatorDesc>) nd; + if (nonVectorizedOps.contains(op)) { + return null; + } VectorizationContext vContext = null; @@ -1031,16 +1081,6 @@ public class Vectorizer implements PhysicalPlanResolver { + " using vectorization context" + vContext.toString()); } - // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't - // vectorize the operators below it. - if (nonVectorizableChildOfGroupBy(op)) { - // No need to vectorize - if (!opsDone.contains(op)) { - opsDone.add(op); - } - return null; - } - Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez); if (LOG.isDebugEnabled()) { @@ -1070,7 +1110,7 @@ public class Vectorizer implements PhysicalPlanResolver { public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo, boolean isTez) { - super(); + super(vectorTaskColumnInfo.getNonVectorizedOps()); this.vectorTaskColumnInfo = vectorTaskColumnInfo; rootVectorOp = null; this.isTez = isTez; @@ -1081,6 +1121,9 @@ public class Vectorizer implements PhysicalPlanResolver { Object... nodeOutputs) throws SemanticException { Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd; + if (nonVectorizedOps.contains(op)) { + return null; + } VectorizationContext vContext = null; @@ -1110,16 +1153,6 @@ public class Vectorizer implements PhysicalPlanResolver { assert vContext != null; LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString()); - // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't - // vectorize the operators below it. - if (nonVectorizableChildOfGroupBy(op)) { - // No need to vectorize - if (!opsDone.contains(op)) { - opsDone.add(op); - } - return null; - } - Operator<? 
extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez); if (LOG.isDebugEnabled()) { @@ -1267,20 +1300,24 @@ public class Vectorizer implements PhysicalPlanResolver { return ret; } - public Boolean nonVectorizableChildOfGroupBy(Operator<? extends OperatorDesc> op) { - Operator<? extends OperatorDesc> currentOp = op; - while (currentOp.getParentOperators().size() > 0) { - currentOp = currentOp.getParentOperators().get(0); - if (currentOp.getType().equals(OperatorType.GROUPBY)) { - GroupByDesc desc = (GroupByDesc)currentOp.getConf(); - boolean isVectorOutput = desc.getVectorDesc().isVectorOutput(); - if (isVectorOutput) { - // This GROUP BY does vectorize its output. - return false; - } - return true; + private void addOperatorChildrenToSet(Operator<? extends OperatorDesc> op, + Set<Operator<? extends OperatorDesc>> nonVectorizedOps) { + for (Operator<? extends OperatorDesc> childOp : op.getChildOperators()) { + if (!nonVectorizedOps.contains(childOp)) { + nonVectorizedOps.add(childOp); + addOperatorChildrenToSet(childOp, nonVectorizedOps); } } + } + + // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't + // vectorize the operators below it. + private Boolean isVectorizedGroupByThatOutputsRows(Operator<? 
extends OperatorDesc> op) + throws SemanticException { + if (op.getType().equals(OperatorType.GROUPBY)) { + GroupByDesc desc = (GroupByDesc) op.getConf(); + return !desc.getVectorDesc().isVectorOutput(); + } return false; } http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q b/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q new file mode 100644 index 0000000..a3cec04 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q @@ -0,0 +1,22 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=true; +SET hive.vectorized.execution.enabled = true; +set hive.fetch.task.conversion=none; +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; +set hive.exec.dynamic.partition.mode=nonstrict; + +-- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. +explain +select * +from src +where not key in +(select key from src) +order by key; + +select * +from src +where not key in +(select key from src) +order by key; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out new file mode 100644 index 0000000..dedcec8 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out @@ -0,0 +1,125 @@ +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross product +PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. 
+explain +select * +from src +where not key in +(select key from src) +order by key +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. +explain +select * +from src +where not key in +(select key from src) +order by key +POSTHOOK: type: QUERY +Plan optimized by CBO. + +Vertex dependency in root stage +Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) +Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + +Stage-0 + Fetch Operator + limit:-1 + Stage-1 + Reducer 4 vectorized + File Output Operator [FS_34] + compressed:false + Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} + Select Operator [OP_33] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] vectorized + Reduce Output Operator [RS_22] + key expressions:_col0 (type: string) + sort order:+ + Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: string) + Select Operator [SEL_21] + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_20] + predicate:_col3 is null (type: boolean) + Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_29] + | condition map:[{"":"Left Outer Join0 to 1"}] + | HybridGraceHashJoin:true + | keys:{"Reducer 3":"_col0 (type: string)","Map 5":"_col0 (type: string)"} + | outputColumnNames:["_col0","_col1","_col3"] + | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + |<-Map 5 [BROADCAST_EDGE] + | Reduce Output Operator [RS_18] + 
| key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_12] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_11] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Map Join Operator [MAPJOIN_28] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + |<-Map 1 [BROADCAST_EDGE] + | Reduce Output Operator [RS_14] + | sort order: + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: string) + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:src + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_10] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_9] + predicate:(_col0 = 0) (type: boolean) + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator [OP_32] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + |<-Map 2 [SIMPLE_EDGE] + Reduce Output Operator [RS_6] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_5] + aggregations:["count()"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator 
[SEL_4] + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_26] + predicate:key is null (type: boolean) + Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_2] + alias:src + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + +Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross product +PREHOOK: query: select * +from src +where not key in +(select key from src) +order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src +where not key in +(select key from src) +order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out new file mode 100644 index 0000000..367eb59 --- /dev/null +++ b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -0,0 +1,167 @@ +Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. +explain +select * +from src +where not key in +(select key from src) +order by key +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly. 
+explain +select * +from src +where not key in +(select key from src) +order by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-8 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:src + Fetch Operator + limit: -1 + $hdt$_2:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:src + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 + 1 + $hdt$_2:src + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col3 is null (type: boolean) + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-3:MAPRED' is a cross product +PREHOOK: query: select * +from src +where not key in +(select key from src) +order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src +where not key in +(select key from src) +order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here ####