hive git commit: HIVE-12738: subquery with NOT IN failing due to ClassCastException (Matt McCline via Gunther Hagleitner)

gunther Thu, 24 Dec 2015 14:39:39 -0800

Repository: hive
Updated Branches:
  refs/heads/branch-2.0 237729430 -> 55c629691



HIVE-12738: subquery with NOT IN failing due to ClassCastException (Matt McCline via Gunther Hagleitner)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/55c62969
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/55c62969
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/55c62969

Branch: refs/heads/branch-2.0
Commit: 55c6296914c63d290a7db332eb4c55ab3397691a
Parents: 2377294
Author: Gunther Hagleitner <gunt...@apache.org>
Authored: Thu Dec 24 13:32:17 2015 -0800
Committer: Gunther Hagleitner <gunt...@apache.org>
Committed: Thu Dec 24 14:14:11 2015 -0800

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hive/ql/optimizer/physical/Vectorizer.java  | 111 ++++++++----
 .../clientpositive/vector_groupby_mapjoin.q     |  22 +++
 .../tez/vector_groupby_mapjoin.q.out            | 125 ++++++++++++++
 .../clientpositive/vector_groupby_mapjoin.q.out | 167 +++++++++++++++++++
 5 files changed, 389 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 1e7dce3..fd48cf4 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -264,6 +264,7 @@ minitez.query.files.shared=acid_globallimit.q,\
   vector_distinct_2.q,\
   vector_elt.q,\
   vector_groupby_3.q,\
+  vector_groupby_mapjoin.q,\
   vector_groupby_reduce.q,\
   vector_grouping_sets.q,\
   vector_if_expr.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index a842649..1629a5d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -338,6 +338,8 @@ public class Vectorizer implements PhysicalPlanResolver {
 
     String[] scratchTypeNameArray;
 
+    Set<Operator<? extends OperatorDesc>> nonVectorizedOps;
+
     VectorTaskColumnInfo() {
       partitionColumnCount = 0;
     }
@@ -355,6 +357,14 @@ public class Vectorizer implements PhysicalPlanResolver {
       this.scratchTypeNameArray = scratchTypeNameArray;
     }
 
+    public void setNonVectorizedOps(Set<Operator<? extends OperatorDesc>> nonVectorizedOps) {
+      this.nonVectorizedOps = nonVectorizedOps;
+    }
+
+    public Set<Operator<? extends OperatorDesc>> getNonVectorizedOps() {
+      return nonVectorizedOps;
+    }
+
     public void transferToBaseWork(BaseWork baseWork) {
 
       String[] columnNameArray = columnNames.toArray(new String[0]);
@@ -701,6 +711,7 @@ public class Vectorizer implements PhysicalPlanResolver {
           }
         }
       }
+      vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps());
       return true;
     }
 
@@ -819,6 +830,7 @@ public class Vectorizer implements PhysicalPlanResolver {
           }
         }
       }
+      vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps());
       return true;
     }
 
@@ -863,6 +875,14 @@ public class Vectorizer implements PhysicalPlanResolver {
     private final MapWork mapWork;
     private final boolean isTez;
 
+    // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batchs.
+    protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps =
+        new HashSet<Operator<? extends OperatorDesc>>();
+
+    public Set<Operator<? extends OperatorDesc>> getNonVectorizedOps() {
+      return nonVectorizedOps;
+    }
+
     public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) {
       this.mapWork = mapWork;
       this.isTez = isTez;
@@ -873,7 +893,7 @@ public class Vectorizer implements PhysicalPlanResolver {
         Object... nodeOutputs) throws SemanticException {
       for (Node n : stack) {
         Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
-        if (nonVectorizableChildOfGroupBy(op)) {
+        if (nonVectorizedOps.contains(op)) {
           return new Boolean(true);
         }
         boolean ret;
@@ -886,6 +906,12 @@ public class Vectorizer implements PhysicalPlanResolver {
           LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
           return new Boolean(false);
         }
+        // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
+        // vectorize the operators below it.
+        if (isVectorizedGroupByThatOutputsRows(op)) {
+          addOperatorChildrenToSet(op, nonVectorizedOps);
+          return new Boolean(true);
+        }
       }
       return new Boolean(true);
     }
@@ -893,12 +919,24 @@ public class Vectorizer implements PhysicalPlanResolver {
 
   class ReduceWorkValidationNodeProcessor implements NodeProcessor {
 
+    // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batchs.
+    protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps =
+        new HashSet<Operator<? extends OperatorDesc>>();
+
+    public Set<Operator<? extends OperatorDesc>> getNonVectorizeOps() {
+      return nonVectorizedOps;
+    }
+
+    public Set<Operator<? extends OperatorDesc>> getNonVectorizedOps() {
+      return nonVectorizedOps;
+    }
+
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
       for (Node n : stack) {
         Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
-        if (nonVectorizableChildOfGroupBy(op)) {
+        if (nonVectorizedOps.contains(op)) {
           return new Boolean(true);
         }
         boolean ret = validateReduceWorkOperator(op);
@@ -906,6 +944,12 @@ public class Vectorizer implements PhysicalPlanResolver {
           LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized.");
           return new Boolean(false);
         }
+        // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
+        // vectorize the operators below it.
+        if (isVectorizedGroupByThatOutputsRows(op)) {
+          addOperatorChildrenToSet(op, nonVectorizedOps);
+          return new Boolean(true);
+        }
       }
       return new Boolean(true);
     }
@@ -918,7 +962,10 @@ public class Vectorizer implements PhysicalPlanResolver {
     // The vectorization context for the Map or Reduce task.
     protected VectorizationContext taskVectorizationContext;
 
-    VectorizationNodeProcessor() {
+    protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps;
+
+    VectorizationNodeProcessor(Set<Operator<? extends OperatorDesc>> nonVectorizedOps) {
+      this.nonVectorizedOps = nonVectorizedOps;
     }
 
     public String[] getVectorScratchColumnTypeNames() {
@@ -997,7 +1044,7 @@ public class Vectorizer implements PhysicalPlanResolver {
 
     public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez,
         VectorTaskColumnInfo vectorTaskColumnInfo) {
-      super();
+      super(vectorTaskColumnInfo.getNonVectorizedOps());
       this.mWork = mWork;
       this.vectorTaskColumnInfo = vectorTaskColumnInfo;
       this.isTez = isTez;
@@ -1008,6 +1055,9 @@ public class Vectorizer implements PhysicalPlanResolver {
         Object... nodeOutputs) throws SemanticException {
 
       Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+      if (nonVectorizedOps.contains(op)) {
+        return null;
+      }
 
       VectorizationContext vContext = null;
 
@@ -1031,16 +1081,6 @@ public class Vectorizer implements PhysicalPlanResolver {
             + " using vectorization context" + vContext.toString());
       }
 
-      // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't
-      // vectorize the operators below it.
-      if (nonVectorizableChildOfGroupBy(op)) {
-        // No need to vectorize
-        if (!opsDone.contains(op)) {
-            opsDone.add(op);
-          }
-        return null;
-      }
-
       Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
 
       if (LOG.isDebugEnabled()) {
@@ -1070,7 +1110,7 @@ public class Vectorizer implements PhysicalPlanResolver {
     public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo,
             boolean isTez) {
 
-      super();
+      super(vectorTaskColumnInfo.getNonVectorizedOps());
       this.vectorTaskColumnInfo =  vectorTaskColumnInfo;
       rootVectorOp = null;
       this.isTez = isTez;
@@ -1081,6 +1121,9 @@ public class Vectorizer implements PhysicalPlanResolver {
         Object... nodeOutputs) throws SemanticException {
 
       Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+      if (nonVectorizedOps.contains(op)) {
+        return null;
+      }
 
       VectorizationContext vContext = null;
 
@@ -1110,16 +1153,6 @@ public class Vectorizer implements PhysicalPlanResolver {
       assert vContext != null;
       LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString());
 
-      // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't
-      // vectorize the operators below it.
-      if (nonVectorizableChildOfGroupBy(op)) {
-        // No need to vectorize
-        if (!opsDone.contains(op)) {
-          opsDone.add(op);
-        }
-        return null;
-      }
-
       Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
 
       if (LOG.isDebugEnabled()) {
@@ -1267,20 +1300,24 @@ public class Vectorizer implements PhysicalPlanResolver {
     return ret;
   }
 
-  public Boolean nonVectorizableChildOfGroupBy(Operator<? extends OperatorDesc> op) {
-    Operator<? extends OperatorDesc> currentOp = op;
-    while (currentOp.getParentOperators().size() > 0) {
-      currentOp = currentOp.getParentOperators().get(0);
-      if (currentOp.getType().equals(OperatorType.GROUPBY)) {
-        GroupByDesc desc = (GroupByDesc)currentOp.getConf();
-        boolean isVectorOutput = desc.getVectorDesc().isVectorOutput();
-        if (isVectorOutput) {
-          // This GROUP BY does vectorize its output.
-          return false;
-        }
-        return true;
+  private void addOperatorChildrenToSet(Operator<? extends OperatorDesc> op,
+      Set<Operator<? extends OperatorDesc>> nonVectorizedOps) {
+    for (Operator<? extends OperatorDesc> childOp : op.getChildOperators()) {
+      if (!nonVectorizedOps.contains(childOp)) {
+        nonVectorizedOps.add(childOp);
+        addOperatorChildrenToSet(childOp, nonVectorizedOps);
       }
     }
+  }
+
+  // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't
+  // vectorize the operators below it.
+   private Boolean isVectorizedGroupByThatOutputsRows(Operator<? extends OperatorDesc> op)
+      throws SemanticException {
+    if (op.getType().equals(OperatorType.GROUPBY)) {
+      GroupByDesc desc = (GroupByDesc) op.getConf();
+      return !desc.getVectorDesc().isVectorOutput();
+    }
     return false;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q b/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
new file mode 100644
index 0000000..a3cec04
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_groupby_mapjoin.q
@@ -0,0 +1,22 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=true;
+SET hive.vectorized.execution.enabled = true;
+set hive.fetch.task.conversion=none;
+SET hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+SET hive.auto.convert.join.noconditionaltask.size=1000000000;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+-- HIVE-12738 -- We are checking if a MapJoin after a GroupBy will work properly.
+explain
+select *
+from src
+where not key in
+(select key from src)
+order by key;
+
+select *
+from src
+where not key in
+(select key from src)
+order by key;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
new file mode 100644
index 0000000..dedcec8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/vector_groupby_mapjoin.q.out
@@ -0,0 +1,125 @@
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross 
product
+PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy 
will work properly.
+explain
+select *
+from src
+where not key in
+(select key from src)
+order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy 
will work properly.
+explain
+select *
+from src
+where not key in
+(select key from src)
+order by key
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE), Map 5 
(BROADCAST_EDGE)
+Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+
+Stage-0
+   Fetch Operator
+      limit:-1
+      Stage-1
+         Reducer 4 vectorized
+         File Output Operator [FS_34]
+            compressed:false
+            Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE 
Column stats: NONE
+            table:{"input 
format:":"org.apache.hadoop.mapred.TextInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
+            Select Operator [OP_33]
+            |  outputColumnNames:["_col0","_col1"]
+            |  Statistics:Num rows: 302 Data size: 3208 Basic stats: COMPLETE 
Column stats: NONE
+            |<-Reducer 3 [SIMPLE_EDGE] vectorized
+               Reduce Output Operator [RS_22]
+                  key expressions:_col0 (type: string)
+                  sort order:+
+                  Statistics:Num rows: 302 Data size: 3208 Basic stats: 
COMPLETE Column stats: NONE
+                  value expressions:_col1 (type: string)
+                  Select Operator [SEL_21]
+                     outputColumnNames:["_col0","_col1"]
+                     Statistics:Num rows: 302 Data size: 3208 Basic stats: 
COMPLETE Column stats: NONE
+                     Filter Operator [FIL_20]
+                        predicate:_col3 is null (type: boolean)
+                        Statistics:Num rows: 302 Data size: 3208 Basic stats: 
COMPLETE Column stats: NONE
+                        Map Join Operator [MAPJOIN_29]
+                        |  condition map:[{"":"Left Outer Join0 to 1"}]
+                        |  HybridGraceHashJoin:true
+                        |  keys:{"Reducer 3":"_col0 (type: string)","Map 
5":"_col0 (type: string)"}
+                        |  outputColumnNames:["_col0","_col1","_col3"]
+                        |  Statistics:Num rows: 605 Data size: 6427 Basic 
stats: COMPLETE Column stats: NONE
+                        |<-Map 5 [BROADCAST_EDGE]
+                        |  Reduce Output Operator [RS_18]
+                        |     key expressions:_col0 (type: string)
+                        |     Map-reduce partition columns:_col0 (type: string)
+                        |     sort order:+
+                        |     Statistics:Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE
+                        |     Select Operator [SEL_12]
+                        |        outputColumnNames:["_col0"]
+                        |        Statistics:Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE
+                        |        TableScan [TS_11]
+                        |           alias:src
+                        |           Statistics:Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE
+                        |<-Map Join Operator [MAPJOIN_28]
+                           |  condition map:[{"":"Inner Join 0 to 1"}]
+                           |  keys:{}
+                           |  outputColumnNames:["_col0","_col1"]
+                           |  Statistics:Num rows: 550 Data size: 5843 Basic 
stats: COMPLETE Column stats: NONE
+                           |<-Map 1 [BROADCAST_EDGE]
+                           |  Reduce Output Operator [RS_14]
+                           |     sort order:
+                           |     Statistics:Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE
+                           |     value expressions:_col0 (type: string), _col1 
(type: string)
+                           |     Select Operator [SEL_1]
+                           |        outputColumnNames:["_col0","_col1"]
+                           |        Statistics:Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE
+                           |        TableScan [TS_0]
+                           |           alias:src
+                           |           Statistics:Num rows: 500 Data size: 
5312 Basic stats: COMPLETE Column stats: NONE
+                           |<-Select Operator [SEL_10]
+                                 Statistics:Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE
+                                 Filter Operator [FIL_9]
+                                    predicate:(_col0 = 0) (type: boolean)
+                                    Statistics:Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: NONE
+                                    Group By Operator [OP_32]
+                                    |  aggregations:["count(VALUE._col0)"]
+                                    |  outputColumnNames:["_col0"]
+                                    |  Statistics:Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE
+                                    |<-Map 2 [SIMPLE_EDGE]
+                                       Reduce Output Operator [RS_6]
+                                          sort order:
+                                          Statistics:Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: NONE
+                                          value expressions:_col0 (type: 
bigint)
+                                          Group By Operator [GBY_5]
+                                             aggregations:["count()"]
+                                             outputColumnNames:["_col0"]
+                                             Statistics:Num rows: 1 Data size: 
8 Basic stats: COMPLETE Column stats: NONE
+                                             Select Operator [SEL_4]
+                                                Statistics:Num rows: 250 Data 
size: 2656 Basic stats: COMPLETE Column stats: NONE
+                                                Filter Operator [FIL_26]
+                                                   predicate:key is null 
(type: boolean)
+                                                   Statistics:Num rows: 250 
Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                                                   TableScan [TS_2]
+                                                      alias:src
+                                                      Statistics:Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+
+Warning: Map Join MAPJOIN[28][bigTable=?] in task 'Reducer 3' is a cross 
product
+PREHOOK: query: select *
+from src
+where not key in
+(select key from src)
+order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src
+where not key in
+(select key from src)
+order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####

http://git-wip-us.apache.org/repos/asf/hive/blob/55c62969/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
new file mode 100644
index 0000000..367eb59
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out
@@ -0,0 +1,167 @@
+Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-3:MAPRED' is a cross 
product
+PREHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy 
will work properly.
+explain
+select *
+from src
+where not key in
+(select key from src)
+order by key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-12738 -- We are checking if a MapJoin after a GroupBy 
will work properly.
+explain
+select *
+from src
+where not key in
+(select key from src)
+order by key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-8 depends on stages: Stage-4
+  Stage-3 depends on stages: Stage-8
+  Stage-0 depends on stages: Stage-3
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Filter Operator
+              predicate: key is null (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
+          Filter Operator
+            predicate: (_col0 = 0) (type: boolean)
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        $hdt$_0:src 
+          Fetch Operator
+            limit: -1
+        $hdt$_2:src 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        $hdt$_0:src 
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 
+                  1 
+        $hdt$_2:src 
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+            Select Operator
+              expressions: key (type: string)
+              outputColumnNames: _col0
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              keys:
+                0 
+                1 
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0, _col1, _col3
+                Statistics: Num rows: 605 Data size: 6427 Basic stats: 
COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: _col3 is null (type: boolean)
+                  Statistics: Num rows: 302 Data size: 3208 Basic stats: 
COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 302 Data size: 3208 Basic stats: 
COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Statistics: Num rows: 302 Data size: 3208 Basic stats: 
COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: 
string)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE 
Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[33][bigTable=?] in task 'Stage-3:MAPRED' is a cross 
product
+PREHOOK: query: select *
+from src
+where not key in
+(select key from src)
+order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select *
+from src
+where not key in
+(select key from src)
+order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####

hive git commit: HIVE-12738: subquery with NOT IN failing due to ClassCastException (Matt McCline via Gunther Hagleitner)

Reply via email to