[10/38] hive git commit: HIVE-14530: Union All query returns incorrect results (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

sershe Wed, 07 Sep 2016 18:53:21 -0700

HIVE-14530: Union All query returns incorrect results (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)



Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/131631e0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/131631e0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/131631e0

Branch: refs/heads/hive-14535
Commit: 131631e0e5275e8408a4db48e568573aaf220141
Parents: 349445c
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
Authored: Fri Sep 2 07:58:36 2016 +0100
Committer: Jesus Camacho Rodriguez <jcama...@apache.org>
Committed: Fri Sep 2 07:58:36 2016 +0100

----------------------------------------------------------------------
 .../calcite/stats/HiveRelMdPredicates.java      |  24 +-
 ql/src/test/queries/clientpositive/union37.q    | 125 +++++
 .../clientpositive/correlationoptimizer8.q.out  |   6 +-
 ql/src/test/results/clientpositive/join34.q.out |   2 +-
 ql/src/test/results/clientpositive/join35.q.out |   2 +-
 .../results/clientpositive/spark/join34.q.out   |   2 +-
 .../results/clientpositive/spark/join35.q.out   |   2 +-
 .../test/results/clientpositive/union37.q.out   | 522 +++++++++++++++++++
 8 files changed, 670 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
index 09e0fc1..e468573 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java
@@ -19,9 +19,9 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
 
 import java.util.ArrayList;
 import java.util.BitSet;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
-import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -213,15 +213,16 @@ public class HiveRelMdPredicates extends RelMdPredicates {
   public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) {
     RexBuilder rB = union.getCluster().getRexBuilder();
 
-    Map<String, RexNode> finalPreds = new LinkedHashMap<>();
-    Map<String, RexNode> finalResidualPreds = new LinkedHashMap<>();
+    Map<String, RexNode> finalPreds = new HashMap<>();
+    List<RexNode> finalResidualPreds = new ArrayList<>();
     for (int i = 0; i < union.getInputs().size(); i++) {
       RelNode input = union.getInputs().get(i);
       RelOptPredicateList info = mq.getPulledUpPredicates(input);
       if (info.pulledUpPredicates.isEmpty()) {
         return RelOptPredicateList.EMPTY;
       }
-      Map<String, RexNode> preds = new LinkedHashMap<>();
+      Map<String, RexNode> preds = new HashMap<>();
+      List<RexNode> residualPreds = new ArrayList<>();
       for (RexNode pred : info.pulledUpPredicates) {
         final String predString = pred.toString();
         if (i == 0) {
@@ -231,21 +232,28 @@ public class HiveRelMdPredicates extends RelMdPredicates {
         if (finalPreds.containsKey(predString)) {
           preds.put(predString, pred);
         } else {
-          finalResidualPreds.put(predString, pred);
+          residualPreds.add(pred);
         }
       }
+      // Add new residual preds
+      finalResidualPreds.add(RexUtil.composeConjunction(rB, residualPreds, 
false));
       // Add those that are not part of the final set to residual
       for (Entry<String, RexNode> e : finalPreds.entrySet()) {
         if (!preds.containsKey(e.getKey())) {
-          finalResidualPreds.put(e.getKey(), e.getValue());
+          // This node was in previous union inputs, but it is not in this one
+          for (int j = 0; j < i; j++) {
+            finalResidualPreds.set(j, RexUtil.composeConjunction(rB, 
Lists.newArrayList(
+                    finalResidualPreds.get(j), e.getValue()), false));
+          }
         }
       }
+      // Final preds
       finalPreds = preds;
     }
 
     List<RexNode> preds = new ArrayList<>(finalPreds.values());
-    RexNode disjPred = RexUtil.composeDisjunction(rB, 
finalResidualPreds.values(), true);
-    if (disjPred != null) {
+    RexNode disjPred = RexUtil.composeDisjunction(rB, finalResidualPreds, 
false);
+    if (!disjPred.isAlwaysTrue()) {
       preds.add(disjPred);
     }
     return RelOptPredicateList.of(preds);

http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/queries/clientpositive/union37.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union37.q b/ql/src/test/queries/clientpositive/union37.q
new file mode 100644
index 0000000..23c130c
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union37.q
@@ -0,0 +1,125 @@
+create table l_test1 (id bigint,val string,trans_date string) row format 
delimited fields terminated by ' ' ;
+insert into l_test1 values (1, "table_1", "2016-08-11");
+
+create table l_test2 (id bigint,val string,trans_date string) row format 
delimited fields terminated by ' ' ;  
+insert into l_test2 values (2, "table_2", "2016-08-11");
+
+explain
+select 
+    id,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    val,
+    trans_date
+from l_test2 ;
+
+select 
+    id,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    val,
+    trans_date
+from l_test2 ;
+
+explain
+select 
+    id,
+    999,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date
+from l_test2 ;
+
+select 
+    id,
+    999,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date
+from l_test2 ;
+
+explain
+select 
+    id,
+    999,
+    666,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    666,
+    val,
+    trans_date
+from l_test2 ;
+
+select 
+    id,
+    999,
+    666,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    666,
+    val,
+    trans_date
+from l_test2 ;
+
+explain
+select 
+    id,
+    999,
+    'table_1' ,
+    trans_date,
+    '2016-11-11'
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date,
+    trans_date
+from l_test2 ;
+
+select 
+    id,
+    999,
+    'table_1' ,
+    trans_date,
+    '2016-11-11'
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date,
+    trans_date
+from l_test2 ;

http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/correlationoptimizer8.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/correlationoptimizer8.q.out b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out
index 18c4aad..4667149 100644
--- a/ql/src/test/results/clientpositive/correlationoptimizer8.q.out
+++ b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out
@@ -103,7 +103,7 @@ STAGE PLANS:
             alias: x
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 
20.0)) and key is not null) (type: boolean)
+              predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 
100.0)) and key is not null) (type: boolean)
               Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
@@ -290,7 +290,7 @@ STAGE PLANS:
             alias: x
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 
20.0)) and key is not null) (type: boolean)
+              predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 
100.0)) and key is not null) (type: boolean)
               Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
@@ -963,7 +963,7 @@ STAGE PLANS:
             alias: x
             Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE 
Column stats: NONE
             Filter Operator
-              predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 
20.0)) and key is not null) (type: boolean)
+              predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 
100.0)) and key is not null) (type: boolean)
               Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/join34.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join34.q.out b/ql/src/test/results/clientpositive/join34.q.out
index ff1c8e1..e8f51ea 100644
--- a/ql/src/test/results/clientpositive/join34.q.out
+++ b/ql/src/test/results/clientpositive/join34.q.out
@@ -55,7 +55,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 
20.0)) and key is not null) (type: boolean)
+              predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 
100.0)) and key is not null) (type: boolean)
               Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/join35.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join35.q.out b/ql/src/test/results/clientpositive/join35.q.out
index d766e20..e8d69fd 100644
--- a/ql/src/test/results/clientpositive/join35.q.out
+++ b/ql/src/test/results/clientpositive/join35.q.out
@@ -162,7 +162,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 
20.0)) and key is not null) (type: boolean)
+              predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 
100.0)) and key is not null) (type: boolean)
               Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE 
Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/spark/join34.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join34.q.out b/ql/src/test/results/clientpositive/spark/join34.q.out
index d14b28e..2d97046 100644
--- a/ql/src/test/results/clientpositive/spark/join34.q.out
+++ b/ql/src/test/results/clientpositive/spark/join34.q.out
@@ -198,7 +198,7 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (((UDFToDouble(key) > 100.0) or 
(UDFToDouble(key) < 20.0)) and key is not null) (type: boolean)
+                    predicate: (((UDFToDouble(key) < 20.0) or 
(UDFToDouble(key) > 100.0)) and key is not null) (type: boolean)
                     Statistics: Num rows: 16 Data size: 122 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/spark/join35.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/join35.q.out b/ql/src/test/results/clientpositive/spark/join35.q.out
index 21ed82d..80c9998 100644
--- a/ql/src/test/results/clientpositive/spark/join35.q.out
+++ b/ql/src/test/results/clientpositive/spark/join35.q.out
@@ -212,7 +212,7 @@ STAGE PLANS:
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
-                    predicate: (((UDFToDouble(key) > 100.0) or 
(UDFToDouble(key) < 20.0)) and key is not null) (type: boolean)
+                    predicate: (((UDFToDouble(key) < 20.0) or 
(UDFToDouble(key) > 100.0)) and key is not null) (type: boolean)
                     Statistics: Num rows: 16 Data size: 122 Basic stats: 
COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)

http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/union37.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union37.q.out b/ql/src/test/results/clientpositive/union37.q.out
new file mode 100644
index 0000000..e8a6f1d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/union37.q.out
@@ -0,0 +1,522 @@
+PREHOOK: query: create table l_test1 (id bigint,val string,trans_date string) 
row format delimited fields terminated by ' '
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@l_test1
+POSTHOOK: query: create table l_test1 (id bigint,val string,trans_date string) 
row format delimited fields terminated by ' '
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@l_test1
+PREHOOK: query: insert into l_test1 values (1, "table_1", "2016-08-11")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@l_test1
+POSTHOOK: query: insert into l_test1 values (1, "table_1", "2016-08-11")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@l_test1
+POSTHOOK: Lineage: l_test1.id EXPRESSION 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: l_test1.trans_date SIMPLE 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+POSTHOOK: Lineage: l_test1.val SIMPLE 
[(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: create table l_test2 (id bigint,val string,trans_date string) 
row format delimited fields terminated by ' '
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@l_test2
+POSTHOOK: query: create table l_test2 (id bigint,val string,trans_date string) 
row format delimited fields terminated by ' '
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@l_test2
+PREHOOK: query: insert into l_test2 values (2, "table_2", "2016-08-11")
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@l_test2
+POSTHOOK: query: insert into l_test2 values (2, "table_2", "2016-08-11")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@l_test2
+POSTHOOK: Lineage: l_test2.id EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
+POSTHOOK: Lineage: l_test2.trans_date SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
+POSTHOOK: Lineage: l_test2.val SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
+PREHOOK: query: explain
+select 
+    id,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    val,
+    trans_date
+from l_test2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 
+    id,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    val,
+    trans_date
+from l_test2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: l_test1
+            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: id (type: bigint), 'table_1' (type: string), 
trans_date (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+              Union
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: l_test2
+            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: id (type: bigint), val (type: string), trans_date 
(type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+              Union
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                  table:
+                      input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select 
+    id,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    val,
+    trans_date
+from l_test2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@l_test1
+PREHOOK: Input: default@l_test2
+#### A masked pattern was here ####
+POSTHOOK: query: select 
+    id,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    val,
+    trans_date
+from l_test2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@l_test1
+POSTHOOK: Input: default@l_test2
+#### A masked pattern was here ####
+1      table_1 2016-08-11
+2      table_2 2016-08-11
+PREHOOK: query: explain
+select 
+    id,
+    999,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date
+from l_test2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 
+    id,
+    999,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date
+from l_test2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: l_test1
+            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: id (type: bigint), 'table_1' (type: string), 
trans_date (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+              Union
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), 999 (type: int), _col1 
(type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 40 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: l_test2
+            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: id (type: bigint), val (type: string), trans_date 
(type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+              Union
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), 999 (type: int), _col1 
(type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 40 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select 
+    id,
+    999,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date
+from l_test2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@l_test1
+PREHOOK: Input: default@l_test2
+#### A masked pattern was here ####
+POSTHOOK: query: select 
+    id,
+    999,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date
+from l_test2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@l_test1
+POSTHOOK: Input: default@l_test2
+#### A masked pattern was here ####
+1      999     table_1 2016-08-11
+2      999     table_2 2016-08-11
+PREHOOK: query: explain
+select 
+    id,
+    999,
+    666,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    666,
+    val,
+    trans_date
+from l_test2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 
+    id,
+    999,
+    666,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    666,
+    val,
+    trans_date
+from l_test2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: l_test1
+            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: id (type: bigint), 'table_1' (type: string), 
trans_date (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+              Union
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), 999 (type: int), 666 
(type: int), _col1 (type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 40 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: l_test2
+            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: id (type: bigint), val (type: string), trans_date 
(type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+              Union
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), 999 (type: int), 666 
(type: int), _col1 (type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 40 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select 
+    id,
+    999,
+    666,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    666,
+    val,
+    trans_date
+from l_test2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@l_test1
+PREHOOK: Input: default@l_test2
+#### A masked pattern was here ####
+POSTHOOK: query: select 
+    id,
+    999,
+    666,
+    'table_1' ,
+    trans_date
+from l_test1
+union all
+select 
+    id,
+    999,
+    666,
+    val,
+    trans_date
+from l_test2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@l_test1
+POSTHOOK: Input: default@l_test2
+#### A masked pattern was here ####
+1      999     666     table_1 2016-08-11
+2      999     666     table_2 2016-08-11
+PREHOOK: query: explain
+select 
+    id,
+    999,
+    'table_1' ,
+    trans_date,
+    '2016-11-11'
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date,
+    trans_date
+from l_test2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select 
+    id,
+    999,
+    'table_1' ,
+    trans_date,
+    '2016-11-11'
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date,
+    trans_date
+from l_test2
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: l_test1
+            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: id (type: bigint), 'table_1' (type: string), 
trans_date (type: string), '2016-11-11' (type: string)
+              outputColumnNames: _col0, _col2, _col3, _col4
+              Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+              Union
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), 999 (type: int), _col2 
(type: string), _col3 (type: string), _col4 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 40 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          TableScan
+            alias: l_test2
+            Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column 
stats: NONE
+            Select Operator
+              expressions: id (type: bigint), val (type: string), trans_date 
(type: string), trans_date (type: string)
+              outputColumnNames: _col0, _col2, _col3, _col4
+              Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE 
Column stats: NONE
+              Union
+                Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), 999 (type: int), _col2 
(type: string), _col3 (type: string), _col4 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE 
Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 40 Basic stats: 
COMPLETE Column stats: NONE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select 
+    id,
+    999,
+    'table_1' ,
+    trans_date,
+    '2016-11-11'
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date,
+    trans_date
+from l_test2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@l_test1
+PREHOOK: Input: default@l_test2
+#### A masked pattern was here ####
+POSTHOOK: query: select 
+    id,
+    999,
+    'table_1' ,
+    trans_date,
+    '2016-11-11'
+from l_test1
+union all
+select 
+    id,
+    999,
+    val,
+    trans_date,
+    trans_date
+from l_test2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@l_test1
+POSTHOOK: Input: default@l_test2
+#### A masked pattern was here ####
+1      999     table_1 2016-08-11      2016-11-11
+2      999     table_2 2016-08-11      2016-08-11

[10/38] hive git commit: HIVE-14530: Union All query returns incorrect results (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Reply via email to