HIVE-14530: Union All query returns incorrect results (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/131631e0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/131631e0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/131631e0 Branch: refs/heads/hive-14535 Commit: 131631e0e5275e8408a4db48e568573aaf220141 Parents: 349445c Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Fri Sep 2 07:58:36 2016 +0100 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Fri Sep 2 07:58:36 2016 +0100 ---------------------------------------------------------------------- .../calcite/stats/HiveRelMdPredicates.java | 24 +- ql/src/test/queries/clientpositive/union37.q | 125 +++++ .../clientpositive/correlationoptimizer8.q.out | 6 +- ql/src/test/results/clientpositive/join34.q.out | 2 +- ql/src/test/results/clientpositive/join35.q.out | 2 +- .../results/clientpositive/spark/join34.q.out | 2 +- .../results/clientpositive/spark/join35.q.out | 2 +- .../test/results/clientpositive/union37.q.out | 522 +++++++++++++++++++ 8 files changed, 670 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index 09e0fc1..e468573 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -19,9 +19,9 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; import java.util.ArrayList; import java.util.BitSet; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -213,15 +213,16 @@ public class HiveRelMdPredicates extends RelMdPredicates { public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) { RexBuilder rB = union.getCluster().getRexBuilder(); - Map<String, RexNode> finalPreds = new LinkedHashMap<>(); - Map<String, RexNode> finalResidualPreds = new LinkedHashMap<>(); + Map<String, RexNode> finalPreds = new HashMap<>(); + List<RexNode> finalResidualPreds = new ArrayList<>(); for (int i = 0; i < union.getInputs().size(); i++) { RelNode input = union.getInputs().get(i); RelOptPredicateList info = mq.getPulledUpPredicates(input); if (info.pulledUpPredicates.isEmpty()) { return RelOptPredicateList.EMPTY; } - Map<String, RexNode> preds = new LinkedHashMap<>(); + Map<String, RexNode> preds = new HashMap<>(); + List<RexNode> residualPreds = new ArrayList<>(); for (RexNode pred : info.pulledUpPredicates) { final String predString = pred.toString(); if (i == 0) { @@ -231,21 +232,28 @@ public class HiveRelMdPredicates extends RelMdPredicates { if (finalPreds.containsKey(predString)) { preds.put(predString, pred); } else { - finalResidualPreds.put(predString, pred); + residualPreds.add(pred); } } + // Add new residual preds + finalResidualPreds.add(RexUtil.composeConjunction(rB, residualPreds, false)); // Add those that are not part of the final set to residual for (Entry<String, RexNode> e : finalPreds.entrySet()) { if (!preds.containsKey(e.getKey())) { - finalResidualPreds.put(e.getKey(), e.getValue()); + // This node was in previous union inputs, but it is not in this one + for (int j = 0; j < i; j++) { + finalResidualPreds.set(j, RexUtil.composeConjunction(rB, Lists.newArrayList( + finalResidualPreds.get(j), e.getValue()), false)); + } } } + // Final preds finalPreds = preds; } List<RexNode> preds = new ArrayList<>(finalPreds.values()); - RexNode disjPred = RexUtil.composeDisjunction(rB, finalResidualPreds.values(), true); - if (disjPred != null) { + RexNode disjPred = RexUtil.composeDisjunction(rB, finalResidualPreds, false); + if (!disjPred.isAlwaysTrue()) { preds.add(disjPred); } return RelOptPredicateList.of(preds); http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/queries/clientpositive/union37.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union37.q b/ql/src/test/queries/clientpositive/union37.q new file mode 100644 index 0000000..23c130c --- /dev/null +++ b/ql/src/test/queries/clientpositive/union37.q @@ -0,0 +1,125 @@ +create table l_test1 (id bigint,val string,trans_date string) row format delimited fields terminated by ' ' ; +insert into l_test1 values (1, "table_1", "2016-08-11"); + +create table l_test2 (id bigint,val string,trans_date string) row format delimited fields terminated by ' ' ; +insert into l_test2 values (2, "table_2", "2016-08-11"); + +explain +select + id, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + val, + trans_date +from l_test2 ; + +select + id, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + val, + trans_date +from l_test2 ; + +explain +select + id, + 999, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + val, + trans_date +from l_test2 ; + +select + id, + 999, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + val, + trans_date +from l_test2 ; + +explain +select + id, + 999, + 666, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + 666, + val, + trans_date +from l_test2 ; + +select + id, + 999, + 666, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + 666, + val, + trans_date +from l_test2 ; + +explain +select + id, + 999, + 'table_1' , + trans_date, + '2016-11-11' +from l_test1 +union all +select + id, + 999, + val, + trans_date, + trans_date +from l_test2 ; + +select + id, + 999, + 'table_1' , + trans_date, + '2016-11-11' +from l_test1 +union all +select + id, + 999, + val, + trans_date, + trans_date +from l_test2 ; http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/correlationoptimizer8.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer8.q.out b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out index 18c4aad..4667149 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer8.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -103,7 +103,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -290,7 +290,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) @@ -963,7 +963,7 @@ STAGE PLANS: alias: x Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/join34.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/join34.q.out b/ql/src/test/results/clientpositive/join34.q.out index ff1c8e1..e8f51ea 100644 --- a/ql/src/test/results/clientpositive/join34.q.out +++ b/ql/src/test/results/clientpositive/join34.q.out @@ -55,7 +55,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/join35.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/join35.q.out b/ql/src/test/results/clientpositive/join35.q.out index d766e20..e8d69fd 100644 --- a/ql/src/test/results/clientpositive/join35.q.out +++ b/ql/src/test/results/clientpositive/join35.q.out @@ -162,7 +162,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/spark/join34.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/join34.q.out b/ql/src/test/results/clientpositive/spark/join34.q.out index d14b28e..2d97046 100644 --- a/ql/src/test/results/clientpositive/spark/join34.q.out +++ b/ql/src/test/results/clientpositive/spark/join34.q.out @@ -198,7 +198,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/spark/join35.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/spark/join35.q.out b/ql/src/test/results/clientpositive/spark/join35.q.out index 21ed82d..80c9998 100644 --- a/ql/src/test/results/clientpositive/spark/join35.q.out +++ b/ql/src/test/results/clientpositive/spark/join35.q.out @@ -212,7 +212,7 @@ STAGE PLANS: GatherStats: false Filter Operator isSamplingPred: false - predicate: (((UDFToDouble(key) > 100.0) or (UDFToDouble(key) < 20.0)) and key is not null) (type: boolean) + predicate: (((UDFToDouble(key) < 20.0) or (UDFToDouble(key) > 100.0)) and key is not null) (type: boolean) Statistics: Num rows: 16 Data size: 122 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) http://git-wip-us.apache.org/repos/asf/hive/blob/131631e0/ql/src/test/results/clientpositive/union37.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/union37.q.out b/ql/src/test/results/clientpositive/union37.q.out new file mode 100644 index 0000000..e8a6f1d --- /dev/null +++ b/ql/src/test/results/clientpositive/union37.q.out @@ -0,0 +1,522 @@ +PREHOOK: query: create table l_test1 (id bigint,val string,trans_date string) row format delimited fields terminated by ' ' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@l_test1 +POSTHOOK: query: create table l_test1 (id bigint,val string,trans_date string) row format delimited fields terminated by ' ' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@l_test1 +PREHOOK: query: insert into l_test1 values (1, "table_1", "2016-08-11") +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@l_test1 +POSTHOOK: query: insert into l_test1 values (1, "table_1", "2016-08-11") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@l_test1 +POSTHOOK: Lineage: l_test1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: l_test1.trans_date SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: l_test1.val SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: create table l_test2 (id bigint,val string,trans_date string) row format delimited fields terminated by ' ' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@l_test2 +POSTHOOK: query: create table l_test2 (id bigint,val string,trans_date string) row format delimited fields terminated by ' ' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@l_test2 +PREHOOK: query: insert into l_test2 values (2, "table_2", "2016-08-11") +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@l_test2 +POSTHOOK: query: insert into l_test2 values (2, "table_2", "2016-08-11") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@l_test2 +POSTHOOK: Lineage: l_test2.id EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: l_test2.trans_date SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: l_test2.val SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: explain +select + id, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + val, + trans_date +from l_test2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + id, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + val, + trans_date +from l_test2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: l_test1 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), 'table_1' (type: string), trans_date (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: l_test2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), val (type: string), trans_date (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + id, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + val, + trans_date +from l_test2 +PREHOOK: type: QUERY +PREHOOK: Input: default@l_test1 +PREHOOK: Input: default@l_test2 +#### A masked pattern was here #### +POSTHOOK: query: select + id, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + val, + trans_date +from l_test2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@l_test1 +POSTHOOK: Input: default@l_test2 +#### A masked pattern was here #### +1 table_1 2016-08-11 +2 table_2 2016-08-11 +PREHOOK: query: explain +select + id, + 999, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + val, + trans_date +from l_test2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + id, + 999, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + val, + trans_date +from l_test2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: l_test1 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), 'table_1' (type: string), trans_date (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), 999 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: l_test2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), val (type: string), trans_date (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), 999 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + id, + 999, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + val, + trans_date +from l_test2 +PREHOOK: type: QUERY +PREHOOK: Input: default@l_test1 +PREHOOK: Input: default@l_test2 +#### A masked pattern was here #### +POSTHOOK: query: select + id, + 999, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + val, + trans_date +from l_test2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@l_test1 +POSTHOOK: Input: default@l_test2 +#### A masked pattern was here #### +1 999 table_1 2016-08-11 +2 999 table_2 2016-08-11 +PREHOOK: query: explain +select + id, + 999, + 666, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + 666, + val, + trans_date +from l_test2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + id, + 999, + 666, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + 666, + val, + trans_date +from l_test2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: l_test1 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), 'table_1' (type: string), trans_date (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), 999 (type: int), 666 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: l_test2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), val (type: string), trans_date (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), 999 (type: int), 666 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + id, + 999, + 666, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + 666, + val, + trans_date +from l_test2 +PREHOOK: type: QUERY +PREHOOK: Input: default@l_test1 +PREHOOK: Input: default@l_test2 +#### A masked pattern was here #### +POSTHOOK: query: select + id, + 999, + 666, + 'table_1' , + trans_date +from l_test1 +union all +select + id, + 999, + 666, + val, + trans_date +from l_test2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@l_test1 +POSTHOOK: Input: default@l_test2 +#### A masked pattern was here #### +1 999 666 table_1 2016-08-11 +2 999 666 table_2 2016-08-11 +PREHOOK: query: explain +select + id, + 999, + 'table_1' , + trans_date, + '2016-11-11' +from l_test1 +union all +select + id, + 999, + val, + trans_date, + trans_date +from l_test2 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select + id, + 999, + 'table_1' , + trans_date, + '2016-11-11' +from l_test1 +union all +select + id, + 999, + val, + trans_date, + trans_date +from l_test2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: l_test1 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), 'table_1' (type: string), trans_date (type: string), '2016-11-11' (type: string) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), 999 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: l_test2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: bigint), val (type: string), trans_date (type: string), trans_date (type: string) + outputColumnNames: _col0, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), 999 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select + id, + 999, + 'table_1' , + trans_date, + '2016-11-11' +from l_test1 +union all +select + id, + 999, + val, + trans_date, + trans_date +from l_test2 +PREHOOK: type: QUERY +PREHOOK: Input: default@l_test1 +PREHOOK: Input: default@l_test2 +#### A masked pattern was here #### +POSTHOOK: query: select + id, + 999, + 'table_1' , + trans_date, + '2016-11-11' +from l_test1 +union all +select + id, + 999, + val, + trans_date, + trans_date +from l_test2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@l_test1 +POSTHOOK: Input: default@l_test2 +#### A masked pattern was here #### +1 999 table_1 2016-08-11 2016-11-11 +2 999 table_2 2016-08-11 2016-08-11