HIVE-15848: count or sum distinct incorrect when hive.optimize.reducededuplication set to true (Zoltan Haindrich reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b9ad6dc3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b9ad6dc3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b9ad6dc3 Branch: refs/heads/hive-14535 Commit: b9ad6dc3867efce4dd833e519c788aab280dabd9 Parents: 3e94fb2 Author: Haindrich Zoltán (kirk) <k...@rxd.hu> Authored: Wed Mar 1 22:30:13 2017 +0100 Committer: Haindrich Zoltán (kirk) <k...@rxd.hu> Committed: Wed Mar 1 22:35:49 2017 +0100 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../correlation/ReduceSinkDeDuplication.java | 4 + .../reduce_deduplicate_distinct.q | 54 +++ .../llap/reduce_deduplicate_distinct.q.out | 483 +++++++++++++++++++ 4 files changed, 542 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/b9ad6dc3/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 807b124..9c6a069 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -413,6 +413,7 @@ minillap.query.files=acid_bucket_pruning.q,\ llap_udf.q,\ llapdecider.q,\ reduce_deduplicate.q,\ + reduce_deduplicate_distinct.q, \ remote_script.q,\ tez_aggr_part_stats.q,\ tez_union_view.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/b9ad6dc3/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java index d53efbf..2b075be 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java @@ -360,6 +360,10 @@ public class ReduceSinkDeDuplication extends Transform { if (moveRSOrderTo == null) { return null; } + // if cRS is being used for distinct - the two reduce sinks are incompatible + if (cConf.getDistinctColumnIndices().size() >= 2) { + return null; + } Integer moveReducerNumTo = checkNumReducer(cConf.getNumReducers(), pConf.getNumReducers()); if (moveReducerNumTo == null || moveReducerNumTo > 0 && cConf.getNumReducers() < minReducer) { http://git-wip-us.apache.org/repos/asf/hive/blob/b9ad6dc3/ql/src/test/queries/clientpositive/reduce_deduplicate_distinct.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/reduce_deduplicate_distinct.q b/ql/src/test/queries/clientpositive/reduce_deduplicate_distinct.q new file mode 100644 index 0000000..840025c --- /dev/null +++ b/ql/src/test/queries/clientpositive/reduce_deduplicate_distinct.q @@ -0,0 +1,54 @@ +create table count_distinct_test(id int,key int,name int); + +insert into count_distinct_test values (1,1,2),(1,2,3),(1,3,2),(1,4,2),(1,5,3); + +-- simple case; no need for opt +explain select id,count(distinct key),count(distinct name) +from count_distinct_test +group by id; + +select id,count(distinct key),count(distinct name) +from count_distinct_test +group by id; + +-- dedup on +set hive.optimize.reducededuplication=true; + +-- candidate1 +explain select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id; + +select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id; + +-- candidate2 +explain select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id; + +select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id; + +-- deduplication off +set hive.optimize.reducededuplication=false; + +-- candidate1 +explain select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id; + +select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id; + +-- candidate2 +explain select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id; + +select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id; http://git-wip-us.apache.org/repos/asf/hive/blob/b9ad6dc3/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out new file mode 100644 index 0000000..e5b8d11 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out @@ -0,0 +1,483 @@ +PREHOOK: query: create table count_distinct_test(id int,key int,name int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@count_distinct_test +POSTHOOK: query: create table count_distinct_test(id int,key int,name int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@count_distinct_test +PREHOOK: query: insert into count_distinct_test values (1,1,2),(1,2,3),(1,3,2),(1,4,2),(1,5,3) +PREHOOK: type: QUERY +PREHOOK: Output: default@count_distinct_test +POSTHOOK: query: insert into count_distinct_test values (1,1,2),(1,2,3),(1,3,2),(1,4,2),(1,5,3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@count_distinct_test +POSTHOOK: Lineage: count_distinct_test.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: count_distinct_test.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: count_distinct_test.name EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: explain select id,count(distinct key),count(distinct name) +from count_distinct_test +group by id +PREHOOK: type: QUERY +POSTHOOK: query: explain select id,count(distinct key),count(distinct name) +from count_distinct_test +group by id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: count_distinct_test + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), key (type: int), name (type: int) + outputColumnNames: id, key, name + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT key), count(DISTINCT name) + keys: id (type: int), key (type: int), name (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select id,count(distinct key),count(distinct name) +from count_distinct_test +group by id +PREHOOK: type: QUERY +PREHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +POSTHOOK: query: select id,count(distinct key),count(distinct name) +from count_distinct_test +group by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +1 5 2 +PREHOOK: query: explain select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id +PREHOOK: type: QUERY +POSTHOOK: query: explain select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: count_distinct_test + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), key (type: int), name (type: int) + outputColumnNames: id, key, name + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: id (type: int), key (type: int), name (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col1), count(DISTINCT _col2) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id +PREHOOK: type: QUERY +PREHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +POSTHOOK: query: select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +1 5 2 +PREHOOK: query: explain select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id +PREHOOK: type: QUERY +POSTHOOK: query: explain select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: count_distinct_test + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), key (type: int), name (type: int) + outputColumnNames: id, key, name + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: id (type: int), key (type: int), name (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col2), count(DISTINCT _col1) + keys: _col0 (type: int), _col2 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id +PREHOOK: type: QUERY +PREHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +POSTHOOK: query: select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +1 2 5 +PREHOOK: query: explain select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id +PREHOOK: type: QUERY +POSTHOOK: query: explain select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: count_distinct_test + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), key (type: int), name (type: int) + outputColumnNames: id, key, name + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: id (type: int), key (type: int), name (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col1), count(DISTINCT _col2) + keys: _col0 (type: int), _col1 (type: int), _col2 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id +PREHOOK: type: QUERY +PREHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +POSTHOOK: query: select id,count(Distinct key),count(Distinct name) +from (select id,key,name from count_distinct_test group by id,key,name)m +group by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +1 5 2 +PREHOOK: query: explain select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id +PREHOOK: type: QUERY +POSTHOOK: query: explain select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: count_distinct_test + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: id (type: int), key (type: int), name (type: int) + outputColumnNames: id, key, name + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: id (type: int), key (type: int), name (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) + Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT _col2), count(DISTINCT _col1) + keys: _col0 (type: int), _col2 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id +PREHOOK: type: QUERY +PREHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +POSTHOOK: query: select id,count(Distinct name),count(Distinct key) +from (select id,key,name from count_distinct_test group by id,name,key)m +group by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@count_distinct_test +#### A masked pattern was here #### +1 2 5