This is an automated email from the ASF dual-hosted git repository. krisztiankasa pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new f265cc25905 HIVE-27876 Incorrect query results on tables with ClusterBy & SortBy (Ramesh Kumar Thangarajan, reviewed by Krisztian Kasa, Attila Turoczy) f265cc25905 is described below commit f265cc25905d0bdbdc65a16720e33fb21ee79da9 Author: Ramesh Kumar <rameshkumarthangara...@gmail.com> AuthorDate: Wed Dec 20 01:02:36 2023 -0800 HIVE-27876 Incorrect query results on tables with ClusterBy & SortBy (Ramesh Kumar Thangarajan, reviewed by Krisztian Kasa, Attila Turoczy) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 6 +- .../queries/clientpositive/groupby_sort_2_23.q | 10 + .../results/clientpositive/cbo_rp_auto_join1.q.out | 457 ++++++++++++++------- .../llap/auto_sortmerge_join_10.q.out | 295 ++++++++----- .../clientpositive/llap/bucket_groupby.q.out | 89 +++- .../clientpositive/llap/groupby_sort_2_23.q.out | 180 ++++++++ 6 files changed, 779 insertions(+), 258 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 714df4c22a9..1fa63ae3821 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2019,10 +2019,10 @@ public class HiveConf extends Configuration { HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, "Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has \n" + "common group by keys, it will be optimized to generate single M/R job."), - HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true, + HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", false, "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + - "the group by in the mapper by using BucketizedHiveInputFormat. The only downside to this\n" + - "is that it limits the number of mappers to the number of files."), + "the group by in the mapper by using BucketizedHiveInputFormat. This can only work if the number of files to be\n" + + "processed is exactly 1. The downside to this is that it limits the number of mappers to the number of files."), HIVE_DEFAULT_NULLS_LAST("hive.default.nulls.last", true, "Whether to set NULLS LAST as the default null ordering for ASC order and " + "NULLS FIRST for DESC order."), diff --git a/ql/src/test/queries/clientpositive/groupby_sort_2_23.q b/ql/src/test/queries/clientpositive/groupby_sort_2_23.q new file mode 100644 index 00000000000..b241bee6855 --- /dev/null +++ b/ql/src/test/queries/clientpositive/groupby_sort_2_23.q @@ -0,0 +1,10 @@ +set hive.mapred.mode=nonstrict; +set hive.map.aggr=true; +set hive.explain.user=false; + +create table test_bucket(age int, name string, dept string) clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as ORC; +insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2'); +insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2'); + +explain vectorization detail select age, name, count(*) from test_bucket group by age, name having count(*) > 1; +select age, name, count(*) from test_bucket group by age, name having count(*) > 1; diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index 8f3788d40fa..5bdf0edc2b4 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -92,8 +92,10 @@ POSTHOOK: Input: default@tbl2_n12 #### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -112,49 +114,53 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + bucketGroup: true keys: key (type: int) - mode: final + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: key, $f1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: $f1 (type: bigint) + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan - alias: subq1:b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - keys: key (type: int) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: key, $f1 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: $f1 (type: bigint) + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) Reduce Operator Tree: Join Operator condition map: @@ -181,7 +187,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -205,6 +211,50 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: subq1:b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -255,7 +305,8 @@ POSTHOOK: Input: default@tbl2_n12 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -273,20 +324,42 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + bucketGroup: true keys: key (type: int) - mode: final + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: key + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: subq2:subq1:b filterExpr: key is not null (type: boolean) @@ -325,7 +398,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -421,16 +494,20 @@ POSTHOOK: Input: default@tbl1_n13 POSTHOOK: Input: default@tbl2_n12 #### A masked pattern was here #### STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3, Stage-5 Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-0 depends on stages: Stage-1 + Stage-4 is a root stage + Stage-5 depends on stages: Stage-4, Stage-6 + Stage-6 is a root stage + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: src1:subq1:a + alias: src2:subq2:a filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -442,23 +519,89 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + bucketGroup: true keys: key (type: int) - mode: final + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: key, $f1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: $f1 (type: bigint) + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan - alias: src1:subq1:b + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint), $f10 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: key, $f1, $f10, $f11, $f100 + Statistics: Num rows: 6 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), ($f11 * $f100) (type: bigint), ($f1 * $f10) (type: bigint) + outputColumnNames: key, cnt1, cnt11 + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src2:subq2:b filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -470,30 +613,27 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + bucketGroup: true keys: key (type: int) - mode: final + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: key, $f1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: $f1 (type: bigint) + value expressions: _col1 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: key, $f1, $f10 - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -501,11 +641,11 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - alias: src2:subq2:a + alias: src1:subq1:a filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -517,23 +657,74 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + bucketGroup: true keys: key (type: int) - mode: final + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: key, $f1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: $f1 (type: bigint) + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: TableScan - alias: src2:subq2:b + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: key, $f1, $f10 + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src1:subq1:b filterExpr: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator @@ -545,51 +736,33 @@ STAGE PLANS: Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() + bucketGroup: true keys: key (type: int) - mode: final + minReductionHashAggr: 0.99 + mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col1 (type: bigint) - outputColumnNames: key, $f1 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: $f1 (type: bigint) - TableScan - Reduce Output Operator - key expressions: key (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: $f1 (type: bigint), $f10 (type: bigint) + value expressions: _col1 (type: bigint) + Execution mode: vectorized Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - outputColumnNames: key, $f1, $f10, $f11, $f100 - Statistics: Num rows: 6 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int), ($f11 * $f100) (type: bigint), ($f1 * $f10) (type: bigint) - outputColumnNames: key, cnt1, cnt11 - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out index 7b93c1d6a54..35538b91174 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out @@ -372,7 +372,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -384,11 +386,22 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) + Group By Operator + bucketGroup: true + keys: key (type: int) + minReductionHashAggr: 0.4 + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Dummy Store + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 Map Operator Tree: TableScan alias: a @@ -397,31 +410,54 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: int) - mode: final + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count() - minReductionHashAggr: 0.85714287 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + minReductionHashAggr: 0.85714287 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -492,6 +528,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -503,11 +541,22 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) + Group By Operator + bucketGroup: true + keys: key (type: int) + minReductionHashAggr: 0.4 + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Dummy Store + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 5 Map Operator Tree: TableScan alias: a @@ -516,30 +565,53 @@ STAGE PLANS: Filter Operator predicate: (key < 6) (type: boolean) Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: key (type: int) - mode: final + Select Operator + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: llap + Execution mode: vectorized, llap + LLAP IO: all inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Statistics: Num rows: 7 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -717,10 +789,29 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 Map Operator Tree: TableScan alias: t2 @@ -730,51 +821,50 @@ STAGE PLANS: predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + bucketGroup: true keys: key (type: int) - mode: final + minReductionHashAggr: 0.4 + mode: hash outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: true (type: boolean), _col0 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE - Dummy Store - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Merge Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col1 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: _col1 is null (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col0) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col0) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint) - Execution mode: llap - Reducer 2 + value expressions: _col0 (type: bigint) + Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -789,6 +879,25 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: true (type: boolean), _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: boolean) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out index e0a6a183faa..f85c3bbf22c 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out @@ -1512,30 +1512,79 @@ POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=102 #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: clustergroupby + filterExpr: (ds = '102') (type: boolean) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: key (type: string), value (type: string) + null sort order: zz + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + top n: 10 + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: string), value (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316 Data size: 58776 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 316 Data size: 58776 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 316 Data size: 58776 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1860 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Fetch Operator limit: 10 Processor Tree: - TableScan - alias: clustergroupby - filterExpr: (ds = '102') (type: boolean) - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Group By Operator - aggregations: count() - keys: key (type: string), value (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Limit - Number of rows: 10 - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - ListSink + ListSink PREHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key, value limit 10 PREHOOK: type: QUERY @@ -1547,7 +1596,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=102 #### A masked pattern was here #### -0 3 10 1 100 2 103 2 @@ -1555,8 +1603,9 @@ POSTHOOK: Input: default@clustergroupby@ds=102 105 1 11 1 111 1 -113 2 114 1 +0 3 +113 2 PREHOOK: query: drop table clustergroupby PREHOOK: type: DROPTABLE PREHOOK: Input: default@clustergroupby diff --git a/ql/src/test/results/clientpositive/llap/groupby_sort_2_23.q.out b/ql/src/test/results/clientpositive/llap/groupby_sort_2_23.q.out new file mode 100644 index 00000000000..780cb6ccd27 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/groupby_sort_2_23.q.out @@ -0,0 +1,180 @@ +PREHOOK: query: create table test_bucket(age int, name string, dept string) clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_bucket +POSTHOOK: query: create table test_bucket(age int, name string, dept string) clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored as ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_bucket +PREHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_bucket +POSTHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_bucket +POSTHOOK: Lineage: test_bucket.age SCRIPT [] +POSTHOOK: Lineage: test_bucket.dept SCRIPT [] +POSTHOOK: Lineage: test_bucket.name SCRIPT [] +PREHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_bucket +POSTHOOK: query: insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_bucket +POSTHOOK: Lineage: test_bucket.age SCRIPT [] +POSTHOOK: Lineage: test_bucket.dept SCRIPT [] +POSTHOOK: Lineage: test_bucket.name SCRIPT [] +PREHOOK: query: explain vectorization detail select age, name, count(*) from test_bucket group by age, name having count(*) > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_bucket +#### A masked pattern was here #### +POSTHOOK: query: explain vectorization detail select age, name, count(*) from test_bucket group by age, name having count(*) > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_bucket +#### A masked pattern was here #### +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test_bucket + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:age:int, 1:name:string, 2:dept:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>, 4:ROW__IS__DELETED:boolean] + Select Operator + expressions: age (type: int), name (type: string) + outputColumnNames: age, name + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int, col 1:string + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + keys: age (type: int), name (type: string) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:int, 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:bigint + Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1] + dataColumns: age:int, name:string, dept:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: zz + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY._col0:int, KEY._col1:string, VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 2:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int, col 1:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0] + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 202 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 2:bigint, val 1) + predicate: (_col2 > 1L) (type: boolean) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select age, name, count(*) from test_bucket group by age, name having count(*) > 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_bucket +#### A masked pattern was here #### +POSTHOOK: query: select age, name, count(*) from test_bucket group by age, name having count(*) > 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_bucket +#### A masked pattern was here #### +1 user1 2 +2 user2 2