http://git-wip-us.apache.org/repos/asf/hive/blob/6f5c1135/ql/src/test/results/clientpositive/bucket_groupby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucket_groupby.q.out b/ql/src/test/results/clientpositive/bucket_groupby.q.out deleted file mode 100644 index f808bba..0000000 --- a/ql/src/test/results/clientpositive/bucket_groupby.q.out +++ /dev/null @@ -1,1635 +0,0 @@ -PREHOOK: query: create table clustergroupby(key string, value string) partitioned by(ds string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@clustergroupby -POSTHOOK: query: create table clustergroupby(key string, value string) partitioned by(ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@clustergroupby -PREHOOK: query: describe extended clustergroupby -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@clustergroupby -POSTHOOK: query: describe extended clustergroupby -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@clustergroupby -key string -value string -ds string - -# Partition Information -# col_name data_type comment - -ds string - -#### A masked pattern was here #### -PREHOOK: query: alter table clustergroupby clustered by (key) into 1 buckets -PREHOOK: type: ALTERTABLE_CLUSTER_SORT -PREHOOK: Input: default@clustergroupby -PREHOOK: Output: default@clustergroupby -POSTHOOK: query: alter table clustergroupby clustered by (key) into 1 buckets -POSTHOOK: type: ALTERTABLE_CLUSTER_SORT -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Output: default@clustergroupby -PREHOOK: query: insert overwrite table clustergroupby partition (ds='100') select key, value from src sort by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@clustergroupby@ds=100 -POSTHOOK: query: insert overwrite table clustergroupby partition (ds='100') select key, value from src sort by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@clustergroupby@ds=100 -POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: clustergroupby PARTITION(ds=100).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=100 -#### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='100' group by key order by key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=100 -#### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1 -PREHOOK: query: describe extended clustergroupby -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@clustergroupby -POSTHOOK: query: describe extended clustergroupby -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@clustergroupby -key string -value string -ds string - -# Partition Information -# col_name data_type comment - -ds string - -#### A masked pattern was here #### -PREHOOK: query: insert overwrite table clustergroupby partition (ds='101') select key, value from src distribute by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@clustergroupby@ds=101 -POSTHOOK: query: insert overwrite table clustergroupby partition (ds='101') select key, value from src distribute by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@clustergroupby@ds=101 -POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: clustergroupby PARTITION(ds=101).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: --normal-- -explain -select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: --normal-- -explain -select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key order by key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1 -PREHOOK: query: --function-- -explain -select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: --function-- -explain -select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: length(key) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -POSTHOOK: query: select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -1 10 -2 74 -3 416 -PREHOOK: query: explain -select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: abs(length(key)) (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: int) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -POSTHOOK: query: select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -1 10 -2 74 -3 416 -PREHOOK: query: --constant-- -explain -select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: --constant-- -explain -select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1 -PREHOOK: query: --subquery-- -explain -select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: --subquery-- -explain -select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key order by key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -val_0 3 -val_10 1 -val_100 2 -val_103 2 -val_104 2 -val_105 1 -val_11 1 -val_111 1 -val_113 2 -val_114 1 -PREHOOK: query: explain -select key, count(1) from clustergroupby group by key -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, count(1) from clustergroupby group by key -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select key, count(1) from clustergroupby group by key -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=100 -PREHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby group by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=100 -POSTHOOK: Input: default@clustergroupby@ds=101 -#### A masked pattern was here #### -0 6 -10 2 -100 4 -103 4 -104 4 -105 2 -11 2 -111 2 -113 4 -114 2 -116 2 -118 4 -119 6 -12 4 -120 4 -125 4 -126 2 -128 6 -129 4 -131 2 -133 2 -134 4 -136 2 -137 4 -138 8 -143 2 -145 2 -146 4 -149 4 -15 4 -150 2 -152 4 -153 2 -155 2 -156 2 -157 2 -158 2 -160 2 -162 2 -163 2 -164 4 -165 4 -166 2 -167 6 -168 2 -169 8 -17 2 -170 2 -172 4 -174 4 -175 4 -176 4 -177 2 -178 2 -179 4 -18 4 -180 2 -181 2 -183 2 -186 2 -187 6 -189 2 -19 2 -190 2 -191 4 -192 2 -193 6 -194 2 -195 4 -196 2 -197 4 -199 6 -2 2 -20 2 -200 4 -201 2 -202 2 -203 4 -205 4 -207 4 -208 6 -209 4 -213 4 -214 2 -216 4 -217 4 -218 2 -219 4 -221 4 -222 2 -223 4 -224 4 -226 2 -228 2 -229 4 -230 10 -233 4 -235 2 -237 4 -238 4 -239 4 -24 4 -241 2 -242 4 -244 2 -247 2 -248 2 -249 2 -252 2 -255 4 -256 4 -257 2 -258 2 -26 4 -260 2 -262 2 -263 2 -265 4 -266 2 -27 2 -272 4 -273 6 -274 2 -275 2 -277 8 -278 4 -28 2 -280 4 -281 4 -282 4 -283 2 -284 2 -285 2 -286 2 -287 2 -288 4 -289 2 -291 2 -292 2 -296 2 -298 6 -30 2 -302 2 -305 2 -306 2 -307 4 -308 2 -309 4 -310 2 -311 6 -315 2 -316 6 -317 4 -318 6 -321 4 -322 4 -323 2 -325 4 -327 6 -33 2 -331 4 -332 2 -333 4 -335 2 -336 2 -338 2 -339 2 -34 2 -341 2 -342 4 -344 4 -345 2 -348 10 -35 6 -351 2 -353 4 -356 2 -360 2 -362 2 -364 2 -365 2 -366 2 -367 4 -368 2 -369 6 -37 4 -373 2 -374 2 -375 2 -377 2 -378 2 -379 2 -382 4 -384 6 -386 2 -389 2 -392 2 -393 2 -394 2 -395 4 -396 6 -397 4 -399 4 -4 2 -400 2 -401 10 -402 2 -403 6 -404 4 -406 8 -407 2 -409 6 -41 2 -411 2 -413 4 -414 4 -417 6 -418 2 -419 2 -42 4 -421 2 -424 4 -427 2 -429 4 -43 2 -430 6 -431 6 -432 2 -435 2 -436 2 -437 2 -438 6 -439 4 -44 2 -443 2 -444 2 -446 2 -448 2 -449 2 -452 2 -453 2 -454 6 -455 2 -457 2 -458 4 -459 4 -460 2 -462 4 -463 4 -466 6 -467 2 -468 8 -469 10 -47 2 -470 2 -472 2 -475 2 -477 2 -478 4 -479 2 -480 6 -481 2 -482 2 -483 2 -484 2 -485 2 -487 2 -489 8 -490 2 -491 2 -492 4 -493 2 -494 2 -495 2 -496 2 -497 2 -498 6 -5 6 -51 4 -53 2 -54 2 -57 2 -58 4 -64 2 -65 2 -66 2 -67 4 -69 2 -70 6 -72 4 -74 2 -76 4 -77 2 -78 2 -8 2 -80 2 -82 2 -83 4 -84 4 -85 2 -86 2 -87 2 -9 2 -90 6 -92 2 -95 4 -96 2 -97 4 -98 4 -PREHOOK: query: explain -select key, count(1) from clustergroupby group by key, 3 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, count(1) from clustergroupby group by key, 3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: -- number of buckets cannot be changed, so drop the table -drop table clustergroupby -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@clustergroupby -PREHOOK: Output: default@clustergroupby -POSTHOOK: query: -- number of buckets cannot be changed, so drop the table -drop table clustergroupby -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Output: default@clustergroupby -PREHOOK: query: create table clustergroupby(key string, value string) partitioned by(ds string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@clustergroupby -POSTHOOK: query: create table clustergroupby(key string, value string) partitioned by(ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@clustergroupby -PREHOOK: query: --sort columns-- -alter table clustergroupby clustered by (value) sorted by (key, value) into 1 buckets -PREHOOK: type: ALTERTABLE_CLUSTER_SORT -PREHOOK: Input: default@clustergroupby -PREHOOK: Output: default@clustergroupby -POSTHOOK: query: --sort columns-- -alter table clustergroupby clustered by (value) sorted by (key, value) into 1 buckets -POSTHOOK: type: ALTERTABLE_CLUSTER_SORT -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Output: default@clustergroupby -PREHOOK: query: describe extended clustergroupby -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@clustergroupby -POSTHOOK: query: describe extended clustergroupby -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@clustergroupby -key string -value string -ds string - -# Partition Information -# col_name data_type comment - -ds string - -#### A masked pattern was here #### -PREHOOK: query: insert overwrite table clustergroupby partition (ds='102') select key, value from src distribute by value sort by key, value -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@clustergroupby@ds=102 -POSTHOOK: query: insert overwrite table clustergroupby partition (ds='102') select key, value from src distribute by value sort by key, value -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@clustergroupby@ds=102 -POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: clustergroupby PARTITION(ds=102).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - bucketGroup: true - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=102 -#### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key order by key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=102 -#### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1 -PREHOOK: query: explain -select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=102 -#### A masked pattern was here #### -POSTHOOK: query: select value, count(1) from clustergroupby where ds='102' group by value order by value limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=102 -#### A masked pattern was here #### -val_0 3 -val_10 1 -val_100 2 -val_103 2 -val_104 2 -val_105 1 -val_11 1 -val_111 1 -val_113 2 -val_114 1 -PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='102' group by key, value limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='102' group by key, value limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key, value limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=102 -#### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='102' group by key, value limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=102 -#### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1 -PREHOOK: query: -- number of buckets cannot be changed, so drop the table -drop table clustergroupby -PREHOOK: type: DROPTABLE -PREHOOK: Input: default@clustergroupby -PREHOOK: Output: default@clustergroupby -POSTHOOK: query: -- number of buckets cannot be changed, so drop the table -drop table clustergroupby -POSTHOOK: type: DROPTABLE -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Output: default@clustergroupby -PREHOOK: query: create table clustergroupby(key string, value string) partitioned by(ds string) -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@clustergroupby -POSTHOOK: query: create table clustergroupby(key string, value string) partitioned by(ds string) -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@clustergroupby -PREHOOK: query: alter table clustergroupby clustered by (value, key) sorted by (key) into 1 buckets -PREHOOK: type: ALTERTABLE_CLUSTER_SORT -PREHOOK: Input: default@clustergroupby -PREHOOK: Output: default@clustergroupby -POSTHOOK: query: alter table clustergroupby clustered by (value, key) sorted by (key) into 1 buckets -POSTHOOK: type: ALTERTABLE_CLUSTER_SORT -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Output: default@clustergroupby -PREHOOK: query: describe extended clustergroupby -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@clustergroupby -POSTHOOK: query: describe extended clustergroupby -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@clustergroupby -key string -value string -ds string - -# Partition Information -# col_name data_type comment - -ds string - -#### A masked pattern was here #### -PREHOOK: query: insert overwrite table clustergroupby partition (ds='103') select key, value from src distribute by value, key sort by key -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@clustergroupby@ds=103 -POSTHOOK: query: insert overwrite table clustergroupby partition (ds='103') select key, value from src distribute by value, key sort by key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@clustergroupby@ds=103 -POSTHOOK: Lineage: clustergroupby PARTITION(ds=103).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: clustergroupby PARTITION(ds=103).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - bucketGroup: true - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=103 -#### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='103' group by key order by key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=103 -#### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1 -PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10 -PREHOOK: type: QUERY -POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: clustergroupby - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col1, _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col1 (type: string), _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: bigint) - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: 10 - Processor Tree: - ListSink - -PREHOOK: query: select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@clustergroupby -PREHOOK: Input: default@clustergroupby@ds=103 -#### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='103' group by value, key order by key limit 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@clustergroupby -POSTHOOK: Input: default@clustergroupby@ds=103 -#### A masked pattern was here #### -0 3 -10 1 -100 2 -103 2 -104 2 -105 1 -11 1 -111 1 -113 2 -114 1
http://git-wip-us.apache.org/repos/asf/hive/blob/6f5c1135/ql/src/test/results/clientpositive/bucket_many.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucket_many.q.out b/ql/src/test/results/clientpositive/bucket_many.q.out deleted file mode 100644 index d6dda03..0000000 --- a/ql/src/test/results/clientpositive/bucket_many.q.out +++ /dev/null @@ -1,222 +0,0 @@ -PREHOOK: query: create table bucket_many(key int, value string) clustered by (key) into 256 buckets -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@bucket_many -POSTHOOK: query: create table bucket_many(key int, value string) clustered by (key) into 256 buckets -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@bucket_many -PREHOOK: query: explain extended -insert overwrite table bucket_many -select * from src -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -insert overwrite table bucket_many -select * from src -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-0 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - tag: -1 - value expressions: _col0 (type: string), _col1 (type: string) - auto parallelism: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: src - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value - columns.comments 'default','default' - columns.types string:string -#### A masked pattern was here #### - name default.src - numFiles 1 - numRows 0 - rawDataSize 0 - serialization.ddl struct src { string key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src - Truncated Path -> Alias: - /src [src] - Needs Tagging: false - Reduce Operator Tree: - Select Operator - expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 16 - Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count 256 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.bucket_many - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucket_many { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_many - TotalFiles: 256 - GatherStats: true - MultiFileSpray: true - - Stage: Stage-0 - Move Operator - tables: - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count 256 - bucket_field_name key - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.bucket_many - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct bucket_many { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.bucket_many - - Stage: Stage-2 - Stats-Aggr Operator -#### A masked pattern was here #### - -PREHOOK: query: insert overwrite table bucket_many -select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@bucket_many -POSTHOOK: query: insert overwrite table bucket_many -select * from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@bucket_many -POSTHOOK: Lineage: bucket_many.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: bucket_many.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain -select * from bucket_many tablesample (bucket 1 out of 256) s -PREHOOK: type: QUERY -POSTHOOK: query: explain -select * from bucket_many tablesample (bucket 1 out of 256) s -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - -STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: s - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (((hash(key) & 2147483647) % 256) = 0) (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select * from bucket_many tablesample (bucket 1 out of 256) s -PREHOOK: type: QUERY -PREHOOK: Input: default@bucket_many -#### A masked pattern was here #### -POSTHOOK: query: select * from bucket_many tablesample (bucket 1 out of 256) s -POSTHOOK: type: QUERY -POSTHOOK: Input: default@bucket_many -#### A masked pattern was here #### -256 val_256 -0 val_0 -0 val_0 -0 val_0 -256 val_256 http://git-wip-us.apache.org/repos/asf/hive/blob/6f5c1135/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out b/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out deleted file mode 100644 index 557e270..0000000 --- a/ql/src/test/results/clientpositive/bucketizedhiveinputformat.q.out +++ /dev/null @@ -1,116 +0,0 @@ -PREHOOK: query: CREATE TABLE T1(name STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@T1 -POSTHOOK: query: CREATE TABLE T1(name STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T1 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t1 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T1 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t1 -PREHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@T2 -POSTHOOK: query: CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T2 -Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product -PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( -SELECT tmp1.name as name FROM ( - SELECT name, 'MMM' AS n FROM T1) tmp1 - JOIN (SELECT 'MMM' AS n FROM T1) tmp2 - JOIN (SELECT 'MMM' AS n FROM T1) tmp3 - ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000 -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@t2 -POSTHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM ( -SELECT tmp1.name as name FROM ( - SELECT name, 'MMM' AS n FROM T1) tmp1 - JOIN (SELECT 'MMM' AS n FROM T1) tmp2 - JOIN (SELECT 'MMM' AS n FROM T1) tmp3 - ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@t2 -POSTHOOK: Lineage: t2.name SIMPLE [(t1)t1.FieldSchema(name:name, type:string, comment:null), ] -PREHOOK: query: CREATE TABLE T3(name STRING) STORED AS TEXTFILE -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@T3 -POSTHOOK: query: CREATE TABLE T3(name STRING) STORED AS TEXTFILE -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@T3 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T3 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t3 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv1.txt' INTO TABLE T3 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t3 -PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T3 -PREHOOK: type: LOAD -#### A masked pattern was here #### -PREHOOK: Output: default@t3 -POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv2.txt' INTO TABLE T3 -POSTHOOK: type: LOAD -#### A masked pattern was here #### -POSTHOOK: Output: default@t3 -PREHOOK: query: -- 2 split by max.split.size -SELECT COUNT(1) FROM T2 -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: -- 2 split by max.split.size -SELECT COUNT(1) FROM T2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -Stage-1=Map: 2 Reduce: 1 -5000000 -PREHOOK: query: -- 1 split for two file -SELECT COUNT(1) FROM T3 -PREHOOK: type: QUERY -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: -- 1 split for two file -SELECT COUNT(1) FROM T3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -Stage-1=Map: 1 Reduce: 1 -1000 -PREHOOK: query: -- 1 split -SELECT COUNT(1) FROM T2 -PREHOOK: type: QUERY -PREHOOK: Input: default@t2 -#### A masked pattern was here #### -POSTHOOK: query: -- 1 split -SELECT COUNT(1) FROM T2 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t2 -#### A masked pattern was here #### -Stage-1=Map: 1 Reduce: 1 -5000000 -PREHOOK: query: -- 2 split for two file -SELECT COUNT(1) FROM T3 -PREHOOK: type: QUERY -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: -- 2 split for two file -SELECT COUNT(1) FROM T3 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -Stage-1=Map: 2 Reduce: 1 -1000