Author: namit Date: Fri Jan 11 08:38:12 2013 New Revision: 1431936 URL: http://svn.apache.org/viewvc?rev=1431936&view=rev Log: HIVE-3888 wrong mapside groupby if no partition is being selected (Namit Jain via Ashutosh and namit)
Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_6.q hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_7.q hive/trunk/ql/src/test/results/clientpositive/groupby_sort_6.q.out hive/trunk/ql/src/test/results/clientpositive/groupby_sort_7.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java?rev=1431936&r1=1431935&r2=1431936&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GroupByOptimizer.java Fri Jan 11 08:38:12 2013 @@ -333,7 +333,9 @@ public class GroupByOptimizer implements throw new SemanticException(e.getMessage(), e); } - GroupByOptimizerSortMatch currentMatch = GroupByOptimizerSortMatch.COMPLETE_MATCH; + GroupByOptimizerSortMatch currentMatch = + partsList.getNotDeniedPartns().isEmpty() ? GroupByOptimizerSortMatch.NO_MATCH : + GroupByOptimizerSortMatch.COMPLETE_MATCH; for (Partition part : partsList.getNotDeniedPartns()) { List<String> sortCols = part.getSortColNames(); List<String> bucketCols = part.getBucketCols(); Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_6.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_6.q?rev=1431936&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_6.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_6.q Fri Jan 11 08:38:12 2013 @@ -0,0 +1,41 @@ +set hive.enforce.bucketing = true; +set hive.enforce.sorting = true; +set hive.exec.reducers.max = 10; +set hive.map.groupby.sorted=true; + +CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string); + +CREATE TABLE outputTbl1(key int, cnt int); + +-- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key; + +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key; + +SELECT * FROM outputTbl1 ORDER BY key; + +LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PARTITION (ds='2'); + +-- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key; + +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key; + +SELECT * FROM outputTbl1 ORDER BY key; + +-- The plan should not be converted to a map-side group since the partition being accessed +-- is neither bucketed not sorted +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key; + +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key; + +SELECT * FROM outputTbl1 ORDER BY key; Added: hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_7.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_7.q?rev=1431936&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_7.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/groupby_sort_7.q Fri Jan 11 08:38:12 2013 @@ -0,0 +1,28 @@ +set hive.enforce.bucketing = true; +set hive.enforce.sorting = true; +set hive.exec.reducers.max = 10; +set hive.map.groupby.sorted=true; + +CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CLUSTERED BY (val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PARTITION (ds='1'); + +-- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1'; + +CREATE TABLE outputTbl1(key STRING, val STRING, cnt INT); + +-- The plan should be converted to a map-side group by, since the +-- sorting columns and grouping columns match, and all the bucketing columns +-- are part of sorting columns +EXPLAIN +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val; + +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val; + +SELECT * FROM outputTbl1 ORDER BY key, val; + +DROP TABLE T1; Added: hive/trunk/ql/src/test/results/clientpositive/groupby_sort_6.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_sort_6.q.out?rev=1431936&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/groupby_sort_6.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/groupby_sort_6.q.out Fri Jan 11 08:38:12 2013 @@ -0,0 +1,574 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@T1 +PREHOOK: query: CREATE TABLE outputTbl1(key int, cnt int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE outputTbl1(key int, cnt int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@outputTbl1 +PREHOOK: query: -- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '1') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,cnt + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,cnt + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 +#### A masked pattern was here #### + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Output: default@outputtbl1 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Output: default@outputtbl1 +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PARTITION (ds='2') +PREHOOK: type: LOAD +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PARTITION (ds='2') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t1@ds=2 +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: -- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- The plan should not be converted to a map-side group since no partition is being accessed +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '1') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,cnt + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,cnt + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 +#### A masked pattern was here #### + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Output: default@outputtbl1 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Output: default@outputtbl1 +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: -- The plan should not be converted to a map-side group since the partition being accessed +-- is neither bucketed not sorted +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- The plan should not be converted to a map-side group since the partition being accessed +-- is neither bucketed not sorted +EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + GatherStats: false + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Needs Tagging: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2 + properties: + bucket_count -1 + columns key,val + columns.types string:string +#### A masked pattern was here #### + name default.t1 + numFiles 1 + numPartitions 1 + numRows 0 + partition_columns ds + rawDataSize 0 + serialization.ddl struct t1 { string key, string val} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 30 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,val + columns.types string:string +#### A masked pattern was here #### + name default.t1 + numFiles 1 + numPartitions 1 + numRows 0 + partition_columns ds + rawDataSize 0 + serialization.ddl struct t1 { string key, string val} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 30 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.t1 + name: default.t1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,cnt + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Truncated Path -> Alias: + /t1/ds=2 [t1] + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,cnt + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numPartitions 0 + numRows 0 + rawDataSize 0 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 0 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 +#### A masked pattern was here #### + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1@ds=2 +PREHOOK: Output: default@outputtbl1 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 +SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1@ds=2 +POSTHOOK: Output: default@outputtbl1 +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +1 1 +2 1 +3 1 +7 1 +8 2 Added: hive/trunk/ql/src/test/results/clientpositive/groupby_sort_7.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_sort_7.q.out?rev=1431936&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/groupby_sort_7.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/groupby_sort_7.q.out Fri Jan 11 08:38:12 2013 @@ -0,0 +1,216 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CLUSTERED BY (val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) PARTITIONED BY (ds string) +CLUSTERED BY (val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PARTITION (ds='1') +PREHOOK: type: LOAD +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PARTITION (ds='1') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@t1 +POSTHOOK: Output: default@t1@ds=1 +PREHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +PREHOOK: type: QUERY +PREHOOK: Input: default@t1@ds=1 +PREHOOK: Output: default@t1@ds=1 +POSTHOOK: query: -- perform an insert to make sure there are 2 files +INSERT OVERWRITE TABLE T1 PARTITION (ds='1') select key, val from T1 where ds = '1' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1@ds=1 +POSTHOOK: Output: default@t1@ds=1 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE outputTbl1(key STRING, val STRING, cnt INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE outputTbl1(key STRING, val STRING, cnt INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@outputTbl1 +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: -- The plan should be converted to a map-side group by, since the +-- sorting columns and grouping columns match, and all the bucketing columns +-- are part of sorting columns +EXPLAIN +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val +PREHOOK: type: QUERY +POSTHOOK: query: -- The plan should be converted to a map-side group by, since the +-- sorting columns and grouping columns match, and all the bucketing columns +-- are part of sorting columns +EXPLAIN +INSERT OVERWRITE TABLE outputTbl1 +SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val +POSTHOOK: type: QUERY +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '1')) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + t1 + TableScan + alias: t1 + Select Operator + expressions: + expr: key + type: string + expr: val + type: string + outputColumnNames: key, val + Group By Operator + aggregations: + expr: count(1) + bucketGroup: false + keys: + expr: key + type: string + expr: val + type: string + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 +SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1@ds=1 +PREHOOK: Output: default@outputtbl1 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 +SELECT key, val, count(1) FROM T1 where ds = '1' GROUP BY key, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1@ds=1 +POSTHOOK: Output: default@outputtbl1 +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key, val +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl1 +#### A masked pattern was here #### +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +1 11 1 +2 12 1 +3 13 1 +7 17 1 +8 18 1 +8 28 1 +PREHOOK: query: DROP TABLE T1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: DROP TABLE T1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: t1 PARTITION(ds=1).val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] Modified: hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out?rev=1431936&r1=1431935&r2=1431936&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out Fri Jan 11 08:38:12 2013 @@ -30,7 +30,7 @@ STAGE PLANS: Group By Operator aggregations: expr: max(ds) - bucketGroup: true + bucketGroup: false mode: hash outputColumnNames: _col0 Reduce Output Operator