HIVE-15797: Separate the configs for GROUP BY and ORDER BY position alias usage (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/39a0d39e Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/39a0d39e Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/39a0d39e Branch: refs/heads/hive-14535 Commit: 39a0d39e7ce0566eaa8d6cad02a2bcda301381e4 Parents: fe9a6d5 Author: Sergey Shelukhin <[email protected]> Authored: Mon Feb 6 16:27:55 2017 -0800 Committer: Sergey Shelukhin <[email protected]> Committed: Mon Feb 6 17:24:11 2017 -0800 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 8 +++++++- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 18 +++++++++--------- .../test/queries/clientpositive/bucket_groupby.q | 4 ++-- .../test/queries/clientpositive/decimal_stats.q | 2 +- ql/src/test/results/clientpositive/cp_sel.q.out | 9 +++++---- .../results/clientpositive/decimal_stats.q.out | 11 ++++++----- .../clientpositive/llap/bucket_groupby.q.out | 8 ++++---- 7 files changed, 34 insertions(+), 26 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/39a0d39e/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index d62e527..cb27cd6 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1141,8 +1141,14 @@ public class HiveConf extends Configuration { "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + "the group by in the mapper by using BucketizedHiveInputFormat. 
The only downside to this\n" + "is that it limits the number of mappers to the number of files."), + HIVE_GROUPBY_POSITION_ALIAS("hive.groupby.position.alias", false, + "Whether to enable using Column Position Alias in Group By"), + HIVE_ORDERBY_POSITION_ALIAS("hive.orderby.position.alias", true, + "Whether to enable using Column Position Alias in Order By"), + @Deprecated HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false, - "Whether to enable using Column Position Alias in Group By or Order By"), + "Whether to enable using Column Position Alias in Group By or Order By (deprecated).\n" + + "Use " + HIVE_ORDERBY_POSITION_ALIAS.varname + " or " + HIVE_GROUPBY_POSITION_ALIAS.varname + " instead"), HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30, "Whether a new map-reduce job should be launched for grouping sets/rollups/cubes.\n" + "For a query like: select a, b, c, count(1) from T group by a, b, c with rollup;\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/39a0d39e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index b0de71c..dceb4a5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -12243,11 +12243,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { // Process the position alias in GROUPBY and ORDERBY private void processPositionAlias(ASTNode ast) throws SemanticException { - boolean isByPos = false; - if (HiveConf.getBoolVar(conf, - HiveConf.ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS) == true) { - isByPos = true; - } + boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS); + boolean isGbyByPos = 
isBothByPos + || HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_POSITION_ALIAS); + boolean isObyByPos = isBothByPos + || HiveConf.getBoolVar(conf, ConfVars.HIVE_ORDERBY_POSITION_ALIAS); Deque<ASTNode> stack = new ArrayDeque<ASTNode>(); stack.push(ast); @@ -12286,7 +12286,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { for (int child_pos = 0; child_pos < groupbyNode.getChildCount(); ++child_pos) { ASTNode node = (ASTNode) groupbyNode.getChild(child_pos); if (node.getToken().getType() == HiveParser.Number) { - if (isByPos) { + if (isGbyByPos) { int pos = Integer.parseInt(node.getText()); if (pos > 0 && pos <= selectExpCnt) { groupbyNode.setChild(child_pos, @@ -12299,7 +12299,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } } else { warn("Using constant number " + node.getText() + - " in group by. If you try to use position alias when hive.groupby.orderby.position.alias is false, the position alias will be ignored."); + " in group by. If you try to use position alias when hive.groupby.position.alias is false, the position alias will be ignored."); } } } @@ -12318,7 +12318,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos).getChild(0); ASTNode node = (ASTNode) colNode.getChild(0); if (node != null && node.getToken().getType() == HiveParser.Number) { - if( isByPos ) { + if (isObyByPos) { if (!isAllCol) { int pos = Integer.parseInt(node.getText()); if (pos > 0 && pos <= selectExpCnt) { @@ -12335,7 +12335,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } } else { //if not using position alias and it is a number. warn("Using constant number " + node.getText() + - " in order by. If you try to use position alias when hive.groupby.orderby.position.alias is false, the position alias will be ignored."); + " in order by. 
If you try to use position alias when hive.orderby.position.alias is false, the position alias will be ignored."); } } } http://git-wip-us.apache.org/repos/asf/hive/blob/39a0d39e/ql/src/test/queries/clientpositive/bucket_groupby.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/bucket_groupby.q b/ql/src/test/queries/clientpositive/bucket_groupby.q index a36c79d..3cb6709 100644 --- a/ql/src/test/queries/clientpositive/bucket_groupby.q +++ b/ql/src/test/queries/clientpositive/bucket_groupby.q @@ -28,8 +28,8 @@ select abs(length(key)), count(1) from clustergroupby where ds='101' group by a --constant-- explain -select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10; -select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10; +select key, count(1) from clustergroupby where ds='101' group by key,'a' order by key,'a' limit 10; +select key, count(1) from clustergroupby where ds='101' group by key,'a' order by key,'a' limit 10; --subquery-- explain http://git-wip-us.apache.org/repos/asf/hive/blob/39a0d39e/ql/src/test/queries/clientpositive/decimal_stats.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/decimal_stats.q b/ql/src/test/queries/clientpositive/decimal_stats.q index 2370e7d..e00cf1f 100644 --- a/ql/src/test/queries/clientpositive/decimal_stats.q +++ b/ql/src/test/queries/clientpositive/decimal_stats.q @@ -12,5 +12,5 @@ analyze table decimal_1 compute statistics for columns; desc formatted decimal_1 v; -explain select * from decimal_1 order by 1 limit 100; +explain select * from decimal_1 order by t limit 100; drop table decimal_1; http://git-wip-us.apache.org/repos/asf/hive/blob/39a0d39e/ql/src/test/results/clientpositive/cp_sel.q.out ---------------------------------------------------------------------- diff --git 
a/ql/src/test/results/clientpositive/cp_sel.q.out b/ql/src/test/results/clientpositive/cp_sel.q.out index 6300578..1778ccd 100644 --- a/ql/src/test/results/clientpositive/cp_sel.q.out +++ b/ql/src/test/results/clientpositive/cp_sel.q.out @@ -20,13 +20,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: string), _col1 (type: string) + value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), 'hello' (type: string), 'world' (type: string) + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), 'hello' (type: string), 'world' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Limit @@ -58,7 +59,7 @@ POSTHOOK: Input: default@srcpart POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 #### A masked pattern was here #### -238 val_238 hello world +0 val_0 hello world PREHOOK: query: create table testpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by(key) sorted by(key) into 2 buckets PREHOOK: type: CREATETABLE PREHOOK: Output: database:default http://git-wip-us.apache.org/repos/asf/hive/blob/39a0d39e/ql/src/test/results/clientpositive/decimal_stats.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/decimal_stats.q.out b/ql/src/test/results/clientpositive/decimal_stats.q.out index 5af58fb..cb02f76 100644 --- a/ql/src/test/results/clientpositive/decimal_stats.q.out +++ 
b/ql/src/test/results/clientpositive/decimal_stats.q.out @@ -49,9 +49,9 @@ POSTHOOK: Input: default@decimal_1 # col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment v decimal(10,0) 500 1 from deserializer -PREHOOK: query: explain select * from decimal_1 order by 1 limit 100 +PREHOOK: query: explain select * from decimal_1 order by t limit 100 PREHOOK: type: QUERY -POSTHOOK: query: explain select * from decimal_1 order by 1 limit 100 +POSTHOOK: query: explain select * from decimal_1 order by t limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -69,13 +69,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col0 (type: decimal(4,2)) + sort order: + Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 - value expressions: _col0 (type: decimal(4,2)), _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0)) + value expressions: _col1 (type: decimal(5,0)), _col2 (type: decimal(10,0)) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: decimal(4,2)), VALUE._col1 (type: decimal(5,0)), VALUE._col2 (type: decimal(10,0)) + expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(5,0)), VALUE._col1 (type: decimal(10,0)) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 112112 Basic stats: COMPLETE Column stats: COMPLETE Limit http://git-wip-us.apache.org/repos/asf/hive/blob/39a0d39e/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out index 2c3cf2f..cb7623e 100644 --- 
a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out @@ -433,10 +433,10 @@ POSTHOOK: Input: default@clustergroupby@ds=101 2 74 3 416 PREHOOK: query: explain -select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 +select key, count(1) from clustergroupby where ds='101' group by key,'a' order by key,'a' limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain -select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 +select key, count(1) from clustergroupby where ds='101' group by key,'a' order by key,'a' limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -514,12 +514,12 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 +PREHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,'a' order by key,'a' limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@clustergroupby PREHOOK: Input: default@clustergroupby@ds=101 #### A masked pattern was here #### -POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,3 order by key,3 limit 10 +POSTHOOK: query: select key, count(1) from clustergroupby where ds='101' group by key,'a' order by key,'a' limit 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@clustergroupby POSTHOOK: Input: default@clustergroupby@ds=101
