HIVE-15874: Invalid position alias in Group By when CBO failed (Walter Wu, reviewed by Pengcheng Xiong)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d8ff8260 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d8ff8260 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d8ff8260 Branch: refs/heads/branch-2.2 Commit: d8ff82609357cb7e4a72bbc4ae8889ff67d41ef2 Parents: ea98f94 Author: Pengcheng Xiong <pxi...@apache.org> Authored: Mon Feb 20 12:33:24 2017 -0800 Committer: Owen O'Malley <omal...@apache.org> Committed: Tue Mar 28 14:02:46 2017 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/parse/CalcitePlanner.java | 2 + .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 8 +- ql/src/test/queries/clientpositive/masking_10.q | 25 ++ .../clientpositive/position_alias_test_1.q | 18 ++ .../results/clientpositive/masking_10.q.out | 244 +++++++++++++++++++ .../clientpositive/position_alias_test_1.q.out | 148 +++++++++++ 6 files changed, 443 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/d8ff8260/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 36fa3bd..7c6ebf1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -301,6 +301,8 @@ public class CalcitePlanner extends SemanticAnalyzer { public RelNode genLogicalPlan(ASTNode ast) throws SemanticException { LOG.info("Starting generating logical plan"); PreCboCtx cboCtx = new PreCboCtx(); + //change the location of position alias process here + processPositionAlias(ast); if (!genResolvedParseTree(ast, cboCtx)) { return null; } http://git-wip-us.apache.org/repos/asf/hive/blob/d8ff8260/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6a66691..e71abc7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -10912,7 +10912,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { ctesExpanded = new ArrayList<String>(); // 1. analyze and process the position alias - processPositionAlias(ast); + // step processPositionAlias out of genResolvedParseTree // 2. analyze create table command if (ast.getToken().getType() == HiveParser.TOK_CREATETABLE) { @@ -11027,6 +11027,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { void analyzeInternal(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { // 1. Generate Resolved Parse tree from syntax tree LOG.info("Starting Semantic Analysis"); + //change the location of position alias process here + processPositionAlias(ast); if (!genResolvedParseTree(ast, plannerCtx)) { return; } @@ -11040,6 +11042,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { if (tree != ast) { ctx.setSkipTableMasking(true); init(true); + //change the location of position alias process here + processPositionAlias(tree); genResolvedParseTree(tree, plannerCtx); if (this instanceof CalcitePlanner) { ((CalcitePlanner) this).resetCalciteConfiguration(); @@ -12197,7 +12201,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } // Process the position alias in GROUPBY and ORDERBY - private void processPositionAlias(ASTNode ast) throws SemanticException { + public void processPositionAlias(ASTNode ast) throws SemanticException { boolean isBothByPos = HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_ORDERBY_POSITION_ALIAS); boolean isGbyByPos = isBothByPos || HiveConf.getBoolVar(conf, ConfVars.HIVE_GROUPBY_POSITION_ALIAS); http://git-wip-us.apache.org/repos/asf/hive/blob/d8ff8260/ql/src/test/queries/clientpositive/masking_10.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/masking_10.q b/ql/src/test/queries/clientpositive/masking_10.q new file mode 100644 index 0000000..e933253 --- /dev/null +++ b/ql/src/test/queries/clientpositive/masking_10.q @@ -0,0 +1,25 @@ +set hive.mapred.mode=nonstrict; +set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest; + +drop table masking_test; + +create temporary table masking_test as select cast(key as int) as key, value from src; + +set hive.groupby.position.alias = true; +set hive.cbo.enable=true; + +explain select 2017 as a, value from masking_test group by 1, 2; + +select 2017 as a, value from masking_test group by 1, 2; + +explain +select * from + masking_test alias01 + left join + ( + select 2017 as a, value from masking_test group by 1, 2 + ) alias02 + on alias01.key = alias02.a + left join + masking_test alias03 +on alias01.key = alias03.key; http://git-wip-us.apache.org/repos/asf/hive/blob/d8ff8260/ql/src/test/queries/clientpositive/position_alias_test_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/position_alias_test_1.q b/ql/src/test/queries/clientpositive/position_alias_test_1.q new file mode 100644 index 0000000..599bc08 --- /dev/null +++ b/ql/src/test/queries/clientpositive/position_alias_test_1.q @@ -0,0 +1,18 @@ +create table alias_test_01(a INT, b STRING) ; + create table alias_test_02(a INT, b STRING) ; + create table alias_test_03(a INT, b STRING) ; + set hive.groupby.position.alias = true; + set hive.cbo.enable=true; + + + explain + select * from + alias_test_01 alias01 + left join + ( + select 2017 as a, b from alias_test_02 group by 1, 2 + ) alias02 + on alias01.a = alias02.a + left join + alias_test_03 alias03 + on alias01.a = alias03.a; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/d8ff8260/ql/src/test/results/clientpositive/masking_10.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/masking_10.q.out b/ql/src/test/results/clientpositive/masking_10.q.out new file mode 100644 index 0000000..d6293e3 --- /dev/null +++ b/ql/src/test/results/clientpositive/masking_10.q.out @@ -0,0 +1,244 @@ +PREHOOK: query: drop table masking_test +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table masking_test +POSTHOOK: type: DROPTABLE +PREHOOK: query: create temporary table masking_test as select cast(key as int) as key, value from src +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@src +PREHOOK: Output: database:default +PREHOOK: Output: default@masking_test +POSTHOOK: query: create temporary table masking_test as select cast(key as int) as key, value from src +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@src +POSTHOOK: Output: database:default +POSTHOOK: Output: default@masking_test +PREHOOK: query: explain select 2017 as a, value from masking_test group by 1, 2 +PREHOOK: type: QUERY +POSTHOOK: query: explain select 2017 as a, value from masking_test group by 1, 2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: reverse(value) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 2017 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select 2017 as a, value from masking_test group by 1, 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@masking_test +#### A masked pattern was here #### +POSTHOOK: query: select 2017 as a, value from masking_test group by 1, 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@masking_test +#### A masked pattern was here #### +2017 0_lav +2017 2_lav +2017 4_lav +2017 8_lav +Warning: Shuffle Join JOIN[34][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select * from + masking_test alias01 + left join + ( + select 2017 as a, value from masking_test group by 1, 2 + ) alias02 + on alias01.key = alias02.a + left join + masking_test alias03 +on alias01.key = alias03.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from + masking_test alias01 + left join + ( + select 2017 as a, value from masking_test group by 1, 2 + ) alias02 + on alias01.key = alias02.a + left join + masking_test alias03 +on alias01.key = alias03.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: reverse(value) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 2017 (type: int), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: masking_test + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), reverse(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 41 Data size: 435 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + filter predicates: + 0 {(VALUE._col0 = 2017)} + 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 3403 Data size: 75629 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3403 Data size: 75629 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + TableScan + alias: masking_test + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key % 2) = 0) and (key < 10)) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), reverse(value) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 3743 Data size: 83191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3743 Data size: 83191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/d8ff8260/ql/src/test/results/clientpositive/position_alias_test_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/position_alias_test_1.q.out b/ql/src/test/results/clientpositive/position_alias_test_1.q.out new file mode 100644 index 0000000..9053bf1 --- /dev/null +++ b/ql/src/test/results/clientpositive/position_alias_test_1.q.out @@ -0,0 +1,148 @@ +PREHOOK: query: create table alias_test_01(a INT, b STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alias_test_01 +POSTHOOK: query: create table alias_test_01(a INT, b STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alias_test_01 +PREHOOK: query: create table alias_test_02(a INT, b STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alias_test_02 +POSTHOOK: query: create table alias_test_02(a INT, b STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alias_test_02 +PREHOOK: query: create table alias_test_03(a INT, b STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@alias_test_03 +POSTHOOK: query: create table alias_test_03(a INT, b STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@alias_test_03 +PREHOOK: query: explain + select * from + alias_test_01 alias01 + left join + ( + select 2017 as a, b from alias_test_02 group by 1, 2 + ) alias02 + on alias01.a = alias02.a + left join + alias_test_03 alias03 + on alias01.a = alias03.a +PREHOOK: type: QUERY +POSTHOOK: query: explain + select * from + alias_test_01 alias01 + left join + ( + select 2017 as a, b from alias_test_02 group by 1, 2 + ) alias02 + on alias01.a = alias02.a + left join + alias_test_03 alias03 + on alias01.a = alias03.a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alias_test_02 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: b (type: string) + outputColumnNames: b + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: 2017 (type: int), b (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: 2017 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: 2017 (type: int), _col1 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: 2017 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: 2017 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: alias01 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: b (type: string) + TableScan + alias: alias03 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: a (type: int) + sort order: + + Map-reduce partition columns: a (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: b (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + Left Outer Join0 to 2 + keys: + 0 a (type: int) + 1 _col0 (type: int) + 2 a (type: int) + outputColumnNames: _col0, _col1, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink +