This is an automated email from the ASF dual-hosted git repository. mbod pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new cceee0a HIVE-25754: Fix column projection for union all queries with multiple aliases (Marton Bod, reviewed by Krisztian Kasa) cceee0a is described below commit cceee0a61a75274520178cd31cad26e3b1a25b12 Author: Marton Bod <m...@cloudera.com> AuthorDate: Thu Dec 2 09:53:32 2021 +0100 HIVE-25754: Fix column projection for union all queries with multiple aliases (Marton Bod, reviewed by Krisztian Kasa) --- .../apache/hadoop/hive/ql/parse/ParseUtils.java | 96 ++++++++++++---------- ql/src/test/queries/clientpositive/udf_explode.q | 6 ++ .../results/clientpositive/llap/udf_explode.q.out | 52 ++++++++++++ 3 files changed, 109 insertions(+), 45 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index eb46c22..e7c15ff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -445,7 +445,7 @@ public final class ParseUtils { // Repeat the procedure for the new select. } - // Found the proper columns. + // Find the proper columns. List<ASTNode> newChildren = new ArrayList<>(select.getChildCount()); HashSet<String> aliases = new HashSet<>(); for (int i = 0; i < select.getChildCount(); ++i) { @@ -453,51 +453,57 @@ public final class ParseUtils { if (selExpr.getType() == HiveParser.QUERY_HINT) continue; assert selExpr.getType() == HiveParser.TOK_SELEXPR; assert selExpr.getChildCount() > 0; - // Examine the last child. It could be an alias. - Tree child = selExpr.getChild(selExpr.getChildCount() - 1); - switch (child.getType()) { - case HiveParser.TOK_SETCOLREF: - // We have a nested setcolref. Process that and start from scratch TODO: use stack? 
- processSetColsNode((ASTNode)child, searcher, ctx); - processSetColsNode(setCols, searcher, ctx); - return; - case HiveParser.TOK_ALLCOLREF: - // We should find an alias of this insert and do (alias).*. This however won't fix e.g. - // positional order by alias case, cause we'd still have a star on the top level. Bail. - LOG.debug("Replacing SETCOLREF with ALLCOLREF because of nested ALLCOLREF"); - setCols.token.setType(HiveParser.TOK_ALLCOLREF); - return; - case HiveParser.TOK_TABLE_OR_COL: - Tree idChild = child.getChild(0); - assert idChild.getType() == HiveParser.Identifier : idChild; - if (!createChildColumnRef(idChild, alias, newChildren, aliases, ctx)) { - setCols.token.setType(HiveParser.TOK_ALLCOLREF); - return; - } - break; - case HiveParser.Identifier: - if (!createChildColumnRef(child, alias, newChildren, aliases, ctx)) { - setCols.token.setType(HiveParser.TOK_ALLCOLREF); - return; - } - break; - case HiveParser.DOT: { - Tree colChild = child.getChild(child.getChildCount() - 1); - assert colChild.getType() == HiveParser.Identifier : colChild; - if (!createChildColumnRef(colChild, alias, newChildren, aliases, ctx)) { - setCols.token.setType(HiveParser.TOK_ALLCOLREF); - return; + // we can have functions which generate multiple aliases (e.g. explode(map(x, y)) as (key, val)) + boolean isFunctionWithMultipleParameters = + selExpr.getChild(0).getType() == HiveParser.TOK_FUNCTION && selExpr.getChildCount() > 2; + // if so let's skip the function token but then examine all its parameters - otherwise check only the last item + int start = isFunctionWithMultipleParameters ? 1 : selExpr.getChildCount() - 1; + for (int j = start; j < selExpr.getChildCount(); ++j) { + Tree child = selExpr.getChild(j); + switch (child.getType()) { + case HiveParser.TOK_SETCOLREF: + // We have a nested setcolref. Process that and start from scratch TODO: use stack? 
+ processSetColsNode((ASTNode) child, searcher, ctx); + processSetColsNode(setCols, searcher, ctx); + return; + case HiveParser.TOK_ALLCOLREF: + // We should find an alias of this insert and do (alias).*. This however won't fix e.g. + // positional order by alias case, cause we'd still have a star on the top level. Bail. + LOG.debug("Replacing SETCOLREF with ALLCOLREF because of nested ALLCOLREF"); + setCols.token.setType(HiveParser.TOK_ALLCOLREF); + return; + case HiveParser.TOK_TABLE_OR_COL: + Tree idChild = child.getChild(0); + assert idChild.getType() == HiveParser.Identifier : idChild; + if (!createChildColumnRef(idChild, alias, newChildren, aliases, ctx)) { + setCols.token.setType(HiveParser.TOK_ALLCOLREF); + return; + } + break; + case HiveParser.Identifier: + if (!createChildColumnRef(child, alias, newChildren, aliases, ctx)) { + setCols.token.setType(HiveParser.TOK_ALLCOLREF); + return; + } + break; + case HiveParser.DOT: { + Tree colChild = child.getChild(child.getChildCount() - 1); + assert colChild.getType() == HiveParser.Identifier : colChild; + if (!createChildColumnRef(colChild, alias, newChildren, aliases, ctx)) { + setCols.token.setType(HiveParser.TOK_ALLCOLREF); + return; + } + break; + } + default: + // Not really sure how to refer to this (or if we can). + // TODO: We could find a different from branch for the union, that might have an alias? + // Or we could add an alias here to refer to, but that might break other branches. + LOG.debug("Replacing SETCOLREF with ALLCOLREF because of the nested node " + + child.getType() + " " + child.getText()); + setCols.token.setType(HiveParser.TOK_ALLCOLREF); + return; } - break; - } - default: - // Not really sure how to refer to this (or if we can). - // TODO: We could find a different from branch for the union, that might have an alias? - // Or we could add an alias here to refer to, but that might break other branches. 
- LOG.debug("Replacing SETCOLREF with ALLCOLREF because of the nested node " - + child.getType() + " " + child.getText()); - setCols.token.setType(HiveParser.TOK_ALLCOLREF); - return; } } // Insert search in the beginning would have failed if these parents didn't exist. diff --git a/ql/src/test/queries/clientpositive/udf_explode.q b/ql/src/test/queries/clientpositive/udf_explode.q index 7825eca..2c017fe 100644 --- a/ql/src/test/queries/clientpositive/udf_explode.q +++ b/ql/src/test/queries/clientpositive/udf_explode.q @@ -23,3 +23,9 @@ INSERT OVERWRITE TABLE lazy_array_map select map(1, 'one', 2, 'two', 3, 'three') SELECT array_col, myCol FROM lazy_array_map lateral view explode(array_col) X AS myCol ORDER BY array_col, myCol; SELECT map_col, myKey, myValue FROM lazy_array_map lateral view explode(map_col) X AS myKey, myValue ORDER BY map_col, myKey, myValue; + +create table source1 (dt string, d1 int, d2 int) stored as orc; +create table source2 (dt string, d1 int, d2 int) stored as orc; +insert into source1 values ('20211107', 1, 2); +insert into source2 values ('20211108', 11, 22); +select explode(map('D219', d1,'D220', d2)) as (keyx, valuex) from source1 union all select explode(map('D221', d1,'D222', d2)) as (keyy, valuey) from source2; diff --git a/ql/src/test/results/clientpositive/llap/udf_explode.q.out b/ql/src/test/results/clientpositive/llap/udf_explode.q.out index 8cfdbe2..2577e2e 100644 --- a/ql/src/test/results/clientpositive/llap/udf_explode.q.out +++ b/ql/src/test/results/clientpositive/llap/udf_explode.q.out @@ -653,3 +653,55 @@ POSTHOOK: Input: default@lazy_array_map {1:"one",2:"two",3:"three"} 1 one {1:"one",2:"two",3:"three"} 2 two {1:"one",2:"two",3:"three"} 3 three +PREHOOK: query: create table source1 (dt string, d1 int, d2 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@source1 +POSTHOOK: query: create table source1 (dt string, d1 int, d2 int) stored as orc +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@source1 +PREHOOK: query: create table source2 (dt string, d1 int, d2 int) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@source2 +POSTHOOK: query: create table source2 (dt string, d1 int, d2 int) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@source2 +PREHOOK: query: insert into source1 values ('20211107', 1, 2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@source1 +POSTHOOK: query: insert into source1 values ('20211107', 1, 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@source1 +POSTHOOK: Lineage: source1.d1 SCRIPT [] +POSTHOOK: Lineage: source1.d2 SCRIPT [] +POSTHOOK: Lineage: source1.dt SCRIPT [] +PREHOOK: query: insert into source2 values ('20211108', 11, 22) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@source2 +POSTHOOK: query: insert into source2 values ('20211108', 11, 22) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@source2 +POSTHOOK: Lineage: source2.d1 SCRIPT [] +POSTHOOK: Lineage: source2.d2 SCRIPT [] +POSTHOOK: Lineage: source2.dt SCRIPT [] +PREHOOK: query: select explode(map('D219', d1,'D220', d2)) as (keyx, valuex) from source1 union all select explode(map('D221', d1,'D222', d2)) as (keyy, valuey) from source2 +PREHOOK: type: QUERY +PREHOOK: Input: default@source1 +PREHOOK: Input: default@source2 +#### A masked pattern was here #### +POSTHOOK: query: select explode(map('D219', d1,'D220', d2)) as (keyx, valuex) from source1 union all select explode(map('D221', d1,'D222', d2)) as (keyy, valuey) from source2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@source1 +POSTHOOK: Input: default@source2 +#### A masked pattern was here #### +D219 1 +D220 2 +D221 11 +D222 22