HIVE-9828: Semantic analyzer does not capture view parent entity for tables referred in view with union all (Prasad via Xuefu)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3e713bcc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3e713bcc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3e713bcc

Branch: refs/heads/beeline-cli
Commit: 3e713bcc1f74c90aba1da654b63b85878ab23768
Parents: 809fcb0
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Sat May 9 02:32:13 2015 -0700
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Sat May 9 02:32:13 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  15 +--
 .../apache/hadoop/hive/ql/plan/PlanUtils.java   |   4 +
 .../hadoop/hive/ql/plan/TestViewEntity.java     | 108 +++++++++++++++++++
 3 files changed, 121 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3e713bcc/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index cbc5466..2993539 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -222,6 +222,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   public static final String DUMMY_DATABASE = "_dummy_database";
   public static final String DUMMY_TABLE = "_dummy_table";
 
+  public static final String SUBQUERY_TAG_1 = "-subquery1";
+  public static final String SUBQUERY_TAG_2 = "-subquery2";
+
   // Max characters when auto generating the column name with func name
   private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20;
 
@@ -429,16 +432,16 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       qbexpr.setOpcode(QBExpr.Opcode.UNION);
       // query 1
       assert (ast.getChild(0) != null);
-      QBExpr qbexpr1 = new QBExpr(alias + "-subquery1");
-      doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + "-subquery1",
-          alias + "-subquery1");
+      QBExpr qbexpr1 = new QBExpr(alias + SUBQUERY_TAG_1);
+      doPhase1QBExpr((ASTNode) ast.getChild(0), qbexpr1, id + SUBQUERY_TAG_1,
+          alias + SUBQUERY_TAG_1);
       qbexpr.setQBExpr1(qbexpr1);
 
       // query 2
       assert (ast.getChild(1) != null);
-      QBExpr qbexpr2 = new QBExpr(alias + "-subquery2");
-      doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + "-subquery2",
-          alias + "-subquery2");
+      QBExpr qbexpr2 = new QBExpr(alias + SUBQUERY_TAG_2);
+      doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + SUBQUERY_TAG_2,
+          alias + SUBQUERY_TAG_2);
       qbexpr.setQBExpr2(qbexpr2);
     }
       break;

http://git-wip-us.apache.org/repos/asf/hive/blob/3e713bcc/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
index 80e11a3..87a2548 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.HiveUtils;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory;
 import org.apache.hadoop.hive.ql.session.SessionState;
@@ -975,6 +976,9 @@ public final class PlanUtils {
     // T's parent would be V1
     for (int pos = 0; pos < aliases.length; pos++) {
       currentAlias = currentAlias == null ? aliases[pos] : currentAlias + ":" + aliases[pos];
+
+      currentAlias = currentAlias.replace(SemanticAnalyzer.SUBQUERY_TAG_1, "")
+          .replace(SemanticAnalyzer.SUBQUERY_TAG_2, "");
       ReadEntity input = viewAliasToInput.get(currentAlias);
       if (input == null) {
         return currentInput;

http://git-wip-us.apache.org/repos/asf/hive/blob/3e713bcc/ql/src/test/org/apache/hadoop/hive/ql/plan/TestViewEntity.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestViewEntity.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestViewEntity.java
new file mode 100644
index 0000000..17a4e06
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestViewEntity.java
@@ -0,0 +1,108 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.plan;
+
+import static org.junit.Assert.*;
+
+import java.io.Serializable;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.CommandNeedRetryException;
+import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
+import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.TestReadEntityDirect.CheckInputReadEntityDirect;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestViewEntity {
+  /**
+   * Hook used in the test to capture the set of ReadEntities
+   */
+  public static class CheckInputReadEntity extends
+      AbstractSemanticAnalyzerHook {
+    public static ReadEntity[] readEntities;
+
+    @Override
+    public void postAnalyze(HiveSemanticAnalyzerHookContext context,
+        List<Task<? extends Serializable>> rootTasks) throws SemanticException {
+      readEntities = context.getInputs().toArray(new ReadEntity[0]);
+    }
+
+  }
+
+  private static Driver driver;
+
+  @BeforeClass
+  public static void onetimeSetup() throws Exception {
+    HiveConf conf = new HiveConf(Driver.class);
+    conf.setVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK,
+        CheckInputReadEntity.class.getName());
+    HiveConf
+        .setBoolVar(conf, HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, false);
+    SessionState.start(conf);
+    driver = new Driver(conf);
+    driver.init();
+  }
+
+  @AfterClass
+  public static void cleanUp() throws Exception {
+    driver.close();
+    driver.destroy();
+  }
+
+  /**
+   * Verify that the parent entities are captured correctly for union views
+   * @throws Exception
+   */
+  @Test
+  public void testUnionView() throws Exception {
+    int ret = driver.run("create table t1(id int)").getResponseCode();
+    assertEquals("Checking command success", 0, ret);
+    ret = driver.run("create table t2(id int)").getResponseCode();
+    assertEquals("Checking command success", 0, ret);
+    ret = driver.run("create view v1 as select t.id from "
+        + "(select t1.id from t1 union all select t2.id from t2) as t")
+        .getResponseCode();
+    assertEquals("Checking command success", 0, ret);
+
+    driver.compile("select * from v1");
+    // view entity
+    assertEquals("default@v1", CheckInputReadEntity.readEntities[0].getName());
+
+    // first table in union query with view as parent
+    assertEquals("default@t1", CheckInputReadEntity.readEntities[1].getName());
+    assertEquals("default@v1", CheckInputReadEntity.readEntities[1]
+        .getParents()
+        .iterator().next().getName());
+    // second table in union query with view as parent
+    assertEquals("default@t2", CheckInputReadEntity.readEntities[2].getName());
+    assertEquals("default@v1", CheckInputReadEntity.readEntities[2]
+        .getParents()
+        .iterator().next().getName());
+
+  }
+
+}
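
For reference, a minimal HiveQL sketch of the scenario the new test exercises. The table and view names (t1, t2, v1) are taken from TestViewEntity above; the expected lineage described in the comments mirrors that test's assertions rather than adding any guarantee beyond them.

    -- two base tables referenced only through a UNION ALL view
    create table t1 (id int);
    create table t2 (id int);
    create view v1 as
      select t.id
      from (select t1.id from t1
            union all
            select t2.id from t2) as t;

    -- After this patch, compiling a query over the view should produce
    -- read entities for t1 and t2 whose parent entity is v1. Previously
    -- the internal "-subquery1"/"-subquery2" alias suffixes (now stripped
    -- in PlanUtils) kept the view alias lookup from matching.
    select * from v1;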