Repository: hive
Updated Branches:
  refs/heads/master 727e4b2d2 -> 778c47ccd
HIVE-20552: Get Schema from LogicalPlan faster (Teddy Choi, reviewed by Jesus Camacho Rodriguez)

Signed-off-by: Teddy Choi <pudi...@gmail.com>

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/778c47cc
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/778c47cc
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/778c47cc

Branch: refs/heads/master
Commit: 778c47ccdffa138f4738aadc94a75b75b5055fe1
Parents: 727e4b2
Author: Teddy Choi <pudi...@gmail.com>
Authored: Fri Sep 28 11:28:39 2018 +0900
Committer: Teddy Choi <pudi...@gmail.com>
Committed: Fri Sep 28 11:28:39 2018 +0900

----------------------------------------------------------------------
 .../metadata/HiveMaterializedViewsRegistry.java | 31 ++++----------------
 .../apache/hadoop/hive/ql/parse/ParseUtils.java | 28 ++++++++++++++++++
 .../ql/udf/generic/GenericUDTFGetSplits.java | 29 ++++++++++++------
 3 files changed, 53 insertions(+), 35 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hive/blob/778c47cc/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
index 7d11cac..ee405ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMaterializedViewsRegistry.java
@@ -52,8 +52,6 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
-import org.apache.hadoop.hive.ql.Context;
-import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; @@ -61,11 +59,8 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter; -import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.CalcitePlanner; -import org.apache.hadoop.hive.ql.parse.ColumnStatsList; import org.apache.hadoop.hive.ql.parse.ParseUtils; -import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.RowResolver; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde2.SerDeException; @@ -227,10 +222,12 @@ public final class HiveMaterializedViewsRegistry { " ignored; error creating view replacement"); return null; } - final RelNode queryRel = parseQuery(conf, viewQuery); - if (queryRel == null) { + final RelNode queryRel; + try { + queryRel = ParseUtils.parseQuery(conf, viewQuery); + } catch (Exception e) { LOG.warn("Materialized view " + materializedViewTable.getCompleteName() + - " ignored; error parsing original query"); + " ignored; error parsing original query; " + e); return null; } @@ -403,24 +400,6 @@ public final class HiveMaterializedViewsRegistry { return tableRel; } - private static RelNode parseQuery(HiveConf conf, String viewQuery) { - try { - final ASTNode node = ParseUtils.parse(viewQuery); - final QueryState qs = - new QueryState.Builder().withHiveConf(conf).build(); - CalcitePlanner analyzer = new CalcitePlanner(qs); - Context ctx = new Context(conf); - ctx.setIsLoadingMaterializedView(true); - analyzer.initCtx(ctx); - analyzer.init(false); - return analyzer.genLogicalPlan(node); - } catch 
(Exception e) { - // We could not parse the view - LOG.error("Error parsing original query for materialized view", e); - return null; - } - } - private static TableType obtainTableType(Table tabMetaData) { if (tabMetaData.getStorageHandler() != null) { final String storageHandlerStr = tabMetaData.getStorageHandler().toString(); http://git-wip-us.apache.org/repos/asf/hive/blob/778c47cc/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 89e8412..be1c59f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hive.ql.parse; import com.google.common.base.Preconditions; + +import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.Modifier; import java.util.ArrayDeque; @@ -31,10 +33,13 @@ import java.util.Set; import java.util.Stack; import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.Tree; +import org.apache.calcite.rel.RelNode; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.QueryState; import org.apache.hadoop.hive.ql.exec.PTFUtils; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.lib.Node; @@ -537,4 +542,27 @@ public final class ParseUtils { } return sb.toString(); } + + public static RelNode parseQuery(HiveConf conf, String viewQuery) + throws SemanticException, IOException, ParseException { + return getAnalyzer(conf).genLogicalPlan(parse(viewQuery)); + } + + public static List<FieldSchema> parseQueryAndGetSchema(HiveConf 
conf, String viewQuery) + throws SemanticException, IOException, ParseException { + final CalcitePlanner analyzer = getAnalyzer(conf); + analyzer.genLogicalPlan(parse(viewQuery)); + return analyzer.getResultSchema(); + } + + private static CalcitePlanner getAnalyzer(HiveConf conf) throws SemanticException, IOException { + final QueryState qs = + new QueryState.Builder().withHiveConf(conf).build(); + CalcitePlanner analyzer = new CalcitePlanner(qs); + Context ctx = new Context(conf); + ctx.setIsLoadingMaterializedView(true); + analyzer.initCtx(ctx); + analyzer.init(false); + return analyzer; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/778c47cc/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java index a29b560..30673ce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java @@ -74,6 +74,8 @@ import org.apache.hadoop.hive.ql.exec.tez.TezTask; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.lockmgr.TxnManagerFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.ParseException; +import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.PlanUtils; import org.apache.hadoop.hive.ql.plan.TezWork; @@ -246,6 +248,17 @@ public class GenericUDTFGetSplits extends GenericUDTF { // hive compiler is going to remove inner order by. disable that optimization until then. 
HiveConf.setBoolVar(conf, ConfVars.HIVE_REMOVE_ORDERBY_IN_SUBQUERY, false); + if(num == 0) { + //Schema only + try { + List<FieldSchema> fieldSchemas = ParseUtils.parseQueryAndGetSchema(conf, query); + Schema schema = new Schema(convertSchema(fieldSchemas)); + return new PlanFragment(null, schema, null); + } catch (IOException | ParseException e) { + throw new HiveException(e); + } + } + try { jc = DagUtils.getInstance().createConfiguration(conf); } catch (IOException e) { @@ -274,10 +287,6 @@ public class GenericUDTFGetSplits extends GenericUDTF { HiveConf.getBoolVar(conf, ConfVars.LLAP_EXTERNAL_SPLITS_ORDER_BY_FORCE_SINGLE_SPLIT); List<Task<?>> roots = plan.getRootTasks(); Schema schema = convertSchema(plan.getResultSchema()); - if(num == 0) { - //Schema only - return new PlanFragment(null, schema, null); - } boolean fetchTask = plan.getFetchTask() != null; TezWork tezWork; if (roots == null || roots.size() != 1 || !(roots.get(0) instanceof TezTask)) { @@ -665,16 +674,18 @@ public class GenericUDTFGetSplits extends GenericUDTF { } } - private Schema convertSchema(Object obj) throws HiveException { - org.apache.hadoop.hive.metastore.api.Schema schema = (org.apache.hadoop.hive.metastore.api.Schema) obj; + private List<FieldDesc> convertSchema(List<FieldSchema> fieldSchemas) { List<FieldDesc> colDescs = new ArrayList<FieldDesc>(); - for (FieldSchema fs : schema.getFieldSchemas()) { + for (FieldSchema fs : fieldSchemas) { String colName = fs.getName(); String typeString = fs.getType(); colDescs.add(new FieldDesc(colName, TypeInfoUtils.getTypeInfoFromTypeString(typeString))); } - Schema Schema = new Schema(colDescs); - return Schema; + return colDescs; + } + + private Schema convertSchema(org.apache.hadoop.hive.metastore.api.Schema schema) { + return new Schema(convertSchema(schema.getFieldSchemas())); } private String getTempTableStorageFormatString(HiveConf conf) {