HIVE-15932: Add support for: "explain ast" (Igor Kryvenko via Zoltan Haindrich)
Signed-off-by: Zoltan Haindrich <k...@rxd.hu> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/154ca3e3 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/154ca3e3 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/154ca3e3 Branch: refs/heads/master Commit: 154ca3e3b5eb78cd49a4b3650c750ca731fba7da Parents: df8af55 Author: Igor Kryvenko <kryvenko7i...@gmail.com> Authored: Wed Sep 5 10:00:56 2018 +0200 Committer: Zoltan Haindrich <k...@rxd.hu> Committed: Wed Sep 5 10:01:04 2018 +0200 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/exec/ExplainTask.java | 5 + .../apache/hadoop/hive/ql/hooks/ATSHook.java | 1 + .../hive/ql/hooks/HiveProtoLoggingHook.java | 2 +- .../hive/ql/parse/ExplainConfiguration.java | 9 ++ .../hive/ql/parse/ExplainSemanticAnalyzer.java | 3 + .../org/apache/hadoop/hive/ql/parse/HiveLexer.g | 1 + .../apache/hadoop/hive/ql/parse/HiveParser.g | 2 + .../hadoop/hive/ql/parse/IdentifiersParser.g | 2 +- .../apache/hadoop/hive/ql/plan/ExplainWork.java | 22 ++- .../parse/TestUpdateDeleteSemanticAnalyzer.java | 2 +- .../test/queries/clientpositive/explain_ast.q | 16 ++ .../results/clientpositive/explain_ast.q.out | 159 +++++++++++++++++++ 12 files changed, 220 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index da20d2e..65ec4b2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -402,6 +402,11 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable { if(work.isFormatted()) { out.print(jsonLocks); } + } else if (work.isAst()) { + // Print out the parse AST + if (work.getAstStringTree() != null) { + outputAST(work.getAstStringTree(), out, work.isFormatted(), 0); + } } else { if (work.isUserLevelExplain()) { // Because of the implementation of the JsonParserFactory, we are sure http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java index fa69f13..92fcfec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/ATSHook.java @@ -264,6 +264,7 @@ public class ATSHook implements ExecuteWithHookContext { null,// pCtx plan.getRootTasks(),// RootTasks plan.getFetchTask(),// FetchTask + null, null,// analyzer config, //explainConfig null, // cboInfo http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java index 8f7c53f..673c858 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java @@ -480,7 +480,7 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { null, // pCtx plan.getRootTasks(), // RootTasks plan.getFetchTask(), // FetchTask - null, // analyzer + null, null, // analyzer config, // explainConfig plan.getCboInfo(), // cboInfo, plan.getOptimizedQueryString() http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index 5ca6b59..2f12bda 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -48,6 +48,7 @@ public class ExplainConfiguration { private boolean vectorizationOnly = false; private VectorizationDetailLevel vectorizationDetailLevel = VectorizationDetailLevel.SUMMARY; private boolean locks = false; + private boolean ast = false; private Path explainRootPath; private Map<String, Long> opIdToRuntimeNumRows; @@ -161,4 +162,12 @@ public class ExplainConfiguration { public void setLocks(boolean locks) { this.locks = locks; } + + public boolean isAst() { + return ast; + } + + public void setAst(boolean ast) { + this.ast = ast; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 3aefb61..b17efff 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -114,6 +114,8 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { } } else if (explainOptions == HiveParser.KW_LOCKS) { config.setLocks(true); + } else if (explainOptions == HiveParser.KW_AST){ + config.setAst(true); } else { // UNDONE: UNKNOWN OPTION? } @@ -206,6 +208,7 @@ public class ExplainSemanticAnalyzer extends BaseSemanticAnalyzer { pCtx, tasks, fetchTask, + input, sem, config, ctx.getCboInfo(), http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 6641e0d..807f6f5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -377,6 +377,7 @@ KW_ACTIVE: 'ACTIVE'; KW_UNMANAGED: 'UNMANAGED'; KW_APPLICATION: 'APPLICATION'; KW_SYNC: 'SYNC'; +KW_AST: 'AST'; // Operators // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 15d4edf..98471a7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -616,6 +616,7 @@ import org.apache.hadoop.hive.conf.HiveConf; xlateMap.put("KW_ALLOC_FRACTION", "ALLOC_FRACTION"); xlateMap.put("KW_SCHEDULING_POLICY", "SCHEDULING_POLICY"); xlateMap.put("KW_PATH", "PATH"); + xlateMap.put("KW_AST", "AST"); // Operators xlateMap.put("DOT", "."); @@ -801,6 +802,7 @@ explainOption | KW_ANALYZE | KW_REOPTIMIZATION | KW_LOCKS + | KW_AST | (KW_VECTORIZATION vectorizationOnly? vectorizatonDetail?) ; http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 2566b31..f327a12 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -807,7 +807,7 @@ nonReserved | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_QUERY | KW_QUARTER | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SHARED - | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED + | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED | KW_AST | KW_STREAMTABLE | KW_STRING | KW_STRUCT | KW_TABLES | KW_TBLPROPERTIES | KW_TEMPORARY | KW_TERMINATED | KW_TINYINT | KW_TOUCH | KW_TRANSACTIONS | KW_UNARCHIVE | KW_UNDO | KW_UNIONTYPE | KW_UNLOCK | KW_UNSET | KW_UNSIGNED | KW_URI | KW_USE | KW_UTC | KW_UTCTIMESTAMP | KW_VALUE_TYPE | KW_VIEW | KW_WEEK | KW_WHILE | KW_YEAR http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index 3e62142..0654f57 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; +import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration; import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.VectorizationDetailLevel; @@ -42,6 +43,8 @@ public class ExplainWork implements Serializable { private Path resFile; private ArrayList<Task<?>> rootTasks; private Task<?> fetchTask; + private ASTNode astTree; + private String astStringTree; private HashSet<ReadEntity> inputs; private HashSet<WriteEntity> outputs; private ParseContext pCtx; @@ -63,6 +66,7 @@ public class ExplainWork implements Serializable { ParseContext pCtx, List<Task<?>> rootTasks, Task<?> fetchTask, + ASTNode astTree, BaseSemanticAnalyzer analyzer, ExplainConfiguration config, String cboInfo, @@ -70,6 +74,9 @@ public class ExplainWork implements Serializable { this.resFile = resFile; this.rootTasks = new ArrayList<Task<?>>(rootTasks); this.fetchTask = fetchTask; + if(astTree != null) { + this.astTree = astTree; + } this.analyzer = analyzer; if (analyzer != null) { this.inputs = analyzer.getInputs(); @@ -123,6 +130,17 @@ public class ExplainWork implements Serializable { this.outputs = outputs; } + public ASTNode getAstTree() { + return astTree; + } + + public String getAstStringTree() { + if (astStringTree == null) { + astStringTree = astTree.dump(); + } + return astStringTree; + } + public boolean getExtended() { return config.isExtended(); } @@ -206,5 +224,7 @@ public class ExplainWork implements Serializable { public boolean isLocks() { return config.isLocks(); } - + public boolean isAst() { + return config.isAst(); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java index ec77b73..932f4e8 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java @@ -300,7 +300,7 @@ public class TestUpdateDeleteSemanticAnalyzer { ExplainConfiguration config = new ExplainConfiguration(); config.setExtended(true); ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(), - sem.getFetchTask(), sem, config, null, plan.getOptimizedQueryString()); + sem.getFetchTask(), null, sem, config, null, plan.getOptimizedQueryString()); ExplainTask task = new ExplainTask(); task.setWork(work); task.initialize(queryState, plan, null, null); http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/test/queries/clientpositive/explain_ast.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/explain_ast.q b/ql/src/test/queries/clientpositive/explain_ast.q new file mode 100644 index 0000000..e2d9fd0 --- /dev/null +++ b/ql/src/test/queries/clientpositive/explain_ast.q @@ -0,0 +1,16 @@ +--! qt:dataset:src1 +--! qt:dataset:src +set hive.mapred.mode=nonstrict; +set hive.optimize.ppd=true; +set hive.ppd.remove.duplicatefilters=false; + +-- SORT_QUERY_RESULTS + +EXPLAIN FORMATTED AST +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4'); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hive/blob/154ca3e3/ql/src/test/results/clientpositive/explain_ast.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/explain_ast.q.out b/ql/src/test/results/clientpositive/explain_ast.q.out new file mode 100644 index 0000000..75720eb --- /dev/null +++ b/ql/src/test/results/clientpositive/explain_ast.q.out @@ -0,0 +1,159 @@ +PREHOOK: query: EXPLAIN FORMATTED AST +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FORMATTED AST +SELECT src1.c1, src2.c4 +FROM +(SELECT src.key as c1, src.value as c2 from src where src.key > '1' ) src1 +JOIN +(SELECT src.key as c3, src.value as c4 from src where src.key > '2' ) src2 +ON src1.c1 = src2.c3 AND src1.c1 < '400' +WHERE src1.c1 > '20' and (src1.c2 < 'val_50' or src1.c1 > '2') and (src2.c3 > '50' or src1.c1 < '50') and (src2.c3 <> '4') +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + src + key + c1 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + src + value + c2 + TOK_WHERE + > + . + TOK_TABLE_OR_COL + src + key + '1' + src1 + TOK_SUBQUERY + TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + src + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + src + key + c3 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + src + value + c4 + TOK_WHERE + > + . + TOK_TABLE_OR_COL + src + key + '2' + src2 + AND + = + . + TOK_TABLE_OR_COL + src1 + c1 + . + TOK_TABLE_OR_COL + src2 + c3 + < + . + TOK_TABLE_OR_COL + src1 + c1 + '400' + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + . + TOK_TABLE_OR_COL + src1 + c1 + TOK_SELEXPR + . + TOK_TABLE_OR_COL + src2 + c4 + TOK_WHERE + and + and + and + > + . + TOK_TABLE_OR_COL + src1 + c1 + '20' + or + < + . + TOK_TABLE_OR_COL + src1 + c2 + 'val_50' + > + . + TOK_TABLE_OR_COL + src1 + c1 + '2' + or + > + . + TOK_TABLE_OR_COL + src2 + c3 + '50' + < + . + TOK_TABLE_OR_COL + src1 + c1 + '50' + <> + . + TOK_TABLE_OR_COL + src2 + c3 + '4' +