This is an automated email from the ASF dual-hosted git repository. dmollitor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new 83f917c HIVE-23171: Create Tool To Visualize Hive Parser Tree (David Mollitor, reviewed by Miklos Gergely) 83f917c is described below commit 83f917c787d60543f171b23d28ceda44d69c235d Author: David Mollitor <dmolli...@apache.org> AuthorDate: Thu Apr 16 10:32:39 2020 -0400 HIVE-23171: Create Tool To Visualize Hive Parser Tree (David Mollitor, reviewed by Miklos Gergely) --- parser/pom.xml | 7 +- .../org/apache/hadoop/hive/ql/parse/HqlParser.java | 145 +++++++++++++++++++++ 2 files changed, 148 insertions(+), 4 deletions(-) diff --git a/parser/pom.xml b/parser/pom.xml index 05fd78d..18e0ad8 100644 --- a/parser/pom.xml +++ b/parser/pom.xml @@ -49,14 +49,13 @@ <dependency> <groupId>org.antlr</groupId> <artifactId>antlr-runtime</artifactId> - <version>${antlr.version}</version> </dependency> <dependency> <groupId>org.antlr</groupId> - <artifactId>ST4</artifactId> - <version>${ST4.version}</version> + <artifactId>stringtemplate</artifactId> + <version>3.2.1</version> + <scope>test</scope> </dependency> - <!-- test inter-project --> </dependencies> <build> diff --git a/parser/src/test/org/apache/hadoop/hive/ql/parse/HqlParser.java b/parser/src/test/org/apache/hadoop/hive/ql/parse/HqlParser.java new file mode 100644 index 0000000..e74172c --- /dev/null +++ b/parser/src/test/org/apache/hadoop/hive/ql/parse/HqlParser.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import org.antlr.runtime.ANTLRStringStream; +import org.antlr.runtime.CharStream; +import org.antlr.runtime.CommonToken; +import org.antlr.runtime.RecognitionException; +import org.antlr.runtime.Token; +import org.antlr.runtime.TokenRewriteStream; +import org.antlr.runtime.TokenStream; +import org.antlr.runtime.tree.CommonTree; +import org.antlr.runtime.tree.CommonTreeAdaptor; +import org.antlr.runtime.tree.DOTTreeGenerator; +import org.antlr.runtime.tree.TreeAdaptor; +import org.antlr.stringtemplate.StringTemplate; + +/** + * A simple command-line application that accepts an SQL statement as a single + * argument. The SQL is parsed with the Hive SQL (HQL) parser and outputs the + * parse graph in the DOT (graphviz) file format. DOT is a graph description + * language. DOT graphs are typically files with the filename extension gv or + * dot. The output can be rendered with any Graphviz-compatible viewer. + * + * @see <a href="https://dreampuf.github.io/GraphvizOnline/">Graphviz Online + * Render</a> + */ +public class HqlParser { + + /** + * The main entry point of this application; args[0] holds the SQL text. 
+ */ + public static void main(String[] args) throws RecognitionException { + HiveLexer lexer = new HiveLexer(new ANTLRNoCaseStringStream(args[0])); + HiveParser parser = new HiveParser(new TokenRewriteStream(lexer)); + parser.setTreeAdaptor(ADAPTOR); + CommonTree tree = parser.statement().getTree(); + DOTTreeGenerator gen = new DOTTreeGenerator(); + StringTemplate st = gen.toDOT(tree); + System.out.println(st); + } + + /** + * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes + * so that the graph walking algorithms and the rules framework defined in + * ql.lib can be used with the AST Nodes. + */ + public static final TreeAdaptor ADAPTOR = new CommonTreeAdaptor() { + /** + * Creates an ASTNode for the given token. The ASTNode is a wrapper around + * antlr's CommonTree class that implements the Node interface. + * + * @param payload The token. + * @return Object (which is actually an ASTNode) for the token. + */ + @Override + public Object create(Token payload) { + return new ASTNode(payload); + } + + @Override + public Token createToken(int tokenType, String text) { + if (tokenType == HiveParser.TOK_SETCOLREF) { + // ParseUtils.processSetColsNode() can change type of TOK_SETCOLREF + // nodes later + return new CommonToken(tokenType, text); + } else { + return new ImmutableCommonToken(tokenType, text); + } + } + + @Override + public Object dupNode(Object t) { + return create(((CommonTree) t).token); + } + + @Override + public Object dupTree(Object t, Object parent) { + // Overridden to copy start index / end index, that is needed through + // optimization, e.g., for masking/filtering + ASTNode astNode = (ASTNode) t; + ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent); + astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex()); + astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex()); + return astNodeCopy; + } + + @Override + public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) { + return 
new ASTErrorNode(input, start, stop, e); + } + }; + + /** + * ANTLRNoCaseStringStream. + * + * This class provides an implementation for a case insensitive token checker + * for the lexical analysis part of antlr. By converting the token stream into + * upper case at the time when lexical rules are checked, this class ensures + * that the lexical rules need to just match the token with upper case letters + * as opposed to combination of upper case and lower case characters. This is + * purely used for matching lexical rules. The actual token text is stored in + * the same way as the user input without actually converting it into an upper + * case. The token values are generated by the consume() function of the super + * class ANTLRStringStream. The LA() function is the lookahead function and is + * purely used for matching lexical rules. This also means that the grammar + * will only accept capitalized tokens in case it is run from other tools like + * antlrworks which do not have the ANTLRNoCaseStringStream implementation. + */ + public static class ANTLRNoCaseStringStream extends ANTLRStringStream { + + public ANTLRNoCaseStringStream(String input) { + super(input); + } + + @Override + public int LA(int i) { + + int returnChar = super.LA(i); + if (returnChar == CharStream.EOF) { + return returnChar; + } else if (returnChar == 0) { + return returnChar; + } + + return Character.toUpperCase((char) returnChar); + } + } + +}