This is an automated email from the ASF dual-hosted git repository.

dmollitor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 83f917c  HIVE-23171: Create Tool To Visualize Hive Parser Tree (David Mollitor, reviewed by Miklos Gergely)
83f917c is described below

commit 83f917c787d60543f171b23d28ceda44d69c235d
Author: David Mollitor <dmolli...@apache.org>
AuthorDate: Thu Apr 16 10:32:39 2020 -0400

    HIVE-23171: Create Tool To Visualize Hive Parser Tree (David Mollitor, reviewed by Miklos Gergely)
---
 parser/pom.xml                                     |   7 +-
 .../org/apache/hadoop/hive/ql/parse/HqlParser.java | 145 +++++++++++++++++++++
 2 files changed, 148 insertions(+), 4 deletions(-)

diff --git a/parser/pom.xml b/parser/pom.xml
index 05fd78d..18e0ad8 100644
--- a/parser/pom.xml
+++ b/parser/pom.xml
@@ -49,14 +49,13 @@
     <dependency>
       <groupId>org.antlr</groupId>
       <artifactId>antlr-runtime</artifactId>
-      <version>${antlr.version}</version>
     </dependency>
     <dependency>
       <groupId>org.antlr</groupId>
-      <artifactId>ST4</artifactId>
-      <version>${ST4.version}</version>
+      <artifactId>stringtemplate</artifactId>
+      <version>3.2.1</version>
+      <scope>test</scope>
     </dependency>
-    <!-- test inter-project -->
   </dependencies>
 
   <build>
diff --git a/parser/src/test/org/apache/hadoop/hive/ql/parse/HqlParser.java b/parser/src/test/org/apache/hadoop/hive/ql/parse/HqlParser.java
new file mode 100644
index 0000000..e74172c
--- /dev/null
+++ b/parser/src/test/org/apache/hadoop/hive/ql/parse/HqlParser.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.CharStream;
+import org.antlr.runtime.CommonToken;
+import org.antlr.runtime.RecognitionException;
+import org.antlr.runtime.Token;
+import org.antlr.runtime.TokenRewriteStream;
+import org.antlr.runtime.TokenStream;
+import org.antlr.runtime.tree.CommonTree;
+import org.antlr.runtime.tree.CommonTreeAdaptor;
+import org.antlr.runtime.tree.DOTTreeGenerator;
+import org.antlr.runtime.tree.TreeAdaptor;
+import org.antlr.stringtemplate.StringTemplate;
+
+/**
+ * A simple command-line application that accepts an SQL statement as a single
+ * argument. The SQL is parsed with the Hive SQL (HQL) parser and the resulting
+ * parse tree is printed in the DOT (graphviz) file format. DOT is a graph
+ * description language. DOT graphs are typically files with the filename
+ * extension gv or dot. The output can be pasted into the renderer linked below.
+ *
+ * @see <a href="https://dreampuf.github.io/GraphvizOnline/">Graphviz Online
+ *      Render</a>
+ */
+public class HqlParser {
+
+  /**
+   * Entry point: parses {@code args[0]} and prints its tree as DOT to stdout.
+   */
+  public static void main(String[] args) throws RecognitionException {
+    HiveLexer lexer = new HiveLexer(new ANTLRNoCaseStringStream(args[0]));
+    HiveParser parser = new HiveParser(new TokenRewriteStream(lexer));
+    parser.setTreeAdaptor(ADAPTOR);
+    CommonTree tree = parser.statement().getTree();
+    DOTTreeGenerator gen = new DOTTreeGenerator();
+    StringTemplate st = gen.toDOT(tree);
+    System.out.println(st);
+  }
+
+  /**
+   * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes
+   * so that the graph walking algorithms and the rules framework defined in
+   * ql.lib can be used with the AST Nodes.
+   */
+  public static final TreeAdaptor ADAPTOR = new CommonTreeAdaptor() {
+    /**
+     * Creates an ASTNode for the given token. The ASTNode is a wrapper around
+     * antlr's CommonTree class that implements the Node interface.
+     *
+     * @param payload The token.
+     * @return Object (which is actually an ASTNode) for the token.
+     */
+    @Override
+    public Object create(Token payload) {
+      return new ASTNode(payload);
+    }
+
+    @Override
+    public Token createToken(int tokenType, String text) {
+      if (tokenType == HiveParser.TOK_SETCOLREF) {
+        // ParseUtils.processSetColsNode() can change type of TOK_SETCOLREF
+        // nodes later
+        return new CommonToken(tokenType, text);
+      } else {
+        return new ImmutableCommonToken(tokenType, text);
+      }
+    }
+
+    @Override
+    public Object dupNode(Object t) {
+      return create(((CommonTree) t).token);
+    }
+
+    @Override
+    public Object dupTree(Object t, Object parent) {
+      // Overridden to copy start index / end index, that is needed through
+      // optimization, e.g., for masking/filtering
+      ASTNode astNode = (ASTNode) t;
+      ASTNode astNodeCopy = (ASTNode) super.dupTree(t, parent);
+      astNodeCopy.setTokenStartIndex(astNode.getTokenStartIndex());
+      astNodeCopy.setTokenStopIndex(astNode.getTokenStopIndex());
+      return astNodeCopy;
+    }
+
+    @Override
+    public Object errorNode(TokenStream input, Token start, Token stop, 
RecognitionException e) {
+      return new ASTErrorNode(input, start, stop, e);
+    }
+  };
+
+  /**
+   * ANTLRNoCaseStringStream.
+   *
+   * This class provides an implementation for a case insensitive token checker
+   * for the lexical analysis part of antlr. By converting the token stream into
+   * upper case at the time when lexical rules are checked, this class ensures
+   * that the lexical rules need to just match the token with upper case letters
+   * as opposed to combination of upper case and lower case characters. This is
+   * purely used for matching lexical rules. The actual token text is stored in
+   * the same way as the user input without actually converting it into upper
+   * case. The token values are generated by the consume() function of the super
+   * class ANTLRStringStream. The LA() function is the lookahead function and is
+   * purely used for matching lexical rules. This also means that the grammar
+   * will only accept capitalized tokens in case it is run from other tools like
+   * antlrworks which do not have the ANTLRNoCaseStringStream implementation.
+   */
+  public static class ANTLRNoCaseStringStream extends ANTLRStringStream {
+
+    public ANTLRNoCaseStringStream(String input) {
+      super(input);
+    }
+
+    @Override
+    public int LA(int i) {
+      // Lookahead is upper-cased for rule matching; EOF and 0 are returned as-is.
+      int returnChar = super.LA(i);
+      if (returnChar == CharStream.EOF) {
+        return returnChar;
+      } else if (returnChar == 0) {
+        return returnChar;
+      }
+
+      return Character.toUpperCase((char) returnChar);
+    }
+  }
+
+}

Reply via email to