Repository: spark
Updated Branches:
  refs/heads/master 270a65958 -> b600bccf4


http://git-wip-us.apache.org/repos/asf/spark/blob/b600bccf/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java
new file mode 100644
index 0000000..c77198b
--- /dev/null
+++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseDriver.java
@@ -0,0 +1,213 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.parser;
+
+import java.util.ArrayList;
+import org.antlr.runtime.ANTLRStringStream;
+import org.antlr.runtime.CharStream;
+import org.antlr.runtime.NoViableAltException;
+import org.antlr.runtime.RecognitionException;
+import org.antlr.runtime.Token;
+import org.antlr.runtime.TokenRewriteStream;
+import org.antlr.runtime.TokenStream;
+import org.antlr.runtime.tree.CommonTree;
+import org.antlr.runtime.tree.CommonTreeAdaptor;
+import org.antlr.runtime.tree.TreeAdaptor;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.ql.Context;
+
+/**
+ * ParseDriver.
+ *
+ */
+public class ParseDriver {
+
+  private static final Logger LOG = LoggerFactory.getLogger("hive.ql.parse.ParseDriver");
+
+  /**
+   * ANTLRNoCaseStringStream.
+   *
+   */
+  //This class provides an implementation for a case insensitive token checker
+  //for the lexical analysis part of antlr. By converting the token stream into
+  //upper case at the time when lexical rules are checked, this class ensures that the
+  //lexical rules need only match the token with upper case letters as opposed to
+  //a combination of upper case and lower case characters. This is purely used for matching lexical
+  //rules. The actual token text is stored in the same way as the user input without
+  //actually converting it to upper case. The token values are generated by the consume()
+  //function of the super class ANTLRStringStream. The LA() function is the lookahead function
+  //and is purely used for matching lexical rules. This also means that the grammar will only
+  //accept capitalized tokens if it is run from other tools like antlrworks, which
+  //do not have the ANTLRNoCaseStringStream implementation.
+  public class ANTLRNoCaseStringStream extends ANTLRStringStream {
+
+    public ANTLRNoCaseStringStream(String input) {
+      super(input);
+    }
+
+    @Override
+    public int LA(int i) {
+
+      int returnChar = super.LA(i);
+      if (returnChar == CharStream.EOF) {
+        return returnChar;
+      } else if (returnChar == 0) {
+        return returnChar;
+      }
+
+      return Character.toUpperCase((char) returnChar);
+    }
+  }
+
+  /**
+   * HiveLexerX.
+   *
+   */
+  public class HiveLexerX extends SparkSqlLexer {
+
+    private final ArrayList<ParseError> errors;
+
+    public HiveLexerX(CharStream input) {
+      super(input);
+      errors = new ArrayList<ParseError>();
+    }
+
+    @Override
+    public void displayRecognitionError(String[] tokenNames, RecognitionException e) {
+      errors.add(new ParseError(this, e, tokenNames));
+    }
+
+    @Override
+    public String getErrorMessage(RecognitionException e, String[] tokenNames) {
+      String msg = null;
+
+      if (e instanceof NoViableAltException) {
+        // @SuppressWarnings("unused")
+        // NoViableAltException nvae = (NoViableAltException) e;
+        // for development, can add
+        // "decision=<<"+nvae.grammarDecisionDescription+">>"
+        // and "(decision="+nvae.decisionNumber+") and
+        // "state "+nvae.stateNumber
+        msg = "character " + getCharErrorDisplay(e.c) + " not supported here";
+      } else {
+        msg = super.getErrorMessage(e, tokenNames);
+      }
+
+      return msg;
+    }
+
+    public ArrayList<ParseError> getErrors() {
+      return errors;
+    }
+
+  }
+
+  /**
+   * Tree adaptor for making antlr return ASTNodes instead of CommonTree nodes
+   * so that the graph walking algorithms and the rules framework defined in
+   * ql.lib can be used with the AST Nodes.
+   */
+  public static final TreeAdaptor adaptor = new CommonTreeAdaptor() {
+    /**
+     * Creates an ASTNode for the given token. The ASTNode is a wrapper around
+     * antlr's CommonTree class that implements the Node interface.
+     *
+     * @param payload
+     *          The token.
+     * @return Object (which is actually an ASTNode) for the token.
+     */
+    @Override
+    public Object create(Token payload) {
+      return new ASTNode(payload);
+    }
+
+    @Override
+    public Object dupNode(Object t) {
+
+      return create(((CommonTree)t).token);
+    };
+
+    @Override
+    public Object errorNode(TokenStream input, Token start, Token stop, RecognitionException e) {
+      return new ASTErrorNode(input, start, stop, e);
+    };
+  };
+
+  public ASTNode parse(String command) throws ParseException {
+    return parse(command, null);
+  }
+  
+  public ASTNode parse(String command, Context ctx) 
+      throws ParseException {
+    return parse(command, ctx, true);
+  }
+
+  /**
+   * Parses a command, optionally assigning the parser's token stream to the
+   * given context.
+   *
+   * @param command
+   *          command to parse
+   *
+   * @param ctx
+   *          context with which to associate this parser's token stream, or
+   *          null if either no context is available or the context already has
+   *          an existing stream
+   *
+   * @return parsed AST
+   */
+  public ASTNode parse(String command, Context ctx, boolean setTokenRewriteStream)
+      throws ParseException {
+    LOG.info("Parsing command: " + command);
+
+    HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
+    TokenRewriteStream tokens = new TokenRewriteStream(lexer);
+    if (ctx != null) {
+      if (setTokenRewriteStream) {
+        ctx.setTokenRewriteStream(tokens);
+      }
+      lexer.setHiveConf(ctx.getConf());
+    }
+    SparkSqlParser parser = new SparkSqlParser(tokens);
+    if (ctx != null) {
+      parser.setHiveConf(ctx.getConf());
+    }
+    parser.setTreeAdaptor(adaptor);
+    SparkSqlParser.statement_return r = null;
+    try {
+      r = parser.statement();
+    } catch (RecognitionException e) {
+      e.printStackTrace();
+      throw new ParseException(parser.errors);
+    }
+
+    if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) {
+      LOG.info("Parse Completed");
+    } else if (lexer.getErrors().size() != 0) {
+      throw new ParseException(lexer.getErrors());
+    } else {
+      throw new ParseException(parser.errors);
+    }
+
+    ASTNode tree = (ASTNode) r.getTree();
+    tree.setUnknownTokenBoundaries();
+    return tree;
+  }
+}
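
For context (not part of the commit): a minimal sketch of how the new entry point can be driven, mirroring what HiveQl.getAst does further down in this change. The class name and SQL text are illustrative; only ParseDriver.parse(String), ParseUtils.findRootNonNullToken and the tree accessors ASTNode inherits from ANTLR's CommonTree are assumed.

  import org.apache.spark.sql.parser.ASTNode;
  import org.apache.spark.sql.parser.ParseDriver;
  import org.apache.spark.sql.parser.ParseException;
  import org.apache.spark.sql.parser.ParseUtils;

  public class ParseDriverSketch {
    public static void main(String[] args) throws ParseException {
      // No Hive Context is needed for parsing alone, so the one-argument
      // overload (which forwards to parse(command, null, true)) is enough here.
      ASTNode ast = ParseUtils.findRootNonNullToken(
          new ParseDriver().parse("SELECT key FROM src"));
      // The case-insensitive stream only affects lexical rule matching;
      // token text keeps the user's original casing.
      System.out.println(ast.getText() + ", children: " + ast.getChildCount());
    }
  }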

http://git-wip-us.apache.org/repos/asf/spark/blob/b600bccf/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseError.java
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseError.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseError.java
new file mode 100644
index 0000000..b47bcfb
--- /dev/null
+++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseError.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.parser;
+
+import org.antlr.runtime.BaseRecognizer;
+import org.antlr.runtime.RecognitionException;
+
+/**
+ * A single error reported by the lexer or parser, kept together with the recognizer that produced it.
+ */
+public class ParseError {
+  private final BaseRecognizer br;
+  private final RecognitionException re;
+  private final String[] tokenNames;
+
+  ParseError(BaseRecognizer br, RecognitionException re, String[] tokenNames) {
+    this.br = br;
+    this.re = re;
+    this.tokenNames = tokenNames;
+  }
+
+  BaseRecognizer getBaseRecognizer() {
+    return br;
+  }
+
+  RecognitionException getRecognitionException() {
+    return re;
+  }
+
+  String[] getTokenNames() {
+    return tokenNames;
+  }
+
+  String getMessage() {
+    return br.getErrorHeader(re) + " " + br.getErrorMessage(re, tokenNames);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/b600bccf/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseException.java
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseException.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseException.java
new file mode 100644
index 0000000..fff891c
--- /dev/null
+++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseException.java
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.parser;
+
+import java.util.ArrayList;
+
+/**
+ * ParseException.
+ *
+ */
+public class ParseException extends Exception {
+
+  private static final long serialVersionUID = 1L;
+  ArrayList<ParseError> errors;
+
+  public ParseException(ArrayList<ParseError> errors) {
+    super();
+    this.errors = errors;
+  }
+
+  @Override
+  public String getMessage() {
+
+    StringBuilder sb = new StringBuilder();
+    for (ParseError err : errors) {
+      if (sb.length() > 0) {
+        sb.append('\n');
+      }
+      sb.append(err.getMessage());
+    }
+
+    return sb.toString();
+  }
+
+}
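
A hedged sketch (again illustrative only) of how the aggregated message surfaces at a call site; the malformed SQL below is arbitrary.

  import org.apache.spark.sql.parser.ParseDriver;
  import org.apache.spark.sql.parser.ParseException;

  public class ParseErrorReporting {
    public static void main(String[] args) {
      try {
        new ParseDriver().parse("SELECT FROM WHERE");
      } catch (ParseException pe) {
        // getMessage() emits one line per ParseError collected by
        // HiveLexerX or SparkSqlParser while recognizing the statement.
        System.err.println(pe.getMessage());
      }
    }
  }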

http://git-wip-us.apache.org/repos/asf/spark/blob/b600bccf/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java
new file mode 100644
index 0000000..a5c2998
--- /dev/null
+++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/ParseUtils.java
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.parser;
+
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+
+
+/**
+ * Library of utility functions used in the parse code.
+ *
+ */
+public final class ParseUtils {
+  /**
+   * Performs a descent of the leftmost branch of a tree, stopping when either a
+   * node with a non-null token is found or the leaf level is encountered.
+   *
+   * @param tree
+   *          candidate node from which to start searching
+   *
+   * @return node at which descent stopped
+   */
+  public static ASTNode findRootNonNullToken(ASTNode tree) {
+    while ((tree.getToken() == null) && (tree.getChildCount() > 0)) {
+      tree = (org.apache.spark.sql.parser.ASTNode) tree.getChild(0);
+    }
+    return tree;
+  }
+
+  private ParseUtils() {
+    // prevent instantiation
+  }
+
+  public static VarcharTypeInfo getVarcharTypeInfo(ASTNode node)
+      throws SemanticException {
+    if (node.getChildCount() != 1) {
+      throw new SemanticException("Bad params for type varchar");
+    }
+
+    String lengthStr = node.getChild(0).getText();
+    return TypeInfoFactory.getVarcharTypeInfo(Integer.valueOf(lengthStr));
+  }
+
+  public static CharTypeInfo getCharTypeInfo(ASTNode node)
+      throws SemanticException {
+    if (node.getChildCount() != 1) {
+      throw new SemanticException("Bad params for type char");
+    }
+
+    String lengthStr = node.getChild(0).getText();
+    return TypeInfoFactory.getCharTypeInfo(Integer.valueOf(lengthStr));
+  }
+
+  public static DecimalTypeInfo getDecimalTypeTypeInfo(ASTNode node)
+      throws SemanticException {
+    if (node.getChildCount() > 2) {
+      throw new SemanticException("Bad params for type decimal");
+    }
+
+    int precision = HiveDecimal.USER_DEFAULT_PRECISION;
+    int scale = HiveDecimal.USER_DEFAULT_SCALE;
+
+    if (node.getChildCount() >= 1) {
+      String precStr = node.getChild(0).getText();
+      precision = Integer.valueOf(precStr);
+    }
+
+    if (node.getChildCount() == 2) {
+      String scaleStr = node.getChild(1).getText();
+      scale = Integer.valueOf(scaleStr);
+    }
+
+    return TypeInfoFactory.getDecimalTypeInfo(precision, scale);
+  }
+
+}
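
An illustrative sketch of the type helpers above, assuming ASTNode keeps the CommonTree-based API it wraps (construction from a Token, addChild, getChild) and that SparkSqlParser exposes the TOK_DECIMAL and Number token types referenced elsewhere in this commit; the precision and scale values are arbitrary.

  import org.antlr.runtime.CommonToken;
  import org.apache.hadoop.hive.ql.parse.SemanticException;
  import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
  import org.apache.spark.sql.parser.ASTNode;
  import org.apache.spark.sql.parser.ParseUtils;
  import org.apache.spark.sql.parser.SparkSqlParser;

  public class DecimalTypeSketch {
    public static void main(String[] args) throws SemanticException {
      // decimal(10, 2): precision and scale arrive as child tokens of TOK_DECIMAL.
      ASTNode node = new ASTNode(new CommonToken(SparkSqlParser.TOK_DECIMAL, "TOK_DECIMAL"));
      node.addChild(new ASTNode(new CommonToken(SparkSqlParser.Number, "10")));
      node.addChild(new ASTNode(new CommonToken(SparkSqlParser.Number, "2")));
      DecimalTypeInfo info = ParseUtils.getDecimalTypeTypeInfo(node);
      System.out.println(info.getQualifiedName()); // decimal(10,2)
    }
  }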

http://git-wip-us.apache.org/repos/asf/spark/blob/b600bccf/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java b/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java
new file mode 100644
index 0000000..4b2015e
--- /dev/null
+++ b/sql/hive/src/main/java/org/apache/spark/sql/parser/SemanticAnalyzer.java
@@ -0,0 +1,406 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.parser;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.antlr.runtime.tree.Tree;
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.ErrorMsg;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+
+/**
+ * SemanticAnalyzer.
+ *
+ */
+public abstract class SemanticAnalyzer {
+  public static String charSetString(String charSetName, String charSetString)
+      throws SemanticException {
+    try {
+      // The character set name starts with a _, so strip that
+      charSetName = charSetName.substring(1);
+      if (charSetString.charAt(0) == '\'') {
+        return new String(unescapeSQLString(charSetString).getBytes(),
+            charSetName);
+      } else // hex input is also supported
+      {
+        assert charSetString.charAt(0) == '0';
+        assert charSetString.charAt(1) == 'x';
+        charSetString = charSetString.substring(2);
+
+        byte[] bArray = new byte[charSetString.length() / 2];
+        int j = 0;
+        for (int i = 0; i < charSetString.length(); i += 2) {
+          int val = Character.digit(charSetString.charAt(i), 16) * 16
+              + Character.digit(charSetString.charAt(i + 1), 16);
+          if (val > 127) {
+            val = val - 256;
+          }
+          bArray[j++] = (byte)val;
+        }
+
+        String res = new String(bArray, charSetName);
+        return res;
+      }
+    } catch (UnsupportedEncodingException e) {
+      throw new SemanticException(e);
+    }
+  }
+
+  /**
+   * Remove the encapsulating "`" pair from the identifier. We allow users to
+   * use "`" to escape identifiers for table names, column names and aliases, in
+   * case they coincide with Hive language keywords.
+   */
+  public static String unescapeIdentifier(String val) {
+    if (val == null) {
+      return null;
+    }
+    if (val.charAt(0) == '`' && val.charAt(val.length() - 1) == '`') {
+      val = val.substring(1, val.length() - 1);
+    }
+    return val;
+  }
+
+  /**
+   * Converts parsed key/value properties pairs into a map.
+   *
+   * @param prop ASTNode parent of the key/value pairs
+   *
+   * @param mapProp property map which receives the mappings
+   */
+  public static void readProps(
+    ASTNode prop, Map<String, String> mapProp) {
+
+    for (int propChild = 0; propChild < prop.getChildCount(); propChild++) {
+      String key = unescapeSQLString(prop.getChild(propChild).getChild(0)
+          .getText());
+      String value = null;
+      if (prop.getChild(propChild).getChild(1) != null) {
+        value = unescapeSQLString(prop.getChild(propChild).getChild(1).getText());
+      }
+      mapProp.put(key, value);
+    }
+  }
+
+  private static final int[] multiplier = new int[] {1000, 100, 10, 1};
+
+  @SuppressWarnings("nls")
+  public static String unescapeSQLString(String b) {
+    Character enclosure = null;
+
+    // Some of the strings can be passed in as unicode. For example, the
+    // delimiter can be passed in as \002 - So, we first check if the
+    // string is a unicode number, else go back to the old behavior
+    StringBuilder sb = new StringBuilder(b.length());
+    for (int i = 0; i < b.length(); i++) {
+
+      char currentChar = b.charAt(i);
+      if (enclosure == null) {
+        if (currentChar == '\'' || b.charAt(i) == '\"') {
+          enclosure = currentChar;
+        }
+        // ignore all other chars outside the enclosure
+        continue;
+      }
+
+      if (enclosure.equals(currentChar)) {
+        enclosure = null;
+        continue;
+      }
+
+      if (currentChar == '\\' && (i + 6 < b.length()) && b.charAt(i + 1) == 'u') {
+        int code = 0;
+        int base = i + 2;
+        for (int j = 0; j < 4; j++) {
+          int digit = Character.digit(b.charAt(j + base), 16);
+          code += digit * multiplier[j];
+        }
+        sb.append((char)code);
+        i += 5;
+        continue;
+      }
+
+      if (currentChar == '\\' && (i + 4 < b.length())) {
+        char i1 = b.charAt(i + 1);
+        char i2 = b.charAt(i + 2);
+        char i3 = b.charAt(i + 3);
+        if ((i1 >= '0' && i1 <= '1') && (i2 >= '0' && i2 <= '7')
+            && (i3 >= '0' && i3 <= '7')) {
+          byte bVal = (byte) ((i3 - '0') + ((i2 - '0') * 8) + ((i1 - '0') * 8 * 8));
+          byte[] bValArr = new byte[1];
+          bValArr[0] = bVal;
+          String tmp = new String(bValArr);
+          sb.append(tmp);
+          i += 3;
+          continue;
+        }
+      }
+
+      if (currentChar == '\\' && (i + 2 < b.length())) {
+        char n = b.charAt(i + 1);
+        switch (n) {
+        case '0':
+          sb.append("\0");
+          break;
+        case '\'':
+          sb.append("'");
+          break;
+        case '"':
+          sb.append("\"");
+          break;
+        case 'b':
+          sb.append("\b");
+          break;
+        case 'n':
+          sb.append("\n");
+          break;
+        case 'r':
+          sb.append("\r");
+          break;
+        case 't':
+          sb.append("\t");
+          break;
+        case 'Z':
+          sb.append("\u001A");
+          break;
+        case '\\':
+          sb.append("\\");
+          break;
+        // The following 2 lines are exactly what MySQL does TODO: why do we do this?
+        case '%':
+          sb.append("\\%");
+          break;
+        case '_':
+          sb.append("\\_");
+          break;
+        default:
+          sb.append(n);
+        }
+        i++;
+      } else {
+        sb.append(currentChar);
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Get the list of FieldSchema out of the ASTNode.
+   */
+  public static List<FieldSchema> getColumns(ASTNode ast, boolean lowerCase) throws SemanticException {
+    List<FieldSchema> colList = new ArrayList<FieldSchema>();
+    int numCh = ast.getChildCount();
+    for (int i = 0; i < numCh; i++) {
+      FieldSchema col = new FieldSchema();
+      ASTNode child = (ASTNode) ast.getChild(i);
+      Tree grandChild = child.getChild(0);
+      if(grandChild != null) {
+        String name = grandChild.getText();
+        if(lowerCase) {
+          name = name.toLowerCase();
+        }
+        // child 0 is the name of the column
+        col.setName(unescapeIdentifier(name));
+        // child 1 is the type of the column
+        ASTNode typeChild = (ASTNode) (child.getChild(1));
+        col.setType(getTypeStringFromAST(typeChild));
+
+        // child 2 is the optional comment of the column
+        if (child.getChildCount() == 3) {
+          col.setComment(unescapeSQLString(child.getChild(2).getText()));
+        }
+      }
+      colList.add(col);
+    }
+    return colList;
+  }
+
+  protected static String getTypeStringFromAST(ASTNode typeNode)
+      throws SemanticException {
+    switch (typeNode.getType()) {
+    case SparkSqlParser.TOK_LIST:
+      return serdeConstants.LIST_TYPE_NAME + "<"
+          + getTypeStringFromAST((ASTNode) typeNode.getChild(0)) + ">";
+    case SparkSqlParser.TOK_MAP:
+      return serdeConstants.MAP_TYPE_NAME + "<"
+          + getTypeStringFromAST((ASTNode) typeNode.getChild(0)) + ","
+          + getTypeStringFromAST((ASTNode) typeNode.getChild(1)) + ">";
+    case SparkSqlParser.TOK_STRUCT:
+      return getStructTypeStringFromAST(typeNode);
+    case SparkSqlParser.TOK_UNIONTYPE:
+      return getUnionTypeStringFromAST(typeNode);
+    default:
+      return getTypeName(typeNode);
+    }
+  }
+
+  private static String getStructTypeStringFromAST(ASTNode typeNode)
+      throws SemanticException {
+    String typeStr = serdeConstants.STRUCT_TYPE_NAME + "<";
+    typeNode = (ASTNode) typeNode.getChild(0);
+    int children = typeNode.getChildCount();
+    if (children <= 0) {
+      throw new SemanticException("empty struct not allowed.");
+    }
+    StringBuilder buffer = new StringBuilder(typeStr);
+    for (int i = 0; i < children; i++) {
+      ASTNode child = (ASTNode) typeNode.getChild(i);
+      buffer.append(unescapeIdentifier(child.getChild(0).getText())).append(":");
+      buffer.append(getTypeStringFromAST((ASTNode) child.getChild(1)));
+      if (i < children - 1) {
+        buffer.append(",");
+      }
+    }
+
+    buffer.append(">");
+    return buffer.toString();
+  }
+
+  private static String getUnionTypeStringFromAST(ASTNode typeNode)
+      throws SemanticException {
+    String typeStr = serdeConstants.UNION_TYPE_NAME + "<";
+    typeNode = (ASTNode) typeNode.getChild(0);
+    int children = typeNode.getChildCount();
+    if (children <= 0) {
+      throw new SemanticException("empty union not allowed.");
+    }
+    StringBuilder buffer = new StringBuilder(typeStr);
+    for (int i = 0; i < children; i++) {
+      buffer.append(getTypeStringFromAST((ASTNode) typeNode.getChild(i)));
+      if (i < children - 1) {
+        buffer.append(",");
+      }
+    }
+    buffer.append(">");
+    typeStr = buffer.toString();
+    return typeStr;
+  }
+
+  public static String getAstNodeText(ASTNode tree) {
+    return tree.getChildCount() == 0 ? tree.getText() :
+        getAstNodeText((ASTNode)tree.getChild(tree.getChildCount() - 1));
+  }
+
+  public static String generateErrorMessage(ASTNode ast, String message) {
+    StringBuilder sb = new StringBuilder();
+    if (ast == null) {
+      sb.append(message).append(". Cannot tell the position of null AST.");
+      return sb.toString();
+    }
+    sb.append(ast.getLine());
+    sb.append(":");
+    sb.append(ast.getCharPositionInLine());
+    sb.append(" ");
+    sb.append(message);
+    sb.append(". Error encountered near token '");
+    sb.append(getAstNodeText(ast));
+    sb.append("'");
+    return sb.toString();
+  }
+
+  private static final Map<Integer, String> TokenToTypeName = new HashMap<Integer, String>();
+
+  static {
+    TokenToTypeName.put(SparkSqlParser.TOK_BOOLEAN, serdeConstants.BOOLEAN_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_TINYINT, serdeConstants.TINYINT_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_SMALLINT, serdeConstants.SMALLINT_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_INT, serdeConstants.INT_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_BIGINT, serdeConstants.BIGINT_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_FLOAT, serdeConstants.FLOAT_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_DOUBLE, serdeConstants.DOUBLE_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_CHAR, serdeConstants.CHAR_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_DATETIME, serdeConstants.DATETIME_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_INTERVAL_YEAR_MONTH, serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_INTERVAL_DAY_TIME, serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME);
+    TokenToTypeName.put(SparkSqlParser.TOK_DECIMAL, serdeConstants.DECIMAL_TYPE_NAME);
+  }
+
+  public static String getTypeName(ASTNode node) throws SemanticException {
+    int token = node.getType();
+    String typeName;
+
+    // datetime type isn't currently supported
+    if (token == SparkSqlParser.TOK_DATETIME) {
+      throw new SemanticException(ErrorMsg.UNSUPPORTED_TYPE.getMsg());
+    }
+
+    switch (token) {
+      case SparkSqlParser.TOK_CHAR:
+        CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(node);
+        typeName = charTypeInfo.getQualifiedName();
+        break;
+      case SparkSqlParser.TOK_VARCHAR:
+        VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(node);
+        typeName = varcharTypeInfo.getQualifiedName();
+        break;
+      case SparkSqlParser.TOK_DECIMAL:
+        DecimalTypeInfo decTypeInfo = ParseUtils.getDecimalTypeTypeInfo(node);
+        typeName = decTypeInfo.getQualifiedName();
+        break;
+      default:
+        typeName = TokenToTypeName.get(token);
+    }
+    return typeName;
+  }
+
+  public static String relativeToAbsolutePath(HiveConf conf, String location) throws SemanticException {
+    boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE);
+    if (testMode) {
+      URI uri = new Path(location).toUri();
+      String scheme = uri.getScheme();
+      String authority = uri.getAuthority();
+      String path = uri.getPath();
+      if (!path.startsWith("/")) {
+        path = (new Path(System.getProperty("test.tmp.dir"),
+            path)).toUri().getPath();
+      }
+      if (StringUtils.isEmpty(scheme)) {
+        scheme = "pfile";
+      }
+      try {
+        uri = new URI(scheme, authority, path, null, null);
+      } catch (URISyntaxException e) {
+        throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
+      }
+      return uri.toString();
+    } else {
+      //no-op for non-test mode for now
+      return location;
+    }
+  }
+}
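
To make the escaping rules above concrete, a small illustrative snippet (the literals are arbitrary; only the static helpers defined in this file are used):

  import org.apache.spark.sql.parser.SemanticAnalyzer;

  public class UnescapeSketch {
    public static void main(String[] args) {
      // Enclosing quotes are dropped and backslash escapes such as \n and \t
      // are expanded, following the switch statement in unescapeSQLString.
      System.out.println(SemanticAnalyzer.unescapeSQLString("'line1\\nline2\\tend'"));
      // Backtick-quoted identifiers simply lose the enclosing backticks.
      System.out.println(SemanticAnalyzer.unescapeIdentifier("`my_table`"));
    }
  }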

http://git-wip-us.apache.org/repos/asf/spark/blob/b600bccf/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 0e89928..b1d841d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -27,28 +27,28 @@ import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
 import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry}
 import org.apache.hadoop.hive.ql.lib.Node
-import org.apache.hadoop.hive.ql.parse._
+import org.apache.hadoop.hive.ql.parse.SemanticException
 import org.apache.hadoop.hive.ql.plan.PlanUtils
 import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.hive.ql.{Context, ErrorMsg}
 import org.apache.hadoop.hive.serde.serdeConstants
 import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
-
 import org.apache.spark.Logging
-import org.apache.spark.sql.{AnalysisException, catalyst}
+import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
-import org.apache.spark.sql.catalyst.plans.{logical, _}
 import org.apache.spark.sql.catalyst.plans.logical._
+import org.apache.spark.sql.catalyst.plans.{logical, _}
 import org.apache.spark.sql.catalyst.trees.CurrentOrigin
-import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.execution.ExplainCommand
 import org.apache.spark.sql.execution.datasources.DescribeCommand
 import org.apache.spark.sql.hive.HiveShim._
 import org.apache.spark.sql.hive.client._
 import org.apache.spark.sql.hive.execution.{AnalyzeTable, DropTable, HiveNativeCommand, HiveScriptIOSchema}
+import org.apache.spark.sql.parser._
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.{AnalysisException, catalyst}
 import org.apache.spark.unsafe.types.CalendarInterval
 import org.apache.spark.util.random.RandomSampler
 
@@ -227,7 +227,7 @@ private[hive] object HiveQl extends Logging {
      */
     def withChildren(newChildren: Seq[ASTNode]): ASTNode = {
       (1 to n.getChildCount).foreach(_ => n.deleteChild(0))
-      n.addChildren(newChildren.asJava)
+      newChildren.foreach(n.addChild(_))
       n
     }
 
@@ -273,7 +273,8 @@ private[hive] object HiveQl extends Logging {
   private def createContext(): Context = new Context(hiveConf)
 
   private def getAst(sql: String, context: Context) =
-    ParseUtils.findRootNonNullToken((new ParseDriver).parse(sql, context))
+    ParseUtils.findRootNonNullToken(
+        (new ParseDriver).parse(sql, context))
 
   /**
    * Returns the HiveConf
@@ -312,7 +313,7 @@ private[hive] object HiveQl extends Logging {
       context.clear()
       plan
     } catch {
-      case pe: org.apache.hadoop.hive.ql.parse.ParseException =>
+      case pe: ParseException =>
         pe.getMessage match {
           case errorRegEx(line, start, message) =>
             throw new AnalysisException(message, Some(line.toInt), 
Some(start.toInt))
@@ -337,7 +338,8 @@ private[hive] object HiveQl extends Logging {
     val tree =
       try {
         ParseUtils.findRootNonNullToken(
-          (new ParseDriver).parse(ddl, null /* no context required for parsing alone */))
+          (new ParseDriver)
+            .parse(ddl, null /* no context required for parsing alone */))
       } catch {
         case pe: org.apache.hadoop.hive.ql.parse.ParseException =>
           throw new RuntimeException(s"Failed to parse ddl: '$ddl'", pe)
@@ -598,12 +600,12 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
         NativePlaceholder
       } else {
         tableType match {
-          case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => {
-            nameParts.head match {
+          case Token("TOK_TABTYPE", Token("TOK_TABNAME", nameParts :: Nil) :: Nil) => {
+            nameParts match {
               case Token(".", dbName :: tableName :: Nil) =>
                // It is describing a table with the format like "describe db.table".
                // TODO: Actually, a user may mean tableName.columnName. Need to resolve this issue.
-                val tableIdent = extractTableIdent(nameParts.head)
+                val tableIdent = extractTableIdent(nameParts)
                 DescribeCommand(
                  UnresolvedRelation(tableIdent, None), isExtended = extended.isDefined)
               case Token(".", dbName :: tableName :: colName :: Nil) =>
@@ -662,7 +664,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
         NativePlaceholder
       } else {
         val schema = maybeColumns.map { cols =>
-          BaseSemanticAnalyzer.getColumns(cols, true).asScala.map { field =>
+          SemanticAnalyzer.getColumns(cols, true).asScala.map { field =>
            // We can't specify column types when create view, so fill it with null first, and
             // update it after the schema has been resolved later.
             HiveColumn(field.getName, null, field.getComment)
@@ -678,7 +680,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
 
         maybeComment.foreach {
           case Token("TOK_TABLECOMMENT", child :: Nil) =>
-            val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText)
+            val comment = SemanticAnalyzer.unescapeSQLString(child.getText)
             if (comment ne null) {
               properties += ("comment" -> comment)
             }
@@ -750,7 +752,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
 
       children.collect {
         case list @ Token("TOK_TABCOLLIST", _) =>
-          val cols = BaseSemanticAnalyzer.getColumns(list, true)
+          val cols = SemanticAnalyzer.getColumns(list, true)
           if (cols != null) {
             tableDesc = tableDesc.copy(
               schema = cols.asScala.map { field =>
@@ -758,11 +760,11 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
               })
           }
         case Token("TOK_TABLECOMMENT", child :: Nil) =>
-          val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText)
+          val comment = SemanticAnalyzer.unescapeSQLString(child.getText)
           // TODO support the sql text
           tableDesc = tableDesc.copy(viewText = Option(comment))
         case Token("TOK_TABLEPARTCOLS", list @ Token("TOK_TABCOLLIST", _) :: Nil) =>
-          val cols = BaseSemanticAnalyzer.getColumns(list(0), false)
+          val cols = SemanticAnalyzer.getColumns(list(0), false)
           if (cols != null) {
             tableDesc = tableDesc.copy(
               partitionColumns = cols.asScala.map { field =>
@@ -773,21 +775,21 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
           val serdeParams = new java.util.HashMap[String, String]()
           child match {
             case Token("TOK_TABLEROWFORMATFIELD", rowChild1 :: rowChild2) =>
-              val fieldDelim = BaseSemanticAnalyzer.unescapeSQLString (rowChild1.getText())
+              val fieldDelim = SemanticAnalyzer.unescapeSQLString (rowChild1.getText())
               serdeParams.put(serdeConstants.FIELD_DELIM, fieldDelim)
               serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, fieldDelim)
               if (rowChild2.length > 1) {
-                val fieldEscape = BaseSemanticAnalyzer.unescapeSQLString (rowChild2(0).getText)
+                val fieldEscape = SemanticAnalyzer.unescapeSQLString (rowChild2(0).getText)
                 serdeParams.put(serdeConstants.ESCAPE_CHAR, fieldEscape)
               }
             case Token("TOK_TABLEROWFORMATCOLLITEMS", rowChild :: Nil) =>
-              val collItemDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText)
+              val collItemDelim = SemanticAnalyzer.unescapeSQLString(rowChild.getText)
               serdeParams.put(serdeConstants.COLLECTION_DELIM, collItemDelim)
             case Token("TOK_TABLEROWFORMATMAPKEYS", rowChild :: Nil) =>
-              val mapKeyDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText)
+              val mapKeyDelim = SemanticAnalyzer.unescapeSQLString(rowChild.getText)
               serdeParams.put(serdeConstants.MAPKEY_DELIM, mapKeyDelim)
             case Token("TOK_TABLEROWFORMATLINES", rowChild :: Nil) =>
-              val lineDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText)
+              val lineDelim = SemanticAnalyzer.unescapeSQLString(rowChild.getText)
               if (!(lineDelim == "\n") && !(lineDelim == "10")) {
                 throw new AnalysisException(
                   SemanticAnalyzer.generateErrorMessage(
@@ -796,22 +798,22 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
               }
               serdeParams.put(serdeConstants.LINE_DELIM, lineDelim)
             case Token("TOK_TABLEROWFORMATNULL", rowChild :: Nil) =>
-              val nullFormat = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText)
+              val nullFormat = SemanticAnalyzer.unescapeSQLString(rowChild.getText)
               // TODO support the nullFormat
             case _ => assert(false)
           }
           tableDesc = tableDesc.copy(
             serdeProperties = tableDesc.serdeProperties ++ serdeParams.asScala)
         case Token("TOK_TABLELOCATION", child :: Nil) =>
-          var location = BaseSemanticAnalyzer.unescapeSQLString(child.getText)
-          location = EximUtil.relativeToAbsolutePath(hiveConf, location)
+          var location = SemanticAnalyzer.unescapeSQLString(child.getText)
+          location = SemanticAnalyzer.relativeToAbsolutePath(hiveConf, location)
           tableDesc = tableDesc.copy(location = Option(location))
         case Token("TOK_TABLESERIALIZER", child :: Nil) =>
           tableDesc = tableDesc.copy(
-            serde = Option(BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText)))
+            serde = Option(SemanticAnalyzer.unescapeSQLString(child.getChild(0).getText)))
           if (child.getChildCount == 2) {
             val serdeParams = new java.util.HashMap[String, String]()
-            BaseSemanticAnalyzer.readProps(
+            SemanticAnalyzer.readProps(
              (child.getChild(1).getChild(0)).asInstanceOf[ASTNode], serdeParams)
             tableDesc = tableDesc.copy(
              serdeProperties = tableDesc.serdeProperties ++ serdeParams.asScala)
@@ -891,9 +893,9 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
         case list @ Token("TOK_TABLEFILEFORMAT", children) =>
           tableDesc = tableDesc.copy(
             inputFormat =
-              Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)),
+              Option(SemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)),
             outputFormat =
-              Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(1).getText)))
+              Option(SemanticAnalyzer.unescapeSQLString(list.getChild(1).getText)))
         case Token("TOK_STORAGEHANDLER", _) =>
           throw new AnalysisException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg())
         case _ => // Unsupport features
@@ -909,24 +911,20 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
           Token("TOK_TABLE_PARTITION", table) :: Nil) => NativePlaceholder
 
     case Token("TOK_QUERY", queryArgs)
-        if Seq("TOK_FROM", "TOK_INSERT").contains(queryArgs.head.getText) =>
+        if Seq("TOK_CTE", "TOK_FROM", "TOK_INSERT").contains(queryArgs.head.getText) =>
 
       val (fromClause: Option[ASTNode], insertClauses, cteRelations) =
         queryArgs match {
-          case Token("TOK_FROM", args: Seq[ASTNode]) :: insertClauses =>
-            // check if has CTE
-            insertClauses.last match {
-              case Token("TOK_CTE", cteClauses) =>
-                val cteRelations = cteClauses.map(node => {
-                  val relation = nodeToRelation(node, context).asInstanceOf[Subquery]
-                  (relation.alias, relation)
-                }).toMap
-                (Some(args.head), insertClauses.init, Some(cteRelations))
-
-              case _ => (Some(args.head), insertClauses, None)
+          case Token("TOK_CTE", ctes) :: Token("TOK_FROM", from) :: inserts =>
+            val cteRelations = ctes.map { node =>
+              val relation = nodeToRelation(node, context).asInstanceOf[Subquery]
+              relation.alias -> relation
             }
-
-          case Token("TOK_INSERT", _) :: Nil => (None, queryArgs, None)
+            (Some(from.head), inserts, Some(cteRelations.toMap))
+          case Token("TOK_FROM", from) :: inserts =>
+            (Some(from.head), inserts, None)
+          case Token("TOK_INSERT", _) :: Nil =>
+            (None, queryArgs, None)
         }
 
       // Return one query for each insert clause.
@@ -1025,20 +1023,20 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
                 (rowFormat, None, Nil, false)
 
              case Token("TOK_SERDENAME", Token(serdeClass, Nil) :: Nil) :: Nil =>
-                (Nil, Some(BaseSemanticAnalyzer.unescapeSQLString(serdeClass)), Nil, false)
+                (Nil, Some(SemanticAnalyzer.unescapeSQLString(serdeClass)), Nil, false)
 
               case Token("TOK_SERDENAME", Token(serdeClass, Nil) ::
                 Token("TOK_TABLEPROPERTIES",
                Token("TOK_TABLEPROPLIST", propsClause) :: Nil) :: Nil) :: Nil =>
                 val serdeProps = propsClause.map {
                  case Token("TOK_TABLEPROPERTY", Token(name, Nil) :: Token(value, Nil) :: Nil) =>
-                    (BaseSemanticAnalyzer.unescapeSQLString(name),
-                      BaseSemanticAnalyzer.unescapeSQLString(value))
+                    (SemanticAnalyzer.unescapeSQLString(name),
+                      SemanticAnalyzer.unescapeSQLString(value))
                 }
 
                 // SPARK-10310: Special cases LazySimpleSerDe
                // TODO Fully supports user-defined record reader/writer classes
-                val unescapedSerDeClass = BaseSemanticAnalyzer.unescapeSQLString(serdeClass)
+                val unescapedSerDeClass = SemanticAnalyzer.unescapeSQLString(serdeClass)
                 val useDefaultRecordReaderWriter =
                  unescapedSerDeClass == classOf[LazySimpleSerDe].getCanonicalName
                (Nil, Some(unescapedSerDeClass), serdeProps, useDefaultRecordReaderWriter)
@@ -1055,7 +1053,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
            val (outRowFormat, outSerdeClass, outSerdeProps, useDefaultRecordWriter) =
               matchSerDe(outputSerdeClause)
 
-            val unescapedScript = BaseSemanticAnalyzer.unescapeSQLString(script)
+            val unescapedScript = SemanticAnalyzer.unescapeSQLString(script)
 
             // TODO Adds support for user-defined record reader/writer classes
             val recordReaderClass = if (useDefaultRecordReader) {
@@ -1361,6 +1359,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
         case "TOK_LEFTOUTERJOIN" => LeftOuter
         case "TOK_FULLOUTERJOIN" => FullOuter
         case "TOK_LEFTSEMIJOIN" => LeftSemi
+        case "TOK_ANTIJOIN" => throw new NotImplementedError("Anti join not supported")
       }
       Join(nodeToRelation(relation1, context),
         nodeToRelation(relation2, context),
@@ -1475,11 +1474,11 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
   }
 
   val numericAstTypes = Seq(
-    HiveParser.Number,
-    HiveParser.TinyintLiteral,
-    HiveParser.SmallintLiteral,
-    HiveParser.BigintLiteral,
-    HiveParser.DecimalLiteral)
+    SparkSqlParser.Number,
+    SparkSqlParser.TinyintLiteral,
+    SparkSqlParser.SmallintLiteral,
+    SparkSqlParser.BigintLiteral,
+    SparkSqlParser.DecimalLiteral)
 
   /* Case insensitive matches */
   val COUNT = "(?i)COUNT".r
@@ -1649,7 +1648,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
     case Token(TRUE(), Nil) => Literal.create(true, BooleanType)
     case Token(FALSE(), Nil) => Literal.create(false, BooleanType)
     case Token("TOK_STRINGLITERALSEQUENCE", strings) =>
-      Literal(strings.map(s => BaseSemanticAnalyzer.unescapeSQLString(s.getText)).mkString)
+      Literal(strings.map(s => SemanticAnalyzer.unescapeSQLString(s.getText)).mkString)
 
     // This code is adapted from
    // /ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java#L223
@@ -1684,37 +1683,37 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C
         v
       }
 
-    case ast: ASTNode if ast.getType == HiveParser.StringLiteral =>
-      Literal(BaseSemanticAnalyzer.unescapeSQLString(ast.getText))
+    case ast: ASTNode if ast.getType == SparkSqlParser.StringLiteral =>
+      Literal(SemanticAnalyzer.unescapeSQLString(ast.getText))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_DATELITERAL =>
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_DATELITERAL =>
       Literal(Date.valueOf(ast.getText.substring(1, ast.getText.length - 1)))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_CHARSETLITERAL =>
-      Literal(BaseSemanticAnalyzer.charSetString(ast.getChild(0).getText, ast.getChild(1).getText))
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_CHARSETLITERAL =>
+      Literal(SemanticAnalyzer.charSetString(ast.getChild(0).getText, ast.getChild(1).getText))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_YEAR_MONTH_LITERAL =>
       Literal(CalendarInterval.fromYearMonthString(ast.getText))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_DAY_TIME_LITERAL =>
       Literal(CalendarInterval.fromDayTimeString(ast.getText))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_YEAR_LITERAL =>
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_YEAR_LITERAL =>
       Literal(CalendarInterval.fromSingleUnitString("year", ast.getText))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_MONTH_LITERAL =>
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_MONTH_LITERAL =>
       Literal(CalendarInterval.fromSingleUnitString("month", ast.getText))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_DAY_LITERAL =>
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_DAY_LITERAL =>
       Literal(CalendarInterval.fromSingleUnitString("day", ast.getText))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_HOUR_LITERAL =>
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_HOUR_LITERAL =>
       Literal(CalendarInterval.fromSingleUnitString("hour", ast.getText))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_MINUTE_LITERAL =>
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_MINUTE_LITERAL =>
       Literal(CalendarInterval.fromSingleUnitString("minute", ast.getText))
 
-    case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_SECOND_LITERAL =>
+    case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL =>
       Literal(CalendarInterval.fromSingleUnitString("second", ast.getText))
 
     case a: ASTNode =>

