Github user cloud-fan commented on a diff in the pull request:
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala ---
    @@ -0,0 +1,969 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.spark.sql.catalyst
    +import java.sql.Date
    +import org.apache.spark.sql.AnalysisException
    +import org.apache.spark.sql.catalyst.analysis._
    +import org.apache.spark.sql.catalyst.expressions._
    +import org.apache.spark.sql.catalyst.expressions.aggregate.Count
    +import org.apache.spark.sql.catalyst.plans._
    +import org.apache.spark.sql.catalyst.plans.logical._
    +import org.apache.spark.sql.catalyst.trees.CurrentOrigin
    +import org.apache.spark.sql.catalyst.parser._
    +import org.apache.spark.sql.types._
    +import org.apache.spark.unsafe.types.CalendarInterval
    +import org.apache.spark.util.random.RandomSampler
    +/**
    + * This class translates a HQL String to a Catalyst [[LogicalPlan]] or [[Expression]].
    + */
    +private[sql] class CatalystQl(val conf: ParserConf = SimpleParserConf()) {
    +  /**
    +   * Extractor exposing an [[ASTNode]] as the value of its `pattern` member, typed here as a
    +   * (String, List[ASTNode]) pair. Every match attempt first records the node's line and
    +   * in-line position through [[CurrentOrigin]].
    +   */
    +  object Token {
    +    def unapply(node: ASTNode): Some[(String, List[ASTNode])] = {
    +      val line = node.line
    +      val column = node.positionInLine
    +      CurrentOrigin.setPosition(line, column)
    +      val extracted = node.pattern
    +      extracted
    +    }
    +  }
    +  // Matches "line L:C message" parser errors. TODO improve the parse error - so we don't need this anymore.
    +  val errorRegEx = "line (\\d+):(\\d+) (.*)".r
    +  /** Parses `sql` through [[ParseDriver]] with this instance's `conf` and returns the AST. */
    +  protected def getAst(sql: String): ASTNode = {
    +    ParseDriver.parse(sql, conf)
    +  }
    +  /**
    +   * Creates LogicalPlan for a given HiveQL string.
    +   *
    +   * Failures are normalised to [[AnalysisException]]:
    +   *  - a [[ParseException]] whose message matches `errorRegEx` keeps its line and start position;
    +   *  - any other [[ParseException]] or Exception keeps only its message;
    +   *  - a NotImplementedError is reported together with the query text and its AST.
    +   * A MatchError is rethrown unchanged.
    +   */
    +  def createPlan(sql: String): LogicalPlan = {
    +    try {
    +      createPlan(sql, ParseDriver.parse(sql, conf))
    +    } catch {
    +      case pe: ParseException =>
    +        pe.getMessage match {
    +          case errorRegEx(line, start, message) =>
    +            throw new AnalysisException(message, Some(line.toInt), Some(start.toInt))
    +          case otherMessage =>
    +            throw new AnalysisException(otherMessage)
    +        }
    +      // Must stay ahead of the generic Exception case so match failures are not re-wrapped.
    +      case e: MatchError => throw e
    +      case e: Exception =>
    +        throw new AnalysisException(e.getMessage)
    +      case e: NotImplementedError =>
    +        throw new AnalysisException(
    +          s"""
    +             |Unsupported language features in query: $sql
    +             |${getAst(sql).treeString}
    +             |$e
    +             |${e.getStackTrace.head}
    +          """.stripMargin)
    +    }
    +  }
    +  /** Converts an already parsed AST into a plan; `sql` is not used by this implementation. */
    +  protected def createPlan(sql: String, tree: ASTNode): LogicalPlan = nodeToPlan(tree)
    +  /**
    +   * Parses a CREATE TABLE statement and returns one [[Attribute]] per declared column.
    +   *
    +   * A [[ParseException]] is rethrown as a RuntimeException naming the offending DDL; a statement
    +   * that is not CREATE TABLE fails the assertion, and a missing column list raises via sys.error.
    +   */
    +  def parseDdl(ddl: String): Seq[Attribute] = {
    +    val tree =
    +      try {
    +        getAst(ddl)
    +      } catch {
    +        case pe: ParseException =>
    +          throw new RuntimeException(s"Failed to parse ddl: '$ddl'", pe)
    +      }
    +    assert(tree.text == "TOK_CREATETABLE", "Only CREATE TABLE supported.")
    +    val tableOps = tree.children
    +    val colList = tableOps
    +      .find(_.text == "TOK_TABCOLLIST")
    +      .getOrElse(sys.error("No columnList!"))
    +    // Each child of the column list is handed to nodeToAttribute, which accepts TOK_TABCOL nodes.
    +    colList.children.map(nodeToAttribute)
    +  }
    +  /**
    +   * Picks, for every name in `clauseNames`, the first node of `nodeList` whose upper-cased text
    +   * equals that name. The result is positional: element i is the match for `clauseNames(i)`, or
    +   * None when that clause is absent.
    +   *
    +   * Raises via sys.error when any node is left unclaimed afterwards, which includes second and
    +   * later occurrences of a requested clause.
    +   */
    +  protected def getClauses(
    +      clauseNames: Seq[String],
    +      nodeList: Seq[ASTNode]): Seq[Option[ASTNode]] = {
    +    var remainingNodes = nodeList
    +    val clauses = clauseNames.map { clauseName =>
    +      val (matches, nonMatches) = remainingNodes.partition(_.text.toUpperCase == clauseName)
    +      // Only the first match is consumed; duplicates stay behind and are reported below.
    +      remainingNodes = nonMatches ++ matches.drop(1)
    +      matches.headOption
    +    }
    +    if (remainingNodes.nonEmpty) {
    +      sys.error(
    +        s"""Unhandled clauses: ${remainingNodes.map(_.treeString).mkString("\n")}.
    +           |You are likely trying to use an unsupported Hive feature.""".stripMargin)
    +    }
    +    clauses
    +  }
    +  /** Returns the single node named `clauseName`; raises via sys.error when it is absent. */
    +  protected def getClause(clauseName: String, nodeList: Seq[ASTNode]): ASTNode =
    +    getClauseOption(clauseName, nodeList).getOrElse(sys.error(
    +      s"Expected clause $clauseName missing from ${nodeList.map(_.treeString).mkString("\n")}"))
    +  /**
    +   * Looks up `clauseName` among `nodeList` by exact, case-sensitive text comparison.
    +   *
    +   * @return Some(node) for exactly one match and None for no match; more than one match raises
    +   *         via sys.error.
    +   */
    +  protected def getClauseOption(clauseName: String, nodeList: Seq[ASTNode]): Option[ASTNode] = {
    +    nodeList.filter(_.text == clauseName) match {
    +      case Seq(oneMatch) => Some(oneMatch)
    +      case Seq() => None
    +      case _ => sys.error(s"Found multiple instances of clause $clauseName")
    +    }
    +  }
    +  /**
    +   * Converts a TOK_TABCOL node (column name followed by a type node) into a nullable attribute
    +   * reference. Any other node is passed to noParseRule.
    +   */
    +  protected def nodeToAttribute(node: ASTNode): Attribute = node match {
    +    case Token("TOK_TABCOL", Token(colName, Nil) :: dataType :: Nil) =>
    +      AttributeReference(colName, nodeToDataType(dataType), nullable = true)()
    +    case _ =>
    +      noParseRule("Attribute", node)
    +  }
    +  /**
    +   * Maps a Hive type token to the corresponding Catalyst [[DataType]].
    +   *
    +   * VARCHAR becomes StringType (its single argument is ignored), a one-argument DECIMAL gets
    +   * scale 0, and LIST, STRUCT and MAP recurse into their element types. Any other node is passed
    +   * to noParseRule.
    +   */
    +  protected def nodeToDataType(node: ASTNode): DataType = node match {
    +    case Token("TOK_DECIMAL", precision :: scale :: Nil) =>
    +      DecimalType(precision.text.toInt, scale.text.toInt)
    +    case Token("TOK_DECIMAL", precision :: Nil) =>
    +      DecimalType(precision.text.toInt, 0)
    +    case Token("TOK_DECIMAL", Nil) => DecimalType.USER_DEFAULT
    +    case Token("TOK_BIGINT", Nil) => LongType
    +    case Token("TOK_INT", Nil) => IntegerType
    +    case Token("TOK_TINYINT", Nil) => ByteType
    +    case Token("TOK_SMALLINT", Nil) => ShortType
    +    case Token("TOK_BOOLEAN", Nil) => BooleanType
    +    case Token("TOK_STRING", Nil) => StringType
    +    case Token("TOK_VARCHAR", Token(_, Nil) :: Nil) => StringType
    +    case Token("TOK_FLOAT", Nil) => FloatType
    +    case Token("TOK_DOUBLE", Nil) => DoubleType
    +    case Token("TOK_DATE", Nil) => DateType
    +    case Token("TOK_TIMESTAMP", Nil) => TimestampType
    +    case Token("TOK_BINARY", Nil) => BinaryType
    +    case Token("TOK_LIST", elementType :: Nil) => ArrayType(nodeToDataType(elementType))
    +    case Token("TOK_STRUCT", Token("TOK_TABCOLLIST", fields) :: Nil) =>
    +      StructType(fields.map(nodeToStructField))
    +    case Token("TOK_MAP", keyType :: valueType :: Nil) =>
    +      MapType(nodeToDataType(keyType), nodeToDataType(valueType))
    +    case _ =>
    +      noParseRule("DataType", node)
    +  }
    +  /**
    +   * Converts a TOK_TABCOL node into a nullable [[StructField]]. The column may carry one extra
    +   * trailing child (a comment), which is ignored. Any other node is passed to noParseRule.
    +   */
    +  protected def nodeToStructField(node: ASTNode): StructField = node match {
    +    case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: Nil) =>
    +      StructField(fieldName, nodeToDataType(dataType), nullable = true)
    +    case Token("TOK_TABCOL", Token(fieldName, Nil) :: dataType :: _ /* comment */ :: Nil) =>
    +      StructField(fieldName, nodeToDataType(dataType), nullable = true)
    +    case _ =>
    +      noParseRule("StructField", node)
    +  }
    +  /**
    +   * Builds a [[TableIdentifier]] from the children of a table-name node. One child is taken as
    +   * the table name, two children as database name followed by table name; each part has its
    +   * backticks removed. Any other number of parts raises via sys.error.
    +   */
    +  protected def extractTableIdent(tableNameParts: ASTNode): TableIdentifier = {
    +    tableNameParts.children.map {
    +      case Token(part, Nil) => cleanIdentifier(part)
    +    } match {
    +      case Seq(tableOnly) => TableIdentifier(tableOnly)
    +      case Seq(databaseName, table) => TableIdentifier(table, Some(databaseName))
    +      case other => sys.error("Hive only supports tables names like 'tableName' " +
    +        s"or 'databaseName.tableName', found '$other'")
    +    }
    +  }
    +  /**
    +   * SELECT MAX(value) FROM src GROUP BY k1, k2, k3 GROUPING SETS((k1, k2), (k2))
    +   * is equivalent to
    +   * SELECT MAX(value) FROM src GROUP BY k1, k2 UNION SELECT MAX(value) FROM src GROUP BY k2
    +   * Check the following link for details.
    +   *
    +   * https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C+Grouping+and+Rollup
    +   *
    +   * The bitmask denotes the grouping expressions validity for a grouping set,
    +   * the bitmask also be called as grouping id (`GROUPING__ID`, the virtual column in Hive)
    +   * e.g. In superset (k1, k2, k3), (bit 0: k1, bit 1: k2, and bit 2: k3), the grouping id of
    +   * GROUPING SETS (k1, k2) and (k2) should be 3 and 2 respectively.
    +   *
    +   * @param children the grouping keys mixed with TOK_GROUPING_SETS_EXPRESSION nodes
    +   * @return the grouping key expressions and one bitmask per grouping set; a column that is
    +   *         not among the grouping keys raises an [[AnalysisException]]
    +   */
    +  protected def extractGroupingSet(children: Seq[ASTNode]): (Seq[Expression], Seq[Int]) = {
    +    val (keyASTs, setASTs) = children.partition {
    +      case Token("TOK_GROUPING_SETS_EXPRESSION", _) => false // grouping sets
    +      case _ => true // grouping keys
    +    }
    +    val keys = keyASTs.map(nodeToExpr)
    +    // Position of each key in the GROUP BY list; that position is the key's bit in a mask.
    +    val keyMap = keyASTs.zipWithIndex.toMap
    +    val bitmasks: Seq[Int] = setASTs.map {
    +      case Token("TOK_GROUPING_SETS_EXPRESSION", null) => 0
    +      case Token("TOK_GROUPING_SETS_EXPRESSION", columns) =>
    +        columns.foldLeft(0)((bitmap, col) => {
    +          val keyIndex = keyMap.find(_._1.treeEquals(col)).map(_._2)
    +          bitmap | 1 << keyIndex.getOrElse(
    +            throw new AnalysisException(s"${col.treeString} doesn't show up in the GROUP BY list"))
    +        })
    +      case _ => sys.error("Expect GROUPING SETS clause")
    +    }
    +    (keys, bitmasks)
    +  }
    +  protected def nodeToPlan(node: ASTNode): LogicalPlan = node match {
    +    case Token("TOK_QUERY", queryArgs @ Token("TOK_CTE" | "TOK_FROM" | 
"TOK_INSERT", _) :: _) =>
    +      val (fromClause: Option[ASTNode], insertClauses, cteRelations) =
    +        queryArgs match {
    +          case Token("TOK_CTE", ctes) :: Token("TOK_FROM", from) :: 
inserts =>
    +            val cteRelations = { node =>
    +              val relation = nodeToRelation(node).asInstanceOf[Subquery]
    +              relation.alias -> relation
    +            }
    +            (Some(from.head), inserts, Some(cteRelations.toMap))
    +          case Token("TOK_FROM", from) :: inserts =>
    +            (Some(from.head), inserts, None)
    +          case Token("TOK_INSERT", _) :: Nil =>
    +            (None, queryArgs, None)
    +        }
    +      // Return one query for each insert clause.
    +      val queries = {
    +        case Token("TOK_INSERT", singleInsert) =>
    +          val (
    +            intoClause ::
    +              destClause ::
    +              selectClause ::
    +              selectDistinctClause ::
    +              whereClause ::
    +              groupByClause ::
    +              rollupGroupByClause ::
    +              cubeGroupByClause ::
    +              groupingSetsClause ::
    +              orderByClause ::
    +              havingClause ::
    +              sortByClause ::
    +              clusterByClause ::
    +              distributeByClause ::
    +              limitClause ::
    +              lateralViewClause ::
    +              windowClause :: Nil) = {
    +            getClauses(
    +              Seq(
    +                "TOK_INSERT_INTO",
    +                "TOK_DESTINATION",
    +                "TOK_SELECT",
    +                "TOK_SELECTDI",
    +                "TOK_WHERE",
    +                "TOK_GROUPBY",
    +                "TOK_ROLLUP_GROUPBY",
    +                "TOK_CUBE_GROUPBY",
    +                "TOK_GROUPING_SETS",
    +                "TOK_ORDERBY",
    +                "TOK_HAVING",
    +                "TOK_SORTBY",
    +                "TOK_CLUSTERBY",
    +                "TOK_DISTRIBUTEBY",
    +                "TOK_LIMIT",
    +                "TOK_LATERAL_VIEW",
    +                "WINDOW"),
    +              singleInsert)
    +          }
    +          val relations = fromClause match {
    +            case Some(f) => nodeToRelation(f)
    +            case None => OneRowRelation
    +          }
    +          val withWhere = { whereNode =>
    +            val Seq(whereExpr) = whereNode.children
    +            Filter(nodeToExpr(whereExpr), relations)
    +          }.getOrElse(relations)
    +          val select = (selectClause orElse selectDistinctClause)
    +            .getOrElse(sys.error("No select clause."))
    +          val transformation = nodeToTransformation(select.children.head, 
    +          val withLateralView = { lv =>
    +            nodeToGenerate(lv.children.head, outer = false, withWhere)
    +          }.getOrElse(withWhere)
    +          // The projection of the query can either be a normal 
projection, an aggregation
    +          // (if there is a group by) or a script transformation.
    +          val withProject: LogicalPlan = transformation.getOrElse {
    +            val selectExpressions =
    +            Seq(
    +     => e match {
    +                case Token("TOK_GROUPBY", children) =>
    +                  // Not a transformation so must be either project or 
    +                  Aggregate(, selectExpressions, 
    +                case _ => sys.error("Expect GROUP BY")
    +              }),
    +     => e match {
    +                case Token("TOK_GROUPING_SETS", children) =>
    +                  val(groupByExprs, masks) = extractGroupingSet(children)
    +                  GroupingSets(masks, groupByExprs, withLateralView, 
    +                case _ => sys.error("Expect GROUPING SETS")
    +              }),
    +     => e match {
    +                case Token("TOK_ROLLUP_GROUPBY", children) =>
    +                  Aggregate(
    +                    Seq(Rollup(,
    +                    selectExpressions,
    +                    withLateralView)
    +                case _ => sys.error("Expect WITH ROLLUP")
    +              }),
    +     => e match {
    +                case Token("TOK_CUBE_GROUPBY", children) =>
    +                  Aggregate(
    +                    Seq(Cube(,
    +                    selectExpressions,
    +                    withLateralView)
    +                case _ => sys.error("Expect WITH CUBE")
    +              }),
    +              Some(Project(selectExpressions, 
    +          }
    +          // Handle HAVING clause.
    +          val withHaving = { h =>
    +            val havingExpr = h.children match { case Seq(hexpr) => 
nodeToExpr(hexpr) }
    +            // Note that we added a cast to boolean. If the expression 
itself is already boolean,
    +            // the optimizer will get rid of the unnecessary cast.
    +            Filter(Cast(havingExpr, BooleanType), withProject)
    +          }.getOrElse(withProject)
    +          // Handle SELECT DISTINCT
    +          val withDistinct =
    +            if (selectDistinctClause.isDefined) Distinct(withHaving) else 
    +          // Handle ORDER BY, SORT BY, DISTRIBUTE BY, and CLUSTER BY 
    +          val withSort =
    +            (orderByClause, sortByClause, distributeByClause, 
clusterByClause) match {
    +              case (Some(totalOrdering), None, None, None) =>
    +                Sort(, global = 
true, withDistinct)
    +              case (None, Some(perPartitionOrdering), None, None) =>
    +                Sort(
    +        ,
    +                  global = false, withDistinct)
    +              case (None, None, Some(partitionExprs), None) =>
    +                RepartitionByExpression(
    +        , withDistinct)
    +              case (None, Some(perPartitionOrdering), 
Some(partitionExprs), None) =>
    +                Sort(
    +        , 
global = false,
    +                  RepartitionByExpression(
    +          ,
    +                    withDistinct))
    +              case (None, None, None, Some(clusterExprs)) =>
    +                Sort(
    +        , 
    +                  global = false,
    +                  RepartitionByExpression(
    +          ,
    +                    withDistinct))
    +              case (None, None, None, None) => withDistinct
    +              case _ => sys.error("Unsupported set of ordering / 
distribution clauses.")
    +            }
    +          val withLimit =
    +   => nodeToExpr(l.children.head))
    +              .map(Limit(_, withSort))
    +              .getOrElse(withSort)
    +          // Collect all window specifications defined in the WINDOW 
    +          val windowDefinitions = {
    +            case Token("TOK_WINDOWDEF",
    +            Token(windowName, Nil) :: Token("TOK_WINDOWSPEC", spec) :: 
Nil) =>
    +              windowName -> nodesToWindowSpecification(spec)
    +          }.toMap)
    +          // Handle cases like
    +          // window w1 as (partition by p_mfgr order by p_name
    +          //               range between 2 preceding and 2 following),
    +          //        w2 as w1
    +          val resolvedCrossReference = {
    +            windowDefMap => {
    +              case (windowName, WindowSpecReference(other)) =>
    +                (windowName, 
    +              case o => o.asInstanceOf[(String, WindowSpecDefinition)]
    +            }
    +          }
    +          val withWindowDefinitions =
    +  , 
    +          // TOK_INSERT_INTO means to add files to the table.
    +          // TOK_DESTINATION means to overwrite the table.
    +          val resultDestination =
    +            (intoClause orElse destClause).getOrElse(sys.error("No 
destination found."))
    +          val overwrite = intoClause.isEmpty
    +          nodeToDest(
    +            resultDestination,
    +            withWindowDefinitions,
    +            overwrite)
    +      }
    +      // If there are multiple INSERTS just UNION them together into on 
    +      val query = queries.reduceLeft(Union)
    +      // return With plan if there is CTE
    +, _)).getOrElse(query)
    +    // HIVE-9039 renamed TOK_UNION => TOK_UNIONALL while adding 
    +    case Token("TOK_UNIONALL", left :: right :: Nil) =>
    +      Union(nodeToPlan(left), nodeToPlan(right))
    +    case _ =>
    +      noParseRule("Plan", node)
    +  }
    +  val allJoinTokens = "(TOK_.*JOIN)".r
    +  val laterViewToken = "TOK_LATERAL_VIEW(.*)".r
    +  protected def nodeToRelation(node: ASTNode): LogicalPlan = {
    +    node match {
    +      case Token("TOK_SUBQUERY", query :: Token(alias, Nil) :: Nil) =>
    +        Subquery(cleanIdentifier(alias), nodeToPlan(query))
    +      case Token(laterViewToken(isOuter), selectClause :: relationClause 
:: Nil) =>
    +        nodeToGenerate(
    +          selectClause,
    +          outer = isOuter.nonEmpty,
    +          nodeToRelation(relationClause))
    +      /* All relations, possibly with aliases or sampling clauses. */
    +      case Token("TOK_TABREF", clauses) =>
    +        // If the last clause is not a token then it's the alias of the 
    +        val (nonAliasClauses, aliasClause) =
    +          if (clauses.last.text.startsWith("TOK")) {
    +            (clauses, None)
    +          } else {
    +            (clauses.dropRight(1), Some(clauses.last))
    +          }
    +        val (Some(tableNameParts) ::
    +          splitSampleClause ::
    +          bucketSampleClause :: Nil) = {
    +          getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE", 
    +            nonAliasClauses)
    +        }
    +        val tableIdent = extractTableIdent(tableNameParts)
    +        val alias = { case Token(a, Nil) => 
cleanIdentifier(a) }
    +        val relation = UnresolvedRelation(tableIdent, alias)
    +        // Apply sampling if requested.
    +        (bucketSampleClause orElse splitSampleClause).map {
    +          case Token("TOK_TABLESPLITSAMPLE",
    +          Token("TOK_ROWCOUNT", Nil) :: Token(count, Nil) :: Nil) =>
    +            Limit(Literal(count.toInt), relation)
    +          case Token("TOK_TABLESPLITSAMPLE",
    +          Token("TOK_PERCENT", Nil) :: Token(fraction, Nil) :: Nil) =>
    +            // The range of fraction accepted by Sample is [0, 1]. Because 
Hive's block sampling
    +            // function takes X PERCENT as the input and the range of X is 
[0, 100], we need to
    +            // adjust the fraction.
    +            require(
    +              fraction.toDouble >= (0.0 - RandomSampler.roundingEpsilon)
    +                && fraction.toDouble <= (100.0 + 
    +              s"Sampling fraction ($fraction) must be on interval [0, 
    +            Sample(0.0, fraction.toDouble / 100, withReplacement = false,
    +              (math.random * 1000).toInt,
    +              relation)
    +          case Token("TOK_TABLEBUCKETSAMPLE",
    +          Token(numerator, Nil) ::
    +            Token(denominator, Nil) :: Nil) =>
    +            val fraction = numerator.toDouble / denominator.toDouble
    +            Sample(0.0, fraction, withReplacement = false, (math.random * 
1000).toInt, relation)
    +          case a =>
    +            noParseRule("Sampling", a)
    +        }.getOrElse(relation)
    +      case Token(allJoinTokens(joinToken), relation1 :: relation2 :: 
other) =>
    +        if (!(other.size <= 1)) {
    +          sys.error(s"Unsupported join operation: $other")
    +        }
    +        val joinType = joinToken match {
    +          case "TOK_JOIN" => Inner
    +          case "TOK_CROSSJOIN" => Inner
    +          case "TOK_RIGHTOUTERJOIN" => RightOuter
    +          case "TOK_LEFTOUTERJOIN" => LeftOuter
    +          case "TOK_FULLOUTERJOIN" => FullOuter
    +          case "TOK_LEFTSEMIJOIN" => LeftSemi
    +          case "TOK_UNIQUEJOIN" => noParseRule("Unique Join", node)
    +          case "TOK_ANTIJOIN" => noParseRule("Anti Join", node)
    +        }
    +        Join(nodeToRelation(relation1),
    +          nodeToRelation(relation2),
    +          joinType,
    +      case _ =>
    +        noParseRule("Relation", node)
    +    }
    +  }
    +  /**
    +   * Translates an ascending or descending sort-column node into a [[SortOrder]] over its single
    +   * child expression. Any other node is passed to noParseRule.
    +   */
    +  protected def nodeToSortOrder(node: ASTNode): SortOrder = {
    +    node match {
    +      case Token("TOK_TABSORTCOLNAMEASC", ascendingChild :: Nil) =>
    +        val child = nodeToExpr(ascendingChild)
    +        SortOrder(child, Ascending)
    +      case Token("TOK_TABSORTCOLNAMEDESC", descendingChild :: Nil) =>
    +        val child = nodeToExpr(descendingChild)
    +        SortOrder(child, Descending)
    +      case _ =>
    +        noParseRule("SortOrder", node)
    +    }
    +  }
    +  val destinationToken = "TOK_DESTINATION|TOK_INSERT_INTO".r
    +  /**
    +   * Wraps `query` in the plan node for the destination described by `node`.
    +   *
    +   * A TOK_DIR / TOK_TMP_FILE destination returns `query` untouched. A TOK_TAB destination
    +   * becomes an InsertIntoTable, with `ifNotExists` set when a TOK_IFNOTEXISTS child follows the
    +   * table. Any other shape is passed to noParseRule.
    +   */
    +  protected def nodeToDest(
    +      node: ASTNode,
    +      query: LogicalPlan,
    +      overwrite: Boolean): LogicalPlan = node match {
    +    case Token(destinationToken(),
    +    Token("TOK_DIR",
    +    Token("TOK_TMP_FILE", Nil) :: Nil) :: Nil) =>
    +      query
    +    case Token(destinationToken(), Token("TOK_TAB", tableArgs) :: Nil) =>
    +      insertIntoTab(tableArgs, query, overwrite, ifNotExists = false)
    +    case Token(destinationToken(),
    +    Token("TOK_TAB", tableArgs) :: Token("TOK_IFNOTEXISTS", _) :: Nil) =>
    +      insertIntoTab(tableArgs, query, overwrite, ifNotExists = true)
    +    case _ =>
    +      noParseRule("Destination", node)
    +  }
    +
    +  /** Builds the InsertIntoTable node shared by both TOK_TAB destination shapes of nodeToDest. */
    +  private def insertIntoTab(
    +      tableArgs: Seq[ASTNode],
    +      query: LogicalPlan,
    +      overwrite: Boolean,
    +      ifNotExists: Boolean): LogicalPlan = {
    +    val Some(tableNameParts) :: partitionClause :: Nil =
    +      getClauses(Seq("TOK_TABNAME", "TOK_PARTSPEC"), tableArgs)
    +    val tableIdent = extractTableIdent(tableNameParts)
    +    // Parse partitions. We also make keys case insensitive.
    +    val partitionKeys = partitionClause.map(_.children.map {
    +      case Token("TOK_PARTVAL", Token(key, Nil) :: Token(value, Nil) :: Nil) =>
    +        cleanIdentifier(key.toLowerCase) -> Some(unquoteString(value))
    +      case Token("TOK_PARTVAL", Token(key, Nil) :: Nil) =>
    +        cleanIdentifier(key.toLowerCase) -> None
    +    }.toMap).getOrElse(Map.empty)
    +    InsertIntoTable(
    +      UnresolvedRelation(tableIdent, None), partitionKeys, query, overwrite, ifNotExists)
    +  }
    +  /**
    +   * Converts one child of a select list into an optional expression.
    +   *
    +   * A TOK_SELEXPR with a single child yields that expression. With one extra leaf child the
    +   * expression is wrapped in an Alias named after that leaf. Any other TOK_SELEXPR is wrapped in
    +   * a MultiAlias built from the leaf children that follow the expression. Hint lists yield None,
    +   * and everything else is passed to noParseRule.
    +   */
    +  protected def selExprNodeToExpr(node: ASTNode): Option[Expression] = {
    +    node match {
    +      case Token("TOK_SELEXPR", exprNode :: Nil) =>
    +        val plain = nodeToExpr(exprNode)
    +        Some(plain)
    +      case Token("TOK_SELEXPR", exprNode :: Token(aliasName, Nil) :: Nil) =>
    +        val aliased = Alias(nodeToExpr(exprNode), cleanIdentifier(aliasName))()
    +        Some(aliased)
    +      case Token("TOK_SELEXPR", exprNode :: aliasNodes) =>
    +        val names = aliasNodes.collect { case Token(leaf, Nil) => cleanIdentifier(leaf) }
    +        Some(MultiAlias(nodeToExpr(exprNode), names))
    +      /* Hints are ignored */
    +      case Token("TOK_HINTLIST", _) =>
    +        None
    +      case _ =>
    +        noParseRule("Select", node)
    +    }
    +  }
    +  protected val escapedIdentifier = "`([^`]+)`".r // one or more non-backtick chars in backticks
    +  protected val doubleQuotedString = "\"([^\"]+)\"".r // non-empty text inside double quotes
    +  protected val singleQuotedString = "'([^']+)'".r // non-empty text inside single quotes
    +  /**
    +   * Removes one pair of enclosing quotes when the whole string matches `singleQuotedString` or
    +   * `doubleQuotedString`; any other input is returned as is.
    +   */
    +  protected def unquoteString(str: String) = {
    +    str match {
    +      case singleQuotedString(inner) => inner
    +      case doubleQuotedString(inner) => inner
    +      case _ => str
    +    }
    +  }
    +  /** Returns the text between enclosing backticks, or `ident` itself when it is not escaped. */
    +  protected def cleanIdentifier(ident: String): String = {
    +    ident match {
    +      case escapedIdentifier(unescaped) => unescaped
    +      case _ => ident
    +    }
    +  }
    +  val numericAstTypes = Seq(
    +    SparkSqlParser.Number,
    +    SparkSqlParser.TinyintLiteral,
    +    SparkSqlParser.SmallintLiteral,
    +    SparkSqlParser.BigintLiteral,
    +    SparkSqlParser.DecimalLiteral)
    +  /* Case insensitive matches: each regex carries the (?i) flag and has no capture group. */
    +  val COUNT = "(?i)COUNT".r
    +  val SUM = "(?i)SUM".r
    +  val AND = "(?i)AND".r
    +  val OR = "(?i)OR".r
    +  val NOT = "(?i)NOT".r
    +  val TRUE = "(?i)TRUE".r
    +  val FALSE = "(?i)FALSE".r
    +  val LIKE = "(?i)LIKE".r
    +  val RLIKE = "(?i)RLIKE".r
    +  val REGEXP = "(?i)REGEXP".r
    +  val IN = "(?i)IN".r
    +  val DIV = "(?i)DIV".r
    +  val BETWEEN = "(?i)BETWEEN".r
    +  val WHEN = "(?i)WHEN".r
    +  val CASE = "(?i)CASE".r
    +  protected def nodeToExpr(node: ASTNode): Expression = node match {
    +    /* Attribute References */
    +    case Token("TOK_TABLE_OR_COL", Token(name, Nil) :: Nil) =>
    +      UnresolvedAttribute.quoted(cleanIdentifier(name))
    +    case Token(".", qualifier :: Token(attr, Nil) :: Nil) =>
    +      nodeToExpr(qualifier) match {
    +        case UnresolvedAttribute(nameParts) =>
    +          UnresolvedAttribute(nameParts :+ cleanIdentifier(attr))
    +        case other => UnresolvedExtractValue(other, Literal(attr))
    +      }
    +    /* Stars (*) */
    +    case Token("TOK_ALLCOLREF", Nil) => UnresolvedStar(None)
    +    // The format of dbName.tableName.* cannot be parsed by HiveParser. 
TOK_TABNAME will only
    +    // has a single child which is tableName.
    +    case Token("TOK_ALLCOLREF", Token("TOK_TABNAME", Token(name, Nil) :: 
Nil) :: Nil) =>
    +      UnresolvedStar(Some(UnresolvedAttribute.parseAttributeName(name)))
    +    /* Aggregate Functions */
    +    case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) =>
    +      Count( = true)
    +    case Token("TOK_FUNCTIONSTAR", Token(COUNT(), Nil) :: Nil) =>
    +      Count(Literal(1)).toAggregateExpression()
    +    /* Casts */
    +    case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), StringType)
    +    case Token("TOK_FUNCTION", Token("TOK_VARCHAR", _) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), StringType)
    +    case Token("TOK_FUNCTION", Token("TOK_CHAR", _) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), StringType)
    +    case Token("TOK_FUNCTION", Token("TOK_INT", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), IntegerType)
    +    case Token("TOK_FUNCTION", Token("TOK_BIGINT", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), LongType)
    +    case Token("TOK_FUNCTION", Token("TOK_FLOAT", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), FloatType)
    +    case Token("TOK_FUNCTION", Token("TOK_DOUBLE", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), DoubleType)
    +    case Token("TOK_FUNCTION", Token("TOK_SMALLINT", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), ShortType)
    +    case Token("TOK_FUNCTION", Token("TOK_TINYINT", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), ByteType)
    +    case Token("TOK_FUNCTION", Token("TOK_BINARY", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), BinaryType)
    +    case Token("TOK_FUNCTION", Token("TOK_BOOLEAN", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), BooleanType)
    +    case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: scale :: 
nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, 
    +    case Token("TOK_FUNCTION", Token("TOK_DECIMAL", precision :: Nil) :: 
arg :: Nil) =>
    +      Cast(nodeToExpr(arg), DecimalType(precision.text.toInt, 0))
    +    case Token("TOK_FUNCTION", Token("TOK_DECIMAL", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), DecimalType.USER_DEFAULT)
    +    case Token("TOK_FUNCTION", Token("TOK_TIMESTAMP", Nil) :: arg :: Nil) 
    +      Cast(nodeToExpr(arg), TimestampType)
    +    case Token("TOK_FUNCTION", Token("TOK_DATE", Nil) :: arg :: Nil) =>
    +      Cast(nodeToExpr(arg), DateType)
    +    /* Arithmetic */
    +    case Token("+", child :: Nil) => nodeToExpr(child)
    +    case Token("-", child :: Nil) => UnaryMinus(nodeToExpr(child))
    +    case Token("~", child :: Nil) => BitwiseNot(nodeToExpr(child))
    +    case Token("+", left :: right:: Nil) => Add(nodeToExpr(left), 
    +    case Token("-", left :: right:: Nil) => Subtract(nodeToExpr(left), 
    +    case Token("*", left :: right:: Nil) => Multiply(nodeToExpr(left), 
    +    case Token("/", left :: right:: Nil) => Divide(nodeToExpr(left), 
    +    case Token(DIV(), left :: right:: Nil) =>
    +      Cast(Divide(nodeToExpr(left), nodeToExpr(right)), LongType)
    +    case Token("%", left :: right:: Nil) => Remainder(nodeToExpr(left), 
    +    case Token("&", left :: right:: Nil) => BitwiseAnd(nodeToExpr(left), 
    +    case Token("|", left :: right:: Nil) => BitwiseOr(nodeToExpr(left), 
    +    case Token("^", left :: right:: Nil) => BitwiseXor(nodeToExpr(left), 
    +    /* Comparisons */
    +    case Token("=", left :: right:: Nil) => EqualTo(nodeToExpr(left), 
    +    case Token("==", left :: right:: Nil) => EqualTo(nodeToExpr(left), 
    +    case Token("<=>", left :: right:: Nil) => 
EqualNullSafe(nodeToExpr(left), nodeToExpr(right))
    +    case Token("!=", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), 
    +    case Token("<>", left :: right:: Nil) => Not(EqualTo(nodeToExpr(left), 
    +    case Token(">", left :: right:: Nil) => GreaterThan(nodeToExpr(left), 
    +    case Token(">=", left :: right:: Nil) => 
GreaterThanOrEqual(nodeToExpr(left), nodeToExpr(right))
    +    case Token("<", left :: right:: Nil) => LessThan(nodeToExpr(left), 
    +    case Token("<=", left :: right:: Nil) => 
LessThanOrEqual(nodeToExpr(left), nodeToExpr(right))
    +    case Token(LIKE(), left :: right:: Nil) => Like(nodeToExpr(left), 
    +    case Token(RLIKE(), left :: right:: Nil) => RLike(nodeToExpr(left), 
    +    case Token(REGEXP(), left :: right:: Nil) => RLike(nodeToExpr(left), 
    +    case Token("TOK_FUNCTION", Token("TOK_ISNOTNULL", Nil) :: child :: 
Nil) =>
    +      IsNotNull(nodeToExpr(child))
    +    case Token("TOK_FUNCTION", Token("TOK_ISNULL", Nil) :: child :: Nil) =>
    +      IsNull(nodeToExpr(child))
    +    case Token("TOK_FUNCTION", Token(IN(), Nil) :: value :: list) =>
    +      In(nodeToExpr(value),
    +    case Token("TOK_FUNCTION",
    +    Token(BETWEEN(), Nil) ::
    +      kw ::
    +      target ::
    +      minValue ::
    +      maxValue :: Nil) =>
    +      val targetExpression = nodeToExpr(target)
    +      val betweenExpr =
    +        And(
    +          GreaterThanOrEqual(targetExpression, nodeToExpr(minValue)),
    +          LessThanOrEqual(targetExpression, nodeToExpr(maxValue)))
    +      kw match {
    +        case Token("KW_FALSE", Nil) => betweenExpr
    +        case Token("KW_TRUE", Nil) => Not(betweenExpr)
    +      }
    +    /* Boolean Logic */
    +    case Token(AND(), left :: right:: Nil) => And(nodeToExpr(left), 
    +    case Token(OR(), left :: right:: Nil) => Or(nodeToExpr(left), 
    +    case Token(NOT(), child :: Nil) => Not(nodeToExpr(child))
    +    case Token("!", child :: Nil) => Not(nodeToExpr(child))
    +    /* Case statements */
    +    case Token("TOK_FUNCTION", Token(WHEN(), Nil) :: branches) =>
    +      CaseWhen(
    +    case Token("TOK_FUNCTION", Token(CASE(), Nil) :: branches) =>
    +      val keyExpr = nodeToExpr(branches.head)
    +      CaseKeyWhen(keyExpr, branches.drop(1).map(nodeToExpr))
    +    /* Complex datatype manipulation */
    +    case Token("[", child :: ordinal :: Nil) =>
    +      UnresolvedExtractValue(nodeToExpr(child), nodeToExpr(ordinal))
    +    /* Window Functions */
    +    case Token(text, args :+ Token("TOK_WINDOWSPEC", spec)) =>
    +      val function = nodeToExpr(node.copy(children = node.children.init))
    +      nodesToWindowSpecification(spec) match {
    +        case reference: WindowSpecReference =>
    +          UnresolvedWindowExpression(function, reference)
    +        case definition: WindowSpecDefinition =>
    +          WindowExpression(function, definition)
    +      }
    +    /* UDFs - Must be last otherwise will preempt built in functions */
    +    case Token("TOK_FUNCTION", Token(name, Nil) :: args) =>
    +      UnresolvedFunction(name,, isDistinct = false)
    +    // Aggregate function with DISTINCT keyword.
    +    case Token("TOK_FUNCTIONDI", Token(name, Nil) :: args) =>
    +      UnresolvedFunction(name,, isDistinct = true)
    +    case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: args) =>
    +      UnresolvedFunction(name, UnresolvedStar(None) :: Nil, isDistinct = 
    +    /* Literals */
    +    case Token("TOK_NULL", Nil) => Literal.create(null, NullType)
    +    case Token(TRUE(), Nil) => Literal.create(true, BooleanType)
    +    case Token(FALSE(), Nil) => Literal.create(false, BooleanType)
    +    case Token("TOK_STRINGLITERALSEQUENCE", strings) =>
    +      Literal( => 
    +    // This code is adapted from
    +    // 
    +    case ast: ASTNode if numericAstTypes contains ast.tokenType =>
    +      var v: Literal = null
    +      try {
    +        if (ast.text.endsWith("L")) {
    +          // Literal bigint.
    +          v = Literal.create(ast.text.substring(0, ast.text.length() - 
1).toLong, LongType)
    +        } else if (ast.text.endsWith("S")) {
    +          // Literal smallint.
    +          v = Literal.create(ast.text.substring(0, ast.text.length() - 
1).toShort, ShortType)
    +        } else if (ast.text.endsWith("Y")) {
    +          // Literal tinyint.
    +          v = Literal.create(ast.text.substring(0, ast.text.length() - 
1).toByte, ByteType)
    +        } else if (ast.text.endsWith("BD") || ast.text.endsWith("D")) {
    +          // Literal decimal
    +          val strVal = ast.text.stripSuffix("D").stripSuffix("B")
    +          v = Literal(Decimal(strVal))
    +        } else {
    +          v = Literal.create(ast.text.toDouble, DoubleType)
    +          v = Literal.create(ast.text.toLong, LongType)
    +          v = Literal.create(ast.text.toInt, IntegerType)
    +        }
    +      } catch {
    +        case nfe: NumberFormatException => // Do nothing
    +      }
    +      if (v == null) {
    +        sys.error(s"Failed to parse number '${ast.text}'.")
    +      } else {
    +        v
    +      }
    +    case ast: ASTNode if ast.tokenType == SparkSqlParser.StringLiteral =>
    +      Literal(ParseUtils.unescapeSQLString(ast.text))
    +    case ast: ASTNode if ast.tokenType == SparkSqlParser.TOK_DATELITERAL =>
    +      Literal(Date.valueOf(ast.text.substring(1, ast.text.length - 1)))
    +    case ast: ASTNode if ast.tokenType == 
    +      Literal(ParseUtils.charSetString(ast.children.head.text, 
    +    case ast: ASTNode if ast.tokenType == 
    +      Literal(CalendarInterval.fromYearMonthString(ast.text))
    +    case ast: ASTNode if ast.tokenType == 
    +      Literal(CalendarInterval.fromDayTimeString(ast.text))
    +    case ast: ASTNode if ast.tokenType == 
    +      Literal(CalendarInterval.fromSingleUnitString("year", ast.text))
    +    case ast: ASTNode if ast.tokenType == 
    +      Literal(CalendarInterval.fromSingleUnitString("month", ast.text))
    +    case ast: ASTNode if ast.tokenType == 
    +      Literal(CalendarInterval.fromSingleUnitString("day", ast.text))
    +    case ast: ASTNode if ast.tokenType == 
    +      Literal(CalendarInterval.fromSingleUnitString("hour", ast.text))
    +    case ast: ASTNode if ast.tokenType == 
    +      Literal(CalendarInterval.fromSingleUnitString("minute", ast.text))
    +    case ast: ASTNode if ast.tokenType == 
    +      Literal(CalendarInterval.fromSingleUnitString("second", ast.text))
    +    case _ =>
    +      noParseRule("Expression", node)
    +  }
    +  /* Case insensitive matches for Window Specification */
    +  val PRECEDING = "(?i)preceding".r
    +  val FOLLOWING = "(?i)following".r
    +  val CURRENT = "(?i)current".r
    +  protected def nodesToWindowSpecification(nodes: Seq[ASTNode]): 
WindowSpec = nodes match {
    +    case Token(windowName, Nil) :: Nil =>
    +      // Refer to a window spec defined in the window clause.
    +      WindowSpecReference(windowName)
    +    case Nil =>
    +      // OVER()
    +      WindowSpecDefinition(
    +        partitionSpec = Nil,
    +        orderSpec = Nil,
    +        frameSpecification = UnspecifiedFrame)
    +    case spec =>
    +      val (partitionClause :: rowFrame :: rangeFrame :: Nil) =
    +        getClauses(
    +          Seq(
    +            "TOK_PARTITIONINGSPEC",
    +            "TOK_WINDOWRANGE",
    +            "TOK_WINDOWVALUES"),
    +          spec)
    +      // Handle Partition By and Order By.
    +      val (partitionSpec, orderSpec) = { 
partitionAndOrdering =>
    +        val (partitionByClause :: orderByClause :: sortByClause :: 
clusterByClause :: Nil) =
    +          getClauses(
    +            Seq("TOK_DISTRIBUTEBY", "TOK_ORDERBY", "TOK_SORTBY", 
    +            partitionAndOrdering.children)
    +        (partitionByClause, orderByClause.orElse(sortByClause), 
clusterByClause) match {
    +          case (Some(partitionByExpr), Some(orderByExpr), None) =>
    +            (,
    +          case (Some(partitionByExpr), None, None) =>
    +            (, Nil)
    +          case (None, Some(orderByExpr), None) =>
    +            (Nil,
    +          case (None, None, Some(clusterByExpr)) =>
    +            val expressions =
    +            (expressions,, Ascending)))
    +          case _ =>
    +            noParseRule("Partition & Ordering", partitionAndOrdering)
    +        }
    +      }.getOrElse {
    +        (Nil, Nil)
    +      }
    +      // Handle Window Frame
    +      val windowFrame =
    +        if (rowFrame.isEmpty && rangeFrame.isEmpty) {
    +          UnspecifiedFrame
    +        } else {
    +          val frameType = => RowFrame).getOrElse(RangeFrame)
    +          def nodeToBoundary(node: ASTNode): FrameBoundary = node match {
    +            case Token(PRECEDING(), Token(count, Nil) :: Nil) =>
    +              if (count.toLowerCase() == "unbounded") {
    +                UnboundedPreceding
    +              } else {
    +                ValuePreceding(count.toInt)
    +              }
    +            case Token(FOLLOWING(), Token(count, Nil) :: Nil) =>
    +              if (count.toLowerCase() == "unbounded") {
    +                UnboundedFollowing
    +              } else {
    +                ValueFollowing(count.toInt)
    +              }
    +            case Token(CURRENT(), Nil) => CurrentRow
    +            case _ =>
    +              noParseRule("Window Frame Boundary", node)
    +          }
    +          rowFrame.orElse(rangeFrame).map { frame =>
    +            frame.children match {
    +              case precedingNode :: followingNode :: Nil =>
    +                SpecifiedWindowFrame(
    +                  frameType,
    +                  nodeToBoundary(precedingNode),
    +                  nodeToBoundary(followingNode))
    +              case precedingNode :: Nil =>
    +                SpecifiedWindowFrame(frameType, 
nodeToBoundary(precedingNode), CurrentRow)
    +              case _ =>
    +                noParseRule("Window Frame", frame)
    +            }
    +          }.getOrElse(sys.error(s"If you see this, please file a bug 
report with your query."))
    +        }
    +      WindowSpecDefinition(partitionSpec, orderSpec, windowFrame)
    +  }
    +  protected def nodeToTransformation(
    +      node: ASTNode,
    +      child: LogicalPlan): Option[ScriptTransformation] = None
    +  protected def nodeToGenerate(node: ASTNode, outer: Boolean, child: 
LogicalPlan): Generate = {
    +    val Token("TOK_SELECT", Token("TOK_SELEXPR", clauses) :: Nil) = node
    +    val alias = getClause("TOK_TABALIAS", clauses).children.head.text
    +    val generator = clauses.head match {
    +      case Token("TOK_FUNCTION", Token(functionName, Nil) :: children) =>
    +        UnresolvedGenerator(functionName,
    --- End diff --
    Is lateral view a Hive feature? Should we support it in Catalyst?

If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure or file a JIRA ticket
with INFRA.

To unsubscribe, e-mail:
For additional commands, e-mail:

Reply via email to