[1/2] spark git commit: [SPARK-12575][SQL] Grammar parity with existing SQL parser

rxin Fri, 15 Jan 2016 15:20:06 -0800

Repository: spark
Updated Branches:
  refs/heads/master 3f1c58d60 -> 7cd7f2202



http://git-wip-us.apache.org/repos/asf/spark/blob/7cd7f220/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
index b22f424..313ba18 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/ExtendedHiveQlParser.scala
@@ -19,14 +19,23 @@ package org.apache.spark.sql.hive
 
 import scala.language.implicitConversions
 
+import org.apache.spark.sql.catalyst.{AbstractSparkSQLParser, TableIdentifier}
+import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.AbstractSparkSQLParser
 import org.apache.spark.sql.hive.execution.{AddFile, AddJar, HiveNativeCommand}
 
 /**
  * A parser that recognizes all HiveQL constructs together with Spark SQL 
specific extensions.
  */
-private[hive] class ExtendedHiveQlParser extends AbstractSparkSQLParser {
+private[hive] class ExtendedHiveQlParser(sqlContext: HiveContext) extends 
AbstractSparkSQLParser {
+
+  val parser = new HiveQl(sqlContext.conf)
+
+  override def parseExpression(sql: String): Expression = 
parser.parseExpression(sql)
+
+  override def parseTableIdentifier(sql: String): TableIdentifier =
+    parser.parseTableIdentifier(sql)
+
   // Keyword is a convention with AbstractSparkSQLParser, which will scan all 
of the `Keyword`
   // properties via reflection the class in runtime for constructing the 
SqlLexical object
   protected val ADD = Keyword("ADD")
@@ -38,7 +47,10 @@ private[hive] class ExtendedHiveQlParser extends 
AbstractSparkSQLParser {
 
   protected lazy val hiveQl: Parser[LogicalPlan] =
     restInput ^^ {
-      case statement => HiveQl.parsePlan(statement.trim)
+      case statement =>
+        sqlContext.executionHive.withHiveState {
+          parser.parsePlan(statement.trim)
+        }
     }
 
   protected lazy val dfs: Parser[LogicalPlan] =

http://git-wip-us.apache.org/repos/asf/spark/blob/7cd7f220/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index cbaf006..7bdca52 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -42,7 +42,7 @@ import org.apache.spark.api.java.JavaSparkContext
 import org.apache.spark.sql._
 import org.apache.spark.sql.SQLConf.SQLConfEntry
 import org.apache.spark.sql.SQLConf.SQLConfEntry._
-import org.apache.spark.sql.catalyst.{InternalRow, ParserDialect, SqlParser}
+import org.apache.spark.sql.catalyst.{InternalRow, ParserDialect}
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression}
 import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
@@ -57,17 +57,6 @@ import org.apache.spark.unsafe.types.UTF8String
 import org.apache.spark.util.Utils
 
 /**
- * This is the HiveQL Dialect, this dialect is strongly bind with HiveContext
- */
-private[hive] class HiveQLDialect(sqlContext: HiveContext) extends 
ParserDialect {
-  override def parse(sqlText: String): LogicalPlan = {
-    sqlContext.executionHive.withHiveState {
-      HiveQl.parseSql(sqlText)
-    }
-  }
-}
-
-/**
  * Returns the current database of metadataHive.
  */
 private[hive] case class CurrentDatabase(ctx: HiveContext)
@@ -342,12 +331,12 @@ class HiveContext private[hive](
    * @since 1.3.0
    */
   def refreshTable(tableName: String): Unit = {
-    val tableIdent = SqlParser.parseTableIdentifier(tableName)
+    val tableIdent = sqlParser.parseTableIdentifier(tableName)
     catalog.refreshTable(tableIdent)
   }
 
   protected[hive] def invalidateTable(tableName: String): Unit = {
-    val tableIdent = SqlParser.parseTableIdentifier(tableName)
+    val tableIdent = sqlParser.parseTableIdentifier(tableName)
     catalog.invalidateTable(tableIdent)
   }
 
@@ -361,7 +350,7 @@ class HiveContext private[hive](
    * @since 1.2.0
    */
   def analyze(tableName: String) {
-    val tableIdent = SqlParser.parseTableIdentifier(tableName)
+    val tableIdent = sqlParser.parseTableIdentifier(tableName)
     val relation = EliminateSubQueries(catalog.lookupRelation(tableIdent))
 
     relation match {
@@ -559,7 +548,7 @@ class HiveContext private[hive](
 
   protected[sql] override def getSQLDialect(): ParserDialect = {
     if (conf.dialect == "hiveql") {
-      new HiveQLDialect(this)
+      new ExtendedHiveQlParser(this)
     } else {
       super.getSQLDialect()
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/7cd7f220/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index daaa5a5..3d54048 100644
--- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -416,8 +416,8 @@ private[hive] class HiveMetastoreCatalog(val client: 
ClientInterface, hive: Hive
       alias match {
         // because hive use things like `_c0` to build the expanded text
         // currently we cannot support view from "create view v1(c1) as ..."
-        case None => Subquery(table.name, HiveQl.parsePlan(viewText))
-        case Some(aliasText) => Subquery(aliasText, HiveQl.parsePlan(viewText))
+        case None => Subquery(table.name, hive.parseSql(viewText))
+        case Some(aliasText) => Subquery(aliasText, hive.parseSql(viewText))
       }
     } else {
       MetastoreRelation(qualifiedTableName.database, qualifiedTableName.name, 
alias)(table)(hive)

http://git-wip-us.apache.org/repos/asf/spark/blob/7cd7f220/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index ca9ddf9..46246f8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -79,7 +79,7 @@ private[hive] case class CreateViewAsSelect(
 }
 
 /** Provides a mapping from HiveQL statements to catalyst logical plans and 
expression trees. */
-private[hive] object HiveQl extends SparkQl with Logging {
+private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with 
Logging {
   protected val nativeCommands = Seq(
     "TOK_ALTERDATABASE_OWNER",
     "TOK_ALTERDATABASE_PROPERTIES",
@@ -168,8 +168,6 @@ private[hive] object HiveQl extends SparkQl with Logging {
     "TOK_TRUNCATETABLE"     // truncate table" is a NativeCommand, does not 
need to explain.
   ) ++ nativeCommands
 
-  protected val hqlParser = new ExtendedHiveQlParser
-
   /**
    * Returns the HiveConf
    */
@@ -186,9 +184,6 @@ private[hive] object HiveQl extends SparkQl with Logging {
     ss.getConf
   }
 
-  /** Returns a LogicalPlan for a given HiveQL string. */
-  def parseSql(sql: String): LogicalPlan = hqlParser.parse(sql)
-
   protected def getProperties(node: ASTNode): Seq[(String, String)] = node 
match {
     case Token("TOK_TABLEPROPLIST", list) =>
       list.map {

http://git-wip-us.apache.org/repos/asf/spark/blob/7cd7f220/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
index 53d15c1..137dadd 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveQlSuite.scala
@@ -23,12 +23,15 @@ import org.scalatest.BeforeAndAfterAll
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.expressions.JsonTuple
+import org.apache.spark.sql.catalyst.parser.SimpleParserConf
 import org.apache.spark.sql.catalyst.plans.logical.Generate
 import org.apache.spark.sql.hive.client.{ExternalTable, HiveColumn, HiveTable, 
ManagedTable}
 
 class HiveQlSuite extends SparkFunSuite with BeforeAndAfterAll {
+  val parser = new HiveQl(SimpleParserConf())
+
   private def extractTableDesc(sql: String): (HiveTable, Boolean) = {
-    HiveQl.parsePlan(sql).collect {
+    parser.parsePlan(sql).collect {
       case CreateTableAsSelect(desc, child, allowExisting) => (desc, 
allowExisting)
     }.head
   }
@@ -173,7 +176,7 @@ class HiveQlSuite extends SparkFunSuite with 
BeforeAndAfterAll {
   test("Invalid interval term should throw AnalysisException") {
     def assertError(sql: String, errorMessage: String): Unit = {
       val e = intercept[AnalysisException] {
-        HiveQl.parseSql(sql)
+        parser.parsePlan(sql)
       }
       assert(e.getMessage.contains(errorMessage))
     }
@@ -186,7 +189,7 @@ class HiveQlSuite extends SparkFunSuite with 
BeforeAndAfterAll {
   }
 
   test("use native json_tuple instead of hive's UDTF in LATERAL VIEW") {
-    val plan = HiveQl.parseSql(
+    val plan = parser.parsePlan(
       """
         |SELECT *
         |FROM (SELECT '{"f1": "value1", "f2": 12}' json) test

http://git-wip-us.apache.org/repos/asf/spark/blob/7cd7f220/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
index 78f74cd..91bedf9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala
@@ -21,6 +21,7 @@ import scala.reflect.ClassTag
 
 import org.apache.spark.sql.{QueryTest, Row, SQLConf}
 import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.parser.SimpleParserConf
 import org.apache.spark.sql.execution.joins._
 import org.apache.spark.sql.hive.execution._
 import org.apache.spark.sql.hive.test.TestHiveSingleton
@@ -28,9 +29,11 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton
 class StatisticsSuite extends QueryTest with TestHiveSingleton {
   import hiveContext.sql
 
+  val parser = new HiveQl(SimpleParserConf())
+
   test("parse analyze commands") {
     def assertAnalyzeCommand(analyzeCommand: String, c: Class[_]) {
-      val parsed = HiveQl.parseSql(analyzeCommand)
+      val parsed = parser.parsePlan(analyzeCommand)
       val operators = parsed.collect {
         case a: AnalyzeTable => a
         case o => o

http://git-wip-us.apache.org/repos/asf/spark/blob/7cd7f220/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index f6c687a..61d5aa7 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -22,12 +22,14 @@ import java.sql.{Date, Timestamp}
 import scala.collection.JavaConverters._
 
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.{DefaultParserDialect, TableIdentifier}
+import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, 
FunctionRegistry}
 import org.apache.spark.sql.catalyst.errors.DialectException
+import org.apache.spark.sql.catalyst.parser.ParserConf
+import org.apache.spark.sql.execution.SparkQl
 import org.apache.spark.sql.execution.datasources.LogicalRelation
 import org.apache.spark.sql.execution.datasources.parquet.ParquetRelation
-import org.apache.spark.sql.hive.{HiveContext, HiveQLDialect, 
MetastoreRelation}
+import org.apache.spark.sql.hive.{ExtendedHiveQlParser, HiveContext, HiveQl, 
MetastoreRelation}
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
@@ -56,7 +58,7 @@ case class WindowData(
     area: String,
     product: Int)
 /** A SQL Dialect for testing purpose, and it can not be nested type */
-class MyDialect extends DefaultParserDialect
+class MyDialect(conf: ParserConf) extends HiveQl(conf)
 
 /**
  * A collection of hive query tests where we generate the answers ourselves 
instead of depending on
@@ -339,20 +341,20 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils 
with TestHiveSingleton {
     val hiveContext = new HiveContext(sqlContext.sparkContext)
     val dialectConf = "spark.sql.dialect"
     checkAnswer(hiveContext.sql(s"set $dialectConf"), Row(dialectConf, 
"hiveql"))
-    assert(hiveContext.getSQLDialect().getClass === classOf[HiveQLDialect])
+    assert(hiveContext.getSQLDialect().getClass === 
classOf[ExtendedHiveQlParser])
   }
 
   test("SQL Dialect Switching") {
-    assert(getSQLDialect().getClass === classOf[HiveQLDialect])
+    assert(getSQLDialect().getClass === classOf[ExtendedHiveQlParser])
     setConf("spark.sql.dialect", classOf[MyDialect].getCanonicalName())
     assert(getSQLDialect().getClass === classOf[MyDialect])
     assert(sql("SELECT 1").collect() === Array(Row(1)))
 
     // set the dialect back to the DefaultSQLDialect
     sql("SET spark.sql.dialect=sql")
-    assert(getSQLDialect().getClass === classOf[DefaultParserDialect])
+    assert(getSQLDialect().getClass === classOf[SparkQl])
     sql("SET spark.sql.dialect=hiveql")
-    assert(getSQLDialect().getClass === classOf[HiveQLDialect])
+    assert(getSQLDialect().getClass === classOf[ExtendedHiveQlParser])
 
     // set invalid dialect
     sql("SET spark.sql.dialect.abc=MyTestClass")
@@ -361,14 +363,14 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils 
with TestHiveSingleton {
       sql("SELECT 1")
     }
     // test if the dialect set back to HiveQLDialect
-    getSQLDialect().getClass === classOf[HiveQLDialect]
+    getSQLDialect().getClass === classOf[ExtendedHiveQlParser]
 
     sql("SET spark.sql.dialect=MyTestClass")
     intercept[DialectException] {
       sql("SELECT 1")
     }
     // test if the dialect set back to HiveQLDialect
-    assert(getSQLDialect().getClass === classOf[HiveQLDialect])
+    assert(getSQLDialect().getClass === classOf[ExtendedHiveQlParser])
   }
 
   test("CTAS with serde") {

http://git-wip-us.apache.org/repos/asf/spark/blob/7cd7f220/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
----------------------------------------------------------------------
diff --git 
a/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java 
b/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
index 30e1758..62edf6c 100644
--- a/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
+++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/CalendarInterval.java
@@ -188,6 +188,11 @@ public final class CalendarInterval implements 
Serializable {
             Integer.MIN_VALUE, Integer.MAX_VALUE);
           result = new CalendarInterval(month, 0L);
 
+        } else if (unit.equals("week")) {
+          long week = toLongWithRange("week", m.group(1),
+                  Long.MIN_VALUE / MICROS_PER_WEEK, Long.MAX_VALUE / 
MICROS_PER_WEEK);
+          result = new CalendarInterval(0, week * MICROS_PER_WEEK);
+
         } else if (unit.equals("day")) {
           long day = toLongWithRange("day", m.group(1),
             Long.MIN_VALUE / MICROS_PER_DAY, Long.MAX_VALUE / MICROS_PER_DAY);
@@ -206,6 +211,15 @@ public final class CalendarInterval implements 
Serializable {
         } else if (unit.equals("second")) {
           long micros = parseSecondNano(m.group(1));
           result = new CalendarInterval(0, micros);
+
+        } else if (unit.equals("millisecond")) {
+          long millisecond = toLongWithRange("millisecond", m.group(1),
+                  Long.MIN_VALUE / MICROS_PER_MILLI, Long.MAX_VALUE / 
MICROS_PER_MILLI);
+          result = new CalendarInterval(0, millisecond * MICROS_PER_MILLI);
+
+        } else if (unit.equals("microsecond")) {
+          long micros = Long.valueOf(m.group(1));
+          result = new CalendarInterval(0, micros);
         }
       } catch (Exception e) {
         throw new IllegalArgumentException("Error parsing interval string: " + 
e.getMessage(), e);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[1/2] spark git commit: [SPARK-12575][SQL] Grammar parity with existing SQL parser

Reply via email to