Repository: spark
Updated Branches:
  refs/heads/master d37c7f7f0 -> 1c19c2769


[SPARK-15047][SQL] Cleanup SQL Parser

## What changes were proposed in this pull request?
This PR addresses a few minor issues in the SQL parser:

- Removes some unused rules and keywords from the grammar.
- Removes the code path for fallback SQL parsing (previously needed for Hive native parsing).
- Uses `UnresolvedGenerator` instead of hard-coding `Explode` & `JsonTuple` (see the sketch below).
- Adds a more generic way of creating error messages for unsupported Hive features.
- Uses `visitFunctionName` wherever possible.
- Interprets a `CatalogColumn`'s `DataType` directly instead of parsing it again.
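A minimal sketch of the generator change, mirroring the test updates in the diff below (the attribute name `x` is illustrative):

```scala
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedGenerator}

// The parser no longer special-cases "explode" and "json_tuple"; every
// LATERAL VIEW generator is parsed into an UnresolvedGenerator, and the
// analyzer later resolves the name against the function registry.
val explode = UnresolvedGenerator(
  FunctionIdentifier("explode"),
  Seq(UnresolvedAttribute("x")))
```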

## How was this patch tested?
Existing tests.

Author: Herman van Hovell <hvanhov...@questtec.nl>

Closes #12826 from hvanhovell/SPARK-15047.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c19c276
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c19c276
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c19c276

Branch: refs/heads/master
Commit: 1c19c2769edecaefabc2cd67b3b32f901feb3f59
Parents: d37c7f7
Author: Herman van Hovell <hvanhov...@questtec.nl>
Authored: Mon May 2 18:12:31 2016 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Mon May 2 18:12:31 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 35 +++-----------------
 .../spark/sql/catalyst/parser/AstBuilder.scala  | 31 +++--------------
 .../spark/sql/catalyst/parser/ParseDriver.scala | 10 ++----
 .../sql/catalyst/parser/PlanParserSuite.scala   | 15 +++++----
 .../spark/sql/execution/SparkSqlParser.scala    | 12 ++++---
 .../spark/sql/hive/HiveDDLCommandSuite.scala    |  5 +--
 6 files changed, 31 insertions(+), 77 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1c19c276/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 4d5d125..cc4e5c8 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -121,17 +121,13 @@ statement
     | UNCACHE TABLE identifier                                         #uncacheTable
     | CLEAR CACHE                                                      #clearCache
     | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE
-      tableIdentifier partitionSpec?                                   #loadData
+        tableIdentifier partitionSpec?                                 #loadData
+    | TRUNCATE TABLE tableIdentifier partitionSpec?
+        (COLUMNS identifierList)?                                      #truncateTable
     | ADD identifier .*?                                               #addResource
     | SET ROLE .*?                                                     #failNativeCommand
     | SET .*?                                                          #setConfiguration
-    | kws=unsupportedHiveNativeCommands .*?                            #failNativeCommand
-    | hiveNativeCommands                                               #executeNativeCommand
-    ;
-
-hiveNativeCommands
-    : TRUNCATE TABLE tableIdentifier partitionSpec?
-        (COLUMNS identifierList)?
+    | unsupportedHiveNativeCommands .*?                                #failNativeCommand
     ;
 
 unsupportedHiveNativeCommands
@@ -267,14 +263,6 @@ nestedConstantList
     : '(' constantList (',' constantList)* ')'
     ;
 
-skewedLocation
-    : (constant | constantList) EQ STRING
-    ;
-
-skewedLocationList
-    : '(' skewedLocation (',' skewedLocation)* ')'
-    ;
-
 createFileFormat
     : STORED AS fileFormat
     | STORED BY storageHandler
@@ -609,11 +597,6 @@ explainOption
     : LOGICAL | FORMATTED | EXTENDED | CODEGEN
     ;
 
-transactionMode
-    : ISOLATION LEVEL SNAPSHOT            #isolationLevel
-    | READ accessMode=(ONLY | WRITE)      #transactionAccessMode
-    ;
-
 qualifiedName
     : identifier ('.' identifier)*
     ;
@@ -661,8 +644,7 @@ nonReserved
     | VIEW | REPLACE
     | IF
     | NO | DATA
-    | START | TRANSACTION | COMMIT | ROLLBACK | WORK | ISOLATION | LEVEL
-    | SNAPSHOT | READ | WRITE | ONLY
+    | START | TRANSACTION | COMMIT | ROLLBACK
     | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION
     | EXCHANGE | ARCHIVE | UNARCHIVE | FILEFORMAT | TOUCH | COMPACT | CONCATENATE | CHANGE | FIRST
     | AFTER | CASCADE | RESTRICT | BUCKETS | CLUSTERED | SORTED | PURGE | INPUTFORMAT | OUTPUTFORMAT
@@ -778,13 +760,6 @@ START: 'START';
 TRANSACTION: 'TRANSACTION';
 COMMIT: 'COMMIT';
 ROLLBACK: 'ROLLBACK';
-WORK: 'WORK';
-ISOLATION: 'ISOLATION';
-LEVEL: 'LEVEL';
-SNAPSHOT: 'SNAPSHOT';
-READ: 'READ';
-WRITE: 'WRITE';
-ONLY: 'ONLY';
 MACRO: 'MACRO';
 
 IF: 'IF';

http://git-wip-us.apache.org/repos/asf/spark/blob/1c19c276/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 1f923f4..c397462 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -82,25 +82,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
   protected def plan(tree: ParserRuleContext): LogicalPlan = typedVisit(tree)
 
   /**
-   * Make sure we do not try to create a plan for a native command.
-   */
-  override def visitExecuteNativeCommand(ctx: ExecuteNativeCommandContext): LogicalPlan = null
-
-  /**
    * Create a plan for a SHOW FUNCTIONS command.
    */
   override def visitShowFunctions(ctx: ShowFunctionsContext): LogicalPlan = withOrigin(ctx) {
     import ctx._
     if (qualifiedName != null) {
-      val names = qualifiedName().identifier().asScala.map(_.getText).toList
-      names match {
-        case db :: name :: Nil =>
-          ShowFunctions(Some(db), Some(name))
-        case name :: Nil =>
-          ShowFunctions(None, Some(name))
-        case _ =>
-          throw new ParseException("SHOW FUNCTIONS unsupported name", ctx)
-      }
+      val name = visitFunctionName(qualifiedName)
+      ShowFunctions(name.database, Some(name.funcName))
     } else if (pattern != null) {
       ShowFunctions(None, Some(string(pattern)))
     } else {
@@ -117,7 +105,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
       if (describeFuncName.STRING() != null) {
         string(describeFuncName.STRING())
       } else if (describeFuncName.qualifiedName() != null) {
-        describeFuncName.qualifiedName().identifier().asScala.map(_.getText).mkString(".")
+        visitFunctionName(describeFuncName.qualifiedName).unquotedString
       } else {
         describeFuncName.getText
       }
@@ -554,19 +542,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
       query: LogicalPlan,
       ctx: LateralViewContext): LogicalPlan = withOrigin(ctx) {
     val expressions = expressionList(ctx.expression)
-
-    // Create the generator.
-    val generator = ctx.qualifiedName.getText.toLowerCase match {
-      case "explode" if expressions.size == 1 =>
-        Explode(expressions.head)
-      case "json_tuple" =>
-        JsonTuple(expressions)
-      case name =>
-        UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions)
-    }
-
     Generate(
-      generator,
+      UnresolvedGenerator(visitFunctionName(ctx.qualifiedName), expressions),
       join = true,
       outer = ctx.OUTER != null,
       Some(ctx.tblName.getText.toLowerCase),

http://git-wip-us.apache.org/repos/asf/spark/blob/1c19c276/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index d013252..d042e19 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -53,19 +53,15 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
   override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser =>
     astBuilder.visitSingleStatement(parser.singleStatement()) match {
       case plan: LogicalPlan => plan
-      case _ => nativeCommand(sqlText)
+      case _ =>
+        val position = Origin(None, None)
+        throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position)
     }
   }
 
   /** Get the builder (visitor) which converts a ParseTree into a AST. */
   protected def astBuilder: AstBuilder
 
-  /** Create a native command, or fail when this is not supported. */
-  protected def nativeCommand(sqlText: String): LogicalPlan = {
-    val position = Origin(None, None)
-    throw new ParseException(Option(sqlText), "Unsupported SQL statement", position, position)
-  }
-
   protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
     logInfo(s"Parsing command: $command")
 

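A hedged sketch of the new failure path (the statement text is an assumed-unsupported example; the behavior follows the `parsePlan` change above):

```scala
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}

// With the Hive-native fallback gone, a statement that does not produce a
// LogicalPlan fails fast at parse time instead of being handed off natively.
try {
  CatalystSqlParser.parsePlan("START TRANSACTION")  // illustrative unsupported statement
} catch {
  case e: ParseException => println(s"rejected at parse time: ${e.getMessage}")
}
```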
http://git-wip-us.apache.org/repos/asf/spark/blob/1c19c276/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 5e896a3..b7af2ce 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -53,7 +53,7 @@ class PlanParserSuite extends PlanTest {
     assertEqual("show functions foo", ShowFunctions(None, Some("foo")))
     assertEqual("show functions foo.bar", ShowFunctions(Some("foo"), 
Some("bar")))
     assertEqual("show functions 'foo\\\\.*'", ShowFunctions(None, 
Some("foo\\.*")))
-    intercept("show functions foo.bar.baz", "SHOW FUNCTIONS unsupported name")
+    intercept("show functions foo.bar.baz", "Unsupported function name")
   }
 
   test("describe function") {
@@ -263,11 +263,14 @@ class PlanParserSuite extends PlanTest {
   }
 
   test("lateral view") {
+    val explode = UnresolvedGenerator(FunctionIdentifier("explode"), Seq('x))
+    val jsonTuple = UnresolvedGenerator(FunctionIdentifier("json_tuple"), Seq('x, 'y))
+
     // Single lateral view
     assertEqual(
       "select * from t lateral view explode(x) expl as x",
       table("t")
-        .generate(Explode('x), join = true, outer = false, Some("expl"), Seq("x"))
+        .generate(explode, join = true, outer = false, Some("expl"), Seq("x"))
         .select(star()))
 
     // Multiple lateral views
@@ -277,12 +280,12 @@ class PlanParserSuite extends PlanTest {
         |lateral view explode(x) expl
         |lateral view outer json_tuple(x, y) jtup q, z""".stripMargin,
       table("t")
-        .generate(Explode('x), join = true, outer = false, Some("expl"), Seq.empty)
-        .generate(JsonTuple(Seq('x, 'y)), join = true, outer = true, Some("jtup"), Seq("q", "z"))
+        .generate(explode, join = true, outer = false, Some("expl"), Seq.empty)
+        .generate(jsonTuple, join = true, outer = true, Some("jtup"), Seq("q", "z"))
         .select(star()))
 
     // Multi-Insert lateral views.
-    val from = table("t1").generate(Explode('x), join = true, outer = false, 
Some("expl"), Seq("x"))
+    val from = table("t1").generate(explode, join = true, outer = false, 
Some("expl"), Seq("x"))
     assertEqual(
       """from t1
         |lateral view explode(x) expl as x
@@ -294,7 +297,7 @@ class PlanParserSuite extends PlanTest {
         |where s < 10
       """.stripMargin,
       Union(from
-        .generate(JsonTuple(Seq('x, 'y)), join = true, outer = false, Some("jtup"), Seq("q", "z"))
+        .generate(jsonTuple, join = true, outer = false, Some("jtup"), Seq("q", "z"))
         .select(star())
         .insertInto("t2"),
         from.where('s < 10).select(star()).insertInto("t3")))

http://git-wip-us.apache.org/repos/asf/spark/blob/1c19c276/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 8128a6e..dfc56a7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -21,6 +21,7 @@ import scala.collection.JavaConverters._
 import scala.util.Try
 
 import org.antlr.v4.runtime.{ParserRuleContext, Token}
+import org.antlr.v4.runtime.tree.TerminalNode
 
 import org.apache.spark.sql.SaveMode
 import org.apache.spark.sql.catalyst.TableIdentifier
@@ -31,7 +32,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation,
 import org.apache.spark.sql.execution.command._
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution}
-
+import org.apache.spark.sql.types.DataType
 
 /**
  * Concrete parser for Spark SQL statements.
@@ -780,9 +781,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
    */
   override def visitFailNativeCommand(
     ctx: FailNativeCommandContext): LogicalPlan = withOrigin(ctx) {
-    val keywords = if (ctx.kws != null) {
-      Seq(ctx.kws.kw1, ctx.kws.kw2, ctx.kws.kw3, ctx.kws.kw4, ctx.kws.kw5, ctx.kws.kw6)
-        .filter(_ != null).map(_.getText).mkString(" ")
+    val keywords = if (ctx.unsupportedHiveNativeCommands != null) {
+      ctx.unsupportedHiveNativeCommands.children.asScala.collect {
+        case n: TerminalNode => n.getText
+      }.mkString(" ")
     } else {
       // SET ROLE is the exception to the rule, because we handle this before other SET commands.
       "SET ROLE"
@@ -1109,7 +1111,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
         // just convert the whole type string to lower case, otherwise the struct field names
         // will no longer be case sensitive. Instead, we rely on our parser to get the proper
         // case before passing it to Hive.
-        CatalystSqlParser.parseDataType(col.dataType.getText).catalogString,
+        typedVisit[DataType](col.dataType).catalogString,
         nullable = true,
         Option(col.STRING).map(string))
     }

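The keyword-collection pattern above generalizes to any rule context; a standalone sketch (the helper name `commandKeywords` is ours, not Spark's):

```scala
import scala.collection.JavaConverters._
import org.antlr.v4.runtime.ParserRuleContext
import org.antlr.v4.runtime.tree.TerminalNode

// Gather every terminal token under a parse-tree node to describe the
// offending command, rather than enumerating fixed kw1..kw6 slots.
def commandKeywords(ctx: ParserRuleContext): String =
  ctx.children.asScala.collect { case n: TerminalNode => n.getText }.mkString(" ")
```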
http://git-wip-us.apache.org/repos/asf/spark/blob/1c19c276/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
index c97c28c..8dc3c64 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
@@ -254,12 +254,13 @@ class HiveDDLCommandSuite extends PlanTest {
   }
 
   test("use native json_tuple instead of hive's UDTF in LATERAL VIEW") {
-    val plan = parser.parsePlan(
+    val analyzer = TestHive.sparkSession.sessionState.analyzer
+    val plan = analyzer.execute(parser.parsePlan(
       """
         |SELECT *
         |FROM (SELECT '{"f1": "value1", "f2": 12}' json) test
         |LATERAL VIEW json_tuple(json, 'f1', 'f2') jt AS a, b
-      """.stripMargin)
+      """.stripMargin))
 
     assert(plan.children.head.asInstanceOf[Generate].generator.isInstanceOf[JsonTuple])
   }

