spark git commit: [SPARK-15312][SQL] Detect Duplicate Key in Partition Spec and Table Properties

wenchen Sat, 21 May 2016 23:57:13 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-2.0 da5d2300e -> 199bac8fa



[SPARK-15312][SQL] Detect Duplicate Key in Partition Spec and Table Properties

#### What changes were proposed in this pull request?
When there are duplicate keys in the partition specs or table properties, we 
always use the last value and ignore all the previous values. This is caused by 
the function call `toMap`.

partition specs or table properties are widely used in multiple DDL statements.

This PR is to detect the duplicates and issue an exception if found.

#### How was this patch tested?
Added test cases in DDLSuite

Author: gatorsmile <gatorsm...@gmail.com>

Closes #13095 from gatorsmile/detectDuplicate.

(cherry picked from commit a11175eecacd4a57325dab29fff9ebfad819f22f)
Signed-off-by: Wenchen Fan <wenc...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/199bac8f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/199bac8f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/199bac8f

Branch: refs/heads/branch-2.0
Commit: 199bac8fa4dfec54292fe1ba9141edb87c8ee09f
Parents: da5d230
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Sat May 21 23:56:10 2016 -0700
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Sat May 21 23:56:18 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/parser/AstBuilder.scala       | 15 ++++++---------
 .../spark/sql/catalyst/parser/ParserUtils.scala      |  7 +++++++
 .../spark/sql/catalyst/parser/PlanParserSuite.scala  |  2 +-
 .../apache/spark/sql/execution/SparkSqlParser.scala  |  7 +++++--
 .../sql/execution/command/DDLCommandSuite.scala      | 15 +++++++++++++++
 5 files changed, 34 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 2d7d0f9..cace026 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -125,14 +125,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
           val namedQuery = visitNamedQuery(nCtx)
           (namedQuery.alias, namedQuery)
       }
-
       // Check for duplicate names.
-      ctes.groupBy(_._1).filter(_._2.size > 1).foreach {
-        case (name, _) =>
-          throw new ParseException(
-            s"Name '$name' is used for multiple common table expressions", ctx)
-      }
-
+      checkDuplicateKeys(ctes, ctx)
       With(query, ctes.toMap)
     }
   }
@@ -220,11 +214,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
    */
   override def visitPartitionSpec(
       ctx: PartitionSpecContext): Map[String, Option[String]] = 
withOrigin(ctx) {
-    ctx.partitionVal.asScala.map { pVal =>
+    val parts = ctx.partitionVal.asScala.map { pVal =>
       val name = pVal.identifier.getText.toLowerCase
       val value = Option(pVal.constant).map(visitStringConstant)
       name -> value
-    }.toMap
+    }
+    // Check for duplicate partition columns in one spec.
+    checkDuplicateKeys(parts, ctx)
+    parts.toMap
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
index 58e2bdb..9619884 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
@@ -43,6 +43,13 @@ object ParserUtils {
     new ParseException(s"Operation not allowed: $message", ctx)
   }
 
+  /** Check if duplicate keys exist in a set of key-value pairs. */
+  def checkDuplicateKeys[T](keyPairs: Seq[(String, T)], ctx: 
ParserRuleContext): Unit = {
+    keyPairs.groupBy(_._1).filter(_._2.size > 1).foreach { case (key, _) =>
+      throw new ParseException(s"Found duplicate keys '$key'.", ctx)
+    }
+  }
+
   /** Get the code that creates the given node. */
   def source(ctx: ParserRuleContext): String = {
     val stream = ctx.getStart.getInputStream

http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 25d87d9..5811d32 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -108,7 +108,7 @@ class PlanParserSuite extends PlanTest {
         "cte2" -> table("cte1").select(star())))
     intercept(
       "with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1",
-      "Name 'cte1' is used for multiple common table expressions")
+      "Found duplicate keys 'cte1'")
   }
 
   test("simple select query") {

http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 2e3ac97..c517b8b 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -387,11 +387,14 @@ class SparkSqlAstBuilder(conf: SQLConf) extends 
AstBuilder {
    */
   override def visitTablePropertyList(
       ctx: TablePropertyListContext): Map[String, String] = withOrigin(ctx) {
-    ctx.tableProperty.asScala.map { property =>
+    val properties = ctx.tableProperty.asScala.map { property =>
       val key = visitTablePropertyKey(property.key)
       val value = Option(property.value).map(string).orNull
       key -> value
-    }.toMap
+    }
+    // Check for duplicate property names.
+    checkDuplicateKeys(properties, ctx)
+    properties.toMap
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index 708b878..54f98a6 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -665,6 +665,21 @@ class DDLCommandSuite extends PlanTest {
     assert(parsed.isInstanceOf[Project])
   }
 
+  test("duplicate keys in table properties") {
+    val e = intercept[ParseException] {
+      parser.parsePlan("ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('key1' = '1', 
'key1' = '2')")
+    }.getMessage
+    assert(e.contains("Found duplicate keys 'key1'"))
+  }
+
+  test("duplicate columns in partition specs") {
+    val e = intercept[ParseException] {
+      parser.parsePlan(
+        "ALTER TABLE dbx.tab1 PARTITION (a='1', a='2') RENAME TO PARTITION 
(a='100', a='200')")
+    }.getMessage
+    assert(e.contains("Found duplicate keys 'a'"))
+  }
+
   test("drop table") {
     val tableName1 = "db.tab"
     val tableName2 = "tab"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-15312][SQL] Detect Duplicate Key in Partition Spec and Table Properties

Reply via email to