Repository: spark
Updated Branches:
  refs/heads/branch-2.0 da5d2300e -> 199bac8fa

[SPARK-15312][SQL] Detect Duplicate Key in Partition Spec and Table Properties

#### What changes were proposed in this pull request?

When there are duplicate keys in a partition spec or in table properties, we silently keep only the last value and ignore all the previous ones. This is caused by the call to `toMap`. Partition specs and table properties are widely used in multiple DDL statements. This PR detects the duplicates and throws an exception when any are found.

#### How was this patch tested?

Added test cases in DDLCommandSuite.

Author: gatorsmile <gatorsm...@gmail.com>

Closes #13095 from gatorsmile/detectDuplicate.

(cherry picked from commit a11175eecacd4a57325dab29fff9ebfad819f22f)
Signed-off-by: Wenchen Fan <wenc...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/199bac8f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/199bac8f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/199bac8f

Branch: refs/heads/branch-2.0
Commit: 199bac8fa4dfec54292fe1ba9141edb87c8ee09f
Parents: da5d230
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Sat May 21 23:56:10 2016 -0700
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Sat May 21 23:56:18 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/parser/AstBuilder.scala      | 15 ++++++---------
 .../spark/sql/catalyst/parser/ParserUtils.scala     |  7 +++++++
 .../spark/sql/catalyst/parser/PlanParserSuite.scala |  2 +-
 .../apache/spark/sql/execution/SparkSqlParser.scala |  7 +++++--
 .../sql/execution/command/DDLCommandSuite.scala     | 15 +++++++++++++++
 5 files changed, 34 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
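
For context before the diff: the silent-overwrite behavior described above comes from Scala's `toMap`, which keeps only the last value for a repeated key. Below is a minimal standalone sketch, not part of this patch, illustrating the problem and the groupBy-based check the patch introduces; it throws a plain IllegalArgumentException because Spark's ParseException requires a parser rule context.

object DuplicateKeySketch {
  def main(args: Array[String]): Unit = {
    // Duplicate keys are silently collapsed by toMap: the last value wins.
    val pairs = Seq("key1" -> "1", "key1" -> "2")
    assert(pairs.toMap == Map("key1" -> "2"))

    // Simplified version of the check added as ParserUtils.checkDuplicateKeys in this patch,
    // using a plain exception instead of ParseException.
    def checkDuplicateKeys[T](keyPairs: Seq[(String, T)]): Unit = {
      keyPairs.groupBy(_._1).filter(_._2.size > 1).foreach { case (key, _) =>
        throw new IllegalArgumentException(s"Found duplicate keys '$key'.")
      }
    }

    try {
      checkDuplicateKeys(pairs)
    } catch {
      case e: IllegalArgumentException => println(e.getMessage) // Found duplicate keys 'key1'.
    }
  }
}
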

http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 2d7d0f9..cace026 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -125,14 +125,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
         val namedQuery = visitNamedQuery(nCtx)
         (namedQuery.alias, namedQuery)
       }
-      // Check for duplicate names.
-      ctes.groupBy(_._1).filter(_._2.size > 1).foreach {
-        case (name, _) =>
-          throw new ParseException(
-            s"Name '$name' is used for multiple common table expressions", ctx)
-      }
-
+      checkDuplicateKeys(ctes, ctx)
       With(query, ctes.toMap)
     }
   }
@@ -220,11 +214,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
    */
   override def visitPartitionSpec(
       ctx: PartitionSpecContext): Map[String, Option[String]] = withOrigin(ctx) {
-    ctx.partitionVal.asScala.map { pVal =>
+    val parts = ctx.partitionVal.asScala.map { pVal =>
       val name = pVal.identifier.getText.toLowerCase
       val value = Option(pVal.constant).map(visitStringConstant)
       name -> value
-    }.toMap
+    }
+    // Check for duplicate partition columns in one spec.
+    checkDuplicateKeys(parts, ctx)
+    parts.toMap
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
index 58e2bdb..9619884 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala
@@ -43,6 +43,13 @@ object ParserUtils {
     new ParseException(s"Operation not allowed: $message", ctx)
   }
 
+  /** Check if duplicate keys exist in a set of key-value pairs. */
+  def checkDuplicateKeys[T](keyPairs: Seq[(String, T)], ctx: ParserRuleContext): Unit = {
+    keyPairs.groupBy(_._1).filter(_._2.size > 1).foreach { case (key, _) =>
+      throw new ParseException(s"Found duplicate keys '$key'.", ctx)
+    }
+  }
+
   /** Get the code that creates the given node. */
   def source(ctx: ParserRuleContext): String = {
     val stream = ctx.getStart.getInputStream

http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 25d87d9..5811d32 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -108,7 +108,7 @@ class PlanParserSuite extends PlanTest {
         "cte2" -> table("cte1").select(star())))
     intercept(
       "with cte1 (select 1), cte1 as (select 1 from cte1) select * from cte1",
-      "Name 'cte1' is used for multiple common table expressions")
+      "Found duplicate keys 'cte1'")
   }
 
   test("simple select query") {

http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 2e3ac97..c517b8b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -387,11 +387,14 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
    */
   override def visitTablePropertyList(
       ctx: TablePropertyListContext): Map[String, String] = withOrigin(ctx) {
-    ctx.tableProperty.asScala.map { property =>
+    val properties = ctx.tableProperty.asScala.map { property =>
       val key = visitTablePropertyKey(property.key)
       val value = Option(property.value).map(string).orNull
       key -> value
-    }.toMap
+    }
+    // Check for duplicate property names.
+    checkDuplicateKeys(properties, ctx)
+    properties.toMap
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/199bac8f/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index 708b878..54f98a6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -665,6 +665,21 @@ class DDLCommandSuite extends PlanTest {
     assert(parsed.isInstanceOf[Project])
   }
 
+  test("duplicate keys in table properties") {
+    val e = intercept[ParseException] {
+      parser.parsePlan("ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('key1' = '1', 'key1' = '2')")
+    }.getMessage
+    assert(e.contains("Found duplicate keys 'key1'"))
+  }
+
+  test("duplicate columns in partition specs") {
+    val e = intercept[ParseException] {
+      parser.parsePlan(
+        "ALTER TABLE dbx.tab1 PARTITION (a='1', a='2') RENAME TO PARTITION (a='100', a='200')")
+    }.getMessage
+    assert(e.contains("Found duplicate keys 'a'"))
+  }
+
   test("drop table") {
     val tableName1 = "db.tab"
     val tableName2 = "tab"

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org