Repository: spark
Updated Branches:
  refs/heads/branch-2.2 d3c79b77a -> fab070ca4


[SPARK-21132][SQL] DISTINCT modifier of function arguments should not be silently ignored

### What changes were proposed in this pull request?
We should not silently ignore the `DISTINCT` modifier when it is not supported in the function arguments. This PR blocks these cases and issues error messages.
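
For illustration, a minimal sketch of the behavior change, assuming a local `SparkSession` and a hypothetical temp view `t` (the expected messages are the ones asserted by the new test cases in the diff below):

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}

object DistinctModifierExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("distinct-modifier").getOrCreate()
    spark.range(10).toDF("a").createOrReplaceTempView("t")

    // hex is not an aggregate function; before this patch the DISTINCT keyword
    // was silently dropped, now analysis fails.
    try {
      spark.sql("SELECT hex(DISTINCT a) FROM t")
    } catch {
      case e: AnalysisException =>
        println(e.getMessage) // ... hex does not support the modifier DISTINCT ...
    }

    // The same guard applies to non-aggregate window functions.
    try {
      spark.sql("SELECT percent_rank(DISTINCT a) OVER () FROM t")
    } catch {
      case e: AnalysisException =>
        println(e.getMessage) // ... percent_rank does not support the modifier DISTINCT ...
    }

    spark.stop()
  }
}
```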

### How was this patch tested?
Added test cases for both regular functions and window functions
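
The new cases in `AnalysisErrorSuite` (full diff below) boil down to assertions of this shape; a condensed sketch, assuming the updated `AnalysisTest` harness and its `assertAnalysisError` helper:

```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

class DistinctModifierSuite extends AnalysisTest {
  test("DISTINCT on a non-aggregate function is rejected") {
    // The parser accepts the query; the analyzer rejects it with the new message.
    val plan = CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE")
    assertAnalysisError(plan, "hex does not support the modifier DISTINCT" :: Nil)
  }
}
```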

Author: Xiao Li <gatorsm...@gmail.com>

Closes #18340 from gatorsmile/firstCount.

(cherry picked from commit 9413b84b5a99e264816c61f72905b392c2f9cd35)
Signed-off-by: Wenchen Fan <wenc...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fab070ca
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fab070ca
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fab070ca

Branch: refs/heads/branch-2.2
Commit: fab070ca4048fbf51ae511dce80a51322f657c27
Parents: d3c79b7
Author: Xiao Li <gatorsm...@gmail.com>
Authored: Mon Jun 19 15:51:21 2017 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Mon Jun 19 15:51:35 2017 +0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/Analyzer.scala       | 14 ++++++++++++--
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala   | 15 +++++++++++++--
 .../spark/sql/catalyst/analysis/AnalysisTest.scala   |  8 ++++++--
 3 files changed, 31 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fab070ca/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 9979642..3e416b3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1189,11 +1189,21 @@ class Analyzer(
                 // AggregateWindowFunctions are AggregateFunctions that can only be evaluated within
                 // the context of a Window clause. They do not need to be wrapped in an
                 // AggregateExpression.
-                case wf: AggregateWindowFunction => wf
+                case wf: AggregateWindowFunction =>
+                  if (isDistinct) {
+                    failAnalysis(s"${wf.prettyName} does not support the modifier DISTINCT")
+                  } else {
+                    wf
+                  }
                 // We get an aggregate function, we need to wrap it in an AggregateExpression.
                 case agg: AggregateFunction => AggregateExpression(agg, Complete, isDistinct)
                 // This function is not an aggregate function, just return the resolved one.
-                case other => other
+                case other =>
+                  if (isDistinct) {
+                    failAnalysis(s"${other.prettyName} does not support the modifier DISTINCT")
+                  } else {
+                    other
+                  }
               }
             }
         }

http://git-wip-us.apache.org/repos/asf/spark/blob/fab070ca/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index d2ebca5..5050318 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -24,7 +24,8 @@ import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Max}
-import org.apache.spark.sql.catalyst.plans.{Cross, Inner, LeftOuter, RightOuter}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.catalyst.plans.{Cross, LeftOuter, RightOuter}
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
 import org.apache.spark.sql.types._
@@ -152,7 +153,7 @@ class AnalysisErrorSuite extends AnalysisTest {
     "not supported within a window function" :: Nil)
 
   errorTest(
-    "distinct window function",
+    "distinct aggregate function in window",
     testRelation2.select(
       WindowExpression(
        AggregateExpression(Count(UnresolvedAttribute("b")), Complete, isDistinct = true),
@@ -163,6 +164,16 @@ class AnalysisErrorSuite extends AnalysisTest {
     "Distinct window functions are not supported" :: Nil)
 
   errorTest(
+    "distinct function",
+    CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"),
+    "hex does not support the modifier DISTINCT" :: Nil)
+
+  errorTest(
+    "distinct window function",
+    CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) over () FROM TaBlE"),
+    "percent_rank does not support the modifier DISTINCT" :: Nil)
+
+  errorTest(
     "nested aggregate functions",
     testRelation.groupBy('a)(
       AggregateExpression(

http://git-wip-us.apache.org/repos/asf/spark/blob/fab070ca/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
index 82015b1..08d9313 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala
@@ -17,10 +17,11 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
+import java.net.URI
 import java.util.Locale
 
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
+import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.internal.SQLConf
@@ -32,7 +33,10 @@ trait AnalysisTest extends PlanTest {
 
   private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
     val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)
-    val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
+    val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf)
+    catalog.createDatabase(
+      CatalogDatabase("default", "", new URI("loc"), Map.empty),
+      ignoreIfExists = false)
     catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
     catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
     new Analyzer(catalog, conf) {

