[spark] branch master updated: [SPARK-33951][SQL] Distinguish the error between filter and distinct

wenchen Sun, 03 Jan 2021 21:44:34 -0800

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new b037930  [SPARK-33951][SQL] Distinguish the error between filter and 
distinct
b037930 is described below

commit b037930952a341f4ed956a8f1839852992feaadc
Author: gengjiaan <gengji...@360.cn>
AuthorDate: Mon Jan 4 05:44:00 2021 +0000

    [SPARK-33951][SQL] Distinguish the error between filter and distinct
    
    ### What changes were proposed in this pull request?
    The error messages for specifying filter and distinct for the aggregate 
function are mixed together and should be separated. This can increase 
readability and ease of use.
    
    ### Why are the changes needed?
    increase readability and ease of use.
    
    ### Does this PR introduce _any_ user-facing change?
    'Yes'.
    
    ### How was this patch tested?
    Jenkins test
    
    Closes #30982 from beliefer/SPARK-33951.
    
    Lead-authored-by: gengjiaan <gengji...@360.cn>
    Co-authored-by: beliefer <belie...@163.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../apache/spark/sql/QueryCompilationErrors.scala  |  9 +----
 .../spark/sql/catalyst/analysis/Analyzer.scala     | 45 +++++++++++++---------
 .../catalyst/analysis/higherOrderFunctions.scala   |  3 +-
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala |  8 ++--
 4 files changed, 35 insertions(+), 30 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala
index e4a1f3f..f4c9132 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala
@@ -263,13 +263,8 @@ object QueryCompilationErrors {
       s"its class is $classCanonicalName, which is not a generator.")
   }
 
-  def distinctOrFilterOnlyWithAggregateFunctionError(prettyName: String): 
Throwable = {
-    new AnalysisException("DISTINCT or FILTER specified, " +
-      s"but $prettyName is not an aggregate function")
-  }
-
-  def ignoreNullsWithUnsupportedFunctionError(prettyName: String): Throwable = 
{
-    new AnalysisException(s"Function $prettyName does not support IGNORE 
NULLS")
+  def functionWithUnsupportedSyntaxError(prettyName: String, syntax: String): 
Throwable = {
+    new AnalysisException(s"Function $prettyName does not support $syntax")
   }
 
   def nonDeterministicFilterInAggregateError(): Throwable = {
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 5e86368..fdd1cd0 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -2120,24 +2120,30 @@ class Analyzer(override val catalogManager: 
CatalogManager)
                 // the context of a Window clause. They do not need to be 
wrapped in an
                 // AggregateExpression.
                 case wf: AggregateWindowFunction =>
-                  if (isDistinct || filter.isDefined) {
-                    throw 
QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError(
-                      wf.prettyName)
+                  if (isDistinct) {
+                    throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                      wf.prettyName, "DISTINCT")
+                  } else if (filter.isDefined) {
+                    throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                      wf.prettyName, "FILTER clause")
                   } else if (ignoreNulls) {
                     wf match {
                       case nthValue: NthValue =>
                         nthValue.copy(ignoreNulls = ignoreNulls)
                       case _ =>
-                        throw 
QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(
-                          wf.prettyName)
+                        throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                          wf.prettyName, "IGNORE NULLS")
                     }
                   } else {
                     wf
                   }
                 case owf: FrameLessOffsetWindowFunction =>
-                  if (isDistinct || filter.isDefined) {
-                    throw 
QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError(
-                      owf.prettyName)
+                  if (isDistinct) {
+                    throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                      owf.prettyName, "DISTINCT")
+                  } else if (filter.isDefined) {
+                    throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                      owf.prettyName, "FILTER clause")
                   } else if (ignoreNulls) {
                     owf match {
                       case lead: Lead =>
@@ -2145,8 +2151,8 @@ class Analyzer(override val catalogManager: 
CatalogManager)
                       case lag: Lag =>
                         lag.copy(ignoreNulls = ignoreNulls)
                       case _ =>
-                        throw 
QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(
-                          owf.prettyName)
+                        throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                          owf.prettyName, "IGNORE NULLS")
                     }
                   } else {
                     owf
@@ -2161,20 +2167,23 @@ class Analyzer(override val catalogManager: 
CatalogManager)
                       case first: First => first.copy(ignoreNulls = 
ignoreNulls)
                       case last: Last => last.copy(ignoreNulls = ignoreNulls)
                       case _ =>
-                        throw 
QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(
-                          agg.prettyName)
+                        throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                          agg.prettyName, "IGNORE NULLS")
                     }
                     AggregateExpression(aggFunc, Complete, isDistinct, filter)
                   } else {
                     AggregateExpression(agg, Complete, isDistinct, filter)
                   }
                 // This function is not an aggregate function, just return the 
resolved one.
-                case other if (isDistinct || filter.isDefined) =>
-                  throw 
QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError(
-                    other.prettyName)
-                case other if (ignoreNulls) =>
-                  throw 
QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(
-                    other.prettyName)
+                case other if isDistinct =>
+                  throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                    other.prettyName, "DISTINCT")
+                case other if filter.isDefined =>
+                  throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                    other.prettyName, "FILTER clause")
+                case other if ignoreNulls =>
+                  throw 
QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                    other.prettyName, "IGNORE NULLS")
                 case e: String2TrimExpression if arguments.size == 2 =>
                   if (trimWarningEnabled.get) {
                     log.warn("Two-parameter TRIM/LTRIM/RTRIM function 
signatures are deprecated." +
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
index 6115b4e..7d74c0d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala
@@ -41,7 +41,8 @@ case class ResolveHigherOrderFunctions(catalog: 
SessionCatalog) extends Rule[Log
             filter.foreach(_.failAnalysis("FILTER predicate specified, " +
               s"but ${func.prettyName} is not an aggregate function"))
             if (ignoreNulls) {
-              throw 
QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(func.prettyName)
+              throw QueryCompilationErrors.functionWithUnsupportedSyntaxError(
+                func.prettyName, "IGNORE NULLS")
             }
             func
           case other => other.failAnalysis(
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index ec2a8a4..01d223d 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -184,22 +184,22 @@ class AnalysisErrorSuite extends AnalysisTest {
   errorTest(
     "distinct function",
     CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"),
-    "DISTINCT or FILTER specified, but hex is not an aggregate function" :: 
Nil)
+    "Function hex does not support DISTINCT" :: Nil)
 
   errorTest(
     "non aggregate function with filter predicate",
     CatalystSqlParser.parsePlan("SELECT hex(a) FILTER (WHERE c = 1) FROM 
TaBlE2"),
-    "DISTINCT or FILTER specified, but hex is not an aggregate function" :: 
Nil)
+    "Function hex does not support FILTER clause" :: Nil)
 
   errorTest(
     "distinct window function",
     CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) OVER () FROM 
TaBlE"),
-    "DISTINCT or FILTER specified, but percent_rank is not an aggregate 
function" :: Nil)
+    "Function percent_rank does not support DISTINCT" :: Nil)
 
   errorTest(
     "window function with filter predicate",
     CatalystSqlParser.parsePlan("SELECT percent_rank(a) FILTER (WHERE c > 1) 
OVER () FROM TaBlE2"),
-    "DISTINCT or FILTER specified, but percent_rank is not an aggregate 
function" :: Nil)
+    "Function percent_rank does not support FILTER clause" :: Nil)
 
   errorTest(
     "higher order function with filter predicate",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-33951][SQL] Distinguish the error between filter and distinct

Reply via email to