Repository: spark
Updated Branches:
  refs/heads/master c8ef9232c -> c5a0d1132


[SPARK-24575][SQL] Prohibit window expressions inside WHERE and HAVING clauses

## What changes were proposed in this pull request?

As discussed 
[before](https://github.com/apache/spark/pull/19193#issuecomment-393726964), 
this PR prohibits window expressions inside WHERE and HAVING clauses.

## How was this patch tested?

This PR comes with a dedicated unit test.

Author: aokolnychyi <anton.okolnyc...@sap.com>

Closes #21580 from aokolnychyi/spark-24575.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5a0d113
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5a0d113
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5a0d113

Branch: refs/heads/master
Commit: c5a0d1132a5608f2110781763f4c2229c6cd7175
Parents: c8ef923
Author: aokolnychyi <anton.okolnyc...@sap.com>
Authored: Wed Jun 20 18:57:13 2018 +0200
Committer: Herman van Hovell <hvanhov...@databricks.com>
Committed: Wed Jun 20 18:57:13 2018 +0200

----------------------------------------------------------------------
 .../spark/sql/catalyst/analysis/Analyzer.scala  |  3 ++
 .../sql/DataFrameWindowFunctionsSuite.scala     | 42 ++++++++++++++++++--
 2 files changed, 41 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/c5a0d113/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 6e3107f..e187133 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1923,6 +1923,9 @@ class Analyzer(
     // "Aggregate with Having clause" will be triggered.
     def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
 
+      case Filter(condition, _) if hasWindowFunction(condition) =>
+        failAnalysis("It is not allowed to use window functions inside WHERE 
and HAVING clauses")
+
       // Aggregate with Having clause. This rule works with an unresolved 
Aggregate because
       // a resolved Aggregate will not have Window Functions.
       case f @ Filter(condition, a @ Aggregate(groupingExprs, aggregateExprs, 
child))

http://git-wip-us.apache.org/repos/asf/spark/blob/c5a0d113/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
index 3ea398a..97a8439 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
@@ -17,9 +17,7 @@
 
 package org.apache.spark.sql
 
-import java.sql.{Date, Timestamp}
-
-import scala.collection.mutable
+import org.scalatest.Matchers.the
 
 import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled}
 import org.apache.spark.sql.expressions.{MutableAggregationBuffer, 
UserDefinedAggregateFunction, Window}
@@ -27,7 +25,6 @@ import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.CalendarInterval
 
 /**
  * Window function testing for DataFrame API.
@@ -624,4 +621,41 @@ class DataFrameWindowFunctionsSuite extends QueryTest with 
SharedSQLContext {
       }
     }
   }
+
+  test("SPARK-24575: Window functions inside WHERE and HAVING clauses") {
+    def checkAnalysisError(df: => DataFrame): Unit = {
+      val thrownException = the [AnalysisException] thrownBy {
+        df.queryExecution.analyzed
+      }
+      assert(thrownException.message.contains("window functions inside WHERE 
and HAVING clauses"))
+    }
+
+    
checkAnalysisError(testData2.select('a).where(rank().over(Window.orderBy('b)) 
=== 1))
+    checkAnalysisError(testData2.where('b === 2 && 
rank().over(Window.orderBy('b)) === 1))
+    checkAnalysisError(
+      testData2.groupBy('a)
+        .agg(avg('b).as("avgb"))
+        .where('a > 'avgb && rank().over(Window.orderBy('a)) === 1))
+    checkAnalysisError(
+      testData2.groupBy('a)
+        .agg(max('b).as("maxb"), sum('b).as("sumb"))
+        .where(rank().over(Window.orderBy('a)) === 1))
+    checkAnalysisError(
+      testData2.groupBy('a)
+        .agg(max('b).as("maxb"), sum('b).as("sumb"))
+        .where('sumb === 5 && rank().over(Window.orderBy('a)) === 1))
+
+    checkAnalysisError(sql("SELECT a FROM testData2 WHERE RANK() OVER(ORDER BY 
b) = 1"))
+    checkAnalysisError(sql("SELECT * FROM testData2 WHERE b = 2 AND RANK() 
OVER(ORDER BY b) = 1"))
+    checkAnalysisError(
+      sql("SELECT * FROM testData2 GROUP BY a HAVING a > AVG(b) AND RANK() 
OVER(ORDER BY a) = 1"))
+    checkAnalysisError(
+      sql("SELECT a, MAX(b), SUM(b) FROM testData2 GROUP BY a HAVING RANK() 
OVER(ORDER BY a) = 1"))
+    checkAnalysisError(
+      sql(
+        s"""SELECT a, MAX(b)
+           |FROM testData2
+           |GROUP BY a
+           |HAVING SUM(b) = 5 AND RANK() OVER(ORDER BY a) = 1""".stripMargin))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to