This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push: new 2fc57bba [SPARK-36015][SQL] Support TimestampNTZType in the Window spec definition 2fc57bba is described below commit 2fc57bba31a9bea3b97582751a693acd268158e9 Author: gengjiaan <gengji...@360.cn> AuthorDate: Wed Jul 7 20:27:05 2021 +0300 [SPARK-36015][SQL] Support TimestampNTZType in the Window spec definition ### What changes were proposed in this pull request? The method `WindowSpecDefinition.isValidFrameType` doesn't consider `TimestampNTZType`. We should support it as for `TimestampType`. ### Why are the changes needed? Support `TimestampNTZType` in the Window spec definition. ### Does this PR introduce _any_ user-facing change? 'Yes'. This PR allows users use `TimestampNTZType` as the sort spec in window spec definition. ### How was this patch tested? New tests. Closes #33246 from beliefer/SPARK-36015. Authored-by: gengjiaan <gengji...@360.cn> Signed-off-by: Max Gekk <max.g...@gmail.com> (cherry picked from commit 62ff2add9444fbd54802548b3daf7cde5820feef) Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../catalyst/expressions/windowExpressions.scala | 6 +-- .../sql/execution/window/WindowExecBase.scala | 10 ++-- .../src/test/resources/sql-tests/inputs/window.sql | 9 ++++ .../resources/sql-tests/results/window.sql.out | 56 +++++++++++++++++++++- 4 files changed, 73 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 2555c6a..fc2e449 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -102,9 +102,9 @@ case class WindowSpecDefinition( private def isValidFrameType(ft: DataType): Boolean = (orderSpec.head.dataType, ft) match { case (DateType, IntegerType) => true case (DateType, _: YearMonthIntervalType) => true - case (TimestampType, CalendarIntervalType) => true - case (TimestampType, _: YearMonthIntervalType) => true - case (TimestampType, _: DayTimeIntervalType) => true + case (TimestampType | TimestampNTZType, CalendarIntervalType) => true + case (TimestampType | TimestampNTZType, _: YearMonthIntervalType) => true + case (TimestampType | TimestampNTZType, _: DayTimeIntervalType) => true case (a, b) => a == b } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index 2aa0b02..f3b3b34 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.execution.UnaryExecNode -import org.apache.spark.sql.types.{CalendarIntervalType, DateType, DayTimeIntervalType, IntegerType, TimestampType, YearMonthIntervalType} +import org.apache.spark.sql.types._ trait WindowExecBase extends UnaryExecNode { def windowExpression: Seq[NamedExpression] @@ -96,10 +96,12 @@ trait WindowExecBase extends UnaryExecNode { val boundExpr = (expr.dataType, boundOffset.dataType) match { case (DateType, IntegerType) => DateAdd(expr, boundOffset) case (DateType, _: YearMonthIntervalType) => DateAddYMInterval(expr, boundOffset) - case (TimestampType, CalendarIntervalType) => TimeAdd(expr, boundOffset, Some(timeZone)) - case (TimestampType, _: YearMonthIntervalType) => + case (TimestampType | TimestampNTZType, CalendarIntervalType) => + TimeAdd(expr, boundOffset, Some(timeZone)) + case (TimestampType | TimestampNTZType, _: YearMonthIntervalType) => TimestampAddYMInterval(expr, boundOffset, Some(timeZone)) - case (TimestampType, _: DayTimeIntervalType) => TimeAdd(expr, boundOffset, Some(timeZone)) + case (TimestampType | TimestampNTZType, _: DayTimeIntervalType) => + TimeAdd(expr, boundOffset, Some(timeZone)) case (a, b) if a == b => Add(expr, boundOffset) } val bound = MutableProjection.create(boundExpr :: Nil, child.output) diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index 46d3629..9766aaf 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -70,12 +70,21 @@ RANGE BETWEEN CURRENT ROW AND 2 FOLLOWING) FROM testData ORDER BY cate, val_date SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData ORDER BY cate, val_timestamp; +SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) +RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData +ORDER BY cate, to_timestamp_ntz(val_timestamp); SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData ORDER BY cate, val_timestamp; +SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) +RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData +ORDER BY cate, to_timestamp_ntz(val_timestamp); SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData ORDER BY cate, val_timestamp; +SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) +RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData +ORDER BY cate, to_timestamp_ntz(val_timestamp); SELECT val_date, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_date RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData ORDER BY cate, val_date; diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index b3f9e6c..455015b 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 50 +-- Number of queries: 53 -- !query @@ -212,6 +212,24 @@ NULL NULL NULL -- !query +SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) +RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData +ORDER BY cate, to_timestamp_ntz(val_timestamp) +-- !query schema +struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND INTERVAL '23 days 4 hours' FOLLOWING):double> +-- !query output +NULL NULL NULL +2017-07-31 17:00:00 NULL 1.5015456E9 +2017-07-31 17:00:00 a 1.5016970666666667E9 +2017-07-31 17:00:00 a 1.5016970666666667E9 +2017-08-05 23:13:20 a 1.502E9 +2020-12-30 16:00:00 a 1.6093728E9 +2017-07-31 17:00:00 b 1.5022728E9 +2017-08-17 13:00:00 b 1.503E9 +2020-12-30 16:00:00 b 1.6093728E9 + + +-- !query SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData ORDER BY cate, val_timestamp @@ -230,6 +248,24 @@ NULL NULL NULL -- !query +SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) +RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData +ORDER BY cate, to_timestamp_ntz(val_timestamp) +-- !query schema +struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND INTERVAL '1-1' YEAR TO MONTH FOLLOWING):double> +-- !query output +NULL NULL NULL +2017-07-31 17:00:00 NULL 1.5015456E9 +2017-07-31 17:00:00 a 1.5016970666666667E9 +2017-07-31 17:00:00 a 1.5016970666666667E9 +2017-08-05 23:13:20 a 1.502E9 +2020-12-30 16:00:00 a 1.6093728E9 +2017-07-31 17:00:00 b 1.5022728E9 +2017-08-17 13:00:00 b 1.503E9 +2020-12-30 16:00:00 b 1.6093728E9 + + +-- !query SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_timestamp RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData ORDER BY cate, val_timestamp @@ -248,6 +284,24 @@ NULL NULL NULL -- !query +SELECT val_timestamp, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) +RANGE BETWEEN CURRENT ROW AND interval '1 2:3:4.001' day to second FOLLOWING) FROM testData +ORDER BY cate, to_timestamp_ntz(val_timestamp) +-- !query schema +struct<val_timestamp:timestamp,cate:string,avg(val_timestamp) OVER (PARTITION BY cate ORDER BY to_timestamp_ntz(val_timestamp) ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND INTERVAL '1 02:03:04.001' DAY TO SECOND FOLLOWING):double> +-- !query output +NULL NULL NULL +2017-07-31 17:00:00 NULL 1.5015456E9 +2017-07-31 17:00:00 a 1.5015456E9 +2017-07-31 17:00:00 a 1.5015456E9 +2017-08-05 23:13:20 a 1.502E9 +2020-12-30 16:00:00 a 1.6093728E9 +2017-07-31 17:00:00 b 1.5015456E9 +2017-08-17 13:00:00 b 1.503E9 +2020-12-30 16:00:00 b 1.6093728E9 + + +-- !query SELECT val_date, cate, avg(val_timestamp) OVER(PARTITION BY cate ORDER BY val_date RANGE BETWEEN CURRENT ROW AND interval '1-1' year to month FOLLOWING) FROM testData ORDER BY cate, val_date --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org