This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 9a7c59c  [SPARK-36222][SQL] Step by days in the Sequence expression 
for dates
9a7c59c is described below

commit 9a7c59c99ce69411485acf382dfc9be053927b59
Author: gengjiaan <gengji...@360.cn>
AuthorDate: Tue Jul 20 19:16:56 2021 +0300

    [SPARK-36222][SQL] Step by days in the Sequence expression for dates
    
    ### What changes were proposed in this pull request?
    The current implementation of the `Sequence` expression does not support stepping by days for dates.
    ```
    spark-sql> select sequence(date'2021-07-01', date'2021-07-10', interval '3' 
day);
    Error in query: cannot resolve 'sequence(DATE '2021-07-01', DATE 
'2021-07-10', INTERVAL '3' DAY)' due to data type mismatch:
    sequence uses the wrong parameter type. The parameter type must conform to:
    1. The start and stop expressions must resolve to the same type.
    2. If start and stop expressions resolve to the 'date' or 'timestamp' type
    then the step expression must resolve to the 'interval' or
    'interval year to month' or 'interval day to second' type,
    otherwise to the same type as the start and stop expressions.
             ; line 1 pos 7;
    'Project [unresolvedalias(sequence(2021-07-01, 2021-07-10, Some(INTERVAL 
'3' DAY), Some(Europe/Moscow)), None)]
    +- OneRowRelation
    ```
    
    ### Why are the changes needed?
    A `DayTimeInterval` with day granularity should be usable as the step for dates.
    
    ### Does this PR introduce _any_ user-facing change?
    'Yes'.
    The Sequence expression will support stepping by a `DayTimeInterval` with day granularity for dates.
    
    ### How was this patch tested?
    New tests.
    
    Closes #33439 from beliefer/SPARK-36222.
    
    Authored-by: gengjiaan <gengji...@360.cn>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
    (cherry picked from commit c0d84e6cf1046b7944796038414ef21fe9c7e3b5)
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../expressions/collectionOperations.scala         | 17 ++++--
 .../expressions/CollectionExpressionsSuite.scala   | 61 ++++++++++++++++++++--
 2 files changed, 68 insertions(+), 10 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index 730b8d0..2c3312a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -2574,7 +2574,8 @@ case class Sequence(
               DayTimeIntervalType.acceptsType(stepType)
           case DateType =>
             stepOpt.isEmpty || CalendarIntervalType.acceptsType(stepType) ||
-              YearMonthIntervalType.acceptsType(stepType)
+              YearMonthIntervalType.acceptsType(stepType) ||
+              DayTimeIntervalType.acceptsType(stepType)
           case _: IntegralType =>
             stepOpt.isEmpty || stepType.sameType(startType)
           case _ => false
@@ -2626,8 +2627,10 @@ case class Sequence(
     case DateType =>
       if (stepOpt.isEmpty || 
CalendarIntervalType.acceptsType(stepOpt.get.dataType)) {
         new TemporalSequenceImpl[Int](IntegerType, start.dataType, 
MICROS_PER_DAY, _.toInt, zoneId)
-      } else {
+      } else if (YearMonthIntervalType.acceptsType(stepOpt.get.dataType)) {
         new PeriodSequenceImpl[Int](IntegerType, start.dataType, 
MICROS_PER_DAY, _.toInt, zoneId)
+      } else {
+        new DurationSequenceImpl[Int](IntegerType, start.dataType, 
MICROS_PER_DAY, _.toInt, zoneId)
       }
   }
 
@@ -2807,15 +2810,19 @@ object Sequence {
     val intervalType: DataType = DayTimeIntervalType()
 
     def splitStep(input: Any): (Int, Int, Long) = {
-      (0, 0, input.asInstanceOf[Long])
+      val duration = input.asInstanceOf[Long]
+      val days = IntervalUtils.getDays(duration)
+      val micros = duration - days * MICROS_PER_DAY
+      (0, days, micros)
     }
 
     def stepSplitCode(
         stepMonths: String, stepDays: String, stepMicros: String, step: 
String): String = {
       s"""
          |final int $stepMonths = 0;
-         |final int $stepDays = 0;
-         |final long $stepMicros = $step;
+         |final int $stepDays =
+         |  (int) 
org.apache.spark.sql.catalyst.util.IntervalUtils.getDays($step);
+         |final long $stepMicros = $step - $stepDays * ${MICROS_PER_DAY}L;
        """.stripMargin
     }
   }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
index bfecbf5..caa5e96 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
@@ -919,6 +919,16 @@ class CollectionExpressionsSuite extends SparkFunSuite 
with ExpressionEvalHelper
           Date.valueOf("2020-11-01"),
           Date.valueOf("2022-04-01")))
 
+      checkEvaluation(new Sequence(
+        Literal(Date.valueOf("2021-07-01")),
+        Literal(Date.valueOf("2021-07-10")),
+        Literal(fromDayTimeString("3 0:0:0"))),
+        Seq(
+          Date.valueOf("2021-07-01"),
+          Date.valueOf("2021-07-04"),
+          Date.valueOf("2021-07-07"),
+          Date.valueOf("2021-07-10")))
+
       checkExceptionInExpression[IllegalArgumentException](
         new Sequence(
           Literal(Date.valueOf("1970-01-02")),
@@ -1092,6 +1102,52 @@ class CollectionExpressionsSuite extends SparkFunSuite 
with ExpressionEvalHelper
           Date.valueOf("2020-11-01"),
           Date.valueOf("2022-04-01")))
 
+      checkEvaluation(new Sequence(
+        Literal(Date.valueOf("2021-07-01")),
+        Literal(Date.valueOf("2021-07-10")),
+        Literal(Duration.ofDays(3))),
+        Seq(
+          Date.valueOf("2021-07-01"),
+          Date.valueOf("2021-07-04"),
+          Date.valueOf("2021-07-07"),
+          Date.valueOf("2021-07-10")))
+
+      checkExceptionInExpression[IllegalArgumentException](
+        new Sequence(
+          Literal(Date.valueOf("2021-07-01")),
+          Literal(Date.valueOf("2021-07-10")),
+          Literal(Duration.ofHours(3))),
+        EmptyRow,
+        "sequence step must be an interval day to second of day granularity" +
+          " if start and end values are dates")
+
+      checkExceptionInExpression[IllegalArgumentException](
+        new Sequence(
+          Literal(Date.valueOf("2021-07-01")),
+          Literal(Date.valueOf("2021-07-10")),
+          Literal(Duration.ofMinutes(3))),
+        EmptyRow,
+        "sequence step must be an interval day to second of day granularity" +
+          " if start and end values are dates")
+
+      checkExceptionInExpression[IllegalArgumentException](
+        new Sequence(
+          Literal(Date.valueOf("2021-07-01")),
+          Literal(Date.valueOf("2021-07-10")),
+          Literal(Duration.ofSeconds(3))),
+        EmptyRow,
+        "sequence step must be an interval day to second of day granularity" +
+          " if start and end values are dates")
+
+      checkExceptionInExpression[IllegalArgumentException](
+        new Sequence(
+          Literal(Date.valueOf("2021-07-01")),
+          Literal(Date.valueOf("2021-07-10")),
+          Literal(Duration.ofMillis(3))),
+        EmptyRow,
+        "sequence step must be an interval day to second of day granularity" +
+          " if start and end values are dates")
+
       checkExceptionInExpression[IllegalArgumentException](
         new Sequence(
           Literal(Date.valueOf("2018-01-01")),
@@ -1108,11 +1164,6 @@ class CollectionExpressionsSuite extends SparkFunSuite 
with ExpressionEvalHelper
           Literal(Period.ofMonths(-1))),
         EmptyRow,
         s"sequence boundaries: 0 to 2678400000000 by -1")
-
-      assert(Sequence(
-        Cast(Literal("2011-03-01"), DateType),
-        Cast(Literal("2011-04-01"), DateType),
-        Option(Literal(Duration.ofHours(1)))).checkInputDataTypes().isFailure)
     }
   }
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to