This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.5 by this push:
     new 93a09ea279e6 [SPARK-47072][SQL][3.5] Fix supported interval formats in 
error messages
93a09ea279e6 is described below

commit 93a09ea279e6bd2515ced66d8f38053e4a5514ce
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Mon Feb 19 10:29:08 2024 +0300

    [SPARK-47072][SQL][3.5] Fix supported interval formats in error messages
    
    ### What changes were proposed in this pull request?
    In this PR, I propose adding one more field to the keys of `supportedFormat` in 
`IntervalUtils` because the current implementation has duplicate keys that 
overwrite each other. For instance, the following keys are the same:
    ```
    (YM.YEAR, YM.MONTH)
    ...
    (DT.DAY, DT.HOUR)
    ```
    because `YM.YEAR = DT.DAY = 0` and `YM.MONTH = DT.HOUR = 1`.
    
    This is a backport of https://github.com/apache/spark/pull/45127.
    
    ### Why are the changes needed?
    To fix the incorrect error message shown when Spark cannot parse an ANSI interval 
string. For example, the expected format should be a year-month format, but 
Spark outputs a day-time one:
    ```sql
    spark-sql (default)> select interval '-\t2-2\t' year to month;
    
    Interval string does not match year-month format of `[+|-]d h`, `INTERVAL 
[+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -   2-2     . 
(line 1, pos 16)
    
    == SQL ==
    select interval '-\t2-2\t' year to month
    ----------------^^^
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    Yes.
    
    ### How was this patch tested?
    By running the existing test suite:
    ```
    $ build/sbt "test:testOnly *IntervalUtilsSuite"
    ```
    and regenerating the golden files:
    ```
    $ SPARK_GENERATE_GOLDEN_FILES=1 PYSPARK_PYTHON=python3 build/sbt 
"sql/testOnly org.apache.spark.sql.SQLQueryTestSuite"
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    (cherry picked from commit 074fcf2807000d342831379de0fafc1e49a6bf19)
    
    Closes #45139 from MaxGekk/fix-supportedFormat-3.5.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../spark/sql/catalyst/util/IntervalUtils.scala    | 33 +++++++++++++---------
 .../sql/catalyst/expressions/CastSuiteBase.scala   |  8 +++---
 .../analyzer-results/ansi/interval.sql.out         |  2 +-
 .../sql-tests/analyzer-results/interval.sql.out    |  2 +-
 .../sql-tests/results/ansi/interval.sql.out        |  2 +-
 .../resources/sql-tests/results/interval.sql.out   |  2 +-
 6 files changed, 27 insertions(+), 22 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index e051cfc37f12..4d90007400ea 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -107,25 +107,30 @@ object IntervalUtils extends SparkIntervalUtils {
       fallBackNotice: Option[String] = None) = {
     throw new IllegalArgumentException(
       s"Interval string does not match $intervalStr format of " +
-        s"${supportedFormat((startFiled, endField)).map(format => 
s"`$format`").mkString(", ")} " +
+        s"${supportedFormat((intervalStr, startFiled, endField))
+          .map(format => s"`$format`").mkString(", ")} " +
         s"when cast to $typeName: ${input.toString}" +
         s"${fallBackNotice.map(s => s", $s").getOrElse("")}")
   }
 
   val supportedFormat = Map(
-    (YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO 
MONTH"),
-    (YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"),
-    (YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"),
-    (DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"),
-    (DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO 
HOUR"),
-    (DT.DAY, DT.MINUTE) -> Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY 
TO MINUTE"),
-    (DT.DAY, DT.SECOND) -> Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d 
h:m:s.n' DAY TO SECOND"),
-    (DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"),
-    (DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO 
MINUTE"),
-    (DT.HOUR, DT.SECOND) -> Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' 
HOUR TO SECOND"),
-    (DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"),
-    (DT.MINUTE, DT.SECOND) -> Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' 
MINUTE TO SECOND"),
-    (DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' 
SECOND")
+    ("year-month", YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL 
[+|-]'[+|-]y-m' YEAR TO MONTH"),
+    ("year-month", YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' 
YEAR"),
+    ("year-month", YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL 
[+|-]'[+|-]m' MONTH"),
+    ("day-time", DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' 
DAY"),
+    ("day-time", DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' 
DAY TO HOUR"),
+    ("day-time", DT.DAY, DT.MINUTE) ->
+      Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"),
+    ("day-time", DT.DAY, DT.SECOND) ->
+      Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"),
+    ("day-time", DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' 
HOUR"),
+    ("day-time", DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL 
[+|-]'[+|-]h:m' HOUR TO MINUTE"),
+    ("day-time", DT.HOUR, DT.SECOND) ->
+      Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"),
+    ("day-time", DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL 
[+|-]'[+|-]m' MINUTE"),
+    ("day-time", DT.MINUTE, DT.SECOND) ->
+      Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"),
+    ("day-time", DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL 
[+|-]'[+|-]s.n' SECOND")
   )
 
   def castStringToYMInterval(
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index 1ce311a5544f..4352d5bc9c6b 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -1174,7 +1174,7 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
     Seq("INTERVAL '1-1' YEAR", "INTERVAL '1-1' MONTH").foreach { interval =>
       val dataType = YearMonthIntervalType()
       val expectedMsg = s"Interval string does not match year-month format of 
" +
-        s"${IntervalUtils.supportedFormat((dataType.startField, 
dataType.endField))
+        s"${IntervalUtils.supportedFormat(("year-month", dataType.startField, 
dataType.endField))
           .map(format => s"`$format`").mkString(", ")} " +
         s"when cast to ${dataType.typeName}: $interval"
       checkExceptionInExpression[IllegalArgumentException](
@@ -1194,7 +1194,7 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
       ("INTERVAL '1' MONTH", YearMonthIntervalType(YEAR, MONTH)))
       .foreach { case (interval, dataType) =>
         val expectedMsg = s"Interval string does not match year-month format 
of " +
-          s"${IntervalUtils.supportedFormat((dataType.startField, 
dataType.endField))
+          s"${IntervalUtils.supportedFormat(("year-month", 
dataType.startField, dataType.endField))
             .map(format => s"`$format`").mkString(", ")} " +
           s"when cast to ${dataType.typeName}: $interval"
         checkExceptionInExpression[IllegalArgumentException](
@@ -1314,7 +1314,7 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
       ("1.23", DayTimeIntervalType(MINUTE)))
       .foreach { case (interval, dataType) =>
         val expectedMsg = s"Interval string does not match day-time format of 
" +
-          s"${IntervalUtils.supportedFormat((dataType.startField, 
dataType.endField))
+          s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, 
dataType.endField))
             .map(format => s"`$format`").mkString(", ")} " +
           s"when cast to ${dataType.typeName}: $interval, " +
           s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " +
@@ -1338,7 +1338,7 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
       ("INTERVAL '92233720368541.775807' SECOND", DayTimeIntervalType(SECOND)))
       .foreach { case (interval, dataType) =>
         val expectedMsg = "Interval string does not match day-time format of " 
+
-          s"${IntervalUtils.supportedFormat((dataType.startField, 
dataType.endField))
+          s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, 
dataType.endField))
             .map(format => s"`$format`").mkString(", ")} " +
           s"when cast to ${dataType.typeName}: $interval, " +
           s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " +
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out
index 1120c40ac15c..2e2a07beb717 100644
--- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out
@@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 {
   "errorClass" : "_LEGACY_ERROR_TEMP_0063",
   "messageParameters" : {
-    "msg" : "Interval string does not match year-month format of `[+|-]d h`, 
`INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: 
-\t2-2\t"
+    "msg" : "Interval string does not match year-month format of `[+|-]y-m`, 
`INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: 
-\t2-2\t"
   },
   "queryContext" : [ {
     "objectType" : "",
diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out
index 337edd5980c3..6242dc142eab 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out
@@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 {
   "errorClass" : "_LEGACY_ERROR_TEMP_0063",
   "messageParameters" : {
-    "msg" : "Interval string does not match year-month format of `[+|-]d h`, 
`INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: 
-\t2-2\t"
+    "msg" : "Interval string does not match year-month format of `[+|-]y-m`, 
`INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: 
-\t2-2\t"
   },
   "queryContext" : [ {
     "objectType" : "",
diff --git 
a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 9eb4a4766df8..b0d128e967a6 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -2355,7 +2355,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 {
   "errorClass" : "_LEGACY_ERROR_TEMP_0063",
   "messageParameters" : {
-    "msg" : "Interval string does not match year-month format of `[+|-]d h`, 
`INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: 
-\t2-2\t"
+    "msg" : "Interval string does not match year-month format of `[+|-]y-m`, 
`INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: 
-\t2-2\t"
   },
   "queryContext" : [ {
     "objectType" : "",
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index fe15ade94178..faba4abfdbe7 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -2168,7 +2168,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 {
   "errorClass" : "_LEGACY_ERROR_TEMP_0063",
   "messageParameters" : {
-    "msg" : "Interval string does not match year-month format of `[+|-]d h`, 
`INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: 
-\t2-2\t"
+    "msg" : "Interval string does not match year-month format of `[+|-]y-m`, 
`INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: 
-\t2-2\t"
   },
   "queryContext" : [ {
     "objectType" : "",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to