This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 074fcf280700 [SPARK-47072][SQL] Fix supported interval formats in error messages
074fcf280700 is described below

commit 074fcf2807000d342831379de0fafc1e49a6bf19
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Fri Feb 16 14:20:37 2024 +0300

    [SPARK-47072][SQL] Fix supported interval formats in error messages
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to add one more field to the keys of `supportedFormat` in `IntervalUtils` because the current implementation has duplicate keys that overwrite each other. For instance, the following keys are the same:
    ```
    (YM.YEAR, YM.MONTH)
    ...
    (DT.DAY, DT.HOUR)
    ```
    because `YM.YEAR = DT.DAY = 0` and `YM.MONTH = DT.HOUR = 1`.
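    
    The collision is easy to see with a self-contained snippet (a minimal sketch using bare `Byte` constants and a hypothetical `KeyCollisionSketch` object, not the actual Spark types):
    ```scala
    object KeyCollisionSketch extends App {
      // Field positions mirror Spark's constants: YEAR = DAY = 0, MONTH = HOUR = 1.
      val YEAR: Byte = 0; val MONTH: Byte = 1 // year-month fields
      val DAY: Byte = 0; val HOUR: Byte = 1   // day-time fields reuse the same byte values
    
      // Two-field keys collide: (DAY, HOUR) == (YEAR, MONTH) == (0, 1),
      // so the later entry silently replaces the earlier one.
      val twoFieldKeys = Map(
        (YEAR, MONTH) -> Seq("[+|-]y-m"),
        (DAY, HOUR) -> Seq("[+|-]d h"))
      println(twoFieldKeys((YEAR, MONTH))) // List([+|-]d h) -- the day-time format wins
    
      // Adding the interval style string to the key keeps the entries distinct.
      val threeFieldKeys = Map(
        ("year-month", YEAR, MONTH) -> Seq("[+|-]y-m"),
        ("day-time", DAY, HOUR) -> Seq("[+|-]d h"))
      println(threeFieldKeys(("year-month", YEAR, MONTH))) // List([+|-]y-m)
    }
    ```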
    
    ### Why are the changes needed?
    To fix the incorrect error message when Spark cannot parse an ANSI interval string. For example, the expected format should be a year-month format, but Spark outputs a day-time one:
    ```sql
    spark-sql (default)> select interval '-\t2-2\t' year to month;
    
    Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -   2-2     . (line 1, pos 16)
    
    == SQL ==
    select interval '-\t2-2\t' year to month
    ----------------^^^
    ```
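    
    With the interval style added to the key, the same query reports the expected year-month format (the message below is taken from the regenerated golden files in this patch):
    ```
    Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t
    ```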
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, the error message now lists the matching year-month formats instead of the day-time ones.
    
    ### How was this patch tested?
    By running the existing test suite:
    ```
    $ build/sbt "test:testOnly *IntervalUtilsSuite"
    ```
    and regenerating the golden files:
    ```
    $ SPARK_GENERATE_GOLDEN_FILES=1 PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite"
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #45127 from MaxGekk/fix-supportedFormat.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../spark/sql/catalyst/util/IntervalUtils.scala    | 32 ++++++++++++----------
 .../sql/catalyst/expressions/CastSuiteBase.scala   |  8 +++---
 .../analyzer-results/ansi/interval.sql.out         |  2 +-
 .../sql-tests/analyzer-results/interval.sql.out    |  2 +-
 .../sql-tests/results/ansi/interval.sql.out        |  2 +-
 .../resources/sql-tests/results/interval.sql.out   |  2 +-
 6 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index 9b4b0302ee77..3a7c7b0904dd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -110,7 +110,7 @@ object IntervalUtils extends SparkIntervalUtils {
       errorClass = "_LEGACY_ERROR_TEMP_3214",
       messageParameters = Map(
         "intervalStr" -> intervalStr,
-        "supportedFormat" -> supportedFormat((startFiled, endField))
+        "supportedFormat" -> supportedFormat((intervalStr, startFiled, 
endField))
           .map(format => s"`$format`").mkString(", "),
         "typeName" -> typeName,
         "input" -> input.toString,
@@ -118,19 +118,23 @@ object IntervalUtils extends SparkIntervalUtils {
   }
 
   val supportedFormat = Map(
-    (YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"),
-    (YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"),
-    (YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"),
-    (DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"),
-    (DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"),
-    (DT.DAY, DT.MINUTE) -> Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"),
-    (DT.DAY, DT.SECOND) -> Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"),
-    (DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"),
-    (DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"),
-    (DT.HOUR, DT.SECOND) -> Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"),
-    (DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"),
-    (DT.MINUTE, DT.SECOND) -> Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"),
-    (DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND")
+    ("year-month", YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"),
+    ("year-month", YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"),
+    ("year-month", YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"),
+    ("day-time", DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"),
+    ("day-time", DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"),
+    ("day-time", DT.DAY, DT.MINUTE) ->
+      Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"),
+    ("day-time", DT.DAY, DT.SECOND) ->
+      Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"),
+    ("day-time", DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"),
+    ("day-time", DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"),
+    ("day-time", DT.HOUR, DT.SECOND) ->
+      Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"),
+    ("day-time", DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"),
+    ("day-time", DT.MINUTE, DT.SECOND) ->
+      Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"),
+    ("day-time", DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND")
   )
 
   def castStringToYMInterval(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index 2ccb8e50e034..67a68fc92a30 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -1181,7 +1181,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
           "fallBackNotice" -> "",
           "typeName" -> "interval year to month",
           "intervalStr" -> "year-month",
-          "supportedFormat" -> "`[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO 
HOUR`",
+          "supportedFormat" -> "`[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO 
MONTH`",
           "input" -> interval)
       )
     }
@@ -1204,7 +1204,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
             "typeName" -> dataType.typeName,
             "intervalStr" -> "year-month",
             "supportedFormat" ->
-              IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
+              IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField))
                 .map(format => s"`$format`").mkString(", "),
             "input" -> interval))
       }
@@ -1329,7 +1329,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
             "typeName" -> dataType.typeName,
             "input" -> interval,
             "supportedFormat" ->
-              IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
+              IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField))
                 .map(format => s"`$format`").mkString(", "))
         )
       }
@@ -1355,7 +1355,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
             "typeName" -> dataType.typeName,
             "input" -> interval,
             "supportedFormat" ->
-              IntervalUtils.supportedFormat((dataType.startField, dataType.endField))
+              IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField))
                 .map(format => s"`$format`").mkString(", ")))
       }
   }
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out
index 1120c40ac15c..2e2a07beb717 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out
@@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 {
   "errorClass" : "_LEGACY_ERROR_TEMP_0063",
   "messageParameters" : {
-    "msg" : "Interval string does not match year-month format of `[+|-]d h`, 
`INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: 
-\t2-2\t"
+    "msg" : "Interval string does not match year-month format of `[+|-]y-m`, 
`INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: 
-\t2-2\t"
   },
   "queryContext" : [ {
     "objectType" : "",
diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out
index 0033e7273cde..54d6a5fd85e2 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out
@@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 {
   "errorClass" : "_LEGACY_ERROR_TEMP_0063",
   "messageParameters" : {
-    "msg" : "Interval string does not match year-month format of `[+|-]d h`, 
`INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: 
-\t2-2\t"
+    "msg" : "Interval string does not match year-month format of `[+|-]y-m`, 
`INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: 
-\t2-2\t"
   },
   "queryContext" : [ {
     "objectType" : "",
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 9eb4a4766df8..b0d128e967a6 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -2355,7 +2355,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 {
   "errorClass" : "_LEGACY_ERROR_TEMP_0063",
   "messageParameters" : {
-    "msg" : "Interval string does not match year-month format of `[+|-]d h`, 
`INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: 
-\t2-2\t"
+    "msg" : "Interval string does not match year-month format of `[+|-]y-m`, 
`INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: 
-\t2-2\t"
   },
   "queryContext" : [ {
     "objectType" : "",
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 5bb15d496600..296681b844d6 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -2168,7 +2168,7 @@ org.apache.spark.sql.catalyst.parser.ParseException
 {
   "errorClass" : "_LEGACY_ERROR_TEMP_0063",
   "messageParameters" : {
-    "msg" : "Interval string does not match year-month format of `[+|-]d h`, 
`INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: 
-\t2-2\t"
+    "msg" : "Interval string does not match year-month format of `[+|-]y-m`, 
`INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: 
-\t2-2\t"
   },
   "queryContext" : [ {
     "objectType" : "",

