This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 074fcf280700 [SPARK-47072][SQL] Fix supported interval formats in error messages 074fcf280700 is described below commit 074fcf2807000d342831379de0fafc1e49a6bf19 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Fri Feb 16 14:20:37 2024 +0300 [SPARK-47072][SQL] Fix supported interval formats in error messages ### What changes were proposed in this pull request? In the PR, I propose to add one more field to keys of `supportedFormat` in `IntervalUtils` because the current implementation has duplicate keys that overwrite each other. For instance, the following keys are the same: ``` (YM.YEAR, YM.MONTH) ... (DT.DAY, DT.HOUR) ``` because `YM.YEAR = DT.DAY = 0` and `YM.MONTH = DT.HOUR = 1`. ### Why are the changes needed? To fix the incorrect error message when Spark cannot parse an ANSI interval string. For example, the expected format should be some year-month format but Spark outputs a day-time one: ```sql spark-sql (default)> select interval '-\t2-2\t' year to month; Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: - 2-2 . (line 1, pos 16) == SQL == select interval '-\t2-2\t' year to month ----------------^^^ ``` ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the existing test suite: ``` $ build/sbt "test:testOnly *IntervalUtilsSuite" ``` and regenerating the golden files: ``` $ SPARK_GENERATE_GOLDEN_FILES=1 PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #45127 from MaxGekk/fix-supportedFormat. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../spark/sql/catalyst/util/IntervalUtils.scala | 32 ++++++++++++---------- .../sql/catalyst/expressions/CastSuiteBase.scala | 8 +++--- .../analyzer-results/ansi/interval.sql.out | 2 +- .../sql-tests/analyzer-results/interval.sql.out | 2 +- .../sql-tests/results/ansi/interval.sql.out | 2 +- .../resources/sql-tests/results/interval.sql.out | 2 +- 6 files changed, 26 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 9b4b0302ee77..3a7c7b0904dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -110,7 +110,7 @@ object IntervalUtils extends SparkIntervalUtils { errorClass = "_LEGACY_ERROR_TEMP_3214", messageParameters = Map( "intervalStr" -> intervalStr, - "supportedFormat" -> supportedFormat((startFiled, endField)) + "supportedFormat" -> supportedFormat((intervalStr, startFiled, endField)) .map(format => s"`$format`").mkString(", "), "typeName" -> typeName, "input" -> input.toString, @@ -118,19 +118,23 @@ object IntervalUtils extends SparkIntervalUtils { } val supportedFormat = Map( - (YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"), - (YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"), - (YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"), - (DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"), - (DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"), - (DT.DAY, DT.MINUTE) -> Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"), - (DT.DAY, DT.SECOND) -> Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"), - (DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL 
[+|-]'[+|-]h' HOUR"), - (DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"), - (DT.HOUR, DT.SECOND) -> Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"), - (DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"), - (DT.MINUTE, DT.SECOND) -> Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"), - (DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND") + ("year-month", YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"), + ("year-month", YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"), + ("year-month", YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"), + ("day-time", DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"), + ("day-time", DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"), + ("day-time", DT.DAY, DT.MINUTE) -> + Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"), + ("day-time", DT.DAY, DT.SECOND) -> + Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"), + ("day-time", DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"), + ("day-time", DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"), + ("day-time", DT.HOUR, DT.SECOND) -> + Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"), + ("day-time", DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"), + ("day-time", DT.MINUTE, DT.SECOND) -> + Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"), + ("day-time", DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND") ) def castStringToYMInterval( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 2ccb8e50e034..67a68fc92a30 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -1181,7 +1181,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { "fallBackNotice" -> "", "typeName" -> "interval year to month", "intervalStr" -> "year-month", - "supportedFormat" -> "`[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR`", + "supportedFormat" -> "`[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH`", "input" -> interval) ) } @@ -1204,7 +1204,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { "typeName" -> dataType.typeName, "intervalStr" -> "year-month", "supportedFormat" -> - IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", "), "input" -> interval)) } @@ -1329,7 +1329,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { "typeName" -> dataType.typeName, "input" -> interval, "supportedFormat" -> - IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", ")) ) } @@ -1355,7 +1355,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { "typeName" -> dataType.typeName, "input" -> interval, "supportedFormat" -> - IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", "))) } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out index 1120c40ac15c..2e2a07beb717 100644 --- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out @@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out index 0033e7273cde..54d6a5fd85e2 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out @@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 9eb4a4766df8..b0d128e967a6 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -2355,7 +2355,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", 
"messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 5bb15d496600..296681b844d6 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -2168,7 +2168,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org