This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new 93a09ea279e6 [SPARK-47072][SQL][3.5] Fix supported interval formats in error messages 93a09ea279e6 is described below commit 93a09ea279e6bd2515ced66d8f38053e4a5514ce Author: Max Gekk <max.g...@gmail.com> AuthorDate: Mon Feb 19 10:29:08 2024 +0300 [SPARK-47072][SQL][3.5] Fix supported interval formats in error messages ### What changes were proposed in this pull request? In the PR, I propose to add one more field to the keys of `supportedFormat` in `IntervalUtils` because the current implementation has duplicate keys that overwrite each other. For instance, the following keys are the same: ``` (YM.YEAR, YM.MONTH) ... (DT.DAY, DT.HOUR) ``` because `YM.YEAR = DT.DAY = 0` and `YM.MONTH = DT.HOUR = 1`. This is a backport of https://github.com/apache/spark/pull/45127. ### Why are the changes needed? To fix the incorrect error message when Spark cannot parse an ANSI interval string. For example, the expected format should be some year-month format but Spark outputs a day-time one: ```sql spark-sql (default)> select interval '-\t2-2\t' year to month; Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: - 2-2 . (line 1, pos 16) == SQL == select interval '-\t2-2\t' year to month ----------------^^^ ``` ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the existing test suite: ``` $ build/sbt "test:testOnly *IntervalUtilsSuite" ``` and regenerating the golden files: ``` $ SPARK_GENERATE_GOLDEN_FILES=1 PYSPARK_PYTHON=python3 build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Authored-by: Max Gekk <max.gekkgmail.com> (cherry picked from commit 074fcf2807000d342831379de0fafc1e49a6bf19) Closes #45139 from MaxGekk/fix-supportedFormat-3.5. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../spark/sql/catalyst/util/IntervalUtils.scala | 33 +++++++++++++--------- .../sql/catalyst/expressions/CastSuiteBase.scala | 8 +++--- .../analyzer-results/ansi/interval.sql.out | 2 +- .../sql-tests/analyzer-results/interval.sql.out | 2 +- .../sql-tests/results/ansi/interval.sql.out | 2 +- .../resources/sql-tests/results/interval.sql.out | 2 +- 6 files changed, 27 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index e051cfc37f12..4d90007400ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -107,25 +107,30 @@ object IntervalUtils extends SparkIntervalUtils { fallBackNotice: Option[String] = None) = { throw new IllegalArgumentException( s"Interval string does not match $intervalStr format of " + - s"${supportedFormat((startFiled, endField)).map(format => s"`$format`").mkString(", ")} " + + s"${supportedFormat((intervalStr, startFiled, endField)) + .map(format => s"`$format`").mkString(", ")} " + s"when cast to $typeName: ${input.toString}" + s"${fallBackNotice.map(s => s", $s").getOrElse("")}") } val supportedFormat = Map( - (YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"), - (YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"), - (YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"), - (DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"), - (DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"), - (DT.DAY, DT.MINUTE) -> Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"), - (DT.DAY, DT.SECOND) -> Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"), - 
(DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"), - (DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"), - (DT.HOUR, DT.SECOND) -> Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"), - (DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"), - (DT.MINUTE, DT.SECOND) -> Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"), - (DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND") + ("year-month", YM.YEAR, YM.MONTH) -> Seq("[+|-]y-m", "INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH"), + ("year-month", YM.YEAR, YM.YEAR) -> Seq("[+|-]y", "INTERVAL [+|-]'[+|-]y' YEAR"), + ("year-month", YM.MONTH, YM.MONTH) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MONTH"), + ("day-time", DT.DAY, DT.DAY) -> Seq("[+|-]d", "INTERVAL [+|-]'[+|-]d' DAY"), + ("day-time", DT.DAY, DT.HOUR) -> Seq("[+|-]d h", "INTERVAL [+|-]'[+|-]d h' DAY TO HOUR"), + ("day-time", DT.DAY, DT.MINUTE) -> + Seq("[+|-]d h:m", "INTERVAL [+|-]'[+|-]d h:m' DAY TO MINUTE"), + ("day-time", DT.DAY, DT.SECOND) -> + Seq("[+|-]d h:m:s.n", "INTERVAL [+|-]'[+|-]d h:m:s.n' DAY TO SECOND"), + ("day-time", DT.HOUR, DT.HOUR) -> Seq("[+|-]h", "INTERVAL [+|-]'[+|-]h' HOUR"), + ("day-time", DT.HOUR, DT.MINUTE) -> Seq("[+|-]h:m", "INTERVAL [+|-]'[+|-]h:m' HOUR TO MINUTE"), + ("day-time", DT.HOUR, DT.SECOND) -> + Seq("[+|-]h:m:s.n", "INTERVAL [+|-]'[+|-]h:m:s.n' HOUR TO SECOND"), + ("day-time", DT.MINUTE, DT.MINUTE) -> Seq("[+|-]m", "INTERVAL [+|-]'[+|-]m' MINUTE"), + ("day-time", DT.MINUTE, DT.SECOND) -> + Seq("[+|-]m:s.n", "INTERVAL [+|-]'[+|-]m:s.n' MINUTE TO SECOND"), + ("day-time", DT.SECOND, DT.SECOND) -> Seq("[+|-]s.n", "INTERVAL [+|-]'[+|-]s.n' SECOND") ) def castStringToYMInterval( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 1ce311a5544f..4352d5bc9c6b 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -1174,7 +1174,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { Seq("INTERVAL '1-1' YEAR", "INTERVAL '1-1' MONTH").foreach { interval => val dataType = YearMonthIntervalType() val expectedMsg = s"Interval string does not match year-month format of " + - s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + s"${IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", ")} " + s"when cast to ${dataType.typeName}: $interval" checkExceptionInExpression[IllegalArgumentException]( @@ -1194,7 +1194,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { ("INTERVAL '1' MONTH", YearMonthIntervalType(YEAR, MONTH))) .foreach { case (interval, dataType) => val expectedMsg = s"Interval string does not match year-month format of " + - s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + s"${IntervalUtils.supportedFormat(("year-month", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", ")} " + s"when cast to ${dataType.typeName}: $interval" checkExceptionInExpression[IllegalArgumentException]( @@ -1314,7 +1314,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { ("1.23", DayTimeIntervalType(MINUTE))) .foreach { case (interval, dataType) => val expectedMsg = s"Interval string does not match day-time format of " + - s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", ")} " + s"when cast to ${dataType.typeName}: $interval, " + s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " + @@ -1338,7 +1338,7 
@@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { ("INTERVAL '92233720368541.775807' SECOND", DayTimeIntervalType(SECOND))) .foreach { case (interval, dataType) => val expectedMsg = "Interval string does not match day-time format of " + - s"${IntervalUtils.supportedFormat((dataType.startField, dataType.endField)) + s"${IntervalUtils.supportedFormat(("day-time", dataType.startField, dataType.endField)) .map(format => s"`$format`").mkString(", ")} " + s"when cast to ${dataType.typeName}: $interval, " + s"set ${SQLConf.LEGACY_FROM_DAYTIME_STRING.key} to true " + diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out index 1120c40ac15c..2e2a07beb717 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out @@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out index 337edd5980c3..6242dc142eab 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out @@ -1916,7 +1916,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval 
string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 9eb4a4766df8..b0d128e967a6 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -2355,7 +2355,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index fe15ade94178..faba4abfdbe7 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -2168,7 +2168,7 @@ org.apache.spark.sql.catalyst.parser.ParseException { "errorClass" : "_LEGACY_ERROR_TEMP_0063", "messageParameters" : { - "msg" : "Interval string does not match year-month format of `[+|-]d h`, `INTERVAL [+|-]'[+|-]d h' DAY TO HOUR` when cast to interval year to month: -\t2-2\t" + "msg" : "Interval string does not match year-month format of `[+|-]y-m`, `INTERVAL [+|-]'[+|-]y-m' YEAR TO MONTH` when cast to interval year to month: -\t2-2\t" }, "queryContext" : 
[ { "objectType" : "", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org