This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1fd9a91 [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps 1fd9a91 is described below commit 1fd9a91c662a368e348ef96604a79929f814041c Author: Maxim Gekk <max.g...@gmail.com> AuthorDate: Thu Mar 5 20:49:43 2020 +0800 [SPARK-31005][SQL] Support time zone ids in casting strings to timestamps ### What changes were proposed in this pull request? In the PR, I propose to change `DateTimeUtils.stringToTimestamp` to support any valid time zone id at the end of input string. After the changes, the function accepts zone ids in the formats: - no zone id. In that case, the function uses the local session time zone from the SQL config `spark.sql.session.timeZone` - -[h]h:[m]m - +[h]h:[m]m - Z - Short zone id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS - Zone ID starts with 'UTC+', 'UTC-', 'GMT+', 'GMT-', 'UT+' or 'UT-'. The ID is split in two, with a two or three letter prefix and a suffix starting with the sign. The suffix must be in the formats: - +|-h[h] - +|-hh[:]mm - +|-hh:mm:ss - +|-hhmmss - Region-based zone IDs in the form `{area}/{city}`, such as `Europe/Paris` or `America/New_York`. The default set of region ids is supplied by the IANA Time Zone Database (TZDB). ### Why are the changes needed? - To use `stringToTimestamp` as a substitution of removed `stringToTime`, see https://github.com/apache/spark/pull/27710#discussion_r385020173 - Improve UX of Spark SQL by allowing flexible formats of zone ids. Currently, Spark accepts only `Z` and zone offsets that can be inconvenient when a time zone offset is shifted due to daylight saving rules. For instance: ```sql spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp); NULL ``` ### Does this PR introduce any user-facing change? Yes. After the changes, casting strings to timestamps allows time zone id at the end of the strings: ```sql spark-sql> select cast('2015-03-18T12:03:17.123456 Europe/Moscow' as timestamp); 2015-03-18 12:03:17.123456 ``` ### How was this patch tested? - Added new test cases to the `string to timestamp` test in `DateTimeUtilsSuite`. - Run `CastSuite` and `AnsiCastSuite`. Closes #27753 from MaxGekk/stringToTimestamp-uni-zoneId. Authored-by: Maxim Gekk <max.g...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/util/DateTimeUtils.scala | 57 +++++++++++----------- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 23 +++++++-- 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 6b252ec..3a038a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -185,28 +185,28 @@ object DateTimeUtils { * `yyyy-[m]m` * `yyyy-[m]m-[d]d` * `yyyy-[m]m-[d]d ` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]Z` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]-[h]h:[m]m` - * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us]+[h]h:[m]m` + * `yyyy-[m]m-[d]d [h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `yyyy-[m]m-[d]dT[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * `T[h]h:[m]m:[s]s.[ms][ms][ms][us][us][us][zone_id]` + * + * where `zone_id` should have one of the forms: + * - Z - Zulu time zone UTC+0 + * - +|-[h]h:[m]m + * - A short id, see https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#SHORT_IDS + * - An id with one of the prefixes UTC+, UTC-, GMT+, GMT-, UT+ or UT-, + * and a suffix in the formats: + * - +|-h[h] + * - +|-hh[:]mm + * - +|-hh:mm:ss + * - +|-hhmmss + * - Region-based zone IDs in the form `area/city`, such as `Europe/Paris` */ def stringToTimestamp(s: UTF8String, timeZoneId: ZoneId): Option[SQLTimestamp] = { if (s == null) { return None } - var tz: Option[Byte] = None + var tz: Option[String] = None val segments: Array[Int] = Array[Int](1, 1, 1, 0, 0, 0, 0, 0, 0) var i = 0 var currentSegmentValue = 0 @@ -257,22 +257,21 @@ object DateTimeUtils { return None } } else if (i == 5 || i == 6) { - if (b == 'Z') { + if (b == '-' || b == '+') { segments(i) = currentSegmentValue currentSegmentValue = 0 i += 1 - tz = Some(43) - } else if (b == '-' || b == '+') { + tz = Some(new String(bytes, j, 1)) + } else if (b == '.' && i == 5) { segments(i) = currentSegmentValue currentSegmentValue = 0 i += 1 - tz = Some(b) - } else if (b == '.' && i == 5) { + } else { segments(i) = currentSegmentValue currentSegmentValue = 0 i += 1 - } else { - return None + tz = Some(new String(bytes, j, bytes.length - j)) + j = bytes.length - 1 } if (i == 6 && b != '.') { i += 1 @@ -312,11 +311,11 @@ object DateTimeUtils { digitsMilli -= 1 } try { - val zoneId = if (tz.isEmpty) { - timeZoneId - } else { - val sign = if (tz.get.toChar == '-') -1 else 1 - ZoneOffset.ofHoursMinutes(sign * segments(7), sign * segments(8)) + val zoneId = tz match { + case None => timeZoneId + case Some("+") => ZoneOffset.ofHoursMinutes(segments(7), segments(8)) + case Some("-") => ZoneOffset.ofHoursMinutes(-segments(7), -segments(8)) + case Some(zoneName: String) => getZoneId(zoneName.trim) } val nanoseconds = MICROSECONDS.toNanos(segments(6)) val localTime = LocalTime.of(segments(3), segments(4), segments(5), nanoseconds.toInt) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 1465b06..6dde3d2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -183,24 +183,29 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { var zoneId = getZoneId("GMT-13:53") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17-13:53", expected) + checkStringToTimestamp("2015-03-18T12:03:17GMT-13:53", expected) zoneId = getZoneId("UTC") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17Z", expected) checkStringToTimestamp("2015-03-18 12:03:17Z", expected) + checkStringToTimestamp("2015-03-18 12:03:17UTC", expected) zoneId = getZoneId("GMT-01:00") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17-1:0", expected) checkStringToTimestamp("2015-03-18T12:03:17-01:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17GMT-01:00", expected) zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17+07:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17 GMT+07:30", expected) zoneId = getZoneId("GMT+07:03") expected = Option(date(2015, 3, 18, 12, 3, 17, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17+07:03", expected) + checkStringToTimestamp("2015-03-18T12:03:17GMT+07:03", expected) // tests for the string including milliseconds. expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zid)) @@ -213,27 +218,32 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { expected = Option(date(2015, 3, 18, 12, 3, 17, 456000, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.456Z", expected) checkStringToTimestamp("2015-03-18 12:03:17.456Z", expected) + checkStringToTimestamp("2015-03-18 12:03:17.456 UTC", expected) zoneId = getZoneId("GMT-01:00") expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123-1:0", expected) checkStringToTimestamp("2015-03-18T12:03:17.123-01:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123 GMT-01:00", expected) zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123+07:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123 GMT+07:30", expected) zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, 123000, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123+07:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123GMT+07:30", expected) - zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, 123121, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.123121+7:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123121 GMT+0730", expected) zoneId = getZoneId("GMT+07:30") expected = Option(date(2015, 3, 18, 12, 3, 17, 123120, zid = zoneId)) checkStringToTimestamp("2015-03-18T12:03:17.12312+7:30", expected) + checkStringToTimestamp("2015-03-18T12:03:17.12312 UT+07:30", expected) expected = Option(time(18, 12, 15, zid = zid)) checkStringToTimestamp("18:12:15", expected) @@ -241,10 +251,12 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { zoneId = getZoneId("GMT+07:30") expected = Option(time(18, 12, 15, 123120, zid = zoneId)) checkStringToTimestamp("T18:12:15.12312+7:30", expected) + checkStringToTimestamp("T18:12:15.12312 UTC+07:30", expected) zoneId = getZoneId("GMT+07:30") expected = Option(time(18, 12, 15, 123120, zid = zoneId)) checkStringToTimestamp("18:12:15.12312+7:30", expected) + checkStringToTimestamp("18:12:15.12312 GMT+07:30", expected) expected = Option(date(2011, 5, 6, 7, 8, 9, 100000, zid = zid)) checkStringToTimestamp("2011-05-06 07:08:09.1000", expected) @@ -270,8 +282,13 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { // Truncating the fractional seconds zoneId = getZoneId("GMT+00:00") expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = zoneId)) - checkStringToTimestamp( - "2015-03-18T12:03:17.123456789+0:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123456789+0:00", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123456789 UTC+0", expected) + checkStringToTimestamp("2015-03-18T12:03:17.123456789GMT+00:00", expected) + + zoneId = getZoneId("Europe/Moscow") + expected = Option(date(2015, 3, 18, 12, 3, 17, 123456, zid = zoneId)) + checkStringToTimestamp("2015-03-18T12:03:17.123456 Europe/Moscow", expected) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org