[ https://issues.apache.org/jira/browse/SPARK-31449?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17092824#comment-17092824 ]
Maxim Gekk commented on SPARK-31449: ------------------------------------ [~cloud_fan] [~hyukjin.kwon] I compared results of those 2 functions for all time zones with step of 1 day, and found many differences in results: {code:scala} test("Investigate the difference between JDK and Spark's time zone offset calculation") { import java.util.{Calendar, TimeZone} import sun.util.calendar.ZoneInfo def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = { var guess = tz.getRawOffset // the actual offset should be calculated based on milliseconds in UTC val offset = tz.getOffset(millisLocal - guess) if (offset != guess) { guess = tz.getOffset(millisLocal - offset) if (guess != offset) { // fallback to do the reverse lookup using java.sql.Timestamp // this should only happen near the start or end of DST val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt val year = getYear(days) val month = getMonth(days) val day = getDayOfMonth(days) var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt if (millisOfDay < 0) { millisOfDay += MILLIS_PER_DAY.toInt } val seconds = (millisOfDay / 1000L).toInt val hh = seconds / 3600 val mm = seconds / 60 % 60 val ss = seconds % 60 val ms = millisOfDay % 1000 val calendar = Calendar.getInstance(tz) calendar.set(year, month - 1, day, hh, mm, ss) calendar.set(Calendar.MILLISECOND, ms) guess = (millisLocal - calendar.getTimeInMillis()).toInt } } guess } def getOffsetFromLocalMillis2(millisLocal: Long, tz: TimeZone): Long = { tz match { case zoneInfo: ZoneInfo => zoneInfo.getOffsetsByWall(millisLocal, null) case timeZone: TimeZone => timeZone.getOffset(millisLocal - timeZone.getRawOffset) } } ALL_TIMEZONES .sortBy(_.getId) .foreach { zid => withDefaultTimeZone(zid) { val start = microsToMillis(instantToMicros(LocalDateTime.of(1, 1, 1, 0, 0, 0) .atZone(zid) .toInstant)) val end = microsToMillis(instantToMicros(LocalDateTime.of(2037, 1, 1, 0, 0, 0) .atZone(zid) .toInstant)) var millis = start var step: Long = 
MILLIS_PER_DAY while (millis < end) { val offset1 = getOffsetFromLocalMillis(millis, TimeZone.getTimeZone(zid)) val offset2 = getOffsetFromLocalMillis2(millis, TimeZone.getTimeZone(zid)) if (offset1 != offset2) { println(s"${zid.getId} ${new Timestamp(millis)} $offset1 $offset2") } millis += step } } } } {code} {code} Africa/Algiers 1916-10-01 23:47:48.0 3600000 0 Africa/Algiers 1917-10-07 23:47:48.0 3600000 0 Africa/Algiers 1918-10-06 23:47:48.0 3600000 0 Africa/Algiers 1919-10-05 23:47:48.0 3600000 0 Africa/Algiers 1920-10-23 23:47:48.0 3600000 0 Africa/Algiers 1921-06-21 23:47:48.0 3600000 0 Africa/Algiers 1946-10-06 23:47:48.0 3600000 0 Africa/Algiers 1963-04-13 23:47:48.0 3600000 0 Africa/Algiers 1971-09-26 23:47:48.0 3600000 0 Africa/Algiers 1979-10-25 23:47:48.0 3600000 0 Africa/Ceuta 1900-01-01 00:00:00.0 3600000 -1276000 Africa/Ceuta 1924-10-05 00:21:16.0 3600000 0 Africa/Ceuta 1926-10-03 00:21:16.0 3600000 0 Africa/Ceuta 1927-10-02 00:21:16.0 3600000 0 Africa/Ceuta 1928-10-07 00:21:16.0 3600000 0 Africa/Sao_Tome 1899-12-31 23:33:04.0 0 -2205000 Africa/Tripoli 1952-01-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1954-01-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1956-01-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1982-01-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1982-10-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1983-10-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1984-10-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1985-10-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1986-10-03 00:07:16.0 7200000 3600000 Africa/Tripoli 1987-10-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1988-10-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1989-10-01 00:07:16.0 7200000 3600000 Africa/Tripoli 1996-09-30 00:07:16.0 7200000 3600000 America/Inuvik 1965-10-30 18:00:00.0 -21600000 -28800000 America/Iqaluit 1999-10-30 20:00:00.0 -14400000 -21600000 America/Pangnirtung 1999-10-30 20:00:00.0 -14400000 -21600000 Antarctica/Casey 1900-01-01 00:00:00.0 28800000 0 
Antarctica/Davis 1900-01-01 00:00:00.0 25200000 0 Antarctica/Davis 2009-10-18 05:00:00.0 25200000 18000000 Antarctica/Davis 2011-10-28 05:00:00.0 25200000 18000000 Antarctica/DumontDUrville 1900-01-01 00:00:00.0 36000000 0 Antarctica/Mawson 1900-01-01 00:00:00.0 18000000 0 Antarctica/Syowa 1900-01-01 00:00:00.0 10800000 0 Antarctica/Vostok 1900-01-01 00:00:00.0 21600000 0 Atlantic/Reykjavik 1939-10-29 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1940-11-03 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1941-11-02 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1942-10-25 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1943-10-24 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1944-10-22 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1945-10-28 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1946-10-27 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1947-10-26 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1948-10-24 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1949-10-30 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1950-10-22 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1951-10-28 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1952-10-26 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1953-10-25 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1954-10-24 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1955-10-23 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1956-10-28 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1957-10-27 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1958-10-26 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1959-10-25 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1960-10-23 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1961-10-22 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1962-10-28 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1963-10-27 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1964-10-25 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1965-10-24 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1966-10-23 01:28:00.0 0 -3600000 Atlantic/Reykjavik 1967-10-29 01:28:00.0 0 -3600000 Europe/Andorra 1900-12-31 23:53:56.0 364000 0 Europe/Brussels 1919-10-04 23:42:30.0 3600000 0 Europe/Brussels 
1920-10-23 23:42:30.0 3600000 0 Europe/Brussels 1921-10-25 23:42:30.0 3600000 0 Europe/Brussels 1922-10-07 23:42:30.0 3600000 0 Europe/Brussels 1923-10-06 23:42:30.0 3600000 0 Europe/Brussels 1924-10-04 23:42:30.0 3600000 0 Europe/Brussels 1925-10-03 23:42:30.0 3600000 0 Europe/Brussels 1926-10-02 23:42:30.0 3600000 0 Europe/Brussels 1927-10-01 23:42:30.0 3600000 0 Europe/Gibraltar 1900-01-01 00:21:24.0 3600000 0 Europe/Luxembourg 1918-11-24 23:35:24.0 3600000 0 Europe/Madrid 1900-01-01 00:00:00.0 3600000 -884000 Europe/Madrid 1918-10-07 00:14:44.0 3600000 0 Europe/Madrid 1919-10-07 00:14:44.0 3600000 0 Europe/Madrid 1924-10-05 00:14:44.0 3600000 0 Europe/Madrid 1926-10-03 00:14:44.0 3600000 0 Europe/Madrid 1927-10-02 00:14:44.0 3600000 0 Europe/Madrid 1928-10-07 00:14:44.0 3600000 0 Europe/Madrid 1929-10-06 00:14:44.0 3600000 0 Europe/Madrid 1937-10-03 00:14:44.0 3600000 0 Europe/Madrid 1939-10-08 00:14:44.0 3600000 0 Europe/Monaco 1916-10-01 23:30:28.0 3600000 0 Europe/Monaco 1917-10-07 23:30:28.0 3600000 0 Europe/Monaco 1918-10-06 23:30:28.0 3600000 0 Europe/Monaco 1919-10-05 23:30:28.0 3600000 0 Europe/Monaco 1920-10-23 23:30:28.0 3600000 0 Europe/Monaco 1921-10-25 23:30:28.0 3600000 0 Europe/Monaco 1922-10-07 23:30:28.0 3600000 0 Europe/Monaco 1923-10-06 23:30:28.0 3600000 0 Europe/Monaco 1924-10-04 23:30:28.0 3600000 0 Europe/Monaco 1925-10-03 23:30:28.0 3600000 0 Europe/Monaco 1926-10-02 23:30:28.0 3600000 0 Europe/Monaco 1927-10-01 23:30:28.0 3600000 0 Europe/Monaco 1928-10-06 23:30:28.0 3600000 0 Europe/Monaco 1929-10-05 23:30:28.0 3600000 0 Europe/Monaco 1930-10-04 23:30:28.0 3600000 0 Europe/Monaco 1931-10-03 23:30:28.0 3600000 0 Europe/Monaco 1932-10-01 23:30:28.0 3600000 0 Europe/Monaco 1933-10-07 23:30:28.0 3600000 0 Europe/Monaco 1934-10-06 23:30:28.0 3600000 0 Europe/Monaco 1935-10-05 23:30:28.0 3600000 0 Europe/Monaco 1936-10-03 23:30:28.0 3600000 0 Europe/Monaco 1937-10-02 23:30:28.0 3600000 0 Europe/Monaco 1938-10-01 23:30:28.0 3600000 0 
Europe/Monaco 1939-11-18 23:30:28.0 3600000 0 Europe/Paris 1916-10-01 23:50:39.0 3600000 0 Europe/Paris 1917-10-07 23:50:39.0 3600000 0 Europe/Paris 1918-10-06 23:50:39.0 3600000 0 Europe/Paris 1919-10-05 23:50:39.0 3600000 0 Europe/Paris 1920-10-23 23:50:39.0 3600000 0 Europe/Paris 1921-10-25 23:50:39.0 3600000 0 Europe/Paris 1922-10-07 23:50:39.0 3600000 0 Europe/Paris 1923-10-06 23:50:39.0 3600000 0 Europe/Paris 1924-10-04 23:50:39.0 3600000 0 Europe/Paris 1925-10-03 23:50:39.0 3600000 0 Europe/Paris 1926-10-02 23:50:39.0 3600000 0 Europe/Paris 1927-10-01 23:50:39.0 3600000 0 Europe/Paris 1928-10-06 23:50:39.0 3600000 0 Europe/Paris 1929-10-05 23:50:39.0 3600000 0 Europe/Paris 1930-10-04 23:50:39.0 3600000 0 Europe/Paris 1931-10-03 23:50:39.0 3600000 0 Europe/Paris 1932-10-01 23:50:39.0 3600000 0 Europe/Paris 1933-10-07 23:50:39.0 3600000 0 Europe/Paris 1934-10-06 23:50:39.0 3600000 0 Europe/Paris 1935-10-05 23:50:39.0 3600000 0 Europe/Paris 1936-10-03 23:50:39.0 3600000 0 Europe/Paris 1937-10-02 23:50:39.0 3600000 0 Europe/Paris 1938-10-01 23:50:39.0 3600000 0 Europe/Paris 1939-11-18 23:50:39.0 3600000 0 Iceland 1939-10-29 01:28:00.0 0 -3600000 Iceland 1940-11-03 01:28:00.0 0 -3600000 Iceland 1941-11-02 01:28:00.0 0 -3600000 Iceland 1942-10-25 01:28:00.0 0 -3600000 Iceland 1943-10-24 01:28:00.0 0 -3600000 Iceland 1944-10-22 01:28:00.0 0 -3600000 Iceland 1945-10-28 01:28:00.0 0 -3600000 Iceland 1946-10-27 01:28:00.0 0 -3600000 Iceland 1947-10-26 01:28:00.0 0 -3600000 Iceland 1948-10-24 01:28:00.0 0 -3600000 Iceland 1949-10-30 01:28:00.0 0 -3600000 Iceland 1950-10-22 01:28:00.0 0 -3600000 Iceland 1951-10-28 01:28:00.0 0 -3600000 Iceland 1952-10-26 01:28:00.0 0 -3600000 Iceland 1953-10-25 01:28:00.0 0 -3600000 Iceland 1954-10-24 01:28:00.0 0 -3600000 Iceland 1955-10-23 01:28:00.0 0 -3600000 Iceland 1956-10-28 01:28:00.0 0 -3600000 Iceland 1957-10-27 01:28:00.0 0 -3600000 Iceland 1958-10-26 01:28:00.0 0 -3600000 Iceland 1959-10-25 01:28:00.0 0 -3600000 Iceland 
1960-10-23 01:28:00.0 0 -3600000 Iceland 1961-10-22 01:28:00.0 0 -3600000 Iceland 1962-10-28 01:28:00.0 0 -3600000 Iceland 1963-10-27 01:28:00.0 0 -3600000 Iceland 1964-10-25 01:28:00.0 0 -3600000 Iceland 1965-10-24 01:28:00.0 0 -3600000 Iceland 1966-10-23 01:28:00.0 0 -3600000 Iceland 1967-10-29 01:28:00.0 0 -3600000 Indian/Kerguelen 1900-01-01 00:00:00.0 18000000 0 Kwajalein 1969-09-30 23:50:40.0 39600000 -43200000 Libya 1952-01-01 00:07:16.0 7200000 3600000 Libya 1954-01-01 00:07:16.0 7200000 3600000 Libya 1956-01-01 00:07:16.0 7200000 3600000 Libya 1982-01-01 00:07:16.0 7200000 3600000 Libya 1982-10-01 00:07:16.0 7200000 3600000 Libya 1983-10-01 00:07:16.0 7200000 3600000 Libya 1984-10-01 00:07:16.0 7200000 3600000 Libya 1985-10-01 00:07:16.0 7200000 3600000 Libya 1986-10-03 00:07:16.0 7200000 3600000 Libya 1987-10-01 00:07:16.0 7200000 3600000 Libya 1988-10-01 00:07:16.0 7200000 3600000 Libya 1989-10-01 00:07:16.0 7200000 3600000 Libya 1996-09-30 00:07:16.0 7200000 3600000 Pacific/Apia 1900-01-01 00:00:00.0 46800000 -41216000 Pacific/Enderbury 1900-01-01 00:00:00.0 46800000 -41060000 Pacific/Fakaofo 1900-01-01 00:00:00.0 46800000 -41096000 Pacific/Kiritimati 1900-01-01 00:00:00.0 50400000 -37760000 Pacific/Kwajalein 1969-09-30 23:50:40.0 39600000 -43200000 {code} > Investigate the difference between JDK and Spark's time zone offset > calculation > ------------------------------------------------------------------------------- > > Key: SPARK-31449 > URL: https://issues.apache.org/jira/browse/SPARK-31449 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 2.4.5 > Reporter: Maxim Gekk > Priority: Major > > Spark 2.4 calculates time zone offsets from wall clock timestamp using > `DateTimeUtils.getOffsetFromLocalMillis()` (see > https://github.com/apache/spark/blob/branch-2.4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala#L1088-L1118): > {code:scala} > private[sql] def 
getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): > Long = { > var guess = tz.getRawOffset > // the actual offset should be calculated based on milliseconds in UTC > val offset = tz.getOffset(millisLocal - guess) > if (offset != guess) { > guess = tz.getOffset(millisLocal - offset) > if (guess != offset) { > // fallback to do the reverse lookup using java.sql.Timestamp > // this should only happen near the start or end of DST > val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt > val year = getYear(days) > val month = getMonth(days) > val day = getDayOfMonth(days) > var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt > if (millisOfDay < 0) { > millisOfDay += MILLIS_PER_DAY.toInt > } > val seconds = (millisOfDay / 1000L).toInt > val hh = seconds / 3600 > val mm = seconds / 60 % 60 > val ss = seconds % 60 > val ms = millisOfDay % 1000 > val calendar = Calendar.getInstance(tz) > calendar.set(year, month - 1, day, hh, mm, ss) > calendar.set(Calendar.MILLISECOND, ms) > guess = (millisLocal - calendar.getTimeInMillis()).toInt > } > } > guess > } > {code} > Meanwhile, JDK's GregorianCalendar uses special methods of ZoneInfo, see > https://github.com/AdoptOpenJDK/openjdk-jdk8u/blob/aa318070b27849f1fe00d14684b2a40f7b29bf79/jdk/src/share/classes/java/util/GregorianCalendar.java#L2795-L2801: > {code:java} > if (zone instanceof ZoneInfo) { > ((ZoneInfo)zone).getOffsetsByWall(millis, zoneOffsets); > } else { > int gmtOffset = isFieldSet(fieldMask, ZONE_OFFSET) ? > internalGet(ZONE_OFFSET) : > zone.getRawOffset(); > zone.getOffsets(millis - gmtOffset, zoneOffsets); > } > {code} > We need to investigate whether there are any differences in results between the 2 approaches. -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org