[ 
https://issues.apache.org/jira/browse/SPARK-31449?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17092824#comment-17092824
 ] 

Maxim Gekk commented on SPARK-31449:
------------------------------------

[~cloud_fan] [~hyukjin.kwon] I compared the results of those 2 functions for 
all time zones with a step of 1 day, and found many differences:
{code:scala}
// Walks every zone in ALL_TIMEZONES in one-day steps and prints each timestamp for which
// two wall-clock -> zone-offset lookups disagree:
//   1. `getOffsetFromLocalMillis`  - a copy of the Spark 2.4 `DateTimeUtils` algorithm;
//   2. `getOffsetFromLocalMillis2` - the lookup done for `ZoneInfo` zones via
//      `getOffsetsByWall`, with the raw-offset based lookup for any other `TimeZone`.
// Each printed line has the form: "<zone id> <timestamp> <offset1> <offset2>".
test("Investigate the difference between JDK and Spark's time zone offset calculation") {
  import java.util.{Calendar, TimeZone}
  import sun.util.calendar.ZoneInfo

  /**
   * Copy of the Spark 2.4 offset lookup. The arithmetic is intentionally left exactly as in
   * the original so that the comparison below measures that algorithm and nothing else.
   *
   * @param millisLocal wall-clock milliseconds in the time zone `tz`
   * @param tz the time zone whose offset is looked up
   * @return the offset in milliseconds chosen for `millisLocal`
   */
  def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = {
    // First guess: the zone's raw offset.
    var guess = tz.getRawOffset
    // the actual offset should be calculated based on milliseconds in UTC
    val offset = tz.getOffset(millisLocal - guess)
    if (offset != guess) {
      // Second guess: redo the lookup using the offset that was just found.
      guess = tz.getOffset(millisLocal - offset)
      if (guess != offset) {
        // fallback to do the reverse lookup using java.sql.Timestamp
        // this should only happen near the start or end of DST
        val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
        val year = getYear(days)
        val month = getMonth(days)
        val day = getDayOfMonth(days)

        // `%` keeps the sign of the dividend, so shift negative remainders into [0, day).
        var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
        if (millisOfDay < 0) {
          millisOfDay += MILLIS_PER_DAY.toInt
        }
        val seconds = (millisOfDay / 1000L).toInt
        val hh = seconds / 3600
        val mm = seconds / 60 % 60
        val ss = seconds % 60
        val ms = millisOfDay % 1000
        val calendar = Calendar.getInstance(tz)
        // Calendar months are 0-based, hence `month - 1`
        // (presumably `getMonth` is 1-based - TODO confirm).
        calendar.set(year, month - 1, day, hh, mm, ss)
        calendar.set(Calendar.MILLISECOND, ms)
        // The offset is the distance between the wall-clock value and the resolved instant.
        guess = (millisLocal - calendar.getTimeInMillis()).toInt
      }
    }
    guess
  }

  /**
   * Offset lookup modelled on the JDK: `ZoneInfo` zones are resolved with
   * `getOffsetsByWall`; any other `TimeZone` falls back to a lookup at
   * `millisLocal - rawOffset`.
   *
   * @param millisLocal wall-clock milliseconds in the time zone `tz`
   * @param tz the time zone whose offset is looked up
   * @return the offset in milliseconds chosen for `millisLocal`
   */
  def getOffsetFromLocalMillis2(millisLocal: Long, tz: TimeZone): Long = {
    tz match {
      // `null`: no output array is supplied, only the returned total offset is used.
      case zoneInfo: ZoneInfo => zoneInfo.getOffsetsByWall(millisLocal, null)
      case timeZone: TimeZone => timeZone.getOffset(millisLocal - timeZone.getRawOffset)
    }
  }

  ALL_TIMEZONES
    .sortBy(_.getId)
    .foreach { zid =>
      withDefaultTimeZone(zid) {
        // Scan from local midnight 0001-01-01 up to (excluding) local midnight 2037-01-01.
        val start = microsToMillis(instantToMicros(
          LocalDateTime.of(1, 1, 1, 0, 0, 0).atZone(zid).toInstant))
        val end = microsToMillis(instantToMicros(
          LocalDateTime.of(2037, 1, 1, 0, 0, 0).atZone(zid).toInstant))

        // Loop-invariant: resolve the legacy TimeZone once per zone, not twice per step.
        val tz = TimeZone.getTimeZone(zid)
        val step: Long = MILLIS_PER_DAY

        // A plain `while` loop keeps the scan over primitive longs free of allocation.
        var millis = start
        while (millis < end) {
          val offset1 = getOffsetFromLocalMillis(millis, tz)
          val offset2 = getOffsetFromLocalMillis2(millis, tz)
          if (offset1 != offset2) {
            println(s"${zid.getId} ${new Timestamp(millis)} $offset1 $offset2")
          }
          millis += step
        }
      }
    }
}
{code}
{code}
Africa/Algiers 1916-10-01 23:47:48.0 3600000 0
Africa/Algiers 1917-10-07 23:47:48.0 3600000 0
Africa/Algiers 1918-10-06 23:47:48.0 3600000 0
Africa/Algiers 1919-10-05 23:47:48.0 3600000 0
Africa/Algiers 1920-10-23 23:47:48.0 3600000 0
Africa/Algiers 1921-06-21 23:47:48.0 3600000 0
Africa/Algiers 1946-10-06 23:47:48.0 3600000 0
Africa/Algiers 1963-04-13 23:47:48.0 3600000 0
Africa/Algiers 1971-09-26 23:47:48.0 3600000 0
Africa/Algiers 1979-10-25 23:47:48.0 3600000 0
Africa/Ceuta 1900-01-01 00:00:00.0 3600000 -1276000
Africa/Ceuta 1924-10-05 00:21:16.0 3600000 0
Africa/Ceuta 1926-10-03 00:21:16.0 3600000 0
Africa/Ceuta 1927-10-02 00:21:16.0 3600000 0
Africa/Ceuta 1928-10-07 00:21:16.0 3600000 0
Africa/Sao_Tome 1899-12-31 23:33:04.0 0 -2205000
Africa/Tripoli 1952-01-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1954-01-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1956-01-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1982-01-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1982-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1983-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1984-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1985-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1986-10-03 00:07:16.0 7200000 3600000
Africa/Tripoli 1987-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1988-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1989-10-01 00:07:16.0 7200000 3600000
Africa/Tripoli 1996-09-30 00:07:16.0 7200000 3600000
America/Inuvik 1965-10-30 18:00:00.0 -21600000 -28800000
America/Iqaluit 1999-10-30 20:00:00.0 -14400000 -21600000
America/Pangnirtung 1999-10-30 20:00:00.0 -14400000 -21600000
Antarctica/Casey 1900-01-01 00:00:00.0 28800000 0
Antarctica/Davis 1900-01-01 00:00:00.0 25200000 0
Antarctica/Davis 2009-10-18 05:00:00.0 25200000 18000000
Antarctica/Davis 2011-10-28 05:00:00.0 25200000 18000000
Antarctica/DumontDUrville 1900-01-01 00:00:00.0 36000000 0
Antarctica/Mawson 1900-01-01 00:00:00.0 18000000 0
Antarctica/Syowa 1900-01-01 00:00:00.0 10800000 0
Antarctica/Vostok 1900-01-01 00:00:00.0 21600000 0
Atlantic/Reykjavik 1939-10-29 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1940-11-03 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1941-11-02 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1942-10-25 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1943-10-24 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1944-10-22 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1945-10-28 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1946-10-27 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1947-10-26 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1948-10-24 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1949-10-30 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1950-10-22 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1951-10-28 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1952-10-26 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1953-10-25 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1954-10-24 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1955-10-23 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1956-10-28 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1957-10-27 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1958-10-26 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1959-10-25 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1960-10-23 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1961-10-22 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1962-10-28 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1963-10-27 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1964-10-25 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1965-10-24 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1966-10-23 01:28:00.0 0 -3600000
Atlantic/Reykjavik 1967-10-29 01:28:00.0 0 -3600000
Europe/Andorra 1900-12-31 23:53:56.0 364000 0
Europe/Brussels 1919-10-04 23:42:30.0 3600000 0
Europe/Brussels 1920-10-23 23:42:30.0 3600000 0
Europe/Brussels 1921-10-25 23:42:30.0 3600000 0
Europe/Brussels 1922-10-07 23:42:30.0 3600000 0
Europe/Brussels 1923-10-06 23:42:30.0 3600000 0
Europe/Brussels 1924-10-04 23:42:30.0 3600000 0
Europe/Brussels 1925-10-03 23:42:30.0 3600000 0
Europe/Brussels 1926-10-02 23:42:30.0 3600000 0
Europe/Brussels 1927-10-01 23:42:30.0 3600000 0
Europe/Gibraltar 1900-01-01 00:21:24.0 3600000 0
Europe/Luxembourg 1918-11-24 23:35:24.0 3600000 0
Europe/Madrid 1900-01-01 00:00:00.0 3600000 -884000
Europe/Madrid 1918-10-07 00:14:44.0 3600000 0
Europe/Madrid 1919-10-07 00:14:44.0 3600000 0
Europe/Madrid 1924-10-05 00:14:44.0 3600000 0
Europe/Madrid 1926-10-03 00:14:44.0 3600000 0
Europe/Madrid 1927-10-02 00:14:44.0 3600000 0
Europe/Madrid 1928-10-07 00:14:44.0 3600000 0
Europe/Madrid 1929-10-06 00:14:44.0 3600000 0
Europe/Madrid 1937-10-03 00:14:44.0 3600000 0
Europe/Madrid 1939-10-08 00:14:44.0 3600000 0
Europe/Monaco 1916-10-01 23:30:28.0 3600000 0
Europe/Monaco 1917-10-07 23:30:28.0 3600000 0
Europe/Monaco 1918-10-06 23:30:28.0 3600000 0
Europe/Monaco 1919-10-05 23:30:28.0 3600000 0
Europe/Monaco 1920-10-23 23:30:28.0 3600000 0
Europe/Monaco 1921-10-25 23:30:28.0 3600000 0
Europe/Monaco 1922-10-07 23:30:28.0 3600000 0
Europe/Monaco 1923-10-06 23:30:28.0 3600000 0
Europe/Monaco 1924-10-04 23:30:28.0 3600000 0
Europe/Monaco 1925-10-03 23:30:28.0 3600000 0
Europe/Monaco 1926-10-02 23:30:28.0 3600000 0
Europe/Monaco 1927-10-01 23:30:28.0 3600000 0
Europe/Monaco 1928-10-06 23:30:28.0 3600000 0
Europe/Monaco 1929-10-05 23:30:28.0 3600000 0
Europe/Monaco 1930-10-04 23:30:28.0 3600000 0
Europe/Monaco 1931-10-03 23:30:28.0 3600000 0
Europe/Monaco 1932-10-01 23:30:28.0 3600000 0
Europe/Monaco 1933-10-07 23:30:28.0 3600000 0
Europe/Monaco 1934-10-06 23:30:28.0 3600000 0
Europe/Monaco 1935-10-05 23:30:28.0 3600000 0
Europe/Monaco 1936-10-03 23:30:28.0 3600000 0
Europe/Monaco 1937-10-02 23:30:28.0 3600000 0
Europe/Monaco 1938-10-01 23:30:28.0 3600000 0
Europe/Monaco 1939-11-18 23:30:28.0 3600000 0
Europe/Paris 1916-10-01 23:50:39.0 3600000 0
Europe/Paris 1917-10-07 23:50:39.0 3600000 0
Europe/Paris 1918-10-06 23:50:39.0 3600000 0
Europe/Paris 1919-10-05 23:50:39.0 3600000 0
Europe/Paris 1920-10-23 23:50:39.0 3600000 0
Europe/Paris 1921-10-25 23:50:39.0 3600000 0
Europe/Paris 1922-10-07 23:50:39.0 3600000 0
Europe/Paris 1923-10-06 23:50:39.0 3600000 0
Europe/Paris 1924-10-04 23:50:39.0 3600000 0
Europe/Paris 1925-10-03 23:50:39.0 3600000 0
Europe/Paris 1926-10-02 23:50:39.0 3600000 0
Europe/Paris 1927-10-01 23:50:39.0 3600000 0
Europe/Paris 1928-10-06 23:50:39.0 3600000 0
Europe/Paris 1929-10-05 23:50:39.0 3600000 0
Europe/Paris 1930-10-04 23:50:39.0 3600000 0
Europe/Paris 1931-10-03 23:50:39.0 3600000 0
Europe/Paris 1932-10-01 23:50:39.0 3600000 0
Europe/Paris 1933-10-07 23:50:39.0 3600000 0
Europe/Paris 1934-10-06 23:50:39.0 3600000 0
Europe/Paris 1935-10-05 23:50:39.0 3600000 0
Europe/Paris 1936-10-03 23:50:39.0 3600000 0
Europe/Paris 1937-10-02 23:50:39.0 3600000 0
Europe/Paris 1938-10-01 23:50:39.0 3600000 0
Europe/Paris 1939-11-18 23:50:39.0 3600000 0
Iceland 1939-10-29 01:28:00.0 0 -3600000
Iceland 1940-11-03 01:28:00.0 0 -3600000
Iceland 1941-11-02 01:28:00.0 0 -3600000
Iceland 1942-10-25 01:28:00.0 0 -3600000
Iceland 1943-10-24 01:28:00.0 0 -3600000
Iceland 1944-10-22 01:28:00.0 0 -3600000
Iceland 1945-10-28 01:28:00.0 0 -3600000
Iceland 1946-10-27 01:28:00.0 0 -3600000
Iceland 1947-10-26 01:28:00.0 0 -3600000
Iceland 1948-10-24 01:28:00.0 0 -3600000
Iceland 1949-10-30 01:28:00.0 0 -3600000
Iceland 1950-10-22 01:28:00.0 0 -3600000
Iceland 1951-10-28 01:28:00.0 0 -3600000
Iceland 1952-10-26 01:28:00.0 0 -3600000
Iceland 1953-10-25 01:28:00.0 0 -3600000
Iceland 1954-10-24 01:28:00.0 0 -3600000
Iceland 1955-10-23 01:28:00.0 0 -3600000
Iceland 1956-10-28 01:28:00.0 0 -3600000
Iceland 1957-10-27 01:28:00.0 0 -3600000
Iceland 1958-10-26 01:28:00.0 0 -3600000
Iceland 1959-10-25 01:28:00.0 0 -3600000
Iceland 1960-10-23 01:28:00.0 0 -3600000
Iceland 1961-10-22 01:28:00.0 0 -3600000
Iceland 1962-10-28 01:28:00.0 0 -3600000
Iceland 1963-10-27 01:28:00.0 0 -3600000
Iceland 1964-10-25 01:28:00.0 0 -3600000
Iceland 1965-10-24 01:28:00.0 0 -3600000
Iceland 1966-10-23 01:28:00.0 0 -3600000
Iceland 1967-10-29 01:28:00.0 0 -3600000
Indian/Kerguelen 1900-01-01 00:00:00.0 18000000 0
Kwajalein 1969-09-30 23:50:40.0 39600000 -43200000
Libya 1952-01-01 00:07:16.0 7200000 3600000
Libya 1954-01-01 00:07:16.0 7200000 3600000
Libya 1956-01-01 00:07:16.0 7200000 3600000
Libya 1982-01-01 00:07:16.0 7200000 3600000
Libya 1982-10-01 00:07:16.0 7200000 3600000
Libya 1983-10-01 00:07:16.0 7200000 3600000
Libya 1984-10-01 00:07:16.0 7200000 3600000
Libya 1985-10-01 00:07:16.0 7200000 3600000
Libya 1986-10-03 00:07:16.0 7200000 3600000
Libya 1987-10-01 00:07:16.0 7200000 3600000
Libya 1988-10-01 00:07:16.0 7200000 3600000
Libya 1989-10-01 00:07:16.0 7200000 3600000
Libya 1996-09-30 00:07:16.0 7200000 3600000
Pacific/Apia 1900-01-01 00:00:00.0 46800000 -41216000
Pacific/Enderbury 1900-01-01 00:00:00.0 46800000 -41060000
Pacific/Fakaofo 1900-01-01 00:00:00.0 46800000 -41096000
Pacific/Kiritimati 1900-01-01 00:00:00.0 50400000 -37760000
Pacific/Kwajalein 1969-09-30 23:50:40.0 39600000 -43200000
{code}

> Investigate the difference between JDK and Spark's time zone offset 
> calculation
> -------------------------------------------------------------------------------
>
>                 Key: SPARK-31449
>                 URL: https://issues.apache.org/jira/browse/SPARK-31449
>             Project: Spark
>          Issue Type: Improvement
>          Components: SQL
>    Affects Versions: 2.4.5
>            Reporter: Maxim Gekk
>            Priority: Major
>
> Spark 2.4 calculates time zone offsets from wall clock timestamp using 
> `DateTimeUtils.getOffsetFromLocalMillis()` (see 
> https://github.com/apache/spark/blob/branch-2.4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala#L1088-L1118):
> {code:scala}
>   private[sql] def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): 
> Long = {
>     var guess = tz.getRawOffset
>     // the actual offset should be calculated based on milliseconds in UTC
>     val offset = tz.getOffset(millisLocal - guess)
>     if (offset != guess) {
>       guess = tz.getOffset(millisLocal - offset)
>       if (guess != offset) {
>         // fallback to do the reverse lookup using java.sql.Timestamp
>         // this should only happen near the start or end of DST
>         val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt
>         val year = getYear(days)
>         val month = getMonth(days)
>         val day = getDayOfMonth(days)
>         var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt
>         if (millisOfDay < 0) {
>           millisOfDay += MILLIS_PER_DAY.toInt
>         }
>         val seconds = (millisOfDay / 1000L).toInt
>         val hh = seconds / 3600
>         val mm = seconds / 60 % 60
>         val ss = seconds % 60
>         val ms = millisOfDay % 1000
>         val calendar = Calendar.getInstance(tz)
>         calendar.set(year, month - 1, day, hh, mm, ss)
>         calendar.set(Calendar.MILLISECOND, ms)
>         guess = (millisLocal - calendar.getTimeInMillis()).toInt
>       }
>     }
>     guess
>   }
> {code}
> Meanwhile, JDK's GregorianCalendar uses special methods of ZoneInfo, see 
> https://github.com/AdoptOpenJDK/openjdk-jdk8u/blob/aa318070b27849f1fe00d14684b2a40f7b29bf79/jdk/src/share/classes/java/util/GregorianCalendar.java#L2795-L2801:
> {code:java}
>             if (zone instanceof ZoneInfo) {
>                 ((ZoneInfo)zone).getOffsetsByWall(millis, zoneOffsets);
>             } else {
>                 int gmtOffset = isFieldSet(fieldMask, ZONE_OFFSET) ?
>                                     internalGet(ZONE_OFFSET) : 
> zone.getRawOffset();
>                 zone.getOffsets(millis - gmtOffset, zoneOffsets);
>             }
> {code}
> Need to investigate whether there are any differences in results between the 2 approaches.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to