cloud-fan commented on a change in pull request #27807: [SPARK-31076][SQL] Convert Catalyst's DATE/TIMESTAMP to Java Date/Timestamp via local date-time URL: https://github.com/apache/spark/pull/27807#discussion_r390276723
########## File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala ########## @@ -86,28 +95,50 @@ object DateTimeUtils { * Returns the number of days since epoch from java.sql.Date. */ def fromJavaDate(date: Date): SQLDate = { - microsToDays(millisToMicros(date.getTime)) + if (date.getTime < GREGORIAN_CUTOVER_MILLIS) { + val era = if (date.before(julianCommonEraStart)) 0 else 1 + val localDate = date.toLocalDate.`with`(ChronoField.ERA, era) + localDateToDays(localDate) + } else { + microsToDays(millisToMicros(date.getTime)) + } } /** * Returns a java.sql.Date from number of days since epoch. */ def toJavaDate(daysSinceEpoch: SQLDate): Date = { - new Date(microsToMillis(daysToMicros(daysSinceEpoch))) + if (daysSinceEpoch < GREGORIAN_CUTOVER_DAY) { + Date.valueOf(LocalDate.ofEpochDay(daysSinceEpoch)) + } else { + new Date(microsToMillis(daysToMicros(daysSinceEpoch))) + } } /** * Returns a java.sql.Timestamp from number of micros since epoch. */ def toJavaTimestamp(us: SQLTimestamp): Timestamp = { - Timestamp.from(microsToInstant(us)) + if (us < GREGORIAN_CUTOVER_MICROS) { + val ldt = microsToInstant(us).atZone(ZoneId.systemDefault()).toLocalDateTime + Timestamp.valueOf(ldt) + } else { + Timestamp.from(microsToInstant(us)) + } } /** * Returns the number of micros since epoch from java.sql.Timestamp. */ def fromJavaTimestamp(t: Timestamp): SQLTimestamp = { - instantToMicros(t.toInstant) + if (t.getTime < GREGORIAN_CUTOVER_MILLIS) { + val era = if (t.before(julianCommonEraStart)) 0 else 1 + val localDateTime = t.toLocalDateTime.`with`(ChronoField.ERA, era) + val instant = ZonedDateTime.of(localDateTime, ZoneId.systemDefault()).toInstant + instantToMicros(instant) + } else { + instantToMicros(t.toInstant) + } Review comment: Spark 3.0 always uses the Proleptic Gregorian calendar; this PR changes how Spark interacts with external datetime values (Java `Date`/`Timestamp`, for example). 
``` scala> sql("select date '1100-10-10'").collect() res1: Array[org.apache.spark.sql.Row] = Array([1100-10-03]) ``` This is wrong whichever calendar we use. Users write `1100-10-10` and then see `1100-10-03` without performing any operation. The reason is that Java's `Timestamp` uses a hybrid Julian/Gregorian calendar, and Spark needs to adjust the datetime values before outputting them to a system with a different calendar. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org