This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 60be6d2  [SPARK-27109][SQL] Refactoring of TimestampFormatter and DateFormatter
60be6d2 is described below

commit 60be6d2ea3560143515c1ce9d0a7da416f8f595a
Author: Maxim Gekk <max.g...@gmail.com>
AuthorDate: Mon Mar 11 19:02:30 2019 -0500

    [SPARK-27109][SQL] Refactoring of TimestampFormatter and DateFormatter

    ## What changes were proposed in this pull request?

    In the PR, I propose to refactor the `parse()` methods of `Iso8601DateFormatter`/`Iso8601TimestampFormatter` and `toInstantWithZoneId` of `DateTimeFormatterHelper` to achieve the following:
    - Avoid unnecessary conversion of the parsed input to `java.time.Instant` before converting it to micros and days. The necessary information already exists in `ZonedDateTime`, and micros/days can be extracted from it directly.
    - Avoid an additional extraction of `LocalTime` from the parsed object, more precisely, a double query of `TemporalQueries.localTime` on `temporalAccessor`.
    - Avoid an additional extraction of the zone id from the parsed object, in particular, a double query of `TemporalQueries.offset()`.
    - Use `ZoneOffset.UTC` instead of `ZoneId.of("UTC")` in `DateFormatter`. This avoids looking up the zone offset by zone id.

    ## How was this patch tested?

    By the existing test suites `DateTimeUtilsSuite`, `TimestampFormatterSuite` and `DateFormatterSuite`.

    Closes #24030 from MaxGekk/query-localtime.

    Authored-by: Maxim Gekk <max.g...@gmail.com>
    Signed-off-by: Sean Owen <sean.o...@databricks.com>
---
 .../spark/sql/catalyst/util/DateFormatter.scala    | 18 +++++++---------
 .../catalyst/util/DateTimeFormatterHelper.scala    | 22 ++++++++++---------
 .../sql/catalyst/util/TimestampFormatter.scala     | 25 +++++++++++-----------
 .../apache/spark/sql/util/DateFormatterSuite.scala |  1 -
 4 files changed, 33 insertions(+), 33 deletions(-)
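As a minimal, self-contained sketch (not part of the commit), the conversion the refactored date path relies on looks roughly like this; the date pattern, the sample input and the object name `DaysSinceEpochSketch` are illustrative only:

```scala
import java.time.{LocalDate, LocalTime, ZoneOffset, ZonedDateTime}
import java.time.format.DateTimeFormatter
import java.time.temporal.TemporalQueries
import java.util.concurrent.TimeUnit.SECONDS

object DaysSinceEpochSketch {
  def main(args: Array[String]): Unit = {
    val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")
    // Parse to a TemporalAccessor; no intermediate java.time.Instant is needed.
    val parsed = formatter.parse("2019-03-11")
    // A date-only pattern carries no time component, so fall back to midnight.
    val localTime = Option(parsed.query(TemporalQueries.localTime)).getOrElse(LocalTime.MIDNIGHT)
    val localDate: LocalDate = parsed.query(TemporalQueries.localDate)
    val zonedDateTime = ZonedDateTime.of(localDate, localTime, ZoneOffset.UTC)
    // Days since 1970-01-01 come straight from the epoch seconds of the ZonedDateTime.
    val days = SECONDS.toDays(zonedDateTime.toEpochSecond).toInt
    println(days) // 17966
  }
}
```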
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index 9535a36..20e043a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -17,10 +17,9 @@
 
 package org.apache.spark.sql.catalyst.util
 
-import java.time.{Instant, ZoneId}
+import java.time.{Instant, ZoneOffset}
 import java.util.Locale
-
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToDays
+import java.util.concurrent.TimeUnit.SECONDS
 
 sealed trait DateFormatter extends Serializable {
   def parse(s: String): Int // returns days since epoch
@@ -33,18 +32,17 @@ class Iso8601DateFormatter(
 
   @transient
   private lazy val formatter = getOrCreateFormatter(pattern, locale)
-  private val UTC = ZoneId.of("UTC")
 
-  private def toInstant(s: String): Instant = {
-    val temporalAccessor = formatter.parse(s)
-    toInstantWithZoneId(temporalAccessor, UTC)
+  override def parse(s: String): Int = {
+    val parsed = formatter.parse(s)
+    val zonedDateTime = toZonedDateTime(parsed, ZoneOffset.UTC)
+    val seconds = zonedDateTime.toEpochSecond
+    SECONDS.toDays(seconds).toInt
   }
 
-  override def parse(s: String): Int = instantToDays(toInstant(s))
-
   override def format(days: Int): String = {
     val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY)
-    formatter.withZone(UTC).format(instant)
+    formatter.withZone(ZoneOffset.UTC).format(instant)
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
index a0c0db3..a7b6309 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala
@@ -28,16 +28,18 @@ import com.google.common.cache.CacheBuilder
 import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._
 
 trait DateTimeFormatterHelper {
-  protected def toInstantWithZoneId(temporalAccessor: TemporalAccessor, zoneId: ZoneId): Instant = {
-    val localTime = if (temporalAccessor.query(TemporalQueries.localTime) == null) {
-      LocalTime.ofNanoOfDay(0)
-    } else {
-      LocalTime.from(temporalAccessor)
-    }
-    val localDate = LocalDate.from(temporalAccessor)
-    val localDateTime = LocalDateTime.of(localDate, localTime)
-    val zonedDateTime = ZonedDateTime.of(localDateTime, zoneId)
-    Instant.from(zonedDateTime)
+  // Converts the parsed temporal object to ZonedDateTime. It sets time components to zeros
+  // if they does not exist in the parsed object.
+  protected def toZonedDateTime(
+      temporalAccessor: TemporalAccessor,
+      zoneId: ZoneId): ZonedDateTime = {
+    // Parsed input might not have time related part. In that case, time component is set to zeros.
+    val parsedLocalTime = temporalAccessor.query(TemporalQueries.localTime)
+    val localTime = if (parsedLocalTime == null) LocalTime.MIDNIGHT else parsedLocalTime
+    // Parsed input must have date component. At least, year must present in temporalAccessor.
+    val localDate = temporalAccessor.query(TemporalQueries.localDate)
+
+    ZonedDateTime.of(localDate, localTime, zoneId)
   }
 
   // Gets a formatter from the cache or creates new one. The buildFormatter method can be called
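Similarly, a small standalone sketch (again, not part of the commit) of how microseconds since the epoch can be computed on the timestamp path: the zone is taken from the parsed input when present, with a fallback to a default zone otherwise. The pattern, sample input, default zone and the object name `MicrosSinceEpochSketch` are illustrative only:

```scala
import java.time.{LocalDate, LocalTime, ZoneId, ZonedDateTime}
import java.time.format.DateTimeFormatter
import java.time.temporal.ChronoField.MICRO_OF_SECOND
import java.time.temporal.TemporalQueries
import java.util.concurrent.TimeUnit.SECONDS

object MicrosSinceEpochSketch {
  def main(args: Array[String]): Unit = {
    val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS")
    val defaultZoneId = ZoneId.of("UTC")
    val parsed = formatter.parse("2019-03-11 19:02:30.123456")
    // Prefer the zone carried by the input; otherwise use the default zone.
    val zoneId = Option(parsed.query(TemporalQueries.zone())).getOrElse(defaultZoneId)
    // A pattern without a time part would yield no LocalTime, so fall back to midnight.
    val localTime = Option(parsed.query(TemporalQueries.localTime)).getOrElse(LocalTime.MIDNIGHT)
    val localDate: LocalDate = parsed.query(TemporalQueries.localDate)
    val zonedDateTime = ZonedDateTime.of(localDate, localTime, zoneId)
    // Micros since epoch = whole seconds converted to micros + the sub-second micros.
    val micros = Math.addExact(
      SECONDS.toMicros(zonedDateTime.toEpochSecond),
      zonedDateTime.get(MICRO_OF_SECOND).toLong)
    println(micros) // 1552330950123456
  }
}
```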
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
index e559b6a..c079691 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala
@@ -20,10 +20,10 @@ package org.apache.spark.sql.catalyst.util
 import java.text.ParseException
 import java.time._
 import java.time.format.DateTimeParseException
+import java.time.temporal.ChronoField.MICRO_OF_SECOND
 import java.time.temporal.TemporalQueries
 import java.util.{Locale, TimeZone}
-
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToMicros
+import java.util.concurrent.TimeUnit.SECONDS
 
 sealed trait TimestampFormatter extends Serializable {
   /**
@@ -48,21 +48,22 @@ class Iso8601TimestampFormatter(
     locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper {
   @transient
   protected lazy val formatter = getOrCreateFormatter(pattern, locale)
+  private val timeZoneId = timeZone.toZoneId
 
-  private def toInstant(s: String): Instant = {
-    val temporalAccessor = formatter.parse(s)
-    if (temporalAccessor.query(TemporalQueries.offset()) == null) {
-      toInstantWithZoneId(temporalAccessor, timeZone.toZoneId)
-    } else {
-      Instant.from(temporalAccessor)
-    }
-  }
+  override def parse(s: String): Long = {
+    val parsed = formatter.parse(s)
+    val parsedZoneId = parsed.query(TemporalQueries.zone())
+    val zoneId = if (parsedZoneId == null) timeZoneId else parsedZoneId
+    val zonedDateTime = toZonedDateTime(parsed, zoneId)
+    val epochSeconds = zonedDateTime.toEpochSecond
+    val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND)
 
-  override def parse(s: String): Long = instantToMicros(toInstant(s))
+    Math.addExact(SECONDS.toMicros(epochSeconds), microsOfSecond)
+  }
 
   override def format(us: Long): String = {
     val instant = DateTimeUtils.microsToInstant(us)
-    formatter.withZone(timeZone.toZoneId).format(instant)
+    formatter.withZone(timeZoneId).format(instant)
   }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
index 4d5872c..602542f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/DateFormatterSuite.scala
@@ -18,7 +18,6 @@
 package org.apache.spark.sql.util
 
 import java.time.LocalDate
-import java.util.Locale
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.plans.SQLHelper

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org