This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1882912 [SPARK-27199][SQL] Replace TimeZone by ZoneId in TimestampFormatter API 1882912 is described below commit 1882912cca4921d3d8c8632b3bb34e69e8119791 Author: Maxim Gekk <max.g...@gmail.com> AuthorDate: Wed Mar 20 21:28:11 2019 +0900 [SPARK-27199][SQL] Replace TimeZone by ZoneId in TimestampFormatter API ## What changes were proposed in this pull request? In the PR, I propose to use `ZoneId` instead of `TimeZone` in: - the `apply` and `getFractionFormatter` methods of the `TimestampFormatter` object, - and in implementations of the `TimestampFormatter` trait like `FractionTimestampFormatter`. The reason for the changes is to avoid the unnecessary conversion from `TimeZone` to `ZoneId`, because `ZoneId` is used in `TimestampFormatter` implementations internally, and the conversion is performed via `String`, which is not free. Also, taking into account that `TimeZone` instances are converted from `String` in some cases, the worst case looks like `String` -> `TimeZone` -> `String` -> `ZoneId`. The PR eliminates the unneeded conversions. ## How was this patch tested? It was tested by `DateExpressionsSuite`, `DateTimeUtilsSuite` and `TimestampFormatterSuite`. Closes #24141 from MaxGekk/zone-id. 
Authored-by: Maxim Gekk <max.g...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../spark/sql/catalyst/catalog/interface.scala | 3 +- .../spark/sql/catalyst/csv/CSVInferSchema.scala | 2 +- .../apache/spark/sql/catalyst/csv/CSVOptions.scala | 5 +-- .../sql/catalyst/csv/UnivocityGenerator.scala | 2 +- .../spark/sql/catalyst/csv/UnivocityParser.scala | 2 +- .../spark/sql/catalyst/expressions/Cast.scala | 2 +- .../catalyst/expressions/datetimeExpressions.scala | 21 ++++++------ .../spark/sql/catalyst/json/JSONOptions.scala | 5 +-- .../spark/sql/catalyst/json/JacksonGenerator.scala | 2 +- .../spark/sql/catalyst/json/JacksonParser.scala | 2 +- .../spark/sql/catalyst/json/JsonInferSchema.scala | 2 +- .../spark/sql/catalyst/util/DateTimeUtils.scala | 4 +-- .../sql/catalyst/util/TimestampFormatter.scala | 31 +++++++++--------- .../sql/catalyst/csv/UnivocityParserSuite.scala | 8 +++-- .../expressions/DateExpressionsSuite.scala | 3 +- .../sql/catalyst/util/DateTimeTestUtils.scala | 3 +- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 2 +- .../spark/sql/util/TimestampFormatterSuite.scala | 31 ++++++++---------- .../apache/spark/sql/execution/HiveResult.scala | 2 +- .../execution/datasources/PartitioningUtils.scala | 37 +++++++++++----------- .../execution/datasources/jdbc/JDBCRelation.scala | 2 +- .../parquet/ParquetPartitionDiscoverySuite.scala | 21 ++++++------ 22 files changed, 99 insertions(+), 93 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 69b5cb4..6006637 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.catalog import java.net.URI +import java.time.ZoneOffset import java.util.Date import scala.collection.mutable 
@@ -477,7 +478,7 @@ object CatalogColumnStat extends Logging { val VERSION = 2 private def getTimestampFormatter(): TimestampFormatter = { - TimestampFormatter(format = "yyyy-MM-dd HH:mm:ss.SSSSSS", timeZone = DateTimeUtils.TimeZoneUTC) + TimestampFormatter(format = "yyyy-MM-dd HH:mm:ss.SSSSSS", zoneId = ZoneOffset.UTC) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 4dd4104..ae9f057 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -29,7 +29,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { private val timestampParser = TimestampFormatter( options.timestampFormat, - options.timeZone, + options.zoneId, options.locale) private val decimalParser = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala index 90c96d1..1268fcf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.catalyst.csv import java.nio.charset.StandardCharsets -import java.util.{Locale, TimeZone} +import java.time.ZoneId +import java.util.Locale import com.univocity.parsers.csv.{CsvParserSettings, CsvWriterSettings, UnescapedQuoteHandling} @@ -139,7 +140,7 @@ class CSVOptions( name.map(CompressionCodecs.getCodecClassName) } - val timeZone: TimeZone = DateTimeUtils.getTimeZone( + val zoneId: ZoneId = DateTimeUtils.getZoneId( parameters.getOrElse(DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId)) // A language tag in IETF BCP 47 format diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala index f012d96..9ca9450 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala @@ -43,7 +43,7 @@ class UnivocityGenerator( private val timestampFormatter = TimestampFormatter( options.timestampFormat, - options.timeZone, + options.zoneId, options.locale) private val dateFormatter = DateFormatter(options.dateFormat, options.locale) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index 79dff6f..b26044e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -76,7 +76,7 @@ class UnivocityParser( private val timestampFormatter = TimestampFormatter( options.timestampFormat, - options.timeZone, + options.zoneId, options.locale) private val dateFormatter = DateFormatter(options.dateFormat, options.locale) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index c238ccb..a70ed6d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -233,7 +233,7 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String @inline private[this] def buildCast[T](a: Any, func: T => Any): Any = func(a.asInstanceOf[T]) private lazy val dateFormatter = DateFormatter() - private lazy val timestampFormatter = 
TimestampFormatter.getFractionFormatter(timeZone) + private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter(zoneId) // UDFToString private[this] def castToString(from: DataType): Any => Any = from match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 7aa1e70..7878a87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp -import java.time.{Instant, LocalDate} +import java.time.{Instant, LocalDate, ZoneId} import java.time.temporal.IsoFields import java.util.{Locale, TimeZone} @@ -49,6 +49,7 @@ trait TimeZoneAwareExpression extends Expression { def withTimeZone(timeZoneId: String): TimeZoneAwareExpression @transient lazy val timeZone: TimeZone = DateTimeUtils.getTimeZone(timeZoneId.get) + @transient lazy val zoneId: ZoneId = DateTimeUtils.getZoneId(timeZoneId.get) } /** @@ -532,16 +533,16 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti copy(timeZoneId = Option(timeZoneId)) override protected def nullSafeEval(timestamp: Any, format: Any): Any = { - val df = TimestampFormatter(format.toString, timeZone) + val df = TimestampFormatter(format.toString, zoneId) UTF8String.fromString(df.format(timestamp.asInstanceOf[Long])) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val tf = TimestampFormatter.getClass.getName.stripSuffix("$") - val tz = ctx.addReferenceObj("timeZone", timeZone) + val zid = ctx.addReferenceObj("zoneId", zoneId, "java.time.ZoneId") val locale = ctx.addReferenceObj("locale", Locale.US) defineCodeGen(ctx, ev, (timestamp, format) => { - 
s"""UTF8String.fromString($tf$$.MODULE$$.apply($format.toString(), $tz, $locale) + s"""UTF8String.fromString($tf$$.MODULE$$.apply($format.toString(), $zid, $locale) .format($timestamp))""" }) } @@ -635,7 +636,7 @@ abstract class UnixTime private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] private lazy val formatter: TimestampFormatter = try { - TimestampFormatter(constFormat.toString, timeZone) + TimestampFormatter(constFormat.toString, zoneId) } catch { case NonFatal(_) => null } @@ -668,7 +669,7 @@ abstract class UnixTime } else { val formatString = f.asInstanceOf[UTF8String].toString try { - TimestampFormatter(formatString, timeZone).parse( + TimestampFormatter(formatString, zoneId).parse( t.asInstanceOf[UTF8String].toString) / MICROS_PER_SECOND } catch { case NonFatal(_) => null @@ -707,7 +708,7 @@ abstract class UnixTime }""") } case StringType => - val tz = ctx.addReferenceObj("timeZone", timeZone) + val tz = ctx.addReferenceObj("zoneId", zoneId) val locale = ctx.addReferenceObj("locale", Locale.US) val tf = TimestampFormatter.getClass.getName.stripSuffix("$") nullSafeCodeGen(ctx, ev, (string, format) => { @@ -789,7 +790,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ private lazy val constFormat: UTF8String = right.eval().asInstanceOf[UTF8String] private lazy val formatter: TimestampFormatter = try { - TimestampFormatter(constFormat.toString, timeZone) + TimestampFormatter(constFormat.toString, zoneId) } catch { case NonFatal(_) => null } @@ -815,7 +816,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ null } else { try { - UTF8String.fromString(TimestampFormatter(f.toString, timeZone) + UTF8String.fromString(TimestampFormatter(f.toString, zoneId) .format(time.asInstanceOf[Long] * MICROS_PER_SECOND)) } catch { case NonFatal(_) => null @@ -846,7 +847,7 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ }""") } } else { - val 
tz = ctx.addReferenceObj("timeZone", timeZone) + val tz = ctx.addReferenceObj("zoneId", zoneId) val locale = ctx.addReferenceObj("locale", Locale.US) val tf = TimestampFormatter.getClass.getName.stripSuffix("$") nullSafeCodeGen(ctx, ev, (seconds, f) => { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index 1ec9d50..788eb00 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.catalyst.json import java.nio.charset.{Charset, StandardCharsets} -import java.util.{Locale, TimeZone} +import java.time.ZoneId +import java.util.Locale import com.fasterxml.jackson.core.{JsonFactory, JsonParser} @@ -78,7 +79,7 @@ private[sql] class JSONOptions( // A language tag in IETF BCP 47 format val locale: Locale = parameters.get("locale").map(Locale.forLanguageTag).getOrElse(Locale.US) - val timeZone: TimeZone = DateTimeUtils.getTimeZone( + val zoneId: ZoneId = DateTimeUtils.getZoneId( parameters.getOrElse(DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId)) val dateFormat: String = parameters.getOrElse("dateFormat", "yyyy-MM-dd") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala index 951f519..3378040 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala @@ -79,7 +79,7 @@ private[sql] class JacksonGenerator( private val timestampFormatter = TimestampFormatter( options.timestampFormat, - options.timeZone, + options.zoneId, options.locale) private val dateFormatter = DateFormatter(options.dateFormat, options.locale) diff 
--git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 8cf758e..19bc5bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -57,7 +57,7 @@ class JacksonParser( private val timestampFormatter = TimestampFormatter( options.timestampFormat, - options.timeZone, + options.zoneId, options.locale) private val dateFormatter = DateFormatter(options.dateFormat, options.locale) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 1fb4594..c5a97c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -39,7 +39,7 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { private val timestampFormatter = TimestampFormatter( options.timestampFormat, - options.timeZone, + options.zoneId, options.locale) /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 627ee14..45d2406 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -70,9 +70,9 @@ object DateTimeUtils { def defaultTimeZone(): TimeZone = TimeZone.getDefault() + def getZoneId(timeZoneId: String): ZoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS) def getTimeZone(timeZoneId: String): TimeZone = { - val zoneId = ZoneId.of(timeZoneId, ZoneId.SHORT_IDS) - TimeZone.getTimeZone(zoneId) + 
TimeZone.getTimeZone(getZoneId(timeZoneId)) } // we should use the exact day as Int, for example, (year, month, day) -> day diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index c079691..f2a1a95 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -44,17 +44,16 @@ sealed trait TimestampFormatter extends Serializable { class Iso8601TimestampFormatter( pattern: String, - timeZone: TimeZone, + zoneId: ZoneId, locale: Locale) extends TimestampFormatter with DateTimeFormatterHelper { @transient protected lazy val formatter = getOrCreateFormatter(pattern, locale) - private val timeZoneId = timeZone.toZoneId override def parse(s: String): Long = { val parsed = formatter.parse(s) val parsedZoneId = parsed.query(TemporalQueries.zone()) - val zoneId = if (parsedZoneId == null) timeZoneId else parsedZoneId - val zonedDateTime = toZonedDateTime(parsed, zoneId) + val timeZoneId = if (parsedZoneId == null) zoneId else parsedZoneId + val zonedDateTime = toZonedDateTime(parsed, timeZoneId) val epochSeconds = zonedDateTime.toEpochSecond val microsOfSecond = zonedDateTime.get(MICRO_OF_SECOND) @@ -63,7 +62,7 @@ class Iso8601TimestampFormatter( override def format(us: Long): String = { val instant = DateTimeUtils.microsToInstant(us) - formatter.withZone(timeZoneId).format(instant) + formatter.withZone(zoneId).format(instant) } } @@ -73,10 +72,10 @@ class Iso8601TimestampFormatter( * output trailing zeros in the fraction. For example, the timestamp `2019-03-05 15:00:01.123400` is * formatted as the string `2019-03-05 15:00:01.1234`. 
* - * @param timeZone the time zone in which the formatter parses or format timestamps + * @param zoneId the time zone identifier in which the formatter parses or format timestamps */ -class FractionTimestampFormatter(timeZone: TimeZone) - extends Iso8601TimestampFormatter("", timeZone, TimestampFormatter.defaultLocale) { +class FractionTimestampFormatter(zoneId: ZoneId) + extends Iso8601TimestampFormatter("", zoneId, TimestampFormatter.defaultLocale) { @transient override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter @@ -86,19 +85,19 @@ object TimestampFormatter { val defaultPattern: String = "yyyy-MM-dd HH:mm:ss" val defaultLocale: Locale = Locale.US - def apply(format: String, timeZone: TimeZone, locale: Locale): TimestampFormatter = { - new Iso8601TimestampFormatter(format, timeZone, locale) + def apply(format: String, zoneId: ZoneId, locale: Locale): TimestampFormatter = { + new Iso8601TimestampFormatter(format, zoneId, locale) } - def apply(format: String, timeZone: TimeZone): TimestampFormatter = { - apply(format, timeZone, defaultLocale) + def apply(format: String, zoneId: ZoneId): TimestampFormatter = { + apply(format, zoneId, defaultLocale) } - def apply(timeZone: TimeZone): TimestampFormatter = { - apply(defaultPattern, timeZone, defaultLocale) + def apply(zoneId: ZoneId): TimestampFormatter = { + apply(defaultPattern, zoneId, defaultLocale) } - def getFractionFormatter(timeZone: TimeZone): TimestampFormatter = { - new FractionTimestampFormatter(timeZone) + def getFractionFormatter(zoneId: ZoneId): TimestampFormatter = { + new FractionTimestampFormatter(zoneId) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala index 4ae61bc..986de12 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/UnivocityParserSuite.scala @@ -116,7 +116,9 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { parser = new UnivocityParser(StructType(Seq.empty), timestampsOptions) val customTimestamp = "31/01/2015 00:00" var format = FastDateFormat.getInstance( - timestampsOptions.timestampFormat, timestampsOptions.timeZone, timestampsOptions.locale) + timestampsOptions.timestampFormat, + TimeZone.getTimeZone(timestampsOptions.zoneId), + timestampsOptions.locale) val expectedTime = format.parse(customTimestamp).getTime val castedTimestamp = parser.makeConverter("_1", TimestampType, nullable = true) .apply(customTimestamp) @@ -126,7 +128,9 @@ class UnivocityParserSuite extends SparkFunSuite with SQLHelper { val dateOptions = new CSVOptions(Map("dateFormat" -> "dd/MM/yyyy"), false, "GMT") parser = new UnivocityParser(StructType(Seq.empty), dateOptions) format = FastDateFormat.getInstance( - dateOptions.dateFormat, dateOptions.timeZone, dateOptions.locale) + dateOptions.dateFormat, + TimeZone.getTimeZone(dateOptions.zoneId), + dateOptions.locale) val expectedDate = format.parse(customDate).getTime val castedDate = parser.makeConverter("_1", DateType, nullable = true) .apply(customDate) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 62d194f..61ee8f0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat +import java.time.ZoneOffset import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit import 
java.util.concurrent.TimeUnit._ @@ -43,7 +44,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val jstId = Option(TimeZoneJST.getID) def toMillis(timestamp: String): Long = { - val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", TimeZoneGMT) + val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", ZoneOffset.UTC) TimeUnit.MICROSECONDS.toMillis(tf.parse(timestamp)) } val date = "2015-04-08 13:10:15" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala index c35ab2b..4dfeb85 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeTestUtils.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.util -import java.time.{LocalDate, LocalDateTime, LocalTime} +import java.time.{LocalDate, LocalDateTime, LocalTime, ZoneId} import java.util.TimeZone import java.util.concurrent.TimeUnit @@ -40,6 +40,7 @@ object DateTimeTestUtils { "Asia/Hong_Kong", "Europe/Amsterdam") val outstandingTimezones: Seq[TimeZone] = outstandingTimezonesIds.map(TimeZone.getTimeZone) + val outstandingZoneIds: Seq[ZoneId] = outstandingTimezonesIds.map(DateTimeUtils.getZoneId) def withDefaultTimeZone[T](newDefaultTimeZone: TimeZone)(block: => T): T = { val originalDefaultTimeZone = TimeZone.getDefault diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 39eb7d1..464d0ab 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -33,7 +33,7 @@ class DateTimeUtilsSuite extends SparkFunSuite { private def defaultTz = 
DateTimeUtils.defaultTimeZone() test("nanoseconds truncation") { - val tf = TimestampFormatter.getFractionFormatter(DateTimeUtils.defaultTimeZone()) + val tf = TimestampFormatter.getFractionFormatter(DateTimeUtils.defaultTimeZone.toZoneId) def checkStringToTimestamp(originalTime: String, expectedParsedTime: String) { val parsedTimestampOp = DateTimeUtils.stringToTimestamp( UTF8String.fromString(originalTime), defaultTz) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala index 1675b61..d10c30c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/TimestampFormatterSuite.scala @@ -17,13 +17,12 @@ package org.apache.spark.sql.util -import java.time.{LocalDateTime, ZoneOffset} -import java.util.TimeZone +import java.time.{LocalDateTime, ZoneId, ZoneOffset} import java.util.concurrent.TimeUnit import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.plans.SQLHelper -import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils, TimestampFormatter} class TimestampFormatterSuite extends SparkFunSuite with SQLHelper { @@ -38,12 +37,12 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper { "Antarctica/Vostok" -> 1543723872001234L, "Asia/Hong_Kong" -> 1543716672001234L, "Europe/Amsterdam" -> 1543741872001234L) - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + DateTimeTestUtils.outstandingTimezonesIds.foreach { zoneId => val formatter = TimestampFormatter( "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", - TimeZone.getTimeZone(timeZone)) + DateTimeUtils.getZoneId(zoneId)) val microsSinceEpoch = formatter.parse(localDate) - assert(microsSinceEpoch === expectedMicros(timeZone)) + assert(microsSinceEpoch === 
expectedMicros(zoneId)) } } @@ -58,12 +57,12 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper { "Antarctica/Vostok" -> "2018-12-02T16:11:12.001234", "Asia/Hong_Kong" -> "2018-12-02T18:11:12.001234", "Europe/Amsterdam" -> "2018-12-02T11:11:12.001234") - DateTimeTestUtils.outstandingTimezonesIds.foreach { timeZone => + DateTimeTestUtils.outstandingTimezonesIds.foreach { zoneId => val formatter = TimestampFormatter( "yyyy-MM-dd'T'HH:mm:ss.SSSSSS", - TimeZone.getTimeZone(timeZone)) + DateTimeUtils.getZoneId(zoneId)) val timestamp = formatter.format(microsSinceEpoch) - assert(timestamp === expectedTimestamp(timeZone)) + assert(timestamp === expectedTimestamp(zoneId)) } } @@ -79,8 +78,8 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper { 1543749753123456L, 2177456523456789L, 11858049903010203L).foreach { micros => - DateTimeTestUtils.outstandingTimezones.foreach { timeZone => - val formatter = TimestampFormatter(pattern, timeZone) + DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => + val formatter = TimestampFormatter(pattern, zoneId) val timestamp = formatter.format(micros) val parsed = formatter.parse(timestamp) assert(micros === parsed) @@ -100,8 +99,8 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper { "2018-12-02T11:22:33.123456", "2039-01-01T01:02:03.456789", "2345-10-07T22:45:03.010203").foreach { timestamp => - DateTimeTestUtils.outstandingTimezones.foreach { timeZone => - val formatter = TimestampFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSS", timeZone) + DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => + val formatter = TimestampFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSS", zoneId) val micros = formatter.parse(timestamp) val formatted = formatter.format(micros) assert(timestamp === formatted) @@ -110,16 +109,14 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper { } test(" case insensitive parsing of am and pm") { - val formatter = TimestampFormatter( - "yyyy MMM dd hh:mm:ss a", 
- TimeZone.getTimeZone("UTC")) + val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", ZoneOffset.UTC) val micros = formatter.parse("2009 Mar 20 11:30:01 am") assert(micros === TimeUnit.SECONDS.toMicros( LocalDateTime.of(2009, 3, 20, 11, 30, 1).toEpochSecond(ZoneOffset.UTC))) } test("format fraction of second") { - val formatter = TimestampFormatter.getFractionFormatter(TimeZone.getTimeZone("UTC")) + val formatter = TimestampFormatter.getFractionFormatter(ZoneOffset.UTC) assert(formatter.format(0) === "1970-01-01 00:00:00") assert(formatter.format(1) === "1970-01-01 00:00:00.000001") assert(formatter.format(1000) === "1970-01-01 00:00:00.001") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 38ef72e..eec8d70 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -79,7 +79,7 @@ object HiveResult { private lazy val dateFormatter = DateFormatter() private lazy val timestampFormatter = TimestampFormatter.getFractionFormatter( - DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone)) + DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)) /** Hive outputs fields of structs slightly differently than top level attributes. 
*/ private def toHiveStructString(a: (Any, DataType)): String = a match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 0625cfb..6f42423 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -19,7 +19,8 @@ package org.apache.spark.sql.execution.datasources import java.lang.{Double => JDouble, Long => JLong} import java.math.{BigDecimal => JBigDecimal} -import java.util.{Locale, TimeZone} +import java.time.ZoneId +import java.util.Locale import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -100,7 +101,7 @@ object PartitioningUtils { validatePartitionColumns: Boolean, timeZoneId: String): PartitionSpec = { parsePartitions(paths, typeInference, basePaths, userSpecifiedSchema, caseSensitive, - validatePartitionColumns, DateTimeUtils.getTimeZone(timeZoneId)) + validatePartitionColumns, DateTimeUtils.getZoneId(timeZoneId)) } private[datasources] def parsePartitions( @@ -110,7 +111,7 @@ object PartitioningUtils { userSpecifiedSchema: Option[StructType], caseSensitive: Boolean, validatePartitionColumns: Boolean, - timeZone: TimeZone): PartitionSpec = { + zoneId: ZoneId): PartitionSpec = { val userSpecifiedDataTypes = if (userSpecifiedSchema.isDefined) { val nameToDataType = userSpecifiedSchema.get.fields.map(f => f.name -> f.dataType).toMap if (!caseSensitive) { @@ -130,11 +131,11 @@ object PartitioningUtils { } val dateFormatter = DateFormatter() - val timestampFormatter = TimestampFormatter(timestampPartitionPattern, timeZone) + val timestampFormatter = TimestampFormatter(timestampPartitionPattern, zoneId) // First, we need to parse every partition's path and see if we can find partition values. 
val (partitionValues, optDiscoveredBasePaths) = paths.map { path => parsePartition(path, typeInference, basePaths, userSpecifiedDataTypes, - validatePartitionColumns, timeZone, dateFormatter, timestampFormatter) + validatePartitionColumns, zoneId, dateFormatter, timestampFormatter) }.unzip // We create pairs of (path -> path's partition value) here @@ -169,7 +170,7 @@ object PartitioningUtils { "please load them separately and then union them.") val resolvedPartitionValues = - resolvePartitions(pathsWithPartitionValues, caseSensitive, timeZone) + resolvePartitions(pathsWithPartitionValues, caseSensitive, zoneId) // Creates the StructType which represents the partition columns. val fields = { @@ -219,7 +220,7 @@ object PartitioningUtils { basePaths: Set[Path], userSpecifiedDataTypes: Map[String, DataType], validatePartitionColumns: Boolean, - timeZone: TimeZone, + zoneId: ZoneId, dateFormatter: DateFormatter, timestampFormatter: TimestampFormatter): (Option[PartitionValues], Option[Path]) = { val columns = ArrayBuffer.empty[(String, Literal)] @@ -243,7 +244,7 @@ object PartitioningUtils { // Once we get the string, we try to parse it and find the partition column and value. val maybeColumn = parsePartitionColumn(currentPath.getName, typeInference, userSpecifiedDataTypes, - validatePartitionColumns, timeZone, dateFormatter, timestampFormatter) + validatePartitionColumns, zoneId, dateFormatter, timestampFormatter) maybeColumn.foreach(columns += _) // Now, we determine if we should stop. 
@@ -278,7 +279,7 @@ object PartitioningUtils { typeInference: Boolean, userSpecifiedDataTypes: Map[String, DataType], validatePartitionColumns: Boolean, - timeZone: TimeZone, + zoneId: ZoneId, dateFormatter: DateFormatter, timestampFormatter: TimestampFormatter): Option[(String, Literal)] = { val equalSignIndex = columnSpec.indexOf('=') @@ -298,11 +299,11 @@ object PartitioningUtils { val columnValueLiteral = inferPartitionColumnValue( rawColumnValue, false, - timeZone, + zoneId, dateFormatter, timestampFormatter) val columnValue = columnValueLiteral.eval() - val castedValue = Cast(columnValueLiteral, dataType, Option(timeZone.getID)).eval() + val castedValue = Cast(columnValueLiteral, dataType, Option(zoneId.getId)).eval() if (validatePartitionColumns && columnValue != null && castedValue == null) { throw new RuntimeException(s"Failed to cast value `$columnValue` to `$dataType` " + s"for partition column `$columnName`") @@ -312,7 +313,7 @@ object PartitioningUtils { inferPartitionColumnValue( rawColumnValue, typeInference, - timeZone, + zoneId, dateFormatter, timestampFormatter) } @@ -384,7 +385,7 @@ object PartitioningUtils { def resolvePartitions( pathsWithPartitionValues: Seq[(Path, PartitionValues)], caseSensitive: Boolean, - timeZone: TimeZone): Seq[PartitionValues] = { + zoneId: ZoneId): Seq[PartitionValues] = { if (pathsWithPartitionValues.isEmpty) { Seq.empty } else { @@ -401,7 +402,7 @@ object PartitioningUtils { val values = pathsWithPartitionValues.map(_._2) val columnCount = values.head.columnNames.size val resolvedValues = (0 until columnCount).map { i => - resolveTypeConflicts(values.map(_.literals(i)), timeZone) + resolveTypeConflicts(values.map(_.literals(i)), zoneId) } // Fills resolved literals back to each partition @@ -467,7 +468,7 @@ object PartitioningUtils { private[datasources] def inferPartitionColumnValue( raw: String, typeInference: Boolean, - timeZone: TimeZone, + zoneId: ZoneId, dateFormatter: DateFormatter, timestampFormatter: 
TimestampFormatter): Literal = { val decimalTry = Try { @@ -503,7 +504,7 @@ object PartitioningUtils { // TimestampType timestampFormatter.parse(unescapedRaw) // SPARK-23436: see comment for date - val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(timeZone.getID)).eval() + val timestampValue = Cast(Literal(unescapedRaw), TimestampType, Some(zoneId.getId)).eval() // Disallow TimestampType if the cast returned null require(timestampValue != null) Literal.create(timestampValue, TimestampType) @@ -607,12 +608,12 @@ object PartitioningUtils { * Given a collection of [[Literal]]s, resolves possible type conflicts by * [[findWiderTypeForPartitionColumn]]. */ - private def resolveTypeConflicts(literals: Seq[Literal], timeZone: TimeZone): Seq[Literal] = { + private def resolveTypeConflicts(literals: Seq[Literal], zoneId: ZoneId): Seq[Literal] = { val litTypes = literals.map(_.dataType) val desiredType = litTypes.reduce(findWiderTypeForPartitionColumn) literals.map { case l @ Literal(_, dataType) => - Literal.create(Cast(l, desiredType, Some(timeZone.getID)).eval(), desiredType) + Literal.create(Cast(l, desiredType, Some(zoneId.getId)).eval(), desiredType) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala index 724a0f3..fe45d67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala @@ -200,7 +200,7 @@ private[sql] object JDBCRelation extends Logging { case DateType => DateFormatter().format(value.toInt) case TimestampType => val timestampFormatter = TimestampFormatter.getFractionFormatter( - DateTimeUtils.getTimeZone(timeZoneId)) + DateTimeUtils.getZoneId(timeZoneId)) DateTimeUtils.timestampToString(timestampFormatter, value) } s"'$dateTimeStr'" diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index 864c1e9..febbe05 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet import java.io.File import java.math.BigInteger import java.sql.{Date, Timestamp} +import java.time.{ZoneId, ZoneOffset} import java.util.{Calendar, Locale, TimeZone} import scala.collection.mutable.ArrayBuffer @@ -36,7 +37,6 @@ import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, Timesta import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.{PartitionPath => Partition} import org.apache.spark.sql.execution.streaming.MemoryStream -import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ @@ -54,10 +54,9 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha val defaultPartitionName = ExternalCatalogUtils.DEFAULT_PARTITION_NAME - val timeZone = TimeZone.getDefault() - val timeZoneId = timeZone.getID + val timeZoneId = ZoneId.systemDefault() val df = DateFormatter() - val tf = TimestampFormatter(timestampPartitionPattern, timeZone) + val tf = TimestampFormatter(timestampPartitionPattern, timeZoneId) protected override def beforeAll(): Unit = { super.beforeAll() @@ -70,8 +69,8 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha } test("column type inference") { - def check(raw: String, literal: Literal, timeZone: TimeZone = timeZone): Unit = { - 
assert(inferPartitionColumnValue(raw, true, timeZone, df, tf) === literal) + def check(raw: String, literal: Literal, zoneId: ZoneId = timeZoneId): Unit = { + assert(inferPartitionColumnValue(raw, true, zoneId, df, tf) === literal) } check("10", Literal.create(10, IntegerType)) @@ -90,7 +89,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha c.set(Calendar.MILLISECOND, 0) check("1990-02-24 12:00:30", Literal.create(new Timestamp(c.getTimeInMillis), TimestampType), - TimeZone.getTimeZone("GMT")) + ZoneOffset.UTC) check(defaultPartitionName, Literal.create(null, NullType)) } @@ -199,13 +198,13 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha test("parse partition") { def check(path: String, expected: Option[PartitionValues]): Unit = { val actual = parsePartition(new Path(path), true, Set.empty[Path], - Map.empty, true, timeZone, df, tf)._1 + Map.empty, true, timeZoneId, df, tf)._1 assert(expected === actual) } def checkThrows[T <: Throwable: Manifest](path: String, expected: String): Unit = { val message = intercept[T] { - parsePartition(new Path(path), true, Set.empty[Path], Map.empty, true, timeZone, df, tf) + parsePartition(new Path(path), true, Set.empty[Path], Map.empty, true, timeZoneId, df, tf) }.getMessage assert(message.contains(expected)) @@ -251,7 +250,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha basePaths = Set(new Path("file://path/a=10")), Map.empty, true, - timeZone = timeZone, + zoneId = timeZoneId, df, tf)._1 @@ -264,7 +263,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest with Sha basePaths = Set(new Path("file://path")), Map.empty, true, - timeZone = timeZone, + zoneId = timeZoneId, df, tf)._1 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org