This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 2e90574 [SPARK-27414][SQL] make it clear that date type is timezone independent 2e90574 is described below commit 2e90574dd0e60ea960a33580dfb29654671b66f4 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Wed Apr 10 16:39:28 2019 +0800 [SPARK-27414][SQL] make it clear that date type is timezone independent ## What changes were proposed in this pull request? In the SQL standard, date type is a union of the `year`, `month` and `day` fields. It's timezone independent, which means it does not represent a specific point in the timeline. Spark SQL follows the SQL standard; this PR makes it clear that date type is timezone independent: 1. improve the doc to highlight that date is timezone independent. 2. when converting string to date, uses the java time API that can directly parse a `LocalDate` from a string, instead of converting `LocalDate` to an `Instant` at UTC first. 3. when converting date to string, uses the java time API that can directly format a `LocalDate` to a string, instead of converting `LocalDate` to an `Instant` at UTC first. 2 and 3 should not introduce any behavior changes. ## How was this patch tested? existing tests Closes #24325 from cloud-fan/doc. 
Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- docs/sql-migration-guide-upgrade.md | 2 +- docs/sql-reference.md | 6 ++++-- .../org/apache/spark/sql/catalyst/util/DateFormatter.scala | 12 ++++-------- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 7 ++++--- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index c3837f6..741c510 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -122,7 +122,7 @@ license: | - In Spark version 2.4 and earlier, the `current_date` function returns the current date shifted according to the SQL config `spark.sql.session.timeZone`. Since Spark 3.0, the function always returns the current date in the `UTC` time zone. - - Since Spark 3.0, `TIMESTAMP` literals are converted to strings using the SQL config `spark.sql.session.timeZone`, and `DATE` literals are formatted using the UTC time zone. In Spark version 2.4 and earlier, both conversions use the default time zone of the Java virtual machine. + - Since Spark 3.0, `TIMESTAMP` literals are converted to strings using the SQL config `spark.sql.session.timeZone`. In Spark version 2.4 and earlier, the conversion uses the default time zone of the Java virtual machine. - In Spark version 2.4, when a spark session is created via `cloneSession()`, the newly created spark session inherits its configuration from its parent `SparkContext` even though the same configuration may exist with a different value in its parent spark session. Since Spark 3.0, the configurations of a parent `SparkSession` have a higher precedence over the parent `SparkContext`. 
diff --git a/docs/sql-reference.md b/docs/sql-reference.md index ee99ed8..2ec26ec 100644 --- a/docs/sql-reference.md +++ b/docs/sql-reference.md @@ -46,8 +46,10 @@ Spark SQL and DataFrames support the following data types: - `BooleanType`: Represents boolean values. * Datetime type - `TimestampType`: Represents values comprising values of fields year, month, day, - hour, minute, and second. - - `DateType`: Represents values comprising values of fields year, month, day. + hour, minute, and second, with the session local time-zone. The timestamp value represents an + absolute point in time. + - `DateType`: Represents values comprising values of fields year, month and day, without a + time-zone. * Complex types - `ArrayType(elementType, containsNull)`: Represents values comprising a sequence of elements with the type of `elementType`. `containsNull` is used to indicate if diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala index 20e043a..9843297 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.catalyst.util -import java.time.{Instant, ZoneOffset} +import java.time.LocalDate import java.util.Locale -import java.util.concurrent.TimeUnit.SECONDS sealed trait DateFormatter extends Serializable { def parse(s: String): Int // returns days since epoch @@ -34,15 +33,12 @@ class Iso8601DateFormatter( private lazy val formatter = getOrCreateFormatter(pattern, locale) override def parse(s: String): Int = { - val parsed = formatter.parse(s) - val zonedDateTime = toZonedDateTime(parsed, ZoneOffset.UTC) - val seconds = zonedDateTime.toEpochSecond - SECONDS.toDays(seconds).toInt + val localDate = LocalDate.parse(s, formatter) + DateTimeUtils.localDateToDays(localDate) } 
override def format(days: Int): String = { - val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY) - formatter.withZone(ZoneOffset.UTC).format(instant) + LocalDate.ofEpochDay(days).format(formatter) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 7687afa..7f3bb83 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -344,7 +344,9 @@ object DateTimeUtils { days.toInt } - def localDateToDays(localDate: LocalDate): Int = localDate.toEpochDay.toInt + def localDateToDays(localDate: LocalDate): Int = { + Math.toIntExact(localDate.toEpochDay) + } def daysToLocalDate(days: Int): LocalDate = LocalDate.ofEpochDay(days) @@ -396,8 +398,7 @@ object DateTimeUtils { segments(i) = currentSegmentValue try { val localDate = LocalDate.of(segments(0), segments(1), segments(2)) - val instant = localDate.atStartOfDay(ZoneOffset.UTC).toInstant - Some(instantToDays(instant)) + Some(localDateToDays(localDate)) } catch { case NonFatal(_) => None } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org