This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 2e90574  [SPARK-27414][SQL] make it clear that date type is timezone 
independent
2e90574 is described below

commit 2e90574dd0e60ea960a33580dfb29654671b66f4
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Wed Apr 10 16:39:28 2019 +0800

    [SPARK-27414][SQL] make it clear that date type is timezone independent
    
    ## What changes were proposed in this pull request?
    
    In SQL standard, date type is a union of the `year`, `month` and `day` 
fields. It's timezone independent, which means it does not represent a specific 
point in the timeline.
    
    Spark SQL follows the SQL standard, this PR is to make it clear that date 
type is timezone independent.
    1. improve the doc to highlight that date is timezone independent.
    2. when converting string to date, use the java time API that can 
directly parse a `LocalDate` from a string, instead of converting `LocalDate` 
to an `Instant` at UTC first.
    3. when converting date to string, use the java time API that can directly 
format a `LocalDate` to a string, instead of converting `LocalDate` to an 
`Instant` at UTC first.
    
    2 and 3 should not introduce any behavior changes.
    
    ## How was this patch tested?
    
    existing tests
    
    Closes #24325 from cloud-fan/doc.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 docs/sql-migration-guide-upgrade.md                          |  2 +-
 docs/sql-reference.md                                        |  6 ++++--
 .../org/apache/spark/sql/catalyst/util/DateFormatter.scala   | 12 ++++--------
 .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala   |  7 ++++---
 4 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/docs/sql-migration-guide-upgrade.md 
b/docs/sql-migration-guide-upgrade.md
index c3837f6..741c510 100644
--- a/docs/sql-migration-guide-upgrade.md
+++ b/docs/sql-migration-guide-upgrade.md
@@ -122,7 +122,7 @@ license: |
 
   - In Spark version 2.4 and earlier, the `current_date` function returns the 
current date shifted according to the SQL config `spark.sql.session.timeZone`. 
Since Spark 3.0, the function always returns the current date in the `UTC` time 
zone.
 
-  - Since Spark 3.0, `TIMESTAMP` literals are converted to strings using the 
SQL config `spark.sql.session.timeZone`, and `DATE` literals are formatted 
using the UTC time zone. In Spark version 2.4 and earlier, both conversions use 
the default time zone of the Java virtual machine.
+  - Since Spark 3.0, `TIMESTAMP` literals are converted to strings using the 
SQL config `spark.sql.session.timeZone`. In Spark version 2.4 and earlier, the 
conversion uses the default time zone of the Java virtual machine.
 
   - In Spark version 2.4, when a spark session is created via 
`cloneSession()`, the newly created spark session inherits its configuration 
from its parent `SparkContext` even though the same configuration may exist 
with a different value in its parent spark session. Since Spark 3.0, the 
configurations of a parent `SparkSession` have a higher precedence over the 
parent `SparkContext`.
 
diff --git a/docs/sql-reference.md b/docs/sql-reference.md
index ee99ed8..2ec26ec 100644
--- a/docs/sql-reference.md
+++ b/docs/sql-reference.md
@@ -46,8 +46,10 @@ Spark SQL and DataFrames support the following data types:
   - `BooleanType`: Represents boolean values.
 * Datetime type
   - `TimestampType`: Represents values comprising values of fields year, 
month, day,
-  hour, minute, and second.
-  - `DateType`: Represents values comprising values of fields year, month, day.
+  hour, minute, and second, with the session local time-zone. The timestamp 
value represents an
+  absolute point in time.
+  - `DateType`: Represents values comprising values of fields year, month and 
day, without a
+  time-zone.
 * Complex types
   - `ArrayType(elementType, containsNull)`: Represents values comprising a 
sequence of
   elements with the type of `elementType`. `containsNull` is used to indicate 
if
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index 20e043a..9843297 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -17,9 +17,8 @@
 
 package org.apache.spark.sql.catalyst.util
 
-import java.time.{Instant, ZoneOffset}
+import java.time.LocalDate
 import java.util.Locale
-import java.util.concurrent.TimeUnit.SECONDS
 
 sealed trait DateFormatter extends Serializable {
   def parse(s: String): Int // returns days since epoch
@@ -34,15 +33,12 @@ class Iso8601DateFormatter(
   private lazy val formatter = getOrCreateFormatter(pattern, locale)
 
   override def parse(s: String): Int = {
-    val parsed = formatter.parse(s)
-    val zonedDateTime = toZonedDateTime(parsed, ZoneOffset.UTC)
-    val seconds = zonedDateTime.toEpochSecond
-    SECONDS.toDays(seconds).toInt
+    val localDate = LocalDate.parse(s, formatter)
+    DateTimeUtils.localDateToDays(localDate)
   }
 
   override def format(days: Int): String = {
-    val instant = Instant.ofEpochSecond(days * DateTimeUtils.SECONDS_PER_DAY)
-    formatter.withZone(ZoneOffset.UTC).format(instant)
+    LocalDate.ofEpochDay(days).format(formatter)
   }
 }
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index 7687afa..7f3bb83 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -344,7 +344,9 @@ object DateTimeUtils {
     days.toInt
   }
 
-  def localDateToDays(localDate: LocalDate): Int = localDate.toEpochDay.toInt
+  def localDateToDays(localDate: LocalDate): Int = {
+    Math.toIntExact(localDate.toEpochDay)
+  }
 
   def daysToLocalDate(days: Int): LocalDate = LocalDate.ofEpochDay(days)
 
@@ -396,8 +398,7 @@ object DateTimeUtils {
     segments(i) = currentSegmentValue
     try {
       val localDate = LocalDate.of(segments(0), segments(1), segments(2))
-      val instant = localDate.atStartOfDay(ZoneOffset.UTC).toInstant
-      Some(instantToDays(instant))
+      Some(localDateToDays(localDate))
     } catch {
       case NonFatal(_) => None
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to