This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new bdddf31  [SPARK-31940][SQL][DOCS] Document the default JVM time zone 
in to/fromJavaDate and legacy date formatters
bdddf31 is described below

commit bdddf31d125652bd8975b3c1a9ce13f124c28698
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Tue Jun 9 15:20:13 2020 +0000

    [SPARK-31940][SQL][DOCS] Document the default JVM time zone in 
to/fromJavaDate and legacy date formatters
    
    ### What changes were proposed in this pull request?
    Update comments for `DateTimeUtils`.`toJavaDate` and `fromJavaDate`, and 
for the legacy date formatters `LegacySimpleDateFormatter` and 
`LegacyFastDateFormatter` regarding the default JVM time zone. The comments 
say that the default JVM time zone is used intentionally for backward 
compatibility with Spark 2.4 and earlier versions.
    
    Closes #28709
    
    ### Why are the changes needed?
    To document current behaviour of related methods in `DateTimeUtils` and the 
legacy date formatters. For example, correctness of 
`HiveResult.hiveResultString` and `toHiveString` is directly related to the 
same time zone used by `toJavaDate` and `LegacyFastDateFormatter`.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    By running the Scala style checker `./dev/scalastyle`
    
    Closes #28767 from MaxGekk/doc-legacy-formatters.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit de91915a24a5cb5cd725b0e0a7fa63a73c2fc277)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/catalyst/util/DateFormatter.scala    | 27 ++++++++++++++
 .../spark/sql/catalyst/util/DateTimeUtils.scala    | 42 +++++++++++-----------
 2 files changed, 49 insertions(+), 20 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
index ec8db46..07fa09d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala
@@ -100,6 +100,17 @@ trait LegacyDateFormatter extends DateFormatter {
   }
 }
 
+/**
+ * The legacy formatter is based on Apache Commons FastDateFormat. The 
formatter uses the default
+ * JVM time zone intentionally for compatibility with Spark 2.4 and earlier 
versions.
+ *
+ * Note: Use of the default JVM time zone makes the formatter compatible 
with the legacy
+ *       `DateTimeUtils` methods `toJavaDate` and `fromJavaDate` that are 
based on the default
+ *       JVM time zone too.
+ *
+ * @param pattern `java.text.SimpleDateFormat` compatible pattern.
+ * @param locale The locale overrides the system locale and is used in 
parsing/formatting.
+ */
 class LegacyFastDateFormatter(pattern: String, locale: Locale) extends 
LegacyDateFormatter {
   @transient
   private lazy val fdf = FastDateFormat.getInstance(pattern, locale)
@@ -109,6 +120,22 @@ class LegacyFastDateFormatter(pattern: String, locale: 
Locale) extends LegacyDat
   override def validatePatternString(): Unit = fdf
 }
 
+// scalastyle:off line.size.limit
+/**
+ * The legacy formatter is based on `java.text.SimpleDateFormat`. The 
formatter uses the default
+ * JVM time zone intentionally for compatibility with Spark 2.4 and earlier 
versions.
+ *
+ * Note: Use of the default JVM time zone makes the formatter compatible 
with the legacy
+ *       `DateTimeUtils` methods `toJavaDate` and `fromJavaDate` that are 
based on the default
+ *       JVM time zone too.
+ *
+ * @param pattern The pattern describing the date and time format.
+ *                See <a 
href="https://docs.oracle.com/javase/7/docs/api/java/text/SimpleDateFormat.html">
+ *                Date and Time Patterns</a>
+ * @param locale  The locale whose date format symbols should be used. It 
overrides the system
+ *                locale in parsing/formatting.
+ */
+// scalastyle:on line.size.limit
 class LegacySimpleDateFormatter(pattern: String, locale: Locale) extends 
LegacyDateFormatter {
   @transient
   private lazy val sdf = new SimpleDateFormat(pattern, locale)
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
index fb7875d..3f8417f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala
@@ -90,19 +90,20 @@ object DateTimeUtils {
   }
 
   /**
-   * Converts an instance of `java.sql.Date` to a number of days since the 
epoch
-   * 1970-01-01 via extracting date fields `year`, `month`, `days` from the 
input,
-   * creating a local date in Proleptic Gregorian calendar from the fields, and
-   * getting the number of days from the resulted local date.
+   * Converts a local date at the default JVM time zone to the number of days 
since 1970-01-01
+   * in the hybrid calendar (Julian + Gregorian) by discarding the time part. 
The resulting days are
+   * rebased from the hybrid to Proleptic Gregorian calendar. The rebasing 
of days is performed via
UTC time zone for simplicity because the difference between the two calendars 
is the same in
any given time zone as in the UTC time zone.
    *
-   * This approach was taken to have the same local date as the triple of 
`year`,
-   * `month`, `day` in the original hybrid calendar used by `java.sql.Date` and
-   * Proleptic Gregorian calendar used by Spark since version 3.0.0, see 
SPARK-26651.
+   * Note: The date is shifted by the offset of the default JVM time zone for 
backward compatibility
+   *       with Spark 2.4 and earlier versions. The goal of the shift is to 
get a local date derived
+   *       from the number of days that has the same date fields (year, month, 
day) as the original
+   *       `date` at the default JVM time zone.
    *
-   * @param date It represents a specific instant in time based on
-   *             the hybrid calendar which combines Julian and
-   *             Gregorian calendars.
-   * @return The number of days since epoch from java.sql.Date.
+   * @param date It represents a specific instant in time based on the hybrid 
calendar which
+   *             combines Julian and Gregorian calendars.
+   * @return The number of days since the epoch in Proleptic Gregorian 
calendar.
    */
   def fromJavaDate(date: Date): SQLDate = {
     val millisUtc = date.getTime
@@ -112,17 +113,18 @@ object DateTimeUtils {
   }
 
   /**
-   * The opposite to `fromJavaDate` method which converts a number of days to 
an
-   * instance of `java.sql.Date`. It builds a local date in Proleptic Gregorian
-   * calendar, extracts date fields `year`, `month`, `day`, and creates a local
-   * date in the hybrid calendar (Julian + Gregorian calendars) from the 
fields.
+   * Converts days since the epoch 1970-01-01 in Proleptic Gregorian calendar 
to a local date
+   * at the default JVM time zone in the hybrid calendar (Julian + Gregorian). 
It rebases the given
+   * days from Proleptic Gregorian to the hybrid calendar at UTC time zone for 
simplicity because
the difference between the two calendars doesn't depend on any time zone. The 
result is shifted
+   * by the time zone offset in wall clock to have the same date fields (year, 
month, day)
+   * at the default JVM time zone as the input `daysSinceEpoch` in Proleptic 
Gregorian calendar.
    *
-   * The purpose of the conversion is to have the same local date as the triple
-   * of `year`, `month`, `day` in the original Proleptic Gregorian calendar and
-   * in the target calender.
+   * Note: The date is shifted by the offset of the default JVM time zone for 
backward compatibility
+   *       with Spark 2.4 and earlier versions.
    *
-   * @param daysSinceEpoch The number of days since 1970-01-01.
-   * @return A `java.sql.Date` from number of days since epoch.
+   * @param daysSinceEpoch The number of days since 1970-01-01 in Proleptic 
Gregorian calendar.
+   * @return A local date in the hybrid calendar as `java.sql.Date` from 
number of days since epoch.
    */
   def toJavaDate(daysSinceEpoch: SQLDate): Date = {
     val rebasedDays = rebaseGregorianToJulianDays(daysSinceEpoch)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to