[spark] branch master updated: [SPARK-35769][SQL] Truncate java.time.Period by fields of year-month interval type

maxgekk Fri, 18 Jun 2021 01:56:33 -0700

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 071566c  [SPARK-35769][SQL] Truncate java.time.Period by fields of 
year-month interval type
071566c is described below

commit 071566caf3d1efc752006afaec974c6c4cfdc679
Author: Angerszhuuuu <angers....@gmail.com>
AuthorDate: Fri Jun 18 11:55:57 2021 +0300

    [SPARK-35769][SQL] Truncate java.time.Period by fields of year-month 
interval type
    
    ### What changes were proposed in this pull request?
    Support truncate java.time.Period by fields of year-month interval type
    
    ### Why are the changes needed?
    To follow the SQL standard and respect the field restriction of the target 
year-month type.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added UT
    
    Closes #32945 from AngersZhuuuu/SPARK-35769.
    
    Authored-by: Angerszhuuuu <angers....@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../apache/spark/sql/catalyst/CatalystTypeConverters.scala    | 11 ++++++-----
 .../org/apache/spark/sql/catalyst/util/IntervalUtils.scala    | 11 ++++++++++-
 .../test/scala/org/apache/spark/sql/RandomDataGenerator.scala |  7 +++++--
 .../spark/sql/catalyst/CatalystTypeConvertersSuite.scala      | 10 ++++++++++
 4 files changed, 31 insertions(+), 8 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
index 38790e0..08a5fd5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.YearMonthIntervalType._
 import org.apache.spark.unsafe.types.UTF8String
 
 /**
@@ -77,8 +78,7 @@ object CatalystTypeConverters {
       case DoubleType => DoubleConverter
       // TODO(SPARK-35726): Truncate java.time.Duration by fields of day-time 
interval type
       case _: DayTimeIntervalType => DurationConverter
-      // TODO(SPARK-35769): Truncate java.time.Period by fields of year-month 
interval type
-      case _: YearMonthIntervalType => PeriodConverter
+      case YearMonthIntervalType(_, endField) => PeriodConverter(endField)
       case dataType: DataType => IdentityConverter(dataType)
     }
     converter.asInstanceOf[CatalystTypeConverter[Any, Any, Any]]
@@ -444,9 +444,10 @@ object CatalystTypeConverters {
       IntervalUtils.microsToDuration(row.getLong(column))
   }
 
-  private object PeriodConverter extends CatalystTypeConverter[Period, Period, 
Any] {
+  private case class PeriodConverter(endField: Byte)
+      extends CatalystTypeConverter[Period, Period, Any] {
     override def toCatalystImpl(scalaValue: Period): Int = {
-      IntervalUtils.periodToMonths(scalaValue)
+      IntervalUtils.periodToMonths(scalaValue, endField)
     }
     override def toScala(catalystValue: Any): Period = {
       if (catalystValue == null) null
@@ -523,7 +524,7 @@ object CatalystTypeConverters {
         (key: Any) => convertToCatalyst(key),
         (value: Any) => convertToCatalyst(value))
     case d: Duration => DurationConverter.toCatalyst(d)
-    case p: Period => PeriodConverter.toCatalyst(p)
+    case p: Period => PeriodConverter(MONTH).toCatalyst(p)
     case other => other
   }
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
index 9e67004..e87ea51 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala
@@ -925,8 +925,17 @@ object IntervalUtils {
    * @throws ArithmeticException If numeric overflow occurs
    */
   def periodToMonths(period: Period): Int = {
+    periodToMonths(period, YearMonthIntervalType.MONTH)
+  }
+
+  def periodToMonths(period: Period, endField: Byte): Int = {
     val monthsInYears = Math.multiplyExact(period.getYears, MONTHS_PER_YEAR)
-    Math.addExact(monthsInYears, period.getMonths)
+    val months = Math.addExact(monthsInYears, period.getMonths)
+    if (endField == YearMonthIntervalType.YEAR) {
+      months - months % MONTHS_PER_YEAR
+    } else {
+      months
+    }
   }
 
   /**
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 603c88d..6201f12 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -26,10 +26,11 @@ import scala.collection.mutable
 import scala.util.{Random, Try}
 
 import org.apache.spark.sql.catalyst.CatalystTypeConverters
-import 
org.apache.spark.sql.catalyst.util.DateTimeConstants.{MICROS_PER_MILLIS, 
MILLIS_PER_DAY}
+import org.apache.spark.sql.catalyst.util.DateTimeConstants._
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.YearMonthIntervalType.YEAR
 import org.apache.spark.unsafe.types.CalendarInterval
 /**
  * Random data generators for Spark SQL DataTypes. These generators do not 
generate uniformly random
@@ -284,7 +285,9 @@ object RandomDataGenerator {
         new CalendarInterval(months, days, ns)
       })
       case _: DayTimeIntervalType => Some(() => Duration.of(rand.nextLong(), 
ChronoUnit.MICROS))
-      case _: YearMonthIntervalType => Some(() => 
Period.ofMonths(rand.nextInt()).normalized())
+      case YearMonthIntervalType(_, YEAR) =>
+        Some(() => Period.ofYears(rand.nextInt() / 
MONTHS_PER_YEAR).normalized())
+      case YearMonthIntervalType(_, _) => Some(() => 
Period.ofMonths(rand.nextInt()).normalized())
       case DecimalType.Fixed(precision, scale) => Some(
         () => BigDecimal.apply(
           rand.nextLong() % math.pow(10, precision).toLong,
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
index 1c2359c..3a68d01 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils, 
GenericArrayData, IntervalUtils}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.YearMonthIntervalType._
 import org.apache.spark.unsafe.types.UTF8String
 
 class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper {
@@ -309,6 +310,15 @@ class CatalystTypeConvertersSuite extends SparkFunSuite 
with SQLHelper {
     assert(errMsg.contains("integer overflow"))
   }
 
+  test("SPARK-35769: Truncate java.time.Period by fields of year-month 
interval type") {
+    Seq(YearMonthIntervalType(YEAR, YEAR) -> 12,
+      YearMonthIntervalType(YEAR, MONTH) -> 13,
+      YearMonthIntervalType(MONTH, MONTH) -> 13)
+      .foreach { case (ym, value) =>
+        
assert(CatalystTypeConverters.createToCatalystConverter(ym)(Period.of(1, 1, 0)) 
== value)
+      }
+  }
+
   test("SPARK-34615: converting YearMonthIntervalType to java.time.Period") {
     Seq(
       0,

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-35769][SQL] Truncate java.time.Period by fields of year-month interval type

Reply via email to