This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 071566c [SPARK-35769][SQL] Truncate java.time.Period by fields of year-month interval type 071566c is described below commit 071566caf3d1efc752006afaec974c6c4cfdc679 Author: Angerszhuuuu <angers....@gmail.com> AuthorDate: Fri Jun 18 11:55:57 2021 +0300 [SPARK-35769][SQL] Truncate java.time.Period by fields of year-month interval type ### What changes were proposed in this pull request? Support truncating java.time.Period values by the fields of the year-month interval type. ### Why are the changes needed? To follow the SQL standard and respect the field restriction of the target year-month type. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a unit test (UT) to CatalystTypeConvertersSuite. Closes #32945 from AngersZhuuuu/SPARK-35769. Authored-by: Angerszhuuuu <angers....@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../apache/spark/sql/catalyst/CatalystTypeConverters.scala | 11 ++++++----- .../org/apache/spark/sql/catalyst/util/IntervalUtils.scala | 11 ++++++++++- .../test/scala/org/apache/spark/sql/RandomDataGenerator.scala | 7 +++++-- .../spark/sql/catalyst/CatalystTypeConvertersSuite.scala | 10 ++++++++++ 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index 38790e0..08a5fd5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.YearMonthIntervalType._ import org.apache.spark.unsafe.types.UTF8String /** @@ -77,8 +78,7 @@ object 
CatalystTypeConverters { case DoubleType => DoubleConverter // TODO(SPARK-35726): Truncate java.time.Duration by fields of day-time interval type case _: DayTimeIntervalType => DurationConverter - // TODO(SPARK-35769): Truncate java.time.Period by fields of year-month interval type - case _: YearMonthIntervalType => PeriodConverter + case YearMonthIntervalType(_, endField) => PeriodConverter(endField) case dataType: DataType => IdentityConverter(dataType) } converter.asInstanceOf[CatalystTypeConverter[Any, Any, Any]] @@ -444,9 +444,10 @@ object CatalystTypeConverters { IntervalUtils.microsToDuration(row.getLong(column)) } - private object PeriodConverter extends CatalystTypeConverter[Period, Period, Any] { + private case class PeriodConverter(endField: Byte) + extends CatalystTypeConverter[Period, Period, Any] { override def toCatalystImpl(scalaValue: Period): Int = { - IntervalUtils.periodToMonths(scalaValue) + IntervalUtils.periodToMonths(scalaValue, endField) } override def toScala(catalystValue: Any): Period = { if (catalystValue == null) null @@ -523,7 +524,7 @@ object CatalystTypeConverters { (key: Any) => convertToCatalyst(key), (value: Any) => convertToCatalyst(value)) case d: Duration => DurationConverter.toCatalyst(d) - case p: Period => PeriodConverter.toCatalyst(p) + case p: Period => PeriodConverter(MONTH).toCatalyst(p) case other => other } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 9e67004..e87ea51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -925,8 +925,17 @@ object IntervalUtils { * @throws ArithmeticException If numeric overflow occurs */ def periodToMonths(period: Period): Int = { + periodToMonths(period, YearMonthIntervalType.MONTH) + } + + def 
periodToMonths(period: Period, endField: Byte): Int = { val monthsInYears = Math.multiplyExact(period.getYears, MONTHS_PER_YEAR) - Math.addExact(monthsInYears, period.getMonths) + val months = Math.addExact(monthsInYears, period.getMonths) + if (endField == YearMonthIntervalType.YEAR) { + months - months % MONTHS_PER_YEAR + } else { + months + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 603c88d..6201f12 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -26,10 +26,11 @@ import scala.collection.mutable import scala.util.{Random, Try} import org.apache.spark.sql.catalyst.CatalystTypeConverters -import org.apache.spark.sql.catalyst.util.DateTimeConstants.{MICROS_PER_MILLIS, MILLIS_PER_DAY} +import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.YearMonthIntervalType.YEAR import org.apache.spark.unsafe.types.CalendarInterval /** * Random data generators for Spark SQL DataTypes. 
These generators do not generate uniformly random @@ -284,7 +285,9 @@ object RandomDataGenerator { new CalendarInterval(months, days, ns) }) case _: DayTimeIntervalType => Some(() => Duration.of(rand.nextLong(), ChronoUnit.MICROS)) - case _: YearMonthIntervalType => Some(() => Period.ofMonths(rand.nextInt()).normalized()) + case YearMonthIntervalType(_, YEAR) => + Some(() => Period.ofYears(rand.nextInt() / MONTHS_PER_YEAR).normalized()) + case YearMonthIntervalType(_, _) => Some(() => Period.ofMonths(rand.nextInt()).normalized()) case DecimalType.Fixed(precision, scale) => Some( () => BigDecimal.apply( rand.nextLong() % math.pow(10, precision).toLong, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala index 1c2359c..3a68d01 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.{DateTimeConstants, DateTimeUtils, GenericArrayData, IntervalUtils} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.YearMonthIntervalType._ import org.apache.spark.unsafe.types.UTF8String class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper { @@ -309,6 +310,15 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper { assert(errMsg.contains("integer overflow")) } + test("SPARK-35769: Truncate java.time.Period by fields of year-month interval type") { + Seq(YearMonthIntervalType(YEAR, YEAR) -> 12, + YearMonthIntervalType(YEAR, MONTH) -> 13, + YearMonthIntervalType(MONTH, MONTH) -> 13) + .foreach { case (ym, value) => + assert(CatalystTypeConverters.createToCatalystConverter(ym)(Period.of(1, 
1, 0)) == value) + } + } + test("SPARK-34615: converting YearMonthIntervalType to java.time.Period") { Seq( 0, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org