This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 3dfd456 [SPARK-34668][SQL] Support casting of day-time intervals to strings 3dfd456 is described below commit 3dfd456b2c4133f751a67e4132196d2d1470af29 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Wed Apr 7 13:28:55 2021 +0000 [SPARK-34668][SQL] Support casting of day-time intervals to strings ### What changes were proposed in this pull request? 1. Added a new method `toDayTimeIntervalString()` to `IntervalUtils` which converts a day-time interval, given as a number of microseconds, to a string in the form **"INTERVAL '[sign]days hours:minutes:secondsWithFraction' DAY TO SECOND"**. 2. Extended the `Cast` expression to support casting of `DayTimeIntervalType` to `StringType`. ### Why are the changes needed? To conform to the ANSI SQL standard, which requires support for such casting. ### Does this PR introduce _any_ user-facing change? It should not, because the new day-time interval type has not been released yet. ### How was this patch tested? Added new tests for casting: ``` $ build/sbt "testOnly *CastSuite*" ``` Closes #32070 from MaxGekk/cast-dt-interval-to-string. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../spark/sql/catalyst/expressions/Cast.scala | 6 ++++ .../spark/sql/catalyst/util/IntervalUtils.scala | 32 +++++++++++++++++++++ .../spark/sql/catalyst/expressions/CastSuite.scala | 33 +++++++++++++++++++--- 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 1c37713..879b154 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -408,6 +408,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit buildCast[Any](_, o => UTF8String.fromString(udt.deserialize(o).toString)) case YearMonthIntervalType => buildCast[Int](_, i => UTF8String.fromString(IntervalUtils.toYearMonthIntervalString(i))) + case DayTimeIntervalType => + buildCast[Long](_, i => UTF8String.fromString(IntervalUtils.toDayTimeIntervalString(i))) case _ => buildCast[Any](_, o => UTF8String.fromString(o.toString)) } @@ -1127,6 +1129,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val iu = IntervalUtils.getClass.getName.stripSuffix("$") (c, evPrim, _) => code"""$evPrim = UTF8String.fromString($iu.toYearMonthIntervalString($c));""" + case DayTimeIntervalType => + val iu = IntervalUtils.getClass.getName.stripSuffix("$") + (c, evPrim, _) => + code"""$evPrim = UTF8String.fromString($iu.toDayTimeIntervalString($c));""" case _ => (c, evPrim, evNull) => code"$evPrim = UTF8String.fromString(String.valueOf($c));" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 8cd9d28..b96a7b9 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -851,4 +851,36 @@ object IntervalUtils { } s"INTERVAL '$sign${absMonths / MONTHS_PER_YEAR}-${absMonths % MONTHS_PER_YEAR}' YEAR TO MONTH" } + + /** + * Converts a day-time interval as a number of microseconds to its textual representation + * which conforms to the ANSI SQL standard. + * + * @param micros The number of microseconds, positive or negative + * @return Day-time interval string + */ + def toDayTimeIntervalString(micros: Long): String = { + var sign = "" + var rest = micros + if (micros < 0) { + if (micros == Long.MinValue) { + // Especial handling of minimum `Long` value because negate op overflows `Long`. + // seconds = 106751991 * (24 * 60 * 60) + 4 * 60 * 60 + 54 = 9223372036854 + // microseconds = -9223372036854000000L-775808 == Long.MinValue + return "INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND" + } else { + sign = "-" + rest = -rest + } + } + val seconds = rest % MICROS_PER_MINUTE + rest /= MICROS_PER_MINUTE + val minutes = rest % MINUTES_PER_HOUR + rest /= MINUTES_PER_HOUR + val hours = rest % HOURS_PER_DAY + val days = rest / HOURS_PER_DAY + val leadSecZero = if (seconds < 10 * MICROS_PER_SECOND) "0" else "" + val secStr = java.math.BigDecimal.valueOf(seconds, 6).stripTrailingZeros().toPlainString() + f"INTERVAL '$sign$days $hours%02d:$minutes%02d:$leadSecZero$secStr' DAY TO SECOND" + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 547bf88..0554d07 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.catalyst.expressions import 
java.sql.{Date, Timestamp} -import java.time.{DateTimeException, Period} +import java.time.{DateTimeException, Duration, Period} +import java.time.temporal.ChronoUnit import java.util.{Calendar, TimeZone} import scala.collection.parallel.immutable.ParVector @@ -35,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -62,9 +64,7 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } atomicTypes.foreach(dt => checkNullCast(NullType, dt)) - (atomicTypes -- Set( - // TODO(SPARK-34668): Support casting of day-time intervals to strings - DayTimeIntervalType)).foreach(dt => checkNullCast(dt, StringType)) + atomicTypes.foreach(dt => checkNullCast(dt, StringType)) checkNullCast(StringType, BinaryType) checkNullCast(StringType, BooleanType) numericTypes.foreach(dt => checkNullCast(dt, BooleanType)) @@ -818,6 +818,31 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkConsistencyBetweenInterpretedAndCodegen( (child: Expression) => Cast(child, StringType), YearMonthIntervalType) } + + test("SPARK-34668: cast day-time interval to string") { + Seq( + Duration.ZERO -> "0 00:00:00", + Duration.of(1, ChronoUnit.MICROS) -> "0 00:00:00.000001", + Duration.ofMillis(-1) -> "-0 00:00:00.001", + Duration.ofMillis(1234) -> "0 00:00:01.234", + Duration.ofSeconds(-9).minus(999999, ChronoUnit.MICROS) -> "-0 00:00:09.999999", + Duration.ofMinutes(30).plusMillis(59010) -> "0 00:30:59.01", + Duration.ofHours(-23).minusSeconds(59) -> "-0 23:00:59", + Duration.ofDays(1).plus(12345678, ChronoUnit.MICROS) -> "1 00:00:12.345678", + 
Duration.ofDays(-1234).minusHours(23).minusMinutes(59).minusSeconds(59).minusMillis(999) -> + "-1234 23:59:59.999", + microsToDuration(Long.MaxValue) -> "106751991 04:00:54.775807", + microsToDuration(Long.MinValue + 1) -> "-106751991 04:00:54.775807", + microsToDuration(Long.MinValue) -> "-106751991 04:00:54.775808" + ).foreach { case (period, intervalPayload) => + checkEvaluation( + Cast(Literal(period), StringType), + s"INTERVAL '$intervalPayload' DAY TO SECOND") + } + + checkConsistencyBetweenInterpretedAndCodegen( + (child: Expression) => Cast(child, StringType), DayTimeIntervalType) + } } abstract class AnsiCastSuiteBase extends CastSuiteBase { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org