This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 890ee47a9a74 [SPARK-51454][SQL] Support cast from time to string
890ee47a9a74 is described below

commit 890ee47a9a74d48f529f3c2cffc076530e971453
Author: Max Gekk <[email protected]>
AuthorDate: Mon Mar 10 10:44:08 2025 -0700

    [SPARK-51454][SQL] Support cast from time to string
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to support casting of TIME to STRING using the 
fraction formatter which formats times according to the pattern 
`HH:mm:ss.[..fff..]`. The pattern `[..fff..]` is a fraction of second up to 
microsecond resolution. The formatter does not output trailing zeros in the 
fraction. Apparently the `ToPrettyString` expression and `.show()` support such 
casting too.
    
    Also the PR adds support for TIME literals created from 
`java.time.LocalTime` (used in tests).
    
    ### Why are the changes needed?
    To output time values in user friendly format. Before the changes, show 
prints internal representation of time values:
    ```scala
    scala> Seq(LocalTime.parse("17:18:19")).toDS.show()
    +-----------+
    |      value|
    +-----------+
    |62299000000|
    +-----------+
    ```
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. After the changes, the command above outputs time values in new format:
    ```scala
    scala> Seq(LocalTime.parse("17:18:19")).toDS.show()
    +--------+
    |   value|
    +--------+
    |17:18:19|
    +--------+
    ```
    
    ### How was this patch tested?
    By running new tests:
    ```
    $ build/sbt "test:testOnly *ToPrettyStringSuite"
    $ build/sbt "test:testOnly *CastWithAnsiOnSuite"
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #50224 from MaxGekk/time-cast-to-string.
    
    Authored-by: Max Gekk <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../spark/sql/catalyst/expressions/ToStringBase.scala    | 10 +++++++++-
 .../apache/spark/sql/catalyst/expressions/literals.scala |  7 ++++---
 .../spark/sql/catalyst/expressions/CastSuiteBase.scala   | 16 ++++++++++++++--
 .../sql/catalyst/expressions/ToPrettyStringSuite.scala   |  7 +++++++
 .../org/apache/spark/sql/types/DataTypeTestUtils.scala   |  4 ++++
 5 files changed, 38 insertions(+), 6 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
index de72b94df3ac..6cfcde5f52da 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
@@ -22,7 +22,7 @@ import java.time.ZoneOffset
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
-import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, 
DateFormatter, IntervalStringStyles, IntervalUtils, MapData, SparkStringUtils, 
TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils, 
DateFormatter, FractionTimeFormatter, IntervalStringStyles, IntervalUtils, 
MapData, SparkStringUtils, TimestampFormatter}
 import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle
@@ -34,6 +34,7 @@ import org.apache.spark.util.ArrayImplicits._
 trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
 
   private lazy val dateFormatter = DateFormatter()
+  private lazy val timeFormatter = new FractionTimeFormatter()
   private lazy val timestampFormatter = 
TimestampFormatter.getFractionFormatter(zoneId)
   private lazy val timestampNTZFormatter = 
TimestampFormatter.getFractionFormatter(ZoneOffset.UTC)
 
@@ -73,6 +74,8 @@ trait ToStringBase { self: UnaryExpression with 
TimeZoneAwareExpression =>
       acceptAny[Long](t => UTF8String.fromString(timestampFormatter.format(t)))
     case TimestampNTZType =>
       acceptAny[Long](t => 
UTF8String.fromString(timestampNTZFormatter.format(t)))
+    case _: TimeType =>
+      acceptAny[Long](t => UTF8String.fromString(timeFormatter.format(t)))
     case ArrayType(et, _) =>
       acceptAny[ArrayData](array => {
         val builder = new UTF8StringBuilder
@@ -224,6 +227,11 @@ trait ToStringBase { self: UnaryExpression with 
TimeZoneAwareExpression =>
           ctx.addReferenceObj("timestampNTZFormatter", timestampNTZFormatter),
           timestampNTZFormatter.getClass)
         (c, evPrim) => code"$evPrim = UTF8String.fromString($tf.format($c));"
+      case _: TimeType =>
+        val tf = JavaCode.global(
+          ctx.addReferenceObj("timeFormatter", timeFormatter),
+          timeFormatter.getClass)
+        (c, evPrim) => code"$evPrim = UTF8String.fromString($tf.format($c));"
       case CalendarIntervalType =>
         (c, evPrim) => code"$evPrim = UTF8String.fromString($c.toString());"
       case ArrayType(et, _) =>
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 5a34b21703e5..6968b8a4cb2e 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -28,7 +28,7 @@ import java.lang.{Short => JavaShort}
 import java.math.{BigDecimal => JavaBigDecimal}
 import java.nio.charset.StandardCharsets
 import java.sql.{Date, Timestamp}
-import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period, 
ZoneOffset}
+import java.time.{Duration, Instant, LocalDate, LocalDateTime, LocalTime, 
Period, ZoneOffset}
 import java.util
 import java.util.Objects
 
@@ -49,7 +49,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern
 import org.apache.spark.sql.catalyst.trees.TreePattern.{LITERAL, NULL_LITERAL, 
TRUE_OR_FALSE_LITERAL}
 import org.apache.spark.sql.catalyst.types._
 import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToMicros
+import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, 
localTimeToMicros}
 import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
 import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, 
periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString}
 import org.apache.spark.sql.errors.{QueryCompilationErrors, 
QueryExecutionErrors}
@@ -89,6 +89,7 @@ object Literal {
     case l: LocalDateTime => Literal(DateTimeUtils.localDateTimeToMicros(l), 
TimestampNTZType)
     case ld: LocalDate => Literal(ld.toEpochDay.toInt, DateType)
     case d: Date => Literal(DateTimeUtils.fromJavaDate(d), DateType)
+    case lt: LocalTime => Literal(localTimeToMicros(lt), TimeType())
     case d: Duration => Literal(durationToMicros(d), DayTimeIntervalType())
     case p: Period => Literal(periodToMonths(p), YearMonthIntervalType())
     case a: Array[Byte] => Literal(a, BinaryType)
@@ -521,7 +522,7 @@ case class Literal (value: Any, dataType: DataType) extends 
LeafExpression {
           }
         case ByteType | ShortType =>
           ExprCode.forNonNullValue(JavaCode.expression(s"($javaType)$value", 
dataType))
-        case TimestampType | TimestampNTZType | LongType | _: 
DayTimeIntervalType =>
+        case TimestampType | TimestampNTZType | LongType | _: 
DayTimeIntervalType | _: TimeType =>
           toExprCode(s"${value}L")
         case _ =>
           val constRef = ctx.addReferenceObj("literal", value, javaType)
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index cec49a5ae1de..11c96a4ff540 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.sql.catalyst.expressions
 
 import java.sql.{Date, Timestamp}
-import java.time.{Duration, LocalDate, LocalDateTime, Period}
+import java.time.{Duration, LocalDate, LocalDateTime, LocalTime, Period}
 import java.time.temporal.ChronoUnit
 import java.util.{Calendar, Locale, TimeZone}
 
@@ -82,7 +82,7 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
     }
 
     atomicTypes.foreach(dt => checkNullCast(NullType, dt))
-    atomicTypes.foreach(dt => checkNullCast(dt, StringType))
+    (atomicTypes ++ timeTypes).foreach(dt => checkNullCast(dt, StringType))
     checkNullCast(StringType, BinaryType)
     checkNullCast(StringType, BooleanType)
     numericTypes.foreach(dt => checkNullCast(dt, BooleanType))
@@ -1457,4 +1457,16 @@ abstract class CastSuiteBase extends SparkFunSuite with 
ExpressionEvalHelper {
       }
     }
   }
+
+  test("cast time to string") {
+    Seq(
+      LocalTime.MIDNIGHT -> "00:00:00",
+      LocalTime.NOON -> "12:00:00",
+      LocalTime.of(23, 59, 59) -> "23:59:59",
+      LocalTime.of(23, 59, 59, 1000000) -> "23:59:59.001",
+      LocalTime.of(23, 59, 59, 999999000) -> "23:59:59.999999"
+    ).foreach { case (time, expectedStr) =>
+      checkEvaluation(Cast(Literal(time), StringType), expectedStr)
+    }
+  }
 }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala
index 64529bf54bd2..5c297c00acc0 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala
@@ -134,4 +134,11 @@ class ToPrettyStringSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     val prettyString = ToPrettyString(child)
     assert(prettyString.sql === child.sql)
   }
+
+  test("Time as pretty strings") {
+    checkEvaluation(ToPrettyString(Literal(1000L, TimeType())), "00:00:00.001")
+    checkEvaluation(ToPrettyString(Literal(1L, TimeType())), "00:00:00.000001")
+    checkEvaluation(ToPrettyString(Literal(
+      (23 * 3600 + 59 * 60 + 59) * 1000000L, TimeType())), "23:59:59")
+  }
 }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
index b2f3adac68e1..0b5d9a66c37c 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
@@ -69,6 +69,10 @@ object DataTypeTestUtils {
     YearMonthIntervalType(YEAR),
     YearMonthIntervalType(MONTH))
 
+  val timeTypes: Seq[TimeType] = Seq(
+    TimeType(TimeType.MIN_PRECISION),
+    TimeType(TimeType.MAX_PRECISION))
+
   val unsafeRowMutableFieldTypes: Seq[DataType] = Seq(
     NullType,
     BooleanType,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to