This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 890ee47a9a74 [SPARK-51454][SQL] Support cast from time to string
890ee47a9a74 is described below
commit 890ee47a9a74d48f529f3c2cffc076530e971453
Author: Max Gekk <[email protected]>
AuthorDate: Mon Mar 10 10:44:08 2025 -0700
[SPARK-51454][SQL] Support cast from time to string
### What changes were proposed in this pull request?
In the PR, I propose to support casting of TIME to STRING using the
fraction formatter which formats times according to the pattern
`HH:mm:ss.[..fff..]`. The pattern `[..fff..]` is a fraction of second up to
microsecond resolution. The formatter does not output trailing zeros in the
fraction. As a result, the `ToPrettyString` expression and `.show()` support such
casting too.
Also the PR adds support for TIME literals created from
`java.time.LocalTime` (used in tests).
### Why are the changes needed?
To output time values in a user-friendly format. Before the changes, `show()`
prints the internal representation of time values:
```scala
scala> Seq(LocalTime.parse("17:18:19")).toDS.show()
+-----------+
| value|
+-----------+
|62299000000|
+-----------+
```
### Does this PR introduce _any_ user-facing change?
Yes. After the changes, the command above outputs time values in new format:
```scala
scala> Seq(LocalTime.parse("17:18:19")).toDS.show()
+--------+
| value|
+--------+
|17:18:19|
+--------+
```
### How was this patch tested?
By running new tests:
```
$ build/sbt "test:testOnly *ToPrettyStringSuite"
$ build/sbt "test:testOnly *CastWithAnsiOnSuite"
```
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #50224 from MaxGekk/time-cast-to-string.
Authored-by: Max Gekk <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../spark/sql/catalyst/expressions/ToStringBase.scala | 10 +++++++++-
.../apache/spark/sql/catalyst/expressions/literals.scala | 7 ++++---
.../spark/sql/catalyst/expressions/CastSuiteBase.scala | 16 ++++++++++++++--
.../sql/catalyst/expressions/ToPrettyStringSuite.scala | 7 +++++++
.../org/apache/spark/sql/types/DataTypeTestUtils.scala | 4 ++++
5 files changed, 38 insertions(+), 6 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
index de72b94df3ac..6cfcde5f52da 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala
@@ -22,7 +22,7 @@ import java.time.ZoneOffset
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
-import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils,
DateFormatter, IntervalStringStyles, IntervalUtils, MapData, SparkStringUtils,
TimestampFormatter}
+import org.apache.spark.sql.catalyst.util.{ArrayData, CharVarcharCodegenUtils,
DateFormatter, FractionTimeFormatter, IntervalStringStyles, IntervalUtils,
MapData, SparkStringUtils, TimestampFormatter}
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle
@@ -34,6 +34,7 @@ import org.apache.spark.util.ArrayImplicits._
trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression =>
private lazy val dateFormatter = DateFormatter()
+ private lazy val timeFormatter = new FractionTimeFormatter()
private lazy val timestampFormatter =
TimestampFormatter.getFractionFormatter(zoneId)
private lazy val timestampNTZFormatter =
TimestampFormatter.getFractionFormatter(ZoneOffset.UTC)
@@ -73,6 +74,8 @@ trait ToStringBase { self: UnaryExpression with
TimeZoneAwareExpression =>
acceptAny[Long](t => UTF8String.fromString(timestampFormatter.format(t)))
case TimestampNTZType =>
acceptAny[Long](t =>
UTF8String.fromString(timestampNTZFormatter.format(t)))
+ case _: TimeType =>
+ acceptAny[Long](t => UTF8String.fromString(timeFormatter.format(t)))
case ArrayType(et, _) =>
acceptAny[ArrayData](array => {
val builder = new UTF8StringBuilder
@@ -224,6 +227,11 @@ trait ToStringBase { self: UnaryExpression with
TimeZoneAwareExpression =>
ctx.addReferenceObj("timestampNTZFormatter", timestampNTZFormatter),
timestampNTZFormatter.getClass)
(c, evPrim) => code"$evPrim = UTF8String.fromString($tf.format($c));"
+ case _: TimeType =>
+ val tf = JavaCode.global(
+ ctx.addReferenceObj("timeFormatter", timeFormatter),
+ timeFormatter.getClass)
+ (c, evPrim) => code"$evPrim = UTF8String.fromString($tf.format($c));"
case CalendarIntervalType =>
(c, evPrim) => code"$evPrim = UTF8String.fromString($c.toString());"
case ArrayType(et, _) =>
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 5a34b21703e5..6968b8a4cb2e 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -28,7 +28,7 @@ import java.lang.{Short => JavaShort}
import java.math.{BigDecimal => JavaBigDecimal}
import java.nio.charset.StandardCharsets
import java.sql.{Date, Timestamp}
-import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period,
ZoneOffset}
+import java.time.{Duration, Instant, LocalDate, LocalDateTime, LocalTime,
Period, ZoneOffset}
import java.util
import java.util.Objects
@@ -49,7 +49,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern
import org.apache.spark.sql.catalyst.trees.TreePattern.{LITERAL, NULL_LITERAL,
TRUE_OR_FALSE_LITERAL}
import org.apache.spark.sql.catalyst.types._
import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.catalyst.util.DateTimeUtils.instantToMicros
+import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros,
localTimeToMicros}
import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE
import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros,
periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString}
import org.apache.spark.sql.errors.{QueryCompilationErrors,
QueryExecutionErrors}
@@ -89,6 +89,7 @@ object Literal {
case l: LocalDateTime => Literal(DateTimeUtils.localDateTimeToMicros(l),
TimestampNTZType)
case ld: LocalDate => Literal(ld.toEpochDay.toInt, DateType)
case d: Date => Literal(DateTimeUtils.fromJavaDate(d), DateType)
+ case lt: LocalTime => Literal(localTimeToMicros(lt), TimeType())
case d: Duration => Literal(durationToMicros(d), DayTimeIntervalType())
case p: Period => Literal(periodToMonths(p), YearMonthIntervalType())
case a: Array[Byte] => Literal(a, BinaryType)
@@ -521,7 +522,7 @@ case class Literal (value: Any, dataType: DataType) extends
LeafExpression {
}
case ByteType | ShortType =>
ExprCode.forNonNullValue(JavaCode.expression(s"($javaType)$value",
dataType))
- case TimestampType | TimestampNTZType | LongType | _:
DayTimeIntervalType =>
+ case TimestampType | TimestampNTZType | LongType | _:
DayTimeIntervalType | _: TimeType =>
toExprCode(s"${value}L")
case _ =>
val constRef = ctx.addReferenceObj("literal", value, javaType)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
index cec49a5ae1de..11c96a4ff540 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst.expressions
import java.sql.{Date, Timestamp}
-import java.time.{Duration, LocalDate, LocalDateTime, Period}
+import java.time.{Duration, LocalDate, LocalDateTime, LocalTime, Period}
import java.time.temporal.ChronoUnit
import java.util.{Calendar, Locale, TimeZone}
@@ -82,7 +82,7 @@ abstract class CastSuiteBase extends SparkFunSuite with
ExpressionEvalHelper {
}
atomicTypes.foreach(dt => checkNullCast(NullType, dt))
- atomicTypes.foreach(dt => checkNullCast(dt, StringType))
+ (atomicTypes ++ timeTypes).foreach(dt => checkNullCast(dt, StringType))
checkNullCast(StringType, BinaryType)
checkNullCast(StringType, BooleanType)
numericTypes.foreach(dt => checkNullCast(dt, BooleanType))
@@ -1457,4 +1457,16 @@ abstract class CastSuiteBase extends SparkFunSuite with
ExpressionEvalHelper {
}
}
}
+
+ test("cast time to string") {
+ Seq(
+ LocalTime.MIDNIGHT -> "00:00:00",
+ LocalTime.NOON -> "12:00:00",
+ LocalTime.of(23, 59, 59) -> "23:59:59",
+ LocalTime.of(23, 59, 59, 1000000) -> "23:59:59.001",
+ LocalTime.of(23, 59, 59, 999999000) -> "23:59:59.999999"
+ ).foreach { case (time, expectedStr) =>
+ checkEvaluation(Cast(Literal(time), StringType), expectedStr)
+ }
+ }
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala
index 64529bf54bd2..5c297c00acc0 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyStringSuite.scala
@@ -134,4 +134,11 @@ class ToPrettyStringSuite extends SparkFunSuite with
ExpressionEvalHelper {
val prettyString = ToPrettyString(child)
assert(prettyString.sql === child.sql)
}
+
+ test("Time as pretty strings") {
+ checkEvaluation(ToPrettyString(Literal(1000L, TimeType())), "00:00:00.001")
+ checkEvaluation(ToPrettyString(Literal(1L, TimeType())), "00:00:00.000001")
+ checkEvaluation(ToPrettyString(Literal(
+ (23 * 3600 + 59 * 60 + 59) * 1000000L, TimeType())), "23:59:59")
+ }
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
index b2f3adac68e1..0b5d9a66c37c 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeTestUtils.scala
@@ -69,6 +69,10 @@ object DataTypeTestUtils {
YearMonthIntervalType(YEAR),
YearMonthIntervalType(MONTH))
+ val timeTypes: Seq[TimeType] = Seq(
+ TimeType(TimeType.MIN_PRECISION),
+ TimeType(TimeType.MAX_PRECISION))
+
val unsafeRowMutableFieldTypes: Seq[DataType] = Seq(
NullType,
BooleanType,
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]