This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 2a4aab7cc9c [SPARK-42538][CONNECT] Make `sql.functions#lit` function support more types 2a4aab7cc9c is described below commit 2a4aab7cc9cdd19e0889dc9577f16033991fda3e Author: yangjie01 <yangji...@baidu.com> AuthorDate: Sat Feb 25 14:08:56 2023 -0400 [SPARK-42538][CONNECT] Make `sql.functions#lit` function support more types ### What changes were proposed in this pull request? This PR aims to add support for more types to the `sql.functions#lit` function, including: - Decimal - Instant - Timestamp - LocalDateTime - Date - Duration - Period - CalendarInterval ### Why are the changes needed? Make the `sql.functions#lit` function support more types ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Add new test - Manually checked the new cases with Scala 2.13 Closes #40143 from LuciferYang/functions-lit. Authored-by: yangjie01 <yangji...@baidu.com> Signed-off-by: Herman van Hovell <her...@databricks.com> --- .../scala/org/apache/spark/sql/functions.scala | 24 ++++++++++- .../apache/spark/sql/PlanGenerationTestSuite.scala | 12 +++++- .../explain-results/function_lit.explain | 2 +- .../query-tests/queries/function_lit.json | 47 +++++++++++++++++++++ .../query-tests/queries/function_lit.proto.bin | Bin 262 -> 367 bytes 5 files changed, 81 insertions(+), 4 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index dd2380e8bc4..94882087eee 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql import java.math.{BigDecimal => JBigDecimal} -import java.time.LocalDate +import java.sql.{Date, Timestamp} +import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period} import 
java.util.Collections import scala.collection.JavaConverters._ @@ -26,10 +27,12 @@ import scala.reflect.runtime.universe.{typeTag, TypeTag} import com.google.protobuf.ByteString import org.apache.spark.connect.proto +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, IntervalUtils} import org.apache.spark.sql.connect.client.unsupported import org.apache.spark.sql.expressions.{ScalarUserDefinedFunction, UserDefinedFunction} -import org.apache.spark.sql.types.{DataType, StructType} +import org.apache.spark.sql.types.{DataType, Decimal, StructType} import org.apache.spark.sql.types.DataType.parseTypeWithFallback +import org.apache.spark.unsafe.types.CalendarInterval /** * Commonly used functions available for DataFrame operations. Using functions defined here @@ -104,6 +107,14 @@ object functions { .setValue(value) } + private def createCalendarIntervalLiteral(months: Int, days: Int, microseconds: Long): Column = + createLiteral { builder => + builder.getCalendarIntervalBuilder + .setMonths(months) + .setDays(days) + .setMicroseconds(microseconds) + } + private val nullType = proto.DataType.newBuilder().setNull(proto.DataType.NULL.getDefaultInstance).build() @@ -136,6 +147,15 @@ object functions { case v: Array[Byte] => createLiteral(_.setBinary(ByteString.copyFrom(v))) case v: collection.mutable.WrappedArray[_] => lit(v.array) case v: LocalDate => createLiteral(_.setDate(v.toEpochDay.toInt)) + case v: Decimal => createDecimalLiteral(Math.max(v.precision, v.scale), v.scale, v.toString) + case v: Instant => createLiteral(_.setTimestamp(DateTimeUtils.instantToMicros(v))) + case v: Timestamp => createLiteral(_.setTimestamp(DateTimeUtils.fromJavaTimestamp(v))) + case v: LocalDateTime => + createLiteral(_.setTimestampNtz(DateTimeUtils.localDateTimeToMicros(v))) + case v: Date => createLiteral(_.setDate(DateTimeUtils.fromJavaDate(v))) + case v: Duration => createLiteral(_.setDayTimeInterval(IntervalUtils.durationToMicros(v))) + case v: Period => 
createLiteral(_.setYearMonthInterval(IntervalUtils.periodToMonths(v))) + case v: CalendarInterval => createCalendarIntervalLiteral(v.months, v.days, v.microseconds) case null => createLiteral(_.setNull(nullType)) case _ => unsupported(s"literal $literal not supported (yet).") } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index dc8097f44e5..48d5f0cb409 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -35,6 +35,7 @@ import org.apache.spark.sql.connect.client.util.ConnectFunSuite import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.lit import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval // scalastyle:off /** @@ -1971,7 +1972,16 @@ class PlanGenerationTestSuite fn.lit(Array.tabulate(10)(i => ('A' + i).toChar)), fn.lit(Array.tabulate(23)(i => (i + 120).toByte)), fn.lit(mutable.WrappedArray.make(Array[Byte](8.toByte, 6.toByte))), - fn.lit(java.time.LocalDate.of(2020, 10, 10))) + fn.lit(null), + fn.lit(java.time.LocalDate.of(2020, 10, 10)), + fn.lit(Decimal.apply(BigDecimal(8997620, 6))), + fn.lit(java.time.Instant.ofEpochMilli(1677155519808L)), + fn.lit(new java.sql.Timestamp(12345L)), + fn.lit(java.time.LocalDateTime.of(2023, 2, 23, 20, 36)), + fn.lit(java.sql.Date.valueOf("2023-02-23")), + fn.lit(java.time.Duration.ofSeconds(200L)), + fn.lit(java.time.Period.ofDays(100)), + fn.lit(new CalendarInterval(2, 20, 100L))) } /* Window API */ diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_lit.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_lit.explain index 11454267c37..7f093f9df13 100644 --- 
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_lit.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_lit.explain @@ -1,2 +1,2 @@ -Project [id#0L, id#0L, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, 2020-10-10 AS DATE '2020-10-10'#0] +Project [id#0L, id#0L, true AS true#0, 68 AS 68#0, 9872 AS 9872#0, -8726532 AS -8726532#0, 7834609328726532 AS 7834609328726532#0L, 2.718281828459045 AS 2.718281828459045#0, -0.8 AS -0.8#0, 89.97620 AS 89.97620#0, 89889.7667231 AS 89889.7667231#0, connect! AS connect!#0, T AS T#0, ABCDEFGHIJ AS ABCDEFGHIJ#0, 0x78797A7B7C7D7E7F808182838485868788898A8B8C8D8E AS X'78797A7B7C7D7E7F808182838485868788898A8B8C8D8E'#0, 0x0806 AS X'0806'#0, null AS NULL#0, 2020-10-10 AS DATE '2020-10-10'#0, 8.997 [...] 
+- LocalRelation <empty>, [id#0L, a#0, b#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_lit.json b/connector/connect/common/src/test/resources/query-tests/queries/function_lit.json index 3cb1f421207..03924866a26 100644 --- a/connector/connect/common/src/test/resources/query-tests/queries/function_lit.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_lit.json @@ -83,10 +83,57 @@ "literal": { "binary": "CAY=" } + }, { + "literal": { + "null": { + "null": { + } + } + } }, { "literal": { "date": 18545 } + }, { + "literal": { + "decimal": { + "value": "8.997620", + "precision": 7, + "scale": 6 + } + } + }, { + "literal": { + "timestamp": "1677155519808000" + } + }, { + "literal": { + "timestamp": "12345000" + } + }, { + "literal": { + "timestampNtz": "1677184560000000" + } + }, { + "literal": { + "date": 19411 + } + }, { + "literal": { + "dayTimeInterval": "200000000" + } + }, { + "literal": { + "yearMonthInterval": 0 + } + }, { + "literal": { + "calendarInterval": { + "months": 2, + "days": 20, + "microseconds": "100" + } + } }] } } \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin index 9149628d7a3..fc86c71e28c 100644 Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_lit.proto.bin differ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org