This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new ed92a5cc3ea2 [SPARK-54609][SQL] Disable TIME type by default
ed92a5cc3ea2 is described below
commit ed92a5cc3ea21b9be442b642621e2b582013161a
Author: David Milicevic <[email protected]>
AuthorDate: Wed Dec 10 19:29:17 2025 -0800
[SPARK-54609][SQL] Disable TIME type by default
### What changes were proposed in this pull request?
Introducing a new SQL config for TIME type: `spark.sql.timeType.enabled`.
The default value is `false` and it is enabled only in tests.

### Why are the changes needed?
TIME data type support is not complete, so we need to guard it before it is
completed, especially ahead of Spark 4.1 release.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Need to add tests for disabled config.

### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #53344 from davidm-db/davidm-db/time-config.
Lead-authored-by: David Milicevic <[email protected]>
Co-authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 18a9435f28e602e6adfcc7f484917f8b5bfb6b48)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../src/main/resources/error/error-conditions.json | 6 ++++
.../sql/catalyst/CatalystTypeConverters.scala | 5 +--
.../spark/sql/catalyst/expressions/Cast.scala | 8 ++++-
.../sql/catalyst/expressions/timeExpressions.scala | 38 +++++++++++++++-------
.../spark/sql/errors/QueryCompilationErrors.scala | 6 ++++
.../org/apache/spark/sql/internal/SQLConf.scala | 9 +++++
.../execution/SparkConnectPlanExecution.scala | 7 +++-
.../execution/datasources/DataSourceUtils.scala | 3 ++
sql/gen-sql-functions-docs.py | 2 ++
9 files changed, 69 insertions(+), 15 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index b79e44a7984a..dfaf8425a1a0 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -7141,6 +7141,12 @@
],
"sqlState" : "0A001"
},
+ "UNSUPPORTED_TIME_TYPE" : {
+ "message" : [
+ "The data type TIME is not supported."
+ ],
+ "sqlState" : "0A000"
+ },
"UNSUPPORTED_TYPED_LITERAL" : {
"message" : [
"Literals of the type <unsupportedType> are not supported. Supported
types are <supportedTypes>."
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
index 53803192cb3d..a90720ac5108 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala
@@ -25,12 +25,11 @@ import java.time.{Duration, Instant, LocalDate,
LocalDateTime, LocalTime, Period
import java.util.{Map => JavaMap}
import javax.annotation.Nullable
-import scala.language.existentials
-
import org.apache.spark.SparkIllegalArgumentException
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util._
+import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.DayTimeIntervalType._
@@ -79,6 +78,8 @@ object CatalystTypeConverters {
new GeometryConverter(g)
case DateType if SQLConf.get.datetimeJava8ApiEnabled =>
LocalDateConverter
case DateType => DateConverter
+ case _: TimeType if !SQLConf.get.isTimeTypeEnabled =>
+ QueryCompilationErrors.unsupportedTimeTypeError()
case _: TimeType => TimeConverter
case TimestampType if SQLConf.get.datetimeJava8ApiEnabled =>
InstantConverter
case TimestampType => TimestampConverter
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index 1162a5394221..03b6a452fe7d 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
import
org.apache.spark.sql.catalyst.util.IntervalUtils.{dayTimeIntervalToByte,
dayTimeIntervalToDecimal, dayTimeIntervalToInt, dayTimeIntervalToLong,
dayTimeIntervalToShort, yearMonthIntervalToByte, yearMonthIntervalToInt,
yearMonthIntervalToShort}
-import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors}
+import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase,
QueryExecutionErrors}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.{GeographyVal, UTF8String, VariantVal}
@@ -602,6 +602,12 @@ case class Cast(
}
override def checkInputDataTypes(): TypeCheckResult = {
+ dataType match {
+ // If the cast is to a TIME type, first check if TIME type is enabled.
+ case _: TimeType if !SQLConf.get.isTimeTypeEnabled =>
+ throw QueryCompilationErrors.unsupportedTimeTypeError()
+ case _ =>
+ }
val canCast = evalMode match {
case EvalMode.LEGACY => Cast.canCast(child.dataType, dataType)
case EvalMode.ANSI => Cast.canAnsiCast(child.dataType, dataType)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
index ff088876969b..692dd5b1f398 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala
@@ -32,11 +32,22 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.catalyst.util.TimeFormatter
import org.apache.spark.sql.catalyst.util.TypeUtils.ordinalNumber
import org.apache.spark.sql.errors.{QueryCompilationErrors,
QueryExecutionErrors}
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.types.StringTypeWithCollation
import org.apache.spark.sql.types.{AbstractDataType, AnyTimeType, ByteType,
DataType, DayTimeIntervalType, DecimalType, IntegerType, LongType, ObjectType,
TimeType}
import org.apache.spark.sql.types.DayTimeIntervalType.{HOUR, SECOND}
import org.apache.spark.unsafe.types.UTF8String
+trait TimeExpression extends Expression {
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (SQLConf.get.isTimeTypeEnabled) {
+ super.checkInputDataTypes()
+ } else {
+ throw QueryCompilationErrors.unsupportedTimeTypeError()
+ }
+ }
+}
+
/**
* Parses a column to a time based on the given format.
*/
@@ -64,7 +75,7 @@ import org.apache.spark.unsafe.types.UTF8String
since = "4.1.0")
// scalastyle:on line.size.limit
case class ToTime(str: Expression, format: Option[Expression])
- extends RuntimeReplaceable with ExpectsInputTypes {
+ extends RuntimeReplaceable with ExpectsInputTypes with TimeExpression {
def this(str: Expression, format: Expression) = this(str, Option(format))
def this(str: Expression) = this(str, None)
@@ -200,7 +211,7 @@ object TryToTimeExpressionBuilder extends ExpressionBuilder
{
// scalastyle:on line.size.limit
case class MinutesOfTime(child: Expression)
extends RuntimeReplaceable
- with ExpectsInputTypes {
+ with ExpectsInputTypes with TimeExpression {
override def replacement: Expression = StaticInvoke(
classOf[DateTimeUtils.type],
@@ -259,7 +270,7 @@ object MinuteExpressionBuilder extends ExpressionBuilder {
case class HoursOfTime(child: Expression)
extends RuntimeReplaceable
- with ExpectsInputTypes {
+ with ExpectsInputTypes with TimeExpression {
override def replacement: Expression = StaticInvoke(
classOf[DateTimeUtils.type],
@@ -316,7 +327,7 @@ object HourExpressionBuilder extends ExpressionBuilder {
case class SecondsOfTimeWithFraction(child: Expression)
extends RuntimeReplaceable
- with ExpectsInputTypes {
+ with ExpectsInputTypes with TimeExpression {
override def replacement: Expression = {
val precision = child.dataType match {
case TimeType(p) => p
@@ -342,7 +353,7 @@ case class SecondsOfTimeWithFraction(child: Expression)
case class SecondsOfTime(child: Expression)
extends RuntimeReplaceable
- with ExpectsInputTypes {
+ with ExpectsInputTypes with TimeExpression {
override def replacement: Expression = StaticInvoke(
classOf[DateTimeUtils.type],
@@ -433,7 +444,8 @@ object SecondExpressionBuilder extends ExpressionBuilder {
case class CurrentTime(
child: Expression = Literal(TimeType.MICROS_PRECISION),
timeZoneId: Option[String] = None) extends UnaryExpression
- with TimeZoneAwareExpression with ImplicitCastInputTypes with
CodegenFallback {
+ with TimeZoneAwareExpression with ImplicitCastInputTypes with CodegenFallback
+ with TimeExpression {
def this() = {
this(Literal(TimeType.MICROS_PRECISION), None)
@@ -545,7 +557,7 @@ case class MakeTime(
secsAndMicros: Expression)
extends RuntimeReplaceable
with ImplicitCastInputTypes
- with ExpectsInputTypes {
+ with ExpectsInputTypes with TimeExpression {
// Accept `sec` as DecimalType to avoid loosing precision of microseconds
while converting
// it to the fractional part of `sec`. If `sec` is an IntegerType, it can be
cast into decimal
@@ -570,7 +582,8 @@ case class MakeTime(
* Adds day-time interval to time.
*/
case class TimeAddInterval(time: Expression, interval: Expression)
- extends BinaryExpression with RuntimeReplaceable with ExpectsInputTypes {
+ extends BinaryExpression with RuntimeReplaceable with ExpectsInputTypes
+ with TimeExpression {
override def nullIntolerant: Boolean = true
override def left: Expression = time
@@ -611,7 +624,8 @@ case class TimeAddInterval(time: Expression, interval:
Expression)
* Returns a day-time interval between time values.
*/
case class SubtractTimes(left: Expression, right: Expression)
- extends BinaryExpression with RuntimeReplaceable with ExpectsInputTypes {
+ extends BinaryExpression with RuntimeReplaceable with ExpectsInputTypes
+ with TimeExpression {
override def nullIntolerant: Boolean = true
override def inputTypes: Seq[AbstractDataType] = Seq(AnyTimeType,
AnyTimeType)
@@ -668,7 +682,8 @@ case class TimeDiff(
end: Expression)
extends TernaryExpression
with RuntimeReplaceable
- with ImplicitCastInputTypes {
+ with ImplicitCastInputTypes
+ with TimeExpression {
override def first: Expression = unit
override def second: Expression = start
@@ -723,7 +738,8 @@ case class TimeDiff(
since = "4.1.0")
// scalastyle:on line.size.limit
case class TimeTrunc(unit: Expression, time: Expression)
- extends BinaryExpression with RuntimeReplaceable with ImplicitCastInputTypes
{
+ extends BinaryExpression with RuntimeReplaceable with ImplicitCastInputTypes
+ with TimeExpression {
override def left: Expression = unit
override def right: Expression = time
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 77b775f6c49f..1cee321846a4 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -4492,4 +4492,10 @@ private[sql] object QueryCompilationErrors extends
QueryErrorsBase with Compilat
"colType" -> "metadata",
"errors" -> errors.mkString("- ", "\n- ", "")))
}
+
+ def unsupportedTimeTypeError(): Throwable = {
+ new AnalysisException(
+ errorClass = "UNSUPPORTED_TIME_TYPE",
+ messageParameters = Map.empty)
+ }
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 55c5eab5dda0..36febe67f409 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -6621,6 +6621,13 @@ object SQLConf {
.booleanConf
.createWithDefault(false)
+ val TIME_TYPE_ENABLED =
+ buildConf("spark.sql.timeType.enabled")
+ .doc("When true, the TIME data type is supported.")
+ .version("4.1.0")
+ .booleanConf
+ .createWithDefault(Utils.isTesting)
+
/**
* Holds information about keys that have been deprecated.
*
@@ -7781,6 +7788,8 @@ class SQLConf extends Serializable with Logging with
SqlApiConf {
def coerceMergeNestedTypes: Boolean =
getConf(SQLConf.MERGE_INTO_NESTED_TYPE_COERCION_ENABLED)
+ def isTimeTypeEnabled: Boolean = getConf(SQLConf.TIME_TYPE_ENABLED)
+
/** ********************** SQLConf functionality methods ************ */
/** Set Spark SQL configuration properties. */
diff --git
a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala
b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala
index dc20af8e3700..e3c392cd2a2c 100644
---
a/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala
+++
b/sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala
@@ -37,10 +37,11 @@ import
org.apache.spark.sql.connect.config.Connect.{CONNECT_GRPC_ARROW_MAX_BATCH
import org.apache.spark.sql.connect.planner.{InvalidInputErrors,
SparkConnectPlanner}
import org.apache.spark.sql.connect.service.ExecuteHolder
import org.apache.spark.sql.connect.utils.MetricGenerator
+import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.{DoNotCleanup, LocalTableScanExec,
QueryExecution, RemoveShuffleFiles, SkipMigration, SQLExecution}
import org.apache.spark.sql.execution.arrow.ArrowConverters
import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.{DataType, StructType}
+import org.apache.spark.sql.types.{DataType, StructType, TimeType}
import org.apache.spark.util.ThreadUtils
/**
@@ -133,6 +134,10 @@ private[execution] class
SparkConnectPlanExecution(executeHolder: ExecuteHolder)
errorClass = "UNSUPPORTED_FEATURE.GEOSPATIAL_DISABLED",
messageParameters = scala.collection.immutable.Map.empty)
}
+ val timeTypeEnabled = spark.sessionState.conf.isTimeTypeEnabled
+ if (!timeTypeEnabled &&
schema.existsRecursively(_.isInstanceOf[TimeType])) {
+ throw QueryCompilationErrors.unsupportedTimeTypeError()
+ }
val maxRecordsPerBatch = spark.sessionState.conf.arrowMaxRecordsPerBatch
val timeZoneId = spark.sessionState.conf.sessionLocalTimeZone
val largeVarTypes = spark.sessionState.conf.arrowUseLargeVarTypes
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
index d43c9eab0a5b..fb5b605bab01 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala
@@ -93,6 +93,9 @@ object DataSourceUtils extends PredicateHelper {
* in a driver side.
*/
def verifySchema(format: FileFormat, schema: StructType, readOnly: Boolean =
false): Unit = {
+ if (!SQLConf.get.isTimeTypeEnabled &&
schema.existsRecursively(_.isInstanceOf[TimeType])) {
+ throw QueryCompilationErrors.unsupportedTimeTypeError()
+ }
schema.foreach { field =>
val supported = if (readOnly) {
format.supportReadDataType(field.dataType)
diff --git a/sql/gen-sql-functions-docs.py b/sql/gen-sql-functions-docs.py
index a1facbaaf7e3..b49124ece086 100644
--- a/sql/gen-sql-functions-docs.py
+++ b/sql/gen-sql-functions-docs.py
@@ -240,6 +240,8 @@ def generate_functions_examples_html(jvm, jspark,
html_output_dir):
</pre></div>
"""
+ print("Enabling TIME data type")
+ jspark.sql("SET spark.sql.timeType.enabled = true")
print("Running SQL examples to generate formatted output.")
for key, infos in _list_grouped_function_infos(jvm):
examples = _make_pretty_examples(jspark, infos)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]