This is an automated email from the ASF dual-hosted git repository.
MaxGekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4bbf75e9e672 [SPARK-56965][SQL] Add SQL parser support for
TIMESTAMP_NTZ(p) and TIMESTAMP_LTZ(p)
4bbf75e9e672 is described below
commit 4bbf75e9e672ccbcf762f5c7258be501b0ea7f5a
Author: Stevo Mitric <[email protected]>
AuthorDate: Sat May 23 08:54:25 2026 +0200
[SPARK-56965][SQL] Add SQL parser support for TIMESTAMP_NTZ(p) and
TIMESTAMP_LTZ(p)
### What changes were proposed in this pull request?
Adds SQL parser support for parameterized nanosecond-precision timestamp
types introduced in SPARK-56876. The parser now accepts:
- TIMESTAMP_NTZ(p) -> TimestampNTZNanosType(p)
- TIMESTAMP_LTZ(p) -> TimestampLTZNanosType(p)
- TIMESTAMP(p) WITHOUT TIME ZONE (alias for TIMESTAMP_NTZ(p))
- TIMESTAMP(p) WITH LOCAL TIME ZONE (alias for TIMESTAMP_LTZ(p))
- TIMESTAMP(p) (resolves via spark.sql.timestampType session default)
with `p` from `[7, 9]`. Out-of-range precision throws
`INVALID_TIMESTAMP_PRECISION`; negative precision is rejected by the grammar as
`PARSE_SYNTAX_ERROR`.
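The new syntax is gated behind a new internal preview flag
`spark.sql.timestampNanosTypes.enabled` (default false). The same flag also
gates parsing of the `timestamp_ltz(p)` / `timestamp_ntz(p)` type names in
`DataType.fromJson`; with the flag off, both entry points fail with
`FEATURE_NOT_ENABLED`.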
Unparameterized TIMESTAMP, TIMESTAMP_NTZ, TIMESTAMP_LTZ, and the
WITH/WITHOUT TIME ZONE variants continue to return the existing microsecond
types - no behavior change.
Part of SPIP SPARK-56822
(https://issues.apache.org/jira/browse/SPARK-56822).
### Why are the changes needed?
SPARK-56876 added TimestampNTZNanosType / TimestampLTZNanosType to the type
system but explicitly left out SQL/DDL integration - users cannot declare these
types in CREATE TABLE, CAST, or Column.cast(String) today. This PR is the
parser sub-task of the SPIP and wires those spellings through
DataTypeAstBuilder, behind a preview flag so the surface is opt-in until the
cast/runtime sub-tasks land.
### Does this PR introduce _any_ user-facing change?
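Yes. When `spark.sql.timestampNanosTypes.enabled` is set to true, the SQL
parser accepts timestamp types with an explicit precision `p` in `[7, 9]`.
With the flag at its default (false), these spellings fail with
`FEATURE_NOT_ENABLED`.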
### How was this patch tested?
Extended DataTypeParserSuite and DataTypeSuite.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Opus 4.7
Closes #56041 from stevomitric/stevomitric/add-parser-support.
Authored-by: Stevo Mitric <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../spark/sql/catalyst/parser/SqlBaseParser.g4 | 6 +-
.../sql/catalyst/parser/DataTypeAstBuilder.scala | 61 ++++++++-
.../apache/spark/sql/errors/DataTypeErrors.scala | 13 ++
.../org/apache/spark/sql/internal/SqlApiConf.scala | 2 +
.../org/apache/spark/sql/types/DataType.scala | 2 +
.../org/apache/spark/sql/internal/SQLConf.scala | 15 +++
.../sql/catalyst/parser/DataTypeParserSuite.scala | 116 +++++++++++++++++
.../org/apache/spark/sql/types/DataTypeSuite.scala | 143 +++++++++++++--------
8 files changed, 292 insertions(+), 66 deletions(-)
diff --git
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 5761028f6023..744c472b2017 100644
---
a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++
b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -1483,7 +1483,10 @@ nonTrivialPrimitiveType
| INTERVAL
(fromYearMonth=(YEAR | MONTH) (TO to=MONTH)? |
fromDayTime=(DAY | HOUR | MINUTE | SECOND) (TO to=(HOUR | MINUTE |
SECOND))?)?
- | TIMESTAMP (withLocalTimeZone | withoutTimeZone)?
+ | TIMESTAMP (LEFT_PAREN precision=integerValue RIGHT_PAREN)?
+ (withLocalTimeZone | withoutTimeZone)?
+ | TIMESTAMP_LTZ (LEFT_PAREN precision=integerValue RIGHT_PAREN)?
+ | TIMESTAMP_NTZ (LEFT_PAREN precision=integerValue RIGHT_PAREN)?
| TIME (LEFT_PAREN precision=integerValue RIGHT_PAREN)? (withoutTimeZone)?
| GEOGRAPHY LEFT_PAREN (srid=integerValue | any=ANY) RIGHT_PAREN
| GEOMETRY LEFT_PAREN (srid=integerValue | any=ANY) RIGHT_PAREN
@@ -1498,7 +1501,6 @@ trivialPrimitiveType
| FLOAT | REAL
| DOUBLE
| DATE
- | TIMESTAMP_LTZ | TIMESTAMP_NTZ
| BINARY
| VOID
| VARIANT
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
index 43b93f8f3d06..9de6aceb757b 100644
---
a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
+++
b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/parser/DataTypeAstBuilder.scala
@@ -28,9 +28,9 @@ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin}
import org.apache.spark.sql.connector.catalog.IdentityColumnSpec
-import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryParsingErrors}
+import org.apache.spark.sql.errors.{DataTypeErrors, DataTypeErrorsBase,
QueryParsingErrors}
import org.apache.spark.sql.internal.SqlApiConf
-import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType,
ByteType, CalendarIntervalType, CharType, DataType, DateType,
DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType,
GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType,
ShortType, StringType, StructField, StructType, TimestampNTZType,
TimestampType, TimeType, VarcharType, VariantType, YearMonthIntervalType}
+import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType,
ByteType, CalendarIntervalType, CharType, DataType, DateType,
DayTimeIntervalType, DecimalType, DoubleType, FloatType, GeographyType,
GeometryType, IntegerType, LongType, MapType, MetadataBuilder, NullType,
ShortType, StringType, StructField, StructType, TimestampLTZNanosType,
TimestampNTZNanosType, TimestampNTZType, TimestampType, TimeType, VarcharType,
VariantType, YearMonthIntervalType}
/**
* AST builder for parsing data type definitions and table schemas.
@@ -350,11 +350,42 @@ class DataTypeAstBuilder extends
SqlBaseParserBaseVisitor[AnyRef] with DataTypeE
CalendarIntervalType
}
case TIMESTAMP if currentCtx.withLocalTimeZone() != null =>
- TimestampType
+ if (currentCtx.precision == null) {
+ TimestampType
+ } else {
+ parseTimestampLtzNanosPrecision(currentCtx.precision.getText)
+ }
case TIMESTAMP if currentCtx.withoutTimeZone() != null =>
- TimestampNTZType
+ if (currentCtx.precision == null) {
+ TimestampNTZType
+ } else {
+ parseTimestampNtzNanosPrecision(currentCtx.precision.getText)
+ }
case TIMESTAMP =>
- SqlApiConf.get.timestampType
+ if (currentCtx.precision == null) {
+ SqlApiConf.get.timestampType
+ } else {
+ SqlApiConf.get.timestampType match {
+ case TimestampType =>
+ parseTimestampLtzNanosPrecision(currentCtx.precision.getText)
+ case TimestampNTZType =>
+ parseTimestampNtzNanosPrecision(currentCtx.precision.getText)
+ case other =>
+ throw SparkException.internalError(s"Unexpected default
timestamp type: $other")
+ }
+ }
+ case TIMESTAMP_LTZ =>
+ if (currentCtx.precision == null) {
+ TimestampType
+ } else {
+ parseTimestampLtzNanosPrecision(currentCtx.precision.getText)
+ }
+ case TIMESTAMP_NTZ =>
+ if (currentCtx.precision == null) {
+ TimestampNTZType
+ } else {
+ parseTimestampNtzNanosPrecision(currentCtx.precision.getText)
+ }
case TIME =>
val precision = if (currentCtx.precision == null) {
TimeType.DEFAULT_PRECISION
@@ -398,8 +429,6 @@ class DataTypeAstBuilder extends
SqlBaseParserBaseVisitor[AnyRef] with DataTypeE
case FLOAT | REAL => FloatType
case DOUBLE => DoubleType
case DATE => DateType
- case TIMESTAMP_LTZ => TimestampType
- case TIMESTAMP_NTZ => TimestampNTZType
case BINARY => BinaryType
case VOID => NullType
case VARIANT => VariantType
@@ -448,6 +477,24 @@ class DataTypeAstBuilder extends
SqlBaseParserBaseVisitor[AnyRef] with DataTypeE
}
}
+ private def parseTimestampLtzNanosPrecision(precision: String):
TimestampLTZNanosType = {
+ DataTypeErrors.checkTimestampNanosTypesEnabled()
+ try TimestampLTZNanosType(precision.toInt)
+ catch {
+ case _: NumberFormatException =>
+ throw DataTypeErrors.invalidTimestampPrecisionError(precision,
"TIMESTAMP_LTZ")
+ }
+ }
+
+ private def parseTimestampNtzNanosPrecision(precision: String):
TimestampNTZNanosType = {
+ DataTypeErrors.checkTimestampNanosTypesEnabled()
+ try TimestampNTZNanosType(precision.toInt)
+ catch {
+ case _: NumberFormatException =>
+ throw DataTypeErrors.invalidTimestampPrecisionError(precision,
"TIMESTAMP_NTZ")
+ }
+ }
+
/**
* Create a complex DataType. Arrays, Maps and Structures are supported.
*/
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala
b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala
index 6e8cb8077be8..b89da2c246a7 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrors.scala
@@ -21,6 +21,7 @@ import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.trees.Origin
import org.apache.spark.sql.catalyst.util.QuotingUtils
import org.apache.spark.sql.catalyst.util.QuotingUtils.toSQLSchema
+import org.apache.spark.sql.internal.SqlApiConf
import org.apache.spark.sql.types.{DataType, Decimal, StringType}
import org.apache.spark.unsafe.types.UTF8String
@@ -282,4 +283,16 @@ private[sql] object DataTypeErrors extends
DataTypeErrorsBase {
messageParameters = Map("precision" -> precision, "type" -> typeName),
cause = null)
}
+
+ def checkTimestampNanosTypesEnabled(): Unit = {
+ if (!SqlApiConf.get.timestampNanosTypesEnabled) {
+ throw new SparkException(
+ errorClass = "FEATURE_NOT_ENABLED",
+ messageParameters = Map(
+ "featureName" -> "Nanosecond-precision timestamp types",
+ "configKey" -> "spark.sql.timestampNanosTypes.enabled",
+ "configValue" -> "true"),
+ cause = null)
+ }
+ }
}
diff --git
a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala
b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala
index bedd4afe0ed5..6bd747c74399 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala
@@ -54,6 +54,7 @@ private[sql] trait SqlApiConf {
def legacyParameterSubstitutionConstantsOnly: Boolean
def legacyIdentifierClauseOnly: Boolean
def typesFrameworkEnabled: Boolean
+ def timestampNanosTypesEnabled: Boolean
}
private[sql] object SqlApiConf {
@@ -112,4 +113,5 @@ private[sql] object DefaultSqlApiConf extends SqlApiConf {
override def legacyParameterSubstitutionConstantsOnly: Boolean = false
override def legacyIdentifierClauseOnly: Boolean = false
override def typesFrameworkEnabled: Boolean = false
+ override def timestampNanosTypesEnabled: Boolean = false
}
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
index fbd70cf8b899..c1d1430bacee 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -236,12 +236,14 @@ object DataType {
// For backwards compatibility, previously the type name of NullType is
"null"
case "null" => NullType
case TIMESTAMP_LTZ_NANOS_TYPE(precision) =>
+ DataTypeErrors.checkTimestampNanosTypesEnabled()
try TimestampLTZNanosType(precision.toInt)
catch {
case _: NumberFormatException =>
throw DataTypeErrors.invalidTimestampPrecisionError(precision,
"TIMESTAMP_LTZ")
}
case TIMESTAMP_NTZ_NANOS_TYPE(precision) =>
+ DataTypeErrors.checkTimestampNanosTypesEnabled()
try TimestampNTZNanosType(precision.toInt)
catch {
case _: NumberFormatException =>
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 6a2f9ad17b8b..270b8aa31a56 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -644,6 +644,19 @@ object SQLConf {
.booleanConf
.createWithDefaultFunction(() => Utils.isTesting)
+ val TIMESTAMP_NANOS_TYPES_ENABLED =
+ buildConf("spark.sql.timestampNanosTypes.enabled")
+ .internal()
+ .doc("When true, the parameterized nanosecond-precision timestamp types
" +
+ "TIMESTAMP_NTZ(p) / TIMESTAMP_LTZ(p) for p in [7, 9] are recognized as
" +
+ "Spark SQL data types at user-facing entry points. Default is false
because " +
+ "downstream execution paths (Cast, PhysicalDataType, AnyTimestampType,
encoders, " +
+ "Connect proto) are not yet wired for these types. See SPARK-56822.")
+ .version("4.2.0")
+ .withBindingPolicy(ConfigBindingPolicy.SESSION)
+ .booleanConf
+ .createWithDefault(false)
+
val EXTENDED_EXPLAIN_PROVIDERS =
buildConf("spark.sql.extendedExplainProviders")
.doc("A comma-separated list of classes that implement the" +
" org.apache.spark.sql.ExtendedExplainGenerator trait. If provided,
Spark will print" +
@@ -7560,6 +7573,8 @@ class SQLConf extends Serializable with Logging with
SqlApiConf {
def typesFrameworkEnabled: Boolean = getConf(TYPES_FRAMEWORK_ENABLED)
+ def timestampNanosTypesEnabled: Boolean =
getConf(TIMESTAMP_NANOS_TYPES_ENABLED)
+
def dataSourceV2JoinPushdown: Boolean = getConf(DATA_SOURCE_V2_JOIN_PUSHDOWN)
def dynamicPartitionPruningEnabled: Boolean =
getConf(DYNAMIC_PARTITION_PRUNING_ENABLED)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
index 03dbf0a28663..b55ed2b9c18a 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala
@@ -161,14 +161,81 @@ class DataTypeParserSuite extends SparkFunSuite with
SQLHelper {
assert(parse("timestamp") === TimestampNTZType)
assert(parse("timestamp with local time zone") === TimestampType)
assert(parse("timestamp without time zone") === TimestampNTZType)
+ withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
+ assert(parse("timestamp(9)") === TimestampNTZNanosType(9))
+ // Bare TIMESTAMP(p) routes through SqlApiConf.get.timestampType, so an
+ // out-of-range precision must surface as the NTZ error here.
+ Seq("6", "10").foreach { p =>
+ checkError(
+ exception = intercept[SparkException] {
+ CatalystSqlParser.parseDataType(s"timestamp($p)")
+ },
+ condition = "INVALID_TIMESTAMP_PRECISION",
+ parameters = Map("precision" -> p, "type" -> "TIMESTAMP_NTZ"))
+ }
+ }
}
withSQLConf(SQLConf.TIMESTAMP_TYPE.key ->
TimestampTypes.TIMESTAMP_LTZ.toString) {
assert(parse("timestamp") === TimestampType)
assert(parse("timestamp with local time zone") === TimestampType)
assert(parse("timestamp without time zone") === TimestampNTZType)
+ withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
+ assert(parse("timestamp(9)") === TimestampLTZNanosType(9))
+ // Bare TIMESTAMP(p) under LTZ default must surface as the LTZ error.
+ Seq("6", "10").foreach { p =>
+ checkError(
+ exception = intercept[SparkException] {
+ CatalystSqlParser.parseDataType(s"timestamp($p)")
+ },
+ condition = "INVALID_TIMESTAMP_PRECISION",
+ parameters = Map("precision" -> p, "type" -> "TIMESTAMP_LTZ"))
+ }
+ }
}
}
+ test("parse nanos timestamp types when the preview flag is enabled") {
+ withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
+ assert(parse("TIMESTAMP_NTZ(7)") === TimestampNTZNanosType(7))
+ assert(parse("TIMESTAMP_NTZ(8)") === TimestampNTZNanosType(8))
+ assert(parse("TIMESTAMP_NTZ(9)") === TimestampNTZNanosType(9))
+ assert(parse("TIMESTAMP_LTZ(7)") === TimestampLTZNanosType(7))
+ assert(parse("TIMESTAMP_LTZ(8)") === TimestampLTZNanosType(8))
+ assert(parse("TIMESTAMP_LTZ(9)") === TimestampLTZNanosType(9))
+ assert(parse("Timestamp_Ntz(9)") === TimestampNTZNanosType(9))
+ assert(parse("timestamp_ltz(7)") === TimestampLTZNanosType(7))
+ assert(parse("TIMESTAMP(9) WITHOUT TIME ZONE") ===
TimestampNTZNanosType(9))
+ assert(parse("TIMESTAMP(7) WITH LOCAL TIME ZONE") ===
TimestampLTZNanosType(7))
+ assert(parse("timestamp(8) without time zone") ===
TimestampNTZNanosType(8))
+ assert(parse("timestamp(8) with local time zone") ===
TimestampLTZNanosType(8))
+ }
+ }
+
+ test("nanos timestamp parser surface is gated by SQL conf, disabled by
default") {
+ val gatedSpellings = Seq(
+ "TIMESTAMP_NTZ(7)",
+ "TIMESTAMP_LTZ(9)",
+ "TIMESTAMP(9) WITHOUT TIME ZONE",
+ "TIMESTAMP(9) WITH LOCAL TIME ZONE",
+ "TIMESTAMP(9)")
+ gatedSpellings.foreach { spelling =>
+ checkError(
+ exception = intercept[SparkException] {
+ CatalystSqlParser.parseDataType(spelling)
+ },
+ condition = "FEATURE_NOT_ENABLED",
+ parameters = Map(
+ "featureName" -> "Nanosecond-precision timestamp types",
+ "configKey" -> "spark.sql.timestampNanosTypes.enabled",
+ "configValue" -> "true"))
+ }
+ // Bare unparameterized forms remain accepted even with the gate off.
+ assert(parse("TIMESTAMP_NTZ") === TimestampNTZType)
+ assert(parse("TIMESTAMP_LTZ") === TimestampType)
+ assert(parse("TIMESTAMP WITHOUT TIME ZONE") === TimestampNTZType)
+ assert(parse("TIMESTAMP WITH LOCAL TIME ZONE") === TimestampType)
+ }
+
// DataType parser accepts certain reserved keywords.
checkDataType(
"Struct<TABLE: string, DATE:boolean>",
@@ -241,4 +308,53 @@ class DataTypeParserSuite extends SparkFunSuite with
SQLHelper {
condition = "PARSE_SYNTAX_ERROR",
parameters = Map("error" -> "'WITH'", "hint" -> ""))
}
+
+ test("invalid precision of the nanos timestamp data type") {
+ withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
+ Seq("TIMESTAMP_NTZ" -> "TIMESTAMP_NTZ", "TIMESTAMP_LTZ" ->
"TIMESTAMP_LTZ").foreach {
+ case (spelling, errorType) =>
+ Seq(0, 1, 6, 10, 99).foreach { p =>
+ checkError(
+ exception = intercept[SparkException] {
+ CatalystSqlParser.parseDataType(s"$spelling($p)")
+ },
+ condition = "INVALID_TIMESTAMP_PRECISION",
+ parameters = Map("precision" -> p.toString, "type" -> errorType))
+ }
+ }
+ // Integer overflow: the grammar accepts the digits but toInt fails. Original
digits are preserved.
+ checkError(
+ exception = intercept[SparkException] {
+ CatalystSqlParser.parseDataType("TIMESTAMP_NTZ(99999999999)")
+ },
+ condition = "INVALID_TIMESTAMP_PRECISION",
+ parameters = Map("precision" -> "99999999999", "type" ->
"TIMESTAMP_NTZ"))
+ // TIMESTAMP(p) with zone aliases route to the corresponding nanos
type's error.
+ checkError(
+ exception = intercept[SparkException] {
+ CatalystSqlParser.parseDataType("TIMESTAMP(6) WITHOUT TIME ZONE")
+ },
+ condition = "INVALID_TIMESTAMP_PRECISION",
+ parameters = Map("precision" -> "6", "type" -> "TIMESTAMP_NTZ"))
+ checkError(
+ exception = intercept[SparkException] {
+ CatalystSqlParser.parseDataType("TIMESTAMP(10) WITH LOCAL TIME ZONE")
+ },
+ condition = "INVALID_TIMESTAMP_PRECISION",
+ parameters = Map("precision" -> "10", "type" -> "TIMESTAMP_LTZ"))
+ // Negative precision is rejected by the parser, not by the type
constructor.
+ checkError(
+ exception = intercept[ParseException] {
+ CatalystSqlParser.parseDataType("TIMESTAMP_NTZ(-1)")
+ },
+ condition = "PARSE_SYNTAX_ERROR",
+ parameters = Map("error" -> "'-'", "hint" -> ""))
+ checkError(
+ exception = intercept[ParseException] {
+ CatalystSqlParser.parseDataType("TIMESTAMP_LTZ(-100)")
+ },
+ condition = "PARSE_SYNTAX_ERROR",
+ parameters = Map("error" -> "'-'", "hint" -> ""))
+ }
+ }
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
index 1a7524dbc5a7..afa657c95ede 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
@@ -25,11 +25,13 @@ import org.json4s.jackson.JsonMethods
import org.apache.spark.{SparkException, SparkFunSuite,
SparkIllegalArgumentException}
import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution,
caseSensitiveResolution}
import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
+import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.types.{DataTypeUtils, PhysicalDataType,
UninitializedPhysicalType}
import org.apache.spark.sql.catalyst.util.{CollationFactory, StringConcat}
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes,
yearMonthIntervalTypes}
-class DataTypeSuite extends SparkFunSuite {
+class DataTypeSuite extends SparkFunSuite with SQLHelper {
private val UNICODE_COLLATION_ID =
CollationFactory.collationNameToId("UNICODE")
private val UTF8_LCASE_COLLATION_ID =
CollationFactory.collationNameToId("UTF8_LCASE")
@@ -257,12 +259,19 @@ class DataTypeSuite extends SparkFunSuite {
checkDataTypeFromJson(TimestampNTZType)
checkDataTypeFromDDL(TimestampNTZType)
-
checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION))
- checkDataTypeFromJson(TimestampLTZNanosType(8))
-
checkDataTypeFromJson(TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION))
-
checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION))
- checkDataTypeFromJson(TimestampNTZNanosType(8))
-
checkDataTypeFromJson(TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION))
+ test("SPARK-56876: from Json roundtrip for nanos timestamp types (preview
flag enabled)") {
+ withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
+ Seq(
+ TimestampLTZNanosType(TimestampLTZNanosType.MIN_PRECISION),
+ TimestampLTZNanosType(8),
+ TimestampLTZNanosType(TimestampLTZNanosType.MAX_PRECISION),
+ TimestampNTZNanosType(TimestampNTZNanosType.MIN_PRECISION),
+ TimestampNTZNanosType(8),
+ TimestampNTZNanosType(TimestampNTZNanosType.MAX_PRECISION)).foreach {
dt =>
+ assert(DataType.fromJson(dt.json) === dt)
+ }
+ }
+ }
checkDataTypeFromJson(StringType)
checkDataTypeFromDDL(StringType)
@@ -1484,64 +1493,84 @@ class DataTypeSuite extends SparkFunSuite {
}
test("SPARK-56876: parse timestamp with nanosecond precision from JSON") {
- // (json-type-name, sql-type-name-in-error, factory)
- val variants = Seq[(String, String, Int => DataType)](
- ("timestamp_ltz", "TIMESTAMP_LTZ", TimestampLTZNanosType(_)),
- ("timestamp_ntz", "TIMESTAMP_NTZ", TimestampNTZNanosType(_)))
- val overflowing = "9" * 20
-
- variants.foreach { case (name, sqlTypeName, factory) =>
- // Happy path across valid precisions, tolerant of surrounding
whitespace.
- TimestampLTZNanosType.MIN_PRECISION to
TimestampLTZNanosType.MAX_PRECISION foreach { n =>
- assert(DataType.fromJson(s"""\"$name($n)\"""") === factory(n))
- assert(DataType.fromJson(s"""\"$name( $n)\"""") === factory(n))
- assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n))
- }
-
- // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. The
overflowing
- // case verifies the original digit string is preserved instead of
leaking
- // NumberFormatException.
- Seq("0", "6", "10", overflowing).foreach { p =>
- checkError(
- exception = intercept[SparkException] {
- DataType.fromJson(s"""\"$name($p)\"""")
- },
- condition = "INVALID_TIMESTAMP_PRECISION",
- parameters = Map("precision" -> p, "type" -> sqlTypeName))
+ withSQLConf(SQLConf.TIMESTAMP_NANOS_TYPES_ENABLED.key -> "true") {
+ // (json-type-name, sql-type-name-in-error, factory)
+ val variants = Seq[(String, String, Int => DataType)](
+ ("timestamp_ltz", "TIMESTAMP_LTZ", TimestampLTZNanosType(_)),
+ ("timestamp_ntz", "TIMESTAMP_NTZ", TimestampNTZNanosType(_)))
+ val overflowing = "9" * 20
+
+ variants.foreach { case (name, sqlTypeName, factory) =>
+ // Happy path across valid precisions, tolerant of surrounding
whitespace.
+ TimestampLTZNanosType.MIN_PRECISION to
TimestampLTZNanosType.MAX_PRECISION foreach { n =>
+ assert(DataType.fromJson(s"""\"$name($n)\"""") === factory(n))
+ assert(DataType.fromJson(s"""\"$name( $n)\"""") === factory(n))
+ assert(DataType.fromJson(s"""\"$name($n )\"""") === factory(n))
+ }
+
+ // Out-of-range precisions surface as INVALID_TIMESTAMP_PRECISION. The
overflowing
+ // case verifies the original digit string is preserved instead of
leaking
+ // NumberFormatException.
+ Seq("0", "6", "10", overflowing).foreach { p =>
+ checkError(
+ exception = intercept[SparkException] {
+ DataType.fromJson(s"""\"$name($p)\"""")
+ },
+ condition = "INVALID_TIMESTAMP_PRECISION",
+ parameters = Map("precision" -> p, "type" -> sqlTypeName))
+ }
+
+ // Malformed precision forms that don't match the regex fall through to
+ // INVALID_JSON_DATA_TYPE: negative, empty parens, non-numeric, and
uppercase
+ // (JSON type-name convention is lowercase).
+ Seq(
+ s"$name(-1)",
+ s"$name()",
+ s"$name(abc)",
+ s"${name.toUpperCase(Locale.ROOT)}(7)").foreach { raw =>
+ checkError(
+ exception = intercept[SparkIllegalArgumentException] {
+ DataType.fromJson(s"""\"$raw\"""")
+ },
+ condition = "INVALID_JSON_DATA_TYPE",
+ parameters = Map("invalidType" -> raw))
+ }
}
- // Malformed precision forms that don't match the regex fall through to
- // INVALID_JSON_DATA_TYPE: negative, empty parens, non-numeric, and
uppercase
- // (JSON type-name convention is lowercase).
- Seq(
- s"$name(-1)",
- s"$name()",
- s"$name(abc)",
- s"${name.toUpperCase(Locale.ROOT)}(7)").foreach { raw =>
- checkError(
- exception = intercept[SparkIllegalArgumentException] {
- DataType.fromJson(s"""\"$raw\"""")
- },
- condition = "INVALID_JSON_DATA_TYPE",
- parameters = Map("invalidType" -> raw))
- }
+ // JSON round-trip for nanos timestamp types inside struct, array, and
map.
+ val structWithNanos = StructType(Seq(
+ StructField("ntz", TimestampNTZNanosType(7)),
+ StructField("ltz", TimestampLTZNanosType(8))))
+ assert(DataType.fromJson(structWithNanos.json) === structWithNanos)
+ val arrayOfNanos = ArrayType(TimestampNTZNanosType(9), containsNull =
false)
+ assert(DataType.fromJson(arrayOfNanos.json) === arrayOfNanos)
+ val mapOfNanos = MapType(StringType, TimestampNTZNanosType(7),
valueContainsNull = true)
+ assert(DataType.fromJson(mapOfNanos.json) === mapOfNanos)
}
- // JSON round-trip for nanos timestamp types inside struct, array, and map.
- val structWithNanos = StructType(Seq(
- StructField("ntz", TimestampNTZNanosType(7)),
- StructField("ltz", TimestampLTZNanosType(8))))
- assert(DataType.fromJson(structWithNanos.json) === structWithNanos)
- val arrayOfNanos = ArrayType(TimestampNTZNanosType(9), containsNull =
false)
- assert(DataType.fromJson(arrayOfNanos.json) === arrayOfNanos)
- val mapOfNanos = MapType(StringType, TimestampNTZNanosType(7),
valueContainsNull = true)
- assert(DataType.fromJson(mapOfNanos.json) === mapOfNanos)
-
- // Bare names without parens still map to the legacy single-precision
types.
+ // Bare names without parens still map to the legacy single-precision
types, regardless
+ // of the preview flag.
assert(DataType.fromJson("\"timestamp_ltz\"") === TimestampType)
assert(DataType.fromJson("\"timestamp_ntz\"") === TimestampNTZType)
}
+ test("SPARK-56965: JSON parser rejects nanos timestamp types when preview
flag is off") {
+ Seq(
+ "\"timestamp_ltz(7)\"" -> "Nanosecond-precision timestamp types",
+ "\"timestamp_ntz(9)\"" -> "Nanosecond-precision timestamp
types").foreach {
+ case (json, featureName) =>
+ checkError(
+ exception = intercept[SparkException] {
+ DataType.fromJson(json)
+ },
+ condition = "FEATURE_NOT_ENABLED",
+ parameters = Map(
+ "featureName" -> featureName,
+ "configKey" -> "spark.sql.timestampNanosTypes.enabled",
+ "configValue" -> "true"))
+ }
+ }
+
test("singleton DataType equality after deserialization") {
// Singleton DataTypes that use `case object` pattern matching (e.g.,
`case BinaryType =>`).
// If a non-singleton instance is created (e.g., via Kryo deserialization
which doesn't call
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]