This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1005cd5576ef [SPARK-46447][SQL] Remove the legacy datetime rebasing SQL configs 1005cd5576ef is described below commit 1005cd5576ef073afee243848bcad5e5f4a9d309 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Wed Dec 20 20:22:09 2023 +0300 [SPARK-46447][SQL] Remove the legacy datetime rebasing SQL configs ### What changes were proposed in this pull request? In the PR, I propose to remove the already deprecated SQL configs (alternatives to other configs): - spark.sql.legacy.parquet.int96RebaseModeInWrite - spark.sql.legacy.parquet.datetimeRebaseModeInWrite - spark.sql.legacy.parquet.int96RebaseModeInRead - spark.sql.legacy.avro.datetimeRebaseModeInWrite - spark.sql.legacy.avro.datetimeRebaseModeInRead ### Why are the changes needed? To improve code maintenance. ### Does this PR introduce _any_ user-facing change? Should not. ### How was this patch tested? By existing test suites. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44402 from MaxGekk/remove-legacy-rebase-confs-2. Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- docs/sql-migration-guide.md | 6 ++++ .../org/apache/spark/sql/internal/SQLConf.scala | 36 +++++++++++----------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 4e8e2422d7e0..30a37d97042a 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -30,6 +30,12 @@ license: | - Since Spark 4.0, `spark.sql.parquet.compression.codec` drops the support of codec name `lz4raw`, please use `lz4_raw` instead. - Since Spark 4.0, when overflowing during casting timestamp to byte/short/int under non-ansi mode, Spark will return null instead of a wrapping value. 
- Since Spark 4.0, the `encode()` and `decode()` functions support only the following charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'. To restore the previous behavior when the function accepts charsets of the current JDK used by Spark, set `spark.sql.legacy.javaCharsets` to `true`. +- Since Spark 4.0, the legacy datetime rebasing SQL configs with the prefix `spark.sql.legacy` are removed. To restore the previous behavior, use the following configs: + - `spark.sql.parquet.int96RebaseModeInWrite` instead of `spark.sql.legacy.parquet.int96RebaseModeInWrite` + - `spark.sql.parquet.datetimeRebaseModeInWrite` instead of `spark.sql.legacy.parquet.datetimeRebaseModeInWrite` + - `spark.sql.parquet.int96RebaseModeInRead` instead of `spark.sql.legacy.parquet.int96RebaseModeInRead` + - `spark.sql.avro.datetimeRebaseModeInWrite` instead of `spark.sql.legacy.avro.datetimeRebaseModeInWrite` + - `spark.sql.avro.datetimeRebaseModeInRead` instead of `spark.sql.legacy.avro.datetimeRebaseModeInRead` ## Upgrading from Spark SQL 3.4 to 3.5 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 6404779f30ac..d54cb3756638 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -4081,7 +4081,6 @@ object SQLConf { "When EXCEPTION, which is the default, Spark will fail the writing if it sees ancient " + "timestamps that are ambiguous between the two calendars.") .version("3.1.0") - .withAlternative("spark.sql.legacy.parquet.int96RebaseModeInWrite") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) @@ -4099,7 +4098,6 @@ object SQLConf { "TIMESTAMP_MILLIS, TIMESTAMP_MICROS. 
The INT96 type has the separate config: " + s"${PARQUET_INT96_REBASE_MODE_IN_WRITE.key}.") .version("3.0.0") - .withAlternative("spark.sql.legacy.parquet.datetimeRebaseModeInWrite") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) @@ -4115,7 +4113,6 @@ object SQLConf { "timestamps that are ambiguous between the two calendars. This config is only effective " + "if the writer info (like Spark, Hive) of the Parquet files is unknown.") .version("3.1.0") - .withAlternative("spark.sql.legacy.parquet.int96RebaseModeInRead") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) @@ -4149,7 +4146,6 @@ object SQLConf { "When EXCEPTION, which is the default, Spark will fail the writing if it sees " + "ancient dates/timestamps that are ambiguous between the two calendars.") .version("3.0.0") - .withAlternative("spark.sql.legacy.avro.datetimeRebaseModeInWrite") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) @@ -4165,7 +4161,6 @@ object SQLConf { "ancient dates/timestamps that are ambiguous between the two calendars. 
This config is " + "only effective if the writer info (like Spark, Hive) of the Avro files is unknown.") .version("3.0.0") - .withAlternative("spark.sql.legacy.avro.datetimeRebaseModeInRead") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) @@ -4657,22 +4652,12 @@ object SQLConf { s"Set '${LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key}' to false instead."), DeprecatedConfig("spark.sql.sources.schemaStringLengthThreshold", "3.2", s"Use '${HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD.key}' instead."), - DeprecatedConfig(PARQUET_INT96_REBASE_MODE_IN_WRITE.alternatives.head, "3.2", - s"Use '${PARQUET_INT96_REBASE_MODE_IN_WRITE.key}' instead."), - DeprecatedConfig(PARQUET_INT96_REBASE_MODE_IN_READ.alternatives.head, "3.2", - s"Use '${PARQUET_INT96_REBASE_MODE_IN_READ.key}' instead."), - DeprecatedConfig(PARQUET_REBASE_MODE_IN_WRITE.alternatives.head, "3.2", - s"Use '${PARQUET_REBASE_MODE_IN_WRITE.key}' instead."), - DeprecatedConfig(PARQUET_REBASE_MODE_IN_READ.alternatives.head, "3.2", - s"Use '${PARQUET_REBASE_MODE_IN_READ.key}' instead."), - DeprecatedConfig(AVRO_REBASE_MODE_IN_WRITE.alternatives.head, "3.2", - s"Use '${AVRO_REBASE_MODE_IN_WRITE.key}' instead."), - DeprecatedConfig(AVRO_REBASE_MODE_IN_READ.alternatives.head, "3.2", - s"Use '${AVRO_REBASE_MODE_IN_READ.key}' instead."), DeprecatedConfig(LEGACY_REPLACE_DATABRICKS_SPARK_AVRO_ENABLED.key, "3.2", """Use `.format("avro")` in `DataFrameWriter` or `DataFrameReader` instead."""), DeprecatedConfig(COALESCE_PARTITIONS_MIN_PARTITION_NUM.key, "3.2", s"Use '${COALESCE_PARTITIONS_MIN_PARTITION_SIZE.key}' instead."), + DeprecatedConfig(PARQUET_REBASE_MODE_IN_READ.alternatives.head, "3.2", + s"Use '${PARQUET_REBASE_MODE_IN_READ.key}' instead."), DeprecatedConfig(ESCAPED_STRING_LITERALS.key, "4.0", "Use raw string literals with the `r` prefix instead. 
"), DeprecatedConfig("spark.connect.copyFromLocalToFs.allowDestLocal", "4.0", @@ -4735,7 +4720,22 @@ object SQLConf { RemovedConfig("spark.sql.hive.verifyPartitionPath", "4.0.0", "false", s"This config was replaced by '${IGNORE_MISSING_FILES.key}'."), RemovedConfig("spark.sql.optimizer.runtimeFilter.semiJoinReduction.enabled", "4.0.0", "false", - "This optimizer config is useless as runtime filter cannot be an IN subquery now.") + "This optimizer config is useless as runtime filter cannot be an IN subquery now."), + RemovedConfig("spark.sql.legacy.parquet.int96RebaseModeInWrite", "4.0.0", + LegacyBehaviorPolicy.CORRECTED.toString, + s"Use '${PARQUET_INT96_REBASE_MODE_IN_WRITE.key}' instead."), + RemovedConfig("spark.sql.legacy.parquet.int96RebaseModeInRead", "4.0.0", + LegacyBehaviorPolicy.CORRECTED.toString, + s"Use '${PARQUET_INT96_REBASE_MODE_IN_READ.key}' instead."), + RemovedConfig("spark.sql.legacy.parquet.datetimeRebaseModeInWrite", "4.0.0", + LegacyBehaviorPolicy.CORRECTED.toString, + s"Use '${PARQUET_REBASE_MODE_IN_WRITE.key}' instead."), + RemovedConfig("spark.sql.legacy.avro.datetimeRebaseModeInWrite", "4.0.0", + LegacyBehaviorPolicy.CORRECTED.toString, + s"Use '${AVRO_REBASE_MODE_IN_WRITE.key}' instead."), + RemovedConfig("spark.sql.legacy.avro.datetimeRebaseModeInRead", "4.0.0", + LegacyBehaviorPolicy.CORRECTED.toString, + s"Use '${AVRO_REBASE_MODE_IN_READ.key}' instead.") ) Map(configs.map { cfg => cfg.key -> cfg } : _*) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org