This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 60b4c0b690ad [SPARK-47368][SQL][3.5] Remove inferTimestampNTZ config check in ParquetRo… 60b4c0b690ad is described below commit 60b4c0b690ad980ff4eef93180c70d6e64e5e347 Author: Gengliang Wang <gengli...@apache.org> AuthorDate: Tue Mar 12 22:42:45 2024 -0700 [SPARK-47368][SQL][3.5] Remove inferTimestampNTZ config check in ParquetRo… ### What changes were proposed in this pull request? The configuration `spark.sql.parquet.inferTimestampNTZ.enabled` is not related to the parquet row converter. This PR is to remove the config check `spark.sql.parquet.inferTimestampNTZ.enabled` in the ParquetRowConverter. ### Why are the changes needed? Bug fix. Otherwise reading TimestampNTZ columns may fail when `spark.sql.parquet.inferTimestampNTZ.enabled` is disabled. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New UT ### Was this patch authored or co-authored using generative AI tooling? No Closes #45492 from gengliangwang/PR_TOOL_PICK_PR_45480_BRANCH-3.5. 
Authored-by: Gengliang Wang <gengli...@apache.org> Signed-off-by: Gengliang Wang <gengli...@apache.org> (cherry picked from commit 3018a5d8cd96a569b3bfe7e11b4b26fb4fb54f32) Signed-off-by: Gengliang Wang <gengli...@apache.org> --- .../datasources/parquet/ParquetRowConverter.scala | 9 +++--- .../parquet/ParquetSchemaConverter.scala | 7 ----- .../datasources/parquet/ParquetQuerySuite.scala | 36 +++++++++++++--------- 3 files changed, 25 insertions(+), 27 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 9101e7d0ac52..1e07c6db2a06 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -505,11 +505,10 @@ private[parquet] class ParquetRowConverter( // can be read as Spark's TimestampNTZ type. This is to avoid mistakes in reading the timestamp // values. private def canReadAsTimestampNTZ(parquetType: Type): Boolean = - schemaConverter.isTimestampNTZEnabled() && - parquetType.asPrimitiveType().getPrimitiveTypeName == INT64 && - parquetType.getLogicalTypeAnnotation.isInstanceOf[TimestampLogicalTypeAnnotation] && - !parquetType.getLogicalTypeAnnotation - .asInstanceOf[TimestampLogicalTypeAnnotation].isAdjustedToUTC + parquetType.asPrimitiveType().getPrimitiveTypeName == INT64 && + parquetType.getLogicalTypeAnnotation.isInstanceOf[TimestampLogicalTypeAnnotation] && + !parquetType.getLogicalTypeAnnotation + .asInstanceOf[TimestampLogicalTypeAnnotation].isAdjustedToUTC /** * Parquet converter for strings. A dictionary is used to minimize string decoding cost. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala index 9c9e7ce729c1..a78b96ae6fcc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala @@ -72,13 +72,6 @@ class ParquetToSparkSchemaConverter( inferTimestampNTZ = conf.get(SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED.key).toBoolean, nanosAsLong = conf.get(SQLConf.LEGACY_PARQUET_NANOS_AS_LONG.key).toBoolean) - /** - * Returns true if TIMESTAMP_NTZ type is enabled in this ParquetToSparkSchemaConverter. - */ - def isTimestampNTZEnabled(): Boolean = { - inferTimestampNTZ - } - /** * Converts Parquet [[MessageType]] `parquetSchema` to a Spark SQL [[StructType]]. */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 828ec39c7d72..29cb224c8787 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -160,21 +160,27 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS } } - test("SPARK-36182: writing and reading TimestampNTZType column") { - withTable("ts") { - sql("create table ts (c1 timestamp_ntz) using parquet") - sql("insert into ts values (timestamp_ntz'2016-01-01 10:11:12.123456')") - sql("insert into ts values (null)") - sql("insert into ts values (timestamp_ntz'1965-01-01 10:11:12.123456')") - val expectedSchema = new StructType().add(StructField("c1", TimestampNTZType)) - assert(spark.table("ts").schema == 
expectedSchema) - val expected = Seq( - ("2016-01-01 10:11:12.123456"), - (null), - ("1965-01-01 10:11:12.123456")) - .toDS().select($"value".cast("timestamp_ntz")) - withAllParquetReaders { - checkAnswer(sql("select * from ts"), expected) + test("SPARK-36182, SPARK-47368: writing and reading TimestampNTZType column") { + Seq("true", "false").foreach { inferNTZ => + // The SQL Conf PARQUET_INFER_TIMESTAMP_NTZ_ENABLED should not affect the file written + // by Spark. + withSQLConf(SQLConf.PARQUET_INFER_TIMESTAMP_NTZ_ENABLED.key -> inferNTZ) { + withTable("ts") { + sql("create table ts (c1 timestamp_ntz) using parquet") + sql("insert into ts values (timestamp_ntz'2016-01-01 10:11:12.123456')") + sql("insert into ts values (null)") + sql("insert into ts values (timestamp_ntz'1965-01-01 10:11:12.123456')") + val expectedSchema = new StructType().add(StructField("c1", TimestampNTZType)) + assert(spark.table("ts").schema == expectedSchema) + val expected = Seq( + ("2016-01-01 10:11:12.123456"), + (null), + ("1965-01-01 10:11:12.123456")) + .toDS().select($"value".cast("timestamp_ntz")) + withAllParquetReaders { + checkAnswer(sql("select * from ts"), expected) + } + } } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org