This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new dfb916f  [SPARK-31361][SQL][TESTS][FOLLOWUP] Check non-vectorized Parquet reader while date/timestamp rebasing
dfb916f is described below

commit dfb916f6b65b05dd7fd58853d97e06bc7e75d8be
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Thu May 7 07:52:29 2020 +0000

    [SPARK-31361][SQL][TESTS][FOLLOWUP] Check non-vectorized Parquet reader while date/timestamp rebasing

    ### What changes were proposed in this pull request?
    In this PR, I propose to modify two tests of `ParquetIOSuite`:
    - SPARK-31159: rebasing timestamps in write
    - SPARK-31159: rebasing dates in write

    to check the non-vectorized Parquet reader together with the vectorized reader.

    ### Why are the changes needed?
    To improve test coverage and make sure that the non-vectorized reader behaves similarly to the vectorized reader.

    ### Does this PR introduce _any_ user-facing change?
    No

    ### How was this patch tested?
    By running `ParquetIOSuite`:
    ```
    $ ./build/sbt "test:testOnly *ParquetIOSuite"
    ```

    Closes #28466 from MaxGekk/test-novec-rebase-ParquetIOSuite.

    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 272d229005b7166ab83bbb8f44a4d5e9d89424a1)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../datasources/parquet/ParquetIOSuite.scala       | 54 +++++++++++++---------
 1 file changed, 32 insertions(+), 22 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 7f0a228..af66aa0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -952,18 +952,24 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession
           .write
           .parquet(path)
       }
-      // The file metadata indicates if it needs rebase or not, so we can always get the
-      // correct result regardless of the "rebaseInRead" config.
-      Seq(true, false).foreach { rebase =>
-        withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
-          checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(tsStr)))
-        }
-      }
-      // Force to not rebase to prove the written datetime values are rebased and we will get
-      // wrong result if we don't rebase while reading.
-      withSQLConf("spark.test.forceNoRebase" -> "true") {
-        checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(nonRebased)))
+      Seq(false, true).foreach { vectorized =>
+        withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) {
+          // The file metadata indicates if it needs rebase or not, so we can always get the
+          // correct result regardless of the "rebaseInRead" config.
+          Seq(true, false).foreach { rebase =>
+            withSQLConf(
+              SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) {
+              checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(tsStr)))
+            }
+          }
+
+          // Force to not rebase to prove the written datetime values are rebased
+          // and we will get wrong result if we don't rebase while reading.
+ withSQLConf("spark.test.forceNoRebase" -> "true") { + checkAnswer(spark.read.parquet(path), Row(Timestamp.valueOf(nonRebased))) + } + } } } } @@ -981,18 +987,22 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession .parquet(path) } - // The file metadata indicates if it needs rebase or not, so we can always get the correct - // result regardless of the "rebaseInRead" config. - Seq(true, false).foreach { rebase => - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) { - checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-01"))) - } - } + Seq(false, true).foreach { vectorized => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> vectorized.toString) { + // The file metadata indicates if it needs rebase or not, so we can always get the correct + // result regardless of the "rebaseInRead" config. + Seq(true, false).foreach { rebase => + withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_DATETIME_IN_READ.key -> rebase.toString) { + checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-01"))) + } + } - // Force to not rebase to prove the written datetime values are rebased and we will get - // wrong result if we don't rebase while reading. - withSQLConf("spark.test.forceNoRebase" -> "true") { - checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-07"))) + // Force to not rebase to prove the written datetime values are rebased and we will get + // wrong result if we don't rebase while reading. + withSQLConf("spark.test.forceNoRebase" -> "true") { + checkAnswer(spark.read.parquet(path), Row(Date.valueOf("1001-01-07"))) + } + } } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org