This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ba3e2713c8ff [SPARK-45891][SQL][FOLLOWUP] Disable `spark.sql.variant.allowReadingShredded` by default ba3e2713c8ff is described below commit ba3e2713c8ff3b1b27ac0b2d28b9173459a2a3c2 Author: Cheng Pan <cheng...@apache.org> AuthorDate: Tue Feb 11 06:45:45 2025 -0800 [SPARK-45891][SQL][FOLLOWUP] Disable `spark.sql.variant.allowReadingShredded` by default ### What changes were proposed in this pull request? Disable `spark.sql.variant.allowReadingShredded` by default ### Why are the changes needed? https://github.com/apache/parquet-format/pull/461 made incompatible changes to the shredding spec. If Spark delivers the current shredding implementation as-is in Spark 4.0, additional migration/compatibility efforts will be required in the future. ### Does this PR introduce _any_ user-facing change? No, variant is an unreleased feature. ### How was this patch tested? Pass GHA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #49874 from pan3793/SPARK-45891-followup. 
Authored-by: Cheng Pan <cheng...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- .../test/scala/org/apache/spark/sql/VariantShreddingSuite.scala | 8 +++++--- .../datasources/parquet/ParquetVariantShreddingSuite.scala | 3 +++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 84b8e1264be9..a2811fb843aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -4755,7 +4755,7 @@ object SQLConf { "When false, it only reads unshredded variant.") .version("4.0.0") .booleanConf - .createWithDefault(true) + .createWithDefault(false) val PUSH_VARIANT_INTO_SCAN = buildConf("spark.sql.variant.pushVariantIntoScan") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala index 3443028ba45b..fee375db10ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala @@ -74,9 +74,11 @@ class VariantShreddingSuite extends QueryTest with SharedSparkSession with Parqu def isPushEnabled: Boolean = SQLConf.get.getConf(SQLConf.PUSH_VARIANT_INTO_SCAN) def testWithTempPath(name: String)(block: File => Unit): Unit = test(name) { - withPushConfigs() { - withTempPath { path => - block(path) + withSQLConf(SQLConf.VARIANT_ALLOW_READING_SHREDDED.key-> "true") { + withPushConfigs() { + withTempPath { path => + block(path) + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala index 8bb5a4b1d0bc..4da5c264655d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala @@ -49,6 +49,7 @@ class ParquetVariantShreddingSuite extends QueryTest with ParquetTest with Share "a struct<value binary, typed_value int>, b struct<value binary, typed_value string>," + "c struct<value binary, typed_value decimal(15, 1)>>>" withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString, + SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString, SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) { df.write.mode("overwrite").parquet(dir.getAbsolutePath) @@ -122,6 +123,7 @@ class ParquetVariantShreddingSuite extends QueryTest with ParquetTest with Share val fullSchema = "v struct<metadata binary, value binary, typed_value array<" + "struct<value binary, typed_value int>>>" withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString, + SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString, SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) { df.write.mode("overwrite").parquet(dir.getAbsolutePath) @@ -186,6 +188,7 @@ class ParquetVariantShreddingSuite extends QueryTest with ParquetTest with Share "arr array<struct<metadata binary, value binary>>, " + "m map<string, struct<metadata binary, value binary>>" withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString, + SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString, SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) { df.write.mode("overwrite").parquet(dir.getAbsolutePath) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: 
commits-h...@spark.apache.org