(spark) branch master updated: [SPARK-45891][SQL][FOLLOWUP] Disable `spark.sql.variant.allowReadingShredded` by default

dongjoon Tue, 11 Feb 2025 06:46:02 -0800

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new ba3e2713c8ff [SPARK-45891][SQL][FOLLOWUP] Disable `spark.sql.variant.allowReadingShredded` by default
ba3e2713c8ff is described below

commit ba3e2713c8ff3b1b27ac0b2d28b9173459a2a3c2
Author: Cheng Pan <cheng...@apache.org>
AuthorDate: Tue Feb 11 06:45:45 2025 -0800

    [SPARK-45891][SQL][FOLLOWUP] Disable `spark.sql.variant.allowReadingShredded` by default
    
    ### What changes were proposed in this pull request?
    
    Disable `spark.sql.variant.allowReadingShredded` by default
    
    ### Why are the changes needed?
    
    https://github.com/apache/parquet-format/pull/461 made incompatible changes
    to the shredding spec. If Spark delivers the current shredding implementation
    as-is in Spark 4.0, additional migration/compatibility efforts will be required
    in the future.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No. Variant is an unreleased feature.
    
    ### How was this patch tested?
    
    Pass GHA.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #49874 from pan3793/SPARK-45891-followup.
    
    Authored-by: Cheng Pan <cheng...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala    | 2 +-
 .../test/scala/org/apache/spark/sql/VariantShreddingSuite.scala   | 8 +++++---
 .../datasources/parquet/ParquetVariantShreddingSuite.scala        | 3 +++
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 84b8e1264be9..a2811fb843aa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -4755,7 +4755,7 @@ object SQLConf {
         "When false, it only reads unshredded variant.")
       .version("4.0.0")
       .booleanConf
-      .createWithDefault(true)
+      .createWithDefault(false)
 
   val PUSH_VARIANT_INTO_SCAN =
     buildConf("spark.sql.variant.pushVariantIntoScan")
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala
index 3443028ba45b..fee375db10ad 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantShreddingSuite.scala
@@ -74,9 +74,11 @@ class VariantShreddingSuite extends QueryTest with 
SharedSparkSession with Parqu
   def isPushEnabled: Boolean = 
SQLConf.get.getConf(SQLConf.PUSH_VARIANT_INTO_SCAN)
 
   def testWithTempPath(name: String)(block: File => Unit): Unit = test(name) {
-    withPushConfigs() {
-      withTempPath { path =>
-        block(path)
+    withSQLConf(SQLConf.VARIANT_ALLOW_READING_SHREDDED.key-> "true") {
+      withPushConfigs() {
+        withTempPath { path =>
+          block(path)
+        }
       }
     }
   }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala
index 8bb5a4b1d0bc..4da5c264655d 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVariantShreddingSuite.scala
@@ -49,6 +49,7 @@ class ParquetVariantShreddingSuite extends QueryTest with 
ParquetTest with Share
       "a struct<value binary, typed_value int>, b struct<value binary, 
typed_value string>," +
       "c struct<value binary, typed_value decimal(15, 1)>>>"
     withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString,
+      SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString,
       SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) {
       df.write.mode("overwrite").parquet(dir.getAbsolutePath)
 
@@ -122,6 +123,7 @@ class ParquetVariantShreddingSuite extends QueryTest with 
ParquetTest with Share
     val fullSchema = "v struct<metadata binary, value binary, typed_value 
array<" +
       "struct<value binary, typed_value int>>>"
     withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString,
+      SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString,
       SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) {
       df.write.mode("overwrite").parquet(dir.getAbsolutePath)
 
@@ -186,6 +188,7 @@ class ParquetVariantShreddingSuite extends QueryTest with 
ParquetTest with Share
       "arr array<struct<metadata binary, value binary>>, " +
       "m map<string, struct<metadata binary, value binary>>"
     withSQLConf(SQLConf.VARIANT_WRITE_SHREDDING_ENABLED.key -> true.toString,
+      SQLConf.VARIANT_ALLOW_READING_SHREDDED.key -> true.toString,
       SQLConf.VARIANT_FORCE_SHREDDING_SCHEMA_FOR_TEST.key -> schema) {
       df.write.mode("overwrite").parquet(dir.getAbsolutePath)
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-45891][SQL][FOLLOWUP] Disable `spark.sql.variant.allowReadingShredded` by default

Reply via email to