This is an automated email from the ASF dual-hosted git repository.

chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new a70837d11b [GLUTEN-11238][VL] Deprecate parquet encrypted check config 
(#11240)
a70837d11b is described below

commit a70837d11b7747037dc88146f0fc21a04c739e74
Author: Jin Chengcheng <[email protected]>
AuthorDate: Fri Dec 5 06:52:08 2025 +0000

    [GLUTEN-11238][VL] Deprecate parquet encrypted check config (#11240)
---
 .../gluten/backendsapi/velox/VeloxBackend.scala    |  1 -
 .../apache/gluten/utils/ParquetMetadataUtils.scala | 36 +++++++++-------------
 .../utils/ParquetEncryptionDetectionSuite.scala    |  1 -
 docs/Configuration.md                              |  2 --
 .../org/apache/gluten/config/GlutenConfig.scala    | 28 -----------------
 5 files changed, 15 insertions(+), 53 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index 743e6bd9e5..91f20dbc77 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -200,7 +200,6 @@ object VeloxBackendSettings extends BackendSettingsApi {
         return None
       }
       val fileLimit = GlutenConfig.get.parquetMetadataFallbackFileLimit
-        .max(GlutenConfig.get.parquetEncryptionValidationFileLimit)
       val parquetOptions = new ParquetOptions(CaseInsensitiveMap(properties), 
SQLConf.get)
       val parquetMetadataValidationResult =
         ParquetMetadataUtils.validateMetadata(rootPaths, hadoopConf, 
parquetOptions, fileLimit)
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
index 1085630dd5..4ea4ebad65 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/utils/ParquetMetadataUtils.scala
@@ -48,20 +48,22 @@ object ParquetMetadataUtils {
       parquetOptions: ParquetOptions,
       fileLimit: Int
   ): Option[String] = {
+    var remaining = fileLimit
     rootPaths.foreach {
       rootPath =>
         val fs = new Path(rootPath).getFileSystem(hadoopConf)
         try {
-          val reason =
+          val (maybeReason, filesScanned) =
             checkForUnexpectedMetadataWithLimit(
               fs,
               new Path(rootPath),
               hadoopConf,
               parquetOptions,
               fileLimit = fileLimit)
-          if (reason.nonEmpty) {
-            return reason
+          if (maybeReason.isDefined) {
+            return maybeReason
           }
+          remaining -= filesScanned
         } catch {
           case e: Exception =>
         }
@@ -95,7 +97,8 @@ object ParquetMetadataUtils {
    * @param fileLimit
    *   Maximum number of files to inspect
    * @return
-   *   True if an encrypted file is detected, false otherwise
+   *   (String, Int): the reason if unsupported metadata is detected, empty 
otherwise, and the
+   *   number of checked files
    */
   private def checkForUnexpectedMetadataWithLimit(
       fs: FileSystem,
@@ -103,7 +106,7 @@ object ParquetMetadataUtils {
       conf: Configuration,
       parquetOptions: ParquetOptions,
       fileLimit: Int
-  ): Option[String] = {
+  ): (Option[String], Int) = {
     val filesIterator = fs.listFiles(path, true)
     var checkedFileCount = 0
     while (filesIterator.hasNext && checkedFileCount < fileLimit) {
@@ -111,10 +114,10 @@ object ParquetMetadataUtils {
       checkedFileCount += 1
       val metadataUnsupported = isUnsupportedMetadata(fileStatus, conf, 
parquetOptions)
       if (metadataUnsupported.isDefined) {
-        return metadataUnsupported
+        return (metadataUnsupported, checkedFileCount)
       }
     }
-    None
+    (None, checkedFileCount)
   }
 
   /**
@@ -126,9 +129,7 @@ object ParquetMetadataUtils {
       fileStatus: LocatedFileStatus,
       conf: Configuration,
       parquetOptions: ParquetOptions): Option[String] = {
-    val isEncryptionValidationEnabled = 
GlutenConfig.get.parquetEncryptionValidationEnabled
-    val isMetadataValidationEnabled = 
GlutenConfig.get.parquetMetadataValidationEnabled
-    if (!isMetadataValidationEnabled && !isEncryptionValidationEnabled) {
+    if (!GlutenConfig.get.parquetMetadataValidationEnabled) {
       return None
     }
     val footer =
@@ -136,9 +137,6 @@ object ParquetMetadataUtils {
         ParquetFooterReader.readFooter(conf, fileStatus, 
ParquetMetadataConverter.NO_FILTER)
       } catch {
         case e: Exception if ExceptionUtils.hasCause(e, 
classOf[ParquetCryptoRuntimeException]) =>
-          if (!isEncryptionValidationEnabled) {
-            return None
-          }
           return Some("Encrypted Parquet footer detected.")
         case _: RuntimeException =>
          // Ignored as it could be a "Not a Parquet file" exception.
@@ -149,19 +147,15 @@ object ParquetMetadataUtils {
       isTimezoneFoundInMetadata(footer, parquetOptions)
     )
 
-    if (isMetadataValidationEnabled) {
-      for (check <- validationChecks) {
-        if (check.isDefined) {
-          return check
-        }
+    for (check <- validationChecks) {
+      if (check.isDefined) {
+        return check
       }
     }
 
     // Spark versions before 3.4 use toString to check if the data is 
encrypted,
     // so place the check at the end
-    if (
-      isEncryptionValidationEnabled && 
SparkShimLoader.getSparkShims.isParquetFileEncrypted(footer)
-    ) {
+    if (SparkShimLoader.getSparkShims.isParquetFileEncrypted(footer)) {
       return Some("Encrypted Parquet file detected.")
     }
     None
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/utils/ParquetEncryptionDetectionSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/utils/ParquetEncryptionDetectionSuite.scala
index 6003caaa5a..834fa8907f 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/utils/ParquetEncryptionDetectionSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/utils/ParquetEncryptionDetectionSuite.scala
@@ -73,7 +73,6 @@ class ParquetEncryptionDetectionSuite extends 
SharedSparkSession {
 
   override def sparkConf: SparkConf = {
     super.sparkConf
-      .set(GlutenConfig.ENCRYPTED_PARQUET_FALLBACK_ENABLED.key, "true")
       .set(GlutenConfig.PARQUET_UNEXPECTED_METADATA_FALLBACK_ENABLED.key, 
"true")
   }
 
diff --git a/docs/Configuration.md b/docs/Configuration.md
index 9521f47478..8ef5060351 100644
--- a/docs/Configuration.md
+++ b/docs/Configuration.md
@@ -115,8 +115,6 @@ nav_order: 15
 | spark.gluten.sql.columnarToRowMemoryThreshold                      | 64MB    
          |
 | spark.gluten.sql.countDistinctWithoutExpand                        | false   
          | Convert Count Distinct to a UDAF called count_distinct to prevent 
SparkPlanner converting it to Expand+Count. WARNING: When enabled, count 
distinct queries will fail to fallback!!!                                       
                                                                                
                                                                    |
 | spark.gluten.sql.extendedColumnPruning.enabled                     | true    
          | Do extended nested column pruning for cases ignored by vanilla 
Spark.                                                                          
                                                                                
                                                                                
                                                                |
-| spark.gluten.sql.fallbackEncryptedParquet                          | 
&lt;undefined&gt; | If enabled, Gluten will not offload scan when encrypted 
parquet files are detected. Defaulted to 
spark.gluten.sql.fallbackUnexpectedMetadataParquet.                             
                                                                                
                                                                                
                              |
-| spark.gluten.sql.fallbackEncryptedParquet.limit                    | 
&lt;undefined&gt; | If supplied, `limit` number of files will be checked to 
determine encryption and falling back to java scan. Defaulted to 
spark.gluten.sql.fallbackUnexpectedMetadataParquet.limit.                       
                                                                                
                                                                                
      |
 | spark.gluten.sql.fallbackRegexpExpressions                         | false   
          | If true, fall back all regexp expressions. There are a few 
incompatible cases between RE2 (used by native engine) and java.util.regex 
(used by Spark). User should enable this property if their incompatibility is 
intolerable.                                                                    
                                                                           |
 | spark.gluten.sql.fallbackUnexpectedMetadataParquet                 | false   
          | If enabled, Gluten will not offload scan when unexpected metadata 
is detected.                                                                    
                                                                                
                                                                                
                                                             |
 | spark.gluten.sql.fallbackUnexpectedMetadataParquet.limit           | 10      
          | If supplied, metadata of `limit` number of Parquet files will be 
checked to determine whether to fall back to java scan.                         
                                                                                
                                                                                
                                                              |
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala 
b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
index 9896bd7cd1..340c490993 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
@@ -375,16 +375,6 @@ class GlutenConfig(conf: SQLConf) extends 
GlutenCoreConfig(conf) {
     getConf(PARQUET_UNEXPECTED_METADATA_FALLBACK_FILE_LIMIT)
   }
 
-  def parquetEncryptionValidationEnabled: Boolean = {
-    getConf(ENCRYPTED_PARQUET_FALLBACK_ENABLED)
-      .getOrElse(getConf(PARQUET_UNEXPECTED_METADATA_FALLBACK_ENABLED))
-  }
-
-  def parquetEncryptionValidationFileLimit: Int = {
-    getConf(PARQUET_ENCRYPTED_FALLBACK_FILE_LIMIT).getOrElse(
-      getConf(PARQUET_UNEXPECTED_METADATA_FALLBACK_FILE_LIMIT))
-  }
-
   def enableColumnarRange: Boolean = getConf(COLUMNAR_RANGE_ENABLED)
   def enableColumnarCollectLimit: Boolean = 
getConf(COLUMNAR_COLLECT_LIMIT_ENABLED)
   def enableColumnarCollectTail: Boolean = 
getConf(COLUMNAR_COLLECT_TAIL_ENABLED)
@@ -1576,24 +1566,6 @@ object GlutenConfig extends ConfigRegistry {
       .checkValue(_ > 0, s"must be positive.")
       .createWithDefault(10)
 
-  val ENCRYPTED_PARQUET_FALLBACK_ENABLED =
-    buildConf("spark.gluten.sql.fallbackEncryptedParquet")
-      .doc(
-        "If enabled, Gluten will not offload scan when encrypted parquet files 
are" +
-          " detected. Defaulted to " + 
s"${PARQUET_UNEXPECTED_METADATA_FALLBACK_ENABLED.key}.")
-      .booleanConf
-      .createOptional
-
-  val PARQUET_ENCRYPTED_FALLBACK_FILE_LIMIT =
-    buildConf("spark.gluten.sql.fallbackEncryptedParquet.limit")
-      .doc(
-        "If supplied, `limit` number of files will be checked to determine 
encryption " +
-          s"and falling back to java scan. Defaulted to " +
-          s"${PARQUET_UNEXPECTED_METADATA_FALLBACK_FILE_LIMIT.key}.")
-      .intConf
-      .checkValue(_ > 0, s"must be positive.")
-      .createOptional
-
   val COLUMNAR_RANGE_ENABLED =
     buildConf("spark.gluten.sql.columnar.range")
       .doc("Enable or disable columnar range.")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to