This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new acafe677f1 [GLUTEN-1433] [VL] Add config to disable TimestampNTZ 
validation fallback (#11720)
acafe677f1 is described below

commit acafe677f192e8b1902a0b0c5c90f884a656865c
Author: Mariam AlMesfer <[email protected]>
AuthorDate: Tue Apr 7 12:31:56 2026 +0300

    [GLUTEN-1433] [VL] Add config to disable TimestampNTZ validation fallback 
(#11720)
    
    This PR adds a config to control fallback validation for TimestampNTZType 
in the Velox backend and adds tests for localtimestamp() with the validation 
enabled and disabled.
    
    Currently, the validator treats TimestampNTZType as unsupported and forces 
the query to fall back to Spark. This makes it hard to develop and test 
features related to TimestampNTZ, including functions like localtimestamp().
    
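    With this change, the validation rule can be temporarily disabled during 
development and testing by setting 
spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation to false 
(the default, true, keeps the existing fallback behavior).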
    
    Related issue: #1433
    
    Co-authored-by: Mariam-Almesfer <[email protected]>
---
 .../backendsapi/velox/VeloxValidatorApi.scala      |  8 +++++
 .../org/apache/gluten/config/VeloxConfig.scala     | 11 +++++++
 .../functions/ScalarFunctionsValidateSuite.scala   | 28 +++++++++++++++++
 docs/velox-configuration.md                        |  1 +
 .../extension/columnar/validator/Validators.scala  | 36 ++++++++++++++++++++--
 5 files changed, 82 insertions(+), 2 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
index 1676e91d17..9ff33229fa 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
@@ -17,6 +17,7 @@
 package org.apache.gluten.backendsapi.velox
 
 import org.apache.gluten.backendsapi.{BackendsApiManager, ValidatorApi}
+import org.apache.gluten.config.VeloxConfig
 import org.apache.gluten.execution.ValidationResult
 import org.apache.gluten.substrait.`type`.TypeNode
 import org.apache.gluten.substrait.SubstraitContext
@@ -104,11 +105,18 @@ class VeloxValidatorApi extends ValidatorApi {
 
 object VeloxValidatorApi {
   private def isPrimitiveType(dataType: DataType): Boolean = {
+    val enableTimestampNtzValidation = 
VeloxConfig.get.enableTimestampNtzValidation
     dataType match {
       case BooleanType | ByteType | ShortType | IntegerType | LongType | 
FloatType | DoubleType |
           StringType | BinaryType | _: DecimalType | DateType | TimestampType |
           YearMonthIntervalType.DEFAULT | NullType =>
         true
+      case dt
+          if !enableTimestampNtzValidation &&
+            dt.getClass.getSimpleName == "TimestampNTZType" =>
+        // Allow TimestampNTZ when validation is disabled (for 
development/testing)
+        // Use reflection to avoid compile-time dependency on Spark 3.4+ 
TimestampNTZType
+        true
       case _ => false
     }
   }
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala 
b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
index 633979613d..8c39f02128 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -99,6 +99,8 @@ class VeloxConfig(conf: SQLConf) extends GlutenConfig(conf) {
 
   def valueStreamDynamicFilterEnabled: Boolean =
     getConf(VALUE_STREAM_DYNAMIC_FILTER_ENABLED)
+
+  def enableTimestampNtzValidation: Boolean = 
getConf(ENABLE_TIMESTAMP_NTZ_VALIDATION)
 }
 
 object VeloxConfig extends ConfigRegistry {
@@ -755,4 +757,13 @@ object VeloxConfig extends ConfigRegistry {
       .doc("Maps table field names to file field names using names, not 
indices for Parquet files.")
       .booleanConf
       .createWithDefault(true)
+
+  val ENABLE_TIMESTAMP_NTZ_VALIDATION =
+    
buildConf("spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation")
+      .doc(
+        "Enable validation fallback for TimestampNTZ type. When true 
(default), any plan " +
+          "containing TimestampNTZ will fall back to Spark execution. Set to 
false during " +
+          "development/testing of TimestampNTZ support to allow native 
execution.")
+      .booleanConf
+      .createWithDefault(true)
 }
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
index fa4fc638a6..6a0516ce66 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
@@ -1561,4 +1561,32 @@ abstract class ScalarFunctionsValidateSuite extends 
FunctionsValidateSuite {
       }
     }
   }
+
+  testWithMinSparkVersion("localtimestamp with validation enabled", "3.4") {
+    // With validation enabled (default), localtimestamp should fallback to 
Spark
+    // because it returns TimestampNTZType
+    
withSQLConf("spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation"
 -> "true") {
+      val df = spark.sql("SELECT l_orderkey, localtimestamp() from lineitem 
limit 1")
+      // Should fallback to Spark execution due to TimestampNTZ validation
+      checkFallbackOperators(df, 1)
+      df.collect()
+    }
+  }
+
+  testWithMinSparkVersion("localtimestamp with validation disabled", "3.4") {
+    // With validation disabled, localtimestamp can use native execution
+    // This allows developers to test TimestampNTZ support
+    
withSQLConf("spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation"
 -> "false") {
+      val df = spark.sql("SELECT l_orderkey, localtimestamp() from lineitem 
limit 1")
+      val optimizedPlan = df.queryExecution.optimizedPlan.toString()
+      assert(
+        !optimizedPlan.contains("LocalTimestamp"),
+        s"Expected LocalTimestamp to be folded to a literal, but got: 
$optimizedPlan"
+      )
+      // Should use native execution when validation is disabled
+      checkGlutenPlan[ProjectExecTransformer](df)
+      checkFallbackOperators(df, 0)
+      df.collect()
+    }
+  }
 }
diff --git a/docs/velox-configuration.md b/docs/velox-configuration.md
index 859c6356c1..b01e343c10 100644
--- a/docs/velox-configuration.md
+++ b/docs/velox-configuration.md
@@ -27,6 +27,7 @@ nav_order: 16
 | spark.gluten.sql.columnar.backend.velox.cudf.memoryResource                  
    | async             | GPU RMM memory resource.                              
                                                                                
                                                                                
                                                                                
                                                                                
              [...]
 | spark.gluten.sql.columnar.backend.velox.cudf.shuffleMaxPrefetchBytes         
    | 1028MB            | Maximum bytes to prefetch in CPU memory during GPU 
shuffle read while waitingfor GPU available.                                    
                                                                                
                                                                                
                                                                                
                 [...]
 | spark.gluten.sql.columnar.backend.velox.directorySizeGuess                   
    | 32KB              | Deprecated, rename to 
spark.gluten.sql.columnar.backend.velox.footerEstimatedSize                     
                                                                                
                                                                                
                                                                                
                                              [...]
+| spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation         
    | true              | Enable validation fallback for TimestampNTZ type. 
When true (default), any plan containing TimestampNTZ will fall back to Spark 
execution. Set to false during development/testing of TimestampNTZ support to 
allow native execution.                                                         
                                                                                
                      [...]
 | spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled               
    | false             | Disables caching if false. File handle cache should 
be disabled if files are mutable, i.e. file content may change while file path 
stays the same.                                                                 
                                                                                
                                                                                
                 [...]
 | spark.gluten.sql.columnar.backend.velox.filePreloadThreshold                 
    | 1MB               | Set the file preload threshold for velox file scan, 
refer to Velox's file-preload-threshold                                         
                                                                                
                                                                                
                                                                                
                [...]
 | spark.gluten.sql.columnar.backend.velox.floatingPointMode                    
    | loose             | Config used to control the tolerance of floating 
point operations alignment with Spark. When the mode is set to strict, flushing 
is disabled for sum(float/double)and avg(float/double). When set to loose, 
flushing will be enabled.                                                       
                                                                                
                        [...]
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
index 7148214365..faa0d9e2e5 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
@@ -40,6 +40,19 @@ object Validators {
     private val conf = GlutenConfig.get
     private val settings = BackendsApiManager.getSettings
 
+    // Get VeloxConfig if available
+    private val veloxConf: Option[Any] = {
+      try {
+        // scalastyle:off classforname
+        val veloxConfigClass = 
Class.forName("org.apache.gluten.config.VeloxConfig")
+        // scalastyle:on classforname
+        val getMethod = veloxConfigClass.getMethod("get")
+        Some(getMethod.invoke(null))
+      } catch {
+        case _: Exception => None
+      }
+    }
+
     /** Fails validation if a plan node was already tagged with 
TRANSFORM_UNSUPPORTED. */
     def fallbackByHint(): Validator.Builder = {
       builder.add(FallbackByHint)
@@ -81,7 +94,7 @@ object Validators {
 
     /** Fails validation if a plan node's input or output schema contains 
TimestampNTZType. */
     def fallbackByTimestampNTZ(): Validator.Builder = {
-      builder.add(new FallbackByTimestampNTZ())
+      builder.add(new FallbackByTimestampNTZ(veloxConf))
     }
 
     /**
@@ -218,8 +231,27 @@ object Validators {
     }
   }
 
-  private class FallbackByTimestampNTZ() extends Validator {
+  private class FallbackByTimestampNTZ(veloxConf: Option[Any]) extends 
Validator {
+    // Check if TimestampNTZ validation is enabled via VeloxConfig
+    // Default to true (enabled) if VeloxConfig is not available or method 
call fails
+    private val enableValidation: Boolean = veloxConf
+      .flatMap {
+        config =>
+          try {
+            val enableMethod = 
config.getClass.getMethod("enableTimestampNtzValidation")
+            Some(enableMethod.invoke(config).asInstanceOf[Boolean])
+          } catch {
+            case _: Exception => None
+          }
+      }
+      .getOrElse(true)
+
     override def validate(plan: SparkPlan): Validator.OutCome = {
+      if (!enableValidation) {
+        // Validation is disabled, allow TimestampNTZ
+        return pass()
+      }
+
       def containsNTZ(dataType: DataType): Boolean = dataType match {
         case dt if dt.catalogString == "timestamp_ntz" => true
         case st: StructType => st.exists(f => containsNTZ(f.dataType))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to