This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new acafe677f1 [GLUTEN-1433] [VL] Add config to disable TimestampNTZ
validation fallback (#11720)
acafe677f1 is described below
commit acafe677f192e8b1902a0b0c5c90f884a656865c
Author: Mariam AlMesfer <[email protected]>
AuthorDate: Tue Apr 7 12:31:56 2026 +0300
[GLUTEN-1433] [VL] Add config to disable TimestampNTZ validation fallback
(#11720)
This PR adds a config to control fallback validation for TimestampNTZType
in the Velox backend and adds a test for localtimestamp().
Currently, the validator treats TimestampNTZType as unsupported and forces
the query to fall back to Spark. This makes it hard to develop and test
features related to TimestampNTZ, including functions like localtimestamp().
With this change, the validation rule can be temporarily disabled during
development and testing.
Related issue: #1433
Co-authored-by: Mariam-Almesfer <[email protected]>
---
.../backendsapi/velox/VeloxValidatorApi.scala | 8 +++++
.../org/apache/gluten/config/VeloxConfig.scala | 11 +++++++
.../functions/ScalarFunctionsValidateSuite.scala | 28 +++++++++++++++++
docs/velox-configuration.md | 1 +
.../extension/columnar/validator/Validators.scala | 36 ++++++++++++++++++++--
5 files changed, 82 insertions(+), 2 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
index 1676e91d17..9ff33229fa 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxValidatorApi.scala
@@ -17,6 +17,7 @@
package org.apache.gluten.backendsapi.velox
import org.apache.gluten.backendsapi.{BackendsApiManager, ValidatorApi}
+import org.apache.gluten.config.VeloxConfig
import org.apache.gluten.execution.ValidationResult
import org.apache.gluten.substrait.`type`.TypeNode
import org.apache.gluten.substrait.SubstraitContext
@@ -104,11 +105,18 @@ class VeloxValidatorApi extends ValidatorApi {
object VeloxValidatorApi {
private def isPrimitiveType(dataType: DataType): Boolean = {
+ val enableTimestampNtzValidation =
VeloxConfig.get.enableTimestampNtzValidation
dataType match {
case BooleanType | ByteType | ShortType | IntegerType | LongType |
FloatType | DoubleType |
StringType | BinaryType | _: DecimalType | DateType | TimestampType |
YearMonthIntervalType.DEFAULT | NullType =>
true
+ case dt
+ if !enableTimestampNtzValidation &&
+ dt.getClass.getSimpleName == "TimestampNTZType" =>
+ // Allow TimestampNTZ when validation is disabled (for development/testing).
+ // Match by runtime class name to avoid a compile-time dependency on Spark 3.4+ TimestampNTZType.
+ true
case _ => false
}
}
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
index 633979613d..8c39f02128 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/config/VeloxConfig.scala
@@ -99,6 +99,8 @@ class VeloxConfig(conf: SQLConf) extends GlutenConfig(conf) {
def valueStreamDynamicFilterEnabled: Boolean =
getConf(VALUE_STREAM_DYNAMIC_FILTER_ENABLED)
+
+ def enableTimestampNtzValidation: Boolean =
getConf(ENABLE_TIMESTAMP_NTZ_VALIDATION)
}
object VeloxConfig extends ConfigRegistry {
@@ -755,4 +757,13 @@ object VeloxConfig extends ConfigRegistry {
.doc("Maps table field names to file field names using names, not
indices for Parquet files.")
.booleanConf
.createWithDefault(true)
+
+ val ENABLE_TIMESTAMP_NTZ_VALIDATION =
+
buildConf("spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation")
+ .doc(
+ "Enable validation fallback for TimestampNTZ type. When true
(default), any plan " +
+ "containing TimestampNTZ will fall back to Spark execution. Set to
false during " +
+ "development/testing of TimestampNTZ support to allow native
execution.")
+ .booleanConf
+ .createWithDefault(true)
}
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
index fa4fc638a6..6a0516ce66 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/functions/ScalarFunctionsValidateSuite.scala
@@ -1561,4 +1561,32 @@ abstract class ScalarFunctionsValidateSuite extends
FunctionsValidateSuite {
}
}
}
+
+ testWithMinSparkVersion("localtimestamp with validation enabled", "3.4") {
+ // With validation enabled (default), localtimestamp should fallback to
Spark
+ // because it returns TimestampNTZType
+
withSQLConf("spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation"
-> "true") {
+ val df = spark.sql("SELECT l_orderkey, localtimestamp() from lineitem
limit 1")
+ // Should fallback to Spark execution due to TimestampNTZ validation
+ checkFallbackOperators(df, 1)
+ df.collect()
+ }
+ }
+
+ testWithMinSparkVersion("localtimestamp with validation disabled", "3.4") {
+ // With validation disabled, localtimestamp can use native execution
+ // This allows developers to test TimestampNTZ support
+
withSQLConf("spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation"
-> "false") {
+ val df = spark.sql("SELECT l_orderkey, localtimestamp() from lineitem
limit 1")
+ val optimizedPlan = df.queryExecution.optimizedPlan.toString()
+ assert(
+ !optimizedPlan.contains("LocalTimestamp"),
+ s"Expected LocalTimestamp to be folded to a literal, but got:
$optimizedPlan"
+ )
+ // Should use native execution when validation is disabled
+ checkGlutenPlan[ProjectExecTransformer](df)
+ checkFallbackOperators(df, 0)
+ df.collect()
+ }
+ }
}
diff --git a/docs/velox-configuration.md b/docs/velox-configuration.md
index 859c6356c1..b01e343c10 100644
--- a/docs/velox-configuration.md
+++ b/docs/velox-configuration.md
@@ -27,6 +27,7 @@ nav_order: 16
| spark.gluten.sql.columnar.backend.velox.cudf.memoryResource
| async | GPU RMM memory resource.
[...]
| spark.gluten.sql.columnar.backend.velox.cudf.shuffleMaxPrefetchBytes
| 1028MB | Maximum bytes to prefetch in CPU memory during GPU
shuffle read while waiting for the GPU to become available.
[...]
| spark.gluten.sql.columnar.backend.velox.directorySizeGuess
| 32KB | Deprecated, rename to
spark.gluten.sql.columnar.backend.velox.footerEstimatedSize
[...]
+| spark.gluten.sql.columnar.backend.velox.enableTimestampNtzValidation
| true | Enable validation fallback for TimestampNTZ type.
When true (default), any plan containing TimestampNTZ will fall back to Spark
execution. Set to false during development/testing of TimestampNTZ support to
allow native execution.
[...]
| spark.gluten.sql.columnar.backend.velox.fileHandleCacheEnabled
| false | Disables caching if false. File handle cache should
be disabled if files are mutable, i.e. file content may change while file path
stays the same.
[...]
| spark.gluten.sql.columnar.backend.velox.filePreloadThreshold
| 1MB | Set the file preload threshold for velox file scan,
refer to Velox's file-preload-threshold
[...]
| spark.gluten.sql.columnar.backend.velox.floatingPointMode
| loose | Config used to control the tolerance of floating
point operations alignment with Spark. When the mode is set to strict, flushing
is disabled for sum(float/double) and avg(float/double). When set to loose,
flushing will be enabled.
[...]
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
index 7148214365..faa0d9e2e5 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala
@@ -40,6 +40,19 @@ object Validators {
private val conf = GlutenConfig.get
private val settings = BackendsApiManager.getSettings
+ // Get VeloxConfig if available
+ private val veloxConf: Option[Any] = {
+ try {
+ // scalastyle:off classforname
+ val veloxConfigClass =
Class.forName("org.apache.gluten.config.VeloxConfig")
+ // scalastyle:on classforname
+ val getMethod = veloxConfigClass.getMethod("get")
+ Some(getMethod.invoke(null))
+ } catch {
+ case _: Exception => None
+ }
+ }
+
/** Fails validation if a plan node was already tagged with
TRANSFORM_UNSUPPORTED. */
def fallbackByHint(): Validator.Builder = {
builder.add(FallbackByHint)
@@ -81,7 +94,7 @@ object Validators {
/** Fails validation if a plan node's input or output schema contains
TimestampNTZType. */
def fallbackByTimestampNTZ(): Validator.Builder = {
- builder.add(new FallbackByTimestampNTZ())
+ builder.add(new FallbackByTimestampNTZ(veloxConf))
}
/**
@@ -218,8 +231,27 @@ object Validators {
}
}
- private class FallbackByTimestampNTZ() extends Validator {
+ private class FallbackByTimestampNTZ(veloxConf: Option[Any]) extends
Validator {
+ // Check if TimestampNTZ validation is enabled via VeloxConfig
+ // Default to true (enabled) if VeloxConfig is not available or method
call fails
+ private val enableValidation: Boolean = veloxConf
+ .flatMap {
+ config =>
+ try {
+ val enableMethod =
config.getClass.getMethod("enableTimestampNtzValidation")
+ Some(enableMethod.invoke(config).asInstanceOf[Boolean])
+ } catch {
+ case _: Exception => None
+ }
+ }
+ .getOrElse(true)
+
override def validate(plan: SparkPlan): Validator.OutCome = {
+ if (!enableValidation) {
+ // Validation is disabled, allow TimestampNTZ
+ return pass()
+ }
+
def containsNTZ(dataType: DataType): Boolean = dataType match {
case dt if dt.catalogString == "timestamp_ntz" => true
case st: StructType => st.exists(f => containsNTZ(f.dataType))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]