Copilot commented on code in PR #12229:
URL: https://github.com/apache/gluten/pull/12229#discussion_r3385244000
##########
gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/validator/Validators.scala:
##########
@@ -231,41 +218,30 @@ object Validators {
}
}
- private class FallbackByTimestampNTZ(veloxConf: Option[Any]) extends
Validator {
- // Check if TimestampNTZ validation is enabled via VeloxConfig
- // Default to true (enabled) if VeloxConfig is not available or method
call fails
- private val enableValidation: Boolean = veloxConf
- .flatMap {
- config =>
- try {
- val enableMethod =
config.getClass.getMethod("enableTimestampNtzValidation")
- Some(enableMethod.invoke(config).asInstanceOf[Boolean])
- } catch {
- case _: Exception => None
- }
- }
- .getOrElse(true)
-
+ private class FallbackByTimestampNTZ(enableValidation: Boolean) extends
Validator {
override def validate(plan: SparkPlan): Validator.OutCome = {
if (!enableValidation) {
- // Validation is disabled, allow TimestampNTZ
- return pass()
- }
-
- def containsNTZ(dataType: DataType): Boolean = dataType match {
- case dt if dt.catalogString == "timestamp_ntz" => true
- case st: StructType => st.exists(f => containsNTZ(f.dataType))
- case at: ArrayType => containsNTZ(at.elementType)
- case mt: MapType => containsNTZ(mt.keyType) ||
containsNTZ(mt.valueType)
- case _ => false
- }
- val hasNTZ = plan.output.exists(a => containsNTZ(a.dataType)) ||
- plan.children.exists(_.output.exists(a => containsNTZ(a.dataType)))
- if (hasNTZ) {
- fail(s"${plan.nodeName} has TimestampNTZType in input/output schema")
- } else {
- pass()
+ // Validation is disabled, allow supported operators.
+ def containsNTZ(dataType: DataType): Boolean = dataType match {
+ case dt if dt.typeName == "timestamp_ntz" => true
+ case st: StructType => st.exists(f => containsNTZ(f.dataType))
+ case at: ArrayType => containsNTZ(at.elementType)
+ case mt: MapType => containsNTZ(mt.keyType) ||
containsNTZ(mt.valueType)
+ case _ => false
+ }
+ val isScan = plan match {
+ case _: BatchScanExec => true
+ case _: FileSourceScanExec => true
+ case p if HiveTableScanExecTransformer.isHiveTableScan(p) => true
+ case _ => false
+ }
+ val hasNTZ = plan.output.exists(a => containsNTZ(a.dataType)) ||
+ plan.children.exists(_.output.exists(a => containsNTZ(a.dataType)))
+ if (isScan || !hasNTZ) {
+ return pass()
+ }
}
+ fail(s"${plan.nodeName} has TimestampNTZType in input/output schema")
}
Review Comment:
`FallbackByTimestampNTZ` currently fails validation unconditionally when
`enableTimestampNtzValidation` is `true` (the default in `BackendSettingsApi`).
This makes *all* Spark plans fail validation (and therefore fall back) even
when the plan schema has no `timestamp_ntz`, which is a severe regression for
non-Velox backends that don't override the setting.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]