This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 67c1667366 [GLUTEN-8742][VL] Improve the cast validation on native side (#8743)
67c1667366 is described below

commit 67c166736628535824b8e0ce2785bfd420fcf0c1
Author: Arnav Balyan <[email protected]>
AuthorDate: Wed Mar 5 13:18:35 2025 +0530

    [GLUTEN-8742][VL] Improve the cast validation on native side (#8743)
---
 .../substrait/SubstraitToVeloxPlanValidator.cc     | 88 +++++++++++++---------
 .../substrait/SubstraitToVeloxPlanValidator.h      |  2 +
 2 files changed, 53 insertions(+), 37 deletions(-)

diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 0b92a76148..7ca788a58d 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -240,59 +240,73 @@ bool SubstraitToVeloxPlanValidator::validateScalarFunction(
   return true;
 }
 
-bool SubstraitToVeloxPlanValidator::validateCast(
-    const ::substrait::Expression::Cast& castExpr,
-    const RowTypePtr& inputType) {
-  if (!validateExpression(castExpr.input(), inputType)) {
+bool SubstraitToVeloxPlanValidator::isAllowedCast(const TypePtr& fromType, 
const TypePtr& toType) {
+  // Currently cast is not allowed for various categories, code has a bunch of 
rules
+  // which define the cast categories and if we should offload to velox. 
Currently
+  // the following categories are denied.
+  //
+  // 1. from/to isIntervalYearMonth is not allowed.
+  // 2. Date to most categories except few supported types is not allowed.
+  // 3. Timestamp to most categories except few supported types is not allowed.
+  // 4. Certain complex types are not allowed.
+
+  TypeKind fromKind = fromType->kind();
+  TypeKind toKind = toType->kind();
+
+  static const std::unordered_set<TypeKind> complexTypeList = {
+      TypeKind::ARRAY, TypeKind::MAP, TypeKind::ROW, TypeKind::VARBINARY};
+
+  // Don't support isIntervalYearMonth.
+  if (fromType->isIntervalYearMonth() || toType->isIntervalYearMonth()) {
+    LOG_VALIDATION_MSG("Casting involving INTERVAL_YEAR_MONTH is not 
supported.");
     return false;
   }
 
-  const auto& toType = SubstraitParser::parseType(castExpr.type());
-  core::TypedExprPtr input = exprConverter_->toVeloxExpr(castExpr.input(), 
inputType);
+  // Limited support for DATE to X.
+  if (fromType->isDate() && toKind != TypeKind::TIMESTAMP && toKind != 
TypeKind::VARCHAR) {
+    LOG_VALIDATION_MSG("Casting from DATE to " + toType->toString() + " is not 
supported.");
+    return false;
+  }
 
-  // Only support cast from date to timestamp
-  if (toType->kind() == TypeKind::TIMESTAMP && !input->type()->isDate()) {
+  // Limited support for Timestamp to X.
+  if (fromKind == TypeKind::TIMESTAMP && !(toType->isDate() || toKind == 
TypeKind::VARCHAR)) {
     LOG_VALIDATION_MSG(
-        "Casting from " + input->type()->toString() + " to " + 
toType->toString() + " is not supported.");
+        "Casting from TIMESTAMP to " + toType->toString() + " is not supported 
or has incorrect result.");
     return false;
   }
 
-  if (toType->isIntervalYearMonth()) {
-    LOG_VALIDATION_MSG("Casting to " + toType->toString() + " is not 
supported.");
+  // Limited support for X to Timestamp.
+  if (toKind == TypeKind::TIMESTAMP && !fromType->isDate()) {
+    LOG_VALIDATION_MSG("Casting from " + fromType->toString() + " to TIMESTAMP 
is not supported.");
     return false;
   }
 
-  // Casting from some types is not supported. See CastExpr::applyPeeled.
-  if (input->type()->isDate()) {
-    // Only support cast date to varchar & timestamp
-    if (toType->kind() != TypeKind::VARCHAR && toType->kind() != 
TypeKind::TIMESTAMP) {
-      LOG_VALIDATION_MSG("Casting from DATE to " + toType->toString() + " is 
not supported.");
-      return false;
-    }
-  } else if (input->type()->isIntervalYearMonth()) {
-    LOG_VALIDATION_MSG("Casting from INTERVAL_YEAR_MONTH is not supported.");
+  // Limited support for Complex types.
+  if (complexTypeList.find(fromKind) != complexTypeList.end()) {
+    LOG_VALIDATION_MSG("Casting from " + fromType->toString() + " is not 
currently supported.");
     return false;
   }
-  switch (input->type()->kind()) {
-    case TypeKind::ARRAY:
-    case TypeKind::MAP:
-    case TypeKind::ROW:
-    case TypeKind::VARBINARY:
-      LOG_VALIDATION_MSG("Invalid input type in casting: 
ARRAY/MAP/ROW/VARBINARY.");
-      return false;
-    case TypeKind::TIMESTAMP:
-      // Only support casting timestamp to date or varchar.
-      if (!toType->isDate() && toType->kind() != TypeKind::VARCHAR) {
-        LOG_VALIDATION_MSG(
-            "Casting from TIMESTAMP to " + toType->toString() + " is not 
supported or has incorrect result.");
-        return false;
-      }
-    default: {
-    }
-  }
+
   return true;
 }
 
+bool SubstraitToVeloxPlanValidator::validateCast(
+    const ::substrait::Expression::Cast& castExpr,
+    const RowTypePtr& inputType) {
+  if (!validateExpression(castExpr.input(), inputType)) {
+    return false;
+  }
+
+  const auto& toType = SubstraitParser::parseType(castExpr.type());
+  core::TypedExprPtr input = exprConverter_->toVeloxExpr(castExpr.input(), 
inputType);
+
+  if (SubstraitToVeloxPlanValidator::isAllowedCast(input->type(), toType)) {
+    return true;
+  }
+
+  return false;
+}
+
 bool SubstraitToVeloxPlanValidator::validateIfThen(
     const ::substrait::Expression_IfThen& ifThen,
     const RowTypePtr& inputType) {
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
index 100c4dc95d..881a0e5148 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
@@ -153,6 +153,8 @@ class SubstraitToVeloxPlanValidator {
   void logValidateMsg(const std::string& log) {
     validateLog_.emplace_back(log);
   }
+
+  bool isAllowedCast(const TypePtr& fromType, const TypePtr& toType);
 };
 
 } // namespace gluten


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to