This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 67c1667366 [GLUTEN-8742][VL] Improve the cast validation on native side (#8743)
67c1667366 is described below
commit 67c166736628535824b8e0ce2785bfd420fcf0c1
Author: Arnav Balyan <[email protected]>
AuthorDate: Wed Mar 5 13:18:35 2025 +0530
[GLUTEN-8742][VL] Improve the cast validation on native side (#8743)
---
.../substrait/SubstraitToVeloxPlanValidator.cc | 88 +++++++++++++---------
.../substrait/SubstraitToVeloxPlanValidator.h | 2 +
2 files changed, 53 insertions(+), 37 deletions(-)
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 0b92a76148..7ca788a58d 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -240,59 +240,73 @@ bool SubstraitToVeloxPlanValidator::validateScalarFunction(
return true;
}
-bool SubstraitToVeloxPlanValidator::validateCast(
- const ::substrait::Expression::Cast& castExpr,
- const RowTypePtr& inputType) {
- if (!validateExpression(castExpr.input(), inputType)) {
+bool SubstraitToVeloxPlanValidator::isAllowedCast(const TypePtr& fromType, const TypePtr& toType) {
+ // Currently cast is not allowed for various categories, code has a bunch of rules
+ // which define the cast categories and if we should offload to velox. Currently
+ // the following categories are denied.
+ //
+ // 1. from/to isIntervalYearMonth is not allowed.
+ // 2. Date to most categories except few supported types is not allowed.
+ // 3. Timestamp to most categories except few supported types is not allowed.
+ // 4. Certain complex types are not allowed.
+
+ TypeKind fromKind = fromType->kind();
+ TypeKind toKind = toType->kind();
+
+ static const std::unordered_set<TypeKind> complexTypeList = {
+ TypeKind::ARRAY, TypeKind::MAP, TypeKind::ROW, TypeKind::VARBINARY};
+
+ // Don't support isIntervalYearMonth.
+ if (fromType->isIntervalYearMonth() || toType->isIntervalYearMonth()) {
+ LOG_VALIDATION_MSG("Casting involving INTERVAL_YEAR_MONTH is not supported.");
return false;
}
- const auto& toType = SubstraitParser::parseType(castExpr.type());
- core::TypedExprPtr input = exprConverter_->toVeloxExpr(castExpr.input(), inputType);
+ // Limited support for DATE to X.
+ if (fromType->isDate() && toKind != TypeKind::TIMESTAMP && toKind != TypeKind::VARCHAR) {
+ LOG_VALIDATION_MSG("Casting from DATE to " + toType->toString() + " is not supported.");
+ return false;
+ }
- // Only support cast from date to timestamp
- if (toType->kind() == TypeKind::TIMESTAMP && !input->type()->isDate()) {
+ // Limited support for Timestamp to X.
+ if (fromKind == TypeKind::TIMESTAMP && !(toType->isDate() || toKind == TypeKind::VARCHAR)) {
LOG_VALIDATION_MSG(
- "Casting from " + input->type()->toString() + " to " + toType->toString() + " is not supported.");
+ "Casting from TIMESTAMP to " + toType->toString() + " is not supported or has incorrect result.");
return false;
}
- if (toType->isIntervalYearMonth()) {
- LOG_VALIDATION_MSG("Casting to " + toType->toString() + " is not supported.");
+ // Limited support for X to Timestamp.
+ if (toKind == TypeKind::TIMESTAMP && !fromType->isDate()) {
+ LOG_VALIDATION_MSG("Casting from " + fromType->toString() + " to TIMESTAMP is not supported.");
return false;
}
- // Casting from some types is not supported. See CastExpr::applyPeeled.
- if (input->type()->isDate()) {
- // Only support cast date to varchar & timestamp
- if (toType->kind() != TypeKind::VARCHAR && toType->kind() != TypeKind::TIMESTAMP) {
- LOG_VALIDATION_MSG("Casting from DATE to " + toType->toString() + " is not supported.");
- return false;
- }
- } else if (input->type()->isIntervalYearMonth()) {
- LOG_VALIDATION_MSG("Casting from INTERVAL_YEAR_MONTH is not supported.");
+ // Limited support for Complex types.
+ if (complexTypeList.find(fromKind) != complexTypeList.end()) {
+ LOG_VALIDATION_MSG("Casting from " + fromType->toString() + " is not currently supported.");
return false;
}
- switch (input->type()->kind()) {
- case TypeKind::ARRAY:
- case TypeKind::MAP:
- case TypeKind::ROW:
- case TypeKind::VARBINARY:
- LOG_VALIDATION_MSG("Invalid input type in casting: ARRAY/MAP/ROW/VARBINARY.");
- return false;
- case TypeKind::TIMESTAMP:
- // Only support casting timestamp to date or varchar.
- if (!toType->isDate() && toType->kind() != TypeKind::VARCHAR) {
- LOG_VALIDATION_MSG(
- "Casting from TIMESTAMP to " + toType->toString() + " is not supported or has incorrect result.");
- return false;
- }
- default: {
- }
- }
+
return true;
}
+bool SubstraitToVeloxPlanValidator::validateCast(
+ const ::substrait::Expression::Cast& castExpr,
+ const RowTypePtr& inputType) {
+ if (!validateExpression(castExpr.input(), inputType)) {
+ return false;
+ }
+
+ const auto& toType = SubstraitParser::parseType(castExpr.type());
+ core::TypedExprPtr input = exprConverter_->toVeloxExpr(castExpr.input(), inputType);
+
+ if (SubstraitToVeloxPlanValidator::isAllowedCast(input->type(), toType)) {
+ return true;
+ }
+
+ return false;
+}
+
bool SubstraitToVeloxPlanValidator::validateIfThen(
const ::substrait::Expression_IfThen& ifThen,
const RowTypePtr& inputType) {
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
index 100c4dc95d..881a0e5148 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
@@ -153,6 +153,8 @@ class SubstraitToVeloxPlanValidator {
void logValidateMsg(const std::string& log) {
validateLog_.emplace_back(log);
}
+
+ bool isAllowedCast(const TypePtr& fromType, const TypePtr& toType);
};
} // namespace gluten
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]