This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 1cfee63c84 [VL] Improve native plan validation code (#9092)
1cfee63c84 is described below
commit 1cfee63c8479c09f9d9d04dc00adf60443b8a4a0
Author: PHILO-HE <[email protected]>
AuthorDate: Wed Mar 26 17:31:36 2025 +0800
[VL] Improve native plan validation code (#9092)
---
cpp/core/jni/JniCommon.h | 2 +-
cpp/velox/benchmarks/PlanValidatorUtil.cc | 6 +--
cpp/velox/jni/VeloxJniWrapper.cc | 45 +++++++++-------------
.../substrait/SubstraitToVeloxPlanValidator.cc | 22 +++++------
.../substrait/SubstraitToVeloxPlanValidator.h | 19 +++++----
.../tests/Substrait2VeloxPlanValidatorTest.cc | 7 +---
6 files changed, 43 insertions(+), 58 deletions(-)
diff --git a/cpp/core/jni/JniCommon.h b/cpp/core/jni/JniCommon.h
index dfd0f8c094..0436c986a1 100644
--- a/cpp/core/jni/JniCommon.h
+++ b/cpp/core/jni/JniCommon.h
@@ -70,7 +70,7 @@ static inline jclass createGlobalClassReference(JNIEnv* env,
const char* classNa
static inline jclass createGlobalClassReferenceOrError(JNIEnv* env, const
char* className) {
jclass globalClass = createGlobalClassReference(env, className);
if (globalClass == nullptr) {
- std::string errorMessage = "Unable to CreateGlobalClassReferenceOrError
for" + std::string(className);
+ std::string errorMessage = "Unable to create global class reference for"
+ std::string(className);
throw gluten::GlutenException(errorMessage);
}
return globalClass;
diff --git a/cpp/velox/benchmarks/PlanValidatorUtil.cc
b/cpp/velox/benchmarks/PlanValidatorUtil.cc
index 46f2733f29..20d02db6c4 100644
--- a/cpp/velox/benchmarks/PlanValidatorUtil.cc
+++ b/cpp/velox/benchmarks/PlanValidatorUtil.cc
@@ -44,15 +44,11 @@ int main(int argc, char** argv) {
std::unordered_map<std::string, std::string> conf;
conf.insert({kDebugModeEnabled, "true"});
initVeloxBackend(conf);
- std::unordered_map<std::string, std::string>
configs{{core::QueryConfig::kSparkPartitionId, "0"}};
- auto queryCtx = core::QueryCtx::create(nullptr, core::QueryConfig(configs));
auto pool = defaultLeafVeloxMemoryPool().get();
- core::ExecCtx execCtx(pool, queryCtx.get());
+ SubstraitToVeloxPlanValidator planValidator(pool);
::substrait::Plan subPlan;
parseProtobuf(reinterpret_cast<uint8_t*>(plan.data()), plan.size(),
&subPlan);
-
- SubstraitToVeloxPlanValidator planValidator(pool, &execCtx);
try {
if (!planValidator.validate(subPlan)) {
auto reason = planValidator.getValidateLog();
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index 879ade9dc7..4f2d1c1ab7 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -42,6 +42,9 @@ using namespace gluten;
using namespace facebook;
namespace {
+jclass infoCls;
+jmethodID infoClsInitMethod;
+
jclass blockStripesClass;
jmethodID blockStripesConstructor;
} // namespace
@@ -61,6 +64,9 @@ jint JNI_OnLoad(JavaVM* vm, void*) {
initVeloxJniFileSystem(env);
initVeloxJniUDF(env);
+ infoCls = createGlobalClassReferenceOrError(env,
"Lorg/apache/gluten/validate/NativePlanValidationInfo;");
+ infoClsInitMethod = env->GetMethodID(infoCls, "<init>",
"(ILjava/lang/String;)V");
+
blockStripesClass =
createGlobalClassReferenceOrError(env,
"Lorg/apache/spark/sql/execution/datasources/BlockStripes;");
blockStripesConstructor = env->GetMethodID(blockStripesClass, "<init>",
"(J[J[II[B)V");
@@ -124,14 +130,14 @@
Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeValidateWithFail
jobject wrapper,
jbyteArray planArray) {
JNI_METHOD_START
- auto ctx = getRuntime(env, wrapper);
- auto safeArray = getByteArrayElementsSafe(env, planArray);
- auto planData = safeArray.elems();
- auto planSize = env->GetArrayLength(planArray);
- auto runtime = dynamic_cast<VeloxRuntime*>(ctx);
+ const auto ctx = getRuntime(env, wrapper);
+ const auto safeArray = getByteArrayElementsSafe(env, planArray);
+ const auto planData = safeArray.elems();
+ const auto planSize = env->GetArrayLength(planArray);
+ const auto runtime = dynamic_cast<VeloxRuntime*>(ctx);
if (runtime->debugModeEnabled()) {
try {
- auto jsonPlan = substraitFromPbToJson("Plan", planData, planSize,
std::nullopt);
+ const auto jsonPlan = substraitFromPbToJson("Plan", planData, planSize,
std::nullopt);
LOG(INFO) << std::string(50, '#') << " received substrait::Plan: for
validation";
LOG(INFO) << jsonPlan;
} catch (const std::exception& e) {
@@ -139,37 +145,22 @@
Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeValidateWithFail
}
}
+ const auto pool = defaultLeafVeloxMemoryPool().get();
+ SubstraitToVeloxPlanValidator planValidator(pool);
::substrait::Plan subPlan;
parseProtobuf(planData, planSize, &subPlan);
- // A query context with dummy configs. Used for function validation.
- std::unordered_map<std::string, std::string> configs{
- {velox::core::QueryConfig::kSparkPartitionId, "0"},
{velox::core::QueryConfig::kSessionTimezone, "GMT"}};
- auto queryCtx = velox::core::QueryCtx::create(nullptr,
velox::core::QueryConfig(configs));
- auto pool = defaultLeafVeloxMemoryPool().get();
- // An execution context used for function validation.
- velox::core::ExecCtx execCtx(pool, queryCtx.get());
-
- SubstraitToVeloxPlanValidator planValidator(pool, &execCtx);
- jclass infoCls =
env->FindClass("Lorg/apache/gluten/validate/NativePlanValidationInfo;");
- if (infoCls == nullptr) {
- std::string errorMessage = "Unable to CreateGlobalClassReferenceOrError
for NativePlanValidationInfo";
- throw GlutenException(errorMessage);
- }
- jmethodID method = env->GetMethodID(infoCls, "<init>",
"(ILjava/lang/String;)V");
try {
- auto isSupported = planValidator.validate(subPlan);
- auto logs = planValidator.getValidateLog();
+ const auto isSupported = planValidator.validate(subPlan);
+ const auto logs = planValidator.getValidateLog();
std::string concatLog;
for (int i = 0; i < logs.size(); i++) {
concatLog += logs[i] + "@";
}
- return env->NewObject(infoCls, method, isSupported,
env->NewStringUTF(concatLog.c_str()));
+ return env->NewObject(infoCls, infoClsInitMethod, isSupported,
env->NewStringUTF(concatLog.c_str()));
} catch (std::invalid_argument& e) {
LOG(INFO) << "Failed to validate substrait plan because " << e.what();
- // return false;
- auto isSupported = false;
- return env->NewObject(infoCls, method, isSupported, env->NewStringUTF(""));
+ return env->NewObject(infoCls, infoClsInitMethod, false,
env->NewStringUTF(""));
}
JNI_METHOD_END(nullptr)
}
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 71524472ec..15b6949c78 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -553,7 +553,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::ExpandRel& expan
if (rowType) {
// Try to compile the expressions. If there is any unregistered
// function or mismatched type, exception will be thrown.
- exec::ExprSet exprSet(std::move(expressions), execCtx_);
+ exec::ExprSet exprSet(std::move(expressions), execCtx_.get());
}
} else {
LOG_VALIDATION_MSG("Only SwitchingField is supported in ExpandRel.");
@@ -667,7 +667,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::WindowRel& windo
}
// Try to compile the expressions. If there is any unregistred funciton or
// mismatched type, exception will be thrown.
- exec::ExprSet exprSet(std::move(expressions), execCtx_);
+ exec::ExprSet exprSet(std::move(expressions), execCtx_.get());
// Validate Sort expression
const auto& sorts = windowRel.sorts();
@@ -690,7 +690,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::WindowRel& windo
LOG_VALIDATION_MSG("in windowRel, the sorting key in Sort Operator
only support field.");
return false;
}
- exec::ExprSet exprSet1({std::move(expression)}, execCtx_);
+ exec::ExprSet exprSet1({std::move(expression)}, execCtx_.get());
}
}
@@ -738,7 +738,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::WindowGroupLimit
}
// Try to compile the expressions. If there is any unregistered function or
// mismatched type, exception will be thrown.
- exec::ExprSet exprSet(std::move(expressions), execCtx_);
+ exec::ExprSet exprSet(std::move(expressions), execCtx_.get());
// Validate Sort expression
const auto& sorts = windowGroupLimitRel.sorts();
for (const auto& sort : sorts) {
@@ -760,7 +760,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::WindowGroupLimit
LOG_VALIDATION_MSG("in windowGroupLimitRel, the sorting key in Sort
Operator only support field.");
return false;
}
- exec::ExprSet exprSet1({std::move(expression)}, execCtx_);
+ exec::ExprSet exprSet1({std::move(expression)}, execCtx_.get());
}
}
@@ -862,7 +862,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::SortRel& sortRel
LOG_VALIDATION_MSG("in SortRel, the sorting key in Sort Operator only
support field.");
return false;
}
- exec::ExprSet exprSet({std::move(expression)}, execCtx_);
+ exec::ExprSet exprSet({std::move(expression)}, execCtx_.get());
}
}
@@ -909,7 +909,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::ProjectRel& proj
}
// Try to compile the expressions. If there is any unregistered function or
// mismatched type, exception will be thrown.
- exec::ExprSet exprSet(std::move(expressions), execCtx_);
+ exec::ExprSet exprSet(std::move(expressions), execCtx_.get());
return true;
}
@@ -948,7 +948,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::FilterRel& filte
expressions.emplace_back(exprConverter_->toVeloxExpr(filterRel.condition(),
rowType));
// Try to compile the expressions. If there is any unregistered function
// or mismatched type, exception will be thrown.
- exec::ExprSet exprSet(std::move(expressions), execCtx_);
+ exec::ExprSet exprSet(std::move(expressions), execCtx_.get());
return true;
}
@@ -1022,7 +1022,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::JoinRel& joinRel
if (joinRel.has_post_join_filter()) {
auto expression = exprConverter_->toVeloxExpr(joinRel.post_join_filter(),
rowType);
- exec::ExprSet exprSet({std::move(expression)}, execCtx_);
+ exec::ExprSet exprSet({std::move(expression)}, execCtx_.get());
}
return true;
}
@@ -1078,7 +1078,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::CrossRel& crossR
if (crossRel.has_expression()) {
auto expression = exprConverter_->toVeloxExpr(crossRel.expression(),
rowType);
- exec::ExprSet exprSet({std::move(expression)}, execCtx_);
+ exec::ExprSet exprSet({std::move(expression)}, execCtx_.get());
}
return true;
@@ -1304,7 +1304,7 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::ReadRel& readRel
expressions.emplace_back(exprConverter_->toVeloxExpr(readRel.filter(),
rowType));
// Try to compile the expressions. If there is any unregistered function
// or mismatched type, exception will be thrown.
- exec::ExprSet exprSet(std::move(expressions), execCtx_);
+ exec::ExprSet exprSet(std::move(expressions), execCtx_.get());
}
return true;
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
index 881a0e5148..28d82f9cce 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
@@ -20,14 +20,21 @@
#include "SubstraitToVeloxPlan.h"
#include "velox/core/QueryCtx.h"
+using namespace facebook;
+
namespace gluten {
/// This class is used to validate whether the computing of
/// a Substrait plan is supported in Velox.
class SubstraitToVeloxPlanValidator {
public:
- SubstraitToVeloxPlanValidator(memory::MemoryPool* pool, core::ExecCtx*
execCtx)
- : pool_(pool), execCtx_(execCtx), planConverter_(pool_, confMap_,
std::nullopt, true) {}
+ SubstraitToVeloxPlanValidator(memory::MemoryPool* pool) :
planConverter_(pool, {}, std::nullopt, true) {
+ const std::unordered_map<std::string, std::string> configs{
+ {velox::core::QueryConfig::kSparkPartitionId, "0"},
{velox::core::QueryConfig::kSessionTimezone, "GMT"}};
+ queryCtx_ = velox::core::QueryCtx::create(nullptr,
velox::core::QueryConfig(configs));
+ // An execution context used for function validation.
+ execCtx_ = std::make_unique<velox::core::ExecCtx>(pool, queryCtx_.get());
+ }
/// Used to validate whether the computing of this Plan is supported.
bool validate(const ::substrait::Plan& plan);
@@ -88,14 +95,10 @@ class SubstraitToVeloxPlanValidator {
/// Used to validate whether the computing of this RelRoot is supported.
bool validate(const ::substrait::RelRoot& relRoot);
- /// A memory pool used for function validation.
- memory::MemoryPool* pool_;
+ std::shared_ptr<velox::core::QueryCtx> queryCtx_;
/// An execution context used for function validation.
- core::ExecCtx* execCtx_;
-
- // Unused customized conf map.
- std::unordered_map<std::string, std::string> confMap_ = {};
+ std::unique_ptr<core::ExecCtx> execCtx_;
/// A converter used to convert Substrait plan into Velox's plan node.
SubstraitToVeloxPlanConverter planConverter_;
diff --git a/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc
b/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc
index 3f90c865df..2476e2a2f8 100644
--- a/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc
+++ b/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc
@@ -47,12 +47,7 @@ class Substrait2VeloxPlanValidatorTest : public
exec::test::HiveConnectorTestBas
}
bool validatePlan(::substrait::Plan& plan) {
- auto queryCtx = core::QueryCtx::create();
-
- // An execution context used for function validation.
- std::unique_ptr<core::ExecCtx> execCtx =
std::make_unique<core::ExecCtx>(pool_.get(), queryCtx.get());
-
- auto planValidator =
std::make_shared<SubstraitToVeloxPlanValidator>(pool_.get(), execCtx.get());
+ auto planValidator =
std::make_shared<SubstraitToVeloxPlanValidator>(pool_.get());
return planValidator->validate(plan);
}
};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]