This is an automated email from the ASF dual-hosted git repository.
zhouyuan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new cb9e5d7b24 [GLUTEN-6887][VL] Daily Update Velox Version (2026_05_25)
(#12133)
cb9e5d7b24 is described below
commit cb9e5d7b243550445e62cbdf8b3007ae0669f342
Author: Gluten Performance Bot
<[email protected]>
AuthorDate: Tue May 26 06:55:33 2026 +0100
[GLUTEN-6887][VL] Daily Update Velox Version (2026_05_25) (#12133)
* [GLUTEN-6887][VL] Daily Update Velox Version (dft-2026_05_25)
Upstream Velox's New Commits:
9c406e973 by Xiaoxuan Meng, feat: Use CachedBufferedInput in
NimbleIndexProjector when cache is available (#17603)
00f1eb68c by Masha Basmanova, refactor: Introduce PrestoQueryConfig with
array_agg key (#17591)
edfee872c by wecharyu, fix(parquet): Avoid generate negative nanoseconds
for Timestamp type (#16325)
c5fa4f2e3 by Bikramjeet Vig, misc: Simplify loadReusedLazyVectors to use
identityProjections (#17570)
f611ba23a by Henry Edwin Dikeman, feat(cursor): Honor outputPool and
copyResult in SingleThreadedTaskCursor (#17343)
19f8f6ab8 by Henry Edwin Dikeman, fix: Make TimeZoneMap resilient to
missing timezone database (#17262)
99e57ee1c by Masha Basmanova, docs(review): Expand self-review checklist
(#17595)
559fc3297 by Masha Basmanova, refactor: Move Spark configs into
SparkQueryConfig (#17583)
Signed-off-by: glutenperfbot <[email protected]>
* Fix to use the right Spark-related configurations
Signed-off-by: Yuan <[email protected]>
fix format
Signed-off-by: Yuan <[email protected]>
fix
Signed-off-by: Yuan <[email protected]>
fix
Signed-off-by: Yuan <[email protected]>
fix format
Signed-off-by: Yuan <[email protected]>
fix
Signed-off-by: Yuan <[email protected]>
---------
Signed-off-by: glutenperfbot <[email protected]>
Signed-off-by: Yuan <[email protected]>
Co-authored-by: glutenperfbot <[email protected]>
Co-authored-by: Yuan <[email protected]>
---
cpp/velox/compute/WholeStageResultIterator.cc | 23 ++++++++++++----------
.../substrait/SubstraitToVeloxPlanValidator.h | 6 ++++--
ep/build-velox/src/get-velox.sh | 4 ++--
3 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc
b/cpp/velox/compute/WholeStageResultIterator.cc
index 2b957ce54d..c3ac095cdc 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -23,6 +23,7 @@
#include "velox/connectors/hive/HiveConfig.h"
#include "velox/connectors/hive/HiveConnectorSplit.h"
#include "velox/exec/PlanNodeStats.h"
+#include "velox/functions/sparksql/SparkQueryConfig.h"
#ifdef GLUTEN_ENABLE_GPU
#include <cudf/io/types.hpp>
#include "cudf/GpuLock.h"
@@ -33,6 +34,7 @@
#include "operators/plannodes/RowVectorStream.h"
using namespace facebook;
+using facebook::velox::functions::sparksql::SparkQueryConfig;
namespace gluten {
@@ -589,7 +591,8 @@ std::unordered_map<std::string, std::string>
WholeStageResultIterator::getQueryC
configs[velox::core::QueryConfig::kPreferredOutputBatchBytes] =
std::to_string(veloxCfg_->get<uint64_t>(kVeloxPreferredBatchBytes, 10L
<< 20));
try {
- configs[velox::core::QueryConfig::kSparkAnsiEnabled] =
veloxCfg_->get<std::string>(kAnsiEnabled, "false");
+ configs[SparkQueryConfig::qualify(SparkQueryConfig::kAnsiEnabled)] =
+ veloxCfg_->get<std::string>(kAnsiEnabled, "false");
configs[velox::core::QueryConfig::kSessionTimezone] =
normalizeSessionTimezone(veloxCfg_->get<std::string>(kSessionTimezone,
""));
// Adjust timestamp according to the above configured session timezone.
@@ -665,17 +668,17 @@ std::unordered_map<std::string, std::string>
WholeStageResultIterator::getQueryC
std::to_string(veloxCfg_->get<uint64_t>(kHashProbeBloomFilterPushdownMaxSize,
0));
if (const auto opt =
veloxCfg_->get<std::string>(kSparkBloomFilterExpectedNumItems)) {
- configs[velox::core::QueryConfig::kSparkBloomFilterExpectedNumItems] =
opt.value();
+
configs[SparkQueryConfig::qualify(SparkQueryConfig::kBloomFilterExpectedNumItems)]
= opt.value();
}
if (const auto opt =
veloxCfg_->get<std::string>(kSparkBloomFilterNumBits)) {
- configs[velox::core::QueryConfig::kSparkBloomFilterNumBits] =
opt.value();
+
configs[SparkQueryConfig::qualify(SparkQueryConfig::kBloomFilterNumBits)] =
opt.value();
}
if (const auto opt =
veloxCfg_->get<std::string>(kSparkBloomFilterMaxNumBits)) {
// Velox will check memory cannot exceed 4194304.
- configs[velox::core::QueryConfig::kSparkBloomFilterMaxNumBits] =
opt.value();
+
configs[SparkQueryConfig::qualify(SparkQueryConfig::kBloomFilterMaxNumBits)] =
opt.value();
}
if (const auto opt =
veloxCfg_->get<std::string>(kSparkBloomFilterMaxNumItems)) {
- configs[velox::core::QueryConfig::kSparkBloomFilterMaxNumItems] =
opt.value();
+
configs[SparkQueryConfig::qualify(SparkQueryConfig::kBloomFilterMaxNumItems)] =
opt.value();
}
// spark.gluten.sql.columnar.backend.velox.SplitPreloadPerDriver takes no
effect if
// spark.gluten.sql.columnar.backend.velox.IOThreads is set to 0
@@ -691,14 +694,14 @@ std::unordered_map<std::string, std::string>
WholeStageResultIterator::getQueryC
// Disable driver cpu time slicing.
configs[velox::core::QueryConfig::kDriverCpuTimeSliceLimitMs] = "0";
- configs[velox::core::QueryConfig::kSparkPartitionId] =
std::to_string(taskInfo_.partitionId);
+ configs[SparkQueryConfig::qualify(SparkQueryConfig::kPartitionId)] =
std::to_string(taskInfo_.partitionId);
// Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy
is set to 'LEGACY'
// or 'legacy'
if (veloxCfg_->get<std::string>(kSparkLegacyTimeParserPolicy, "") ==
"LEGACY") {
- configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "true";
+
configs[SparkQueryConfig::qualify(SparkQueryConfig::kLegacyDateFormatter)] =
"true";
} else {
- configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "false";
+
configs[SparkQueryConfig::qualify(SparkQueryConfig::kLegacyDateFormatter)] =
"false";
}
if (veloxCfg_->get<std::string>(kSparkMapKeyDedupPolicy, "") ==
"EXCEPTION") {
@@ -707,10 +710,10 @@ std::unordered_map<std::string, std::string>
WholeStageResultIterator::getQueryC
configs[velox::core::QueryConfig::kThrowExceptionOnDuplicateMapKeys] =
"false";
}
- configs[velox::core::QueryConfig::kSparkLegacyStatisticalAggregate] =
+
configs[SparkQueryConfig::qualify(SparkQueryConfig::kLegacyStatisticalAggregate)]
=
std::to_string(veloxCfg_->get<bool>(kSparkLegacyStatisticalAggregate,
false));
- configs[velox::core::QueryConfig::kSparkJsonIgnoreNullFields] =
+
configs[SparkQueryConfig::qualify(SparkQueryConfig::kJsonIgnoreNullFields)] =
std::to_string(veloxCfg_->get<bool>(kSparkJsonIgnoreNullFields, true));
configs[velox::core::QueryConfig::kExprMaxCompiledRegexes] =
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
index 55d2d2c968..ca9f6c4012 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.h
@@ -22,9 +22,10 @@
#include "config/VeloxConfig.h"
#include "operators/plannodes/IteratorSplit.h"
#include "velox/core/QueryCtx.h"
+#include "velox/functions/sparksql/SparkQueryConfig.h"
using namespace facebook;
-
+using facebook::velox::functions::sparksql::SparkQueryConfig;
namespace gluten {
/// This class is used to validate whether the computing of
@@ -33,7 +34,8 @@ class SubstraitToVeloxPlanValidator {
public:
SubstraitToVeloxPlanValidator(memory::MemoryPool* pool) {
std::unordered_map<std::string, std::string> configs{
- {velox::core::QueryConfig::kSparkPartitionId, "0"},
{velox::core::QueryConfig::kSessionTimezone, "UTC"}};
+ {SparkQueryConfig::qualify(SparkQueryConfig::kPartitionId), "0"},
+ {velox::core::QueryConfig::kSessionTimezone, "UTC"}};
veloxCfg_ =
std::make_shared<facebook::velox::config::ConfigBase>(std::move(configs));
planConverter_ = std::make_unique<SubstraitToVeloxPlanConverter>(
pool,
diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh
index 2387962aec..6bc124c5e8 100755
--- a/ep/build-velox/src/get-velox.sh
+++ b/ep/build-velox/src/get-velox.sh
@@ -18,8 +18,8 @@ set -exu
CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
VELOX_REPO=https://github.com/IBM/velox.git
-VELOX_BRANCH=dft-2026_05_22
-VELOX_ENHANCED_BRANCH=ibm-2026_05_22
+VELOX_BRANCH=dft-2026_05_25
+VELOX_ENHANCED_BRANCH=ibm-2026_05_25
VELOX_HOME=""
RUN_SETUP_SCRIPT=ON
ENABLE_ENHANCED_FEATURES=OFF
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]