This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new cffe24a558 [GLUTEN-6887][VL] Daily Update Velox Version (2026_03_24)
(#11817)
cffe24a558 is described below
commit cffe24a5589f8ef8d4bd1838442e95949669b682
Author: Gluten Performance Bot
<[email protected]>
AuthorDate: Wed Mar 25 21:47:54 2026 +0000
[GLUTEN-6887][VL] Daily Update Velox Version (2026_03_24) (#11817)
* [GLUTEN-6887][VL] Daily Update Velox Version (dft-2026_03_24)
Upstream Velox's New Commits:
a6b99fc9e by Masha Basmanova, fix: Handle non-decimal HUGEINT types in
Variant::toString and toJson (#16893)
1e19ac67f by Avinash Raj, fix(build): Register /usr/local/lib64 with
ldconfig after gflags install on CentOS 9 (#16817)
49cbdb1d5 by Christian Zentgraf, fix(build): Use correct namespace for
HiveCommitMessage (#16884)
cf9ee6b0a by Masha Basmanova, feat: Add toPrestoTypeSql() for Presto SQL
type formatting (#16876)
9357ee458 by Henry Edwin Dikeman, refactor(hive): Extract JSON field name
constants in HiveDataSink (#16863)
92917548a by Han Yan, Back out D96046667: "refactor: Remove
VectorSerde::kind() method, use static serializer names" (#16860)
49fc9e092 by Jiaqi Zhang, Add fileformat runtime stats (#16862)
3f00bb12d by Alex Zhavnerchik, fix(hive): allow FileIndexReader for Flux
(#16850)
06efd699b by Kevin Wilfong, feat: Add templated return type to
CardinalityFunction (#16849)
47aa762ed by Masha Basmanova, docs: Add coding style rule against generic
*Utils/*Helpers naming (#16858)
666cbef90 by Zac Wen, fix(hive): Simplify UnionResultIterator::hasNext()
logic (#16851)
9886ef8f6 by Zhichen Xu, fix(rpc): Remove gmock dependency from RPCNodeTest
to fix OSS GCC-14 build (#16848)
b7fdad2c5 by Kent Yao, feat(sparksql): Support RESPECT NULLS for Spark
collect_set aggregate function (#16416)
66e644edf by Rui Mo, feat: Add TIME_MICRO_UTC type (#16468)
3cd19c594 by Xiaoxuan Meng, feat: Add simdFill utility to SimdUtil (#16845)
d8f5c77e9 by Masha Basmanova, feat(joins): Add counting semi-join and
anti-join (#16841)
512aa153f by Abhinav Mukherjee, feat: Add vector_sum aggregate function
using Simple API (#16498)
9a54e15bb by Simon Eves, feat(cudf): GPU Decimal (Part 1 of 3) (#16612)
922d64ce5 by Zac Wen, feat(hive): Add UnionResultIterator for multi-split
index lookup (#16812)
abed025a6 by Kk Pulla, feat(operator): Add MarkSorted Python bindings
(#16654)
230b50c42 by Kk Pulla, perf(operator): Add MarkSorted performance
optimizations (#16653)
27be0f31b by Krishna Pai, fix(build): Run single exchange fuzzer instance
to avoid OOM (#16846)
e3f194b4c by Masha Basmanova, feat: Add valueToString to
TimestampWithTimeZoneType (#16840)
59585ffe0 by Krishna Pai, build: Optimize fuzzer compile with higher
parallelism, shared build, and targeted targets (#16797)
99656e94f by Amit Dutta, fix: Copy pattern string in LikeGeneric to prevent
use-after-free crash (#16830)
c674e4aa9 by Ke Wang, misc: Remove unused footer estimated size constant
(#16822)
3bd5486c7 by Zhichen Xu, feat(rpc): Add RPC function stubs for sidecar
discovery [5/8] (OSS) (#16793)
0e190e17f by Krishna Pai, fix: Back out Avoid redundant outputBuffer
clearing (#16829)
455daccc2 by Han Yan, refactor: Remove VectorSerde::kind() method, use
static serializer names (#16710)
33d609bfe by Manikanta Loya, fix(dwrf): Fix dangling StringView keys in
FlatMapColumnWriter (#16800)
0ab919d30 by Jimmy Lu, fix: Validate buffer index in Arrow Utf8View import
(#16808)
4cdf5a279 by Deepak Majeti, feat(cudf): Run tests in CI (#15700)
16c2a8ad8 by lingbin, fix: Remove redundant checks in BufferInputStream
(#16780)
58863985a by Ping Liu, feat(parquet): Support read TIME_MILLIS parquet type
(#16217)
48cb6b10e by Masha Basmanova, fix: Fix map_from_entries on empty array with
UNKNOWN element type (#16815)
8448c71f0 by Masha Basmanova, fix:
SignatureBinder::tryBindVariablesWithCoercion failure on integer variables
(#16814)
cfa5bff99 by Zhichen Xu, feat(rpc): Add unit tests and reference
implementation for RPC framework [4/8] (OSS) (#16792)
495bc9157 by Simon Eves, feat(cudf): Add CUDF concat(VARCHAR) for TPC-DS
(#16729)
c8ff3e8dc by Ping Liu, docs: Add blog post of processing unicode with SIMD
(#16764)
82cffb9d9 by Xiao Du, fix: Add per-iteration seed logging to
MemoryArbitrationFuzzer (#16810)
e0169a0b2 by Zac Wen, refactor(hive): Add pluggable index reader support
(#16803)
522d04499 by Patrick Sullivan, Make remote function execution async (#16598)
550e5e3fb by Krishna Pai, Remove VELOX_ENABLE_BACKWARD_COMPATIBILITY from
Writer::close() (#16801)
36a7e35ca by Pedro Eugenio Rocha Pedreira, test: Add PrintTo for test param
structs to improve gtest output (#16796)
4e4b841ed by rexan, fix: Allow scientific when casting from decimal to
string (#14910)
965ada28c by Jimmy Lu, fix(parquet): Potential out of bound access reading
bad data (#16799)
daa26a157 by Han Yan, Refactor KeyEncoderTest: extract test loop
boilerplate (#16766)
0b7f909ba by Jialiang Tan, feat(spill): Add
row_number_spill_file_create_config for RowNumber operator (#16802)
30bc96568 by Zhichen Xu, feat(rpc): Add RPCOperator, RPCState,
RPCRateLimiter, and RPCPlanNodeTranslator [3/8] (OSS) (#16787)
c696df4b2 by Ping Liu, perf: Optimize cappedLengthUnicode and
cappedByteLengthUnicode with SIMD (#16428)
4a479841a by Pedro Eugenio Rocha Pedreira, test: Add
ArraySplitIntoChunksTest.cpp back to CMake (#16788)
0bed498ba by Kevin Wilfong, misc: Add option to UnnestNode to
enable/disable splitting of output (#16762)
18fe4451e by Pratik Pugalia, build: Add additional_context and dry_run
inputs to Claude workflow_dispatch (#16798)
8e248e232 by Xiao Du, fix: Disable MarkDistinct spill (#16790)
0322c97a3 by Zhichen Xu, feat(rpc): Add RPCNode plan node to
core/PlanNode.h [2/8] (OSS) (#16727)
bf972e6a7 by Rui Mo, build: Undefined symbols
`registerArraySplitIntoChunksFunctions` (#16781)
e2e5f11af by Jimmy Lu, fix: Validate deserialized sizes before
vector::resize to prevent process crash (#16763)
cf2322827 by Xiaoxuan Meng, feat: Add native preload support to
DirectBufferedInput and CachedBufferedInput (#16768)
a3770113e by Allen Shen, feat: Add array_split_into_chunks function (#16584)
5864de104 by lifulong, fix: Fix cast sum(decimal(18,4)) to float precision
miss (#16588)
4609a36cb by Ping Liu, test: Add stats based parquet file filter test
(#16709)
c935428e7 by Rui Mo, misc: Prepare for time type extension (#16662)
48320d536 by lingbin, refactor: Clean up AllocationTest includes and remove
leftover debug log (#16720)
64a2fd39f by Christian Zentgraf, feat(ci): Move gh action installs to the
dependency image (#16667)
98f803492 by Shakyan Kushwaha, docs: Update coverage.rst for localtimestamp
and current_time (#16565)
77212fb04 by Simon Eves, feat(prestosql): Add ceil(DECIMAL) PrestoSQL
function (#16253)
0c6a01f51 by Shruti Shivakumar, feat(cudf): Update cudf and related
dependency pins to 2026-03-12 (#16752)
63efbb0a5 by Ping Liu, docs: Add comments to PositionalDeleteFileReader
(#16746)
18b4f71d3 by Xiao Du, feat: Add MarkDistinct Fuzzer (#16600)
ec4af5027 by Han Yan, Add name() accessor to VectorSerde classes (#16772)
9b35b9638 by Pratik Pugalia, Fix pre-commit lint issues in website files
(#16773)
8fe74b331 by lingbin, fix: Fix redundant prefix increment before assignment
in StreamArena (#16717)
810b19d6d by Abhinav Mukherjee, Re-add dot_product UDF with test fix
(#16740)
bef4c3772 by Ali LeClerc, docs: update veloxcon banner on velox-lib.io
(#16754)
74f127c20 by Krishna Pai, build: Optimize CI with test splitting and
32-core runner (#16691)
e7dd656c9 by Ping Liu, feat: Collect Iceberg stats (#16062)
---
cpp/velox/operators/hashjoin/HashTableBuilder.cc | 4 ++++
ep/build-velox/src/get-velox.sh | 4 ++--
.../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 2 ++
.../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 2 ++
.../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 2 ++
.../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 8 ++++++++
6 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/cpp/velox/operators/hashjoin/HashTableBuilder.cc
b/cpp/velox/operators/hashjoin/HashTableBuilder.cc
index 7c42cf5b49..2dcc098dad 100644
--- a/cpp/velox/operators/hashjoin/HashTableBuilder.cc
+++ b/cpp/velox/operators/hashjoin/HashTableBuilder.cc
@@ -127,6 +127,7 @@ void HashTableBuilder::setupTable() {
dependentTypes,
true, // allowDuplicates
true, // hasProbedFlag
+ false, // hasCountFlag
1'000, // operatorCtx_->driverCtx()->queryConfig().minTableRowsForParallelJoinBuild()
pool_,
true);
@@ -137,6 +138,7 @@ void HashTableBuilder::setupTable() {
!withFilter_ && (isLeftSemiFilterJoin(joinType_) ||
isLeftSemiProjectJoin(joinType_) || isAntiJoin(joinType_));
// Right semi join needs to tag build rows that were probed.
const bool needProbedFlag = isRightSemiFilterJoin(joinType_);
+ const bool hasCountFlag = facebook::velox::core::isCountingJoin(joinType_);
if (isLeftNullAwareJoinWithFilter(joinType_, nullAware_, withFilter_)) {
// We need to check null key rows in build side in case of null-aware anti
// or left semi project join with filter set.
@@ -145,6 +147,7 @@ void HashTableBuilder::setupTable() {
dependentTypes,
!dropDuplicates_, // allowDuplicates
needProbedFlag, // hasProbedFlag
+ hasCountFlag, // hasCountFlag
1'000, // operatorCtx_->driverCtx()->queryConfig().minTableRowsForParallelJoinBuild()
pool_,
true);
@@ -155,6 +158,7 @@ void HashTableBuilder::setupTable() {
dependentTypes,
!dropDuplicates_, // allowDuplicates
needProbedFlag, // hasProbedFlag
+ hasCountFlag, // hasCountFlag
1'000, // operatorCtx_->driverCtx()->queryConfig().minTableRowsForParallelJoinBuild()
pool_,
bloomFilterPushdownSize_);
diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh
index 4105366f6b..1e3f71c358 100755
--- a/ep/build-velox/src/get-velox.sh
+++ b/ep/build-velox/src/get-velox.sh
@@ -18,8 +18,8 @@ set -exu
CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
VELOX_REPO=https://github.com/IBM/velox.git
-VELOX_BRANCH=dft-2026_03_15-iceberg
-VELOX_ENHANCED_BRANCH=ibm-2026_03_15
+VELOX_BRANCH=dft-2026_03_24
+VELOX_ENHANCED_BRANCH=ibm-2026_03_24
VELOX_HOME=""
RUN_SETUP_SCRIPT=ON
ENABLE_ENHANCED_FEATURES=OFF
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 9ff2ce221e..c5455b6c6b 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -115,6 +115,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("data type casting")
// Revised by setting timezone through config and commented unsupported cases.
.exclude("cast string to timestamp")
+ // TODO: fix after https://github.com/facebookincubator/velox/pull/14910
+ .exclude("SPARK-39749: cast Decimal to string")
enableSuite[GlutenCollectionExpressionsSuite]
// Rewrite in Gluten to replace Seq with Array
.exclude("Shuffle")
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index e12d5c7e32..6d30450e62 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -115,6 +115,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("data type casting")
// Revised by setting timezone through config and commented unsupported cases.
.exclude("cast string to timestamp")
+ // TODO: fix after https://github.com/facebookincubator/velox/pull/14910
+ .exclude("SPARK-39749: cast Decimal to string")
enableSuite[GlutenCollectionExpressionsSuite]
// Rewrite in Gluten to replace Seq with Array
.exclude("Shuffle")
diff --git
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 80338a70c1..d0716932b7 100644
---
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -127,6 +127,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("data type casting")
// Revised by setting timezone through config and commented unsupported cases.
.exclude("cast string to timestamp")
+ // TODO: fix after https://github.com/facebookincubator/velox/pull/14910
+ .exclude("SPARK-39749: cast Decimal to string")
enableSuite[GlutenCollectionExpressionsSuite]
// Rewrite in Gluten to replace Seq with Array
.exclude("Shuffle")
diff --git
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 19fd731b18..47a1ff3d66 100644
---
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -132,6 +132,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("data type casting")
// Revised by setting timezone through config and commented unsupported cases.
.exclude("cast string to timestamp")
+ // TODO: fix after https://github.com/facebookincubator/velox/pull/14910
+ .exclude("SPARK-39749: cast Decimal to string")
enableSuite[GlutenCollectionExpressionsSuite]
// Rewrite in Gluten to replace Seq with Array
.exclude("Shuffle")
@@ -408,11 +410,17 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenV2SessionCatalogNamespaceSuite]
enableSuite[GlutenV2SessionCatalogTableSuite]
enableSuite[GlutenCSVv1Suite]
+ // https://github.com/apache/gluten/issues/11825
+ .exclude("corrupted ZSTD compressed csv respects ignoreCorruptFiles")
enableSuite[GlutenCSVv2Suite]
+ // https://github.com/apache/gluten/issues/11825
+ .exclude("corrupted ZSTD compressed csv respects ignoreCorruptFiles")
// https://github.com/apache/gluten/issues/11505
enableSuite[GlutenCSVLegacyTimeParserSuite]
.exclude("Write timestamps correctly in ISO8601 format by default")
.exclude("csv with variant")
+ // https://github.com/apache/gluten/issues/11825
+ .exclude("corrupted ZSTD compressed csv respects ignoreCorruptFiles")
enableSuite[GlutenJsonV1Suite]
// FIXME: Array direct selection fails
.exclude("Complex field and type inferring")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]