This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new cffe24a558 [GLUTEN-6887][VL] Daily Update Velox Version (2026_03_24) (#11817)
cffe24a558 is described below

commit cffe24a5589f8ef8d4bd1838442e95949669b682
Author: Gluten Performance Bot <[email protected]>
AuthorDate: Wed Mar 25 21:47:54 2026 +0000

    [GLUTEN-6887][VL] Daily Update Velox Version (2026_03_24) (#11817)
    
    * [GLUTEN-6887][VL] Daily Update Velox Version (dft-2026_03_24)
    
    Upstream Velox's New Commits:
    a6b99fc9e by Masha Basmanova, fix: Handle non-decimal HUGEINT types in 
Variant::toString and toJson (#16893)
    1e19ac67f by Avinash Raj, fix(build): Register /usr/local/lib64 with 
ldconfig after gflags install on CentOS 9 (#16817)
    49cbdb1d5 by Christian Zentgraf, fix(build): Use correct namespace for 
HiveCommitMessage (#16884)
    cf9ee6b0a by Masha Basmanova, feat: Add toPrestoTypeSql() for Presto SQL 
type formatting (#16876)
    9357ee458 by Henry Edwin Dikeman, refactor(hive): Extract JSON field name 
constants in HiveDataSink (#16863)
    92917548a by Han Yan, Back out D96046667: "refactor: Remove 
VectorSerde::kind() method, use static serializer names" (#16860)
    49fc9e092 by Jiaqi Zhang, Add fileformat runtime stats (#16862)
    3f00bb12d by Alex Zhavnerchik, fix(hive): allow FileIndexReader for Flux 
(#16850)
    06efd699b by Kevin Wilfong, feat: Add templated return type to 
CardinalityFunction (#16849)
    47aa762ed by Masha Basmanova, docs: Add coding style rule against generic 
*Utils/*Helpers naming (#16858)
    666cbef90 by Zac Wen, fix(hive): Simplify UnionResultIterator::hasNext() 
logic (#16851)
    9886ef8f6 by Zhichen Xu, fix(rpc): Remove gmock dependency from RPCNodeTest 
to fix OSS GCC-14 build (#16848)
    b7fdad2c5 by Kent Yao, feat(sparksql): Support RESPECT NULLS for Spark 
collect_set aggregate function (#16416)
    66e644edf by Rui Mo, feat: Add TIME_MICRO_UTC type (#16468)
    3cd19c594 by Xiaoxuan Meng, feat: Add simdFill utility to SimdUtil (#16845)
    d8f5c77e9 by Masha Basmanova, feat(joins): Add counting semi-join and 
anti-join (#16841)
    512aa153f by Abhinav Mukherjee, feat: Add vector_sum aggregate function 
using Simple API (#16498)
    9a54e15bb by Simon Eves, feat(cudf): GPU Decimal (Part 1 of 3) (#16612)
    922d64ce5 by Zac Wen, feat(hive): Add UnionResultIterator for multi-split 
index lookup (#16812)
    abed025a6 by Kk Pulla, feat(operator): Add MarkSorted Python bindings 
(#16654)
    230b50c42 by Kk Pulla, perf(operator): Add MarkSorted performance 
optimizations (#16653)
    27be0f31b by Krishna Pai, fix(build): Run single exchange fuzzer instance 
to avoid OOM (#16846)
    e3f194b4c by Masha Basmanova, feat: Add valueToString to 
TimestampWithTimeZoneType (#16840)
    59585ffe0 by Krishna Pai, build: Optimize fuzzer compile with higher 
parallelism, shared build, and targeted targets (#16797)
    99656e94f by Amit Dutta, fix: Copy pattern string in LikeGeneric to prevent 
use-after-free crash (#16830)
    c674e4aa9 by Ke Wang, misc: Remove unused footer estimated size constant 
(#16822)
    3bd5486c7 by Zhichen Xu, feat(rpc): Add RPC function stubs for sidecar 
discovery [5/8] (OSS) (#16793)
    0e190e17f by Krishna Pai, fix: Back out Avoid redundant outputBuffer 
clearing (#16829)
    455daccc2 by Han Yan, refactor: Remove VectorSerde::kind() method, use 
static serializer names (#16710)
    33d609bfe by Manikanta Loya, fix(dwrf): Fix dangling StringView keys in 
FlatMapColumnWriter (#16800)
    0ab919d30 by Jimmy Lu, fix: Validate buffer index in Arrow Utf8View import 
(#16808)
    4cdf5a279 by Deepak Majeti, feat(cudf): Run tests in CI (#15700)
    16c2a8ad8 by lingbin, fix: Remove redundant checks in BufferInputStream 
(#16780)
    58863985a by Ping Liu, feat(parquet): Support read TIME_MILLIS parquet type 
(#16217)
    48cb6b10e by Masha Basmanova, fix: Fix map_from_entries on empty array with 
UNKNOWN element type (#16815)
    8448c71f0 by Masha Basmanova, fix: 
SignatureBinder::tryBindVariablesWithCoercion failure on integer variables 
(#16814)
    cfa5bff99 by Zhichen Xu, feat(rpc): Add unit tests and reference 
implementation for RPC framework [4/8] (OSS) (#16792)
    495bc9157 by Simon Eves, feat(cudf): Add CUDF concat(VARCHAR) for TPC-DS 
(#16729)
    c8ff3e8dc by Ping Liu, docs: Add blog post of processing unicode with SIMD 
(#16764)
    82cffb9d9 by Xiao Du, fix: Add per-iteration seed logging to 
MemoryArbitrationFuzzer (#16810)
    e0169a0b2 by Zac Wen, refactor(hive): Add pluggable index reader support 
(#16803)
    522d04499 by Patrick Sullivan, Make remote function execution async (#16598)
    550e5e3fb by Krishna Pai, Remove VELOX_ENABLE_BACKWARD_COMPATIBILITY from 
Writer::close() (#16801)
    36a7e35ca by Pedro Eugenio Rocha Pedreira, test: Add PrintTo for test param 
structs to improve gtest output (#16796)
    4e4b841ed by rexan, fix: Allow scientific when casting from decimal to 
string (#14910)
    965ada28c by Jimmy Lu, fix(parquet): Potential out of bound access reading 
bad data (#16799)
    daa26a157 by Han Yan, Refactor KeyEncoderTest: extract test loop 
boilerplate (#16766)
    0b7f909ba by Jialiang Tan, feat(spill): Add 
row_number_spill_file_create_config for RowNumber operator (#16802)
    30bc96568 by Zhichen Xu, feat(rpc): Add RPCOperator, RPCState, 
RPCRateLimiter, and RPCPlanNodeTranslator [3/8] (OSS) (#16787)
    c696df4b2 by Ping Liu, perf: Optimize cappedLengthUnicode and 
cappedByteLengthUnicode with SIMD (#16428)
    4a479841a by Pedro Eugenio Rocha Pedreira, test: Add 
ArraySplitIntoChunksTest.cpp back to CMake (#16788)
    0bed498ba by Kevin Wilfong, misc: Add option to UnnestNode to 
enable/disable splitting of output (#16762)
    18fe4451e by Pratik Pugalia, build: Add additional_context and dry_run 
inputs to Claude workflow_dispatch (#16798)
    8e248e232 by Xiao Du, fix: Disable MarkDistinct spill (#16790)
    0322c97a3 by Zhichen Xu, feat(rpc): Add RPCNode plan node to 
core/PlanNode.h [2/8] (OSS) (#16727)
    bf972e6a7 by Rui Mo, build: Undefined symbols 
`registerArraySplitIntoChunksFunctions` (#16781)
    e2e5f11af by Jimmy Lu, fix: Validate deserialized sizes before 
vector::resize to prevent process crash (#16763)
    cf2322827 by Xiaoxuan Meng, feat: Add native preload support to 
DirectBufferedInput and CachedBufferedInput (#16768)
    a3770113e by Allen Shen, feat: Add array_split_into_chunks function (#16584)
    5864de104 by lifulong, fix: Fix cast sum(decimal(18,4)) to float precision 
miss (#16588)
    4609a36cb by Ping Liu, test: Add stats based parquet file filter test 
(#16709)
    c935428e7 by Rui Mo, misc: Prepare for time type extension (#16662)
    48320d536 by lingbin, refactor: Clean up AllocationTest includes and remove 
leftover debug log (#16720)
    64a2fd39f by Christian Zentgraf, feat(ci): Move gh action installs to the 
dependency image (#16667)
    98f803492 by Shakyan Kushwaha, docs: Update coverage.rst for localtimestamp 
and current_time (#16565)
    77212fb04 by Simon Eves, feat(prestosql): Add ceil(DECIMAL) PrestoSQL 
function (#16253)
    0c6a01f51 by Shruti Shivakumar, feat(cudf): Update cudf and related 
dependency pins to 2026-03-12 (#16752)
    63efbb0a5 by Ping Liu, docs: Add comments to PositionalDeleteFileReader 
(#16746)
    18b4f71d3 by Xiao Du, feat: Add MarkDistinct Fuzzer (#16600)
    ec4af5027 by Han Yan, Add name() accessor to VectorSerde classes (#16772)
    9b35b9638 by Pratik Pugalia, Fix pre-commit lint issues in website files 
(#16773)
    8fe74b331 by lingbin, fix: Fix redundant prefix increment before assignment 
in StreamArena (#16717)
    810b19d6d by Abhinav Mukherjee, Re-add dot_product UDF with test fix 
(#16740)
    bef4c3772 by Ali LeClerc, docs: update veloxcon banner on velox-lib.io 
(#16754)
    74f127c20 by Krishna Pai, build: Optimize CI with test splitting and 
32-core runner (#16691)
    e7dd656c9 by Ping Liu, feat: Collect Iceberg stats (#16062)
---
 cpp/velox/operators/hashjoin/HashTableBuilder.cc                  | 4 ++++
 ep/build-velox/src/get-velox.sh                                   | 4 ++--
 .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala   | 2 ++
 .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala   | 2 ++
 .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala   | 2 ++
 .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala   | 8 ++++++++
 6 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/cpp/velox/operators/hashjoin/HashTableBuilder.cc b/cpp/velox/operators/hashjoin/HashTableBuilder.cc
index 7c42cf5b49..2dcc098dad 100644
--- a/cpp/velox/operators/hashjoin/HashTableBuilder.cc
+++ b/cpp/velox/operators/hashjoin/HashTableBuilder.cc
@@ -127,6 +127,7 @@ void HashTableBuilder::setupTable() {
         dependentTypes,
         true, // allowDuplicates
         true, // hasProbedFlag
+        false, // hasCountFlag
         1'000, // operatorCtx_->driverCtx()->queryConfig().minTableRowsForParallelJoinBuild()
         pool_,
         true);
@@ -137,6 +138,7 @@ void HashTableBuilder::setupTable() {
         !withFilter_ && (isLeftSemiFilterJoin(joinType_) || isLeftSemiProjectJoin(joinType_) || isAntiJoin(joinType_));
     // Right semi join needs to tag build rows that were probed.
     const bool needProbedFlag = isRightSemiFilterJoin(joinType_);
+    const bool hasCountFlag = facebook::velox::core::isCountingJoin(joinType_);
     if (isLeftNullAwareJoinWithFilter(joinType_, nullAware_, withFilter_)) {
       // We need to check null key rows in build side in case of null-aware anti
       // or left semi project join with filter set.
@@ -145,6 +147,7 @@ void HashTableBuilder::setupTable() {
           dependentTypes,
           !dropDuplicates_, // allowDuplicates
           needProbedFlag, // hasProbedFlag
+          hasCountFlag, // hasCountFlag
           1'000, // operatorCtx_->driverCtx()->queryConfig().minTableRowsForParallelJoinBuild()
           pool_,
           true);
@@ -155,6 +158,7 @@ void HashTableBuilder::setupTable() {
           dependentTypes,
           !dropDuplicates_, // allowDuplicates
           needProbedFlag, // hasProbedFlag
+          hasCountFlag, // hasCountFlag
           1'000, // operatorCtx_->driverCtx()->queryConfig().minTableRowsForParallelJoinBuild()
           pool_,
           bloomFilterPushdownSize_);
diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh
index 4105366f6b..1e3f71c358 100755
--- a/ep/build-velox/src/get-velox.sh
+++ b/ep/build-velox/src/get-velox.sh
@@ -18,8 +18,8 @@ set -exu
 
 CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
 VELOX_REPO=https://github.com/IBM/velox.git
-VELOX_BRANCH=dft-2026_03_15-iceberg
-VELOX_ENHANCED_BRANCH=ibm-2026_03_15
+VELOX_BRANCH=dft-2026_03_24
+VELOX_ENHANCED_BRANCH=ibm-2026_03_24
 VELOX_HOME=""
 RUN_SETUP_SCRIPT=ON
 ENABLE_ENHANCED_FEATURES=OFF
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 9ff2ce221e..c5455b6c6b 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -115,6 +115,8 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("data type casting")
     // Revised by setting timezone through config and commented unsupported 
cases.
     .exclude("cast string to timestamp")
+    // TODO: fix after https://github.com/facebookincubator/velox/pull/14910
+    .exclude("SPARK-39749: cast Decimal to string")
   enableSuite[GlutenCollectionExpressionsSuite]
     // Rewrite in Gluten to replace Seq with Array
     .exclude("Shuffle")
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index e12d5c7e32..6d30450e62 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -115,6 +115,8 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("data type casting")
     // Revised by setting timezone through config and commented unsupported 
cases.
     .exclude("cast string to timestamp")
+    // TODO: fix after https://github.com/facebookincubator/velox/pull/14910
+    .exclude("SPARK-39749: cast Decimal to string")
   enableSuite[GlutenCollectionExpressionsSuite]
     // Rewrite in Gluten to replace Seq with Array
     .exclude("Shuffle")
diff --git 
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 80338a70c1..d0716932b7 100644
--- 
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -127,6 +127,8 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("data type casting")
     // Revised by setting timezone through config and commented unsupported 
cases.
     .exclude("cast string to timestamp")
+    // TODO: fix after https://github.com/facebookincubator/velox/pull/14910
+    .exclude("SPARK-39749: cast Decimal to string")
   enableSuite[GlutenCollectionExpressionsSuite]
     // Rewrite in Gluten to replace Seq with Array
     .exclude("Shuffle")
diff --git 
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 19fd731b18..47a1ff3d66 100644
--- 
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -132,6 +132,8 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("data type casting")
     // Revised by setting timezone through config and commented unsupported 
cases.
     .exclude("cast string to timestamp")
+    // TODO: fix after https://github.com/facebookincubator/velox/pull/14910
+    .exclude("SPARK-39749: cast Decimal to string")
   enableSuite[GlutenCollectionExpressionsSuite]
     // Rewrite in Gluten to replace Seq with Array
     .exclude("Shuffle")
@@ -408,11 +410,17 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenV2SessionCatalogNamespaceSuite]
   enableSuite[GlutenV2SessionCatalogTableSuite]
   enableSuite[GlutenCSVv1Suite]
+    // https://github.com/apache/gluten/issues/11825
+    .exclude("corrupted ZSTD compressed csv respects ignoreCorruptFiles")
   enableSuite[GlutenCSVv2Suite]
+    // https://github.com/apache/gluten/issues/11825
+    .exclude("corrupted ZSTD compressed csv respects ignoreCorruptFiles")
   // https://github.com/apache/gluten/issues/11505
   enableSuite[GlutenCSVLegacyTimeParserSuite]
     .exclude("Write timestamps correctly in ISO8601 format by default")
     .exclude("csv with variant")
+    // https://github.com/apache/gluten/issues/11825
+    .exclude("corrupted ZSTD compressed csv respects ignoreCorruptFiles")
   enableSuite[GlutenJsonV1Suite]
     // FIXME: Array direct selection fails
     .exclude("Complex field and type inferring")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to