This is an automated email from the ASF dual-hosted git repository.
rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new a673988066 [GLUTEN-6887][VL] Daily Update Velox Version (2026_02_17) (#11624)
a673988066 is described below
commit a6739880664ecb7de06f0d888373b4e3bb897206
Author: Gluten Performance Bot <[email protected]>
AuthorDate: Thu Feb 19 14:33:39 2026 +0000
[GLUTEN-6887][VL] Daily Update Velox Version (2026_02_17) (#11624)
* [GLUTEN-6887][VL] Daily Update Velox Version (dft-2026_02_17)
Upstream Velox's New Commits:
54f466296 by Pedro Eugenio Rocha Pedreira, feat(cursor): Support parallel
execution in TaskDebuggerCursor (#16384)
2a7eb0840 by Shanyue Wan, feat: Add
resolveVectorFunctionWithMetadataWithCoercions (#16099)
6a4d205df by Suryadev Sahadevan Rajesh, docs: Add build badge for velox
(#16383)
6dbd60423 by Abhinav Mukherjee, Add the map_update UDF (#15968)
9ea254547 by Bradley Dice, build(cuda): Add CUDA_VERSION build arg to
adapters dockerfile (#16234)
da458a4ea by Abhinav Mukherjee, Add the L2_NORM UDF (#15970)
e7493488b by Muhammad Haseeb, fix(cudf): Fix Velox cuDF table scan tests
(#16335)
2c26f11b7 by PHILO-HE, misc: Optimize Spark dayname function (#16194)
c07126302 by Rui Mo, fix: Load reused lazy vector in filter project (#16108)
f5fec6c03 by Ping Liu, feat: Add IcebergConfig (#16350)
8830b83f2 by Kent Yao, fix: Use XORShiftRandom in Spark rand function for
compatibility (#16308)
44f99c3d3 by Shruti, feat(cudf): Enable precomputation support for join
filters (#16212)
c99aa5555 by Shruti Shivakumar, feat(cudf): Support full outer join in
Velox-cuDF (#16229)
806c60fe6 by Jialiang Tan, feat: Add fast path to PrefixEncoding when no
duplicates (#16321)
99b6b832f by Kent Yao, fix(test): Fix race condition in
SkewedPartitionRebalancerTest.serializedRebalanceExecution (#16244)
e839804c0 by Simon Eves, fix(cudf): Fix velox_cudf_s3_read_test (#16331)
f41a1bc39 by Karthikeyan Natarajan, refactor(cudf): Refactor Cudf Driver
Adapter and CudfLocalPartition (#16264)
0cbee9e96 by David Goode, feat(operator): Documentation for MixedUnion
operator (#16345)
786c53a26 by Muhammad Haseeb, fix(cudf): Update cuDF dependency tree to fix
debug builds (#16316)
d02f09fcd by Deepak Majeti, feat(cudf): Use BufferedInput enqueue/load APIs
(#16259)
b3c6e3b02 by Kk Pulla, fix(expr): Use stable folly::hasher for expression
hashing (#16284)
a4b3458b5 by Ke Wang, feat: Add ssdFlushThresholdBytes options to SSD cache
(#16313)
818f7f12c by Jiahao Liang, Populate WS IO stats for Velox SST Writer
(#16326)
6fc36a796 by Pedro Eugenio Rocha Pedreira, docs(blog): velox::StringView
API Changes and Best Practices (#16333)
f1f6e8e34 by Xiaoxuan Meng, feat: Add IndexReader interface for index-based
lookups (#16330)
b88ce66a7 by Jialiang Tan, feat: Add overriding file create config for
different operators (#16318)
887721970 by Bradley Dice, fix(cudf): Add missing Folly::folly dependency
to velox_cudf_config_test (#16319)
80ad4c170 by jiangtian, fix: Call prepareReuse on argVectors in
AggregateWindow to clear string buffers (#15680)
6f5a4853d by Xiaoxuan Meng, feat: Add batched index bounds encoding support
to KeyEncoder (#16329)
62c4a0615 by Xiaoxuan Meng, fix: Backout "Flush row group by buffered bytes
in parquet writer" (#16317)
0122842cd by Jialiang Tan, refactor: Move file io stats inside spill stats
(#16255)
d9caff3de by Abhinav Mukherjee, Add custom fuzzer tests for array_subset
UDF (#16027)
a88d36cd6 by Jialiang Tan, fix: Fix flaky
SkewedPartitionRebalancerTest.serializedRebalanceExecution (#16300)
e21b995f0 by Masha Basmanova, feat: Add EnforceDistinct operator (#16297)
f0a3b54a4 by Kent Yao, feat(sparksql): Add transform with index parameter
support (#16211)
f5d95719a by Pedro Eugenio Rocha Pedreira, feat(python): Hook support in
LocalDebuggerRunner (#16291)
40f3787ab by Ping Liu, refactor(parquet): Arrow writer to align with Velox
coding standards (#16295)
0e99bded8 by Masha Basmanova, test: Enhance parsing of IF expressions in
DuckParser (#16294)
15da8e764 by Devavret Makkar, perf(cudf): Fix several usages of default
stream (#16258)
6984fc2cd by Christian Zentgraf, fix(build): Define serialize function in
CudfHiveConnectorSplit (#16287)
5ce6c4827 by Rui Mo, misc: Make `requests_` and `coalescedLoads_` protected
(#16249)
b7ac8b584 by Kent Yao, feat(function): Add randstr Spark function (#16014)
022acc10f by Masha Basmanova, fix: Rename Type::isUnKnown() to isUnknown()
(#16292)
b3b3ee2d0 by Natasha Sehgal, feat: Add ExprSet and ExprMap type aliases
using folly::F14 for expression deduplication (#16272)
6e01ab2c1 by wecharyu, feat: Flush row group by buffered bytes in parquet
writer (#15751)
8c1a8aa1f by Ping Liu, feat(parquet): Add NaN statistics to Parquet writer
(#14725)
b9e6b55de by Kent Yao, feat: Add Spark monthname function (#16011)
c2d2181be by Mariam Almesfer, test: Validate ANSI support for Spark
CAST(decimal as string) (#16124)
df3455499 by Chengcheng Jin, feat(sparksql): Add Spark to_pretty_string
function (#16245)
d42096b97 by David Goode, feat(operator): Velox MixedUnion support (#16184)
843bea84c by Mohammad Linjawi, test: Validate Spark string-to-date cast
(#16092)
ad7805bf2 by Pedro Eugenio Rocha Pedreira, feat(cursor): Support custom
callbacks on breakpoints (#16267)
Signed-off-by: glutenperfbot <[email protected]>
---------
Signed-off-by: glutenperfbot <[email protected]>
Co-authored-by: glutenperfbot <[email protected]>
Co-authored-by: Rui Mo <[email protected]>
---
.github/workflows/velox_backend_x86.yml | 2 +-
.../gluten/execution/VeloxIcebergSuite.scala | 3 ++
.../gluten/execution/VeloxTPCHIcebergSuite.scala | 3 ++
cpp/velox/memory/GlutenBufferedInputBuilder.h | 10 ++---
cpp/velox/memory/GlutenDirectBufferedInput.h | 6 +--
cpp/velox/utils/VeloxWriterUtils.cc | 4 +-
ep/build-velox/src/get-velox.sh | 4 +-
.../sql-tests/results/group-by-ordinal.sql.out | 12 +++---
.../resources/sql-tests/results/group-by.sql.out | 2 +-
.../resources/sql-tests/results/random.sql.out | 8 ++--
.../sql-tests/results/group-by-ordinal.sql.out | 12 +++---
.../resources/sql-tests/results/group-by.sql.out | 2 +-
.../resources/sql-tests/results/random.sql.out | 8 ++--
.../sql-tests/results/group-by-ordinal.sql.out | 12 +++---
.../resources/sql-tests/results/group-by.sql.out | 2 +-
.../resources/sql-tests/results/random.sql.out | 8 ++--
.../sql-tests/results/group-by-ordinal.sql.out | 12 +++---
.../sql-tests/results/group-by.sql.out | 2 +-
.../sql-tests/results/random.sql.out | 8 ++--
.../results/table-valued-functions.sql.out | 2 +-
.../spark/sql/GlutenGeneratorFunctionSuite.scala | 2 +-
.../results/udaf/udaf-group-by-ordinal.sql.out | 6 +--
.../gluten/utils/velox/VeloxTestSettings.scala | 2 -
.../spark/sql/GlutenGeneratorFunctionSuite.scala | 2 +-
.../GlutenTakeOrderedAndProjectSuite.scala | 46 +---------------------
.../results/udaf/udaf-group-by-ordinal.sql.out | 6 +--
.../gluten/utils/velox/VeloxTestSettings.scala | 2 -
.../spark/sql/GlutenGeneratorFunctionSuite.scala | 2 +-
.../GlutenTakeOrderedAndProjectSuite.scala | 46 +---------------------
29 files changed, 77 insertions(+), 159 deletions(-)
diff --git a/.github/workflows/velox_backend_x86.yml
b/.github/workflows/velox_backend_x86.yml
index 6edd8162ea..3ac8edc424 100644
--- a/.github/workflows/velox_backend_x86.yml
+++ b/.github/workflows/velox_backend_x86.yml
@@ -821,7 +821,7 @@ jobs:
java -version
export SPARK_HOME=/opt/shims/spark34/spark_home/
ls -l $SPARK_HOME
- $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg
-Piceberg-test -Pdelta -Phudi -Ppaimon -Pspark-ut \
+ $MVN_CMD clean test -Pspark-3.4 -Pjava-17 -Pbackends-velox -Piceberg
-Pdelta -Phudi -Ppaimon -Pspark-ut \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
\
-DargLine="-Dspark.test.home=$SPARK_HOME"
- name: Upload test report
diff --git
a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxIcebergSuite.scala
b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxIcebergSuite.scala
index edb30dac61..de9eaba597 100644
---
a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxIcebergSuite.scala
+++
b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxIcebergSuite.scala
@@ -16,4 +16,7 @@
*/
package org.apache.gluten.execution
+import org.apache.gluten.tags.SkipTest
+
+@SkipTest
class VeloxIcebergSuite extends IcebergSuite
diff --git
a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala
b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala
index 5456e0ba89..42820c04dc 100644
---
a/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala
+++
b/backends-velox/src-iceberg/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala
@@ -16,6 +16,8 @@
*/
package org.apache.gluten.execution
+import org.apache.gluten.tags.SkipTest
+
import org.apache.spark.SparkConf
import org.apache.spark.sql.functions.col
@@ -23,6 +25,7 @@ import org.apache.iceberg.spark.SparkWriteOptions
import java.io.File
+@SkipTest
class VeloxTPCHIcebergSuite extends VeloxTPCHSuite {
protected val tpchBasePath: String =
getClass.getResource("/").getPath + "../../../src/test/resources"
diff --git a/cpp/velox/memory/GlutenBufferedInputBuilder.h
b/cpp/velox/memory/GlutenBufferedInputBuilder.h
index 86116ff1e8..25b05f9870 100644
--- a/cpp/velox/memory/GlutenBufferedInputBuilder.h
+++ b/cpp/velox/memory/GlutenBufferedInputBuilder.h
@@ -30,8 +30,8 @@ class GlutenBufferedInputBuilder : public
facebook::velox::connector::hive::Buff
const facebook::velox::FileHandle& fileHandle,
const facebook::velox::dwio::common::ReaderOptions& readerOpts,
const facebook::velox::connector::ConnectorQueryCtx* connectorQueryCtx,
- std::shared_ptr<facebook::velox::io::IoStatistics> ioStats,
- std::shared_ptr<facebook::velox::filesystems::File::IoStats> fsStats,
+ std::shared_ptr<facebook::velox::io::IoStatistics> ioStatistics,
+ std::shared_ptr<facebook::velox::IoStats> ioStats,
folly::Executor* executor,
const folly::F14FastMap<std::string, std::string>& fileReadOps = {})
override {
if (connectorQueryCtx->cache()) {
@@ -42,8 +42,8 @@ class GlutenBufferedInputBuilder : public
facebook::velox::connector::hive::Buff
connectorQueryCtx->cache(),
facebook::velox::connector::Connector::getTracker(connectorQueryCtx->scanId(),
readerOpts.loadQuantum()),
fileHandle.groupId,
- ioStats,
- std::move(fsStats),
+ std::move(ioStatistics),
+ std::move(ioStats),
executor,
readerOpts,
fileReadOps);
@@ -54,8 +54,8 @@ class GlutenBufferedInputBuilder : public
facebook::velox::connector::hive::Buff
fileHandle.uuid,
facebook::velox::connector::Connector::getTracker(connectorQueryCtx->scanId(),
readerOpts.loadQuantum()),
fileHandle.groupId,
+ std::move(ioStatistics),
std::move(ioStats),
- std::move(fsStats),
executor,
readerOpts,
fileReadOps);
diff --git a/cpp/velox/memory/GlutenDirectBufferedInput.h
b/cpp/velox/memory/GlutenDirectBufferedInput.h
index edaff5c603..3aef323da7 100644
--- a/cpp/velox/memory/GlutenDirectBufferedInput.h
+++ b/cpp/velox/memory/GlutenDirectBufferedInput.h
@@ -29,8 +29,8 @@ class GlutenDirectBufferedInput : public
facebook::velox::dwio::common::DirectBu
facebook::velox::StringIdLease fileNum,
std::shared_ptr<facebook::velox::cache::ScanTracker> tracker,
facebook::velox::StringIdLease groupId,
- std::shared_ptr<facebook::velox::io::IoStatistics> ioStats,
- std::shared_ptr<facebook::velox::filesystems::File::IoStats> fsStats,
+ std::shared_ptr<facebook::velox::io::IoStatistics> ioStatistics,
+ std::shared_ptr<facebook::velox::IoStats> ioStats,
folly::Executor* executor,
const facebook::velox::io::ReaderOptions& readerOptions,
folly::F14FastMap<std::string, std::string> fileReadOps = {})
@@ -40,8 +40,8 @@ class GlutenDirectBufferedInput : public
facebook::velox::dwio::common::DirectBu
std::move(fileNum),
std::move(tracker),
std::move(groupId),
+ std::move(ioStatistics),
std::move(ioStats),
- std::move(fsStats),
executor,
readerOptions,
std::move(fileReadOps)) {}
diff --git a/cpp/velox/utils/VeloxWriterUtils.cc
b/cpp/velox/utils/VeloxWriterUtils.cc
index 50e4ca601e..026418a223 100644
--- a/cpp/velox/utils/VeloxWriterUtils.cc
+++ b/cpp/velox/utils/VeloxWriterUtils.cc
@@ -61,7 +61,7 @@ std::unique_ptr<WriterOptions> makeParquetWriteOption(const
std::unordered_map<s
auto parquetGzipWindowSizeStr =
sparkConfs.find(kParquetGzipWindowSize)->second;
if (parquetGzipWindowSizeStr == kGzipWindowSize4k) {
auto codecOptions =
std::make_shared<parquet::arrow::util::GZipCodecOptions>();
- codecOptions->window_bits = kGzipWindowBits4k;
+ codecOptions->windowBits = kGzipWindowBits4k;
writeOption->codecOptions = std::move(codecOptions);
}
}
@@ -77,7 +77,7 @@ std::unique_ptr<WriterOptions> makeParquetWriteOption(const
std::unordered_map<s
auto codecOptions =
std::make_shared<parquet::arrow::util::CodecOptions>();
auto it = sparkConfs.find(kParquetZSTDCompressionLevel);
auto compressionLevel = it != sparkConfs.end() ? std::stoi(it->second) :
kZSTDDefaultCompressionLevel;
- codecOptions->compression_level = compressionLevel;
+ codecOptions->compressionLevel = compressionLevel;
writeOption->codecOptions = std::move(codecOptions);
} else if (boost::iequals(compressionCodecStr, "uncompressed")) {
compressionCodec = CompressionKind::CompressionKind_NONE;
diff --git a/ep/build-velox/src/get-velox.sh b/ep/build-velox/src/get-velox.sh
index 1f54431a14..c395089c72 100755
--- a/ep/build-velox/src/get-velox.sh
+++ b/ep/build-velox/src/get-velox.sh
@@ -18,8 +18,8 @@ set -exu
CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd)
VELOX_REPO=https://github.com/IBM/velox.git
-VELOX_BRANCH=dft-2026_02_06
-VELOX_ENHANCED_BRANCH=ibm-2026_02_06
+VELOX_BRANCH=dft-2026_02_17
+VELOX_ENHANCED_BRANCH=ibm-2026_02_17
VELOX_HOME=""
RUN_SETUP_SCRIPT=ON
ENABLE_ENHANCED_FEATURES=OFF
diff --git
a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
index cc20dd33e0..92e4a861fa 100644
---
a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
+++
b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
@@ -141,12 +141,12 @@ from
-- !query schema
struct<a:int,rand(0):double,sum(b):bigint>
-- !query output
-1 0.5488135024422883 1
-1 0.7151893651681639 2
-2 0.5448831775801376 2
-2 0.6027633705776989 1
-3 0.4236547969336536 1
-3 0.6458941151817286 2
+1 0.5234194256885571 2
+1 0.7604953758285915 1
+2 0.0953472826424725 1
+2 0.3163249920547614 2
+3 0.2710259815484829 2
+3 0.7141011170991605 1
-- !query
diff --git
a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out
b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out
index b54621f8ec..a12e830c11 100644
--- a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out
@@ -662,5 +662,5 @@ GROUP BY a IS NULL
-- !query schema
struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint>
-- !query output
-0.5488135024422883 7
+0.7604953758285915 7
1.0 2
diff --git
a/gluten-ut/spark32/src/test/resources/sql-tests/results/random.sql.out
b/gluten-ut/spark32/src/test/resources/sql-tests/results/random.sql.out
index a9d334e7f2..b269d40c35 100644
--- a/gluten-ut/spark32/src/test/resources/sql-tests/results/random.sql.out
+++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/random.sql.out
@@ -7,7 +7,7 @@ SELECT rand(0)
-- !query schema
struct<rand(0):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -15,7 +15,7 @@ SELECT rand(cast(3 / 7 AS int))
-- !query schema
struct<rand(CAST((3 / 7) AS INT)):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -23,7 +23,7 @@ SELECT rand(NULL)
-- !query schema
struct<rand(NULL):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -31,7 +31,7 @@ SELECT rand(cast(NULL AS int))
-- !query schema
struct<rand(CAST(NULL AS INT)):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
diff --git
a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
index cc20dd33e0..92e4a861fa 100644
---
a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
+++
b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
@@ -141,12 +141,12 @@ from
-- !query schema
struct<a:int,rand(0):double,sum(b):bigint>
-- !query output
-1 0.5488135024422883 1
-1 0.7151893651681639 2
-2 0.5448831775801376 2
-2 0.6027633705776989 1
-3 0.4236547969336536 1
-3 0.6458941151817286 2
+1 0.5234194256885571 2
+1 0.7604953758285915 1
+2 0.0953472826424725 1
+2 0.3163249920547614 2
+3 0.2710259815484829 2
+3 0.7141011170991605 1
-- !query
diff --git
a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out
b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out
index 408b9f9425..48b35bf1e0 100644
--- a/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/gluten-ut/spark33/src/test/resources/sql-tests/results/group-by.sql.out
@@ -691,7 +691,7 @@ GROUP BY a IS NULL
-- !query schema
struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint>
-- !query output
-0.5488135024422883 7
+0.7604953758285915 7
1.0 2
diff --git
a/gluten-ut/spark33/src/test/resources/sql-tests/results/random.sql.out
b/gluten-ut/spark33/src/test/resources/sql-tests/results/random.sql.out
index a9d334e7f2..b269d40c35 100644
--- a/gluten-ut/spark33/src/test/resources/sql-tests/results/random.sql.out
+++ b/gluten-ut/spark33/src/test/resources/sql-tests/results/random.sql.out
@@ -7,7 +7,7 @@ SELECT rand(0)
-- !query schema
struct<rand(0):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -15,7 +15,7 @@ SELECT rand(cast(3 / 7 AS int))
-- !query schema
struct<rand(CAST((3 / 7) AS INT)):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -23,7 +23,7 @@ SELECT rand(NULL)
-- !query schema
struct<rand(NULL):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -31,7 +31,7 @@ SELECT rand(cast(NULL AS int))
-- !query schema
struct<rand(CAST(NULL AS INT)):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
diff --git
a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
index 5b8637012e..bf85afd626 100644
---
a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
+++
b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
@@ -208,12 +208,12 @@ from
-- !query schema
struct<a:int,rand(0):double,sum(b):bigint>
-- !query output
-1 0.5488135024422883 1
-1 0.7151893651681639 2
-2 0.5448831775801376 2
-2 0.6027633705776989 1
-3 0.4236547969336536 1
-3 0.6458941151817286 2
+1 0.5234194256885571 2
+1 0.7604953758285915 1
+2 0.0953472826424725 1
+2 0.3163249920547614 2
+3 0.2710259815484829 2
+3 0.7141011170991605 1
-- !query
select * from data group by a, b, 1
diff --git
a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out
b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out
index a4a3f76fa6..f564209260 100644
--- a/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/gluten-ut/spark34/src/test/resources/sql-tests/results/group-by.sql.out
@@ -907,7 +907,7 @@ GROUP BY a IS NULL
-- !query schema
struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint>
-- !query output
-0.5488135024422883 7
+0.7604953758285915 7
1.0 2
diff --git
a/gluten-ut/spark34/src/test/resources/sql-tests/results/random.sql.out
b/gluten-ut/spark34/src/test/resources/sql-tests/results/random.sql.out
index f8460c1d43..dea2c69ba0 100644
--- a/gluten-ut/spark34/src/test/resources/sql-tests/results/random.sql.out
+++ b/gluten-ut/spark34/src/test/resources/sql-tests/results/random.sql.out
@@ -4,7 +4,7 @@ SELECT rand(0)
-- !query schema
struct<rand(0):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -12,7 +12,7 @@ SELECT rand(cast(3 / 7 AS int))
-- !query schema
struct<rand(CAST((3 / 7) AS INT)):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -20,7 +20,7 @@ SELECT rand(NULL)
-- !query schema
struct<rand(NULL):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -28,7 +28,7 @@ SELECT rand(cast(NULL AS int))
-- !query schema
struct<rand(CAST(NULL AS INT)):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
diff --git
a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by-ordinal.sql.out
b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by-ordinal.sql.out
index b968b4e09f..0f29c27268 100644
---
a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by-ordinal.sql.out
+++
b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by-ordinal.sql.out
@@ -208,12 +208,12 @@ from
-- !query schema
struct<a:int,rand(0):double,sum(b):bigint>
-- !query output
-1 0.5488135024422883 1
-1 0.7151893651681639 2
-2 0.5448831775801376 2
-2 0.6027633705776989 1
-3 0.4236547969336536 1
-3 0.6458941151817286 2
+1 0.5234194256885571 2
+1 0.7604953758285915 1
+2 0.0953472826424725 1
+2 0.3163249920547614 2
+3 0.2710259815484829 2
+3 0.7141011170991605 1
-- !query
diff --git
a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by.sql.out
b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by.sql.out
index db0b74cd6a..4e3a176ba9 100644
---
a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by.sql.out
+++
b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/group-by.sql.out
@@ -907,7 +907,7 @@ GROUP BY a IS NULL
-- !query schema
struct<(IF((NOT (a IS NULL)), rand(0), 1)):double,c:bigint>
-- !query output
-0.5488135024422883 7
+0.7604953758285915 7
1.0 2
diff --git
a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/random.sql.out
b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/random.sql.out
index 17e6f871b9..8a182a0646 100644
---
a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/random.sql.out
+++
b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/random.sql.out
@@ -4,7 +4,7 @@ SELECT rand(0)
-- !query schema
struct<rand(0):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -12,7 +12,7 @@ SELECT rand(cast(3 / 7 AS int))
-- !query schema
struct<rand(CAST((3 / 7) AS INT)):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -20,7 +20,7 @@ SELECT rand(NULL)
-- !query schema
struct<rand(NULL):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
@@ -28,7 +28,7 @@ SELECT rand(cast(NULL AS int))
-- !query schema
struct<rand(CAST(NULL AS INT)):double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
diff --git
a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/table-valued-functions.sql.out
b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/table-valued-functions.sql.out
index 0d5675fa6f..1995e9e87b 100644
---
a/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/table-valued-functions.sql.out
+++
b/gluten-ut/spark35/src/test/resources/backends-velox/sql-tests/results/table-valued-functions.sql.out
@@ -247,7 +247,7 @@ select * from explode(array(rand(0)))
-- !query schema
struct<col:double>
-- !query output
-0.5488135024422883
+0.7604953758285915
-- !query
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
index b3d51e8029..2050237e0d 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
@@ -22,7 +22,7 @@ class GlutenGeneratorFunctionSuite extends
GeneratorFunctionSuite with GlutenSQL
testGluten("SPARK-45171: Handle evaluated nondeterministic expression") {
withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") {
val df = sql("select explode(array(rand(0)))")
- checkAnswer(df, Row(0.5488135024422883))
+ checkAnswer(df, Row(0.7604953758285915))
}
}
}
diff --git
a/gluten-ut/spark40/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
b/gluten-ut/spark40/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
index 45a19ba2c3..5eb1f6689b 100644
---
a/gluten-ut/spark40/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
+++
b/gluten-ut/spark40/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
@@ -139,11 +139,11 @@ from
-- !query schema
struct<a:int,rand(0):double,udaf(b):int>
-- !query output
-1 0.5234194256885571 1
+1 0.5234194256885571 2
1 0.7604953758285915 1
2 0.0953472826424725 1
-2 0.3163249920547614 1
-3 0.2710259815484829 1
+2 0.3163249920547614 2
+3 0.2710259815484829 2
3 0.7141011170991605 1
diff --git
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index f5c9d22db6..6e94d4cc0e 100644
---
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -742,8 +742,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenSQLWindowFunctionSuite]
.exclude("test with low buffer spill threshold")
enableSuite[GlutenTakeOrderedAndProjectSuite]
- // The results of rand() differ between vanilla spark and velox.
- .exclude("SPARK-47104: Non-deterministic expressions in projection")
enableSuite[GlutenSessionExtensionSuite]
enableSuite[TestFileSourceScanExecTransformer]
enableSuite[GlutenBucketedReadWithoutHiveSupportSuite]
diff --git
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
index b3d51e8029..2050237e0d 100644
---
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
+++
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
@@ -22,7 +22,7 @@ class GlutenGeneratorFunctionSuite extends
GeneratorFunctionSuite with GlutenSQL
testGluten("SPARK-45171: Handle evaluated nondeterministic expression") {
withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") {
val df = sql("select explode(array(rand(0)))")
- checkAnswer(df, Row(0.5488135024422883))
+ checkAnswer(df, Row(0.7604953758285915))
}
}
}
diff --git
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
index 2731e05471..bc231e52ad 100644
---
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
+++
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
@@ -16,50 +16,8 @@
*/
package org.apache.spark.sql.execution
-import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row}
-import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, Rand}
-import org.apache.spark.sql.types.{IntegerType, StructType}
+import org.apache.spark.sql.GlutenSQLTestsBaseTrait
class GlutenTakeOrderedAndProjectSuite
extends TakeOrderedAndProjectSuite
- with GlutenSQLTestsBaseTrait {
-
- private def noOpFilter(plan: SparkPlan): SparkPlan =
FilterExec(Literal(true), plan)
-
- testGluten("SPARK-47104: Non-deterministic expressions in projection") {
- val expected = (input: SparkPlan) => {
- GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true,
input)))
- }
- val schema = StructType.fromDDL("a int, b int, c double")
- val rdd = sparkContext.parallelize(
- Seq(
- Row(1, 2, 0.6027633705776989d),
- Row(2, 3, 0.7151893651681639d),
- Row(3, 4, 0.5488135024422883d)),
- 1)
- val df = spark.createDataFrame(rdd, schema)
- val projection = df.queryExecution.sparkPlan.output.take(2) :+
- Alias(Rand(Literal(0, IntegerType)), "_uuid")()
-
- // test executeCollect
- checkThatPlansAgree(
- df,
- input =>
- TakeOrderedAndProjectExec(limit, sortOrder, projection,
SortExec(sortOrder, false, input)),
- input => expected(input),
- sortAnswers = false)
-
- // test doExecute
- checkThatPlansAgree(
- df,
- input =>
- noOpFilter(
- TakeOrderedAndProjectExec(
- limit,
- sortOrder,
- projection,
- SortExec(sortOrder, false, input))),
- input => expected(input),
- sortAnswers = false)
- }
-}
+ with GlutenSQLTestsBaseTrait {}
diff --git
a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
index 45a19ba2c3..5eb1f6689b 100644
---
a/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
+++
b/gluten-ut/spark41/src/test/resources/backends-velox/sql-tests/results/udaf/udaf-group-by-ordinal.sql.out
@@ -139,11 +139,11 @@ from
-- !query schema
struct<a:int,rand(0):double,udaf(b):int>
-- !query output
-1 0.5234194256885571 1
+1 0.5234194256885571 2
1 0.7604953758285915 1
2 0.0953472826424725 1
-2 0.3163249920547614 1
-3 0.2710259815484829 1
+2 0.3163249920547614 2
+3 0.2710259815484829 2
3 0.7141011170991605 1
diff --git
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index e8f8dfa762..2b9e4555d7 100644
---
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -708,8 +708,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenSQLWindowFunctionSuite]
.exclude("test with low buffer spill threshold")
enableSuite[GlutenTakeOrderedAndProjectSuite]
- // The results of rand() differ between vanilla spark and velox.
- .exclude("SPARK-47104: Non-deterministic expressions in projection")
enableSuite[GlutenSessionExtensionSuite]
enableSuite[TestFileSourceScanExecTransformer]
enableSuite[GlutenBucketedReadWithoutHiveSupportSuite]
diff --git
a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
index b3d51e8029..2050237e0d 100644
---
a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
+++
b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenGeneratorFunctionSuite.scala
@@ -22,7 +22,7 @@ class GlutenGeneratorFunctionSuite extends
GeneratorFunctionSuite with GlutenSQL
testGluten("SPARK-45171: Handle evaluated nondeterministic expression") {
withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") {
val df = sql("select explode(array(rand(0)))")
- checkAnswer(df, Row(0.5488135024422883))
+ checkAnswer(df, Row(0.7604953758285915))
}
}
}
diff --git
a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
index 2731e05471..bc231e52ad 100644
---
a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
+++
b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/GlutenTakeOrderedAndProjectSuite.scala
@@ -16,50 +16,8 @@
*/
package org.apache.spark.sql.execution
-import org.apache.spark.sql.{GlutenSQLTestsBaseTrait, Row}
-import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, Rand}
-import org.apache.spark.sql.types.{IntegerType, StructType}
+import org.apache.spark.sql.GlutenSQLTestsBaseTrait
class GlutenTakeOrderedAndProjectSuite
extends TakeOrderedAndProjectSuite
- with GlutenSQLTestsBaseTrait {
-
- private def noOpFilter(plan: SparkPlan): SparkPlan =
FilterExec(Literal(true), plan)
-
- testGluten("SPARK-47104: Non-deterministic expressions in projection") {
- val expected = (input: SparkPlan) => {
- GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true,
input)))
- }
- val schema = StructType.fromDDL("a int, b int, c double")
- val rdd = sparkContext.parallelize(
- Seq(
- Row(1, 2, 0.6027633705776989d),
- Row(2, 3, 0.7151893651681639d),
- Row(3, 4, 0.5488135024422883d)),
- 1)
- val df = spark.createDataFrame(rdd, schema)
- val projection = df.queryExecution.sparkPlan.output.take(2) :+
- Alias(Rand(Literal(0, IntegerType)), "_uuid")()
-
- // test executeCollect
- checkThatPlansAgree(
- df,
- input =>
- TakeOrderedAndProjectExec(limit, sortOrder, projection,
SortExec(sortOrder, false, input)),
- input => expected(input),
- sortAnswers = false)
-
- // test doExecute
- checkThatPlansAgree(
- df,
- input =>
- noOpFilter(
- TakeOrderedAndProjectExec(
- limit,
- sortOrder,
- projection,
- SortExec(sortOrder, false, input))),
- input => expected(input),
- sortAnswers = false)
- }
-}
+ with GlutenSQLTestsBaseTrait {}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]