This is an automated email from the ASF dual-hosted git repository.
zhli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 6db8920e6 [VL] Add test for shuffle function (#5722)
6db8920e6 is described below
commit 6db8920e6bcb423724fbbb9888ad71f663ba3053
Author: Zhen Li <[email protected]>
AuthorDate: Mon May 13 15:41:23 2024 +0800
[VL] Add test for shuffle function (#5722)
[VL] Add test for shuffle function.
---
.../gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala | 10 ++++++++++
.../gluten/execution/ScalarFunctionsValidateSuite.scala | 16 ++++++++++++++++
docs/velox-backend-support-progress.md | 2 +-
.../org/apache/gluten/backendsapi/SparkPlanExecApi.scala | 7 +++++++
.../apache/gluten/expression/ExpressionConverter.scala | 6 ++++++
.../apache/gluten/utils/velox/VeloxTestSettings.scala | 3 ---
.../apache/gluten/utils/velox/VeloxTestSettings.scala | 3 ---
.../apache/gluten/utils/velox/VeloxTestSettings.scala | 3 ---
.../apache/gluten/utils/velox/VeloxTestSettings.scala | 3 ---
9 files changed, 40 insertions(+), 13 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index 8d01ab96b..4d41ed0c0 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -143,6 +143,16 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
original)
}
+ override def genShuffleTransformer(
+ substraitExprName: String,
+ child: ExpressionTransformer,
+ original: Shuffle): ExpressionTransformer = {
+ GenericExpressionTransformer(
+ substraitExprName,
+ Seq(child, LiteralTransformer(Literal(original.randomSeed.get))),
+ original)
+ }
+
override def genTryAddTransformer(
substraitExprName: String,
left: ExpressionTransformer,
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 485d70f9d..834e172f8 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -878,6 +878,22 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
}
}
+ test("test shuffle") {
+ withTempPath {
+ path =>
+ Seq[Seq[Integer]](Seq(1, null, 5, 4), Seq(5, -1, 8, 9, -7, 2), Seq.empty, null)
+ .toDF("value")
+ .write
+ .parquet(path.getCanonicalPath)
+
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("array_tbl")
+
+ runQueryAndCompare("select shuffle(value) from array_tbl;", false) {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
+
test("negative") {
runQueryAndCompare("select negative(l_orderkey) from lineitem") {
checkGlutenOperatorMatch[ProjectExecTransformer]
diff --git a/docs/velox-backend-support-progress.md
b/docs/velox-backend-support-progress.md
index 3d1d25be0..8b640c081 100644
--- a/docs/velox-backend-support-progress.md
+++ b/docs/velox-backend-support-progress.md
@@ -295,7 +295,7 @@ Gluten supports 199 functions. (Drag to right to see all
data types)
| named_struct,struct | row_construct | named_struct
| S | | | | | | |
| | | | | | | | |
| | S | |
| posexplode_outer,posexplode | |
| | | | | | | |
| | | | | | | | |
| | | |
| sequence | |
| | | | | | | |
| | | | | | | | |
| | | |
-| shuffle | shuffle |
| | | | | | | |
| | | | | | | | |
| | | |
+| shuffle | shuffle | shuffle
| S | | | | | | |
| | | | | | | | |
| | | |
| size | | size
| S | | | | | | |
| | | | | | | | |
| | | |
| slice | slice |
| | | | | | | |
| | | | | | | | |
| | | |
| sort_array | | sort_array
| S | | | | | | |
| | | | | | | | |
| | | |
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 8f2ef19f1..c2c733070 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -211,6 +211,13 @@ trait SparkPlanExecApi {
GenericExpressionTransformer(substraitExprName, Seq(), original)
}
+ def genShuffleTransformer(
+ substraitExprName: String,
+ child: ExpressionTransformer,
+ original: Shuffle): ExpressionTransformer = {
+ GenericExpressionTransformer(substraitExprName, Seq(child), original)
+ }
+
def genTryAddTransformer(
substraitExprName: String,
left: ExpressionTransformer,
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 562ae294e..495fbf8d5 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -652,6 +652,12 @@ object ExpressionConverter extends SQLConfHelper with Logging {
a
)
+ case s: Shuffle =>
+ BackendsApiManager.getSparkPlanExecApiInstance.genShuffleTransformer(
+ substraitExprName,
+ replaceWithExpressionTransformerInternal(s.child, attributeSeq, expressionsMap),
+ s
+ )
case expr =>
GenericExpressionTransformer(
substraitExprName,
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 1207514c2..dbd7dc187 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -280,9 +280,6 @@ class VeloxTestSettings extends BackendTestSettings {
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
- .exclude("shuffle function - array for primitive type not containing null")
- .exclude("shuffle function - array for primitive type containing null")
- .exclude("shuffle function - array for non-primitive type")
// Rewrite this test because Velox sorts rows by key for primitive data types, which disrupts the original row sequence.
.exclude("map_zip_with function - map of primitive types")
enableSuite[GlutenDataFrameTungstenSuite]
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 40185aa63..9b469a98d 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -966,9 +966,6 @@ class VeloxTestSettings extends BackendTestSettings {
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
- .exclude("shuffle function - array for primitive type not containing null")
- .exclude("shuffle function - array for primitive type containing null")
- .exclude("shuffle function - array for non-primitive type")
// Rewrite this test because Velox sorts rows by key for primitive data types, which disrupts the original row sequence.
.exclude("map_zip_with function - map of primitive types")
enableSuite[GlutenDataFrameHintSuite]
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 47ad21958..498ed5ef4 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -971,9 +971,6 @@ class VeloxTestSettings extends BackendTestSettings {
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
- .exclude("shuffle function - array for primitive type not containing null")
- .exclude("shuffle function - array for primitive type containing null")
- .exclude("shuffle function - array for non-primitive type")
// Rewrite this test because Velox sorts rows by key for primitive data types, which disrupts the original row sequence.
.exclude("map_zip_with function - map of primitive types")
enableSuite[GlutenDataFrameHintSuite]
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 2aed0ff78..a59819411 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -986,9 +986,6 @@ class VeloxTestSettings extends BackendTestSettings {
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
- .exclude("shuffle function - array for primitive type not containing null")
- .exclude("shuffle function - array for primitive type containing null")
- .exclude("shuffle function - array for non-primitive type")
// Rewrite this test because Velox sorts rows by key for primitive data types, which disrupts the original row sequence.
.exclude("map_zip_with function - map of primitive types")
enableSuite[GlutenDataFrameHintSuite]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]