This is an automated email from the ASF dual-hosted git repository.

zhli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 6db8920e6 [VL] Add test for shuffle function (#5722)
6db8920e6 is described below

commit 6db8920e6bcb423724fbbb9888ad71f663ba3053
Author: Zhen Li <[email protected]>
AuthorDate: Mon May 13 15:41:23 2024 +0800

    [VL] Add test for shuffle function (#5722)
    
    [VL] Add test for shuffle function.
---
 .../gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala | 10 ++++++++++
 .../gluten/execution/ScalarFunctionsValidateSuite.scala  | 16 ++++++++++++++++
 docs/velox-backend-support-progress.md                   |  2 +-
 .../org/apache/gluten/backendsapi/SparkPlanExecApi.scala |  7 +++++++
 .../apache/gluten/expression/ExpressionConverter.scala   |  6 ++++++
 .../apache/gluten/utils/velox/VeloxTestSettings.scala    |  3 ---
 .../apache/gluten/utils/velox/VeloxTestSettings.scala    |  3 ---
 .../apache/gluten/utils/velox/VeloxTestSettings.scala    |  3 ---
 .../apache/gluten/utils/velox/VeloxTestSettings.scala    |  3 ---
 9 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index 8d01ab96b..4d41ed0c0 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -143,6 +143,16 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
       original)
   }
 
+  override def genShuffleTransformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      original: Shuffle): ExpressionTransformer = {
+    GenericExpressionTransformer(
+      substraitExprName,
+      Seq(child, LiteralTransformer(Literal(original.randomSeed.get))),
+      original)
+  }
+
   override def genTryAddTransformer(
       substraitExprName: String,
       left: ExpressionTransformer,
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 485d70f9d..834e172f8 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -878,6 +878,22 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
     }
   }
 
+  test("test shuffle") {
+    withTempPath {
+      path =>
+        Seq[Seq[Integer]](Seq(1, null, 5, 4), Seq(5, -1, 8, 9, -7, 2), Seq.empty, null)
+          .toDF("value")
+          .write
+          .parquet(path.getCanonicalPath)
+
+        spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("array_tbl")
+
+        runQueryAndCompare("select shuffle(value) from array_tbl;", false) {
+          checkGlutenOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
   test("negative") {
     runQueryAndCompare("select negative(l_orderkey) from lineitem") {
       checkGlutenOperatorMatch[ProjectExecTransformer]
diff --git a/docs/velox-backend-support-progress.md b/docs/velox-backend-support-progress.md
index 3d1d25be0..8b640c081 100644
--- a/docs/velox-backend-support-progress.md
+++ b/docs/velox-backend-support-progress.md
@@ -295,7 +295,7 @@ Gluten supports 199 functions. (Drag to right to see all data types)
 | named_struct,struct           | row_construct          | named_struct        
  | S      |                          |         |      |       |     |      |   
    |        |      |           |        |         |      |        |          | 
      |     | S      |     |
 | posexplode_outer,posexplode   |                        |                     
  |        |                          |         |      |       |     |      |   
    |        |      |           |        |         |      |        |          | 
      |     |        |     |
 | sequence                      |                        |                     
  |        |                          |         |      |       |     |      |   
    |        |      |           |        |         |      |        |          | 
      |     |        |     |
-| shuffle                       | shuffle                |                     
  |        |                          |         |      |       |     |      |   
    |        |      |           |        |         |      |        |          | 
      |     |        |     |
+| shuffle                       | shuffle                | shuffle             
  | S      |                          |         |      |       |     |      |   
    |        |      |           |        |         |      |        |          | 
      |     |        |     |
 | size                          |                        | size                
  | S      |                          |         |      |       |     |      |   
    |        |      |           |        |         |      |        |          | 
      |     |        |     |
 | slice                         | slice                  |                     
  |        |                          |         |      |       |     |      |   
    |        |      |           |        |         |      |        |          | 
      |     |        |     |
 | sort_array                    |                        | sort_array          
  | S      |                          |         |      |       |     |      |   
    |        |      |           |        |         |      |        |          | 
      |     |        |     |
diff --git a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
index 8f2ef19f1..c2c733070 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala
@@ -211,6 +211,13 @@ trait SparkPlanExecApi {
     GenericExpressionTransformer(substraitExprName, Seq(), original)
   }
 
+  def genShuffleTransformer(
+      substraitExprName: String,
+      child: ExpressionTransformer,
+      original: Shuffle): ExpressionTransformer = {
+    GenericExpressionTransformer(substraitExprName, Seq(child), original)
+  }
+
   def genTryAddTransformer(
       substraitExprName: String,
       left: ExpressionTransformer,
diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
index 562ae294e..495fbf8d5 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala
@@ -652,6 +652,12 @@ object ExpressionConverter extends SQLConfHelper with Logging {
           a
         )
 
+      case s: Shuffle =>
+        BackendsApiManager.getSparkPlanExecApiInstance.genShuffleTransformer(
+          substraitExprName,
+          replaceWithExpressionTransformerInternal(s.child, attributeSeq, expressionsMap),
+          s
+        )
       case expr =>
         GenericExpressionTransformer(
           substraitExprName,
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 1207514c2..dbd7dc187 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -280,9 +280,6 @@ class VeloxTestSettings extends BackendTestSettings {
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
     .exclude("aggregate function - array for non-primitive type")
-    .exclude("shuffle function - array for primitive type not containing null")
-    .exclude("shuffle function - array for primitive type containing null")
-    .exclude("shuffle function - array for non-primitive type")
     // Rewrite this test because Velox sorts rows by key for primitive data types, which disrupts the original row sequence.
     .exclude("map_zip_with function - map of primitive types")
   enableSuite[GlutenDataFrameTungstenSuite]
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 40185aa63..9b469a98d 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -966,9 +966,6 @@ class VeloxTestSettings extends BackendTestSettings {
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
     .exclude("aggregate function - array for non-primitive type")
-    .exclude("shuffle function - array for primitive type not containing null")
-    .exclude("shuffle function - array for primitive type containing null")
-    .exclude("shuffle function - array for non-primitive type")
     // Rewrite this test because Velox sorts rows by key for primitive data types, which disrupts the original row sequence.
     .exclude("map_zip_with function - map of primitive types")
   enableSuite[GlutenDataFrameHintSuite]
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 47ad21958..498ed5ef4 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -971,9 +971,6 @@ class VeloxTestSettings extends BackendTestSettings {
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
     .exclude("aggregate function - array for non-primitive type")
-    .exclude("shuffle function - array for primitive type not containing null")
-    .exclude("shuffle function - array for primitive type containing null")
-    .exclude("shuffle function - array for non-primitive type")
     // Rewrite this test because Velox sorts rows by key for primitive data types, which disrupts the original row sequence.
     .exclude("map_zip_with function - map of primitive types")
   enableSuite[GlutenDataFrameHintSuite]
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 2aed0ff78..a59819411 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -986,9 +986,6 @@ class VeloxTestSettings extends BackendTestSettings {
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
     .exclude("aggregate function - array for non-primitive type")
-    .exclude("shuffle function - array for primitive type not containing null")
-    .exclude("shuffle function - array for primitive type containing null")
-    .exclude("shuffle function - array for non-primitive type")
     // Rewrite this test because Velox sorts rows by key for primitive data types, which disrupts the original row sequence.
     .exclude("map_zip_with function - map of primitive types")
   enableSuite[GlutenDataFrameHintSuite]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to