This is an automated email from the ASF dual-hosted git repository.

liuneng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 65dd411e1 [CH] Support shuffle function (#5432)
65dd411e1 is described below

commit 65dd411e1cea7ef164295bdd1f694e34257ba070
Author: exmy <xumov...@gmail.com>
AuthorDate: Thu Apr 18 20:42:12 2024 +0800

    [CH] Support shuffle function (#5432)
    
    What changes were proposed in this pull request?
    How was this patch tested?
    Pass CI
---
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala       | 16 ++++++++++++++++
 cpp-ch/local-engine/Parser/SerializedPlanParser.h        |  1 +
 .../apache/gluten/expression/ExpressionMappings.scala    |  1 +
 .../gluten/utils/clickhouse/ClickHouseTestSettings.scala |  4 ++++
 .../apache/gluten/utils/velox/VeloxTestSettings.scala    |  4 ++++
 .../gluten/utils/clickhouse/ClickHouseTestSettings.scala |  4 ++++
 .../apache/gluten/utils/velox/VeloxTestSettings.scala    |  4 ++++
 .../gluten/utils/clickhouse/ClickHouseTestSettings.scala |  4 ++++
 .../apache/gluten/utils/velox/VeloxTestSettings.scala    |  4 ++++
 .../gluten/utils/clickhouse/ClickHouseTestSettings.scala |  4 ++++
 .../apache/gluten/utils/velox/VeloxTestSettings.scala    |  4 ++++
 .../org/apache/gluten/expression/ExpressionNames.scala   |  1 +
 12 files changed, 51 insertions(+)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 27cb39584..615cceae9 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -688,6 +688,22 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
     }
   }
 
+  test("test shuffle function") {
+    withSQLConf(
+      SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> (ConstantFolding.ruleName + "," + NullPropagation.ruleName)) {
+      runQueryAndCompare(
+        "select shuffle(split(n_comment, ' ')) from nation",
+        compareResult = false
+      )(checkGlutenOperatorMatch[ProjectExecTransformer])
+
+      runQueryAndCompare(
+        "select shuffle(array(1,2,3,4,5)), shuffle(array(1,3,null,3,4)), shuffle(null)",
+        compareResult = false,
+        noFallBack = false
+      )(checkGlutenOperatorMatch[ProjectExecTransformer])
+    }
+  }
+
   test("test 'function regexp_extract_all'") {
     runQueryAndCompare(
       "select l_orderkey, regexp_extract_all(l_comment, '([a-z])', 1) " +
diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
index b9a72cf05..019d49b02 100644
--- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h
+++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h
@@ -181,6 +181,7 @@ static const std::map<std::string, std::string> SCALAR_FUNCTIONS
 
        // array functions
        {"array", "array"},
+       {"shuffle", "arrayShuffle"},
        {"range", "range"}, /// dummy mapping
 
        // map functions
diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 618798b15..e34ea8840 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -229,6 +229,7 @@ object ExpressionMappings {
     Sig[ArrayRepeat](ARRAY_REPEAT),
     Sig[ArrayRemove](ARRAY_REMOVE),
     Sig[ArrayFilter](FILTER),
+    Sig[Shuffle](SHUFFLE),
     // Map functions
     Sig[CreateMap](CREATE_MAP),
     Sig[GetMapValue](GET_MAP_VALUE),
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 9b987b0e4..a51157e62 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -167,6 +167,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("transform values function - test empty")
     .exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
     .exclude("SPARK-24734: Fix containsNull of Concat for array type")
+    .exclude("shuffle function - array for primitive type not containing null")
+    .exclude("shuffle function - array for primitive type containing null")
+    .exclude("shuffle function - array for non-primitive type")
   enableSuite[GlutenDataFrameHintSuite]
   enableSuite[GlutenDataFrameImplicitsSuite]
   enableSuite[GlutenDataFrameJoinSuite].exclude(
@@ -686,6 +689,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-36755: ArraysOverlap hould handle duplicated Double.NaN and Float.Nan")
     .exclude(
       "SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then non-NaN value")
+    .excludeGlutenTest("Shuffle")
   enableSuite[GlutenComplexTypeSuite]
     .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
     .exclude("SPARK-33460: GetMapValue NoSuchElementException")
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 189a09d35..7e0ccb17c 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -195,6 +195,7 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenCollectionExpressionsSuite]
     // Rewrite in Gluten to replace Seq with Array
     .exclude("Shuffle")
+    .excludeGlutenTest("Shuffle")
     // TODO: ArrayDistinct should handle duplicated Double.NaN
     .excludeByPrefix("SPARK-36741")
     // TODO: ArrayIntersect should handle duplicated Double.NaN
@@ -273,6 +274,9 @@ class VeloxTestSettings extends BackendTestSettings {
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
     .exclude("aggregate function - array for non-primitive type")
+    .exclude("shuffle function - array for primitive type not containing null")
+    .exclude("shuffle function - array for primitive type containing null")
+    .exclude("shuffle function - array for non-primitive type")
   enableSuite[GlutenDataFrameTungstenSuite]
   enableSuite[GlutenDataFrameSetOperationsSuite]
     // Result depends on the implementation for nondeterministic expression rand.
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 05e00ca5d..144d103c0 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -185,6 +185,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("transform values function - test empty")
     .exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
     .exclude("SPARK-24734: Fix containsNull of Concat for array type")
+    .exclude("shuffle function - array for primitive type not containing null")
+    .exclude("shuffle function - array for primitive type containing null")
+    .exclude("shuffle function - array for non-primitive type")
   enableSuite[GlutenDataFrameHintSuite]
   enableSuite[GlutenDataFrameImplicitsSuite]
   enableSuite[GlutenDataFrameJoinSuite].exclude(
@@ -727,6 +730,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude(
       "SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then non-NaN value")
     .exclude("SPARK-39184: Avoid ArrayIndexOutOfBoundsException when crossing DST boundary")
+    .excludeGlutenTest("Shuffle")
   enableSuite[GlutenComplexTypeSuite]
     .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
     .exclude("SPARK-33460: GetMapValue NoSuchElementException")
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 518908c9c..00eb455c0 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -114,6 +114,7 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenCollectionExpressionsSuite]
     // Rewrite in Gluten to replace Seq with Array
     .exclude("Shuffle")
+    .excludeGlutenTest("Shuffle")
     // TODO: ArrayDistinct should handle duplicated Double.NaN
     .excludeByPrefix("SPARK-36741")
     // TODO: ArrayIntersect should handle duplicated Double.NaN
@@ -938,6 +939,9 @@ class VeloxTestSettings extends BackendTestSettings {
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
     .exclude("aggregate function - array for non-primitive type")
+    .exclude("shuffle function - array for primitive type not containing null")
+    .exclude("shuffle function - array for primitive type containing null")
+    .exclude("shuffle function - array for non-primitive type")
   enableSuite[GlutenDataFrameHintSuite]
   enableSuite[GlutenDataFrameImplicitsSuite]
   enableSuite[GlutenDataFrameJoinSuite]
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index d90722301..679893bb6 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -187,6 +187,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("transform values function - test empty")
     .exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
     .exclude("SPARK-24734: Fix containsNull of Concat for array type")
+    .exclude("shuffle function - array for primitive type not containing null")
+    .exclude("shuffle function - array for primitive type containing null")
+    .exclude("shuffle function - array for non-primitive type")
   enableSuite[GlutenDataFrameHintSuite]
   enableSuite[GlutenDataFrameImplicitsSuite]
   enableSuite[GlutenDataFrameJoinSuite].exclude(
@@ -567,6 +570,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude(
       "SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then non-NaN value")
     .exclude("SPARK-39184: Avoid ArrayIndexOutOfBoundsException when crossing DST boundary")
+    .excludeGlutenTest("Shuffle")
   enableSuite[GlutenComplexTypeSuite]
     .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
     .exclude("SPARK-33460: GetMapValue NoSuchElementException")
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 9d297e4ea..4f8afe579 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -95,6 +95,7 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenCollectionExpressionsSuite]
     // Rewrite in Gluten to replace Seq with Array
     .exclude("Shuffle")
+    .excludeGlutenTest("Shuffle")
     // TODO: ArrayDistinct should handle duplicated Double.NaN
     .excludeByPrefix("SPARK-36741")
     // TODO: ArrayIntersect should handle duplicated Double.NaN
@@ -943,6 +944,9 @@ class VeloxTestSettings extends BackendTestSettings {
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
     .exclude("aggregate function - array for non-primitive type")
+    .exclude("shuffle function - array for primitive type not containing null")
+    .exclude("shuffle function - array for primitive type containing null")
+    .exclude("shuffle function - array for non-primitive type")
   enableSuite[GlutenDataFrameHintSuite]
   enableSuite[GlutenDataFrameImplicitsSuite]
   enableSuite[GlutenDataFrameJoinSuite]
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index d90722301..679893bb6 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -187,6 +187,9 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("transform values function - test empty")
     .exclude("SPARK-14393: values generated by non-deterministic functions shouldn't change after coalesce or union")
     .exclude("SPARK-24734: Fix containsNull of Concat for array type")
+    .exclude("shuffle function - array for primitive type not containing null")
+    .exclude("shuffle function - array for primitive type containing null")
+    .exclude("shuffle function - array for non-primitive type")
   enableSuite[GlutenDataFrameHintSuite]
   enableSuite[GlutenDataFrameImplicitsSuite]
   enableSuite[GlutenDataFrameJoinSuite].exclude(
@@ -567,6 +570,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude(
       "SPARK-36740: ArrayMin/ArrayMax/SortArray should handle NaN greater then non-NaN value")
     .exclude("SPARK-39184: Avoid ArrayIndexOutOfBoundsException when crossing DST boundary")
+    .excludeGlutenTest("Shuffle")
   enableSuite[GlutenComplexTypeSuite]
     .exclude("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException")
     .exclude("SPARK-33460: GetMapValue NoSuchElementException")
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 1c74bd247..6f6c6d05a 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -96,6 +96,7 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenCollectionExpressionsSuite]
     // Rewrite in Gluten to replace Seq with Array
     .exclude("Shuffle")
+    .excludeGlutenTest("Shuffle")
     // TODO: ArrayDistinct should handle duplicated Double.NaN
     .excludeByPrefix("SPARK-36741")
     // TODO: ArrayIntersect should handle duplicated Double.NaN
@@ -959,6 +960,9 @@ class VeloxTestSettings extends BackendTestSettings {
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
     .exclude("aggregate function - array for non-primitive type")
+    .exclude("shuffle function - array for primitive type not containing null")
+    .exclude("shuffle function - array for primitive type containing null")
+    .exclude("shuffle function - array for non-primitive type")
   enableSuite[GlutenDataFrameHintSuite]
   enableSuite[GlutenDataFrameImplicitsSuite]
   enableSuite[GlutenDataFrameJoinSuite]
diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index ca8b098aa..f097d5362 100644
--- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -245,6 +245,7 @@ object ExpressionNames {
   final val ARRAY_REPEAT = "array_repeat"
   final val ARRAY_REMOVE = "array_remove"
   final val FILTER = "filter"
+  final val SHUFFLE = "shuffle"
 
   // Map functions
   final val CREATE_MAP = "map"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org

Reply via email to