(incubator-gluten) branch main updated: [CORE] Use the smaller table to build hashmap in shuffled hash join (#5750)

2024-05-30 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new f48b9fa09 [CORE] Use the smaller table to build hashmap in shuffled 
hash join (#5750)
f48b9fa09 is described below

commit f48b9fa09f6421f861a22d8027d2cb81767f4e5c
Author: Mingliang Zhu 
AuthorDate: Fri May 31 11:12:28 2024 +0800

[CORE] Use the smaller table to build hashmap in shuffled hash join (#5750)
---
 .../gluten/backendsapi/velox/VeloxBackend.scala|  20 +-
 .../gluten/extension/StrategyOverrides.scala   |  12 +-
 .../extension/columnar/OffloadSingleNode.scala |  43 +-
 .../extension/columnar/TransformHintRule.scala |  31 +-
 .../gluten/planner/cost/GlutenCostModel.scala  |   6 -
 .../sql/execution/joins/HashJoin.scala.deprecated  | 774 -
 .../sql/execution/joins/HashJoin.scala.deprecated  | 774 -
 .../sql/execution/joins/HashJoin.scala.deprecated  | 774 -
 .../sql/execution/joins/HashJoin.scala.deprecated  | 774 -
 9 files changed, 41 insertions(+), 3167 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
index c8dbfb29e..f06929fff 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala
@@ -29,7 +29,7 @@ import 
org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat.{DwrfReadFo
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, 
Descending, Expression, Lag, Lead, Literal, MakeYMInterval, NamedExpression, 
NthValue, NTile, PercentRank, Rand, RangeFrame, Rank, RowNumber, SortOrder, 
SparkPartitionID, SpecialFrameBoundary, SpecifiedWindowFrame, Uuid}
 import 
org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, 
ApproximatePercentile, Count, Sum}
-import org.apache.spark.sql.catalyst.plans.JoinType
+import org.apache.spark.sql.catalyst.plans.{JoinType, LeftOuter, RightOuter}
 import org.apache.spark.sql.catalyst.util.CharVarcharUtils
 import org.apache.spark.sql.execution.{ProjectExec, SparkPlan}
 import org.apache.spark.sql.execution.aggregate.HashAggregateExec
@@ -375,13 +375,10 @@ object VeloxBackendSettings extends BackendSettingsApi {
   } else {
 t match {
   // OPPRO-266: For Velox backend, build right and left are both 
supported for
-  // LeftOuter and LeftSemi.
-  // FIXME Hongze 22/12/06
-  //  HashJoin.scala in shim was not always loaded by class loader.
-  //  The file should be removed and we temporarily disable the 
improvement
-  //  introduced by OPPRO-266 by commenting out the following 
prerequisite
-  //  condition.
-//  case LeftOuter | LeftSemi => true
+  // LeftOuter.
+  // TODO: Support LeftSemi after resolve issue
+  // https://github.com/facebookincubator/velox/issues/9980
+  case LeftOuter => true
   case _ => false
 }
   }
@@ -393,12 +390,7 @@ object VeloxBackendSettings extends BackendSettingsApi {
   } else {
 t match {
   // OPPRO-266: For Velox backend, build right and left are both 
supported for RightOuter.
-  // FIXME Hongze 22/12/06
-  //  HashJoin.scala in shim was not always loaded by class loader.
-  //  The file should be removed and we temporarily disable the 
improvement
-  //  introduced by OPPRO-266 by commenting out the following 
prerequisite
-  //  condition.
-//  case RightOuter => true
+  case RightOuter => true
   case _ => false
 }
   }
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala
index d016eaccc..f2f786259 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala
@@ -111,7 +111,9 @@ case class JoinSelectionOverrides(session: SparkSession)
   // it don't use this side as the build side
   (!leftHintMergeEnabled, !rightHintMergeEnabled)
 } else {
-  (canBuildShuffledHashJoinLeft(joinType), 
canBuildShuffledHashJoinRight(joinType))
+  (
+
BackendsApiManager.getSettings.supportHashBuildJoinTypeOnLeft(joinType),
+
BackendsApiManager.getSettings.supportHashBuildJoinTypeOnRight(joinType))
 }

(incubator-gluten) branch main updated: [VL] Enable SortShuffleSuite with ColumnarShuffleManager (#5816)

2024-05-29 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 588faae35 [VL] Enable SortShuffleSuite with ColumnarShuffleManager 
(#5816)
588faae35 is described below

commit 588faae351bff29d868336f530aa72eb99a57083
Author: Ankita Victor 
AuthorDate: Wed May 29 18:33:29 2024 +0530

[VL] Enable SortShuffleSuite with ColumnarShuffleManager (#5816)
---
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  1 +
 .../gluten/utils/velox/VeloxTestSettings.scala |  2 ++
 .../org/apache/spark/GlutenSortShuffleSuite.scala  | 24 ++
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  1 +
 .../gluten/utils/velox/VeloxTestSettings.scala |  2 ++
 .../org/apache/spark/GlutenSortShuffleSuite.scala  | 24 ++
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  1 +
 .../gluten/utils/velox/VeloxTestSettings.scala |  2 ++
 .../org/apache/spark/GlutenSortShuffleSuite.scala  | 24 ++
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  1 +
 .../gluten/utils/velox/VeloxTestSettings.scala |  2 ++
 .../org/apache/spark/GlutenSortShuffleSuite.scala  | 24 ++
 12 files changed, 108 insertions(+)

diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index afc427cd3..2c34baa63 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -68,6 +68,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
 false // nativeDoValidate failed due to spark conf cleanup
   case "GlutenDataSourceV2SQLSuite" =>
 false // nativeDoValidate failed due to spark conf cleanup
+  case "GlutenSortShuffleSuite" => false
   case _ => true
 }
 preCheck && super.shouldRun(suiteName, testName)
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index c78d8230e..664cd37d1 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -18,6 +18,7 @@ package org.apache.gluten.utils.velox
 
 import org.apache.gluten.utils.{BackendTestSettings, SQLQueryTestSettings}
 
+import org.apache.spark.GlutenSortShuffleSuite
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.expressions._
 import 
org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite, 
GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite, 
GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuite, 
GlutenDataSourceV2Suite, GlutenFileDataSourceV2FallBackSuite, 
GlutenLocalScanSuite, GlutenSupportsCatalogOptionsSuite, 
GlutenTableCapabilityCheckSuite, GlutenWriteDistributionAndOrderingSuite}
@@ -229,6 +230,7 @@ class VeloxTestSettings extends BackendTestSettings {
 // Spark round UT for round(3.1415,3) is not correct.
 .exclude("round/bround")
   enableSuite[GlutenMathFunctionsSuite]
+  enableSuite[GlutenSortShuffleSuite]
   enableSuite[GlutenSortOrderExpressionsSuite]
   enableSuite[GlutenBitwiseExpressionsSuite]
   enableSuite[GlutenStringExpressionsSuite]
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/spark/GlutenSortShuffleSuite.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/GlutenSortShuffleSuite.scala
new file mode 100644
index 0..338d7992e
--- /dev/null
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/GlutenSortShuffleSuite.scala
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark
+
+class GlutenSortShuffl

(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_27) (#5872)

2024-05-27 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new e1469f0c3 [VL] Daily Update Velox Version (2024_05_27) (#5872)
e1469f0c3 is described below

commit e1469f0c3859c9dd5f0edb9d2f5890794a540cee
Author: Gluten Performance Bot 
<137994563+glutenperf...@users.noreply.github.com>
AuthorDate: Mon May 27 14:35:46 2024 +0800

[VL] Daily Update Velox Version (2024_05_27) (#5872)
---
 ep/build-velox/src/get_velox.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 6c25c8f08..b71a7ad47 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_05_24
+VELOX_BRANCH=2024_05_27
 VELOX_HOME=""
 
 #Set on run gluten on HDFS


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@gluten.apache.org
For additional commands, e-mail: commits-help@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_24) (#5860)

2024-05-23 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new aeea6498e [VL] Daily Update Velox Version (2024_05_24) (#5860)
aeea6498e is described below

commit aeea6498ece025004d71e1090141a25d39771245
Author: Gluten Performance Bot 
<137994563+glutenperf...@users.noreply.github.com>
AuthorDate: Fri May 24 11:40:08 2024 +0800

[VL] Daily Update Velox Version (2024_05_24) (#5860)
---
 ep/build-velox/src/get_velox.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index af032e186..6c25c8f08 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_05_23
+VELOX_BRANCH=2024_05_24
 VELOX_HOME=""
 
 #Set on run gluten on HDFS


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@gluten.apache.org
For additional commands, e-mail: commits-help@gluten.apache.org



(incubator-gluten) branch main updated: [GLUTEN-4039][VL] Support width_bucket function (#5634)

2024-05-23 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new ec3f6b318 [GLUTEN-4039][VL] Support width_bucket function (#5634)
ec3f6b318 is described below

commit ec3f6b318ef09d1f697997efdbb24e75bd2e0835
Author: 高阳阳 
AuthorDate: Fri May 24 09:27:04 2024 +0800

[GLUTEN-4039][VL] Support width_bucket function (#5634)
---
 .../org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala | 7 +++
 .../scala/org/apache/gluten/expression/ExpressionMappings.scala| 1 +
 .../main/scala/org/apache/gluten/expression/ExpressionNames.scala  | 1 +
 3 files changed, 9 insertions(+)

diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 3180842ad..f9ec07619 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -594,6 +594,13 @@ class ScalarFunctionsValidateSuite extends 
FunctionsValidateTest {
 }
   }
 
+  testWithSpecifiedSparkVersion("Test width_bucket function", Some("3.4")) {
+runQueryAndCompare("""SELECT width_bucket(2, 0, 4, 3), l_orderkey
+ | from lineitem limit 100""".stripMargin) {
+  checkGlutenOperatorMatch[ProjectExecTransformer]
+}
+  }
+
   testWithSpecifiedSparkVersion("Test url_decode function", Some("3.4")) {
 withTempPath {
   path =>
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index c734967de..14371a71e 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -275,6 +275,7 @@ object ExpressionMappings {
 Sig[PromotePrecision](PROMOTE_PRECISION),
 Sig[MonotonicallyIncreasingID](MONOTONICALLY_INCREASING_ID),
 Sig[SparkPartitionID](SPARK_PARTITION_ID),
+Sig[WidthBucket](WIDTH_BUCKET),
 // Decimal
 Sig[UnscaledValue](UNSCALED_VALUE),
 // Generator function
diff --git 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index eded85e06..6e6502c19 100644
--- 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -302,6 +302,7 @@ object ExpressionNames {
   final val PROMOTE_PRECISION = "promote_precision"
   final val SPARK_PARTITION_ID = "spark_partition_id"
   final val MONOTONICALLY_INCREASING_ID = "monotonically_increasing_id"
+  final val WIDTH_BUCKET = "width_bucket"
 
   // Directly use child expression transformer
   final val KNOWN_NULLABLE = "known_nullable"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@gluten.apache.org
For additional commands, e-mail: commits-help@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Enable NaN tests for array functions (#5854)

2024-05-23 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 9a38eba11 [VL] Enable NaN tests for array functions (#5854)
9a38eba11 is described below

commit 9a38eba11aaab68d7e0722a1029fe7412acbbfb1
Author: Rui Mo 
AuthorDate: Fri May 24 08:45:46 2024 +0800

[VL] Enable NaN tests for array functions (#5854)
---
 .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 6 --
 .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 6 --
 .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 6 --
 .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 6 --
 4 files changed, 24 deletions(-)

diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 5e3591203..c78d8230e 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -202,12 +202,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // Rewrite in Gluten to replace Seq with Array
 .exclude("Shuffle")
 .excludeGlutenTest("Shuffle")
-// TODO: ArrayDistinct should handle duplicated Double.NaN
-.excludeByPrefix("SPARK-36741")
-// TODO: ArrayIntersect should handle duplicated Double.NaN
-.excludeByPrefix("SPARK-36754")
-// Not supported case.
-.exclude("SPARK-36753: ArrayExcept should handle duplicated Double.NaN and 
Float.Nan")
   enableSuite[GlutenDateExpressionsSuite]
 // Rewrite because Spark collect causes long overflow.
 .exclude("TIMESTAMP_MICROS")
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 1d796aa1b..3b32cebca 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -115,12 +115,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // Rewrite in Gluten to replace Seq with Array
 .exclude("Shuffle")
 .excludeGlutenTest("Shuffle")
-// TODO: ArrayDistinct should handle duplicated Double.NaN
-.excludeByPrefix("SPARK-36741")
-// TODO: ArrayIntersect should handle duplicated Double.NaN
-.excludeByPrefix("SPARK-36754")
-// Not supported case.
-.exclude("SPARK-36753: ArrayExcept should handle duplicated Double.NaN and 
Float.Nan")
   enableSuite[GlutenConditionalExpressionSuite]
   enableSuite[GlutenDateExpressionsSuite]
 // Has exception in fallback execution when we use resultDF.collect in 
evaluation.
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 7c8509f80..3a993189d 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -96,12 +96,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // Rewrite in Gluten to replace Seq with Array
 .exclude("Shuffle")
 .excludeGlutenTest("Shuffle")
-// TODO: ArrayDistinct should handle duplicated Double.NaN
-.excludeByPrefix("SPARK-36741")
-// TODO: ArrayIntersect should handle duplicated Double.NaN
-.excludeByPrefix("SPARK-36754")
-// Not supported case.
-.exclude("SPARK-36753: ArrayExcept should handle duplicated Double.NaN and 
Float.Nan")
   enableSuite[GlutenConditionalExpressionSuite]
   enableSuite[GlutenDateExpressionsSuite]
 // Has exception in fallback execution when we use resultDF.collect in 
evaluation.
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 76b666779..98942462a 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -97,12 +97,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // Rewrite in Gluten to replace Seq with Array
 .exclude("Shuffle")
 .excludeGlutenTest("Shuffle")
-// TODO: ArrayDistinct should handle duplicated Double.NaN
-.exclude

(incubator-gluten) branch main updated: [VL] Enable rand function (#5829)

2024-05-23 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 33eadbfa6 [VL] Enable rand function (#5829)
33eadbfa6 is described below

commit 33eadbfa62e988aeef43c1c888abf9b601de4cab
Author: 高阳阳 
AuthorDate: Thu May 23 20:10:54 2024 +0800

[VL] Enable rand function (#5829)
---
 .../clickhouse/CHSparkPlanExecApi.scala|7 -
 .../execution/ScalarFunctionsValidateSuite.scala   |6 +
 .../functions/RegistrationAllFunctions.cc  |4 +
 cpp/velox/substrait/SubstraitParser.cc |3 +
 docs/velox-backend-support-progress.md |2 +
 .../gluten/backendsapi/SparkPlanExecApi.scala  |7 -
 .../gluten/expression/ExpressionConverter.scala|5 -
 .../expression/UnaryExpressionTransformer.scala|   22 -
 .../sql-tests/inputs/group-by-ordinal.sql  |   96 ++
 .../src/test/resources/sql-tests/inputs/random.sql |   17 +
 .../sql-tests/results/group-by-ordinal.sql.out |  398 
 .../resources/sql-tests/results/group-by.sql.out   |2 +-
 .../resources/sql-tests/results/random.sql.out |   84 ++
 .../sql-tests/inputs/group-by-ordinal.sql  |   96 ++
 .../src/test/resources/sql-tests/inputs/random.sql |   17 +
 .../sql-tests/results/group-by-ordinal.sql.out |  398 
 .../resources/sql-tests/results/group-by.sql.out   |2 +-
 .../resources/sql-tests/results/random.sql.out |   84 ++
 .../sql-tests/inputs/group-by-ordinal.sql  |   96 ++
 .../src/test/resources/sql-tests/inputs/random.sql |   17 +
 .../sql-tests/results/group-by-ordinal.sql.out |  523 ++
 .../resources/sql-tests/results/group-by.sql.out   |2 +-
 .../resources/sql-tests/results/random.sql.out |  115 +++
 .../sql-tests/inputs/group-by-ordinal.sql  |   96 ++
 .../src/test/resources/sql-tests/inputs/random.sql |   17 +
 .../sql-tests/inputs/table-valued-functions.sql|  126 +++
 .../sql-tests/results/group-by-ordinal.sql.out |  524 ++
 .../resources/sql-tests/results/group-by.sql.out   |2 +-
 .../resources/sql-tests/results/random.sql.out |  115 +++
 .../results/table-valued-functions.sql.out | 1017 
 .../gluten/utils/velox/VeloxTestSettings.scala |1 +
 .../spark/sql/GlutenGeneratorFunctionSuite.scala   |   11 +-
 32 files changed, 3866 insertions(+), 46 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index 6a154cd94..8c2b20db6 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -387,13 +387,6 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
   original: GetMapValue): ExpressionTransformer =
 GetMapValueTransformer(substraitExprName, left, right, 
original.failOnError, original)
 
-  override def genRandTransformer(
-  substraitExprName: String,
-  explicitSeed: ExpressionTransformer,
-  original: Rand): ExpressionTransformer = {
-GenericExpressionTransformer(substraitExprName, Seq(explicitSeed), 
original)
-  }
-
   /**
* Generate ShuffleDependency for ColumnarShuffleExchangeExec.
*
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index b3753ab83..3180842ad 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -717,6 +717,12 @@ class ScalarFunctionsValidateSuite extends 
FunctionsValidateTest {
 }
   }
 
+  test("Test rand function") {
+runQueryAndCompare("""SELECT rand() from lineitem limit 
100""".stripMargin, false) {
+  checkGlutenOperatorMatch[ProjectExecTransformer]
+}
+  }
+
   test("regexp_replace") {
 runQueryAndCompare(
   "SELECT regexp_replace(c_comment, '\\w', 'something') FROM customer 
limit 50") {
diff --git a/cpp/velox/operators/functions/RegistrationAllFunctions.cc 
b/cpp/velox/operators/functions/RegistrationAllFunctions.cc
index 5a6b0f6aa..b88d781b6 100644
--- a/cpp/velox/operators/functions/RegistrationAllFunctions.cc
+++ b/cpp/velox/operators/functions/RegistrationAllFunctions.cc
@@ -27,6 +27,7 @@
 #include "velox/functions/prestosql/window/WindowFunctionsRegistration.h"
 #include "velox/functions/sparksql/Bitwise.h"
 #include "velox/fu

(incubator-gluten) branch main updated: [VL] Upgrade cmake version to 3.28.3 in CI image (#5842)

2024-05-22 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 7c777be25 [VL] Upgrade cmake version to 3.28.3 in CI image (#5842)
7c777be25 is described below

commit 7c777be25fcd549dd53653986c6d40bd6cdcb965
Author: Yuan 
AuthorDate: Thu May 23 08:40:04 2024 +0800

[VL] Upgrade cmake version to 3.28.3 in CI image (#5842)
---
 .github/workflows/velox_docker.yml | 2 +-
 dev/ci-velox-buildstatic.sh| 2 +-
 dev/vcpkg/init.sh  | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/velox_docker.yml 
b/.github/workflows/velox_docker.yml
index 88c6c2a24..d7644b5d0 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -49,7 +49,7 @@ concurrency:
 jobs:
   build-native-lib-centos-7:
 runs-on: ubuntu-20.04
-container: apache/gluten:gluten-vcpkg-builder_2024_03_17 # centos7 with 
dependencies installed
+container: apache/gluten:gluten-vcpkg-builder_2024_05_22 # centos7 with 
dependencies installed
 steps:
   - uses: actions/checkout@v2
   - name: Generate cache key
diff --git a/dev/ci-velox-buildstatic.sh b/dev/ci-velox-buildstatic.sh
index a9b9d2c3f..208490d1c 100755
--- a/dev/ci-velox-buildstatic.sh
+++ b/dev/ci-velox-buildstatic.sh
@@ -2,7 +2,7 @@ yum install sudo patch java-1.8.0-openjdk-devel -y
 cd $GITHUB_WORKSPACE/ep/build-velox/src
 ./get_velox.sh
 source /opt/rh/devtoolset-9/enable
-source $GITHUB_WORKSPACE//dev/vcpkg/env.sh
+source /opt/gluten/dev/vcpkg/env.sh
 cd $GITHUB_WORKSPACE/
 sed -i '/^headers/d' ep/build-velox/build/velox_ep/CMakeLists.txt
 export NUM_THREADS=4
diff --git a/dev/vcpkg/init.sh b/dev/vcpkg/init.sh
index 141543af4..e69aec94a 100755
--- a/dev/vcpkg/init.sh
+++ b/dev/vcpkg/init.sh
@@ -16,6 +16,9 @@ if [ ! -d "$VCPKG_ROOT" ] || [ -z "$(ls "$VCPKG_ROOT")" ]; 
then
 fi
 [ -f "$VCPKG" ] || "$VCPKG_ROOT/bootstrap-vcpkg.sh" -disableMetrics
 
+sed -i "s/3.27.1/3.28.3/g" $VCPKG_ROOT/scripts/vcpkgTools.xml
+sed -i 
"s/192374a68e2971f04974a194645726196d9b8ee7abd650d1e6f65f7aa2ccc9b186c3edb473bb4958c764532edcdd42f4182ee1fcb86b17d78b0bcd6305ce3df1/bd311ca835ef0914952f21d70d1753564d58de2ede02e80ede96e78cd2f40b4189e006007643ebb37792e13edd97eb4a33810bc8aca1eab6dd428eaffe1d2e38/g"
 $VCPKG_ROOT/scripts/vcpkgTools.xml
+
 $VCPKG install --no-print-usage \
 --triplet="${VCPKG_TRIPLET}" --host-triplet="${VCPKG_TRIPLET}"
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@gluten.apache.org
For additional commands, e-mail: commits-help@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_22) (#5834)

2024-05-22 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new a7e536ebb [VL] Daily Update Velox Version (2024_05_22) (#5834)
a7e536ebb is described below

commit a7e536ebb11a685381bf8a799f16f42789b7bc43
Author: Gluten Performance Bot 
<137994563+glutenperf...@users.noreply.github.com>
AuthorDate: Wed May 22 18:59:31 2024 +0800

[VL] Daily Update Velox Version (2024_05_22) (#5834)
---
 cpp/velox/benchmarks/PlanValidatorUtil.cc   | 4 ++--
 cpp/velox/compute/WholeStageResultIterator.cc   | 2 +-
 cpp/velox/jni/VeloxJniWrapper.cc| 4 ++--
 cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc | 2 +-
 ep/build-velox/src/get_velox.sh | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cpp/velox/benchmarks/PlanValidatorUtil.cc 
b/cpp/velox/benchmarks/PlanValidatorUtil.cc
index e299b4620..46f2733f2 100644
--- a/cpp/velox/benchmarks/PlanValidatorUtil.cc
+++ b/cpp/velox/benchmarks/PlanValidatorUtil.cc
@@ -45,9 +45,9 @@ int main(int argc, char** argv) {
   conf.insert({kDebugModeEnabled, "true"});
   initVeloxBackend(conf);
   std::unordered_map<std::string, std::string> 
configs{{core::QueryConfig::kSparkPartitionId, "0"}};
-  core::QueryCtx queryCtx(nullptr, core::QueryConfig(configs));
+  auto queryCtx = core::QueryCtx::create(nullptr, core::QueryConfig(configs));
   auto pool = defaultLeafVeloxMemoryPool().get();
-  core::ExecCtx execCtx(pool, &queryCtx);
+  core::ExecCtx execCtx(pool, queryCtx.get());
 
   ::substrait::Plan subPlan;
   parseProtobuf(reinterpret_cast<uint8_t*>(plan.data()), plan.size(), 
&subPlan);
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc 
b/cpp/velox/compute/WholeStageResultIterator.cc
index 852c7e3cc..f719c119c 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -169,7 +169,7 @@ std::shared_ptr 
WholeStageResultIterator::createNewVeloxQ
   std::unordered_map> 
connectorConfigs;
   connectorConfigs[kHiveConnectorId] = createConnectorConfig();
 
-  std::shared_ptr<velox::core::QueryCtx> ctx = 
std::make_shared<velox::core::QueryCtx>(
+  std::shared_ptr<velox::core::QueryCtx> ctx = velox::core::QueryCtx::create(
   nullptr,
   facebook::velox::core::QueryConfig{getQueryContextConf()},
   connectorConfigs,
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index 7884280c3..9da7355d1 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -120,10 +120,10 @@ 
Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeValidateWithFail
   // A query context with dummy configs. Used for function validation.
   std::unordered_map<std::string, std::string> configs{
   {velox::core::QueryConfig::kSparkPartitionId, "0"}, 
{velox::core::QueryConfig::kSessionTimezone, "GMT"}};
-  velox::core::QueryCtx queryCtx(nullptr, velox::core::QueryConfig(configs));
+  auto queryCtx = velox::core::QueryCtx::create(nullptr, 
velox::core::QueryConfig(configs));
   auto pool = gluten::defaultLeafVeloxMemoryPool().get();
   // An execution context used for function validation.
-  velox::core::ExecCtx execCtx(pool, &queryCtx);
+  velox::core::ExecCtx execCtx(pool, queryCtx.get());
 
   gluten::SubstraitToVeloxPlanValidator planValidator(pool, &execCtx);
   jclass infoCls = 
env->FindClass("Lorg/apache/gluten/validate/NativePlanValidationInfo;");
diff --git a/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc 
b/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc
index d5eafa1e2..0a957f038 100644
--- a/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc
+++ b/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc
@@ -46,7 +46,7 @@ class Substrait2VeloxPlanValidatorTest : public 
exec::test::HiveConnectorTestBas
   }
 
   bool validatePlan(::substrait::Plan& plan) {
-std::shared_ptr<core::QueryCtx> queryCtx = 
std::make_shared<core::QueryCtx>();
+auto queryCtx = core::QueryCtx::create();
 
 // An execution context used for function validation.
 std::unique_ptr<core::ExecCtx> execCtx = 
std::make_unique<core::ExecCtx>(pool_.get(), queryCtx.get());
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 70b3a9b09..fbb0f7067 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_05_21
+VELOX_BRANCH=2024_05_22
 VELOX_HOME=""
 
 #Set on run gluten on HDFS


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_21) (#5819)

2024-05-20 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new ebd9f9f96 [VL] Daily Update Velox Version (2024_05_21) (#5819)
ebd9f9f96 is described below

commit ebd9f9f96aff26e56d16a76d994e357c1880c6da
Author: Gluten Performance Bot 
<137994563+glutenperf...@users.noreply.github.com>
AuthorDate: Tue May 21 11:59:16 2024 +0800

[VL] Daily Update Velox Version (2024_05_21) (#5819)
---
 ep/build-velox/src/get_velox.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index c37933d10..70b3a9b09 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_05_20
+VELOX_BRANCH=2024_05_21
 VELOX_HOME=""
 
 #Set on run gluten on HDFS


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_20) (#5807)

2024-05-20 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 864c6bb66 [VL] Daily Update Velox Version (2024_05_20) (#5807)
864c6bb66 is described below

commit 864c6bb84dac673038beb227579ed7eb0e6a
Author: Gluten Performance Bot 
<137994563+glutenperf...@users.noreply.github.com>
AuthorDate: Mon May 20 19:27:43 2024 +0800

[VL] Daily Update Velox Version (2024_05_20) (#5807)
---
 ep/build-velox/src/get_velox.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 33a82ca57..c37933d10 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_05_17
+VELOX_BRANCH=2024_05_20
 VELOX_HOME=""
 
 #Set on run gluten on HDFS


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_17) (#5781)

2024-05-18 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new be760ee6e [VL] Daily Update Velox Version (2024_05_17) (#5781)
be760ee6e is described below

commit be760ee6e2f8346f679af1f43dc94e029c5579a3
Author: Rui Mo 
AuthorDate: Sat May 18 16:05:49 2024 +0800

[VL] Daily Update Velox Version (2024_05_17) (#5781)
---
 cpp/velox/compute/WholeStageResultIterator.cc | 12 ++--
 cpp/velox/compute/WholeStageResultIterator.h  |  1 +
 ep/build-velox/src/get_velox.sh   |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/cpp/velox/compute/WholeStageResultIterator.cc 
b/cpp/velox/compute/WholeStageResultIterator.cc
index 06a7a7c39..852c7e3cc 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -72,6 +72,11 @@ WholeStageResultIterator::WholeStageResultIterator(
   gluten::updateHdfsTokens(veloxCfg_.get());
 #endif
   spillStrategy_ = veloxCfg_->get(kSpillStrategy, 
kSpillStrategyDefaultValue);
+  auto spillThreadNum = veloxCfg_->get<uint32_t>(kSpillThreadNum, 
kSpillThreadNumDefaultValue);
+  if (spillThreadNum > 0) {
+spillExecutor_ = 
std::make_shared<folly::CPUThreadPoolExecutor>(spillThreadNum);
+  }
+
   getOrderedNodeIds(veloxPlan_, orderedNodeIds_);
 
   // Create task instance.
@@ -164,18 +169,13 @@ std::shared_ptr 
WholeStageResultIterator::createNewVeloxQ
   std::unordered_map> 
connectorConfigs;
   connectorConfigs[kHiveConnectorId] = createConnectorConfig();
 
-  auto spillThreadNum = veloxCfg_->get(kSpillThreadNum, 
kSpillThreadNumDefaultValue);
-  std::shared_ptr spillExecutor = nullptr;
-  if (spillThreadNum > 0) {
-spillExecutor = 
std::make_shared(spillThreadNum);
-  }
   std::shared_ptr<velox::core::QueryCtx> ctx = 
std::make_shared<velox::core::QueryCtx>(
   nullptr,
   facebook::velox::core::QueryConfig{getQueryContextConf()},
   connectorConfigs,
   gluten::VeloxBackend::get()->getAsyncDataCache(),
   memoryManager_->getAggregateMemoryPool(),
-  std::move(spillExecutor),
+  spillExecutor_.get(),
   "");
   return ctx;
 }
diff --git a/cpp/velox/compute/WholeStageResultIterator.h 
b/cpp/velox/compute/WholeStageResultIterator.h
index 0ad3877ff..5e661f404 100644
--- a/cpp/velox/compute/WholeStageResultIterator.h
+++ b/cpp/velox/compute/WholeStageResultIterator.h
@@ -110,6 +110,7 @@ class WholeStageResultIterator : public 
ColumnarBatchIterator {
 
   /// Spill.
   std::string spillStrategy_;
+  std::shared_ptr spillExecutor_ = nullptr;
 
   /// Metrics
   std::unique_ptr metrics_{};
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 17a0b3796..33a82ca57 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_05_16
+VELOX_BRANCH=2024_05_17
 VELOX_HOME=""
 
 #Set on run gluten on HDFS


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_16) (#5756)

2024-05-16 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 0d4258d78 [VL] Daily Update Velox Version (2024_05_16) (#5756)
0d4258d78 is described below

commit 0d4258d7848a9349aba5ec143c503407ba8f50be
Author: Gluten Performance Bot 
<137994563+glutenperf...@users.noreply.github.com>
AuthorDate: Thu May 16 19:48:42 2024 +0800

[VL] Daily Update Velox Version (2024_05_16) (#5756)
---
 .github/workflows/velox_docker_cache.yml | 78 
 ep/build-velox/src/get_velox.sh  |  2 +-
 2 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/.github/workflows/velox_docker_cache.yml 
b/.github/workflows/velox_docker_cache.yml
index ec95f48a2..cbc24384d 100644
--- a/.github/workflows/velox_docker_cache.yml
+++ b/.github/workflows/velox_docker_cache.yml
@@ -84,42 +84,42 @@ jobs:
 with:
   path: '${{ env.CCACHE_DIR }}'
   key: ccache-ubuntu-release-default
-  ccache-native-lib-centos-velox-ut:
-runs-on: ubuntu-20.04
-env:
-  CCACHE_DIR: "${{ github.workspace }}/.ccache"
-container: ghcr.io/facebookincubator/velox-dev:circleci-avx
-steps:
-  - uses: actions/checkout@v2
-  - name: Setup java and maven
-run: |
-  yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
-  wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
-  tar -xvf apache-maven-3.8.8-bin.tar.gz
-  mv apache-maven-3.8.8 /usr/lib/maven
-  - name: Get Ccache
-uses: actions/cache/restore@v3
-with:
-  path: '${{ env.CCACHE_DIR }}'
-  key: ccache-centos-release-default
-  - name: Ensure Cache Dirs Exists
-working-directory: ${{ github.workspace }}
-run: |
-  mkdir -p '${{ env.CCACHE_DIR }}'
-  - name: Build Gluten velox third party
-run: |
-  rm -rf /opt/miniconda-for-velox/
-  cd ep/build-velox/src && \
-  ./get_velox.sh
-  cd ../build/velox_ep/
-  source /opt/rh/gcc-toolset-9/enable
-  make EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON 
-DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON" 
-
-  - name: CCache after
-run: |
-  ccache -s
-
-  - uses: actions/cache/save@v3
-with:
-  path: '${{ env.CCACHE_DIR }}'
-  key: ccache-centos-release-default
\ No newline at end of file
+#  ccache-native-lib-centos-velox-ut:
+#runs-on: ubuntu-20.04
+#env:
+#  CCACHE_DIR: "${{ github.workspace }}/.ccache"
+#container: ghcr.io/facebookincubator/velox-dev:circleci-avx
+#steps:
+#  - uses: actions/checkout@v2
+#  - name: Setup java and maven
+#run: |
+#  yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
+#  wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+#  tar -xvf apache-maven-3.8.8-bin.tar.gz
+#  mv apache-maven-3.8.8 /usr/lib/maven
+#  - name: Get Ccache
+#uses: actions/cache/restore@v3
+#with:
+#  path: '${{ env.CCACHE_DIR }}'
+#  key: ccache-centos-release-default
+#  - name: Ensure Cache Dirs Exists
+#working-directory: ${{ github.workspace }}
+#run: |
+#  mkdir -p '${{ env.CCACHE_DIR }}'
+#  - name: Build Gluten velox third party
+#run: |
+#  rm -rf /opt/miniconda-for-velox/
+#  cd ep/build-velox/src && \
+#  ./get_velox.sh
+#  cd ../build/velox_ep/
+#  source /opt/rh/gcc-toolset-9/enable
+#  make EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON 
-DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON"
+#
+#  - name: CCache after
+#run: |
+#  ccache -s
+#
+#  - uses: actions/cache/save@v3
+#with:
+#  path: '${{ env.CCACHE_DIR }}'
+#  key: ccache-centos-release-default
\ No newline at end of file
diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index 497befbe6..17a0b3796 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_05_15
+VELOX_BRANCH=2024_05_16
 VELOX_HOME=""
 
 #Set on run gluten on HDFS


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Enable length function for binary type (#5761)

2024-05-15 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new cb02cdb0a [VL] Enable length function for binary type (#5761)
cb02cdb0a is described below

commit cb02cdb0a4095a8f194e62268147182afd48821c
Author: Zhen Li <10524738+zhli1142...@users.noreply.github.com>
AuthorDate: Thu May 16 13:51:29 2024 +0800

[VL] Enable length function for binary type (#5761)
---
 .../apache/gluten/execution/ScalarFunctionsValidateSuite.scala| 8 
 cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc  | 6 --
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 834e172f8..e88e9699a 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -977,4 +977,12 @@ class ScalarFunctionsValidateSuite extends 
FunctionsValidateTest {
 }
 }
   }
+
+  test("length") {
+runQueryAndCompare(
+  "select length(c_comment), length(cast(c_comment as binary))" +
+" from customer limit 50") {
+  checkGlutenOperatorMatch[ProjectExecTransformer]
+}
+  }
 }
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc 
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index fc8b912e0..51f39a3ab 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -191,12 +191,6 @@ bool SubstraitToVeloxPlanValidator::validateScalarFunction(
 return validateRound(scalarFunction, inputType);
   } else if (name == "extract") {
 return validateExtractExpr(params);
-  } else if (name == "char_length") {
-VELOX_CHECK(types.size() == 1);
-if (types[0] == "vbin") {
-  LOG_VALIDATION_MSG("Binary type is not supported in " + name);
-  return false;
-}
   } else if (name == "map_from_arrays") {
 LOG_VALIDATION_MSG("map_from_arrays is not supported.");
 return false;


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_15) (#5748)

2024-05-15 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 888e1e244 [VL] Daily Update Velox Version (2024_05_15) (#5748)
888e1e244 is described below

commit 888e1e24403a7d42a936586bc4563e143769ae17
Author: Gluten Performance Bot 
<137994563+glutenperf...@users.noreply.github.com>
AuthorDate: Thu May 16 13:34:13 2024 +0800

[VL] Daily Update Velox Version (2024_05_15) (#5748)
---
 ep/build-velox/src/get_velox.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh
index d1e6054d8..497befbe6 100755
--- a/ep/build-velox/src/get_velox.sh
+++ b/ep/build-velox/src/get_velox.sh
@@ -17,7 +17,7 @@
 set -exu
 
 VELOX_REPO=https://github.com/oap-project/velox.git
-VELOX_BRANCH=2024_05_14
+VELOX_BRANCH=2024_05_15
 VELOX_HOME=""
 
 #Set on run gluten on HDFS


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated (8ade7a9cb -> 731c17ea4)

2024-04-25 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


from 8ade7a9cb [VL] Use slice instead of resize in ensureFlattened (#5523)
 add 731c17ea4  [GLUTEN-5532] Code clean up for GlutenPlugin (#5533)

No new revisions were added by this update.

Summary of changes:
 .../main/scala/org/apache/gluten/GlutenPlugin.scala   | 19 ---
 1 file changed, 19 deletions(-)


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Support regr_sxx and regr_syy aggregate functions for Spark 3.4 (#5444)

2024-04-18 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 628763fc3 [VL] Support regr_sxx and regr_syy aggregate functions for 
Spark 3.4 (#5444)
628763fc3 is described below

commit 628763fc3a9f471e4b2c25c1d07efc968857be16
Author: Joey 
AuthorDate: Fri Apr 19 13:02:15 2024 +0800

[VL] Support regr_sxx and regr_syy aggregate functions for Spark 3.4 (#5444)
---
 .../execution/VeloxAggregateFunctionsSuite.scala   | 38 --
 .../substrait/SubstraitToVeloxPlanValidator.cc |  3 +-
 docs/velox-backend-support-progress.md |  3 ++
 .../apache/gluten/expression/ExpressionNames.scala |  1 +
 .../gluten/sql/shims/spark34/Spark34Shims.scala|  3 +-
 5 files changed, 43 insertions(+), 5 deletions(-)

diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
index 2573725a7..df0817410 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
@@ -432,14 +432,46 @@ abstract class VeloxAggregateFunctionsSuite extends 
VeloxWholeStageTransformerSu
   }
 
   // Disable for Sparke3.5.
-  testWithSpecifiedSparkVersion("regr_sxy", Some("3.4"), Some("3.4")) {
+  testWithSpecifiedSparkVersion("regr_sxy regr_sxx regr_syy", Some("3.4"), 
Some("3.4")) {
 runQueryAndCompare("""
- |select regr_sxy(l_partkey, l_suppkey) from lineitem;
+ |select regr_sxy(l_quantity, l_tax) from lineitem;
  |""".stripMargin) {
   checkGlutenOperatorMatch[HashAggregateExecTransformer]
 }
 runQueryAndCompare(
-  "select regr_sxy(l_partkey, l_suppkey), count(distinct l_orderkey) from 
lineitem") {
+  "select regr_sxy(l_quantity, l_tax), count(distinct l_orderkey) from 
lineitem") {
+  df =>
+{
+  assert(
+getExecutedPlan(df).count(
+  plan => {
+plan.isInstanceOf[HashAggregateExecTransformer]
+  }) == 4)
+}
+}
+runQueryAndCompare("""
+ |select regr_sxx(l_quantity, l_tax) from lineitem;
+ |""".stripMargin) {
+  checkGlutenOperatorMatch[HashAggregateExecTransformer]
+}
+runQueryAndCompare(
+  "select regr_sxx(l_quantity, l_tax), count(distinct l_orderkey) from 
lineitem") {
+  df =>
+{
+  assert(
+getExecutedPlan(df).count(
+  plan => {
+plan.isInstanceOf[HashAggregateExecTransformer]
+  }) == 4)
+}
+}
+runQueryAndCompare("""
+ |select regr_syy(l_quantity, l_tax) from lineitem;
+ |""".stripMargin) {
+  checkGlutenOperatorMatch[HashAggregateExecTransformer]
+}
+runQueryAndCompare(
+  "select regr_syy(l_quantity, l_tax), count(distinct l_orderkey) from 
lineitem") {
   df =>
 {
   assert(
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc 
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index f992b94c3..2a5857ae9 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -1106,7 +1106,8 @@ bool SubstraitToVeloxPlanValidator::validate(const 
::substrait::AggregateRel& ag
   "kurtosis",
   "regr_slope",
   "regr_intercept",
-  "regr_sxy"};
+  "regr_sxy",
+  "regr_replacement"};
 
   auto udfFuncs = UdfLoader::getInstance()->getRegisteredUdafNames();
 
diff --git a/docs/velox-backend-support-progress.md 
b/docs/velox-backend-support-progress.md
index 5e81081b7..4b480529e 100644
--- a/docs/velox-backend-support-progress.md
+++ b/docs/velox-backend-support-progress.md
@@ -384,6 +384,9 @@ Gluten supports 199 functions. (Drag to right to see all 
data types)
 | regr_r2   | regr_r2| regr_r2 
  | S  |  | |  | S | S   | S| S 
| S  |  |   || |  ||  | 
  | || |
 | regr_intercept| regr_intercept | regr_intercept  
  | S  |  | |  | S | S   | S| S 
| S

(incubator-gluten) branch main updated: [VL] Fix negative buffer size (#5441)

2024-04-17 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 74c54f39d [VL] Fix negative buffer size (#5441)
74c54f39d is described below

commit 74c54f39d92967fc45733a6270ceabfcedd3866b
Author: WangGuangxin 
AuthorDate: Thu Apr 18 10:42:06 2024 +0800

[VL] Fix negative buffer size (#5441)
---
 .../scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala| 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
index 05d663d05..f1807fe4f 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala
@@ -153,7 +153,7 @@ object RowToVeloxColumnarExec {
 }
 val rowLength = new ListBuffer[Long]()
 var rowCount = 0
-var offset = 0
+var offset = 0L
 val sizeInBytes = row.getSizeInBytes
 // allocate buffer based on 1st row, but if first row is very big, 
this will cause OOM
 // maybe we should optimize to list ArrayBuf to native to avoid buf 
close and allocate
@@ -182,7 +182,7 @@ object RowToVeloxColumnarExec {
 val unsafeRow = convertToUnsafeRow(row)
 val sizeInBytes = unsafeRow.getSizeInBytes
 if ((offset + sizeInBytes) > arrowBuf.capacity()) {
-  val tmpBuf = arrowAllocator.buffer(((offset + sizeInBytes) * 
2).toLong)
+  val tmpBuf = arrowAllocator.buffer((offset + sizeInBytes) * 2)
   tmpBuf.setBytes(0, arrowBuf, 0, offset)
   arrowBuf.close()
   arrowBuf = tmpBuf


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Fix kParquetWriteTimestampUnit to kParquetWriteTimestampUnitSession (#5281)

2024-04-10 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new d36b76957 [VL] Fix kParquetWriteTimestampUnit to 
kParquetWriteTimestampUnitSession (#5281)
d36b76957 is described below

commit d36b76957cf2133d4ca801603808e4c15d0c759d
Author: Yang Zhang 
AuthorDate: Thu Apr 11 09:05:05 2024 +0800

[VL] Fix kParquetWriteTimestampUnit to kParquetWriteTimestampUnitSession 
(#5281)
---
 cpp/velox/compute/VeloxBackend.cc | 4 ++--
 cpp/velox/compute/WholeStageResultIterator.cc | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/cpp/velox/compute/VeloxBackend.cc 
b/cpp/velox/compute/VeloxBackend.cc
index 6e010ec18..8f1cab48b 100644
--- a/cpp/velox/compute/VeloxBackend.cc
+++ b/cpp/velox/compute/VeloxBackend.cc
@@ -258,8 +258,7 @@ void VeloxBackend::initCache(const std::shared_ptr& conf) {
-  int32_t ioThreads = conf->get(kVeloxIOThreads, 
kVeloxIOThreadsDefault);
-
+  // The configs below are used at process level.
   auto mutableConf = 
std::make_shared(conf->valuesCopy());
 
   auto hiveConf = getHiveConfig(conf);
@@ -303,6 +302,7 @@ void VeloxBackend::initConnector(const 
std::shared_ptrget(kCachePrefetchMinPct, 0);
 
+  auto ioThreads = conf->get(kVeloxIOThreads, kVeloxIOThreadsDefault);
   if (ioThreads > 0) {
 ioExecutor_ = std::make_unique(ioThreads);
   }
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc 
b/cpp/velox/compute/WholeStageResultIterator.cc
index 89b77ac85..e105a0d64 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -559,12 +559,13 @@ std::unordered_map 
WholeStageResultIterator::getQueryC
 }
 
 std::shared_ptr 
WholeStageResultIterator::createConnectorConfig() {
+  // The configs below are used at session level.
   std::unordered_map configs = {};
   // The semantics of reading as lower case is opposite with case-sensitive.
   
configs[velox::connector::hive::HiveConfig::kFileColumnNamesReadAsLowerCaseSession]
 =
   !veloxCfg_->get(kCaseSensitive, false) ? "true" : "false";
   
configs[velox::connector::hive::HiveConfig::kPartitionPathAsLowerCaseSession] = 
"false";
-  configs[velox::connector::hive::HiveConfig::kParquetWriteTimestampUnit] = 
"6";
+  
configs[velox::connector::hive::HiveConfig::kParquetWriteTimestampUnitSession] 
= "6";
   configs[velox::connector::hive::HiveConfig::kMaxPartitionsPerWritersSession] 
=
   std::to_string(veloxCfg_->get(kMaxPartitions, 1));
   configs[velox::connector::hive::HiveConfig::kIgnoreMissingFilesSession] =


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



(incubator-gluten) branch main updated: [VL] Restore the test cases for corr in group-by.sql and udf-group-by.sql (#5175)

2024-03-29 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new ae41b54dc [VL] Restore the test cases for corr in group-by.sql and 
udf-group-by.sql (#5175)
ae41b54dc is described below

commit ae41b54dcaacb7fde8e7bac7689ff7797ad9fd06
Author: Joey 
AuthorDate: Fri Mar 29 14:35:04 2024 +0800

[VL] Restore the test cases for corr in group-by.sql and udf-group-by.sql 
(#5175)
---
 .../test/resources/sql-tests/inputs/group-by.sql   |  4 
 .../sql-tests/inputs/udf/udf-group-by.sql  |  4 
 .../resources/sql-tests/results/group-by.sql.out   |  9 
 .../sql-tests/results/udf/udf-group-by.sql.out |  9 
 .../utils/velox/VeloxSQLQueryTestSettings.scala|  4 ++--
 .../test/resources/sql-tests/inputs/group-by.sql   |  6 +
 .../sql-tests/inputs/udf/udf-group-by.sql  |  4 
 .../resources/sql-tests/results/group-by.sql.out   | 27 ++
 .../sql-tests/results/udf/udf-group-by.sql.out |  9 
 .../utils/velox/VeloxSQLQueryTestSettings.scala|  5 ++--
 .../test/resources/sql-tests/inputs/group-by.sql   |  4 
 .../sql-tests/inputs/udf/udf-group-by.sql  |  4 
 .../resources/sql-tests/results/group-by.sql.out   | 10 
 .../sql-tests/results/udf/udf-group-by.sql.out | 10 
 .../utils/velox/VeloxSQLQueryTestSettings.scala|  3 ++-
 15 files changed, 106 insertions(+), 6 deletions(-)

diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql 
b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql
index 4b2e12975..e2c3672a2 100644
--- a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql
+++ b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql
@@ -75,6 +75,10 @@ SELECT 1 from (
 ) b
 where b.z != b.z;
 
+-- SPARK-24369 multiple distinct aggregations having the same argument set
+SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*)
+  FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y);
+
 -- SPARK-25708 HAVING without GROUP BY means global aggregate
 SELECT 1 FROM range(10) HAVING true;
 
diff --git 
a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql 
b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql
index a4df72f44..0cc57c97b 100644
--- a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql
+++ b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql
@@ -71,6 +71,10 @@ SELECT 1 from (
 ) b
 where b.z != b.z;
 
+-- SPARK-24369 multiple distinct aggregations having the same argument set
+SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*)
+  FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y);
+
 -- SPARK-25708 HAVING without GROUP BY means global aggregate
 SELECT udf(1) FROM range(10) HAVING true;
 
diff --git 
a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out 
b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out
index 8986ca9b0..79e6f72df 100644
--- a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out
+++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out
@@ -243,6 +243,15 @@ struct<1:int>
 
 
 
+-- !query
+SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*)
+  FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y)
+-- !query schema
+struct<corr(DISTINCT x, y):double,corr(DISTINCT y, x):double,count(1):bigint>
+-- !query output
+0. 0.  3
+
+
 -- !query
 SELECT 1 FROM range(10) HAVING true
 -- !query schema
diff --git 
a/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out
 
b/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out
index 26d55d341..986815c97 100644
--- 
a/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out
+++ 
b/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out
@@ -243,6 +243,15 @@ struct<1:int>
 
 
 
+-- !query
+SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*)
+  FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y)
+-- !query schema
+struct<corr(DISTINCT x, y):double,udf(corr(DISTINCT y, x)):double,count(1):bigint>
+-- !query output
+0. 0.  3
+
+
 -- !query
 SELECT udf(1) FROM range(10) HAVING true
 -- !query schema
diff --git 
a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala
index 4464dbefd..9ec55f015 100644
--- 
a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala
@@ -230,9 +230,9 @@ object VeloxSQLQueryTestSettings extends 
SQLQueryTestSettings {
 
   val OVERWRITE_SQL_QUERY_LIST: Set[String] = Set(
 // Ve

(incubator-gluten) branch main updated: [VL] Enable SPARK-10634 timestamp test case (#5090)

2024-03-26 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new b962e7cc7 [VL] Enable SPARK-10634 timestamp test case (#5090)
b962e7cc7 is described below

commit b962e7cc74f7a7114770e9a882f10d5eaa59a355
Author: Joey 
AuthorDate: Wed Mar 27 09:32:41 2024 +0800

[VL] Enable SPARK-10634 timestamp test case (#5090)
---
 .../src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala | 2 --
 .../src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala | 2 --
 .../src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala | 2 --
 3 files changed, 6 deletions(-)

diff --git 
a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
index 5f66df1a0..2d92c5ca2 100644
--- 
a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
@@ -857,7 +857,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // decimal failed ut
 .exclude("SPARK-34212 Parquet should read decimals correctly")
 // Timestamp is read as INT96.
-.exclude("SPARK-10634 timestamp written and read as INT64 - truncation")
 .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type")
 .exclude("SPARK-10365 timestamp written and read as INT64 - 
TIMESTAMP_MICROS")
 // Rewrite because the filter after datasource is not needed.
@@ -869,7 +868,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // decimal failed ut
 .exclude("SPARK-34212 Parquet should read decimals correctly")
 // Timestamp is read as INT96.
-.exclude("SPARK-10634 timestamp written and read as INT64 - truncation")
 .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type")
 .exclude("SPARK-10365 timestamp written and read as INT64 - 
TIMESTAMP_MICROS")
 // Rewrite because the filter after datasource is not needed.
diff --git 
a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
index f2e75f84f..dd14a604b 100644
--- 
a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
@@ -682,7 +682,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // decimal failed ut
 .exclude("SPARK-34212 Parquet should read decimals correctly")
 // Timestamp is read as INT96.
-.exclude("SPARK-10634 timestamp written and read as INT64 - truncation")
 .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type")
 .exclude("SPARK-10365 timestamp written and read as INT64 - 
TIMESTAMP_MICROS")
 .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ")
@@ -698,7 +697,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // decimal failed ut
 .exclude("SPARK-34212 Parquet should read decimals correctly")
 // Timestamp is read as INT96.
-.exclude("SPARK-10634 timestamp written and read as INT64 - truncation")
 .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type")
 .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS")
 .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ")
diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
index 1c37e787b..d2555007b 100644
--- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
@@ -668,7 +668,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // decimal failed ut
 .exclude("SPARK-34212 Parquet should read decimals correctly")
 // Timestamp is read as INT96.
-.exclude("SPARK-10634 timestamp written and read as INT64 - truncation")
 .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type")
 .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS")
 .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ")
@@ -684,7 +683,6 @@ class VeloxTestSettings extends BackendTestSettings {
 // decimal failed ut
 .exclude("SPARK-34212 Parquet should read decimals correctly")
 // Timestamp is read as INT96.
-.exc

(incubator-gluten) branch main updated: [GLUTEN-4946][CH] Fix avg(bigint) overflow (#5048)

2024-03-24 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new e9034ff5e [GLUTEN-4946][CH] Fix avg(bigint) overflow (#5048)
e9034ff5e is described below

commit e9034ff5e2ec4cce8cd5defaf2ade9b44b8c8aa3
Author: loudongfeng 
AuthorDate: Mon Mar 25 12:55:00 2024 +0800

[GLUTEN-4946][CH] Fix avg(bigint) overflow (#5048)
---
 .../clickhouse/CHSparkPlanExecApi.scala|  2 +
 .../catalyst/CHAggregateFunctionRewriteRule.scala  | 60 ++
 .../execution/GlutenFunctionValidateSuite.scala| 21 
 .../main/scala/io/glutenproject/GlutenConfig.scala |  8 +++
 4 files changed, 91 insertions(+)

diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala
index 29af5a0e5..4b6ee1909 100644
--- a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala
+++ b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala
@@ -35,6 +35,7 @@ import org.apache.spark.serializer.Serializer
 import org.apache.spark.shuffle.{GenShuffleWriterParameters, GlutenShuffleWriterWrapper, HashPartitioningWrapper}
 import org.apache.spark.shuffle.utils.CHShuffleUtil
 import org.apache.spark.sql.{SparkSession, Strategy}
+import org.apache.spark.sql.catalyst.CHAggregateFunctionRewriteRule
 import org.apache.spark.sql.catalyst.catalog.BucketSpec
 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
 import org.apache.spark.sql.catalyst.expressions._
@@ -518,6 +519,7 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
   override def genExtendedOptimizers(): List[SparkSession => Rule[LogicalPlan]] = {
 List(
   spark => new CommonSubexpressionEliminateRule(spark, spark.sessionState.conf),
+  spark => CHAggregateFunctionRewriteRule(spark),
   _ => CountDistinctWithoutExpand
 )
   }
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/catalyst/CHAggregateFunctionRewriteRule.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/catalyst/CHAggregateFunctionRewriteRule.scala
new file mode 100644
index 0..623db7993
--- /dev/null
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/catalyst/CHAggregateFunctionRewriteRule.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst
+
+import io.glutenproject.GlutenConfig
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.expressions.Cast
+import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Average}
+import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.types._
+
+/**
+ * Avg(Int) function: CH use input type for intermediate sum type, while spark use double so need
+ * convert .
+ * @param spark
+ */
+case class CHAggregateFunctionRewriteRule(spark: SparkSession) extends Rule[LogicalPlan] {
+  override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
+case a: Aggregate =>
+  a.transformExpressions {
+case avgExpr @ AggregateExpression(avg: Average, _, _, _, _)
+if GlutenConfig.getConf.enableCastAvgAggregateFunction &&
+  GlutenConfig.getConf.enableColumnarHashAgg &&
+  !avgExpr.isDistinct && isDataTypeNeedConvert(avg.child.dataType) =>
+  AggregateExpression(
+avg.copy(child = Cast(avg.child, DoubleType)),
+avgExpr.mode,
+avgExpr.isDistinct,
+avgExpr.filter,
+avgExpr.resultId
+  )
+  }
+  }
+
+  private def isDataTypeNeedConvert(dataType: DataType): Boolean = {
+dataType match {
+  case FloatType => true
+  case IntegerType => true
+  case L

(incubator-gluten) branch main updated: [VL] Add large precision tests for decimal sum and avg (#4961)

2024-03-18 Thread rui
This is an automated email from the ASF dual-hosted git repository.

rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
 new 94e1638c5 [VL] Add large precision tests for decimal sum and avg (#4961)
94e1638c5 is described below

commit 94e1638c543b1397b709e2fe5ad0717223053c80
Author: Joey 
AuthorDate: Tue Mar 19 13:11:44 2024 +0800

[VL] Add large precision tests for decimal sum and avg (#4961)
---
 .../execution/VeloxAggregateFunctionsSuite.scala   | 27 ++
 1 file changed, 27 insertions(+)

diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala
index 26bea5b1c..c0143d0ae 100644
--- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala
+++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala
@@ -99,6 +99,19 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu
   }) == 4)
 }
 }
+// Test the situation that precision + 4 of input decimal value exceeds 38.
+runQueryAndCompare(
+  "select avg(cast (l_quantity as DECIMAL(36, 2))), " +
+"count(distinct l_partkey) from lineitem") {
+  df =>
+{
+  assert(
+getExecutedPlan(df).count(
+  plan => {
+plan.isInstanceOf[HashAggregateExecTransformer]
+  }) == 4)
+}
+}
   }
 
   test("sum") {
@@ -142,6 +155,20 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu
   }) == 4)
 }
 }
+
+// Test the situation that precision + 4 of input decimal value exceeds 38.
+runQueryAndCompare(
+  "select sum(cast (l_quantity as DECIMAL(36, 2))), " +
+"count(distinct l_partkey) from lineitem") {
+  df =>
+{
+  assert(
+getExecutedPlan(df).count(
+  plan => {
+plan.isInstanceOf[HashAggregateExecTransformer]
+  }) == 4)
+}
+}
   }
 
   test("min and max") {


-
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org



[no subject]

2024-02-26 Thread Rui Mo