(incubator-gluten) branch main updated: [CORE] Use the smaller table to build hashmap in shuffled hash join (#5750)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new f48b9fa09 [CORE] Use the smaller table to build hashmap in shuffled hash join (#5750) f48b9fa09 is described below commit f48b9fa09f6421f861a22d8027d2cb81767f4e5c Author: Mingliang Zhu AuthorDate: Fri May 31 11:12:28 2024 +0800 [CORE] Use the smaller table to build hashmap in shuffled hash join (#5750) --- .../gluten/backendsapi/velox/VeloxBackend.scala| 20 +- .../gluten/extension/StrategyOverrides.scala | 12 +- .../extension/columnar/OffloadSingleNode.scala | 43 +- .../extension/columnar/TransformHintRule.scala | 31 +- .../gluten/planner/cost/GlutenCostModel.scala | 6 - .../sql/execution/joins/HashJoin.scala.deprecated | 774 - .../sql/execution/joins/HashJoin.scala.deprecated | 774 - .../sql/execution/joins/HashJoin.scala.deprecated | 774 - .../sql/execution/joins/HashJoin.scala.deprecated | 774 - 9 files changed, 41 insertions(+), 3167 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala index c8dbfb29e..f06929fff 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala @@ -29,7 +29,7 @@ import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat.{DwrfReadFo import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Descending, Expression, Lag, Lead, Literal, MakeYMInterval, NamedExpression, NthValue, NTile, PercentRank, Rand, RangeFrame, Rank, RowNumber, SortOrder, SparkPartitionID, SpecialFrameBoundary, SpecifiedWindowFrame, Uuid} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, ApproximatePercentile, Count, Sum} -import org.apache.spark.sql.catalyst.plans.JoinType +import org.apache.spark.sql.catalyst.plans.{JoinType, LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.execution.{ProjectExec, SparkPlan} import org.apache.spark.sql.execution.aggregate.HashAggregateExec @@ -375,13 +375,10 @@ object VeloxBackendSettings extends BackendSettingsApi { } else { t match { // OPPRO-266: For Velox backend, build right and left are both supported for - // LeftOuter and LeftSemi. - // FIXME Hongze 22/12/06 - // HashJoin.scala in shim was not always loaded by class loader. - // The file should be removed and we temporarily disable the improvement - // introduced by OPPRO-266 by commenting out the following prerequisite - // condition. -// case LeftOuter | LeftSemi => true + // LeftOuter. + // TODO: Support LeftSemi after resolve issue + // https://github.com/facebookincubator/velox/issues/9980 + case LeftOuter => true case _ => false } } @@ -393,12 +390,7 @@ object VeloxBackendSettings extends BackendSettingsApi { } else { t match { // OPPRO-266: For Velox backend, build right and left are both supported for RightOuter. - // FIXME Hongze 22/12/06 - // HashJoin.scala in shim was not always loaded by class loader. - // The file should be removed and we temporarily disable the improvement - // introduced by OPPRO-266 by commenting out the following prerequisite - // condition. -// case RightOuter => true + case RightOuter => true case _ => false } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala index d016eaccc..f2f786259 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/StrategyOverrides.scala @@ -111,7 +111,9 @@ case class JoinSelectionOverrides(session: SparkSession) // it don't use this side as the build side (!leftHintMergeEnabled, !rightHintMergeEnabled) } else { - (canBuildShuffledHashJoinLeft(joinType), canBuildShuffledHashJoinRight(joinType)) + ( + BackendsApiManager.getSettings.supportHashBuildJoinTypeOnLeft(joinType), + BackendsApiManager.getSettings.supportHashBuildJoinTypeOnRight(joinType)) }
(incubator-gluten) branch main updated: [VL] Enable SortShuffleSuite with ColumnarShuffleManager (#5816)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 588faae35 [VL] Enable SortShuffleSuite with ColumnarShuffleManager (#5816) 588faae35 is described below commit 588faae351bff29d868336f530aa72eb99a57083 Author: Ankita Victor AuthorDate: Wed May 29 18:33:29 2024 +0530 [VL] Enable SortShuffleSuite with ColumnarShuffleManager (#5816) --- .../utils/clickhouse/ClickHouseTestSettings.scala | 1 + .../gluten/utils/velox/VeloxTestSettings.scala | 2 ++ .../org/apache/spark/GlutenSortShuffleSuite.scala | 24 ++ .../utils/clickhouse/ClickHouseTestSettings.scala | 1 + .../gluten/utils/velox/VeloxTestSettings.scala | 2 ++ .../org/apache/spark/GlutenSortShuffleSuite.scala | 24 ++ .../utils/clickhouse/ClickHouseTestSettings.scala | 1 + .../gluten/utils/velox/VeloxTestSettings.scala | 2 ++ .../org/apache/spark/GlutenSortShuffleSuite.scala | 24 ++ .../utils/clickhouse/ClickHouseTestSettings.scala | 1 + .../gluten/utils/velox/VeloxTestSettings.scala | 2 ++ .../org/apache/spark/GlutenSortShuffleSuite.scala | 24 ++ 12 files changed, 108 insertions(+) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index afc427cd3..2c34baa63 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -68,6 +68,7 @@ class ClickHouseTestSettings extends BackendTestSettings { false // nativeDoValidate failed due to spark conf cleanup case "GlutenDataSourceV2SQLSuite" => false // nativeDoValidate failed due to spark conf cleanup + case "GlutenSortShuffleSuite" => false case _ => true } preCheck && super.shouldRun(suiteName, testName) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index c78d8230e..664cd37d1 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -18,6 +18,7 @@ package org.apache.gluten.utils.velox import org.apache.gluten.utils.{BackendTestSettings, SQLQueryTestSettings} +import org.apache.spark.GlutenSortShuffleSuite import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite, GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite, GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuite, GlutenDataSourceV2Suite, GlutenFileDataSourceV2FallBackSuite, GlutenLocalScanSuite, GlutenSupportsCatalogOptionsSuite, GlutenTableCapabilityCheckSuite, GlutenWriteDistributionAndOrderingSuite} @@ -229,6 +230,7 @@ class VeloxTestSettings extends BackendTestSettings { // Spark round UT for round(3.1415,3) is not correct. .exclude("round/bround") enableSuite[GlutenMathFunctionsSuite] + enableSuite[GlutenSortShuffleSuite] enableSuite[GlutenSortOrderExpressionsSuite] enableSuite[GlutenBitwiseExpressionsSuite] enableSuite[GlutenStringExpressionsSuite] diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/GlutenSortShuffleSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/GlutenSortShuffleSuite.scala new file mode 100644 index 0..338d7992e --- /dev/null +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/GlutenSortShuffleSuite.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark + +class GlutenSortShuffl
(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_27) (#5872)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new e1469f0c3 [VL] Daily Update Velox Version (2024_05_27) (#5872) e1469f0c3 is described below commit e1469f0c3859c9dd5f0edb9d2f5890794a540cee Author: Gluten Performance Bot <137994563+glutenperf...@users.noreply.github.com> AuthorDate: Mon May 27 14:35:46 2024 +0800 [VL] Daily Update Velox Version (2024_05_27) (#5872) --- ep/build-velox/src/get_velox.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 6c25c8f08..b71a7ad47 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_05_24 +VELOX_BRANCH=2024_05_27 VELOX_HOME="" #Set on run gluten on HDFS - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_24) (#5860)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new aeea6498e [VL] Daily Update Velox Version (2024_05_24) (#5860) aeea6498e is described below commit aeea6498ece025004d71e1090141a25d39771245 Author: Gluten Performance Bot <137994563+glutenperf...@users.noreply.github.com> AuthorDate: Fri May 24 11:40:08 2024 +0800 [VL] Daily Update Velox Version (2024_05_24) (#5860) --- ep/build-velox/src/get_velox.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index af032e186..6c25c8f08 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_05_23 +VELOX_BRANCH=2024_05_24 VELOX_HOME="" #Set on run gluten on HDFS - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [GLUTEN-4039][VL] Support width_bucket function (#5634)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new ec3f6b318 [GLUTEN-4039][VL] Support width_bucket function (#5634) ec3f6b318 is described below commit ec3f6b318ef09d1f697997efdbb24e75bd2e0835 Author: 高阳阳 AuthorDate: Fri May 24 09:27:04 2024 +0800 [GLUTEN-4039][VL] Support width_bucket function (#5634) --- .../org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala | 7 +++ .../scala/org/apache/gluten/expression/ExpressionMappings.scala| 1 + .../main/scala/org/apache/gluten/expression/ExpressionNames.scala | 1 + 3 files changed, 9 insertions(+) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 3180842ad..f9ec07619 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -594,6 +594,13 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest { } } + testWithSpecifiedSparkVersion("Test width_bucket function", Some("3.4")) { +runQueryAndCompare("""SELECT width_bucket(2, 0, 4, 3), l_orderkey + | from lineitem limit 100""".stripMargin) { + checkGlutenOperatorMatch[ProjectExecTransformer] +} + } + testWithSpecifiedSparkVersion("Test url_decode function", Some("3.4")) { withTempPath { path => diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala index c734967de..14371a71e 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala @@ -275,6 +275,7 @@ object ExpressionMappings { Sig[PromotePrecision](PROMOTE_PRECISION), Sig[MonotonicallyIncreasingID](MONOTONICALLY_INCREASING_ID), Sig[SparkPartitionID](SPARK_PARTITION_ID), +Sig[WidthBucket](WIDTH_BUCKET), // Decimal Sig[UnscaledValue](UNSCALED_VALUE), // Generator function diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala index eded85e06..6e6502c19 100644 --- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala @@ -302,6 +302,7 @@ object ExpressionNames { final val PROMOTE_PRECISION = "promote_precision" final val SPARK_PARTITION_ID = "spark_partition_id" final val MONOTONICALLY_INCREASING_ID = "monotonically_increasing_id" + final val WIDTH_BUCKET = "width_bucket" // Directly use child expression transformer final val KNOWN_NULLABLE = "known_nullable" - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Enable NaN tests for array functions (#5854)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 9a38eba11 [VL] Enable NaN tests for array functions (#5854) 9a38eba11 is described below commit 9a38eba11aaab68d7e0722a1029fe7412acbbfb1 Author: Rui Mo AuthorDate: Fri May 24 08:45:46 2024 +0800 [VL] Enable NaN tests for array functions (#5854) --- .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 6 -- .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 6 -- .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 6 -- .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 6 -- 4 files changed, 24 deletions(-) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 5e3591203..c78d8230e 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -202,12 +202,6 @@ class VeloxTestSettings extends BackendTestSettings { // Rewrite in Gluten to replace Seq with Array .exclude("Shuffle") .excludeGlutenTest("Shuffle") -// TODO: ArrayDistinct should handle duplicated Double.NaN -.excludeByPrefix("SPARK-36741") -// TODO: ArrayIntersect should handle duplicated Double.NaN -.excludeByPrefix("SPARK-36754") -// Not supported case. -.exclude("SPARK-36753: ArrayExcept should handle duplicated Double.NaN and Float.Nan") enableSuite[GlutenDateExpressionsSuite] // Rewrite because Spark collect causes long overflow. .exclude("TIMESTAMP_MICROS") diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 1d796aa1b..3b32cebca 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -115,12 +115,6 @@ class VeloxTestSettings extends BackendTestSettings { // Rewrite in Gluten to replace Seq with Array .exclude("Shuffle") .excludeGlutenTest("Shuffle") -// TODO: ArrayDistinct should handle duplicated Double.NaN -.excludeByPrefix("SPARK-36741") -// TODO: ArrayIntersect should handle duplicated Double.NaN -.excludeByPrefix("SPARK-36754") -// Not supported case. -.exclude("SPARK-36753: ArrayExcept should handle duplicated Double.NaN and Float.Nan") enableSuite[GlutenConditionalExpressionSuite] enableSuite[GlutenDateExpressionsSuite] // Has exception in fallback execution when we use resultDF.collect in evaluation. diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 7c8509f80..3a993189d 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -96,12 +96,6 @@ class VeloxTestSettings extends BackendTestSettings { // Rewrite in Gluten to replace Seq with Array .exclude("Shuffle") .excludeGlutenTest("Shuffle") -// TODO: ArrayDistinct should handle duplicated Double.NaN -.excludeByPrefix("SPARK-36741") -// TODO: ArrayIntersect should handle duplicated Double.NaN -.excludeByPrefix("SPARK-36754") -// Not supported case. -.exclude("SPARK-36753: ArrayExcept should handle duplicated Double.NaN and Float.Nan") enableSuite[GlutenConditionalExpressionSuite] enableSuite[GlutenDateExpressionsSuite] // Has exception in fallback execution when we use resultDF.collect in evaluation. diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 76b666779..98942462a 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -97,12 +97,6 @@ class VeloxTestSettings extends BackendTestSettings { // Rewrite in Gluten to replace Seq with Array .exclude("Shuffle") .excludeGlutenTest("Shuffle") -// TODO: ArrayDistinct should handle duplicated Double.NaN -.exclude
(incubator-gluten) branch main updated: [VL] Enable rand function (#5829)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 33eadbfa6 [VL] Enable rand function (#5829) 33eadbfa6 is described below commit 33eadbfa62e988aeef43c1c888abf9b601de4cab Author: 高阳阳 AuthorDate: Thu May 23 20:10:54 2024 +0800 [VL] Enable rand function (#5829) --- .../clickhouse/CHSparkPlanExecApi.scala|7 - .../execution/ScalarFunctionsValidateSuite.scala |6 + .../functions/RegistrationAllFunctions.cc |4 + cpp/velox/substrait/SubstraitParser.cc |3 + docs/velox-backend-support-progress.md |2 + .../gluten/backendsapi/SparkPlanExecApi.scala |7 - .../gluten/expression/ExpressionConverter.scala|5 - .../expression/UnaryExpressionTransformer.scala| 22 - .../sql-tests/inputs/group-by-ordinal.sql | 96 ++ .../src/test/resources/sql-tests/inputs/random.sql | 17 + .../sql-tests/results/group-by-ordinal.sql.out | 398 .../resources/sql-tests/results/group-by.sql.out |2 +- .../resources/sql-tests/results/random.sql.out | 84 ++ .../sql-tests/inputs/group-by-ordinal.sql | 96 ++ .../src/test/resources/sql-tests/inputs/random.sql | 17 + .../sql-tests/results/group-by-ordinal.sql.out | 398 .../resources/sql-tests/results/group-by.sql.out |2 +- .../resources/sql-tests/results/random.sql.out | 84 ++ .../sql-tests/inputs/group-by-ordinal.sql | 96 ++ .../src/test/resources/sql-tests/inputs/random.sql | 17 + .../sql-tests/results/group-by-ordinal.sql.out | 523 ++ .../resources/sql-tests/results/group-by.sql.out |2 +- .../resources/sql-tests/results/random.sql.out | 115 +++ .../sql-tests/inputs/group-by-ordinal.sql | 96 ++ .../src/test/resources/sql-tests/inputs/random.sql | 17 + .../sql-tests/inputs/table-valued-functions.sql| 126 +++ .../sql-tests/results/group-by-ordinal.sql.out | 524 ++ .../resources/sql-tests/results/group-by.sql.out |2 +- .../resources/sql-tests/results/random.sql.out | 115 +++ .../results/table-valued-functions.sql.out | 1017 .../gluten/utils/velox/VeloxTestSettings.scala |1 + .../spark/sql/GlutenGeneratorFunctionSuite.scala | 11 +- 32 files changed, 3866 insertions(+), 46 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala index 6a154cd94..8c2b20db6 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala @@ -387,13 +387,6 @@ class CHSparkPlanExecApi extends SparkPlanExecApi { original: GetMapValue): ExpressionTransformer = GetMapValueTransformer(substraitExprName, left, right, original.failOnError, original) - override def genRandTransformer( - substraitExprName: String, - explicitSeed: ExpressionTransformer, - original: Rand): ExpressionTransformer = { -GenericExpressionTransformer(substraitExprName, Seq(explicitSeed), original) - } - /** * Generate ShuffleDependency for ColumnarShuffleExchangeExec. * diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index b3753ab83..3180842ad 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -717,6 +717,12 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest { } } + test("Test rand function") { +runQueryAndCompare("""SELECT rand() from lineitem limit 100""".stripMargin, false) { + checkGlutenOperatorMatch[ProjectExecTransformer] +} + } + test("regexp_replace") { runQueryAndCompare( "SELECT regexp_replace(c_comment, '\\w', 'something') FROM customer limit 50") { diff --git a/cpp/velox/operators/functions/RegistrationAllFunctions.cc b/cpp/velox/operators/functions/RegistrationAllFunctions.cc index 5a6b0f6aa..b88d781b6 100644 --- a/cpp/velox/operators/functions/RegistrationAllFunctions.cc +++ b/cpp/velox/operators/functions/RegistrationAllFunctions.cc @@ -27,6 +27,7 @@ #include "velox/functions/prestosql/window/WindowFunctionsRegistration.h" #include "velox/functions/sparksql/Bitwise.h" #include "velox/fu
(incubator-gluten) branch main updated: [VL] Upgrade cmake version to 3.28.3 in CI image (#5842)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 7c777be25 [VL] Upgrade cmake version to 3.28.3 in CI image (#5842) 7c777be25 is described below commit 7c777be25fcd549dd53653986c6d40bd6cdcb965 Author: Yuan AuthorDate: Thu May 23 08:40:04 2024 +0800 [VL] Upgrade cmake version to 3.28.3 in CI image (#5842) --- .github/workflows/velox_docker.yml | 2 +- dev/ci-velox-buildstatic.sh| 2 +- dev/vcpkg/init.sh | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml index 88c6c2a24..d7644b5d0 100644 --- a/.github/workflows/velox_docker.yml +++ b/.github/workflows/velox_docker.yml @@ -49,7 +49,7 @@ concurrency: jobs: build-native-lib-centos-7: runs-on: ubuntu-20.04 -container: apache/gluten:gluten-vcpkg-builder_2024_03_17 # centos7 with dependencies installed +container: apache/gluten:gluten-vcpkg-builder_2024_05_22 # centos7 with dependencies installed steps: - uses: actions/checkout@v2 - name: Generate cache key diff --git a/dev/ci-velox-buildstatic.sh b/dev/ci-velox-buildstatic.sh index a9b9d2c3f..208490d1c 100755 --- a/dev/ci-velox-buildstatic.sh +++ b/dev/ci-velox-buildstatic.sh @@ -2,7 +2,7 @@ yum install sudo patch java-1.8.0-openjdk-devel -y cd $GITHUB_WORKSPACE/ep/build-velox/src ./get_velox.sh source /opt/rh/devtoolset-9/enable -source $GITHUB_WORKSPACE//dev/vcpkg/env.sh +source /opt/gluten/dev/vcpkg/env.sh cd $GITHUB_WORKSPACE/ sed -i '/^headers/d' ep/build-velox/build/velox_ep/CMakeLists.txt export NUM_THREADS=4 diff --git a/dev/vcpkg/init.sh b/dev/vcpkg/init.sh index 141543af4..e69aec94a 100755 --- a/dev/vcpkg/init.sh +++ b/dev/vcpkg/init.sh @@ -16,6 +16,9 @@ if [ ! -d "$VCPKG_ROOT" ] || [ -z "$(ls "$VCPKG_ROOT")" ]; then fi [ -f "$VCPKG" ] || "$VCPKG_ROOT/bootstrap-vcpkg.sh" -disableMetrics +sed -i "s/3.27.1/3.28.3/g" $VCPKG_ROOT/scripts/vcpkgTools.xml +sed -i "s/192374a68e2971f04974a194645726196d9b8ee7abd650d1e6f65f7aa2ccc9b186c3edb473bb4958c764532edcdd42f4182ee1fcb86b17d78b0bcd6305ce3df1/bd311ca835ef0914952f21d70d1753564d58de2ede02e80ede96e78cd2f40b4189e006007643ebb37792e13edd97eb4a33810bc8aca1eab6dd428eaffe1d2e38/g" $VCPKG_ROOT/scripts/vcpkgTools.xml + $VCPKG install --no-print-usage \ --triplet="${VCPKG_TRIPLET}" --host-triplet="${VCPKG_TRIPLET}" - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_22) (#5834)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new a7e536ebb [VL] Daily Update Velox Version (2024_05_22) (#5834) a7e536ebb is described below commit a7e536ebb11a685381bf8a799f16f42789b7bc43 Author: Gluten Performance Bot <137994563+glutenperf...@users.noreply.github.com> AuthorDate: Wed May 22 18:59:31 2024 +0800 [VL] Daily Update Velox Version (2024_05_22) (#5834) --- cpp/velox/benchmarks/PlanValidatorUtil.cc | 4 ++-- cpp/velox/compute/WholeStageResultIterator.cc | 2 +- cpp/velox/jni/VeloxJniWrapper.cc| 4 ++-- cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc | 2 +- ep/build-velox/src/get_velox.sh | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/velox/benchmarks/PlanValidatorUtil.cc b/cpp/velox/benchmarks/PlanValidatorUtil.cc index e299b4620..46f2733f2 100644 --- a/cpp/velox/benchmarks/PlanValidatorUtil.cc +++ b/cpp/velox/benchmarks/PlanValidatorUtil.cc @@ -45,9 +45,9 @@ int main(int argc, char** argv) { conf.insert({kDebugModeEnabled, "true"}); initVeloxBackend(conf); std::unordered_map configs{{core::QueryConfig::kSparkPartitionId, "0"}}; - core::QueryCtx queryCtx(nullptr, core::QueryConfig(configs)); + auto queryCtx = core::QueryCtx::create(nullptr, core::QueryConfig(configs)); auto pool = defaultLeafVeloxMemoryPool().get(); - core::ExecCtx execCtx(pool, ); + core::ExecCtx execCtx(pool, queryCtx.get()); ::substrait::Plan subPlan; parseProtobuf(reinterpret_cast(plan.data()), plan.size(), ); diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 852c7e3cc..f719c119c 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -169,7 +169,7 @@ std::shared_ptr WholeStageResultIterator::createNewVeloxQ std::unordered_map> connectorConfigs; connectorConfigs[kHiveConnectorId] = createConnectorConfig(); - std::shared_ptr ctx = std::make_shared( + std::shared_ptr ctx = velox::core::QueryCtx::create( nullptr, facebook::velox::core::QueryConfig{getQueryContextConf()}, connectorConfigs, diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc index 7884280c3..9da7355d1 100644 --- a/cpp/velox/jni/VeloxJniWrapper.cc +++ b/cpp/velox/jni/VeloxJniWrapper.cc @@ -120,10 +120,10 @@ Java_org_apache_gluten_vectorized_PlanEvaluatorJniWrapper_nativeValidateWithFail // A query context with dummy configs. Used for function validation. std::unordered_map configs{ {velox::core::QueryConfig::kSparkPartitionId, "0"}, {velox::core::QueryConfig::kSessionTimezone, "GMT"}}; - velox::core::QueryCtx queryCtx(nullptr, velox::core::QueryConfig(configs)); + auto queryCtx = velox::core::QueryCtx::create(nullptr, velox::core::QueryConfig(configs)); auto pool = gluten::defaultLeafVeloxMemoryPool().get(); // An execution context used for function validation. - velox::core::ExecCtx execCtx(pool, ); + velox::core::ExecCtx execCtx(pool, queryCtx.get()); gluten::SubstraitToVeloxPlanValidator planValidator(pool, ); jclass infoCls = env->FindClass("Lorg/apache/gluten/validate/NativePlanValidationInfo;"); diff --git a/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc b/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc index d5eafa1e2..0a957f038 100644 --- a/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc +++ b/cpp/velox/tests/Substrait2VeloxPlanValidatorTest.cc @@ -46,7 +46,7 @@ class Substrait2VeloxPlanValidatorTest : public exec::test::HiveConnectorTestBas } bool validatePlan(::substrait::Plan& plan) { -std::shared_ptr queryCtx = std::make_shared(); +auto queryCtx = core::QueryCtx::create(); // An execution context used for function validation. std::unique_ptr execCtx = std::make_unique(pool_.get(), queryCtx.get()); diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 70b3a9b09..fbb0f7067 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_05_21 +VELOX_BRANCH=2024_05_22 VELOX_HOME="" #Set on run gluten on HDFS - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_21) (#5819)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new ebd9f9f96 [VL] Daily Update Velox Version (2024_05_21) (#5819) ebd9f9f96 is described below commit ebd9f9f96aff26e56d16a76d994e357c1880c6da Author: Gluten Performance Bot <137994563+glutenperf...@users.noreply.github.com> AuthorDate: Tue May 21 11:59:16 2024 +0800 [VL] Daily Update Velox Version (2024_05_21) (#5819) --- ep/build-velox/src/get_velox.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index c37933d10..70b3a9b09 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_05_20 +VELOX_BRANCH=2024_05_21 VELOX_HOME="" #Set on run gluten on HDFS - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_20) (#5807)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 864c6bb66 [VL] Daily Update Velox Version (2024_05_20) (#5807) 864c6bb66 is described below commit 864c6bb84dac673038beb227579ed7eb0e6a Author: Gluten Performance Bot <137994563+glutenperf...@users.noreply.github.com> AuthorDate: Mon May 20 19:27:43 2024 +0800 [VL] Daily Update Velox Version (2024_05_20) (#5807) --- ep/build-velox/src/get_velox.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 33a82ca57..c37933d10 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_05_17 +VELOX_BRANCH=2024_05_20 VELOX_HOME="" #Set on run gluten on HDFS - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_17) (#5781)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new be760ee6e [VL] Daily Update Velox Version (2024_05_17) (#5781) be760ee6e is described below commit be760ee6e2f8346f679af1f43dc94e029c5579a3 Author: Rui Mo AuthorDate: Sat May 18 16:05:49 2024 +0800 [VL] Daily Update Velox Version (2024_05_17) (#5781) --- cpp/velox/compute/WholeStageResultIterator.cc | 12 ++-- cpp/velox/compute/WholeStageResultIterator.h | 1 + ep/build-velox/src/get_velox.sh | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 06a7a7c39..852c7e3cc 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -72,6 +72,11 @@ WholeStageResultIterator::WholeStageResultIterator( gluten::updateHdfsTokens(veloxCfg_.get()); #endif spillStrategy_ = veloxCfg_->get(kSpillStrategy, kSpillStrategyDefaultValue); + auto spillThreadNum = veloxCfg_->get(kSpillThreadNum, kSpillThreadNumDefaultValue); + if (spillThreadNum > 0) { +spillExecutor_ = std::make_shared(spillThreadNum); + } + getOrderedNodeIds(veloxPlan_, orderedNodeIds_); // Create task instance. @@ -164,18 +169,13 @@ std::shared_ptr WholeStageResultIterator::createNewVeloxQ std::unordered_map> connectorConfigs; connectorConfigs[kHiveConnectorId] = createConnectorConfig(); - auto spillThreadNum = veloxCfg_->get(kSpillThreadNum, kSpillThreadNumDefaultValue); - std::shared_ptr spillExecutor = nullptr; - if (spillThreadNum > 0) { -spillExecutor = std::make_shared(spillThreadNum); - } std::shared_ptr ctx = std::make_shared( nullptr, facebook::velox::core::QueryConfig{getQueryContextConf()}, connectorConfigs, gluten::VeloxBackend::get()->getAsyncDataCache(), memoryManager_->getAggregateMemoryPool(), - std::move(spillExecutor), + spillExecutor_.get(), ""); return ctx; } diff --git a/cpp/velox/compute/WholeStageResultIterator.h b/cpp/velox/compute/WholeStageResultIterator.h index 0ad3877ff..5e661f404 100644 --- a/cpp/velox/compute/WholeStageResultIterator.h +++ b/cpp/velox/compute/WholeStageResultIterator.h @@ -110,6 +110,7 @@ class WholeStageResultIterator : public ColumnarBatchIterator { /// Spill. std::string spillStrategy_; + std::shared_ptr spillExecutor_ = nullptr; /// Metrics std::unique_ptr metrics_{}; diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 17a0b3796..33a82ca57 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_05_16 +VELOX_BRANCH=2024_05_17 VELOX_HOME="" #Set on run gluten on HDFS - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_16) (#5756)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 0d4258d78 [VL] Daily Update Velox Version (2024_05_16) (#5756) 0d4258d78 is described below commit 0d4258d7848a9349aba5ec143c503407ba8f50be Author: Gluten Performance Bot <137994563+glutenperf...@users.noreply.github.com> AuthorDate: Thu May 16 19:48:42 2024 +0800 [VL] Daily Update Velox Version (2024_05_16) (#5756) --- .github/workflows/velox_docker_cache.yml | 78 ep/build-velox/src/get_velox.sh | 2 +- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/.github/workflows/velox_docker_cache.yml b/.github/workflows/velox_docker_cache.yml index ec95f48a2..cbc24384d 100644 --- a/.github/workflows/velox_docker_cache.yml +++ b/.github/workflows/velox_docker_cache.yml @@ -84,42 +84,42 @@ jobs: with: path: '${{ env.CCACHE_DIR }}' key: ccache-ubuntu-release-default - ccache-native-lib-centos-velox-ut: -runs-on: ubuntu-20.04 -env: - CCACHE_DIR: "${{ github.workspace }}/.ccache" -container: ghcr.io/facebookincubator/velox-dev:circleci-avx -steps: - - uses: actions/checkout@v2 - - name: Setup java and maven -run: | - yum install sudo patch java-1.8.0-openjdk-devel wget -y && \ - wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz - tar -xvf apache-maven-3.8.8-bin.tar.gz - mv apache-maven-3.8.8 /usr/lib/maven - - name: Get Ccache -uses: actions/cache/restore@v3 -with: - path: '${{ env.CCACHE_DIR }}' - key: ccache-centos-release-default - - name: Ensure Cache Dirs Exists -working-directory: ${{ github.workspace }} -run: | - mkdir -p '${{ env.CCACHE_DIR }}' - - name: Build Gluten velox third party -run: | - rm -rf /opt/miniconda-for-velox/ - cd ep/build-velox/src && \ - ./get_velox.sh - cd ../build/velox_ep/ - source /opt/rh/gcc-toolset-9/enable - make EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON -DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON" - - - name: CCache after -run: | - ccache -s - - - uses: actions/cache/save@v3 -with: - path: '${{ env.CCACHE_DIR }}' - key: ccache-centos-release-default \ No newline at end of file +# ccache-native-lib-centos-velox-ut: +#runs-on: ubuntu-20.04 +#env: +# CCACHE_DIR: "${{ github.workspace }}/.ccache" +#container: ghcr.io/facebookincubator/velox-dev:circleci-avx +#steps: +# - uses: actions/checkout@v2 +# - name: Setup java and maven +#run: | +# yum install sudo patch java-1.8.0-openjdk-devel wget -y && \ +# wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz +# tar -xvf apache-maven-3.8.8-bin.tar.gz +# mv apache-maven-3.8.8 /usr/lib/maven +# - name: Get Ccache +#uses: actions/cache/restore@v3 +#with: +# path: '${{ env.CCACHE_DIR }}' +# key: ccache-centos-release-default +# - name: Ensure Cache Dirs Exists +#working-directory: ${{ github.workspace }} +#run: | +# mkdir -p '${{ env.CCACHE_DIR }}' +# - name: Build Gluten velox third party +#run: | +# rm -rf /opt/miniconda-for-velox/ +# cd ep/build-velox/src && \ +# ./get_velox.sh +# cd ../build/velox_ep/ +# source /opt/rh/gcc-toolset-9/enable +# make EXTRA_CMAKE_FLAGS="-DVELOX_ENABLE_PARQUET=ON -DVELOX_BUILD_TESTING=ON -DVELOX_BUILD_TEST_UTILS=ON" +# +# - name: CCache after +#run: | +# ccache -s +# +# - uses: actions/cache/save@v3 +#with: +# path: '${{ env.CCACHE_DIR }}' +# key: ccache-centos-release-default \ No newline at end of file diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 497befbe6..17a0b3796 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_05_15 +VELOX_BRANCH=2024_05_16 VELOX_HOME="" #Set on run gluten on HDFS - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Enable length function for binary type (#5761)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new cb02cdb0a [VL] Enable length function for binary type (#5761) cb02cdb0a is described below commit cb02cdb0a4095a8f194e62268147182afd48821c Author: Zhen Li <10524738+zhli1142...@users.noreply.github.com> AuthorDate: Thu May 16 13:51:29 2024 +0800 [VL] Enable length function for binary type (#5761) --- .../apache/gluten/execution/ScalarFunctionsValidateSuite.scala| 8 cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc | 6 -- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 834e172f8..e88e9699a 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -977,4 +977,12 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest { } } } + + test("length") { +runQueryAndCompare( + "select length(c_comment), length(cast(c_comment as binary))" + +" from customer limit 50") { + checkGlutenOperatorMatch[ProjectExecTransformer] +} + } } diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc index fc8b912e0..51f39a3ab 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc @@ -191,12 +191,6 @@ bool SubstraitToVeloxPlanValidator::validateScalarFunction( return validateRound(scalarFunction, inputType); } else if (name == "extract") { return validateExtractExpr(params); - } else if (name == "char_length") { -VELOX_CHECK(types.size() == 1); -if (types[0] == "vbin") { - LOG_VALIDATION_MSG("Binary type is not supported in " + name); - return false; -} } else if (name == "map_from_arrays") { LOG_VALIDATION_MSG("map_from_arrays is not supported."); return false; - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Daily Update Velox Version (2024_05_15) (#5748)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 888e1e244 [VL] Daily Update Velox Version (2024_05_15) (#5748) 888e1e244 is described below commit 888e1e24403a7d42a936586bc4563e143769ae17 Author: Gluten Performance Bot <137994563+glutenperf...@users.noreply.github.com> AuthorDate: Thu May 16 13:34:13 2024 +0800 [VL] Daily Update Velox Version (2024_05_15) (#5748) --- ep/build-velox/src/get_velox.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index d1e6054d8..497befbe6 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_05_14 +VELOX_BRANCH=2024_05_15 VELOX_HOME="" #Set on run gluten on HDFS - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated (8ade7a9cb -> 731c17ea4)
This is an automated email from the ASF dual-hosted git repository. rui pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git from 8ade7a9cb [VL] Use slice instead of resize in ensureFlattened (#5523) add 731c17ea4 [GLUTEN-5532] Code clean up for GlutenPlugin (#5533) No new revisions were added by this update. Summary of changes: .../main/scala/org/apache/gluten/GlutenPlugin.scala | 19 --- 1 file changed, 19 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Support regr_sxx and regr_syy aggregate functions for Spark 3.4 (#5444)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 628763fc3 [VL] Support regr_sxx and regr_syy aggregate functions for Spark 3.4 (#5444) 628763fc3 is described below commit 628763fc3a9f471e4b2c25c1d07efc968857be16 Author: Joey AuthorDate: Fri Apr 19 13:02:15 2024 +0800 [VL] Support regr_sxx and regr_syy aggregate functions for Spark 3.4 (#5444) --- .../execution/VeloxAggregateFunctionsSuite.scala | 38 -- .../substrait/SubstraitToVeloxPlanValidator.cc | 3 +- docs/velox-backend-support-progress.md | 3 ++ .../apache/gluten/expression/ExpressionNames.scala | 1 + .../gluten/sql/shims/spark34/Spark34Shims.scala| 3 +- 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala index 2573725a7..df0817410 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala @@ -432,14 +432,46 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu } // Disable for Sparke3.5. - testWithSpecifiedSparkVersion("regr_sxy", Some("3.4"), Some("3.4")) { + testWithSpecifiedSparkVersion("regr_sxy regr_sxx regr_syy", Some("3.4"), Some("3.4")) { runQueryAndCompare(""" - |select regr_sxy(l_partkey, l_suppkey) from lineitem; + |select regr_sxy(l_quantity, l_tax) from lineitem; |""".stripMargin) { checkGlutenOperatorMatch[HashAggregateExecTransformer] } runQueryAndCompare( - "select regr_sxy(l_partkey, l_suppkey), count(distinct l_orderkey) from lineitem") { + "select regr_sxy(l_quantity, l_tax), count(distinct l_orderkey) from lineitem") { + df => +{ + assert( +getExecutedPlan(df).count( + plan => { +plan.isInstanceOf[HashAggregateExecTransformer] + }) == 4) +} +} +runQueryAndCompare(""" + |select regr_sxx(l_quantity, l_tax) from lineitem; + |""".stripMargin) { + checkGlutenOperatorMatch[HashAggregateExecTransformer] +} +runQueryAndCompare( + "select regr_sxx(l_quantity, l_tax), count(distinct l_orderkey) from lineitem") { + df => +{ + assert( +getExecutedPlan(df).count( + plan => { +plan.isInstanceOf[HashAggregateExecTransformer] + }) == 4) +} +} +runQueryAndCompare(""" + |select regr_syy(l_quantity, l_tax) from lineitem; + |""".stripMargin) { + checkGlutenOperatorMatch[HashAggregateExecTransformer] +} +runQueryAndCompare( + "select regr_syy(l_quantity, l_tax), count(distinct l_orderkey) from lineitem") { df => { assert( diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc index f992b94c3..2a5857ae9 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc @@ -1106,7 +1106,8 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::AggregateRel& ag "kurtosis", "regr_slope", "regr_intercept", - "regr_sxy"}; + "regr_sxy", + "regr_replacement"}; auto udfFuncs = UdfLoader::getInstance()->getRegisteredUdafNames(); diff --git a/docs/velox-backend-support-progress.md b/docs/velox-backend-support-progress.md index 5e81081b7..4b480529e 100644 --- a/docs/velox-backend-support-progress.md +++ b/docs/velox-backend-support-progress.md @@ -384,6 +384,9 @@ Gluten supports 199 functions. (Drag to right to see all data types) | regr_r2 | regr_r2| regr_r2 | S | | | | S | S | S| S | S | | || | || | | || | | regr_intercept| regr_intercept | regr_intercept | S | | | | S | S | S| S | S
(incubator-gluten) branch main updated: [VL] Fix negative buffer size (#5441)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 74c54f39d [VL] Fix negative buffer size (#5441) 74c54f39d is described below commit 74c54f39d92967fc45733a6270ceabfcedd3866b Author: WangGuangxin AuthorDate: Thu Apr 18 10:42:06 2024 +0800 [VL] Fix negative buffer size (#5441) --- .../scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala index 05d663d05..f1807fe4f 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala @@ -153,7 +153,7 @@ object RowToVeloxColumnarExec { } val rowLength = new ListBuffer[Long]() var rowCount = 0 -var offset = 0 +var offset = 0L val sizeInBytes = row.getSizeInBytes // allocate buffer based on 1st row, but if first row is very big, this will cause OOM // maybe we should optimize to list ArrayBuf to native to avoid buf close and allocate @@ -182,7 +182,7 @@ object RowToVeloxColumnarExec { val unsafeRow = convertToUnsafeRow(row) val sizeInBytes = unsafeRow.getSizeInBytes if ((offset + sizeInBytes) > arrowBuf.capacity()) { - val tmpBuf = arrowAllocator.buffer(((offset + sizeInBytes) * 2).toLong) + val tmpBuf = arrowAllocator.buffer((offset + sizeInBytes) * 2) tmpBuf.setBytes(0, arrowBuf, 0, offset) arrowBuf.close() arrowBuf = tmpBuf - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Fix kParquetWriteTimestampUnit to kParquetWriteTimestampUnitSession (#5281)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new d36b76957 [VL] Fix kParquetWriteTimestampUnit to kParquetWriteTimestampUnitSession (#5281) d36b76957 is described below commit d36b76957cf2133d4ca801603808e4c15d0c759d Author: Yang Zhang AuthorDate: Thu Apr 11 09:05:05 2024 +0800 [VL] Fix kParquetWriteTimestampUnit to kParquetWriteTimestampUnitSession (#5281) --- cpp/velox/compute/VeloxBackend.cc | 4 ++-- cpp/velox/compute/WholeStageResultIterator.cc | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc index 6e010ec18..8f1cab48b 100644 --- a/cpp/velox/compute/VeloxBackend.cc +++ b/cpp/velox/compute/VeloxBackend.cc @@ -258,8 +258,7 @@ void VeloxBackend::initCache(const std::shared_ptr& conf) { - int32_t ioThreads = conf->get(kVeloxIOThreads, kVeloxIOThreadsDefault); - + // The configs below are used at process level. auto mutableConf = std::make_shared(conf->valuesCopy()); auto hiveConf = getHiveConfig(conf); @@ -303,6 +302,7 @@ void VeloxBackend::initConnector(const std::shared_ptrget(kCachePrefetchMinPct, 0); + auto ioThreads = conf->get(kVeloxIOThreads, kVeloxIOThreadsDefault); if (ioThreads > 0) { ioExecutor_ = std::make_unique(ioThreads); } diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 89b77ac85..e105a0d64 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -559,12 +559,13 @@ std::unordered_map WholeStageResultIterator::getQueryC } std::shared_ptr WholeStageResultIterator::createConnectorConfig() { + // The configs below are used at session level. std::unordered_map configs = {}; // The semantics of reading as lower case is opposite with case-sensitive. configs[velox::connector::hive::HiveConfig::kFileColumnNamesReadAsLowerCaseSession] = !veloxCfg_->get(kCaseSensitive, false) ? "true" : "false"; configs[velox::connector::hive::HiveConfig::kPartitionPathAsLowerCaseSession] = "false"; - configs[velox::connector::hive::HiveConfig::kParquetWriteTimestampUnit] = "6"; + configs[velox::connector::hive::HiveConfig::kParquetWriteTimestampUnitSession] = "6"; configs[velox::connector::hive::HiveConfig::kMaxPartitionsPerWritersSession] = std::to_string(veloxCfg_->get(kMaxPartitions, 1)); configs[velox::connector::hive::HiveConfig::kIgnoreMissingFilesSession] = - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Restore the test cases for corr in group-by.sql and udf-group-by.sql (#5175)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new ae41b54dc [VL] Restore the test cases for corr in group-by.sql and udf-group-by.sql (#5175) ae41b54dc is described below commit ae41b54dcaacb7fde8e7bac7689ff7797ad9fd06 Author: Joey AuthorDate: Fri Mar 29 14:35:04 2024 +0800 [VL] Restore the test cases for corr in group-by.sql and udf-group-by.sql (#5175) --- .../test/resources/sql-tests/inputs/group-by.sql | 4 .../sql-tests/inputs/udf/udf-group-by.sql | 4 .../resources/sql-tests/results/group-by.sql.out | 9 .../sql-tests/results/udf/udf-group-by.sql.out | 9 .../utils/velox/VeloxSQLQueryTestSettings.scala| 4 ++-- .../test/resources/sql-tests/inputs/group-by.sql | 6 + .../sql-tests/inputs/udf/udf-group-by.sql | 4 .../resources/sql-tests/results/group-by.sql.out | 27 ++ .../sql-tests/results/udf/udf-group-by.sql.out | 9 .../utils/velox/VeloxSQLQueryTestSettings.scala| 5 ++-- .../test/resources/sql-tests/inputs/group-by.sql | 4 .../sql-tests/inputs/udf/udf-group-by.sql | 4 .../resources/sql-tests/results/group-by.sql.out | 10 .../sql-tests/results/udf/udf-group-by.sql.out | 10 .../utils/velox/VeloxSQLQueryTestSettings.scala| 3 ++- 15 files changed, 106 insertions(+), 6 deletions(-) diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql index 4b2e12975..e2c3672a2 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql +++ b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/group-by.sql @@ -75,6 +75,10 @@ SELECT 1 from ( ) b where b.z != b.z; +-- SPARK-24369 multiple distinct aggregations having the same argument set +SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y); + -- SPARK-25708 HAVING without GROUP BY means global aggregate SELECT 1 FROM range(10) HAVING true; diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql index a4df72f44..0cc57c97b 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql +++ b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/udf/udf-group-by.sql @@ -71,6 +71,10 @@ SELECT 1 from ( ) b where b.z != b.z; +-- SPARK-24369 multiple distinct aggregations having the same argument set +SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y); + -- SPARK-25708 HAVING without GROUP BY means global aggregate SELECT udf(1) FROM range(10) HAVING true; diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out index 8986ca9b0..79e6f72df 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out +++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/group-by.sql.out @@ -243,6 +243,15 @@ struct<1:int> +-- !query +SELECT corr(DISTINCT x, y), corr(DISTINCT y, x), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) +-- !query schema +struct +-- !query output +0. 0. 3 + + -- !query SELECT 1 FROM range(10) HAVING true -- !query schema diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index 26d55d341..986815c97 100644 --- a/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -243,6 +243,15 @@ struct<1:int> +-- !query +SELECT corr(DISTINCT x, y), udf(corr(DISTINCT y, x)), count(*) + FROM (VALUES (1, 1), (2, 2), (2, 2)) t(x, y) +-- !query schema +struct +-- !query output +0. 0. 3 + + -- !query SELECT udf(1) FROM range(10) HAVING true -- !query schema diff --git a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala index 4464dbefd..9ec55f015 100644 --- a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala @@ -230,9 +230,9 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { val OVERWRITE_SQL_QUERY_LIST: Set[String] = Set( // Ve
(incubator-gluten) branch main updated: [VL] Enable SPARK-10634 timestamp test case (#5090)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new b962e7cc7 [VL] Enable SPARK-10634 timestamp test case (#5090) b962e7cc7 is described below commit b962e7cc74f7a7114770e9a882f10d5eaa59a355 Author: Joey AuthorDate: Wed Mar 27 09:32:41 2024 +0800 [VL] Enable SPARK-10634 timestamp test case (#5090) --- .../src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala | 2 -- .../src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala | 2 -- .../src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala | 2 -- 3 files changed, 6 deletions(-) diff --git a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala index 5f66df1a0..2d92c5ca2 100644 --- a/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala @@ -857,7 +857,6 @@ class VeloxTestSettings extends BackendTestSettings { // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") // Timestamp is read as INT96. -.exclude("SPARK-10634 timestamp written and read as INT64 - truncation") .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") // Rewrite because the filter after datasource is not needed. @@ -869,7 +868,6 @@ class VeloxTestSettings extends BackendTestSettings { // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") // Timestamp is read as INT96. -.exclude("SPARK-10634 timestamp written and read as INT64 - truncation") .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") // Rewrite because the filter after datasource is not needed. diff --git a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala index f2e75f84f..dd14a604b 100644 --- a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala @@ -682,7 +682,6 @@ class VeloxTestSettings extends BackendTestSettings { // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") // Timestamp is read as INT96. -.exclude("SPARK-10634 timestamp written and read as INT64 - truncation") .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ") @@ -698,7 +697,6 @@ class VeloxTestSettings extends BackendTestSettings { // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") // Timestamp is read as INT96. -.exclude("SPARK-10634 timestamp written and read as INT64 - truncation") .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ") diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala index 1c37e787b..d2555007b 100644 --- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala @@ -668,7 +668,6 @@ class VeloxTestSettings extends BackendTestSettings { // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") // Timestamp is read as INT96. -.exclude("SPARK-10634 timestamp written and read as INT64 - truncation") .exclude("Migration from INT96 to TIMESTAMP_MICROS timestamp type") .exclude("SPARK-10365 timestamp written and read as INT64 - TIMESTAMP_MICROS") .exclude("SPARK-36182: read TimestampNTZ as TimestampLTZ") @@ -684,7 +683,6 @@ class VeloxTestSettings extends BackendTestSettings { // decimal failed ut .exclude("SPARK-34212 Parquet should read decimals correctly") // Timestamp is read as INT96. -.exc
(incubator-gluten) branch main updated: [GLUTEN-4946][CH] Fix avg(bigint) overflow (#5048)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new e9034ff5e [GLUTEN-4946][CH] Fix avg(bigint) overflow (#5048) e9034ff5e is described below commit e9034ff5e2ec4cce8cd5defaf2ade9b44b8c8aa3 Author: loudongfeng AuthorDate: Mon Mar 25 12:55:00 2024 +0800 [GLUTEN-4946][CH] Fix avg(bigint) overflow (#5048) --- .../clickhouse/CHSparkPlanExecApi.scala| 2 + .../catalyst/CHAggregateFunctionRewriteRule.scala | 60 ++ .../execution/GlutenFunctionValidateSuite.scala| 21 .../main/scala/io/glutenproject/GlutenConfig.scala | 8 +++ 4 files changed, 91 insertions(+) diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala index 29af5a0e5..4b6ee1909 100644 --- a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala +++ b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHSparkPlanExecApi.scala @@ -35,6 +35,7 @@ import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.{GenShuffleWriterParameters, GlutenShuffleWriterWrapper, HashPartitioningWrapper} import org.apache.spark.shuffle.utils.CHShuffleUtil import org.apache.spark.sql.{SparkSession, Strategy} +import org.apache.spark.sql.catalyst.CHAggregateFunctionRewriteRule import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions._ @@ -518,6 +519,7 @@ class CHSparkPlanExecApi extends SparkPlanExecApi { override def genExtendedOptimizers(): List[SparkSession => Rule[LogicalPlan]] = { List( spark => new CommonSubexpressionEliminateRule(spark, spark.sessionState.conf), + spark => CHAggregateFunctionRewriteRule(spark), _ => CountDistinctWithoutExpand ) } diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/catalyst/CHAggregateFunctionRewriteRule.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/catalyst/CHAggregateFunctionRewriteRule.scala new file mode 100644 index 0..623db7993 --- /dev/null +++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/catalyst/CHAggregateFunctionRewriteRule.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst + +import io.glutenproject.GlutenConfig + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.expressions.Cast +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Average} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types._ + +/** + * Avg(Int) function: CH use input type for intermediate sum type, while spark use double so need + * convert . + * @param spark + */ +case class CHAggregateFunctionRewriteRule(spark: SparkSession) extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { +case a: Aggregate => + a.transformExpressions { +case avgExpr @ AggregateExpression(avg: Average, _, _, _, _) +if GlutenConfig.getConf.enableCastAvgAggregateFunction && + GlutenConfig.getConf.enableColumnarHashAgg && + !avgExpr.isDistinct && isDataTypeNeedConvert(avg.child.dataType) => + AggregateExpression( +avg.copy(child = Cast(avg.child, DoubleType)), +avgExpr.mode, +avgExpr.isDistinct, +avgExpr.filter, +avgExpr.resultId + ) + } + } + + private def isDataTypeNeedConvert(dataType: DataType): Boolean = { +dataType match { + case FloatType => true + case IntegerType => true + case L
(incubator-gluten) branch main updated: [VL] Add large precision tests for decimal sum and avg (#4961)
This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 94e1638c5 [VL] Add large precision tests for decimal sum and avg (#4961) 94e1638c5 is described below commit 94e1638c543b1397b709e2fe5ad0717223053c80 Author: Joey AuthorDate: Tue Mar 19 13:11:44 2024 +0800 [VL] Add large precision tests for decimal sum and avg (#4961) --- .../execution/VeloxAggregateFunctionsSuite.scala | 27 ++ 1 file changed, 27 insertions(+) diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala index 26bea5b1c..c0143d0ae 100644 --- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala +++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxAggregateFunctionsSuite.scala @@ -99,6 +99,19 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu }) == 4) } } +// Test the situation that precision + 4 of input decimal value exceeds 38. +runQueryAndCompare( + "select avg(cast (l_quantity as DECIMAL(36, 2))), " + +"count(distinct l_partkey) from lineitem") { + df => +{ + assert( +getExecutedPlan(df).count( + plan => { +plan.isInstanceOf[HashAggregateExecTransformer] + }) == 4) +} +} } test("sum") { @@ -142,6 +155,20 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu }) == 4) } } + +// Test the situation that precision + 4 of input decimal value exceeds 38. +runQueryAndCompare( + "select sum(cast (l_quantity as DECIMAL(36, 2))), " + +"count(distinct l_partkey) from lineitem") { + df => +{ + assert( +getExecutedPlan(df).count( + plan => { +plan.isInstanceOf[HashAggregateExecTransformer] + }) == 4) +} +} } test("min and max") { - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org