This is an automated email from the ASF dual-hosted git repository.
exmy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 29c5e0050c [CH] Support map_concat function (#9841)
29c5e0050c is described below
commit 29c5e0050c741ddac560db3e7acd83548e402f32
Author: exmy <[email protected]>
AuthorDate: Thu Jul 31 10:11:34 2025 +0800
[CH] Support map_concat function (#9841)
* [CH] Support map_concat function
* fix
* fix
* fix
---------
Co-authored-by: xumingyong <[email protected]>
---
.../org/apache/gluten/utils/CHExpressionUtil.scala | 1 -
.../execution/GlutenFunctionValidateSuite.scala | 10 ++++
cpp-ch/local-engine/Parser/ExpressionParser.cpp | 8 +--
.../Parser/scalar_function_parser/mapConcat.cpp | 57 ++++++++++++++++++++++
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 +
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 +
.../utils/clickhouse/ClickHouseTestSettings.scala | 2 +
7 files changed, 74 insertions(+), 8 deletions(-)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index 34d70b17c4..8365245b84 100644
---
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -205,7 +205,6 @@ object CHExpressionUtil {
RAISE_ERROR -> DefaultValidator(),
WIDTH_BUCKET -> DefaultValidator(),
MAKE_DATE -> DefaultValidator(),
- MAP_CONCAT -> DefaultValidator(),
ARRAY_APPEND -> DefaultValidator(),
JSON_OBJECT_KEYS -> DefaultValidator(),
LUHN_CHECK -> DefaultValidator()
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index 51ba9fd8d3..d08501674d 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -1051,6 +1051,16 @@ class GlutenFunctionValidateSuite extends
GlutenClickHouseWholeStageTransformerS
}
}
+ // Verifies map_concat is offloaded to the CH backend as a ProjectExecTransformer.
+ // ConstantFolding and NullPropagation are excluded so Catalyst does not
+ // pre-evaluate the constant map_concat expressions on the driver, which would
+ // leave nothing for the native backend to execute.
+ test("Test map_concat") {
+ withSQLConf(
+ SQLConf.OPTIMIZER_EXCLUDED_RULES.key ->
+ (ConstantFolding.ruleName + "," + NullPropagation.ruleName)) {
+ runQueryAndCompare(
+ // covers multi-map concat, a null value, and the zero-argument form
+ "select map_concat(map(1, 'a', 2, 'b'), map(3, null)), map_concat()"
+ )(checkGlutenOperatorMatch[ProjectExecTransformer])
+ }
+ }
+
test("Test transform_keys/transform_values") {
val sql =
"""
diff --git a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
index 07d3741a26..39aabcf2d2 100644
--- a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
@@ -199,13 +199,7 @@ std::pair<DB::DataTypePtr, DB::Field>
LiteralParser::parse(const substrait::Expr
DB::DataTypePtr key_type;
std::tie(key_type, tuple[0]) = parse(key_value.key());
- /// Each key should has the same type
- if (!common_key_type->equals(*key_type))
- throw DB::Exception(
- DB::ErrorCodes::LOGICAL_ERROR,
- "Literal map key type mismatch:{} and {}",
- common_key_type->getName(),
- key_type->getName());
+ common_key_type =
getLeastSupertype(DB::DataTypes{common_key_type, key_type});
DB::DataTypePtr value_type;
std::tie(value_type, tuple[1]) = parse(key_value.value());
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/mapConcat.cpp
b/cpp-ch/local-engine/Parser/scalar_function_parser/mapConcat.cpp
new file mode 100644
index 0000000000..0a5ccc8599
--- /dev/null
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/mapConcat.cpp
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <Parser/FunctionParser.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+ extern const int BAD_ARGUMENTS;
+ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+}
+
+namespace local_engine
+{
+
+class FunctionMapConcat : public FunctionParser
+{
+public:
+ explicit FunctionMapConcat(ParserContextPtr parser_context_) :
FunctionParser(parser_context_) { }
+ ~FunctionMapConcat() override = default;
+
+ static constexpr auto name = "map_concat";
+
+ String getName() const override { return name; }
+
+ const DB::ActionsDAG::Node * parse(
+ const substrait::Expression_ScalarFunction & substrait_func,
DB::ActionsDAG & actions_dag) const override
+ {
+ auto parsed_args = parseFunctionArguments(substrait_func, actions_dag);
+ const DB::ActionsDAG::Node * result_node = nullptr;
+ if (!parsed_args.size())
+ result_node = toFunctionNode(actions_dag, "map", {});
+ else
+ result_node = toFunctionNode(actions_dag, "mapConcat",
parsed_args);
+
+ return convertNodeTypeIfNeeded(substrait_func, result_node,
actions_dag);
+ }
+};
+static FunctionParserRegister<FunctionMapConcat> register_map_concat;
+
+}
\ No newline at end of file
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index dc6fb49ea1..04f4b3baa3 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -177,6 +177,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenDataFrameAsOfJoinSuite]
enableSuite[GlutenDataFrameComplexTypeSuite]
enableSuite[GlutenDataFrameFunctionsSuite]
+ // Expected exception org.apache.spark.SparkException to be thrown, but no
exception was thrown
+ .exclude("map_concat function")
.exclude("map with arrays")
.exclude("flatten function")
.exclude("aggregate function - array for primitive type not containing
null")
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 46acce6476..7ccf68e9e0 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -179,6 +179,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
enableSuite[GlutenDataFrameAsOfJoinSuite]
enableSuite[GlutenDataFrameComplexTypeSuite]
enableSuite[GlutenDataFrameFunctionsSuite]
+ // Expected exception org.apache.spark.SparkException to be thrown, but no
exception was thrown
+ .exclude("map_concat function")
.exclude("map with arrays")
.exclude("flatten function")
.exclude("aggregate function - array for primitive type not containing
null")
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 0860b5e3e7..dade07afff 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -486,6 +486,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
+ // Expected exception org.apache.spark.SparkException to be thrown, but no
exception was thrown
+ .exclude("map_concat function")
// Rewrite this test because Velox sorts rows by key for primitive data
types, which disrupts the original row sequence.
.includeCH("map_zip_with function - map of primitive types")
.excludeCH("map with arrays")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]