This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-c108335-hive-sql
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-c108335-hive-sql by
this push:
new 819b393d839 [feature](function) impl str_to_map (#49142) (#49916)
819b393d839 is described below
commit 819b393d83975047f3add767fcbb2a252a1b7ccb
Author: Socrates <[email protected]>
AuthorDate: Thu Apr 10 04:03:20 2025 +0800
[feature](function) impl str_to_map (#49142) (#49916)
patch to fix
---
be/src/vec/functions/function_map.cpp | 83 ++++++--
.../expressions/functions/scalar/StrToMap.java | 8 +-
.../string_functions/test_str_to_map.out | Bin 0 -> 17405 bytes
.../string_functions/test_str_to_map.groovy | 232 +++++++++++++++++++++
4 files changed, 295 insertions(+), 28 deletions(-)
diff --git a/be/src/vec/functions/function_map.cpp
b/be/src/vec/functions/function_map.cpp
index 3d8b84bdf37..6a825c5e76f 100644
--- a/be/src/vec/functions/function_map.cpp
+++ b/be/src/vec/functions/function_map.cpp
@@ -296,8 +296,6 @@ public:
String get_name() const override { return name; }
- bool is_variadic() const override { return true; }
-
size_t get_number_of_arguments() const override { return 3; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
@@ -309,29 +307,75 @@ public:
uint32_t result, size_t input_rows_count) const
override {
DCHECK(arguments.size() == 3);
+ bool cols_const[2];
+ ColumnPtr cols[2];
+ for (size_t i = 0; i < 2; ++i) {
+ cols_const[i] =
is_column_const(*block.get_by_position(arguments[i]).column);
+ }
+ // convert to full column if necessary
+ default_preprocess_parameter_columns(cols, cols_const, {0, 1}, block,
arguments);
+ const auto& [col3, col3_const] =
+ unpack_if_const(block.get_by_position(arguments[2]).column);
+
+ const auto& str_column = assert_cast<const
ColumnString*>(cols[0].get());
+ const auto& pair_delim_column = assert_cast<const
ColumnString*>(cols[1].get());
+ const auto& kv_delim_column = assert_cast<const
ColumnString*>(col3.get());
+
+ ColumnPtr result_col;
+ if (cols_const[0] && cols_const[1]) {
+ result_col = execute_vector<true, false>(input_rows_count,
*str_column,
+ *pair_delim_column,
*kv_delim_column);
+ } else if (col3_const) {
+ result_col = execute_vector<false, true>(input_rows_count,
*str_column,
+ *pair_delim_column,
*kv_delim_column);
+ } else {
+ result_col = execute_vector<false, false>(input_rows_count,
*str_column,
+ *pair_delim_column,
*kv_delim_column);
+ }
+
+ block.replace_by_position(result, std::move(result_col));
+
+ return Status::OK();
+ }
+
+private:
+ template <bool is_str_and_pair_delim_const, bool is_kv_delim_const>
+ static ColumnPtr execute_vector(const size_t input_rows_count, const
ColumnString& str_col,
+ const ColumnString& pair_delim_col,
+ const ColumnString& kv_delim_col) {
// map keys column
auto result_col_map_keys_data =
ColumnNullable::create(ColumnString::create(),
ColumnUInt8::create());
+ result_col_map_keys_data->reserve(input_rows_count);
// map values column
auto result_col_map_vals_data =
ColumnNullable::create(ColumnString::create(),
ColumnUInt8::create());
+ result_col_map_vals_data->reserve(input_rows_count);
// map offsets column
auto result_col_map_offsets = ColumnUInt64::create();
-
- auto& str_col = block.get_by_position(arguments[0]).column;
- auto& pair_delim_col = block.get_by_position(arguments[1]).column;
- auto& kv_delim_col = block.get_by_position(arguments[2]).column;
-
- const auto* str_column = assert_cast<const
ColumnString*>(str_col.get());
- const auto* pair_delim_column = assert_cast<const
ColumnString*>(pair_delim_col.get());
- const auto* kv_delim_column = assert_cast<const
ColumnString*>(kv_delim_col.get());
+ result_col_map_offsets->reserve(input_rows_count);
+
+ std::vector<std::string_view> kvs;
+ std::string_view kv_delim;
+ if constexpr (is_str_and_pair_delim_const) {
+ auto str = str_col.get_data_at(0).to_string_view();
+ auto pair_delim = pair_delim_col.get_data_at(0).to_string_view();
+ kvs = split_pair_by_delim(str, pair_delim);
+ }
+ if constexpr (is_kv_delim_const) {
+ kv_delim = kv_delim_col.get_data_at(0).to_string_view();
+ }
for (size_t i = 0; i < input_rows_count; ++i) {
- const auto str = str_column->get_data_at(i).to_string_view();
- const auto pair_delim =
pair_delim_column->get_data_at(i).to_string_view();
- const auto kv_delim =
kv_delim_column->get_data_at(i).to_string_view();
+ if constexpr (!is_str_and_pair_delim_const) {
+ auto str = str_col.get_data_at(i).to_string_view();
+ auto pair_delim =
pair_delim_col.get_data_at(i).to_string_view();
+ kvs = split_pair_by_delim(str, pair_delim);
+ }
+ if constexpr (!is_kv_delim_const) {
+ kv_delim = kv_delim_col.get_data_at(i).to_string_view();
+ }
- auto kvs = split_pair_by_delim(str, pair_delim);
for (const auto& kv : kvs) {
auto kv_parts = split_kv_by_delim(kv, kv_delim);
if (kv_parts.size() == 2) {
@@ -345,16 +389,11 @@ public:
result_col_map_offsets->insert_value(result_col_map_keys_data->size());
}
- auto result_col =
ColumnMap::create(std::move(result_col_map_keys_data),
-
std::move(result_col_map_vals_data),
- std::move(result_col_map_offsets));
-
- block.replace_by_position(result, std::move(result_col));
-
- return Status::OK();
+ return ColumnMap::create(std::move(result_col_map_keys_data),
+ std::move(result_col_map_vals_data),
+ std::move(result_col_map_offsets));
}
-private:
static std::vector<std::string_view> split_pair_by_delim(const
std::string_view& str,
const
std::string_view& delim) {
if (str.empty()) {
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java
index d31f76cdefd..89df45d01c0 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/StrToMap.java
@@ -20,6 +20,7 @@ package
org.apache.doris.nereids.trees.expressions.functions.scalar;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import
org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.literal.Literal;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.MapType;
@@ -44,7 +45,7 @@ import java.util.List;
* Both pairDelim and keyValueDelim are treated as regular expressions.
*/
public class StrToMap extends ScalarFunction
- implements ExplicitlyCastableSignature {
+ implements ExplicitlyCastableSignature, PropagateNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(MapType.of(StringType.INSTANCE,
StringType.INSTANCE))
@@ -82,11 +83,6 @@ public class StrToMap extends ScalarFunction
super("str_to_map", arg0, arg1, arg2);
}
- @Override
- public boolean nullable() {
- return false;
- }
-
/**
* withChildren.
*/
diff --git
a/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
new file mode 100644
index 00000000000..6dd44129806
Binary files /dev/null and
b/regression-test/data/query_p0/sql_functions/string_functions/test_str_to_map.out
differ
diff --git
a/regression-test/suites/query_p0/sql_functions/string_functions/test_str_to_map.groovy
b/regression-test/suites/query_p0/sql_functions/string_functions/test_str_to_map.groovy
new file mode 100644
index 00000000000..e11e2310b73
--- /dev/null
+++
b/regression-test/suites/query_p0/sql_functions/string_functions/test_str_to_map.groovy
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_str_to_map") {
+ sql "drop table if exists str_to_map_args;"
+ sql """
+ create table str_to_map_args (
+ k0 int,
+ map_str_not_null string not null,
+ map_str_null string null,
+ key_delim_not_null string not null,
+ key_delim_null string null,
+ value_delim_not_null string not null,
+ value_delim_null string null
+ )
+ DISTRIBUTED BY HASH(k0)
+ PROPERTIES
+ (
+ "replication_num" = "1"
+ );
+ """
+
+ // Test empty table with different nullable combinations
+ order_qt_all_not_null "select str_to_map(map_str_not_null,
key_delim_not_null, value_delim_not_null) from str_to_map_args"
+
+ order_qt_all_args_null "select str_to_map(map_str_null, key_delim_null,
value_delim_null) from str_to_map_args"
+
+ order_qt_partial_null "select str_to_map(map_str_not_null, key_delim_null,
value_delim_null) from str_to_map_args"
+
+ order_qt_nullable_no_null "select str_to_map(nullable(map_str_not_null),
nullable(key_delim_not_null), nullable(value_delim_not_null)) from
str_to_map_args"
+
+ sql '''
+ insert into str_to_map_args values
+ (1, 'a:1,b:2,c:3', 'a:1,b:2,c:3', ',', ',', ':', ':'),
+ (2, '', '', ',', ',', ':', ':'), -- Empty string test
+ (3, 'a:1', 'a:1', ',', ',', ':', ':'), -- Single key-value pair
+ (4, 'a:1,b:2,b:3', 'a:1,b:2,b:3', ',', ',', ':', ':'), -- Duplicate
keys
+ (5, 'a:,b:,c:', 'a:,b:,c:', ',', ',', ':', ':'), -- Empty values
+ (6, ':1,:2,:3', ':1,:2,:3', ',', ',', ':', ':'), -- Empty keys
+ (7, 'a=1;b=2;c=3', 'a=1;b=2;c=3', ';', ';', '=', '='), -- Different
delimiters
+ (8, '中文:值,英文:value', '中文:值,英文:value', ',', ',', ':', ':'), -- Unicode
characters
+ (9, 'special@#:123,chars!:456', 'special@#:123,chars!:456', ',', ',',
':', ':'), -- Special characters in keys
+ (10, 'a:123!@#,b:456$%^', 'a:123!@#,b:456$%^', ',', ',', ':', ':'), --
Special characters in values
+ (11, 'verylongkey:verylongvalue,anotherlongkey:anotherlongvalue',
'verylongkey:verylongvalue,anotherlongkey:anotherlongvalue', ',', ',', ':',
':'), -- Long strings
+ (12, 'a::1,b::2', 'a::1,b::2', ',', ',', '::', '::'), --
Multi-character delimiter
+ (13, 'a:1\nb:2\nc:3', 'a:1\nb:2\nc:3', '\n', '\n', ':', ':'), --
Newline as delimiter
+ (14, 'a:1\tb:2\tc:3', 'a:1\tb:2\tc:3', '\t', '\t', ':', ':'), -- Tab
as delimiter
+ (15, ' a : 1 , b : 2 ', ' a : 1 , b : 2 ', ',', ',', ':', ':') --
Spaces in string
+ '''
+
+ // Test different nullable combinations with data
+ order_qt_all_not_null_data """
+ select str_to_map(map_str_not_null, key_delim_not_null,
value_delim_not_null)
+ from str_to_map_args
+ order by k0;
+ """
+
+ order_qt_all_args_null_data """
+ select str_to_map(map_str_null, key_delim_null, value_delim_null)
+ from str_to_map_args
+ order by k0;
+ """
+
+ order_qt_partial_null_data """
+ select str_to_map(map_str_not_null, key_delim_null, value_delim_null)
+ from str_to_map_args
+ order by k0;
+ """
+
+ order_qt_nullable_no_null_data """
+ select str_to_map(nullable(map_str_not_null),
nullable(key_delim_not_null), nullable(value_delim_not_null))
+ from str_to_map_args
+ order by k0;
+ """
+
+ // Test mixed nullable combinations
+ order_qt_mixed_null_1 """
+ select str_to_map(map_str_null, key_delim_not_null,
value_delim_not_null)
+ from str_to_map_args
+ order by k0;
+ """
+
+ order_qt_mixed_null_2 """
+ select str_to_map(map_str_not_null, key_delim_null,
value_delim_not_null)
+ from str_to_map_args
+ order by k0;
+ """
+
+ order_qt_mixed_null_3 """
+ select str_to_map(map_str_not_null, key_delim_not_null,
value_delim_null)
+ from str_to_map_args
+ order by k0;
+ """
+
+ // Test with constant null values
+ order_qt_const_null_1 """
+ select str_to_map(null, key_delim_not_null, value_delim_not_null)
+ from str_to_map_args
+ order by k0;
+ """
+
+ order_qt_const_null_2 """
+ select str_to_map(map_str_not_null, null, value_delim_not_null)
+ from str_to_map_args
+ order by k0;
+ """
+
+ order_qt_const_null_3 """
+ select str_to_map(map_str_not_null, key_delim_not_null, null)
+ from str_to_map_args
+ order by k0;
+ """
+
+ // Constant-argument cases. Most constant combinations are presumably covered by the BE unit tests (BE-UT).
+ // Test const string with column delimiters
+ order_qt_const_str """
+ select str_to_map('a:1,b:2', key_delim_not_null, value_delim_not_null)
+ from str_to_map_args order by k0
+ """
+
+ // Test column string with const delimiters
+ order_qt_const_delims """
+ select str_to_map(map_str_not_null, ',', ':')
+ from str_to_map_args order by k0
+ """
+
+ // Test const string with one const delimiter and one column delimiter
+ order_qt_mixed_const1 """
+ select str_to_map('x=1;y=2', ';', value_delim_not_null)
+ from str_to_map_args order by k0
+ """
+
+ order_qt_mixed_const2 """
+ select str_to_map('p-1|q-2', key_delim_not_null, '-')
+ from str_to_map_args order by k0
+ """
+
+ // Test all const non-null arguments
+ order_qt_all_const """
+ select str_to_map('a=1|b=2', '|', '=')
+ from str_to_map_args order by k0
+ """
+
+ // Test const string with nullable column delimiters
+ order_qt_const_str_null_delims """
+ select str_to_map('m:1,n:2', key_delim_null, value_delim_null)
+ from str_to_map_args order by k0
+ """
+
+ // Test nullable column string with const delimiters
+ order_qt_null_str_const_delims '''
+ select str_to_map(map_str_null, '#', '$')
+ from str_to_map_args order by k0
+ '''
+
+ // Test basic str_to_map functionality with all parameters
+ qt_basic_1 "select str_to_map('a:1,b:2,c:3', ',', ':');"
+ qt_basic_2 "select str_to_map('key1=val1;key2=val2', ';', '=');"
+ qt_basic_3 "select str_to_map('x-1|y-2|z-3', '|', '-');"
+
+ // Test with default parameters (omitting both delimiters)
+ // Default pair delimiter is ',' and key-value delimiter is ':'
+ qt_default_both_1 "select str_to_map('a:1,b:2,c:3');"
+ qt_default_both_2 "select str_to_map('key1:value1,key2:value2');"
+ qt_default_both_3 "select str_to_map('x:1,y:2,z:');"
+ qt_default_both_4 "select str_to_map('');"
+
+ // Test with default key-value delimiter (omitting last parameter)
+ // Default key-value delimiter is ':'
+ qt_default_value_1 "select str_to_map('a:1;b:2;c:3', ';');"
+ qt_default_value_2 "select str_to_map('key:val|foo:bar', '|');"
+ qt_default_value_3 "select str_to_map('x:1#y:2#z:3', '#');"
+ qt_default_value_4 "select str_to_map('a:1...b:2...c:3', '...');"
+
+ // Test empty string cases
+ qt_empty_1 "select str_to_map('');"
+ qt_empty_2 "select str_to_map('a:1,,b:2');"
+ qt_empty_3 "select str_to_map('a:,b:2,c:');"
+ qt_empty_4 "select str_to_map(',,,');"
+
+ // Test missing key-value delimiter
+ qt_missing_value_1 "select str_to_map('a,b:2,c');"
+ qt_missing_value_2 "select str_to_map('val1,val2,val3');"
+ qt_missing_value_3 "select str_to_map('key1,key2:val2,key3');"
+
+ // Test with special characters
+ qt_special_1 "select str_to_map('\ta:1\n,\tb:2\n');"
+ qt_special_2 "select str_to_map('a\\nb:1,c\\td:2');"
+ qt_special_3 "select str_to_map('key1:value1,key2:value2', ',', ':');"
+
+ // Test with spaces
+ qt_spaces_1 "select str_to_map('a : 1, b : 2');"
+ qt_spaces_2 "select str_to_map(' a:1 , b:2 ');"
+ qt_spaces_3 "select str_to_map(' a:1, b:2 ');"
+ qt_spaces_4 "select str_to_map(' ');"
+
+ // Test with Unicode characters
+ qt_unicode_1 "select str_to_map('键1:值1,键2:值2');"
+ qt_unicode_2 "select str_to_map('标题①:内容①,标题②:内容②');"
+ qt_unicode_3 "select str_to_map('🔑:🔒,📝:📖');"
+ qt_unicode_4 "select str_to_map('あ:い,う:え');"
+
+ // Test with duplicate keys
+ qt_dup_1 "select str_to_map('a:1,b:2,a:3');"
+ qt_dup_2 "select str_to_map('key:val1,key:val2,key:val3');"
+ qt_dup_3 "select str_to_map('a:1,a:,a:3');"
+
+ // Test edge cases
+ qt_edge_1 "select str_to_map('a:1:2,b:3:4');"
+ qt_edge_2 "select str_to_map(':::');"
+ qt_edge_3 "select str_to_map('a:1:2');"
+ qt_edge_4 "select str_to_map('key::value');"
+ qt_edge_5 "select str_to_map(':');"
+
+ // Test extremely long strings
+ qt_long_1 "select str_to_map(repeat('a:1,', 1000));"
+ qt_long_2 "select str_to_map(concat(repeat('key', 100), ':',
repeat('value', 100)));"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]