This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 47edc5a06e [fix](functions) Support nullable column for multi_string functions (#19498)
47edc5a06e is described below
commit 47edc5a06ef09744f8019fdc39aa6b5c44d0206d
Author: Jerry Hu <[email protected]>
AuthorDate: Thu May 11 01:13:13 2023 +0800
[fix](functions) Support nullable column for multi_string functions (#19498)
---
.../functions/functions_multi_string_position.cpp | 51 ++++++++++++++++-
.../functions/functions_multi_string_search.cpp | 65 ++++++++++++++++++----
.../test_multi_string_position.out | 6 ++
.../search_functions/test_multi_string_search.out | 44 ++++++++++++++-
.../test_multi_string_position.groovy | 11 ++--
.../test_multi_string_search.groovy | 33 +++++++++++
6 files changed, 192 insertions(+), 18 deletions(-)
diff --git a/be/src/vec/functions/functions_multi_string_position.cpp b/be/src/vec/functions/functions_multi_string_position.cpp
index 062f34f995..3db079efad 100644
--- a/be/src/vec/functions/functions_multi_string_position.cpp
+++ b/be/src/vec/functions/functions_multi_string_position.cpp
@@ -73,14 +73,34 @@ public:
bool use_default_implementation_for_constants() const override { return
true; }
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
return
std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeInt32>()));
}
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
- ColumnPtr haystack_ptr = block.get_by_position(arguments[0]).column;
- ColumnPtr needles_ptr = block.get_by_position(arguments[1]).column;
+ auto haystack_column = block.get_by_position(arguments[0]).column;
+ auto haystack_ptr = haystack_column;
+
+ auto needles_column = block.get_by_position(arguments[1]).column;
+ auto needles_ptr = needles_column;
+
+ bool haystack_nullable = false;
+ bool needles_nullable = false;
+
+ if (haystack_column->is_nullable()) {
+ haystack_ptr =
check_and_get_column<ColumnNullable>(haystack_column.get())
+ ->get_nested_column_ptr();
+ haystack_nullable = true;
+ }
+
+ if (needles_column->is_nullable()) {
+ needles_ptr =
check_and_get_column<ColumnNullable>(needles_column.get())
+ ->get_nested_column_ptr();
+ needles_nullable = true;
+ }
const ColumnString* col_haystack_vector =
check_and_get_column<ColumnString>(&*haystack_ptr);
@@ -122,6 +142,30 @@ public:
return status;
}
+ if (haystack_nullable) {
+ auto column_nullable =
check_and_get_column<ColumnNullable>(haystack_column.get());
+ auto& null_map = column_nullable->get_null_map_data();
+ for (size_t i = 0; i != input_rows_count; ++i) {
+ if (null_map[i] == 1) {
+ for (size_t offset = offsets_res[i - 1]; offset !=
offsets_res[i]; ++offset) {
+ vec_res[offset] = 0;
+ }
+ }
+ }
+ }
+
+ if (needles_nullable) {
+ auto column_nullable =
check_and_get_column<ColumnNullable>(needles_column.get());
+ auto& null_map = column_nullable->get_null_map_data();
+ for (size_t i = 0; i != input_rows_count; ++i) {
+ if (null_map[i] == 1) {
+ for (size_t offset = offsets_res[i - 1]; offset !=
offsets_res[i]; ++offset) {
+ vec_res[offset] = 0;
+ }
+ }
+ }
+ }
+
auto nullable_col =
ColumnNullable::create(std::move(col_res),
ColumnUInt8::create(col_res->size(), 0));
block.get_by_position(result).column =
@@ -151,6 +195,9 @@ public:
std::vector<SingleSearcher> searchers;
searchers.reserve(needles_size);
for (const auto& needle : needles_arr) {
+ if (needle.get_type() != Field::Types::String) {
+ return Status::InvalidArgument("invalid type of needle {}",
needle.get_type_name());
+ }
searchers.emplace_back(needle.get<StringRef>().data,
needle.get<StringRef>().size);
}
diff --git a/be/src/vec/functions/functions_multi_string_search.cpp b/be/src/vec/functions/functions_multi_string_search.cpp
index 5c19fe6c8b..0791a96125 100644
--- a/be/src/vec/functions/functions_multi_string_search.cpp
+++ b/be/src/vec/functions/functions_multi_string_search.cpp
@@ -71,14 +71,34 @@ public:
bool use_default_implementation_for_constants() const override { return
true; }
+ bool use_default_implementation_for_nulls() const override { return false;
}
+
DataTypePtr get_return_type_impl(const DataTypes& arguments) const
override {
return Impl::get_return_type();
}
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
- ColumnPtr haystack_ptr = block.get_by_position(arguments[0]).column;
- ColumnPtr needles_ptr = block.get_by_position(arguments[1]).column;
+ auto haystack_column = block.get_by_position(arguments[0]).column;
+ auto haystack_ptr = haystack_column;
+
+ auto needles_column = block.get_by_position(arguments[1]).column;
+ auto needles_ptr = needles_column;
+
+ bool haystack_nullable = false;
+ bool needles_nullable = false;
+
+ if (haystack_column->is_nullable()) {
+ haystack_ptr =
check_and_get_column<ColumnNullable>(haystack_column.get())
+ ->get_nested_column_ptr();
+ haystack_nullable = true;
+ }
+
+ if (needles_column->is_nullable()) {
+ needles_ptr =
check_and_get_column<ColumnNullable>(needles_column.get())
+ ->get_nested_column_ptr();
+ needles_nullable = true;
+ }
const ColumnString* col_haystack_vector =
check_and_get_column<ColumnString>(&*haystack_ptr);
@@ -104,24 +124,44 @@ public:
auto& offsets_res = col_offsets->get_data();
Status status;
- if (col_needles_const)
+ if (col_needles_const) {
status = Impl::vector_constant(
col_haystack_vector->get_chars(),
col_haystack_vector->get_offsets(),
col_needles_const->get_value<Array>(), vec_res,
offsets_res, allow_hyperscan_,
max_hyperscan_regexp_length_,
max_hyperscan_regexp_total_length_);
- else
+ } else {
status = Impl::vector_vector(
col_haystack_vector->get_chars(),
col_haystack_vector->get_offsets(),
col_needles_vector->get_data(),
col_needles_vector->get_offsets(), vec_res,
offsets_res, allow_hyperscan_,
max_hyperscan_regexp_length_,
max_hyperscan_regexp_total_length_);
- if (!status.ok()) return status;
+ }
+
+ if (!status.ok()) {
+ return status;
+ }
- if constexpr (Impl::is_column_array)
- block.get_by_position(result).column =
- ColumnArray::create(std::move(col_res),
std::move(col_offsets));
- else
- block.replace_by_position(result, std::move(col_res));
+ if (haystack_nullable) {
+ auto column_nullable =
check_and_get_column<ColumnNullable>(haystack_column.get());
+ auto& null_map = column_nullable->get_null_map_data();
+ for (size_t i = 0; i != input_rows_count; ++i) {
+ if (null_map[i] == 1) {
+ vec_res[i] = 0;
+ }
+ }
+ }
+
+ if (needles_nullable) {
+ auto column_nullable =
check_and_get_column<ColumnNullable>(needles_column.get());
+ auto& null_map = column_nullable->get_null_map_data();
+ for (size_t i = 0; i != input_rows_count; ++i) {
+ if (null_map[i] == 1) {
+ vec_res[i] = 0;
+ }
+ }
+ }
+
+ block.replace_by_position(result, std::move(col_res));
return status;
}
@@ -145,7 +185,6 @@ struct FunctionMultiMatchAnyImpl {
static constexpr bool FindAnyIndex = (Find ==
MultiMatchTraits::Find::AnyIndex);
static constexpr auto name = "multi_match_any";
- static constexpr bool is_column_array = false;
static auto get_return_type() { return
std::make_shared<DataTypeNumber<ResultType>>(); }
@@ -231,6 +270,10 @@ struct FunctionMultiMatchAnyImpl {
->get_nested_column();
const ColumnString* needles_data_string =
check_and_get_column<ColumnString>(nested_column);
+ if (!needles_data_string) {
+ return Status::InvalidArgument("needles should be string");
+ }
+
std::vector<StringRef> needles;
for (size_t i = 0; i < haystack_offsets.size(); ++i) {
needles.reserve(needles_offsets[i] - prev_needles_offset);
diff --git a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
index 017fa620c4..f408d2d543 100644
--- a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
+++ b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
@@ -5,6 +5,9 @@
[1, 13, 8, 0, 0]
[1, 13, 8, 0, 0]
[0, 6, 0, 0, 0]
+[0, 0, 0, 0, 0]
+[0, 0, 0, 0, 0]
+[0, 0, 0, 0, 0]
-- !table_select2 --
[0, 0]
@@ -12,6 +15,9 @@
[0, 8]
[1, 8, 0, 13]
[1, 1, 4, 0]
+[]
+[]
+[0, 0, 0]
-- !select1 --
[4, 1, 1, 2, 6, 1, 1, 0, 4, 1, 14, 0, 10, 0, 16, 6]
diff --git a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_search.out b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_search.out
index 2eef34a650..efaec678d0 100644
--- a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_search.out
+++ b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_search.out
@@ -1,45 +1,87 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select1 --
+1
+1
+1
+1
+1
+0
+0
+0
+
+-- !select2 --
+0
+1
+1
+1
+1
+0
+0
+0
+
-- !select --
0
+
-- !select --
0
+
-- !select --
0
+
-- !select --
1
+
-- !select --
0
+
-- !select --
0
+
-- !select --
0
+
-- !select --
1
+
-- !select --
1
+
-- !select --
1
+
-- !select --
0
+
-- !select --
1
+
-- !select --
0
+
-- !select --
1
+
-- !select --
1
+
-- !select --
1
+
-- !select --
0
+
-- !select --
0
+
-- !select --
0
+
-- !select --
1
+
-- !select --
1
+
-- !select --
-1
\ No newline at end of file
+1
+
diff --git a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
index fa3ec92b66..f6a8aa110e 100644
--- a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
+++ b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
@@ -16,14 +16,14 @@
// under the License.
suite("test_multi_string_position") {
- def table_name = "strings"
+ def table_name = "test_multi_string_position_strings"
sql """ DROP TABLE IF EXISTS ${table_name} """
sql """ CREATE TABLE IF NOT EXISTS ${table_name}
(
`col1` INT NOT NULL,
- `content` TEXT NOT NULL,
- `mode` ARRAY<TEXT> NOT NULL
+ `content` TEXT NULL,
+ `mode` ARRAY<TEXT> NULL
) ENGINE=OLAP
DUPLICATE KEY(`col1`)
COMMENT 'OLAP'
@@ -40,7 +40,10 @@ suite("test_multi_string_position") {
(2, 'Hello, World!', ['hello', 'world', 'Hello', '!'] ),
(3, 'hello, world!', ['Hello', 'world'] ),
(4, 'hello, world!', ['hello', 'world', 'Hello', '!'] ),
- (5, 'HHHHW!', ['H', 'HHHH', 'HW', 'WH'] );
+ (5, 'HHHHW!', ['H', 'HHHH', 'HW', 'WH'] ),
+ (6, 'abc', null),
+ (7, null, null),
+ (8, null, ['a', 'b', 'c']);
"""
qt_table_select1 "select multi_search_all_positions(content, ['hello',
'!', 'world', 'Hello', 'World']) from ${table_name} order by col1"
diff --git a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy
index e258ab7fc8..5a3229ce36 100644
--- a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy
+++ b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy
@@ -16,6 +16,39 @@
// under the License.
suite("test_multi_string_search") {
+ def table_name = "test_multi_string_search_strings"
+
+ sql """ DROP TABLE IF EXISTS ${table_name} """
+ sql """ CREATE TABLE IF NOT EXISTS ${table_name}
+ (
+ `col1` INT NOT NULL,
+ `content` TEXT NULL,
+ `mode` ARRAY<TEXT> NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`col1`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`col1`) BUCKETS 3
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "in_memory" = "false",
+ "storage_format" = "V2"
+ );
+ """
+
+ sql """ INSERT INTO ${table_name} (col1, content, mode) VALUES
+ (1, 'Hello, World!', ['hello', 'world'] ),
+ (2, 'Hello, World!', ['hello', 'world', 'Hello', '!'] ),
+ (3, 'hello, world!', ['Hello', 'world'] ),
+ (4, 'hello, world!', ['hello', 'world', 'Hello', '!'] ),
+ (5, 'HHHHW!', ['H', 'HHHH', 'HW', 'WH'] ),
+ (6, 'abc', null),
+ (7, null, null),
+ (8, null, ['a', 'b', 'c']);
+ """
+
+ qt_select1 "select multi_match_any(content, ['hello', '!', 'world',
'Hello', 'World']) from ${table_name} order by col1"
+ qt_select2 "select multi_match_any(content, mode) from ${table_name} order
by col1"
+
qt_select "select multi_match_any('mpnsguhwsitzvuleiwebwjfitmsg',
['wbirxqoabpblrnvvmjizj', 'cfcxhuvrexyzyjsh', 'oldhtubemyuqlqbwvwwkwin',
'bumoozxdkjglzu', 'intxlfohlxmajjomw', 'dxkeghohv', 'arsvmwwkjeopnlwnan',
'ouugllgowpqtaxslcopkytbfhifaxbgt', 'hkedmjlbcrzvryaopjqdjjc',
'tbqkljywstuahzh', 'o', 'wowoclosyfcuwotmvjygzuzhrery', 'vpefjiffkhlggntcu',
'ytdixvasrorhripzfhjdmlhqksmctyycwp'])"
qt_select "select multi_match_any('qjjzqexjpgkglgxpzrbqbnskq',
['vaiatcjacmlffdzsejpdareqzy', 'xspcfzdufkmecud', 'bcvtbuqtctq',
'nkcopwbfytgemkqcfnnno', 'dylxnzuyhq', 'tno', 'scukuhufly', 'cdyquzuqlptv',
'ohluyfeksyxepezdhqmtfmgkvzsyph', 'ualzwtahvqvtijwp', 'jg',
'gwbawqlngzcknzgtmlj', 'qimvjcgbkkp', 'eaedbcgyrdvv', 'qcwrncjoewwedyyewcdkh',
'uqcvhngoqngmitjfxpznqomertqnqcveoqk', 'ydrgjiankgygpm', 'axepgap'])"
qt_select "select multi_match_any('fdkmtqmxnegwvnjhghjq',
['vynkybvdmhgeezybbdqfrukibisj', 'knazzamgjjpavwhvdkwigykh',
'peumnifrmdhhmrqqnemw', 'lmsnyvqoisinlaqobxojlwfbi', 'oqwfzs',
'dymudxxeodwjpgbibnkvr', 'vomtfsnizkplgzktqyoiw', 'yoyfuhlpgrzds', 'cefao',
'gi', 'srpgxfjwl', 'etsjusdeiwbfe', 'ikvtzdopxo', 'ljfkavrau',
'soqdhxtenfrkmeic', 'ktprjwfcelzbup', 'pcvuoddqwsaurcqdtjfnczekwni',
'agkqkqxkfbkfgyqliahsljim'])"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]