This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 47edc5a06e [fix](functions) Support nullable column for multi_string functions (#19498)
47edc5a06e is described below

commit 47edc5a06ef09744f8019fdc39aa6b5c44d0206d
Author: Jerry Hu <[email protected]>
AuthorDate: Thu May 11 01:13:13 2023 +0800

    [fix](functions) Support nullable column for multi_string functions (#19498)
---
 .../functions/functions_multi_string_position.cpp  | 51 ++++++++++++++++-
 .../functions/functions_multi_string_search.cpp    | 65 ++++++++++++++++++----
 .../test_multi_string_position.out                 |  6 ++
 .../search_functions/test_multi_string_search.out  | 44 ++++++++++++++-
 .../test_multi_string_position.groovy              | 11 ++--
 .../test_multi_string_search.groovy                | 33 +++++++++++
 6 files changed, 192 insertions(+), 18 deletions(-)

diff --git a/be/src/vec/functions/functions_multi_string_position.cpp 
b/be/src/vec/functions/functions_multi_string_position.cpp
index 062f34f995..3db079efad 100644
--- a/be/src/vec/functions/functions_multi_string_position.cpp
+++ b/be/src/vec/functions/functions_multi_string_position.cpp
@@ -73,14 +73,34 @@ public:
 
     bool use_default_implementation_for_constants() const override { return 
true; }
 
+    bool use_default_implementation_for_nulls() const override { return false; 
}
+
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
         return 
std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeInt32>()));
     }
 
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) override {
-        ColumnPtr haystack_ptr = block.get_by_position(arguments[0]).column;
-        ColumnPtr needles_ptr = block.get_by_position(arguments[1]).column;
+        auto haystack_column = block.get_by_position(arguments[0]).column;
+        auto haystack_ptr = haystack_column;
+
+        auto needles_column = block.get_by_position(arguments[1]).column;
+        auto needles_ptr = needles_column;
+
+        bool haystack_nullable = false;
+        bool needles_nullable = false;
+
+        if (haystack_column->is_nullable()) {
+            haystack_ptr = 
check_and_get_column<ColumnNullable>(haystack_column.get())
+                                   ->get_nested_column_ptr();
+            haystack_nullable = true;
+        }
+
+        if (needles_column->is_nullable()) {
+            needles_ptr = 
check_and_get_column<ColumnNullable>(needles_column.get())
+                                  ->get_nested_column_ptr();
+            needles_nullable = true;
+        }
 
         const ColumnString* col_haystack_vector =
                 check_and_get_column<ColumnString>(&*haystack_ptr);
@@ -122,6 +142,30 @@ public:
             return status;
         }
 
+        if (haystack_nullable) {
+            auto column_nullable = 
check_and_get_column<ColumnNullable>(haystack_column.get());
+            auto& null_map = column_nullable->get_null_map_data();
+            for (size_t i = 0; i != input_rows_count; ++i) {
+                if (null_map[i] == 1) {
+                    for (size_t offset = offsets_res[i - 1]; offset != 
offsets_res[i]; ++offset) {
+                        vec_res[offset] = 0;
+                    }
+                }
+            }
+        }
+
+        if (needles_nullable) {
+            auto column_nullable = 
check_and_get_column<ColumnNullable>(needles_column.get());
+            auto& null_map = column_nullable->get_null_map_data();
+            for (size_t i = 0; i != input_rows_count; ++i) {
+                if (null_map[i] == 1) {
+                    for (size_t offset = offsets_res[i - 1]; offset != 
offsets_res[i]; ++offset) {
+                        vec_res[offset] = 0;
+                    }
+                }
+            }
+        }
+
         auto nullable_col =
                 ColumnNullable::create(std::move(col_res), 
ColumnUInt8::create(col_res->size(), 0));
         block.get_by_position(result).column =
@@ -151,6 +195,9 @@ public:
         std::vector<SingleSearcher> searchers;
         searchers.reserve(needles_size);
         for (const auto& needle : needles_arr) {
+            if (needle.get_type() != Field::Types::String) {
+                return Status::InvalidArgument("invalid type of needle {}", 
needle.get_type_name());
+            }
             searchers.emplace_back(needle.get<StringRef>().data, 
needle.get<StringRef>().size);
         }
 
diff --git a/be/src/vec/functions/functions_multi_string_search.cpp 
b/be/src/vec/functions/functions_multi_string_search.cpp
index 5c19fe6c8b..0791a96125 100644
--- a/be/src/vec/functions/functions_multi_string_search.cpp
+++ b/be/src/vec/functions/functions_multi_string_search.cpp
@@ -71,14 +71,34 @@ public:
 
     bool use_default_implementation_for_constants() const override { return 
true; }
 
+    bool use_default_implementation_for_nulls() const override { return false; 
}
+
     DataTypePtr get_return_type_impl(const DataTypes& arguments) const 
override {
         return Impl::get_return_type();
     }
 
     Status execute_impl(FunctionContext* context, Block& block, const 
ColumnNumbers& arguments,
                         size_t result, size_t input_rows_count) override {
-        ColumnPtr haystack_ptr = block.get_by_position(arguments[0]).column;
-        ColumnPtr needles_ptr = block.get_by_position(arguments[1]).column;
+        auto haystack_column = block.get_by_position(arguments[0]).column;
+        auto haystack_ptr = haystack_column;
+
+        auto needles_column = block.get_by_position(arguments[1]).column;
+        auto needles_ptr = needles_column;
+
+        bool haystack_nullable = false;
+        bool needles_nullable = false;
+
+        if (haystack_column->is_nullable()) {
+            haystack_ptr = 
check_and_get_column<ColumnNullable>(haystack_column.get())
+                                   ->get_nested_column_ptr();
+            haystack_nullable = true;
+        }
+
+        if (needles_column->is_nullable()) {
+            needles_ptr = 
check_and_get_column<ColumnNullable>(needles_column.get())
+                                  ->get_nested_column_ptr();
+            needles_nullable = true;
+        }
 
         const ColumnString* col_haystack_vector =
                 check_and_get_column<ColumnString>(&*haystack_ptr);
@@ -104,24 +124,44 @@ public:
         auto& offsets_res = col_offsets->get_data();
 
         Status status;
-        if (col_needles_const)
+        if (col_needles_const) {
             status = Impl::vector_constant(
                     col_haystack_vector->get_chars(), 
col_haystack_vector->get_offsets(),
                     col_needles_const->get_value<Array>(), vec_res, 
offsets_res, allow_hyperscan_,
                     max_hyperscan_regexp_length_, 
max_hyperscan_regexp_total_length_);
-        else
+        } else {
             status = Impl::vector_vector(
                     col_haystack_vector->get_chars(), 
col_haystack_vector->get_offsets(),
                     col_needles_vector->get_data(), 
col_needles_vector->get_offsets(), vec_res,
                     offsets_res, allow_hyperscan_, 
max_hyperscan_regexp_length_,
                     max_hyperscan_regexp_total_length_);
-        if (!status.ok()) return status;
+        }
+
+        if (!status.ok()) {
+            return status;
+        }
 
-        if constexpr (Impl::is_column_array)
-            block.get_by_position(result).column =
-                    ColumnArray::create(std::move(col_res), 
std::move(col_offsets));
-        else
-            block.replace_by_position(result, std::move(col_res));
+        if (haystack_nullable) {
+            auto column_nullable = 
check_and_get_column<ColumnNullable>(haystack_column.get());
+            auto& null_map = column_nullable->get_null_map_data();
+            for (size_t i = 0; i != input_rows_count; ++i) {
+                if (null_map[i] == 1) {
+                    vec_res[i] = 0;
+                }
+            }
+        }
+
+        if (needles_nullable) {
+            auto column_nullable = 
check_and_get_column<ColumnNullable>(needles_column.get());
+            auto& null_map = column_nullable->get_null_map_data();
+            for (size_t i = 0; i != input_rows_count; ++i) {
+                if (null_map[i] == 1) {
+                    vec_res[i] = 0;
+                }
+            }
+        }
+
+        block.replace_by_position(result, std::move(col_res));
 
         return status;
     }
@@ -145,7 +185,6 @@ struct FunctionMultiMatchAnyImpl {
     static constexpr bool FindAnyIndex = (Find == 
MultiMatchTraits::Find::AnyIndex);
 
     static constexpr auto name = "multi_match_any";
-    static constexpr bool is_column_array = false;
 
     static auto get_return_type() { return 
std::make_shared<DataTypeNumber<ResultType>>(); }
 
@@ -231,6 +270,10 @@ struct FunctionMultiMatchAnyImpl {
                         ->get_nested_column();
         const ColumnString* needles_data_string = 
check_and_get_column<ColumnString>(nested_column);
 
+        if (!needles_data_string) {
+            return Status::InvalidArgument("needles should be string");
+        }
+
         std::vector<StringRef> needles;
         for (size_t i = 0; i < haystack_offsets.size(); ++i) {
             needles.reserve(needles_offsets[i] - prev_needles_offset);
diff --git 
a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
 
b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
index 017fa620c4..f408d2d543 100644
--- 
a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
+++ 
b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_position.out
@@ -5,6 +5,9 @@
 [1, 13, 8, 0, 0]
 [1, 13, 8, 0, 0]
 [0, 6, 0, 0, 0]
+[0, 0, 0, 0, 0]
+[0, 0, 0, 0, 0]
+[0, 0, 0, 0, 0]
 
 -- !table_select2 --
 [0, 0]
@@ -12,6 +15,9 @@
 [0, 8]
 [1, 8, 0, 13]
 [1, 1, 4, 0]
+[]
+[]
+[0, 0, 0]
 
 -- !select1 --
 [4, 1, 1, 2, 6, 1, 1, 0, 4, 1, 14, 0, 10, 0, 16, 6]
diff --git 
a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_search.out
 
b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_search.out
index 2eef34a650..efaec678d0 100644
--- 
a/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_search.out
+++ 
b/regression-test/data/query_p0/sql_functions/search_functions/test_multi_string_search.out
@@ -1,45 +1,87 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select1 --
+1
+1
+1
+1
+1
+0
+0
+0
+
+-- !select2 --
+0
+1
+1
+1
+1
+0
+0
+0
+
 -- !select --
 0
+
 -- !select --
 0
+
 -- !select --
 0
+
 -- !select --
 1
+
 -- !select --
 0
+
 -- !select --
 0
+
 -- !select --
 0
+
 -- !select --
 1
+
 -- !select --
 1
+
 -- !select --
 1
+
 -- !select --
 0
+
 -- !select --
 1
+
 -- !select --
 0
+
 -- !select --
 1
+
 -- !select --
 1
+
 -- !select --
 1
+
 -- !select --
 0
+
 -- !select --
 0
+
 -- !select --
 0
+
 -- !select --
 1
+
 -- !select --
 1
+
 -- !select --
-1
\ No newline at end of file
+1
+
diff --git 
a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
 
b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
index fa3ec92b66..f6a8aa110e 100644
--- 
a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_position.groovy
@@ -16,14 +16,14 @@
 // under the License.
 
 suite("test_multi_string_position") {
-    def table_name = "strings"
+    def table_name = "test_multi_string_position_strings"
 
     sql """ DROP TABLE IF EXISTS ${table_name} """
     sql """ CREATE TABLE IF NOT EXISTS ${table_name}
             (
                 `col1`      INT NOT NULL,
-                `content`   TEXT NOT NULL,
-                `mode`      ARRAY<TEXT> NOT NULL
+                `content`   TEXT NULL,
+                `mode`      ARRAY<TEXT> NULL
             ) ENGINE=OLAP
             DUPLICATE KEY(`col1`)
             COMMENT 'OLAP'
@@ -40,7 +40,10 @@ suite("test_multi_string_position") {
             (2, 'Hello, World!', ['hello', 'world', 'Hello', '!'] ),
             (3, 'hello, world!', ['Hello', 'world'] ),
             (4, 'hello, world!', ['hello', 'world', 'Hello', '!'] ),
-            (5, 'HHHHW!', ['H', 'HHHH', 'HW', 'WH'] );
+            (5, 'HHHHW!', ['H', 'HHHH', 'HW', 'WH'] ),
+            (6, 'abc', null),
+            (7, null, null),
+            (8, null, ['a', 'b', 'c']);
         """
 
     qt_table_select1 "select multi_search_all_positions(content, ['hello', 
'!', 'world', 'Hello', 'World']) from ${table_name} order by col1"
diff --git 
a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy
 
b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy
index e258ab7fc8..5a3229ce36 100644
--- 
a/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy
+++ 
b/regression-test/suites/query_p0/sql_functions/search_functions/test_multi_string_search.groovy
@@ -16,6 +16,39 @@
 // under the License.
 
 suite("test_multi_string_search") {
+    def table_name = "test_multi_string_search_strings"
+
+    sql """ DROP TABLE IF EXISTS ${table_name} """
+    sql """ CREATE TABLE IF NOT EXISTS ${table_name}
+            (
+                `col1`      INT NOT NULL,
+                `content`   TEXT NULL,
+                `mode`      ARRAY<TEXT> NULL
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`col1`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`col1`) BUCKETS 3
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "in_memory" = "false",
+            "storage_format" = "V2"
+            );
+        """
+
+    sql """ INSERT INTO ${table_name} (col1, content, mode) VALUES
+            (1, 'Hello, World!', ['hello', 'world'] ),
+            (2, 'Hello, World!', ['hello', 'world', 'Hello', '!'] ),
+            (3, 'hello, world!', ['Hello', 'world'] ),
+            (4, 'hello, world!', ['hello', 'world', 'Hello', '!'] ),
+            (5, 'HHHHW!', ['H', 'HHHH', 'HW', 'WH'] ),
+            (6, 'abc', null),
+            (7, null, null),
+            (8, null, ['a', 'b', 'c']);
+        """
+
+    qt_select1 "select multi_match_any(content, ['hello', '!', 'world', 
'Hello', 'World']) from ${table_name} order by col1"
+    qt_select2 "select multi_match_any(content, mode) from ${table_name} order 
by col1"
+
     qt_select "select multi_match_any('mpnsguhwsitzvuleiwebwjfitmsg', 
['wbirxqoabpblrnvvmjizj', 'cfcxhuvrexyzyjsh', 'oldhtubemyuqlqbwvwwkwin', 
'bumoozxdkjglzu', 'intxlfohlxmajjomw', 'dxkeghohv', 'arsvmwwkjeopnlwnan', 
'ouugllgowpqtaxslcopkytbfhifaxbgt', 'hkedmjlbcrzvryaopjqdjjc', 
'tbqkljywstuahzh', 'o', 'wowoclosyfcuwotmvjygzuzhrery', 'vpefjiffkhlggntcu', 
'ytdixvasrorhripzfhjdmlhqksmctyycwp'])"
     qt_select "select multi_match_any('qjjzqexjpgkglgxpzrbqbnskq', 
['vaiatcjacmlffdzsejpdareqzy', 'xspcfzdufkmecud', 'bcvtbuqtctq', 
'nkcopwbfytgemkqcfnnno', 'dylxnzuyhq', 'tno', 'scukuhufly', 'cdyquzuqlptv', 
'ohluyfeksyxepezdhqmtfmgkvzsyph', 'ualzwtahvqvtijwp', 'jg', 
'gwbawqlngzcknzgtmlj', 'qimvjcgbkkp', 'eaedbcgyrdvv', 'qcwrncjoewwedyyewcdkh', 
'uqcvhngoqngmitjfxpznqomertqnqcveoqk', 'ydrgjiankgygpm', 'axepgap'])"
     qt_select "select multi_match_any('fdkmtqmxnegwvnjhghjq', 
['vynkybvdmhgeezybbdqfrukibisj', 'knazzamgjjpavwhvdkwigykh', 
'peumnifrmdhhmrqqnemw', 'lmsnyvqoisinlaqobxojlwfbi', 'oqwfzs', 
'dymudxxeodwjpgbibnkvr', 'vomtfsnizkplgzktqyoiw', 'yoyfuhlpgrzds', 'cefao', 
'gi', 'srpgxfjwl', 'etsjusdeiwbfe', 'ikvtzdopxo', 'ljfkavrau', 
'soqdhxtenfrkmeic', 'ktprjwfcelzbup', 'pcvuoddqwsaurcqdtjfnczekwni', 
'agkqkqxkfbkfgyqliahsljim'])"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to