This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 401750047cd branch-3.0: [fix](function) Undefined behavior in 
parse_url #49149 (#49200)
401750047cd is described below

commit 401750047cda173f8bcfe061e71b25f907668717
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Mar 20 14:50:45 2025 +0800

    branch-3.0: [fix](function) Undefined behavior in parse_url #49149 (#49200)
    
    Cherry-picked from #49149
    
    Co-authored-by: Jerry Hu <[email protected]>
---
 be/src/vec/functions/function_string.h             |   6 +++++-
 be/test/vec/function/function_string_test.cpp      |   3 ++-
 .../data/function_p0/test_function_string.out      | Bin 121 -> 188 bytes
 .../suites/function_p0/test_function_string.groovy |  24 +++++++++++++++++++++
 4 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/functions/function_string.h 
b/be/src/vec/functions/function_string.h
index c71920c137f..db578ea0570 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -122,6 +122,7 @@ struct StringOP {
 
     static void push_value_string(const std::string_view& string_value, int 
index,
                                   ColumnString::Chars& chars, 
ColumnString::Offsets& offsets) {
+        DCHECK(string_value.data() != nullptr);
         ColumnString::check_chars_length(chars.size() + string_value.size(), 
offsets.size());
 
         chars.insert(string_value.data(), string_value.data() + 
string_value.size());
@@ -2684,11 +2685,14 @@ public:
             StringRef url_val = 
url_col->get_data_at(index_check_const<url_const>(i));
             StringRef parse_res;
             if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
+                if (parse_res.empty()) [[unlikely]] {
+                    StringOP::push_empty_string(i, res_chars, res_offsets);
+                    continue;
+                }
                 StringOP::push_value_string(std::string_view(parse_res.data, 
parse_res.size), i,
                                             res_chars, res_offsets);
             } else {
                 StringOP::push_null_string(i, res_chars, res_offsets, 
null_map_data);
-                continue;
             }
         }
         return Status::OK();
diff --git a/be/test/vec/function/function_string_test.cpp 
b/be/test/vec/function/function_string_test.cpp
index f4381505276..687def9d4a5 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -2284,7 +2284,8 @@ TEST(function_string_test, function_parse_url_test) {
                 {{std::string(
                           
"https://www.facebook.com/aa/bb?returnpage=https://www.facebook.com/"),
                   std::string("HosT")},
-                 std::string("www.facebook.com")}};
+                 std::string("www.facebook.com")},
+                {{std::string("http://www.baidu.com"), std::string("FILE")}, 
{std::string("")}}};
 
         check_function_all_arg_comb<DataTypeString, true>(func_name, 
input_types, data_set);
     }
diff --git a/regression-test/data/function_p0/test_function_string.out 
b/regression-test/data/function_p0/test_function_string.out
index 226d3e675f3..6524bb82fc0 100644
Binary files a/regression-test/data/function_p0/test_function_string.out and 
b/regression-test/data/function_p0/test_function_string.out differ
diff --git a/regression-test/suites/function_p0/test_function_string.groovy 
b/regression-test/suites/function_p0/test_function_string.groovy
index 5aa46fb6c52..28e4d832336 100644
--- a/regression-test/suites/function_p0/test_function_string.groovy
+++ b/regression-test/suites/function_p0/test_function_string.groovy
@@ -47,4 +47,28 @@ suite("test_function_string") {
         drop table if exists test_tb_function_space;
     """
 
+
+    sql """
+        drop table if exists test_parse_url;
+    """
+
+    sql """
+     CREATE TABLE `test_parse_url` (
+        `id` int NULL,
+        `url` text NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY RANDOM BUCKETS AUTO
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+
+    sql """
+        insert into test_parse_url values (1, 'http://www.facebook.com'), (2, 
"http://www.google.com/test?name=abc&age=20");
+    """
+
+    qt_sql """
+        select parse_url(url, 'HOST') as host, parse_url(url, 'FILE') as file 
from test_parse_url order by id;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to