This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 401750047cd branch-3.0: [fix](function) Undefined behavior in parse_url #49149 (#49200)
401750047cd is described below
commit 401750047cda173f8bcfe061e71b25f907668717
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Mar 20 14:50:45 2025 +0800
branch-3.0: [fix](function) Undefined behavior in parse_url #49149 (#49200)
Cherry-picked from #49149
Co-authored-by: Jerry Hu <[email protected]>
---
be/src/vec/functions/function_string.h | 6 +++++-
be/test/vec/function/function_string_test.cpp | 3 ++-
.../data/function_p0/test_function_string.out | Bin 121 -> 188 bytes
.../suites/function_p0/test_function_string.groovy | 24 +++++++++++++++++++++
4 files changed, 31 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/functions/function_string.h
b/be/src/vec/functions/function_string.h
index c71920c137f..db578ea0570 100644
--- a/be/src/vec/functions/function_string.h
+++ b/be/src/vec/functions/function_string.h
@@ -122,6 +122,7 @@ struct StringOP {
static void push_value_string(const std::string_view& string_value, int
index,
ColumnString::Chars& chars,
ColumnString::Offsets& offsets) {
+ DCHECK(string_value.data() != nullptr);
ColumnString::check_chars_length(chars.size() + string_value.size(),
offsets.size());
chars.insert(string_value.data(), string_value.data() +
string_value.size());
@@ -2684,11 +2685,14 @@ public:
StringRef url_val =
url_col->get_data_at(index_check_const<url_const>(i));
StringRef parse_res;
if (UrlParser::parse_url(url_val, url_part, &parse_res)) {
+ if (parse_res.empty()) [[unlikely]] {
+ StringOP::push_empty_string(i, res_chars, res_offsets);
+ continue;
+ }
StringOP::push_value_string(std::string_view(parse_res.data,
parse_res.size), i,
res_chars, res_offsets);
} else {
StringOP::push_null_string(i, res_chars, res_offsets,
null_map_data);
- continue;
}
}
return Status::OK();
diff --git a/be/test/vec/function/function_string_test.cpp
b/be/test/vec/function/function_string_test.cpp
index f4381505276..687def9d4a5 100644
--- a/be/test/vec/function/function_string_test.cpp
+++ b/be/test/vec/function/function_string_test.cpp
@@ -2284,7 +2284,8 @@ TEST(function_string_test, function_parse_url_test) {
{{std::string(
"https://www.facebook.com/aa/bb?returnpage=https://www.facebook.com/"),
std::string("HosT")},
- std::string("www.facebook.com")}};
+ std::string("www.facebook.com")},
+ {{std::string("http://www.baidu.com"), std::string("FILE")},
{std::string("")}}};
check_function_all_arg_comb<DataTypeString, true>(func_name,
input_types, data_set);
}
diff --git a/regression-test/data/function_p0/test_function_string.out
b/regression-test/data/function_p0/test_function_string.out
index 226d3e675f3..6524bb82fc0 100644
Binary files a/regression-test/data/function_p0/test_function_string.out and
b/regression-test/data/function_p0/test_function_string.out differ
diff --git a/regression-test/suites/function_p0/test_function_string.groovy
b/regression-test/suites/function_p0/test_function_string.groovy
index 5aa46fb6c52..28e4d832336 100644
--- a/regression-test/suites/function_p0/test_function_string.groovy
+++ b/regression-test/suites/function_p0/test_function_string.groovy
@@ -47,4 +47,28 @@ suite("test_function_string") {
drop table if exists test_tb_function_space;
"""
+
+ sql """
+ drop table if exists test_parse_url;
+ """
+
+ sql """
+ CREATE TABLE `test_parse_url` (
+ `id` int NULL,
+ `url` text NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY RANDOM BUCKETS AUTO
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+
+ sql """
+ insert into test_parse_url values (1, 'http://www.facebook.com'), (2,
"http://www.google.com/test?name=abc&age=20");
+ """
+
+ qt_sql """
+ select parse_url(url, 'HOST') as host, parse_url(url, 'FILE') as file
from test_parse_url order by id;
+ """
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org