This is an automated email from the ASF dual-hosted git repository.
taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 9bb48b2367 Fix diff on parse_url and refactor SparkParseURL (#9179)
9bb48b2367 is described below
commit 9bb48b2367bee2b502c2d11846808536c6a2ba50
Author: kevinyhzou <[email protected]>
AuthorDate: Wed Apr 9 09:59:33 2025 +0800
Fix diff on parse_url and refactor SparkParseURL (#9179)
---
.../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 12 +
cpp-ch/local-engine/Functions/SparkParseURL.cpp | 398 +++++++--------------
.../utils/clickhouse/ClickHouseTestSettings.scala | 1 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 1 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 1 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 1 -
6 files changed, 147 insertions(+), 267 deletions(-)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 81aa9e8fc7..18db7e4070 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -3405,6 +3405,7 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
}
assert(aggregates.size == 1)
}
+
val sql1 =
"""
|select t1.*, t2.* from nation as t1
@@ -3430,5 +3431,16 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
compareResultsAgainstVanillaSpark(sql3, true, checkOnlyOneAggregate)
}
}
+
+ test("GLUTEN-9177: Fix diff of parse_url") {
+ val create_tbl_sql = "create table test_9177(id bigint, s string) using
parquet"
+ val insert_data_sql = "insert into test_9177 values(1,
'http://user:pass@locahost')," +
+ "(2, 'http://user:pass@localhost/a/b/c'), (3,
'http://user:pass@localhost:10010/a/b/c')"
+ val select_sql = "select id, parse_url(s, 'HOST') from test_9177"
+ spark.sql(create_tbl_sql)
+ spark.sql(insert_data_sql)
+ compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
+ spark.sql("drop table test_9177")
+ }
}
// scalastyle:on line.size.limit
diff --git a/cpp-ch/local-engine/Functions/SparkParseURL.cpp b/cpp-ch/local-engine/Functions/SparkParseURL.cpp
index 1e570d587c..97c177c3f2 100644
--- a/cpp-ch/local-engine/Functions/SparkParseURL.cpp
+++ b/cpp-ch/local-engine/Functions/SparkParseURL.cpp
@@ -19,11 +19,11 @@
#include <Columns/IColumn.h>
#include <DataTypes/DataTypeNullable.h>
#include <Functions/FunctionFactory.h>
-#include <Functions/FunctionStringToString.h>
#include <Functions/FunctionsStringSearchToString.h>
#include <Functions/IFunction.h>
#include <Functions/URL/domain.h>
#include <Poco/Logger.h>
+#include <Poco/URI.h>
#include <memory>
namespace DB
@@ -58,8 +58,17 @@ struct ExtractNullableSubstringImpl
for (size_t i = 0; i < size; ++i)
{
- Extractor::execute(reinterpret_cast<const char
*>(&data[prev_offset]), offsets[i] - prev_offset - 1, start, length);
-
+ String s(reinterpret_cast<const char *>(&data[prev_offset]),
offsets[i] - prev_offset - 1);
+ try
+ {
+ Poco::URI uri(s, false);
+ Extractor::execute(uri, s, start, length);
+ }
+ catch (const Poco::SyntaxException &)
+ {
+ start = nullptr;
+ length = 0;
+ }
res_data.resize_exact(res_data.size() + length + 1);
if (start)
{
@@ -176,11 +185,8 @@ public:
if (const DB::ColumnString * col =
DB::checkAndGetColumn<DB::ColumnString>(column.get()))
{
auto col_res = DB::ColumnString::create();
- auto null_map = DB::DataTypeUInt8().createColumn();
-
- DB::ColumnString::Chars & vec_res = col_res->getChars();
- DB::ColumnString::Offsets & offsets_res = col_res->getOffsets();
- Impl::vector(col->getChars(), col->getOffsets(),
col_needle->getValue<String>(), vec_res, offsets_res, *null_map);
+ auto null_map = DB::ColumnUInt8::create(col->size(), 0);
+ Impl::vector(*col, col_needle->getValue<String>(), *col_res,
*null_map);
return DB::ColumnNullable::create(std::move(col_res),
std::move(null_map));
}
@@ -198,45 +204,19 @@ struct NameSparkExtractURLQuery
struct SparkExtractURLQuery
{
- static size_t getReserveLengthForElement() { return 15; }
+ static size_t getReserveLengthForElement() { return 30; }
- static void execute(DB::Pos data, size_t size, DB::Pos & res_data, size_t
& res_size)
+ static void execute(const Poco::URI & uri, const String & data, DB::Pos &
res_data, size_t & res_size)
{
- res_data = data;
- res_size = 0;
- DB::Pos pos = data;
- DB::Pos end = data + size;
- const static String protocol_delim = "://";
- DB::Pos protocol_delim_pos = static_cast<DB::Pos>(memmem(pos, end -
pos, protocol_delim.data(), protocol_delim.size()));
- DB::Pos query_string_begin = nullptr;
- if (protocol_delim_pos)
- {
- query_string_begin = find_first_symbols<'?', '#'>(pos, end);
- }
- else
- {
- query_string_begin = find_first_symbols<'?', '#', ':'>(pos, end);
- }
- if (query_string_begin && query_string_begin < end)
- {
- if (*query_string_begin != '?')
- {
- res_data = nullptr;
- res_size = 0;
- return;
- }
- res_data = query_string_begin + 1;
- DB::Pos query_string_end = find_first_symbols<'#'>(res_data, end);
- if (query_string_end && query_string_end < end)
- {
- res_size = query_string_end - res_data;
- }
- else
- {
- res_size = end - res_data;
- }
- }
- else
+
+ const auto & query = uri.getRawQuery();
+ res_data = query.data();
+ res_size = query.size();
+ String protocol_prefix = uri.getScheme() + "://";
+ DB::Pos query_string_begin = data.starts_with(protocol_prefix) ?
+ find_first_symbols<'?', '#'>(data.data(), data.data() +
data.size()) :
+ find_first_symbols<'?', '#', ':'>(data.data(), data.data() +
data.size());
+ if (query_string_begin && *query_string_begin != '?')
{
res_data = nullptr;
res_size = 0;
@@ -255,100 +235,73 @@ struct NameSparkExtractURLOneQuery
};
struct SparkExtractURLOneQuery
{
- static void vector(const DB::ColumnString::Chars & data,
- const DB::ColumnString::Offsets & offsets,
- std::string pattern,
- DB::ColumnString::Chars & res_data, DB::ColumnString::Offsets &
res_offsets, DB::IColumn & null_map)
+ static void vector(const DB::ColumnString & col, std::string pattern,
DB::IColumn & res_col, DB::IColumn & null_map)
{
- const static String protocol_delim = "://";
- res_data.reserve_exact(data.size() / 5);
- res_offsets.resize_exact(offsets.size());
-
- pattern += '=';
- const char * param_str = pattern.c_str();
- size_t param_len = pattern.size();
-
- DB::ColumnString::Offset prev_offset = 0;
- DB::ColumnString::Offset res_offset = 0;
-
- for (size_t i = 0; i < offsets.size(); ++i)
+ DB::ColumnUInt8 & null_map_col = assert_cast<DB::ColumnUInt8
&>(null_map);
+ DB::PaddedPODArray<UInt8> & null_map_data = null_map_col.getData();
+ for (size_t i = 0; i < col.size(); ++i)
{
- DB::ColumnString::Offset cur_offset = offsets[i];
-
- const char * str = reinterpret_cast<const char
*>(&data[prev_offset]);
- const char * end = reinterpret_cast<const char
*>(&data[cur_offset]);
-
- /// Find query string or fragment identifier.
- /// Note that we support parameters in fragment identifier in the
same way as in query string.
- DB::Pos protocol_delim_pos = static_cast<DB::Pos>(memmem(str, end
- str, protocol_delim.data(), protocol_delim.size()));
- DB::Pos query_string_begin = nullptr;
- if (protocol_delim_pos)
+ try
{
- query_string_begin = find_first_symbols<'?',
'#'>(protocol_delim_pos, end);
- }
- else
- {
- query_string_begin = find_first_symbols<'?', '#', ':'>(str,
end);
- }
-
- if (*query_string_begin != '?')
- {
- query_string_begin = end;
- }
-
- /// Will point to the beginning of "name=value" pair. Then it will
be reassigned to the beginning of "value".
- const char * param_begin = nullptr;
-
- if (query_string_begin + 1 < end)
- {
- param_begin = query_string_begin + 1;
-
- while (true)
+ const String s = col.getDataAt(i).toString();
+ Poco::URI uri(s, false);
+
+ String protocol_prefix = uri.getScheme() + "://";
+ DB::Pos query_string_begin = s.starts_with(protocol_prefix) ?
+ find_first_symbols<'?', '#'>(s.data(), s.data() +
s.size()) :
+ find_first_symbols<'?', '#', ':'>(s.data(), s.data() +
s.size());
+ if (query_string_begin && *query_string_begin != '?')
{
- param_begin = static_cast<const char
*>(memmem(param_begin, end - param_begin, param_str, param_len));
-
- if (!param_begin)
- break;
+ res_col.insertDefault();
+ null_map_data[i] = 1;
+ continue;
+ }
- if (param_begin[-1] != '?' && param_begin[-1] != '#' &&
param_begin[-1] != '&')
+ const String & query = uri.getRawQuery();
+ DB::Pos query_pos = query.data();
+ auto getMatchedValue = [&](const DB::Pos & begin_pos, const
size_t len) -> bool
+ {
+ for (size_t j = 0; j < len; ++j)
{
- /// Parameter name is different but has the same
suffix.
- param_begin += param_len;
- continue;
+ if (*(begin_pos + j) == '=')
+ {
+ if (pattern == String(begin_pos, j))
+ {
+ res_col.insertData(begin_pos + j + 1, len - j
- 1);
+ return true;
+ }
+ }
}
- else
+ return false;
+ };
+
+ bool matched = false;
+ for (size_t j = 0; j < query.size(); ++j)
+ {
+ if (query.at(j) == '&')
{
- param_begin += param_len;
- break;
+ if(getMatchedValue(query_pos, query.data() + j -
query_pos))
+ {
+ matched = true;
+ break;
+ }
+ else
+ query_pos = query.data() + j + 1;
}
}
- }
+ if (!matched && query_pos < query.data() + query.size())
+ matched = getMatchedValue(query_pos, query.data() +
query.size() - query_pos);
- if (param_begin)
- {
- const char * param_end = find_first_symbols<'&',
'#'>(param_begin, end);
- if (param_end == end)
- param_end = param_begin + strlen(param_begin);
-
- size_t param_size = param_end - param_begin;
-
- res_data.resize_exact(res_offset + param_size + 1);
- memcpySmallAllowReadWriteOverflow15(&res_data[res_offset],
param_begin, param_size);
- res_offset += param_size;
- null_map.insert(0);
+ if (!matched)
+ res_col.insertDefault();
+
+ null_map_data[i] = !matched;
}
- else
+ catch (const Poco::SyntaxException &)
{
- /// No parameter found, put empty string in result.
- res_data.resize_exact(res_offset + 1);
- null_map.insert(1);
+ res_col.insertDefault();
+ null_map_data[i] = 1;
}
-
- res_data[res_offset] = 0;
- ++res_offset;
- res_offsets[i] = res_offset;
-
- prev_offset = cur_offset;
}
}
};
@@ -362,40 +315,18 @@ REGISTER_FUNCTION(SparkFunctionURLOneQuery)
struct SparkExtractURLHost
{
- static size_t getReserveLengthForElement() { return 15; }
+ static size_t getReserveLengthForElement() { return 30; }
- static void execute(DB::Pos data, size_t size, DB::Pos & res_data, size_t
& res_size)
+ static void execute(const Poco::URI & uri, const String &, DB::Pos &
res_data, size_t & res_size)
{
- DB::Pos end = data + size;
- const static String protocol_delim = "://";
- DB::Pos protocol_delim_start = static_cast<DB::Pos>(memmem(data, size,
protocol_delim.data(), protocol_delim.size()));
- if (!protocol_delim_start)
- {
- res_data = nullptr;
- res_size = 0;
- return;
- }
- DB::Pos userinfo_delim_pos =
find_first_symbols<'@'>(protocol_delim_start + protocol_delim.size(), end);
- std::string_view host;
- if (userinfo_delim_pos && userinfo_delim_pos < end)
- {
- host = DB::getURLHost(userinfo_delim_pos + 1, end -
userinfo_delim_pos);
- }
- else
- {
- host = DB::getURLHost(protocol_delim_start + protocol_delim.size()
, end - protocol_delim_start - protocol_delim.size());
- }
-
+ const auto & host = uri.getHost();
+ res_data = host.data();
+ res_size = host.size();
if (host.empty())
{
- res_data = data;
+ res_data = nullptr;
res_size = 0;
}
- else
- {
- res_data = host.data();
- res_size = host.size();
- }
}
};
@@ -417,37 +348,14 @@ struct SparkExtractURLPath
{
static size_t getReserveLengthForElement() { return 25; }
- static void execute(DB::Pos data, size_t size, DB::Pos & res_data, size_t
& res_size)
+ static void execute(const Poco::URI & uri, const String &, DB::Pos &
res_data, size_t & res_size)
{
- res_data = data;
- res_size = 0;
- DB::Pos pos = data;
- DB::Pos end = data + size;
- const static String protocol_delim = "://";
- const auto * start_pos = static_cast<DB::Pos>(memmem(pos, end - pos,
protocol_delim.data(), protocol_delim.size()));
- if (start_pos)
- {
- start_pos += protocol_delim.size();
- const auto * path_start_pos = find_first_symbols<'/', '#',
'?'>(start_pos, end);
- if (path_start_pos && path_start_pos < end)
- {
- if (*path_start_pos != '/')
- return;
- res_data = path_start_pos;
- const auto * path_end_pos = find_first_symbols<'?',
'#'>(path_start_pos, end);
- if (path_end_pos && path_end_pos < end)
- {
- res_size = path_end_pos - path_start_pos;
- }
- else
- {
- res_size = end - path_start_pos;
- }
- }
- }
+ const auto & path = uri.getPath();
+ res_data = path.data();
+ res_size = path.size();
}
};
-using SparkFunctionURLPath =
DB::FunctionStringToString<DB::ExtractSubstringImpl<SparkExtractURLPath>,
NameSparkExtractURLPath>;
+using SparkFunctionURLPath =
FunctionStringToNullableString<ExtractNullableSubstringImpl<SparkExtractURLPath>,
NameSparkExtractURLPath>;
REGISTER_FUNCTION(SparkFunctionURLPath)
{
factory.registerFunction<SparkFunctionURLPath>();
@@ -460,30 +368,16 @@ struct NameSparkExtractUserInfo
struct SparkExtractURLUserInfo
{
static size_t getReserveLengthForElement() { return 25; }
- static void execute(DB::Pos data, size_t size, DB::Pos & res_data, size_t
& res_size)
+ static void execute(const Poco::URI & uri, const String &, DB::Pos &
res_data, size_t & res_size)
{
- res_data = data;
- res_size = 0;
- DB::Pos pos = data;
- DB::Pos end = data + size;
- const static String protocol_delim = "://";
- const static String userinfo_delim = "@";
- DB::Pos protocol_delim_start = static_cast<DB::Pos>(memmem(pos, end -
pos, protocol_delim.data(), protocol_delim.size()));
- if (!protocol_delim_start)
+ const auto & userinfo = uri.getUserInfo();
+ res_data = userinfo.data();
+ res_size = userinfo.size();
+ if (userinfo.empty())
{
res_data = nullptr;
res_size = 0;
- return;
}
- res_data = protocol_delim_start + protocol_delim.size();
- DB::Pos userinfo_delim_start = find_first_symbols<'@'>(res_data, end);
- if (!userinfo_delim_start || userinfo_delim_start >= end)
- {
- res_data = nullptr;
- res_size = 0;
- return;
- }
- res_size = userinfo_delim_start - res_data;
}
};
using SparkFunctionURLUserInfo =
FunctionStringToNullableString<ExtractNullableSubstringImpl<SparkExtractURLUserInfo>,
NameSparkExtractUserInfo>;
@@ -499,23 +393,24 @@ struct NameSparkExtractURLRef
struct SparkExtractURLRef
{
static size_t getReserveLengthForElement() { return 25; }
- static void execute(DB::Pos data, size_t size, DB::Pos & res_data, size_t
& res_size)
+ static void execute(const Poco::URI & uri, const String & data, DB::Pos &
res_data, size_t & res_size)
{
- res_data = data;
- res_size = 0;
- DB::Pos pos = data;
- DB::Pos end = data + size;
- const static String ref_delim = "#";
- const auto * ref_delim_pos = find_first_symbols<'#'>(pos, end);
- if (ref_delim_pos && ref_delim_pos < end)
- {
- res_data = ref_delim_pos + 1;
- res_size = end - res_data;
- }
- else
+ const auto & fragment = uri.getFragment();
+ res_data = fragment.data();
+ res_size = fragment.size();
+ if (data.find(fragment) == std::string::npos || fragment.empty())
{
- res_data = nullptr;
- res_size = 0;
+ const auto * ref_delim_pos =
find_first_symbols<'#'>(data.data(),data.data() + data.size());
+ if (ref_delim_pos && ref_delim_pos < data.data() + data.size())
+ {
+ res_data = ref_delim_pos + 1;
+ res_size = data.data() + data.size() - res_data;
+ }
+ else
+ {
+ res_data = nullptr;
+ res_size = 0;
+ }
}
}
};
@@ -532,44 +427,35 @@ struct NameSparkExtractURLFile
struct SparkExtractURLFile
{
static size_t getReserveLengthForElement() { return 25; }
- static void execute(DB::Pos data, size_t size, DB::Pos & res_data, size_t
& res_size)
+ static void execute(const Poco::URI & uri, const String & data, DB::Pos &
res_data, size_t & res_size)
{
- res_data = data;
- res_size = 0;
- DB::Pos pos = data;
- DB::Pos end = data + size;
const static String protocol_delim = "://";
- const static String slash_delim = "/";
- const static String query_delim = "?";
- const auto * protocol_delim_pos = static_cast<DB::Pos>(memmem(pos, end
- pos, protocol_delim.data(), protocol_delim.size()));
+ const auto * protocol_delim_pos =
static_cast<DB::Pos>(memmem(data.data(), data.size(), protocol_delim.data(),
protocol_delim.size()));
if (!protocol_delim_pos)
{
- auto colon_pos = find_first_symbols<':'>(pos, end);
- if (colon_pos && colon_pos + 1 < end)
+ auto colon_pos = find_first_symbols<':'>(data.data(), data.data()
+ data.size());
+ if (colon_pos && colon_pos + 1 < data.data() + data.size())
{
res_data = nullptr;
- return;
- }
- res_size = size;
- return;
- }
- DB::Pos file_begin_pos = find_first_symbols<'/', '?',
'#'>(protocol_delim_pos + protocol_delim.size(), end);
- if (file_begin_pos && file_begin_pos < end)
- {
- if (*file_begin_pos == '#')
- {
- return;
- }
- res_data = file_begin_pos;
- DB::Pos ref_delim_pos = find_first_symbols<'#'>(file_begin_pos +
1, end);
- if (ref_delim_pos && ref_delim_pos < end)
- {
- res_size = ref_delim_pos - res_data;
+ res_size = 0;
}
else
{
- res_size = end - res_data;
+ res_data = data.data();
+ res_size = data.size();
}
+ return;
+ }
+ const String & res = uri.getPath();
+ res_data = res.data();
+ res_size = res.size();
+ DB::Pos query_begin_pos = find_first_symbols<'?'>(protocol_delim_pos +
protocol_delim.size(), data.data() + data.size());
+ if (query_begin_pos && *query_begin_pos == '?')
+ {
+ const String & query = uri.getRawQuery();
+ String new_res = res.empty() && query.empty() ? "" : res + "?" +
query;
+ res_data = new_res.data();
+ res_size = new_res.size();
}
}
};
@@ -586,29 +472,15 @@ struct NameSparkExtractURLAuthority
struct SparkExtractURLAuthority
{
static size_t getReserveLengthForElement() { return 25; }
- static void execute(DB::Pos data, size_t size, DB::Pos & res_data, size_t
& res_size)
+ static void execute(const Poco::URI & uri, const String &, DB::Pos &
res_data, size_t & res_size)
{
- res_data = data;
- res_size = 0;
- DB::Pos pos = data;
- DB::Pos end = data + size;
- const static String protocol_delim = "://";
- const auto * protocol_delim_pos = static_cast<DB::Pos>(memmem(pos, end
- pos, protocol_delim.data(), protocol_delim.size()));
- if (!protocol_delim_pos)
+ const auto & authority = uri.getAuthority();
+ res_data = authority.data();
+ res_size = authority.size();
+ if (authority.empty())
{
res_data = nullptr;
res_size = 0;
- return;
- }
- res_data = protocol_delim_pos + protocol_delim.size();
- DB::Pos end_pos = find_first_symbols<'/', '?', '#'>(res_data, end);
- if (end_pos)
- {
- res_size = end_pos - res_data;
- }
- else
- {
- res_size = end - res_data -1 ;
}
}
};
@@ -627,7 +499,7 @@ struct NameSparkExtractURLInvalid
struct SparkExtractURLInvalid
{
static size_t getReserveLengthForElement() { return 1; }
- static void execute(DB::Pos, size_t, DB::Pos & res_data, size_t & res_size)
+ static void execute(const Poco::URI &, const String &, DB::Pos & res_data,
size_t & res_size)
{
res_data = nullptr;
res_size = 0;
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 9021e8de5d..d569386584 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -439,7 +439,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("string regex_replace / regex_extract")
.exclude("string overlay function")
.exclude("binary overlay function")
- .exclude("string parse_url function")
.exclude("string / binary length function")
.exclude("SPARK-36751: add octet length api for scala")
.exclude("SPARK-36751: add bit length api for scala")
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 66aca582a6..06845c9570 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -487,7 +487,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("string regex_replace / regex_extract")
.exclude("string overlay function")
.exclude("binary overlay function")
- .exclude("string parse_url function")
.exclude("string / binary length function")
.exclude("SPARK-36751: add octet length api for scala")
.exclude("SPARK-36751: add bit length api for scala")
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 7cfcde3995..6d38e85c92 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -485,7 +485,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("string regex_replace / regex_extract")
.exclude("string overlay function")
.exclude("binary overlay function")
- .exclude("string parse_url function")
.exclude("string / binary length function")
.exclude("SPARK-36751: add octet length api for scala")
.exclude("SPARK-36751: add bit length api for scala")
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index dd5779cc3f..caa12f330d 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -2145,7 +2145,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
// Rewrite with NaN test cases excluded.
.exclude("cases when literal is max")
enableSuite[GlutenUrlFunctionsSuite]
- .excludeCH("url parse_url function")
.excludeCH("url encode/decode function")
enableSuite[GlutenV1WriteCommandSuite]
// Rewrite to match SortExecTransformer.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]