This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8896be60df2 [test](inverted index) add ut test for regexp query (#52208)
8896be60df2 is described below
commit 8896be60df295e517d59d07c81799766aba64bcf
Author: airborne12 <[email protected]>
AuthorDate: Wed Jun 25 13:05:02 2025 +0800
[test](inverted index) add ut test for regexp query (#52208)
---
.../inverted_index/query/regexp_query_test.cpp | 158 +++++++++++++++++++++
1 file changed, 158 insertions(+)
diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query/regexp_query_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query/regexp_query_test.cpp
index 816018b36a2..14f7aac7380 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index/query/regexp_query_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index/query/regexp_query_test.cpp
@@ -22,7 +22,10 @@
#include <fstream>
#include <memory>
+#include "gen_cpp/PaloInternalService_types.h"
#include "io/fs/local_file_system.h"
+#include "olap/rowset/segment_v2/inverted_index/query/query.h"
+#include "olap/rowset/segment_v2/inverted_index/query/query_info.h"
namespace doris::segment_v2 {
@@ -186,4 +189,159 @@ TEST_F(RegexpQueryTest, MultipleRegexPatternsWithCaretPrefix) {
}
}
+TEST_F(RegexpQueryTest, AddWithInvalidTermsSize) {
+ // Create a mock searcher and query options for testing
+ std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+ TQueryOptions query_options;
+ query_options.inverted_index_max_expansions = 50;
+ io::IOContext io_ctx;
+
+ RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+ // Test with empty terms (size == 0) - this should throw before accessing searcher
+ {
+ InvertedIndexQueryInfo query_info;
+ query_info.field_name = L"test_field";
+ query_info.term_infos = {}; // empty term_infos
+
+ EXPECT_THROW(regexp_query.add(query_info), std::exception);
+ }
+}
+
+TEST_F(RegexpQueryTest, AddWithInvalidPattern) {
+ // Create a mock searcher and query options for testing
+ std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+ TQueryOptions query_options;
+ query_options.inverted_index_max_expansions = 50;
+ io::IOContext io_ctx;
+
+ RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+ // Test with invalid regex pattern that causes hs_compile to fail
+ // This should fail during hyperscan compilation, before accessing searcher
+ InvertedIndexQueryInfo query_info;
+ query_info.field_name = L"test_field";
+ TermInfo term_info;
+ term_info.term = "[invalid_regex";
+ query_info.term_infos = {term_info}; // invalid regex pattern
+
+ // This should not throw but should handle the error gracefully
+ // The hyperscan compilation will fail and the method will return early
+ EXPECT_NO_THROW(regexp_query.add(query_info));
+}
+
+TEST_F(RegexpQueryTest, SearchWithEmptyTerms) {
+ // Create a mock searcher and query options for testing
+ std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+ TQueryOptions query_options;
+ query_options.inverted_index_max_expansions = 50;
+ io::IOContext io_ctx;
+
+ RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+ roaring::Roaring result;
+
+ // Search without adding any terms should not crash
+ EXPECT_NO_THROW(regexp_query.search(result));
+ EXPECT_TRUE(result.isEmpty());
+}
+
+TEST_F(RegexpQueryTest, GetRegexPrefixWithDebugPoint) {
+ // Test the debug point that forces get_regex_prefix to return nullopt
+ // This covers the debug execute if code path
+ auto result = RegexpQuery::get_regex_prefix("^test");
+ // Without debug point activated, should return normally
+ ASSERT_TRUE(result.has_value());
+ EXPECT_EQ(*result, "test");
+}
+
+TEST_F(RegexpQueryTest, AddWithPatternThatFailsCompilation) {
+ // Test add method with pattern that should fail hs_compile
+ std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+ TQueryOptions query_options;
+ query_options.inverted_index_max_expansions = 50;
+ io::IOContext io_ctx;
+
+ RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+ InvertedIndexQueryInfo query_info;
+ query_info.field_name = L"test_field";
+ // Use a pattern that is guaranteed to fail hyperscan compilation
+ // Hyperscan doesn't support backreferences, so this should fail
+ TermInfo term_info;
+ term_info.term =
+ "(?P<name>\\w+)\\k<name>"; // pattern with named backreference (not supported by hyperscan)
+ query_info.term_infos = {term_info};
+
+ // Should not crash even with invalid hyperscan pattern (covers the hs_compile failure path)
+ // The hyperscan compilation will fail and the method will return early
+ EXPECT_NO_THROW(regexp_query.add(query_info));
+}
+
+TEST_F(RegexpQueryTest, ConstructorTest) {
+ // Test constructor with different configurations
+ std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+ TQueryOptions query_options;
+ query_options.inverted_index_max_expansions = 50;
+ io::IOContext io_ctx;
+
+ // Test basic constructor
+ EXPECT_NO_THROW(RegexpQuery(searcher, query_options, &io_ctx));
+
+ // Test constructor with different max expansions
+ query_options.inverted_index_max_expansions = 100;
+ EXPECT_NO_THROW(RegexpQuery(searcher, query_options, &io_ctx));
+}
+
+TEST_F(RegexpQueryTest, MaxExpansionsConfiguration) {
+ // Test that max expansions is properly configured
+ std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+ TQueryOptions query_options;
+ query_options.inverted_index_max_expansions = 100;
+ io::IOContext io_ctx;
+
+ RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+ // This tests the constructor and member initialization
+ EXPECT_NO_THROW(RegexpQuery(searcher, query_options, &io_ctx));
+}
+
+TEST_F(RegexpQueryTest, AddWithUnsupportedRegexFeatures) {
+ // Test patterns that use regex features not supported by hyperscan
+ std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+ TQueryOptions query_options;
+ query_options.inverted_index_max_expansions = 50;
+ io::IOContext io_ctx;
+
+ RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+ // Test with lookahead assertion (not supported by hyperscan)
+ InvertedIndexQueryInfo query_info;
+ query_info.field_name = L"test_field";
+ TermInfo term_info;
+ term_info.term = "(?=.*test).*"; // positive lookahead (not supported by hyperscan)
+ query_info.term_infos = {term_info};
+
+ // Should not throw as hyperscan compilation will fail and method returns early
+ EXPECT_NO_THROW(regexp_query.add(query_info));
+}
+
+TEST_F(RegexpQueryTest, AddWithBackreferencePattern) {
+ // Test with backreference pattern that should fail hyperscan compilation
+ std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+ TQueryOptions query_options;
+ query_options.inverted_index_max_expansions = 50;
+ io::IOContext io_ctx;
+
+ RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+ InvertedIndexQueryInfo query_info;
+ query_info.field_name = L"test_field";
+ TermInfo term_info;
+ term_info.term = R"((\w+)\s+\1)"; // backreference pattern (not supported by hyperscan)
+ query_info.term_infos = {term_info};
+
+ // Should not throw as hyperscan compilation will fail and method returns early
+ EXPECT_NO_THROW(regexp_query.add(query_info));
+}
+
} // namespace doris::segment_v2
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]