This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8896be60df2 [test](inverted index) add ut test for regexp query (#52208)
8896be60df2 is described below

commit 8896be60df295e517d59d07c81799766aba64bcf
Author: airborne12 <[email protected]>
AuthorDate: Wed Jun 25 13:05:02 2025 +0800

    [test](inverted index) add ut test for regexp query (#52208)
---
 .../inverted_index/query/regexp_query_test.cpp     | 158 +++++++++++++++++++++
 1 file changed, 158 insertions(+)

diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query/regexp_query_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query/regexp_query_test.cpp
index 816018b36a2..14f7aac7380 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index/query/regexp_query_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index/query/regexp_query_test.cpp
@@ -22,7 +22,10 @@
 #include <fstream>
 #include <memory>
 
+#include "gen_cpp/PaloInternalService_types.h"
 #include "io/fs/local_file_system.h"
+#include "olap/rowset/segment_v2/inverted_index/query/query.h"
+#include "olap/rowset/segment_v2/inverted_index/query/query_info.h"
 
 namespace doris::segment_v2 {
 
@@ -186,4 +189,159 @@ TEST_F(RegexpQueryTest, MultipleRegexPatternsWithCaretPrefix) {
     }
 }
 
+TEST_F(RegexpQueryTest, AddWithInvalidTermsSize) {
+    // add() must reject a query that carries no terms. The searcher is
+    // deliberately null: the throw is expected before it is ever accessed.
+    std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+    TQueryOptions query_options;
+    query_options.inverted_index_max_expansions = 50;
+    io::IOContext io_ctx;
+
+    RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+    // Test with empty terms (size == 0) - this should throw before accessing searcher
+    InvertedIndexQueryInfo query_info;
+    query_info.field_name = L"test_field";
+    query_info.term_infos = {}; // empty term_infos
+
+    // Only std::exception is asserted; the concrete exception type is not pinned here.
+    EXPECT_THROW(regexp_query.add(query_info), std::exception);
+}
+
+TEST_F(RegexpQueryTest, AddWithInvalidPattern) {
+    // Malformed input: the character class is opened but never closed.
+    TermInfo malformed_term;
+    malformed_term.term = "[invalid_regex";
+
+    InvertedIndexQueryInfo info;
+    info.field_name = L"test_field";
+    info.term_infos = {malformed_term};
+
+    // No index is attached (null searcher); the test relies on add() giving
+    // up at pattern compilation, before the searcher would be needed.
+    std::shared_ptr<lucene::search::IndexSearcher> no_searcher = nullptr;
+    io::IOContext ctx;
+    TQueryOptions options;
+    options.inverted_index_max_expansions = 50;
+
+    RegexpQuery query_under_test(no_searcher, options, &ctx);
+
+    // Expected outcome: the failure is absorbed by add(), nothing propagates.
+    EXPECT_NO_THROW(query_under_test.add(info));
+}
+
+TEST_F(RegexpQueryTest, SearchWithEmptyTerms) {
+    // search() is invoked on a query that never had add() called on it.
+    io::IOContext ctx;
+    TQueryOptions options;
+    options.inverted_index_max_expansions = 50;
+    std::shared_ptr<lucene::search::IndexSearcher> no_searcher = nullptr;
+
+    RegexpQuery query_under_test(no_searcher, options, &ctx);
+
+    // It must return without throwing and leave the bitmap empty.
+    roaring::Roaring matched_rows;
+    EXPECT_NO_THROW(query_under_test.search(matched_rows));
+    EXPECT_TRUE(matched_rows.isEmpty());
+}
+
+TEST_F(RegexpQueryTest, GetRegexPrefixWithDebugPoint) {
+    // NOTE: no debug point is enabled in this test, so despite its name it
+    // only exercises the regular path of get_regex_prefix().
+    const auto prefix = RegexpQuery::get_regex_prefix("^test");
+    // The literal following the leading '^' is expected back as the prefix.
+    ASSERT_TRUE(prefix.has_value());
+    EXPECT_EQ(prefix.value(), "test");
+}
+
+TEST_F(RegexpQueryTest, AddWithPatternThatFailsCompilation) {
+    // Test add method with pattern that should fail hs_compile
+    std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+    TQueryOptions query_options;
+    query_options.inverted_index_max_expansions = 50;
+    io::IOContext io_ctx;
+
+    RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+    InvertedIndexQueryInfo query_info;
+    query_info.field_name = L"test_field";
+    // Use a pattern that is expected to fail hyperscan compilation:
+    // a named group followed by a named backreference, which hyperscan
+    // does not support.
+    TermInfo term_info;
+    term_info.term = "(?P<name>\\w+)\\k<name>";
+    query_info.term_infos = {term_info};
+
+    // Should not crash even with an unsupported pattern (covers the hs_compile
+    // failure path): compilation fails and add() is expected to return early.
+    EXPECT_NO_THROW(regexp_query.add(query_info));
+}
+
+TEST_F(RegexpQueryTest, ConstructorTest) {
+    // Building a RegexpQuery must not throw for either expansion limit tried here.
+    io::IOContext ctx;
+    TQueryOptions options;
+    std::shared_ptr<lucene::search::IndexSearcher> no_searcher = nullptr;
+
+    // First with a limit of 50 ...
+    options.inverted_index_max_expansions = 50;
+    EXPECT_NO_THROW(RegexpQuery(no_searcher, options, &ctx));
+
+    // ... then again with the same options object raised to 100.
+    options.inverted_index_max_expansions = 100;
+    EXPECT_NO_THROW(RegexpQuery(no_searcher, options, &ctx));
+}
+
+TEST_F(RegexpQueryTest, MaxExpansionsConfiguration) {
+    // Construct a RegexpQuery with inverted_index_max_expansions set to 100.
+    // Only "construction does not throw" is asserted; the stored limit is
+    // not inspected here.
+    std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+    TQueryOptions query_options;
+    query_options.inverted_index_max_expansions = 100;
+    io::IOContext io_ctx;
+
+    // The construction is wrapped so that a throw is reported as a test failure.
+    EXPECT_NO_THROW(RegexpQuery(searcher, query_options, &io_ctx));
+}
+
+TEST_F(RegexpQueryTest, AddWithUnsupportedRegexFeatures) {
+    // add() with a pattern using a regex feature hyperscan rejects; searcher stays null
+    std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+    TQueryOptions query_options;
+    query_options.inverted_index_max_expansions = 50;
+    io::IOContext io_ctx;
+
+    RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+    // Test with lookahead assertion (not supported by hyperscan)
+    InvertedIndexQueryInfo query_info;
+    query_info.field_name = L"test_field";
+    TermInfo term_info;
+    term_info.term = "(?=.*test).*"; // positive lookahead (not supported by hyperscan)
+    query_info.term_infos = {term_info};
+
+    // Should not throw as hyperscan compilation will fail and method returns early
+    EXPECT_NO_THROW(regexp_query.add(query_info));
+}
+
+TEST_F(RegexpQueryTest, AddWithBackreferencePattern) {
+    // add() with a numbered backreference, which should fail hyperscan compilation
+    std::shared_ptr<lucene::search::IndexSearcher> searcher = nullptr;
+    TQueryOptions query_options;
+    query_options.inverted_index_max_expansions = 50;
+    io::IOContext io_ctx;
+
+    RegexpQuery regexp_query(searcher, query_options, &io_ctx);
+
+    InvertedIndexQueryInfo query_info;
+    query_info.field_name = L"test_field";
+    TermInfo term_info;
+    term_info.term = R"((\w+)\s+\1)"; // backreference pattern (not supported by hyperscan)
+    query_info.term_infos = {term_info};
+
+    // Should not throw as hyperscan compilation will fail and method returns early
+    EXPECT_NO_THROW(regexp_query.add(query_info));
+}
+
 } // namespace doris::segment_v2
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to