This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new b4509371589 [refactor](search) Refactor SearchDslParser to single-phase ANTLR parsing and fix ES compatibility issues (#60654)
b4509371589 is described below
commit b450937158974dbe8d72a281ba14ea59f7a3ce52
Author: Jack <[email protected]>
AuthorDate: Wed Feb 18 10:54:56 2026 +0800
[refactor](search) Refactor SearchDslParser to single-phase ANTLR parsing and fix ES compatibility issues (#60654)
### What problem does this PR solve?
Problem Summary:
The `search()` function's DSL parser had multiple ES compatibility
issues and used a two-phase parsing approach (manual pre-parse + ANTLR)
that was error-prone. This PR refactors the parser and fixes several
bugs:
1. **SearchDslParser refactoring**: Consolidated from two-phase (manual
pre-parse + ANTLR) to single-phase ANTLR parsing. The ANTLR grammar now
handles all DSL syntax directly, eliminating the fragile manual
pre-parse layer. This fixes issues with operator precedence, grouping,
and edge cases.
2. **ANTLR grammar improvements**: Updated `SearchLexer.g4` and
`SearchParser.g4` to properly handle quoted phrases, field-qualified
expressions, prefix/wildcard/regexp patterns, range queries, and boolean
operators with correct precedence.
3. **minimum_should_match pipeline**: Added `default_operator` and
`minimum_should_match` fields to `TSearchParam` thrift, passing them
from FE `SearchPredicate` through to BE `function_search`. When
`minimum_should_match > 0`, the BE uses `OccurBooleanQuery` for proper
Lucene-style boolean query semantics (see the sketch after this list).
4. **Wildcard/Prefix/Regexp case-sensitivity**: WILDCARD and PREFIX
patterns are now lowercased when the index has a parser and
`lower_case=true` (matching ES query_string normalizer behavior). REGEXP
patterns are NOT lowercased, matching ES regex behavior where patterns
bypass analysis (see the sketch after this list).
5. **MATCH_ALL_DOCS support**: Added `MATCH_ALL_DOCS` clause type for
standalone `*` queries and pure NOT query rewrites. Enhanced `AllQuery`
with deferred `max_doc` from `context.segment_num_rows` and nullable
field support via `NullableScorer`.
6. **BE fixes**:
- `regexp_weight._max_expansions`: Changed from 50 to 0 (unlimited) to
prevent PREFIX queries from missing documents
- `occur_boolean_weight`: Fixed swap→append bug when all SHOULD clauses
must match, preserving existing MUST scorers
- Variant subcolumn `index_properties` propagation for proper analyzer
selection
- `lower_case` default handling: inverted index `lower_case` now defaults
to `"true"` unless a custom analyzer is configured (custom analyzers
control lowercasing via their own token filters)
---
.../inverted_index/query_v2/all_query/all_query.h | 49 ++-
.../boolean_query/occur_boolean_weight.cpp | 7 +-
.../query_v2/regexp_query/regexp_weight.h | 4 +-
be/src/olap/tablet_schema.cpp | 14 +-
be/src/vec/functions/function_search.cpp | 108 ++++-
be/src/vec/functions/function_search.h | 12 +-
.../occur_boolean_query_real_index_test.cpp | 9 +-
.../query_v2/occur_boolean_query_test.cpp | 18 +-
.../query_v2/regexp_wildcard_lowercase_test.cpp | 228 +++++++++++
be/test/vec/function/function_search_test.cpp | 4 +-
.../org/apache/doris/analysis/SearchPredicate.java | 31 ++
.../glue/translator/ExpressionTranslator.java | 18 +-
.../functions/scalar/SearchDslParser.java | 339 ++++++++++------
.../apache/doris/analysis/SearchPredicateTest.java | 136 +++++++
.../functions/scalar/SearchDslParserTest.java | 439 +++++++++++++++++++--
gensrc/thrift/Exprs.thrift | 3 +
.../data/search/test_search_lucene_mode.out | 4 +
.../data/search/test_search_multi_field.out | 4 +-
.../data/search/test_search_regexp_lowercase.out | 39 ++
.../test_search_variant_subcolumn_analyzer.out | 30 ++
.../suites/search/test_search_lucene_mode.groovy | 9 +-
.../suites/search/test_search_multi_field.groovy | 2 +
.../search/test_search_regexp_lowercase.groovy | 153 +++++++
.../test_search_variant_subcolumn_analyzer.groovy | 175 ++++++++
24 files changed, 1610 insertions(+), 225 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/all_query/all_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/all_query/all_query.h
index cd73860d46f..aa17338e2b1 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/all_query/all_query.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/all_query/all_query.h
@@ -19,7 +19,9 @@
#include <algorithm>
#include <memory>
+#include <string>
+#include "olap/rowset/segment_v2/inverted_index/query_v2/nullable_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/query.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/weight.h"
@@ -34,14 +36,14 @@ using AllScorerPtr = std::shared_ptr<AllScorer>;
using AllWeightPtr = std::shared_ptr<AllWeight>;
using AllQueryPtr = std::shared_ptr<AllQuery>;
+/// Scorer that matches all documents [0, max_doc).
+/// Mirrors Lucene's MatchAllDocsQuery scorer with ConstantScoreWeight:
+/// returns a constant score of 1.0 when scoring is enabled, 0.0 otherwise.
class AllScorer : public Scorer {
public:
- explicit AllScorer(uint32_t max_doc) : _max_doc(max_doc) {
- if (_max_doc == 0) {
- _doc = TERMINATED;
- } else {
- _doc = 0;
- }
+ AllScorer(uint32_t max_doc, bool enable_scoring)
+ : _max_doc(max_doc), _score(enable_scoring ? 1.0F : 0.0F) {
+ _doc = (_max_doc == 0) ? TERMINATED : 0;
}
~AllScorer() override = default;
@@ -72,41 +74,60 @@ public:
return _doc;
}
- float score() override { return 1.0F; }
+ float score() override { return _score; }
uint32_t size_hint() const override { return _max_doc; }
private:
uint32_t _max_doc = 0;
uint32_t _doc = TERMINATED;
+ float _score;
};
+/// Weight for AllQuery. Analogous to Lucene's ConstantScoreWeight used by MatchAllDocsQuery.
class AllWeight : public Weight {
public:
- explicit AllWeight(uint32_t max_doc) : _max_doc(max_doc) {}
+ explicit AllWeight(bool enable_scoring) : _enable_scoring(enable_scoring) {}
+
+ AllWeight(std::wstring field, bool nullable, bool enable_scoring)
+ : _field(std::move(field)), _nullable(nullable), _enable_scoring(enable_scoring) {}
~AllWeight() override = default;
ScorerPtr scorer(const QueryExecutionContext& context) override {
- return std::make_shared<AllScorer>(_max_doc);
+ auto inner = std::make_shared<AllScorer>(context.segment_num_rows, _enable_scoring);
+ if (_nullable && context.null_resolver != nullptr) {
+ std::string logical = logical_field_or_fallback(context, "", _field);
+ return make_nullable_scorer(std::move(inner), logical, context.null_resolver);
+ }
+ return inner;
}
private:
- uint32_t _max_doc = 0;
+ std::wstring _field;
+ bool _nullable = false;
+ bool _enable_scoring = false;
};
+/// Query that matches all documents, analogous to Lucene's MatchAllDocsQuery.
+/// Uses constant scoring (score = 1.0) like Lucene's ConstantScoreWeight.
class AllQuery : public Query {
public:
- explicit AllQuery(uint32_t max_doc) : _max_doc(max_doc) {}
+ AllQuery() = default;
+ AllQuery(std::wstring field, bool nullable) : _field(std::move(field)), _nullable(nullable) {}
~AllQuery() override = default;
- WeightPtr weight(bool /*enable_scoring*/) override {
- return std::make_shared<AllWeight>(_max_doc);
+ WeightPtr weight(bool enable_scoring) override {
+ if (!_field.empty()) {
+ return std::make_shared<AllWeight>(_field, _nullable, enable_scoring);
+ }
+ return std::make_shared<AllWeight>(enable_scoring);
}
private:
- uint32_t _max_doc = 0;
+ std::wstring _field;
+ bool _nullable = false;
};
} // namespace doris::segment_v2::inverted_index::query_v2
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
index e92a32fbe94..9b828708798 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
@@ -112,6 +112,7 @@ std::optional<CombinationMethod> OccurBooleanWeight<ScoreCombinerPtrT>::build_sh
} else if (adjusted_minimum == 1) {
return Required {scorer_union(std::move(should_scorers), combiner)};
} else if (adjusted_minimum == num_of_should_scorers) {
+ // All SHOULD clauses must match - move them to must_scorers (append, not swap)
for (auto& scorer : should_scorers) {
must_scorers.push_back(std::move(scorer));
}
@@ -137,7 +138,7 @@ ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::effective_must_scorer(
std::vector<ScorerPtr> must_scorers, size_t must_num_all_scorers) {
if (must_scorers.empty()) {
if (must_num_all_scorers > 0) {
- return std::make_shared<AllScorer>(_max_doc);
+ return std::make_shared<AllScorer>(_max_doc, _enable_scoring);
}
return nullptr;
}
@@ -152,10 +153,10 @@ SpecializedScorer OccurBooleanWeight<ScoreCombinerPtrT>::effective_should_scorer
if (_enable_scoring) {
std::vector<ScorerPtr> scorers;
scorers.push_back(into_box_scorer(std::move(should_scorer), combiner));
- scorers.push_back(std::make_shared<AllScorer>(_max_doc));
+ scorers.push_back(std::make_shared<AllScorer>(_max_doc, _enable_scoring));
return make_buffered_union(std::move(scorers), combiner);
} else {
- return std::make_shared<AllScorer>(_max_doc);
+ return std::make_shared<AllScorer>(_max_doc, _enable_scoring);
}
}
return should_scorer;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_weight.h b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_weight.h
index b58d124ed11..f9959ff0d8c 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_weight.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_weight.h
@@ -48,7 +48,9 @@ private:
std::string _pattern;
bool _enable_scoring = false;
bool _nullable = true;
- int32_t _max_expansions = 50;
+ // Set to 0 to disable limit (ES has no default limit for prefix queries)
+ // The limit prevents collecting too many terms, but can cause incorrect results
+ int32_t _max_expansions = 0;
};
} // namespace doris::segment_v2::inverted_index::query_v2
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index d79945f1f89..c1f9ff085ec 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -39,6 +39,7 @@
#include "olap/inverted_index_parser.h"
#include "olap/olap_common.h"
#include "olap/olap_define.h"
+#include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h"
#include "olap/tablet_column_object_pool.h"
#include "olap/types.h"
#include "olap/utils.h"
@@ -955,9 +956,16 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })
- // lowercase by default
- if (!_properties.empty()) {
- if (!_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
+ // Only add lower_case=true default for built-in analyzers/parsers, NOT for custom analyzers
+ // Custom analyzer: lower_case is determined by analyzer's internal token filter
+ if (!_properties.empty() && !_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
+ bool has_parser = _properties.contains(INVERTED_INDEX_PARSER_KEY) ||
+ _properties.contains(INVERTED_INDEX_PARSER_KEY_ALIAS);
+ std::string analyzer_name = get_analyzer_name_from_properties(_properties);
+ bool is_builtin = analyzer_name.empty() ||
+ segment_v2::inverted_index::InvertedIndexAnalyzer::is_builtin_analyzer(
+ analyzer_name);
+ if (has_parser || is_builtin) {
(*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
INVERTED_INDEX_PARSER_TRUE;
}
diff --git a/be/src/vec/functions/function_search.cpp b/be/src/vec/functions/function_search.cpp
index 6f737146915..26d38a32934 100644
--- a/be/src/vec/functions/function_search.cpp
+++ b/be/src/vec/functions/function_search.cpp
@@ -37,6 +37,7 @@
#include "olap/rowset/segment_v2/index_query_context.h"
#include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h"
#include "olap/rowset/segment_v2/inverted_index/query/query_helper.h"
+#include "olap/rowset/segment_v2/inverted_index/query_v2/all_query/all_query.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/bit_set_query/bit_set_query.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query_builder.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/operator.h"
@@ -48,6 +49,7 @@
#include "olap/rowset/segment_v2/inverted_index/util/string_helper.h"
#include "olap/rowset/segment_v2/inverted_index_iterator.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
+#include "util/string_util.h"
#include "vec/columns/column_const.h"
#include "vec/core/columns_with_type_and_name.h"
#include "vec/data_types/data_type_string.h"
@@ -166,7 +168,14 @@ Status FieldReaderResolver::resolve(const std::string& field_name,
resolved.query_type = query_type;
resolved.inverted_reader = inverted_reader;
resolved.lucene_reader = reader_holder;
- resolved.index_properties = inverted_reader->get_index_properties();
+ // Prefer FE-provided index_properties (needed for variant subcolumn field_pattern matching)
+ auto fb_it = _field_binding_map.find(field_name);
+ if (fb_it != _field_binding_map.end() &&
fb_it->second->__isset.index_properties &&
+ !fb_it->second->index_properties.empty()) {
+ resolved.index_properties = fb_it->second->index_properties;
+ } else {
+ resolved.index_properties = inverted_reader->get_index_properties();
+ }
resolved.binding_key = binding_key;
resolved.analyzer_key =
normalize_analyzer_key(build_analyzer_key_from_properties(resolved.index_properties));
@@ -217,10 +226,22 @@ Status FunctionSearch::evaluate_inverted_index_with_search_param(
FieldReaderResolver resolver(data_type_with_names, iterators, context,
search_param.field_bindings);
+ // Extract default_operator from TSearchParam (default: "or")
+ std::string default_operator = "or";
+ if (search_param.__isset.default_operator &&
!search_param.default_operator.empty()) {
+ default_operator = search_param.default_operator;
+ }
+ // Extract minimum_should_match from TSearchParam (-1 means not set)
+ int32_t minimum_should_match = -1;
+ if (search_param.__isset.minimum_should_match) {
+ minimum_should_match = search_param.minimum_should_match;
+ }
+
query_v2::QueryPtr root_query;
std::string root_binding_key;
RETURN_IF_ERROR(build_query_recursive(search_param.root, context,
resolver, &root_query,
- &root_binding_key));
+ &root_binding_key, default_operator,
+ minimum_should_match));
if (root_query == nullptr) {
LOG(INFO) << "search: Query tree resolved to empty query, dsl:"
<< search_param.original_dsl;
@@ -429,7 +450,9 @@ Status FunctionSearch::build_query_recursive(const TSearchClause& clause,
const
std::shared_ptr<IndexQueryContext>& context,
FieldReaderResolver& resolver,
inverted_index::query_v2::QueryPtr* out,
- std::string* binding_key) const {
+ std::string* binding_key,
+ const std::string&
default_operator,
+ int32_t minimum_should_match)
const {
DCHECK(out != nullptr);
*out = nullptr;
if (binding_key) {
@@ -438,6 +461,12 @@ Status FunctionSearch::build_query_recursive(const TSearchClause& clause,
const std::string& clause_type = clause.clause_type;
+ // Handle MATCH_ALL_DOCS - matches all documents in the segment
+ if (clause_type == "MATCH_ALL_DOCS") {
+ *out = std::make_shared<query_v2::AllQuery>();
+ return Status::OK();
+ }
+
// Handle OCCUR_BOOLEAN - Lucene-style boolean query with
MUST/SHOULD/MUST_NOT
if (clause_type == "OCCUR_BOOLEAN") {
auto builder =
segment_v2::inverted_index::query_v2::create_occur_boolean_query_builder();
@@ -452,7 +481,8 @@ Status FunctionSearch::build_query_recursive(const TSearchClause& clause,
query_v2::QueryPtr child_query;
std::string child_binding_key;
RETURN_IF_ERROR(build_query_recursive(child_clause, context,
resolver, &child_query,
- &child_binding_key));
+ &child_binding_key,
default_operator,
+ minimum_should_match));
// Determine occur type from child clause
query_v2::Occur occur = query_v2::Occur::MUST; // default
@@ -483,7 +513,8 @@ Status FunctionSearch::build_query_recursive(const TSearchClause& clause,
query_v2::QueryPtr child_query;
std::string child_binding_key;
RETURN_IF_ERROR(build_query_recursive(child_clause, context,
resolver, &child_query,
- &child_binding_key));
+ &child_binding_key,
default_operator,
+ minimum_should_match));
// Add all children including empty BitSetQuery
// BooleanQuery will handle the logic:
// - AND with empty bitmap → result is empty
@@ -497,14 +528,17 @@ Status FunctionSearch::build_query_recursive(const TSearchClause& clause,
return Status::OK();
}
- return build_leaf_query(clause, context, resolver, out, binding_key);
+ return build_leaf_query(clause, context, resolver, out, binding_key,
default_operator,
+ minimum_should_match);
}
Status FunctionSearch::build_leaf_query(const TSearchClause& clause,
const
std::shared_ptr<IndexQueryContext>& context,
FieldReaderResolver& resolver,
inverted_index::query_v2::QueryPtr*
out,
- std::string* binding_key) const {
+ std::string* binding_key,
+ const std::string& default_operator,
+ int32_t minimum_should_match) const {
DCHECK(out != nullptr);
*out = nullptr;
if (binding_key) {
@@ -576,7 +610,27 @@ Status FunctionSearch::build_leaf_query(const TSearchClause& clause,
return Status::OK();
}
- auto builder =
create_operator_boolean_query_builder(query_v2::OperatorType::OP_OR);
+ // When minimum_should_match is specified, use OccurBooleanQuery
+ // ES behavior: msm only applies to SHOULD clauses
+ if (minimum_should_match > 0) {
+ auto builder =
+
segment_v2::inverted_index::query_v2::create_occur_boolean_query_builder();
+ builder->set_minimum_number_should_match(minimum_should_match);
+ query_v2::Occur occur = (default_operator == "and") ?
query_v2::Occur::MUST
+ :
query_v2::Occur::SHOULD;
+ for (const auto& term_info : term_infos) {
+ std::wstring term_wstr =
StringHelper::to_wstring(term_info.get_single_term());
+ builder->add(make_term_query(term_wstr), occur);
+ }
+ *out = builder->build();
+ return Status::OK();
+ }
+
+ // Use default_operator to determine how to combine tokenized terms
+ query_v2::OperatorType op_type = (default_operator == "and")
+ ?
query_v2::OperatorType::OP_AND
+ :
query_v2::OperatorType::OP_OR;
+ auto builder = create_operator_boolean_query_builder(op_type);
for (const auto& term_info : term_infos) {
std::wstring term_wstr =
StringHelper::to_wstring(term_info.get_single_term());
builder->add(make_term_query(term_wstr), binding.binding_key);
@@ -716,20 +770,50 @@ Status FunctionSearch::build_leaf_query(const TSearchClause& clause,
return Status::OK();
}
if (clause_type == "PREFIX") {
- *out = std::make_shared<query_v2::WildcardQuery>(context,
field_wstr, value);
+ // Apply lowercase only if:
+ // 1. There's a parser/analyzer (otherwise lower_case has no
effect on indexing)
+ // 2. lower_case is explicitly set to "true"
+ bool has_parser =
inverted_index::InvertedIndexAnalyzer::should_analyzer(
+ binding.index_properties);
+ std::string lowercase_setting =
+
get_parser_lowercase_from_properties(binding.index_properties);
+ bool should_lowercase = has_parser && (lowercase_setting ==
INVERTED_INDEX_PARSER_TRUE);
+ std::string pattern = should_lowercase ? to_lower(value) : value;
+ *out = std::make_shared<query_v2::WildcardQuery>(context,
field_wstr, pattern);
VLOG_DEBUG << "search: PREFIX clause processed, field=" <<
field_name << ", pattern='"
- << value << "'";
+ << pattern << "' (original='" << value << "',
has_parser=" << has_parser
+ << ", lower_case=" << lowercase_setting << ")";
return Status::OK();
}
if (clause_type == "WILDCARD") {
- *out = std::make_shared<query_v2::WildcardQuery>(context,
field_wstr, value);
+ // Standalone wildcard "*" matches all non-null values for this
field
+ // Consistent with ES query_string behavior where field:* becomes
FieldExistsQuery
+ if (value == "*") {
+ *out = std::make_shared<query_v2::AllQuery>(field_wstr, true);
+ VLOG_DEBUG << "search: WILDCARD '*' converted to
AllQuery(nullable=true), field="
+ << field_name;
+ return Status::OK();
+ }
+ // Apply lowercase only if:
+ // 1. There's a parser/analyzer (otherwise lower_case has no
effect on indexing)
+ // 2. lower_case is explicitly set to "true"
+ bool has_parser =
inverted_index::InvertedIndexAnalyzer::should_analyzer(
+ binding.index_properties);
+ std::string lowercase_setting =
+
get_parser_lowercase_from_properties(binding.index_properties);
+ bool should_lowercase = has_parser && (lowercase_setting ==
INVERTED_INDEX_PARSER_TRUE);
+ std::string pattern = should_lowercase ? to_lower(value) : value;
+ *out = std::make_shared<query_v2::WildcardQuery>(context,
field_wstr, pattern);
VLOG_DEBUG << "search: WILDCARD clause processed, field=" <<
field_name << ", pattern='"
- << value << "'";
+ << pattern << "' (original='" << value << "',
has_parser=" << has_parser
+ << ", lower_case=" << lowercase_setting << ")";
return Status::OK();
}
if (clause_type == "REGEXP") {
+ // ES-compatible: regex patterns are NOT lowercased (case-sensitive matching)
+ // This matches ES query_string behavior where regex patterns bypass analysis
*out = std::make_shared<query_v2::RegexpQuery>(context,
field_wstr, value);
VLOG_DEBUG << "search: REGEXP clause processed, field=" <<
field_name << ", pattern='"
<< value << "'";
diff --git a/be/src/vec/functions/function_search.h b/be/src/vec/functions/function_search.h
index 944f07dd1b6..d8b7c08fac6 100644
--- a/be/src/vec/functions/function_search.h
+++ b/be/src/vec/functions/function_search.h
@@ -64,11 +64,12 @@ public:
_iterators(iterators),
_context(std::move(context)),
_field_bindings(field_bindings) {
- // Build a lookup map for quick variant subcolumn checks
+ // Build lookup maps for quick access
for (const auto& binding : _field_bindings) {
if (binding.__isset.is_variant_subcolumn &&
binding.is_variant_subcolumn) {
_variant_subcolumn_fields.insert(binding.field_name);
}
+ _field_binding_map[binding.field_name] = &binding;
}
}
@@ -114,6 +115,7 @@ private:
const std::unordered_map<std::string, IndexIterator*>& _iterators;
std::shared_ptr<IndexQueryContext> _context;
std::vector<TSearchFieldBinding> _field_bindings;
+ std::unordered_map<std::string, const TSearchFieldBinding*>
_field_binding_map;
std::unordered_set<std::string> _variant_subcolumn_fields;
std::unordered_map<std::string, FieldReaderBinding> _cache;
std::vector<std::shared_ptr<lucene::index::IndexReader>> _readers;
@@ -182,13 +184,15 @@ public:
Status build_query_recursive(const TSearchClause& clause,
const std::shared_ptr<IndexQueryContext>&
context,
FieldReaderResolver& resolver,
- inverted_index::query_v2::QueryPtr* out,
- std::string* binding_key) const;
+ inverted_index::query_v2::QueryPtr* out,
std::string* binding_key,
+ const std::string& default_operator,
+ int32_t minimum_should_match) const;
Status build_leaf_query(const TSearchClause& clause,
const std::shared_ptr<IndexQueryContext>& context,
FieldReaderResolver& resolver,
inverted_index::query_v2::QueryPtr* out,
- std::string* binding_key) const;
+ std::string* binding_key, const std::string&
default_operator,
+ int32_t minimum_should_match) const;
Status collect_all_field_nulls(const TSearchClause& clause,
const std::unordered_map<std::string,
IndexIterator*>& iterators,
diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_real_index_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_real_index_test.cpp
index 4fc01f43e1d..0088b88dd4d 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_real_index_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_real_index_test.cpp
@@ -136,8 +136,7 @@ TEST_F(OccurBooleanQueryRealIndexTest, NotPhraseQuery) {
auto phrase_query = std::make_shared<PhraseQuery>(context, field,
term_infos);
- uint32_t max_doc = reader_holder->maxDoc();
- auto all_query = std::make_shared<AllQuery>(max_doc);
+ auto all_query = std::make_shared<AllQuery>();
std::vector<std::pair<Occur, QueryPtr>> clauses;
clauses.emplace_back(Occur::SHOULD, all_query);
@@ -255,8 +254,7 @@ TEST_F(OccurBooleanQueryRealIndexTest, NotPhraseQueryNonExistent) {
auto phrase_query = std::make_shared<PhraseQuery>(context, field,
term_infos);
- uint32_t max_doc = reader_holder->maxDoc();
- auto all_query = std::make_shared<AllQuery>(max_doc);
+ auto all_query = std::make_shared<AllQuery>();
std::vector<std::pair<Occur, QueryPtr>> clauses;
clauses.emplace_back(Occur::SHOULD, all_query);
@@ -309,8 +307,7 @@ TEST_F(OccurBooleanQueryRealIndexTest, NotPhraseQueryExcludesPartial) {
auto phrase_query = std::make_shared<PhraseQuery>(context, field,
term_infos);
- uint32_t max_doc = reader_holder->maxDoc();
- auto all_query = std::make_shared<AllQuery>(max_doc);
+ auto all_query = std::make_shared<AllQuery>();
std::vector<std::pair<Occur, QueryPtr>> clauses;
clauses.emplace_back(Occur::SHOULD, all_query);
diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
index 244ddfb8dcc..7d885ecce19 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
@@ -874,7 +874,7 @@ TEST_F(OccurBooleanQueryTest, AllQueryWithMustClause) {
std::vector<std::pair<Occur, QueryPtr>> clauses;
clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
- clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>());
OccurBooleanQuery query(std::move(clauses));
auto weight = query.weight(false);
@@ -891,7 +891,7 @@ TEST_F(OccurBooleanQueryTest, AllQueryWithShouldClause) {
std::vector<std::pair<Occur, QueryPtr>> clauses;
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs));
- clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>(50));
+ clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>());
OccurBooleanQuery query(std::move(clauses));
auto weight = query.weight(false);
@@ -909,7 +909,7 @@ TEST_F(OccurBooleanQueryTest, AllQueryWithMustNotClause) {
auto must_not_docs = std::vector<uint32_t> {10, 20, 30, 40, 50};
std::vector<std::pair<Occur, QueryPtr>> clauses;
- clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>());
clauses.emplace_back(Occur::MUST_NOT,
std::make_shared<MockQuery>(must_not_docs));
OccurBooleanQuery query(std::move(clauses));
@@ -930,8 +930,8 @@ TEST_F(OccurBooleanQueryTest, MultipleAllQueriesWithMust) {
std::vector<std::pair<Occur, QueryPtr>> clauses;
clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
- clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
- clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>());
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>());
OccurBooleanQuery query(std::move(clauses));
auto weight = query.weight(false);
@@ -945,7 +945,7 @@ TEST_F(OccurBooleanQueryTest, AllQueryOnlyMust) {
_ctx.segment_num_rows = 50;
std::vector<std::pair<Occur, QueryPtr>> clauses;
- clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(50));
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>());
OccurBooleanQuery query(std::move(clauses));
auto weight = query.weight(false);
@@ -967,7 +967,7 @@ TEST_F(OccurBooleanQueryTest, AllQueryWithMustAndShouldMinMatch) {
std::vector<std::pair<Occur, QueryPtr>> clauses;
clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs));
- clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>(100));
+ clauses.emplace_back(Occur::MUST, std::make_shared<AllQuery>());
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs));
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
@@ -1014,7 +1014,7 @@ TEST_F(OccurBooleanQueryTest, ShouldOnlyWithAllQueryMinShouldMatch) {
std::vector<std::pair<Occur, QueryPtr>> clauses;
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs));
- clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>(50));
+ clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>());
OccurBooleanQuery query(std::move(clauses), 2);
auto weight = query.weight(false);
@@ -1031,7 +1031,7 @@ TEST_F(OccurBooleanQueryTest, ShouldOnlyAllQueryScoring) {
std::vector<std::pair<Occur, QueryPtr>> clauses;
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(std::vector<uint32_t> {1,
2}, 2.0F));
- clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>(10));
+ clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>());
OccurBooleanQuery query(std::move(clauses));
auto weight = query.weight(true);
diff --git a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_wildcard_lowercase_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_wildcard_lowercase_test.cpp
new file mode 100644
index 00000000000..f25ed8db8f0
--- /dev/null
+++ b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/regexp_wildcard_lowercase_test.cpp
@@ -0,0 +1,228 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <roaring/roaring.hh>
+#include <string>
+#include <vector>
+
+#include "io/fs/local_file_system.h"
+#include "olap/rowset/segment_v2/index_query_context.h"
+#include "olap/rowset/segment_v2/inverted_index/analyzer/custom_analyzer.h"
+#include "olap/rowset/segment_v2/inverted_index/query_v2/regexp_query/regexp_query.h"
+#include "olap/rowset/segment_v2/inverted_index/query_v2/wildcard_query/wildcard_query.h"
+#include "olap/rowset/segment_v2/inverted_index/util/string_helper.h"
+
+CL_NS_USE(search)
+CL_NS_USE(store)
+CL_NS_USE(index)
+
+namespace doris::segment_v2 {
+
+using namespace inverted_index;
+using namespace inverted_index::query_v2;
+
+// Test that REGEXP queries match directly against the term dictionary (no
lowercasing),
+// while WILDCARD queries are expected to receive already-lowercased patterns
from function_search.cpp.
+//
+// This test creates an index with lowercased terms (simulating
parser=english, lower_case=true)
+// and verifies:
+// 1. REGEXP with uppercase pattern does NOT match lowercased terms
(ES-compatible)
+// 2. REGEXP with lowercase pattern DOES match lowercased terms
+// 3. WILDCARD with lowercase pattern DOES match lowercased terms
+class RegexpWildcardLowercaseTest : public testing::Test {
+public:
+ const std::string kTestDir = "./ut_dir/regexp_wildcard_lowercase_test";
+
+ void SetUp() override {
+ auto st = io::global_local_filesystem()->delete_directory(kTestDir);
+ ASSERT_TRUE(st.ok()) << st;
+ st = io::global_local_filesystem()->create_directory(kTestDir);
+ ASSERT_TRUE(st.ok()) << st;
+ // Create index with lowercased terms (simulating lower_case=true
analyzer)
+ create_test_index("title", kTestDir);
+ }
+
+ void TearDown() override {
+
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok());
+ }
+
+private:
+ void create_test_index(const std::string& field_name, const std::string&
dir) {
+ // Simulate data that was indexed with lower_case=true:
+ // Original data: "ABC DEF", "abc def", "Apple Banana", "cherry date"
+ // After english analyzer with lower_case=true, terms are all lowercase
+ std::vector<std::string> test_data = {"abc def", "abc def", "apple
banana", "cherry date"};
+
+ // Use standard tokenizer (which lowercases by default)
+ CustomAnalyzerConfig::Builder builder;
+ builder.with_tokenizer_config("standard", {});
+ auto custom_analyzer_config = builder.build();
+ auto custom_analyzer =
CustomAnalyzer::build_custom_analyzer(custom_analyzer_config);
+
+ auto* indexwriter =
+ _CLNEW lucene::index::IndexWriter(dir.c_str(),
custom_analyzer.get(), true);
+ indexwriter->setMaxBufferedDocs(100);
+ indexwriter->setRAMBufferSizeMB(-1);
+ indexwriter->setMaxFieldLength(0x7FFFFFFFL);
+ indexwriter->setMergeFactor(1000000000);
+ indexwriter->setUseCompoundFile(false);
+
+ auto char_string_reader =
std::make_shared<lucene::util::SStringReader<char>>();
+
+ auto* doc = _CLNEW lucene::document::Document();
+ int32_t field_config = lucene::document::Field::STORE_NO;
+ field_config |= lucene::document::Field::INDEX_NONORMS;
+ field_config |= lucene::document::Field::INDEX_TOKENIZED;
+ auto field_name_w = std::wstring(field_name.begin(), field_name.end());
+ auto* field = _CLNEW lucene::document::Field(field_name_w.c_str(),
field_config);
+ field->setOmitTermFreqAndPositions(false);
+ doc->add(*field);
+
+ for (const auto& data : test_data) {
+ char_string_reader->init(data.data(), data.size(), false);
+ auto* stream = custom_analyzer->reusableTokenStream(field->name(),
char_string_reader);
+ field->setValue(stream);
+ indexwriter->addDocument(doc);
+ }
+
+ indexwriter->close();
+ _CLLDELETE(indexwriter);
+ _CLLDELETE(doc);
+ }
+};
+
+static std::shared_ptr<lucene::index::IndexReader> make_shared_reader(
+ lucene::index::IndexReader* raw_reader) {
+ return {raw_reader, [](lucene::index::IndexReader* reader) {
+ if (reader != nullptr) {
+ reader->close();
+ _CLDELETE(reader);
+ }
+ }};
+}
+
+static std::vector<uint32_t> execute_query(const std::string& test_dir, const
std::wstring& field,
+ const std::shared_ptr<Query>&
query) {
+ auto* dir = FSDirectory::getDirectory(test_dir.c_str());
+ auto reader_holder =
make_shared_reader(lucene::index::IndexReader::open(dir, true));
+
+ auto weight = query->weight(false);
+
+ QueryExecutionContext exec_ctx;
+ exec_ctx.segment_num_rows = reader_holder->maxDoc();
+ exec_ctx.readers = {reader_holder};
+ exec_ctx.field_reader_bindings.emplace(field, reader_holder);
+
+ auto scorer = weight->scorer(exec_ctx);
+ std::vector<uint32_t> matched_docs;
+ if (scorer) {
+ uint32_t doc = scorer->doc();
+ while (doc != TERMINATED) {
+ matched_docs.push_back(doc);
+ doc = scorer->advance();
+ }
+ }
+
+ _CLDECDELETE(dir);
+ return matched_docs;
+}
+
+// REGEXP with uppercase pattern should NOT match lowercased index terms.
+// This is consistent with ES query_string regex behavior.
+TEST_F(RegexpWildcardLowercaseTest, RegexpUppercasePatternNoMatch) {
+ auto context = std::make_shared<IndexQueryContext>();
+ std::wstring field = StringHelper::to_wstring("title");
+
+ // Pattern "AB.*" should NOT match "abc" (uppercase vs lowercase)
+ auto query = std::make_shared<RegexpQuery>(context, field, "AB.*");
+ auto matched = execute_query(kTestDir, field, query);
+
+ EXPECT_EQ(matched.size(), 0)
+ << "Uppercase regex 'AB.*' should not match lowercased terms
'abc'";
+}
+
+// REGEXP with lowercase pattern SHOULD match lowercased index terms.
+TEST_F(RegexpWildcardLowercaseTest, RegexpLowercasePatternMatches) {
+ auto context = std::make_shared<IndexQueryContext>();
+ std::wstring field = StringHelper::to_wstring("title");
+
+ // Pattern "ab.*" should match "abc" (both lowercase)
+ auto query = std::make_shared<RegexpQuery>(context, field, "ab.*");
+ auto matched = execute_query(kTestDir, field, query);
+
+ // Docs 0 and 1 contain "abc", docs 2 and 3 don't
+ EXPECT_EQ(matched.size(), 2) << "Lowercase regex 'ab.*' should match
lowercased terms 'abc'";
+}
+
+// WILDCARD with lowercase pattern SHOULD match.
+// In function_search.cpp, WILDCARD patterns are lowercased before being
passed here.
+TEST_F(RegexpWildcardLowercaseTest, WildcardLowercasePatternMatches) {
+ auto context = std::make_shared<IndexQueryContext>();
+ std::wstring field = StringHelper::to_wstring("title");
+
+ // Pattern "ab*" (already lowercased by function_search.cpp) should match
"abc"
+ auto query = std::make_shared<WildcardQuery>(context, field, "ab*");
+ auto matched = execute_query(kTestDir, field, query);
+
+ EXPECT_EQ(matched.size(), 2) << "Lowercase wildcard 'ab*' should match
lowercased terms 'abc'";
+}
+
+// WILDCARD with uppercase pattern should NOT match lowercased index terms
+// (but in practice, function_search.cpp lowercases before passing to
WildcardQuery).
+TEST_F(RegexpWildcardLowercaseTest, WildcardUppercasePatternNoMatch) {
+ auto context = std::make_shared<IndexQueryContext>();
+ std::wstring field = StringHelper::to_wstring("title");
+
+ // Pattern "AB*" should NOT match "abc" at the WildcardQuery level
+ auto query = std::make_shared<WildcardQuery>(context, field, "AB*");
+ auto matched = execute_query(kTestDir, field, query);
+
+ EXPECT_EQ(matched.size(), 0) << "Uppercase wildcard 'AB*' should not match
lowercased terms";
+}
+
+// REGEXP with a more complex pattern
+TEST_F(RegexpWildcardLowercaseTest, RegexpComplexPatternMatches) {
+ auto context = std::make_shared<IndexQueryContext>();
+ std::wstring field = StringHelper::to_wstring("title");
+
+ // Pattern "ch.*y" should match "cherry" (lowercased)
+ auto query = std::make_shared<RegexpQuery>(context, field, "ch.*y");
+ auto matched = execute_query(kTestDir, field, query);
+
+ EXPECT_EQ(matched.size(), 1) << "Regex 'ch.*y' should match 'cherry' in
doc 3";
+ if (!matched.empty()) {
+ EXPECT_EQ(matched[0], 3);
+ }
+}
+
+// WILDCARD matching all terms with '*'
+TEST_F(RegexpWildcardLowercaseTest, WildcardStarMatchesAll) {
+ auto context = std::make_shared<IndexQueryContext>();
+ std::wstring field = StringHelper::to_wstring("title");
+
+ // Pattern "a*" should match "abc" and "apple"
+ auto query = std::make_shared<WildcardQuery>(context, field, "a*");
+ auto matched = execute_query(kTestDir, field, query);
+
+ // Docs 0,1 have "abc", doc 2 has "apple", doc 3 has no "a*" terms
+ EXPECT_EQ(matched.size(), 3) << "Wildcard 'a*' should match docs with
'abc' and 'apple'";
+}
+
+} // namespace doris::segment_v2
diff --git a/be/test/vec/function/function_search_test.cpp b/be/test/vec/function/function_search_test.cpp
index 64b64b0d667..4daa48f662a 100644
--- a/be/test/vec/function/function_search_test.cpp
+++ b/be/test/vec/function/function_search_test.cpp
@@ -1716,8 +1716,8 @@ TEST_F(FunctionSearchTest, TestBuildLeafQueryPhrase) {
inverted_index::query_v2::QueryPtr out;
std::string out_binding_key;
- Status st =
- function_search->build_leaf_query(clause, context, resolver, &out,
&out_binding_key);
+ Status st = function_search->build_leaf_query(clause, context, resolver,
&out, &out_binding_key,
+ "OR", 0);
EXPECT_TRUE(st.ok());
auto phrase_query =
std::dynamic_pointer_cast<inverted_index::query_v2::PhraseQuery>(out);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
index b53386206e9..2cd1035e298 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SearchPredicate.java
@@ -17,6 +17,7 @@
package org.apache.doris.analysis;
+import org.apache.doris.catalog.Index;
import org.apache.doris.catalog.Type;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser.QsPlan;
@@ -33,7 +34,9 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
+import java.util.Map;
import java.util.stream.IntStream;
/**
@@ -45,11 +48,18 @@ public class SearchPredicate extends Predicate {
private final String dslString;
private final QsPlan qsPlan;
+ private final List<Index> fieldIndexes;
public SearchPredicate(String dslString, QsPlan qsPlan, List<Expr>
children, boolean nullable) {
+ this(dslString, qsPlan, children, Collections.emptyList(), nullable);
+ }
+
+ public SearchPredicate(String dslString, QsPlan qsPlan, List<Expr>
children,
+ List<Index> fieldIndexes, boolean nullable) {
super();
this.dslString = dslString;
this.qsPlan = qsPlan;
+ this.fieldIndexes = fieldIndexes != null ? fieldIndexes :
Collections.emptyList();
this.type = Type.BOOLEAN;
// Add children (SlotReferences)
@@ -63,6 +73,7 @@ public class SearchPredicate extends Predicate {
super(other);
this.dslString = other.dslString;
this.qsPlan = other.qsPlan;
+ this.fieldIndexes = other.fieldIndexes;
}
@Override
@@ -183,10 +194,30 @@ public class SearchPredicate extends Predicate {
thriftBinding.setSlotIndex(i); // fallback to position
}
+ // Set index properties from FE Index lookup (needed for variant
subcolumn analyzer)
+ if (i < fieldIndexes.size() && fieldIndexes.get(i) != null) {
+ Map<String, String> properties =
fieldIndexes.get(i).getProperties();
+ if (properties != null && !properties.isEmpty()) {
+ thriftBinding.setIndexProperties(properties);
+ LOG.debug("buildThriftParam: field='{}'
index_properties={}",
+ fieldPath, properties);
+ }
+ }
+
bindings.add(thriftBinding);
}
param.setFieldBindings(bindings);
+ // Set default_operator for BE to use when tokenizing TERM queries
+ if (qsPlan.getDefaultOperator() != null) {
+ param.setDefaultOperator(qsPlan.getDefaultOperator());
+ }
+
+ // Set minimum_should_match for BE to use when tokenizing TERM queries
in Lucene mode
+ if (qsPlan.getMinimumShouldMatch() != null) {
+ param.setMinimumShouldMatch(qsPlan.getMinimumShouldMatch());
+ }
+
return param;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
index a437e315371..c489576c7eb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/ExpressionTranslator.java
@@ -656,16 +656,32 @@ public class ExpressionTranslator extends DefaultExpressionVisitor<Expr, PlanTra
public Expr visitSearchExpression(SearchExpression searchExpression,
PlanTranslatorContext context) {
List<Expr> slotChildren = new ArrayList<>();
+ List<Index> fieldIndexes = new ArrayList<>();
// Convert slot reference children from Nereids to Analysis
for (Expression slotExpr : searchExpression.getSlotChildren()) {
Expr translatedSlot = slotExpr.accept(this, context);
slotChildren.add(translatedSlot);
+
+ // Look up the inverted index for each field (needed for variant
subcolumn analyzer)
+ Index invertedIndex = null;
+ if (slotExpr instanceof SlotReference) {
+ SlotReference slot = (SlotReference) slotExpr;
+ OlapTable olapTbl = getOlapTableDirectly(slot);
+ if (olapTbl != null) {
+ Column column = slot.getOriginalColumn().orElse(null);
+ if (column != null) {
+ invertedIndex = olapTbl.getInvertedIndex(column,
slot.getSubPath());
+ }
+ }
+ }
+ fieldIndexes.add(invertedIndex);
}
// Create SearchPredicate with proper slot children for BE "action on
slot" detection
SearchPredicate searchPredicate = new
SearchPredicate(searchExpression.getDslString(),
- searchExpression.getQsPlan(), slotChildren,
searchExpression.nullable());
+ searchExpression.getQsPlan(), slotChildren, fieldIndexes,
+ searchExpression.nullable());
return searchPredicate;
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index 61e253d710e..fbaba2b6e5d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -22,6 +22,7 @@ import org.apache.doris.nereids.search.SearchParser;
import org.apache.doris.nereids.search.SearchParserBaseVisitor;
import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonSetter;
import com.fasterxml.jackson.core.JsonProcessingException;
@@ -110,23 +111,30 @@ public class SearchDslParser {
String defaultField = searchOptions.getDefaultField();
String defaultOperator = searchOptions.getDefaultOperator();
+ QsPlan plan;
// Use Lucene mode parser if specified
if (searchOptions.isLuceneMode()) {
// Multi-field + Lucene mode: first expand DSL, then parse with
Lucene semantics
if (searchOptions.isMultiFieldMode()) {
- return parseDslMultiFieldLuceneMode(dsl,
searchOptions.getFields(),
+ plan = parseDslMultiFieldLuceneMode(dsl,
searchOptions.getFields(),
defaultOperator, searchOptions);
- }
- return parseDslLuceneMode(dsl, defaultField, defaultOperator,
searchOptions);
- }
-
- // Multi-field mode parsing (standard mode)
- if (searchOptions.isMultiFieldMode()) {
- return parseDslMultiFieldMode(dsl, searchOptions.getFields(),
defaultOperator, searchOptions);
- }
-
- // Standard mode parsing
- return parseDslStandardMode(dsl, defaultField, defaultOperator);
+ } else {
+ plan = parseDslLuceneMode(dsl, defaultField, defaultOperator,
searchOptions);
+ }
+ } else if (searchOptions.isMultiFieldMode()) {
+ // Multi-field mode parsing (standard mode)
+ plan = parseDslMultiFieldMode(dsl, searchOptions.getFields(),
defaultOperator, searchOptions);
+ } else {
+ // Standard mode parsing
+ plan = parseDslStandardMode(dsl, defaultField, defaultOperator);
+ }
+
+ // Wrap plan with options for BE serialization
+ // NOTE: Must use normalizeDefaultOperator() here because BE compares
+ // default_operator case-sensitively against lowercase "and"/"or"
+ return new QsPlan(plan.getRoot(), plan.getFieldBindings(),
+ normalizeDefaultOperator(searchOptions.getDefaultOperator()),
+ searchOptions.getMinimumShouldMatch());
}
/**
@@ -480,6 +488,12 @@ public class SearchDslParser {
}
validateFieldsList(fields);
+ // For multi-field mode (fields.size() > 1), ignore
minimum_should_match.
+ // The expanded DSL creates complex nested boolean structures where msm
+ // semantics become ambiguous. This is a deliberate design decision.
+ final SearchOptions effectiveOptions = fields.size() > 1
+ ? options.withMinimumShouldMatch(null) : options;
+
String trimmedDsl = dsl.trim();
try {
@@ -507,22 +521,15 @@ public class SearchDslParser {
// Build AST using Lucene-mode visitor with first field as
placeholder for bare queries
// Use constructor with override to avoid mutating shared options
object (thread-safety)
- QsLuceneModeAstBuilder visitor = new
QsLuceneModeAstBuilder(options, fields.get(0));
+ QsLuceneModeAstBuilder visitor = new
QsLuceneModeAstBuilder(effectiveOptions, fields.get(0));
QsNode root = visitor.visit(tree);
- // Apply multi-field expansion based on type
- // Pass luceneMode=true since this is Lucene mode parsing
- QsNode expandedRoot;
- if (options.isCrossFieldsMode()) {
- // cross_fields: each term expands to
OCCUR_BOOLEAN(field1:term, field2:term)
- expandedRoot = MultiFieldExpander.expandCrossFields(root,
fields, true);
- } else if (options.isBestFieldsMode()) {
- // best_fields: entire query copied per field, joined with
OCCUR_BOOLEAN
- expandedRoot = MultiFieldExpander.expandBestFields(root,
fields, true);
- } else {
- throw new IllegalStateException(
- "Invalid type value: '" + options.getType() + "'.
Expected 'best_fields' or 'cross_fields'");
- }
+ // In ES query_string, both best_fields and cross_fields use
per-clause expansion
+ // (each clause is independently expanded across fields). The
difference is only
+ // in scoring (dis_max vs blended analysis), which doesn't apply
to Doris since
+ // search() is a boolean filter. So we always use
expandCrossFields here.
+ // Type validation already happened in SearchOptions.setType().
+ QsNode expandedRoot = MultiFieldExpander.expandCrossFields(root,
fields, true);
// Extract field bindings from expanded AST
Set<String> fieldNames = collectFieldNames(expandedRoot);
@@ -532,7 +539,10 @@ public class SearchDslParser {
bindings.add(new QsFieldBinding(fieldName, slotIndex++));
}
- return new QsPlan(expandedRoot, bindings);
+ // Include default_operator and minimum_should_match for BE
+ return new QsPlan(expandedRoot, bindings,
+
normalizeDefaultOperator(effectiveOptions.getDefaultOperator()),
+ effectiveOptions.getMinimumShouldMatch());
} catch (SearchDslSyntaxException e) {
LOG.error("Failed to parse search DSL in multi-field Lucene mode:
'{}'", dsl, e);
@@ -560,7 +570,8 @@ public class SearchDslParser {
AND, // clause1 AND clause2 (standard boolean algebra)
OR, // clause1 OR clause2 (standard boolean algebra)
NOT, // NOT clause (standard boolean algebra)
- OCCUR_BOOLEAN // Lucene-style boolean query with MUST/SHOULD/MUST_NOT
+ OCCUR_BOOLEAN, // Lucene-style boolean query with MUST/SHOULD/MUST_NOT
+ MATCH_ALL_DOCS // Matches all documents (used for pure NOT query
rewriting)
}
/**
@@ -816,6 +827,8 @@ public class SearchDslParser {
if (result == null) {
throw new RuntimeException("Invalid search value");
}
+ // Mark as explicit field - user wrote "field:term" syntax
+ result.setExplicitField(true);
return result;
} finally {
// Restore previous context
@@ -875,6 +888,10 @@ public class SearchDslParser {
}
private QsNode createPrefixNode(String fieldName, String value) {
+ // Standalone * → MATCH_ALL_DOCS (matches ES behavior: field:*
becomes ExistsQuery)
+ if ("*".equals(value)) {
+ return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>)
null);
+ }
return new QsNode(QsClauseType.PREFIX, fieldName,
unescapeTermValue(value));
}
@@ -996,11 +1013,28 @@ public class SearchDslParser {
@JsonProperty("fieldBindings")
private final List<QsFieldBinding> fieldBindings;
+ @JsonProperty("defaultOperator")
+ private final String defaultOperator;
+
+ @JsonProperty("minimumShouldMatch")
+ private final Integer minimumShouldMatch;
+
@JsonCreator
public QsPlan(@JsonProperty("root") QsNode root,
@JsonProperty("fieldBindings") List<QsFieldBinding>
fieldBindings) {
+ this(root, fieldBindings, null, null);
+ }
+
+ public QsPlan(QsNode root, List<QsFieldBinding> fieldBindings, String
defaultOperator) {
+ this(root, fieldBindings, defaultOperator, null);
+ }
+
+ public QsPlan(QsNode root, List<QsFieldBinding> fieldBindings, String
defaultOperator,
+ Integer minimumShouldMatch) {
this.root = Objects.requireNonNull(root, "root cannot be null");
this.fieldBindings = fieldBindings != null ? new
ArrayList<>(fieldBindings) : new ArrayList<>();
+ this.defaultOperator = defaultOperator;
+ this.minimumShouldMatch = minimumShouldMatch;
}
public QsNode getRoot() {
@@ -1011,6 +1045,14 @@ public class SearchDslParser {
return Collections.unmodifiableList(fieldBindings);
}
+ public String getDefaultOperator() {
+ return defaultOperator;
+ }
+
+ public Integer getMinimumShouldMatch() {
+ return minimumShouldMatch;
+ }
+
/**
* Parse QsPlan from JSON string
*/
@@ -1036,7 +1078,7 @@ public class SearchDslParser {
@Override
public int hashCode() {
- return Objects.hash(root, fieldBindings);
+ return Objects.hash(root, fieldBindings, defaultOperator,
minimumShouldMatch);
}
@Override
@@ -1049,7 +1091,9 @@ public class SearchDslParser {
}
QsPlan qsPlan = (QsPlan) o;
return Objects.equals(root, qsPlan.getRoot())
- && Objects.equals(fieldBindings,
qsPlan.getFieldBindings());
+ && Objects.equals(fieldBindings, qsPlan.getFieldBindings())
+ && Objects.equals(defaultOperator,
qsPlan.getDefaultOperator())
+ && Objects.equals(minimumShouldMatch,
qsPlan.getMinimumShouldMatch());
}
}
@@ -1081,6 +1125,15 @@ public class SearchDslParser {
@JsonProperty("minimumShouldMatch")
private final Integer minimumShouldMatch;
+ /**
+ * Whether the field was explicitly specified in the DSL syntax (e.g.,
title:music)
+ * vs assigned from default field for bare queries (e.g., music).
+ * Used internally by MultiFieldExpander to avoid expanding explicit
field prefixes.
+ * Not serialized to JSON since it's only needed during FE-side AST
expansion.
+ */
+ @JsonIgnore
+ private boolean explicitField;
+
/**
* Constructor for JSON deserialization
*
@@ -1185,6 +1238,23 @@ public class SearchDslParser {
return minimumShouldMatch;
}
+ /**
+ * Returns whether the field was explicitly specified in the DSL
syntax.
+ */
+ public boolean isExplicitField() {
+ return explicitField;
+ }
+
+ /**
+ * Sets whether the field was explicitly specified in the DSL syntax.
+ * @param explicitField true if field was explicitly specified (e.g.,
title:music)
+ * @return this node for method chaining
+ */
+ public QsNode setExplicitField(boolean explicitField) {
+ this.explicitField = explicitField;
+ return this;
+ }
+
/**
* Sets the occur type for this node.
* @param occur the occur type (MUST, SHOULD, MUST_NOT)
@@ -1319,51 +1389,23 @@ public class SearchDslParser {
* @return Expanded AST
*/
public static QsNode expandBestFields(QsNode root, List<String>
fields) {
- return expandBestFields(root, fields, false);
- }
-
- /**
- * Expand AST using best_fields strategy with optional Lucene mode.
- * @param root The AST root node
- * @param fields List of fields to expand across
- * @param luceneMode If true, use Lucene-style OCCUR_BOOLEAN; if
false, use standard OR
- */
- public static QsNode expandBestFields(QsNode root, List<String>
fields, boolean luceneMode) {
if (fields == null || fields.isEmpty()) {
return root;
}
if (fields.size() == 1) {
- // Single field - just set the field on all leaf nodes
return setFieldOnLeaves(root, fields.get(0), fields);
}
- // Use the explicit luceneMode parameter only - don't infer from
node properties
- boolean isLuceneMode = luceneMode;
-
- // Create a copy of the entire AST for each field
+ // Non-lucene mode (used by parseDslMultiFieldMode for multi_match
semantics):
+ // Copy entire AST per field, join with OR.
+ // Example: "hello AND world" with fields=[title,content] becomes
+ // (title:hello AND title:world) OR (content:hello AND
content:world)
List<QsNode> fieldTrees = new ArrayList<>();
for (String field : fields) {
QsNode copy = deepCopyWithField(root, field, fields);
- // In Lucene mode, set SHOULD on each field tree
- if (isLuceneMode) {
- copy.setOccur(QsOccur.SHOULD);
- }
fieldTrees.add(copy);
}
-
- // In Lucene mode, create OCCUR_BOOLEAN instead of OR
- if (isLuceneMode) {
- // Preserve minimum_should_match from root if it has one
- Integer minShouldMatch = root.getMinimumShouldMatch();
- if (minShouldMatch == null) {
- // Default: at least 1 field should match
- minShouldMatch = 1;
- }
- return new QsNode(QsClauseType.OCCUR_BOOLEAN, fieldTrees,
minShouldMatch);
- } else {
- // Standard mode: join with OR
- return new QsNode(QsClauseType.OR, fieldTrees);
- }
+ return new QsNode(QsClauseType.OR, fieldTrees);
}
/**
@@ -1371,13 +1413,15 @@ public class SearchDslParser {
* Always returns a new copy or new node structure, never the original
node.
*/
private static QsNode expandNodeCrossFields(QsNode node, List<String>
fields, boolean luceneMode) {
+ // MATCH_ALL_DOCS matches all documents regardless of field -
don't expand
+ if (node.getType() == QsClauseType.MATCH_ALL_DOCS) {
+ return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>)
null);
+ }
+
// Check if this is a leaf node (no children)
if (isLeafNode(node)) {
- // Check if the node has an explicit field that's NOT in the
fields list
- // If so, don't expand but still return a copy
- String nodeField = node.getField();
- if (nodeField != null && !nodeField.isEmpty() &&
!fields.contains(nodeField)) {
- // Explicit field not in expansion list - return a copy
preserving all fields
+ // If the user explicitly wrote "field:term" syntax, respect
it - don't expand
+ if (node.isExplicitField()) {
return new QsNode(
node.getType(),
node.getField(),
@@ -1450,17 +1494,13 @@ public class SearchDslParser {
* Always returns a new copy, never the original node.
*/
private static QsNode deepCopyWithField(QsNode node, String field,
List<String> fields) {
+ // MATCH_ALL_DOCS matches all documents regardless of field -
don't set field
+ if (node.getType() == QsClauseType.MATCH_ALL_DOCS) {
+ return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>)
null);
+ }
if (isLeafNode(node)) {
- // Check if the node has an explicit field that's NOT in the
fields list
- String nodeField = node.getField();
- String targetField;
- if (nodeField != null && !nodeField.isEmpty() &&
!fields.contains(nodeField)) {
- // Explicit field not in expansion list - preserve
original field
- targetField = nodeField;
- } else {
- // Use new field
- targetField = field;
- }
+ // If the user explicitly wrote "field:term" syntax, preserve
original field
+ String targetField = node.isExplicitField() ? node.getField()
: field;
// Create a complete copy of the leaf node
QsNode copy = new QsNode(
@@ -1471,6 +1511,7 @@ public class SearchDslParser {
node.getOccur(),
node.getMinimumShouldMatch()
);
+ copy.setExplicitField(node.isExplicitField());
return copy;
}
@@ -1500,16 +1541,13 @@ public class SearchDslParser {
* Always returns a new copy, never the original node.
*/
private static QsNode setFieldOnLeaves(QsNode node, String field,
List<String> fields) {
+ // MATCH_ALL_DOCS matches all documents regardless of field -
don't set field
+ if (node.getType() == QsClauseType.MATCH_ALL_DOCS) {
+ return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>)
null);
+ }
if (isLeafNode(node)) {
- // Check if the node has an explicit field that's NOT in the
fields list
- String nodeField = node.getField();
- String targetField;
- if (nodeField != null && !nodeField.isEmpty() &&
!fields.contains(nodeField)) {
- // Explicit field not in expansion list - preserve
original field
- targetField = nodeField;
- } else {
- targetField = field;
- }
+ // If the user explicitly wrote "field:term" syntax, preserve
original field
+ String targetField = node.isExplicitField() ? node.getField()
: field;
// Create complete copy
return new QsNode(
@@ -1676,6 +1714,21 @@ public class SearchDslParser {
return "cross_fields".equals(type);
}
+ /**
+ * Create a copy of this SearchOptions with a different
minimum_should_match value.
+ * Used for ES compatibility in multi-field mode where msm is ignored.
+ */
+ public SearchOptions withMinimumShouldMatch(Integer newMsm) {
+ SearchOptions copy = new SearchOptions();
+ copy.defaultField = this.defaultField;
+ copy.defaultOperator = this.defaultOperator;
+ copy.mode = this.mode;
+ copy.minimumShouldMatch = newMsm;
+ copy.fields = this.fields != null ? new ArrayList<>(this.fields) :
null;
+ copy.type = this.type;
+ return copy;
+ }
+
/**
* Validate the options after deserialization.
* Checks for:
@@ -1793,7 +1846,10 @@ public class SearchDslParser {
bindings.add(new QsFieldBinding(fieldName, slotIndex++));
}
- return new QsPlan(root, bindings);
+ // Include default_operator and minimum_should_match for BE
+ return new QsPlan(root, bindings,
+ normalizeDefaultOperator(defaultOperator),
+ options.getMinimumShouldMatch());
} catch (SearchDslSyntaxException e) {
// Syntax error in DSL - user input issue
@@ -1831,6 +1887,7 @@ public class SearchDslParser {
private String currentFieldName = null;
// Override for default field - used in multi-field mode to avoid
mutating options
private final String overrideDefaultField;
+ private int nestingLevel = 0;
public QsLuceneModeAstBuilder(SearchOptions options) {
this.options = options;
@@ -1894,11 +1951,17 @@ public class SearchDslParser {
if (terms.size() == 1) {
TermWithOccur singleTerm = terms.get(0);
if (singleTerm.isNegated) {
- // Single negated term - must wrap in OCCUR_BOOLEAN for BE
to handle MUST_NOT
+ // Single negated term - rewrite to:
SHOULD(MATCH_ALL_DOCS) + MUST_NOT(term)
+ // This ensures proper Lucene semantics: match all docs
EXCEPT those matching the term
singleTerm.node.setOccur(QsOccur.MUST_NOT);
+
+ QsNode matchAllNode = new
QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>) null);
+ matchAllNode.setOccur(QsOccur.SHOULD);
+
List<QsNode> children = new ArrayList<>();
+ children.add(matchAllNode);
children.add(singleTerm.node);
- return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 0);
+ return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 1);
}
// Single non-negated term - return directly without wrapper
return singleTerm.node;
@@ -1908,37 +1971,32 @@ public class SearchDslParser {
applyLuceneBooleanLogic(terms);
// Determine minimum_should_match
- Integer minShouldMatch = options.getMinimumShouldMatch();
+ // Only use explicit option at top level; nested clauses use
default logic
+ Integer minShouldMatch = (nestingLevel == 0) ?
options.getMinimumShouldMatch() : null;
if (minShouldMatch == null) {
// Default: 0 if there are MUST clauses, 1 if only SHOULD
+ // This matches Lucene BooleanQuery default behavior
boolean hasMust = terms.stream().anyMatch(t -> t.occur ==
QsOccur.MUST);
boolean hasMustNot = terms.stream().anyMatch(t -> t.occur ==
QsOccur.MUST_NOT);
minShouldMatch = (hasMust || hasMustNot) ? 0 : 1;
}
- // Filter out SHOULD clauses if minimum_should_match=0 and there
are MUST clauses
final int finalMinShouldMatch = minShouldMatch;
- if (minShouldMatch == 0) {
- boolean hasMust = terms.stream().anyMatch(t -> t.occur ==
QsOccur.MUST);
- if (hasMust) {
- terms = terms.stream()
- .filter(t -> t.occur != QsOccur.SHOULD)
- .collect(Collectors.toList());
- }
- }
-
- if (terms.isEmpty()) {
- throw new RuntimeException("All terms filtered out in Lucene
boolean logic");
- }
if (terms.size() == 1) {
TermWithOccur remainingTerm = terms.get(0);
if (remainingTerm.occur == QsOccur.MUST_NOT) {
- // Single MUST_NOT term - must wrap in OCCUR_BOOLEAN for
BE to handle
+ // Single MUST_NOT term - rewrite to:
SHOULD(MATCH_ALL_DOCS) + MUST_NOT(term)
+ // This ensures proper Lucene semantics: match all docs
EXCEPT those matching the term
remainingTerm.node.setOccur(QsOccur.MUST_NOT);
+
+ QsNode matchAllNode = new
QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>) null);
+ matchAllNode.setOccur(QsOccur.SHOULD);
+
List<QsNode> children = new ArrayList<>();
+ children.add(matchAllNode);
children.add(remainingTerm.node);
- return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 0);
+ return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 1);
}
return remainingTerm.node;
}
@@ -2026,8 +2084,14 @@ public class SearchDslParser {
QsNode node;
if (atomCtx.clause() != null) {
- // Parenthesized clause - visit recursively
- node = visit(atomCtx.clause());
+ // Parenthesized clause - visit recursively with incremented
nesting level
+ // This ensures nested clauses don't use top-level
minimum_should_match
+ nestingLevel++;
+ try {
+ node = visit(atomCtx.clause());
+ } finally {
+ nestingLevel--;
+ }
} else if (atomCtx.fieldQuery() != null) {
// Field query with explicit field prefix
node = visit(atomCtx.fieldQuery());
@@ -2048,14 +2112,23 @@ public class SearchDslParser {
/**
* Apply Lucene boolean logic to determine final MUST/SHOULD/MUST_NOT
for each term.
* <p>
- * Rules (processed left-to-right):
- * 1. First term: MUST (due to default_operator=AND)
- * 2. AND introduces: marks preceding and current as MUST
- * 3. OR introduces: marks preceding and current as SHOULD
- * 4. NOT modifier: marks current as MUST_NOT
- * 5. AND after MUST_NOT: the MUST_NOT term is not affected, current
becomes MUST
+ * Faithfully replicates Lucene QueryParserBase.addClause() semantics:
+ * - Processes terms left-to-right with NO operator precedence (AND/OR
are equal)
+ * - Each conjunction affects at most the immediately preceding term
+ * <p>
+ * With OR_OPERATOR (default_operator=OR):
+ * - First term / no conjunction: SHOULD
+ * - AND: preceding becomes MUST, current MUST
+ * - OR: current SHOULD (preceding unchanged)
+ * <p>
+ * With AND_OPERATOR (default_operator=AND):
+ * - First term / no conjunction: MUST
+ * - AND: preceding becomes MUST, current MUST
+ * - OR: preceding becomes SHOULD, current SHOULD
*/
private void applyLuceneBooleanLogic(List<TermWithOccur> terms) {
+ boolean useAnd =
"AND".equalsIgnoreCase(options.getDefaultOperator());
+
for (int i = 0; i < terms.size(); i++) {
TermWithOccur current = terms.get(i);
@@ -2063,36 +2136,44 @@ public class SearchDslParser {
// NOT modifier - mark as MUST_NOT
current.occur = QsOccur.MUST_NOT;
- // OR + NOT: preceding becomes SHOULD (if not already
MUST_NOT)
- if (current.introducedByOr && i > 0) {
+ if (current.introducedByAnd && i > 0) {
+ // AND + NOT: AND still makes preceding MUST
+ TermWithOccur prev = terms.get(i - 1);
+ if (prev.occur != QsOccur.MUST_NOT) {
+ prev.occur = QsOccur.MUST;
+ }
+ } else if (current.introducedByOr && i > 0 && useAnd) {
+ // OR + NOT with AND_OPERATOR: preceding becomes SHOULD
TermWithOccur prev = terms.get(i - 1);
if (prev.occur != QsOccur.MUST_NOT) {
prev.occur = QsOccur.SHOULD;
}
}
+ // OR + NOT with OR_OPERATOR: no change to preceding
} else if (current.introducedByAnd) {
- // AND introduces: both preceding and current are MUST
+ // AND: preceding becomes MUST, current MUST
current.occur = QsOccur.MUST;
if (i > 0) {
TermWithOccur prev = terms.get(i - 1);
- // Don't change MUST_NOT to MUST
if (prev.occur != QsOccur.MUST_NOT) {
prev.occur = QsOccur.MUST;
}
}
} else if (current.introducedByOr) {
- // OR introduces: both preceding and current are SHOULD
+ // OR: current is SHOULD
current.occur = QsOccur.SHOULD;
- if (i > 0) {
+ // Only change preceding to SHOULD if default_operator=AND
+ // (Lucene: OR_OPERATOR + CONJ_OR does NOT modify
preceding)
+ if (useAnd && i > 0) {
TermWithOccur prev = terms.get(i - 1);
- // Don't change MUST_NOT to SHOULD
if (prev.occur != QsOccur.MUST_NOT) {
prev.occur = QsOccur.SHOULD;
}
}
} else {
- // First term: MUST (default_operator=AND)
- current.occur = QsOccur.MUST;
+ // First term or implicit conjunction (no explicit AND/OR)
+ // Lucene: SHOULD for OR_OPERATOR, MUST for AND_OPERATOR
+ current.occur = useAnd ? QsOccur.MUST : QsOccur.SHOULD;
}
}
}
@@ -2218,7 +2299,10 @@ public class SearchDslParser {
currentFieldName = fieldPath;
try {
- return visit(ctx.searchValue());
+ QsNode result = visit(ctx.searchValue());
+ // Mark as explicit field - user wrote "field:term" syntax
+ result.setExplicitField(true);
+ return result;
} finally {
currentFieldName = previousFieldName;
}
@@ -2242,7 +2326,12 @@ public class SearchDslParser {
return new QsNode(QsClauseType.TERM, fieldName,
unescapeTermValue(ctx.TERM().getText()));
}
if (ctx.PREFIX() != null) {
- return new QsNode(QsClauseType.PREFIX, fieldName,
unescapeTermValue(ctx.PREFIX().getText()));
+ String prefixText = ctx.PREFIX().getText();
+ // Standalone * → MATCH_ALL_DOCS (matches ES behavior: field:*
becomes ExistsQuery)
+ if ("*".equals(prefixText)) {
+ return new QsNode(QsClauseType.MATCH_ALL_DOCS,
(List<QsNode>) null);
+ }
+ return new QsNode(QsClauseType.PREFIX, fieldName,
unescapeTermValue(prefixText));
}
if (ctx.WILDCARD() != null) {
return new QsNode(QsClauseType.WILDCARD, fieldName,
unescapeTermValue(ctx.WILDCARD().getText()));
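
Editor's note: the new applyLuceneBooleanLogic contract above is easiest to see in
isolation. The sketch below is illustrative only; the Term/Occur names are invented
stand-ins, not the parser's TermWithOccur/QsOccur types. It re-implements the
documented left-to-right rule: no AND/OR precedence, each conjunction touching at
most the immediately preceding term, and MUST_NOT never overwritten later.

import java.util.Arrays;
import java.util.List;

// Standalone sketch of the occur-assignment rule documented in
// applyLuceneBooleanLogic. Illustrative names only.
public class LuceneOccurSketch {
    enum Occur { MUST, SHOULD, MUST_NOT }

    static final class Term {
        final String text;
        final boolean byAnd;   // introduced by AND
        final boolean byOr;    // introduced by OR
        final boolean negated; // NOT modifier
        Occur occur;

        Term(String text, boolean byAnd, boolean byOr, boolean negated) {
            this.text = text;
            this.byAnd = byAnd;
            this.byOr = byOr;
            this.negated = negated;
        }
    }

    // Left-to-right, no precedence: a conjunction only affects the
    // current term and (sometimes) the one directly before it.
    static void assignOccurs(List<Term> terms, boolean useAnd) {
        for (int i = 0; i < terms.size(); i++) {
            Term cur = terms.get(i);
            if (cur.negated) {
                cur.occur = Occur.MUST_NOT;
                if (cur.byAnd && i > 0) {
                    promote(terms.get(i - 1), Occur.MUST);
                } else if (cur.byOr && i > 0 && useAnd) {
                    promote(terms.get(i - 1), Occur.SHOULD);
                }
            } else if (cur.byAnd) {
                cur.occur = Occur.MUST;
                if (i > 0) {
                    promote(terms.get(i - 1), Occur.MUST);
                }
            } else if (cur.byOr) {
                cur.occur = Occur.SHOULD;
                if (useAnd && i > 0) {
                    promote(terms.get(i - 1), Occur.SHOULD);
                }
            } else {
                // First term or implicit conjunction (no explicit AND/OR).
                cur.occur = useAnd ? Occur.MUST : Occur.SHOULD;
            }
        }
    }

    // A MUST_NOT term is never flipped by a following AND/OR.
    static void promote(Term prev, Occur occur) {
        if (prev.occur != Occur.MUST_NOT) {
            prev.occur = occur;
        }
    }

    public static void main(String[] args) {
        // "a AND b OR c" with default_operator=OR  ->  +a +b c
        List<Term> terms = Arrays.asList(
                new Term("a", false, false, false),
                new Term("b", true, false, false),
                new Term("c", false, true, false));
        assignOccurs(terms, false);
        terms.forEach(t -> System.out.println(t.text + " -> " + t.occur));
    }
}

Running it on "a AND b OR c" with default_operator=OR prints MUST(a), MUST(b),
SHOULD(c), which is exactly what testLuceneModeAndOrMixed asserts below.
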
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/analysis/SearchPredicateTest.java
b/fe/fe-core/src/test/java/org/apache/doris/analysis/SearchPredicateTest.java
index 65c6a750766..62c45e9a60d 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/analysis/SearchPredicateTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/analysis/SearchPredicateTest.java
@@ -17,10 +17,12 @@
package org.apache.doris.analysis;
+import org.apache.doris.catalog.Index;
import org.apache.doris.catalog.Type;
import
org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser;
import org.apache.doris.thrift.TExprNode;
import org.apache.doris.thrift.TExprNodeType;
+import org.apache.doris.thrift.TSearchFieldBinding;
import org.apache.doris.thrift.TSearchParam;
import org.junit.jupiter.api.Assertions;
@@ -28,7 +30,9 @@ import org.junit.jupiter.api.Test;
import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
/**
@@ -252,4 +256,136 @@ public class SearchPredicateTest {
Assertions.assertNotNull(thriftNode.search_param);
Assertions.assertEquals(dsl, thriftNode.search_param.original_dsl);
}
+
+ @Test
+ public void testFieldIndexesPassedToThrift() {
+ // Simulate a variant subcolumn search where FE passes index properties
+ String dsl = "data.string_8:admin";
+
+ SearchDslParser.QsNode root = new SearchDslParser.QsNode(
+ SearchDslParser.QsClauseType.TERM, "data.string_8", "admin");
+ List<SearchDslParser.QsFieldBinding> bindings = Arrays.asList(
+ new SearchDslParser.QsFieldBinding("data.string_8", 0));
+ SearchDslParser.QsPlan plan = new SearchDslParser.QsPlan(root,
bindings);
+
+ SlotRef dataSlot = createTestSlotRef("data");
+ List<Expr> children = Arrays.asList(dataSlot);
+
+ // Create an Index with analyzer properties (simulates field_pattern
matched index)
+ Map<String, String> indexProps = new HashMap<>();
+ indexProps.put("parser", "unicode");
+ indexProps.put("lower_case", "true");
+ Index invertedIndex = new Index(1L, "idx_text", Arrays.asList("data"),
+
org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition.IndexType.INVERTED,
indexProps, "");
+
+ List<Index> fieldIndexes = Arrays.asList(invertedIndex);
+
+ SearchPredicate predicate = new SearchPredicate(dsl, plan, children,
fieldIndexes, true);
+
+ TExprNode thriftNode = new TExprNode();
+ predicate.toThrift(thriftNode);
+
+ TSearchParam param = thriftNode.search_param;
+ Assertions.assertNotNull(param);
+ Assertions.assertEquals(1, param.field_bindings.size());
+
+ TSearchFieldBinding binding = param.field_bindings.get(0);
+ Assertions.assertEquals("data.string_8", binding.field_name);
+ Assertions.assertTrue(binding.is_variant_subcolumn);
+ Assertions.assertEquals("data", binding.parent_field_name);
+ Assertions.assertEquals("string_8", binding.subcolumn_path);
+
+ // Verify index_properties are set
+ Assertions.assertNotNull(binding.index_properties);
+ Assertions.assertEquals("unicode",
binding.index_properties.get("parser"));
+ Assertions.assertEquals("true",
binding.index_properties.get("lower_case"));
+ }
+
+ @Test
+ public void testFieldIndexesNullDoesNotSetProperties() {
+ String dsl = "title:hello";
+ SearchDslParser.QsPlan plan = createTestPlan();
+ SlotRef titleSlot = createTestSlotRef("title");
+ List<Expr> children = Arrays.asList(titleSlot);
+
+ // Pass null Index in the fieldIndexes list
+ List<Index> fieldIndexes = Arrays.asList((Index) null);
+
+ SearchPredicate predicate = new SearchPredicate(dsl, plan, children,
fieldIndexes, true);
+
+ TExprNode thriftNode = new TExprNode();
+ predicate.toThrift(thriftNode);
+
+ TSearchParam param = thriftNode.search_param;
+ TSearchFieldBinding binding = param.field_bindings.get(0);
+
+ // index_properties should not be set when Index is null
+ Assertions.assertFalse(binding.isSetIndexProperties());
+ }
+
+ @Test
+ public void testFieldIndexesEmptyListBackwardCompatible() {
+ // Verify that using the old constructor (without fieldIndexes) still
works
+ String dsl = "title:hello";
+ SearchDslParser.QsPlan plan = createTestPlan();
+ SlotRef titleSlot = createTestSlotRef("title");
+ List<Expr> children = Arrays.asList(titleSlot);
+
+ // Constructor without fieldIndexes
+ SearchPredicate predicate = new SearchPredicate(dsl, plan, children,
true);
+
+ TExprNode thriftNode = new TExprNode();
+ predicate.toThrift(thriftNode);
+
+ TSearchParam param = thriftNode.search_param;
+ TSearchFieldBinding binding = param.field_bindings.get(0);
+
+ // index_properties should not be set
+ Assertions.assertFalse(binding.isSetIndexProperties());
+ }
+
+ @Test
+ public void testMultipleFieldsWithMixedIndexes() {
+ String dsl = "title:hello AND data.string_8:admin";
+
+ SearchDslParser.QsNode leftChild = new SearchDslParser.QsNode(
+ SearchDslParser.QsClauseType.TERM, "title", "hello");
+ SearchDslParser.QsNode rightChild = new SearchDslParser.QsNode(
+ SearchDslParser.QsClauseType.TERM, "data.string_8", "admin");
+ SearchDslParser.QsNode root = new SearchDslParser.QsNode(
+ SearchDslParser.QsClauseType.AND, Arrays.asList(leftChild,
rightChild));
+
+ List<SearchDslParser.QsFieldBinding> fieldBindings = Arrays.asList(
+ new SearchDslParser.QsFieldBinding("title", 0),
+ new SearchDslParser.QsFieldBinding("data.string_8", 1));
+ SearchDslParser.QsPlan plan = new SearchDslParser.QsPlan(root,
fieldBindings);
+
+ List<Expr> children = Arrays.asList(
+ createTestSlotRef("title"),
+ createTestSlotRef("data"));
+
+ // First field has no index, second has index with analyzer
+ Map<String, String> indexProps = new HashMap<>();
+ indexProps.put("parser", "unicode");
+ indexProps.put("lower_case", "true");
+ Index variantIndex = new Index(1L, "idx_text", Arrays.asList("data"),
+
org.apache.doris.nereids.trees.plans.commands.info.IndexDefinition.IndexType.INVERTED,
indexProps, "");
+
+ List<Index> fieldIndexes = Arrays.asList(null, variantIndex);
+
+ SearchPredicate predicate = new SearchPredicate(dsl, plan, children,
fieldIndexes, true);
+
+ TExprNode thriftNode = new TExprNode();
+ predicate.toThrift(thriftNode);
+
+ TSearchParam param = thriftNode.search_param;
+ Assertions.assertEquals(2, param.field_bindings.size());
+
+ // First field: no index_properties
+
Assertions.assertFalse(param.field_bindings.get(0).isSetIndexProperties());
+
+ // Second field: has index_properties
+
Assertions.assertTrue(param.field_bindings.get(1).isSetIndexProperties());
+ Assertions.assertEquals("unicode",
param.field_bindings.get(1).index_properties.get("parser"));
+ }
}
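
Editor's note: the new tests above read the propagated analyzer settings straight
off the thrift struct. As a minimal sketch of the consuming side (the real consumer
is the BE, written in C++; Java is used here only for consistency with the rest of
this change, and the helper itself is invented), the propagated properties can be
thought of like this, using only the accessors already exercised in the tests:

import java.util.Collections;
import java.util.Map;

import org.apache.doris.thrift.TSearchFieldBinding;

// Invented helper: sketches how the parser / lower_case properties that
// SearchPredicate.toThrift() attaches to each TSearchFieldBinding can be
// consumed on the other side of the wire.
final class BindingPropertiesSketch {
    static Map<String, String> propertiesOf(TSearchFieldBinding binding) {
        // Bindings built without a matching Index carry no properties at all.
        return binding.isSetIndexProperties()
                ? binding.index_properties
                : Collections.<String, String>emptyMap();
    }

    static String parserOf(TSearchFieldBinding binding) {
        // e.g. "unicode" for the variant subcolumn case above;
        // null means no analyzer is configured for the field.
        return propertiesOf(binding).get("parser");
    }
}
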
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
index d293433eb1a..214f309bded 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
@@ -664,29 +664,39 @@ public class SearchDslParserTest {
@Test
public void testLuceneModeAndOrMixed() {
// Test: "a AND b OR c" in Lucene mode with minimum_should_match=0
- // Expected: +a (SHOULD terms discarded because MUST exists)
+ // Lucene addClause semantics (left-to-right, no precedence,
default_operator=OR):
+ // a(CONJ_NONE)→SHOULD, b(CONJ_AND)→prev MUST, b MUST,
c(CONJ_OR)→SHOULD (prev unchanged)
+ // Result: [MUST(a), MUST(b), SHOULD(c)] with msm=0
+ // ES: +a +b c (SHOULD(c) kept, not filtered — msm=0 means
optional, not removed)
String dsl = "field:a AND field:b OR field:c";
String options = "{\"mode\":\"lucene\",\"minimum_should_match\":0}";
QsPlan plan = SearchDslParser.parseDsl(dsl, options);
Assertions.assertNotNull(plan);
- // With minimum_should_match=0 and MUST clauses present, SHOULD is
discarded
- // Only "a" remains with MUST
- Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType());
- Assertions.assertEquals("field", plan.getRoot().getField());
- Assertions.assertEquals("a", plan.getRoot().getValue());
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+ QsNode nodeA = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals("a", nodeA.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.MUST,
nodeA.getOccur());
+
+ QsNode nodeB = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals("b", nodeB.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.MUST,
nodeB.getOccur());
+
+ QsNode nodeC = plan.getRoot().getChildren().get(2);
+ Assertions.assertEquals("c", nodeC.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD,
nodeC.getOccur());
}
@Test
public void testLuceneModeAndOrNotMixed() {
// Test: "a AND b OR NOT c AND d" in Lucene mode
- // Expected processing:
- // - a: MUST (first term, default_operator=AND)
- // - b: MUST (AND introduces)
- // - c: MUST_NOT (OR + NOT, but OR makes preceding SHOULD, NOT makes
current MUST_NOT)
- // - d: MUST (AND introduces)
- // With minimum_should_match=0: b becomes SHOULD and is discarded
- // Result: +a -c +d
+ // Lucene addClause semantics (left-to-right, no precedence):
+ // a(CONJ_NONE)→SHOULD, b(CONJ_AND)→prev MUST, b MUST,
+ // NOT c(CONJ_OR, MOD_NOT)→MUST_NOT (prev unchanged with
OR_OPERATOR),
+ // d(CONJ_AND)→prev(c) skip (MUST_NOT), d MUST
+ // Result: [MUST(a), MUST(b), MUST_NOT(c), MUST(d)] = +a +b -c +d
String dsl = "field:a AND field:b OR NOT field:c AND field:d";
String options = "{\"mode\":\"lucene\",\"minimum_should_match\":0}";
QsPlan plan = SearchDslParser.parseDsl(dsl, options);
@@ -694,19 +704,22 @@ public class SearchDslParserTest {
Assertions.assertNotNull(plan);
Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
- // Should have 3 children: a(MUST), c(MUST_NOT), d(MUST)
- // b is filtered out because it becomes SHOULD
- Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+ // Should have 4 children: a(MUST), b(MUST), c(MUST_NOT), d(MUST)
+ Assertions.assertEquals(4, plan.getRoot().getChildren().size());
QsNode nodeA = plan.getRoot().getChildren().get(0);
Assertions.assertEquals("a", nodeA.getValue());
Assertions.assertEquals(SearchDslParser.QsOccur.MUST,
nodeA.getOccur());
- QsNode nodeC = plan.getRoot().getChildren().get(1);
+ QsNode nodeB = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals("b", nodeB.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.MUST,
nodeB.getOccur());
+
+ QsNode nodeC = plan.getRoot().getChildren().get(2);
Assertions.assertEquals("c", nodeC.getValue());
Assertions.assertEquals(SearchDslParser.QsOccur.MUST_NOT,
nodeC.getOccur());
- QsNode nodeD = plan.getRoot().getChildren().get(2);
+ QsNode nodeD = plan.getRoot().getChildren().get(3);
Assertions.assertEquals("d", nodeD.getValue());
Assertions.assertEquals(SearchDslParser.QsOccur.MUST,
nodeD.getOccur());
}
@@ -714,33 +727,58 @@ public class SearchDslParserTest {
@Test
public void testLuceneModeWithDefaultField() {
// Test: Lucene mode with default field expansion
+ // Lucene addClause semantics with default_operator=AND (AND_OPERATOR):
+ // aterm(CONJ_NONE)→MUST, bterm(CONJ_AND)→prev MUST, bterm MUST,
+ // cterm(CONJ_OR)→SHOULD + prev(bterm) becomes SHOULD (AND_OPERATOR
+ CONJ_OR)
+ // Result: [MUST(aterm), SHOULD(bterm), SHOULD(cterm)] with msm=0
+ // ES: +aterm bterm cterm
String dsl = "aterm AND bterm OR cterm";
- // Now default_field and default_operator are inside the options JSON
String options =
"{\"default_field\":\"firstname\",\"default_operator\":\"and\","
+ "\"mode\":\"lucene\",\"minimum_should_match\":0}";
QsPlan plan = SearchDslParser.parseDsl(dsl, options);
Assertions.assertNotNull(plan);
- // With minimum_should_match=0, only aterm (MUST) remains
- Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType());
- Assertions.assertEquals("firstname", plan.getRoot().getField());
- Assertions.assertEquals("aterm", plan.getRoot().getValue());
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+ QsNode nodeA = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals("firstname", nodeA.getField());
+ Assertions.assertEquals("aterm", nodeA.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.MUST,
nodeA.getOccur());
+
+ QsNode nodeB = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals("bterm", nodeB.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD,
nodeB.getOccur());
+
+ QsNode nodeC = plan.getRoot().getChildren().get(2);
+ Assertions.assertEquals("cterm", nodeC.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD,
nodeC.getOccur());
}
@Test
public void testLuceneModeNotOperator() {
// Test: "NOT a" in Lucene mode
- // In Lucene mode, single NOT produces OCCUR_BOOLEAN with a MUST_NOT
child
- // (wrapped for BE to handle the negation properly)
+ // Pure NOT queries are rewritten to: SHOULD(MATCH_ALL_DOCS) +
MUST_NOT(term)
+ // with minimum_should_match=1, following ES/Lucene semantics where
pure NOT
+ // should return all documents EXCEPT those matching the NOT clause
String dsl = "NOT field:a";
String options = "{\"mode\":\"lucene\"}";
QsPlan plan = SearchDslParser.parseDsl(dsl, options);
Assertions.assertNotNull(plan);
Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
- Assertions.assertEquals(1, plan.getRoot().getChildren().size());
- Assertions.assertEquals(QsClauseType.TERM,
plan.getRoot().getChildren().get(0).getType());
- Assertions.assertEquals(QsOccur.MUST_NOT,
plan.getRoot().getChildren().get(0).getOccur());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+ Assertions.assertEquals(Integer.valueOf(1),
plan.getRoot().getMinimumShouldMatch());
+
+ // First child: MATCH_ALL_DOCS with SHOULD
+ QsNode matchAllNode = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.MATCH_ALL_DOCS,
matchAllNode.getType());
+ Assertions.assertEquals(QsOccur.SHOULD, matchAllNode.getOccur());
+
+ // Second child: TERM with MUST_NOT
+ QsNode termNode = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.TERM, termNode.getType());
+ Assertions.assertEquals(QsOccur.MUST_NOT, termNode.getOccur());
}
@Test
@@ -817,6 +855,40 @@ public class SearchDslParserTest {
Assertions.assertEquals("First Value", plan.getRoot().getValue());
}
+ @Test
+ public void testEscapedSpaceInBareQueryLuceneMode() {
+ // Test: "Josh\ Brolin" (bare query, no field prefix) in lucene mode
+ // Should be treated as a single term "Josh Brolin", not split into
two terms
+ String dsl = "Josh\\ Brolin";
+ String optionsJson =
"{\"default_field\":\"title\",\"default_operator\":\"AND\","
+ + "\"mode\":\"lucene\",\"minimum_should_match\":0}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, optionsJson);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType());
+ Assertions.assertEquals("title", plan.getRoot().getField());
+ Assertions.assertEquals("Josh Brolin", plan.getRoot().getValue());
+ // defaultOperator must be lowercase for BE case-sensitive comparison
+ Assertions.assertEquals("and", plan.getDefaultOperator());
+ }
+
+ @Test
+ public void testDefaultOperatorNormalization() {
+ // Verify defaultOperator is always normalized to lowercase in the
plan,
+ // regardless of the case used in the options JSON.
+ // BE compares case-sensitively: (default_operator == "and")
+ String dsl = "foo bar";
+ String optionsJson =
"{\"default_field\":\"title\",\"default_operator\":\"AND\","
+ + "\"mode\":\"lucene\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, optionsJson);
+ Assertions.assertEquals("and", plan.getDefaultOperator());
+
+ optionsJson =
"{\"default_field\":\"title\",\"default_operator\":\"OR\","
+ + "\"mode\":\"lucene\"}";
+ plan = SearchDslParser.parseDsl(dsl, optionsJson);
+ Assertions.assertEquals("or", plan.getDefaultOperator());
+ }
+
@Test
public void testEscapedParentheses() {
// Test: \( and \) should be treated as literal characters, not
grouping
@@ -1040,6 +1112,112 @@ public class SearchDslParserTest {
.anyMatch(b -> "category".equals(b.getFieldName())));
}
+ @Test
+ public void testMultiFieldExplicitFieldInFieldsList() {
+ // Bug fix: explicit field prefix should NOT be expanded even when the
field IS in the fields list
+ // ES query_string always respects explicit "field:term" syntax
regardless of the fields parameter.
+ // "title:music AND content:history" with fields=["title","content"]
+ // → title:music AND content:history (NOT expanded to multi-field OR)
+ String dsl = "title:music AND content:history";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ // First child: title:music - NOT expanded
+ QsNode first = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.TERM, first.getType());
+ Assertions.assertEquals("title", first.getField());
+ Assertions.assertEquals("music", first.getValue());
+
+ // Second child: content:history - NOT expanded
+ QsNode second = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.TERM, second.getType());
+ Assertions.assertEquals("content", second.getField());
+ Assertions.assertEquals("history", second.getValue());
+ }
+
+ @Test
+ public void testMultiFieldExplicitFieldInFieldsListBestFields() {
+ // Same test as above but with best_fields type
+ String dsl = "title:music AND content:history";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"type\":\"best_fields\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ // best_fields wraps in OR for multi-field, but explicit fields should
be preserved in each copy
+ QsNode root = plan.getRoot();
+ Assertions.assertEquals(QsClauseType.OR, root.getType());
+ Assertions.assertEquals(2, root.getChildren().size());
+
+ // Each OR branch should have AND(title:music, content:history) - both
explicit fields preserved
+ for (QsNode branch : root.getChildren()) {
+ Assertions.assertEquals(QsClauseType.AND, branch.getType());
+ Assertions.assertEquals(2, branch.getChildren().size());
+
+ QsNode titleNode = branch.getChildren().get(0);
+ Assertions.assertEquals("title", titleNode.getField());
+ Assertions.assertEquals("music", titleNode.getValue());
+
+ QsNode contentNode = branch.getChildren().get(1);
+ Assertions.assertEquals("content", contentNode.getField());
+ Assertions.assertEquals("history", contentNode.getValue());
+ }
+ }
+
+ @Test
+ public void testMultiFieldMixedExplicitAndBareQuery() {
+ // "title:football AND american" with fields=["title","content"]
+ // → title:football AND (title:american OR content:american)
+ // title:football should NOT be expanded; "american" (bare) should be
expanded
+ String dsl = "title:football AND american";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ // First child: title:football - NOT expanded (explicit field)
+ QsNode first = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.TERM, first.getType());
+ Assertions.assertEquals("title", first.getField());
+ Assertions.assertEquals("football", first.getValue());
+
+ // Second child: (title:american OR content:american) - expanded (bare
term)
+ QsNode second = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.OR, second.getType());
+ Assertions.assertEquals(2, second.getChildren().size());
+ }
+
+ @Test
+ public void testMultiFieldLuceneModeExplicitFieldInFieldsList() {
+ // Lucene mode: "title:music AND content:history" with
fields=["title","content"]
+ // Explicit fields should be preserved, not expanded
+ String dsl = "title:music AND content:history";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\","
+ + "\"mode\":\"lucene\",\"type\":\"cross_fields\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ QsNode root = plan.getRoot();
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, root.getType());
+ Assertions.assertEquals(2, root.getChildren().size());
+
+ // Both children should be leaf TERM nodes (not expanded to
OCCUR_BOOLEAN wrappers)
+ QsNode first = root.getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.TERM, first.getType());
+ Assertions.assertEquals("title", first.getField());
+ Assertions.assertEquals("music", first.getValue());
+
+ QsNode second = root.getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.TERM, second.getType());
+ Assertions.assertEquals("content", second.getField());
+ Assertions.assertEquals("history", second.getValue());
+ }
+
@Test
public void testMultiFieldWithWildcard() {
// Test: "hello*" + fields=["title","content"]
@@ -1175,16 +1353,22 @@ public class SearchDslParserTest {
@Test
public void testMultiFieldLuceneModeAndOrMixed() {
// Test: "a AND b OR c" + fields=["title","content"] + lucene mode +
minimum_should_match=0 + cross_fields
- // With Lucene semantics and minimum_should_match=0: SHOULD groups are
discarded
- // Only "a" (MUST) remains - wrapped in OCCUR_BOOLEAN
+ // With no default_operator (default is OR_OPERATOR in Lucene):
+ // a=MUST (promoted by AND), b=MUST (from AND), c=SHOULD (from OR)
+ // With OR_OPERATOR, OR does NOT change preceding term's occur
+ // msm is ignored for multi-field mode, node-level msm defaults to 0
(since MUST exists)
String dsl = "a AND b OR c";
String options =
"{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":0,\"type\":\"cross_fields\"}";
QsPlan plan = SearchDslParser.parseDsl(dsl, options);
Assertions.assertNotNull(plan);
- // With minimum_should_match=0, only (title:a OR content:a) remains
- // In Lucene mode, this is wrapped as OCCUR_BOOLEAN
+ // Root is OCCUR_BOOLEAN with 3 children: MUST(a), MUST(b), SHOULD(c)
Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+ // a and b are MUST, c is SHOULD
+ Assertions.assertEquals(QsOccur.MUST,
plan.getRoot().getChildren().get(0).getOccur());
+ Assertions.assertEquals(QsOccur.MUST,
plan.getRoot().getChildren().get(1).getOccur());
+ Assertions.assertEquals(QsOccur.SHOULD,
plan.getRoot().getChildren().get(2).getOccur());
}
@Test
@@ -1236,16 +1420,18 @@ public class SearchDslParserTest {
@Test
public void testMultiFieldLuceneModeMinimumShouldMatchOne() {
- // Test: "a AND b OR c" with minimum_should_match=1 keeps all clauses
+ cross_fields
+ // Test: "a AND b OR c" with minimum_should_match=1 + cross_fields +
multi-field
+ // For multi-field mode (fields.size() > 1), minimum_should_match is
nullified.
+ // Lucene addClause with default_operator=OR: [MUST(a), MUST(b),
SHOULD(c)] msm=0
+ // No SHOULD filtering — all 3 terms kept, each expanded to 2 fields
via cross_fields
String dsl = "a AND b OR c";
String options =
"{\"fields\":[\"title\",\"content\"],\"mode\":\"lucene\",\"minimum_should_match\":1,\"type\":\"cross_fields\"}";
QsPlan plan = SearchDslParser.parseDsl(dsl, options);
Assertions.assertNotNull(plan);
Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
- // All 3 groups should be present
+ // 3 terms (a, b, c), each expanded to cross_fields OCCUR_BOOLEAN
Assertions.assertEquals(3, plan.getRoot().getChildren().size());
- Assertions.assertEquals(Integer.valueOf(1),
plan.getRoot().getMinimumShouldMatch());
}
// ============ Tests for type parameter (best_fields vs cross_fields)
============
@@ -1306,13 +1492,53 @@ public class SearchDslParserTest {
@Test
public void testMultiFieldBestFieldsLuceneMode() {
- // Test: best_fields with Lucene mode
+ // Test: best_fields with Lucene mode uses per-clause expansion
(matching ES query_string)
+ // "hello world" with AND → each term independently expanded across
fields:
+ // MUST(SHOULD(title:hello, content:hello)) AND
MUST(SHOULD(title:world, content:world))
String dsl = "hello world";
String options =
"{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"mode\":\"lucene\",\"type\":\"best_fields\"}";
QsPlan plan = SearchDslParser.parseDsl(dsl, options);
Assertions.assertNotNull(plan);
Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ // Per-clause expansion: 2 children (one per term), each expanded
across fields
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+ for (QsNode child : plan.getRoot().getChildren()) {
+ // Each child is an OCCUR_BOOLEAN wrapping the per-field expansion
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
child.getType());
+ Assertions.assertEquals(2, child.getChildren().size()); // one per
field
+ }
+ }
+
+ @Test
+ public void testMultiFieldBestFieldsLuceneModePerClauseExpansion() {
+ // Test: best_fields with phrase + regex uses per-clause expansion
(not per-field)
+ // ES query_string expands each clause independently across fields:
+ // ("Costner" AND /Li../) → MUST(title:"Costner" |
content:"Costner") AND MUST(title:/Li../ | content:/Li../)
+ // NOT: (title:"Costner" AND title:/Li../) OR (content:"Costner" AND
content:/Li../)
+ String dsl = "\"Costner\" /Li../";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"default_operator\":\"and\",\"mode\":\"lucene\",\"type\":\"best_fields\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ QsNode root = plan.getRoot();
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, root.getType());
+ // 2 children: one for phrase "Costner", one for regex /Li../
+ Assertions.assertEquals(2, root.getChildren().size());
+
+ // First child: phrase "Costner" expanded across fields
+ QsNode phraseGroup = root.getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
phraseGroup.getType());
+ Assertions.assertEquals(2, phraseGroup.getChildren().size());
+ Assertions.assertEquals(QsClauseType.PHRASE,
phraseGroup.getChildren().get(0).getType());
+ Assertions.assertEquals(QsClauseType.PHRASE,
phraseGroup.getChildren().get(1).getType());
+
+ // Second child: regex /Li../ expanded across fields
+ QsNode regexpGroup = root.getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
regexpGroup.getType());
+ Assertions.assertEquals(2, regexpGroup.getChildren().size());
+ Assertions.assertEquals(QsClauseType.REGEXP,
regexpGroup.getChildren().get(0).getType());
+ Assertions.assertEquals(QsClauseType.REGEXP,
regexpGroup.getChildren().get(1).getType());
}
@Test
@@ -1582,4 +1808,145 @@ public class SearchDslParserTest {
Assertions.assertEquals(QsClauseType.TERM, termNode.getType());
Assertions.assertEquals("title", termNode.getField());
}
+
+ // =====================================================================
+ // Tests for quoted phrases combined with implicit operators
+ // =====================================================================
+
+ @Test
+ public void testPhraseWithImplicitOrOperator() {
+ // Test: '"2003 NBA draft" Darrell' with default_operator=OR should
produce same result as
+ // '"2003 NBA draft" OR Darrell'
+ String dsl1 = "\"2003 NBA draft\" Darrell";
+ String dsl2 = "\"2003 NBA draft\" OR Darrell";
+ String options =
"{\"default_field\":\"title\",\"default_operator\":\"OR\","
+ + "\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+ QsPlan plan1 = SearchDslParser.parseDsl(dsl1, options);
+ QsPlan plan2 = SearchDslParser.parseDsl(dsl2, options);
+
+ Assertions.assertNotNull(plan1);
+ Assertions.assertNotNull(plan2);
+
+ // Both should have the same structure - OCCUR_BOOLEAN with 2 SHOULD
children
+ Assertions.assertEquals(plan2.getRoot().getType(),
plan1.getRoot().getType());
+ Assertions.assertEquals(plan2.getRoot().getChildren().size(),
plan1.getRoot().getChildren().size());
+
+ // Verify the phrase is preserved as PHRASE type, not broken into terms
+ boolean hasPhrase1 = plan1.getRoot().getChildren().stream()
+ .anyMatch(n -> n.getType() == QsClauseType.PHRASE);
+ boolean hasPhrase2 = plan2.getRoot().getChildren().stream()
+ .anyMatch(n -> n.getType() == QsClauseType.PHRASE);
+ Assertions.assertTrue(hasPhrase1, "Plan 1 should contain a PHRASE
node");
+ Assertions.assertTrue(hasPhrase2, "Plan 2 should contain a PHRASE
node");
+ }
+
+ @Test
+ public void testPhraseWithImplicitAndOperator() {
+ // Test: '"hello world" foo' with default_operator=AND
+ String dsl = "\"hello world\" foo";
+ String options =
"{\"default_field\":\"title\",\"default_operator\":\"AND\"}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ // Should create AND query: title:"hello world" AND title:foo
+ Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ // Verify the phrase is preserved
+ boolean hasPhrase = plan.getRoot().getChildren().stream()
+ .anyMatch(n -> n.getType() == QsClauseType.PHRASE);
+ Assertions.assertTrue(hasPhrase, "Should contain a PHRASE node");
+ }
+
+ @Test
+ public void testMultiplePhrases() {
+ // Test: '"hello world" "foo bar"' with default_operator=OR
+ String dsl = "\"hello world\" \"foo bar\"";
+ String options =
"{\"default_field\":\"title\",\"default_operator\":\"OR\"}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ // Both children should be PHRASE type
+ for (QsNode child : plan.getRoot().getChildren()) {
+ Assertions.assertEquals(QsClauseType.PHRASE, child.getType());
+ }
+ }
+
+ // ============ Tests for Standalone Wildcard * ============
+
+ @Test
+ public void testStandaloneWildcardWithAnd() {
+ // Test: "Dollar AND *" should produce: MUST(title:Dollar) AND
MUST(MATCH_ALL_DOCS)
+ // Standalone "*" becomes MATCH_ALL_DOCS (matches ES behavior: field:*
→ ExistsQuery)
+ String dsl = "Dollar AND *";
+ String options =
"{\"default_field\":\"title\",\"default_operator\":\"OR\","
+ + "\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ // Both children should have MUST occur (AND)
+ for (QsNode child : plan.getRoot().getChildren()) {
+ Assertions.assertEquals(QsOccur.MUST, child.getOccur());
+ }
+
+ // One should be TERM (Dollar), one should be MATCH_ALL_DOCS
+ boolean hasTerm = plan.getRoot().getChildren().stream()
+ .anyMatch(n -> n.getType() == QsClauseType.TERM &&
"Dollar".equals(n.getValue()));
+ boolean hasMatchAll = plan.getRoot().getChildren().stream()
+ .anyMatch(n -> n.getType() == QsClauseType.MATCH_ALL_DOCS);
+
+ Assertions.assertTrue(hasTerm, "Should contain TERM node for
'Dollar'");
+ Assertions.assertTrue(hasMatchAll, "Should contain MATCH_ALL_DOCS node
for '*'");
+ }
+
+ @Test
+ public void testStandaloneWildcardAlone() {
+ // Test: "*" alone becomes MATCH_ALL_DOCS (matches ES behavior:
field:* → ExistsQuery)
+ String dsl = "*";
+ String options =
"{\"default_field\":\"title\",\"default_operator\":\"OR\"}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.MATCH_ALL_DOCS,
plan.getRoot().getType());
+ }
+
+ @Test
+ public void testStandaloneWildcardWithOr() {
+ // Test: "Dollar OR *" should produce: SHOULD(title:Dollar) OR
SHOULD(MATCH_ALL_DOCS)
+ // Standalone "*" becomes MATCH_ALL_DOCS (matches ES behavior: field:*
→ ExistsQuery)
+ String dsl = "Dollar OR *";
+ String options =
"{\"default_field\":\"title\",\"default_operator\":\"OR\","
+ + "\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ // Both children should have SHOULD occur (OR)
+ for (QsNode child : plan.getRoot().getChildren()) {
+ Assertions.assertEquals(QsOccur.SHOULD, child.getOccur());
+ }
+
+ // One should be TERM (Dollar), one should be MATCH_ALL_DOCS
+ boolean hasTerm = plan.getRoot().getChildren().stream()
+ .anyMatch(n -> n.getType() == QsClauseType.TERM &&
"Dollar".equals(n.getValue()));
+ boolean hasMatchAll = plan.getRoot().getChildren().stream()
+ .anyMatch(n -> n.getType() == QsClauseType.MATCH_ALL_DOCS);
+
+ Assertions.assertTrue(hasTerm, "Should contain TERM node for
'Dollar'");
+ Assertions.assertTrue(hasMatchAll, "Should contain MATCH_ALL_DOCS node
for '*'");
+ }
}
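
Editor's note: the testLuceneModeNotOperator change above is the visible half of the
pure-NOT rewrite in QsLuceneModeAstBuilder. A hand-built version of the tree it now
expects (a sketch, reusing only QsNode constructors that already appear in this
diff) makes the shape explicit:

import java.util.ArrayList;
import java.util.List;

import org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser.QsClauseType;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser.QsNode;
import org.apache.doris.nereids.trees.expressions.functions.scalar.SearchDslParser.QsOccur;

// Sketch: the tree the parser now builds for "NOT field:a" in lucene mode.
public class PureNotRewriteSketch {
    public static void main(String[] args) {
        QsNode negated = new QsNode(QsClauseType.TERM, "field", "a");
        negated.setOccur(QsOccur.MUST_NOT);

        QsNode matchAll =
                new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>) null);
        matchAll.setOccur(QsOccur.SHOULD);

        List<QsNode> children = new ArrayList<>();
        children.add(matchAll);
        children.add(negated);

        // minimum_should_match = 1: every document satisfies the
        // MATCH_ALL_DOCS SHOULD clause, then the MUST_NOT clause removes
        // the documents matching "a" - i.e. "all docs except a".
        QsNode root = new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 1);
        System.out.println(root.getType() + " with "
                + root.getChildren().size() + " children");
    }
}
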
diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift
index 6eaa5ff8e79..e74e1083243 100644
--- a/gensrc/thrift/Exprs.thrift
+++ b/gensrc/thrift/Exprs.thrift
@@ -261,12 +261,15 @@ struct TSearchFieldBinding {
3: optional string parent_field_name // Parent field name for variant
subcolumns
4: optional string subcolumn_path // Subcolumn path for variant fields
(e.g., "subcolumn" or "sub1.sub2")
5: optional bool is_variant_subcolumn // True if this is a variant
subcolumn access
+ 6: optional map<string, string> index_properties // Index properties
(parser, lower_case, etc.) from FE Index lookup
}
struct TSearchParam {
1: required string original_dsl // Original DSL string for debugging
2: required TSearchClause root // Parsed AST root
3: required list<TSearchFieldBinding> field_bindings // Field to slot
mappings
+ 4: optional string default_operator // "and" or "or" for TERM
tokenization (default: "or")
+ 5: optional i32 minimum_should_match // Minimum number of SHOULD clauses
that must match (for Lucene mode TERM tokenization)
}
// This is essentially a union over the subclasses of Expr.
diff --git a/regression-test/data/search/test_search_lucene_mode.out
b/regression-test/data/search/test_search_lucene_mode.out
index 68d8e6c1279..5eb4346b50c 100644
--- a/regression-test/data/search/test_search_lucene_mode.out
+++ b/regression-test/data/search/test_search_lucene_mode.out
@@ -34,6 +34,10 @@
2 apple banana
-- !lucene_not --
+4 banana cherry
+5 cherry date
+6 date elderberry
+7 fig grape
-- !lucene_and_not --
3 apple
diff --git a/regression-test/data/search/test_search_multi_field.out
b/regression-test/data/search/test_search_multi_field.out
index 4a4923a4c3b..e22811bddde 100644
--- a/regression-test/data/search/test_search_multi_field.out
+++ b/regression-test/data/search/test_search_multi_field.out
@@ -75,13 +75,10 @@
-- !multi_field_lucene_and_or --
1 machine learning basics
-4 machine maintenance
-8 cooking machine reviews
9 machine guide
-- !multi_field_lucene_min_should_1 --
1 machine learning basics
-8 cooking machine reviews
9 machine guide
-- !multi_field_lucene_and_not --
@@ -119,6 +116,7 @@
-- !multi_field_best_fields_lucene --
1 machine learning basics
+9 machine guide
-- !multi_field_cross_fields_lucene --
1 machine learning basics
diff --git a/regression-test/data/search/test_search_regexp_lowercase.out
b/regression-test/data/search/test_search_regexp_lowercase.out
new file mode 100644
index 00000000000..0ae25fc613f
--- /dev/null
+++ b/regression-test/data/search/test_search_regexp_lowercase.out
@@ -0,0 +1,39 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !regexp_uppercase_no_match --
+
+-- !match_regexp_uppercase_no_match --
+
+-- !regexp_lowercase_match --
+1 ABC DEF
+2 abc def
+
+-- !match_regexp_lowercase_match --
+1 ABC DEF
+2 abc def
+
+-- !wildcard_uppercase_match --
+1 ABC DEF
+2 abc def
+
+-- !wildcard_lowercase_match --
+1 ABC DEF
+2 abc def
+
+-- !regexp_apple_lowercase --
+3 Apple Banana Cherry
+4 apple banana cherry
+
+-- !regexp_apple_uppercase_no_match --
+
+-- !consistency_regexp_cherry --
+3
+4
+
+-- !consistency_match_regexp_cherry --
+3
+4
+
+-- !consistency_regexp_cherry_upper --
+
+-- !consistency_match_regexp_cherry_upper --
+
diff --git
a/regression-test/data/search/test_search_variant_subcolumn_analyzer.out
b/regression-test/data/search/test_search_variant_subcolumn_analyzer.out
new file mode 100644
index 00000000000..d1eff343b82
--- /dev/null
+++ b/regression-test/data/search/test_search_variant_subcolumn_analyzer.out
@@ -0,0 +1,30 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !search_variant_analyzer_basic --
+1
+3
+
+-- !match_variant_baseline --
+1
+3
+
+-- !search_variant_analyzer_multi --
+3
+
+-- !search_variant_analyzer_other_field --
+4
+
+-- !search_variant_analyzer_field_syntax --
+2
+5
+
+-- !search_variant_analyzer_lowercase --
+1
+3
+
+-- !search_variant_analyzer_phrase --
+1
+
+-- !search_variant_direct_index --
+1
+3
+
diff --git a/regression-test/suites/search/test_search_lucene_mode.groovy
b/regression-test/suites/search/test_search_lucene_mode.groovy
index 8e9d4edb7e3..8e95a27a377 100644
--- a/regression-test/suites/search/test_search_lucene_mode.groovy
+++ b/regression-test/suites/search/test_search_lucene_mode.groovy
@@ -137,12 +137,9 @@ suite("test_search_lucene_mode") {
"""
// ============ Test 7: Lucene mode NOT operator (pure negative query)
============
- // 'NOT a' in Lucene mode produces a pure MUST_NOT query.
- // IMPORTANT: In Lucene/ES semantics, a pure negative query (only
MUST_NOT, no MUST/SHOULD)
- // returns EMPTY results because there's no positive clause to match
against.
- // This is correct Lucene behavior - to get "all except X", you need:
- // match_all AND NOT X (i.e., a positive clause combined with negation)
- // Expected: empty result (correct Lucene semantics)
+ // 'NOT a' in Lucene mode is rewritten to: SHOULD(MATCH_ALL_DOCS) +
MUST_NOT(a)
+ // This matches all documents EXCEPT those containing the negated term.
+ // Expected: all docs without "apple" in title (4, 5, 6, 7)
qt_lucene_not """
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title
FROM ${tableName}
diff --git a/regression-test/suites/search/test_search_multi_field.groovy
b/regression-test/suites/search/test_search_multi_field.groovy
index f71db33f2b0..cc94a31bc14 100644
--- a/regression-test/suites/search/test_search_multi_field.groovy
+++ b/regression-test/suites/search/test_search_multi_field.groovy
@@ -277,6 +277,8 @@ suite("test_search_multi_field") {
"""
// ============ Test 21: best_fields with Lucene mode ============
+ // In lucene mode, best_fields uses per-clause expansion (matching ES
query_string),
+ // so id=1 and id=9 both match (each term may match in a different field)
qt_multi_field_best_fields_lucene """
SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id, title
FROM ${tableName}
diff --git a/regression-test/suites/search/test_search_regexp_lowercase.groovy
b/regression-test/suites/search/test_search_regexp_lowercase.groovy
new file mode 100644
index 00000000000..957027c2610
--- /dev/null
+++ b/regression-test/suites/search/test_search_regexp_lowercase.groovy
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// DORIS-24464: search() REGEXP with lower_case=true should be consistent with
match_regexp
+// Regex patterns are NOT lowercased (matching ES query_string behavior).
+// Wildcard patterns ARE lowercased (matching ES query_string normalizer
behavior).
+
+suite("test_search_regexp_lowercase") {
+ def tableName = "search_regexp_lowercase_test"
+
+ sql "DROP TABLE IF EXISTS ${tableName}"
+
+ sql """
+ CREATE TABLE ${tableName} (
+ a INT,
+ title VARCHAR(512) NOT NULL,
+ INDEX idx_title (title) USING INVERTED PROPERTIES("lower_case" =
"true", "parser" = "english", "support_phrase" = "true")
+ ) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY HASH(a) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ sql "INSERT INTO ${tableName} VALUES(1, 'ABC DEF')"
+ sql "INSERT INTO ${tableName} VALUES(2, 'abc def')"
+ sql "INSERT INTO ${tableName} VALUES(3, 'Apple Banana Cherry')"
+ sql "INSERT INTO ${tableName} VALUES(4, 'apple banana cherry')"
+
+ // Wait for data to be ready
+ Thread.sleep(5000)
+
+ //
=========================================================================
+ // Test 1: REGEXP with uppercase pattern should NOT match lowercased terms
+ // (ES-compatible behavior: regex patterns are not analyzed/lowercased)
+ //
=========================================================================
+
+ // search() REGEXP with uppercase pattern - should return 0 rows
+ // because indexed terms are lowercased (abc, def) but pattern AB.* is
case-sensitive
+ qt_regexp_uppercase_no_match """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ * FROM
${tableName}
+ WHERE search('/AB.*/',
'{"default_field":"title","default_operator":"AND","mode":"lucene",
"minimum_should_match": 0}')
+ ORDER BY a
+ """
+
+ // match_regexp with uppercase pattern - should also return 0 rows
+ qt_match_regexp_uppercase_no_match """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ * FROM
${tableName}
+ WHERE title match_regexp 'AB.*'
+ ORDER BY a
+ """
+
+ //
=========================================================================
+ // Test 2: REGEXP with lowercase pattern SHOULD match lowercased terms
+ //
=========================================================================
+
+ // search() REGEXP with lowercase pattern - should match both rows with
"abc"
+ qt_regexp_lowercase_match """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ * FROM
${tableName}
+ WHERE search('/ab.*/',
'{"default_field":"title","default_operator":"AND","mode":"lucene",
"minimum_should_match": 0}')
+ ORDER BY a
+ """
+
+ // match_regexp with lowercase pattern - should also match
+ qt_match_regexp_lowercase_match """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ * FROM
${tableName}
+ WHERE title match_regexp 'ab.*'
+ ORDER BY a
+ """
+
+ //
=========================================================================
+ // Test 3: WILDCARD with uppercase pattern should match (wildcards ARE
lowercased)
+ //
=========================================================================
+
+ // search() WILDCARD with uppercase - should match because wildcard
patterns are lowercased
+ qt_wildcard_uppercase_match """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ * FROM
${tableName}
+ WHERE search('AB*',
'{"default_field":"title","default_operator":"AND","mode":"lucene",
"minimum_should_match": 0}')
+ ORDER BY a
+ """
+
+ // search() WILDCARD with lowercase - should also match
+ qt_wildcard_lowercase_match """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ * FROM
${tableName}
+ WHERE search('ab*',
'{"default_field":"title","default_operator":"AND","mode":"lucene",
"minimum_should_match": 0}')
+ ORDER BY a
+ """
+
+ //
=========================================================================
+ // Test 4: More complex REGEXP patterns
+ //
=========================================================================
+
+ // Lowercase regex that matches "apple" - should match rows 3 and 4
+ qt_regexp_apple_lowercase """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ * FROM
${tableName}
+ WHERE search('/app.*/',
'{"default_field":"title","default_operator":"AND","mode":"lucene",
"minimum_should_match": 0}')
+ ORDER BY a
+ """
+
+ // Uppercase regex "App.*" should NOT match (terms are lowercased as
"apple")
+ qt_regexp_apple_uppercase_no_match """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ * FROM
${tableName}
+ WHERE search('/App.*/',
'{"default_field":"title","default_operator":"AND","mode":"lucene",
"minimum_should_match": 0}')
+ ORDER BY a
+ """
+
+ //
=========================================================================
+ // Test 5: REGEXP consistency with match_regexp for various patterns
+ //
=========================================================================
+
+ // Both should return same results for lowercase pattern
+ qt_consistency_regexp_cherry """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ a FROM
${tableName}
+ WHERE search('/cher.*/',
'{"default_field":"title","default_operator":"AND","mode":"lucene",
"minimum_should_match": 0}')
+ ORDER BY a
+ """
+
+ qt_consistency_match_regexp_cherry """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ a FROM
${tableName}
+ WHERE title match_regexp 'cher.*'
+ ORDER BY a
+ """
+
+ // Both should return 0 rows for uppercase pattern
+ qt_consistency_regexp_cherry_upper """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ a FROM
${tableName}
+ WHERE search('/CHER.*/',
'{"default_field":"title","default_operator":"AND","mode":"lucene",
"minimum_should_match": 0}')
+ ORDER BY a
+ """
+
+ qt_consistency_match_regexp_cherry_upper """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ a FROM ${tableName}
+ WHERE title match_regexp 'CHER.*'
+ ORDER BY a
+ """
+
+ sql "DROP TABLE IF EXISTS ${tableName}"
+}
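
The case-sensitivity rule these tests exercise can be stated in one line: WILDCARD and PREFIX patterns follow the index's lower_case normalization (so 'AB*' matches lowercased terms), while REGEXP patterns are passed through untouched (so '/App.*/' finds nothing once terms are lowercased). The Java sketch below is illustrative only; the names PatternKind and normalizePattern are hypothetical and are not part of the Doris codebase.

    import java.util.Locale;

    enum PatternKind { WILDCARD, PREFIX, REGEXP }

    final class PatternNormalizationSketch {
        // Illustrative only: WILDCARD/PREFIX patterns follow the index's
        // lower_case normalization; REGEXP patterns bypass it entirely.
        static String normalizePattern(PatternKind kind, String pattern, boolean indexLowercases) {
            if (kind == PatternKind.REGEXP) {
                return pattern; // '/App.*/' keeps 'App.*' and will not match lowercased terms
            }
            return indexLowercases ? pattern.toLowerCase(Locale.ROOT) : pattern; // 'AB*' -> 'ab*'
        }
    }
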
diff --git a/regression-test/suites/search/test_search_variant_subcolumn_analyzer.groovy b/regression-test/suites/search/test_search_variant_subcolumn_analyzer.groovy
new file mode 100644
index 00000000000..d14cf15f7a3
--- /dev/null
+++ b/regression-test/suites/search/test_search_variant_subcolumn_analyzer.groovy
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/**
+ * Test search() function with variant subcolumn and field_pattern index.
+ *
+ * This test verifies that the analyzer (parser) from field_pattern-matched indexes
+ * is correctly applied when using search() on variant subcolumns.
+ *
+ * Bug: When using search() on variant subcolumns with field_pattern indexes,
+ * the analyzer was not applied because FE did not pass index properties to BE.
+ * This caused exact-match-only behavior instead of tokenized matching.
+ *
+ * Fix: FE now looks up the Index for each field in SearchExpression and passes
+ * the index_properties via TSearchFieldBinding to BE.
+ */
+suite("test_search_variant_subcolumn_analyzer") {
+ def tableName = "test_variant_subcolumn_analyzer"
+
+ sql """ set enable_match_without_inverted_index = false """
+ sql """ set enable_common_expr_pushdown = true """
+ sql """ set default_variant_enable_typed_paths_to_sparse = false """
+
+ sql "DROP TABLE IF EXISTS ${tableName}"
+
+ // Create table with variant column using predefined field pattern and field_pattern index
+ sql """
+ CREATE TABLE ${tableName} (
+ `id` INT NOT NULL,
+ `data` variant<
+ MATCH_NAME_GLOB 'string_*' : string,
+ properties("variant_max_subcolumns_count" = "100")
+ > NULL,
+ INDEX idx_text (data) USING INVERTED PROPERTIES(
+ "parser" = "unicode",
+ "field_pattern" = "string_*",
+ "lower_case" = "true"
+ )
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ )
+ """
+
+ // Insert test data
+ sql """INSERT INTO ${tableName} VALUES
+ (1, '{"string_8": "admin only"}'),
+ (2, '{"string_8": "user access"}'),
+ (3, '{"string_8": "admin access granted"}'),
+ (4, '{"string_1": "hello world"}'),
+ (5, '{"string_8": "readonly user"}'),
+ (6, '{"number_1": 42}')
+ """
+
+ // Wait for data to be flushed and indexes built
+ sql "sync"
+ Thread.sleep(5000)
+
+ // Test 1: search() with default_field on variant subcolumn matching field_pattern
+ // "admin" should match "admin only" and "admin access granted" because the unicode
+ // parser tokenizes them into ["admin", "only"] and ["admin", "access", "granted"]
+ qt_search_variant_analyzer_basic """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true)*/ id FROM ${tableName}
+ WHERE search('admin', '{"default_field":"data.string_8","mode":"lucene"}')
+ ORDER BY id
+ """
+
+ // Test 2: Verify MATCH also works (as a baseline)
+ qt_match_variant_baseline """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true)*/ id FROM ${tableName}
+ WHERE data['string_8'] MATCH_ANY 'admin'
+ ORDER BY id
+ """
+
+ // Test 3: Multi-term search should also work with tokenization
+ qt_search_variant_analyzer_multi """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true)*/ id FROM ${tableName}
+ WHERE search('admin access', '{"default_field":"data.string_8","mode":"lucene","default_operator":"AND"}')
+ ORDER BY id
+ """
+
+ // Test 4: Search on a different subcolumn matching the same field_pattern
+ qt_search_variant_analyzer_other_field """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true)*/ id FROM ${tableName}
+ WHERE search('hello', '{"default_field":"data.string_1","mode":"lucene"}')
+ ORDER BY id
+ """
+
+ // Test 5: Search with field-qualified syntax on variant subcolumn
+ qt_search_variant_analyzer_field_syntax """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true)*/ id FROM ${tableName}
+ WHERE search('data.string_8:user', '{"mode":"lucene"}')
+ ORDER BY id
+ """
+
+ // Test 6: Verify lowercase is applied (search for "ADMIN" should match "admin only")
+ qt_search_variant_analyzer_lowercase """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true)*/ id FROM ${tableName}
+ WHERE search('ADMIN', '{"default_field":"data.string_8","mode":"lucene"}')
+ ORDER BY id
+ """
+
+ // Test 7: Phrase search on variant subcolumn with analyzer
+ qt_search_variant_analyzer_phrase """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true)*/ id FROM ${tableName}
+ WHERE search('"admin only"', '{"default_field":"data.string_8","mode":"lucene"}')
+ ORDER BY id
+ """
+
+ // Clean up
+ sql "DROP TABLE IF EXISTS ${tableName}"
+
+ // Test Case 2: Variant with direct named field and field_pattern index for comparison
+ def tableName2 = "test_variant_direct_index"
+
+ sql "DROP TABLE IF EXISTS ${tableName2}"
+
+ sql """
+ CREATE TABLE ${tableName2} (
+ `id` INT NOT NULL,
+ `data` variant<
+ 'name' : string,
+ properties("variant_max_subcolumns_count" = "10")
+ > NULL,
+ INDEX idx_text (data) USING INVERTED PROPERTIES(
+ "parser" = "unicode",
+ "field_pattern" = "name",
+ "lower_case" = "true"
+ )
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "disable_auto_compaction" = "true"
+ )
+ """
+
+ sql """INSERT INTO ${tableName2} VALUES
+ (1, '{"name": "admin only"}'),
+ (2, '{"name": "user access"}'),
+ (3, '{"name": "admin access granted"}')
+ """
+
+ sql "sync"
+ Thread.sleep(5000)
+
+ // Test 8: search() on variant subcolumn with named field_pattern (direct match)
+ qt_search_variant_direct_index """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true)*/ id FROM ${tableName2}
+ WHERE search('admin', '{"default_field":"data.name","mode":"lucene"}')
+ ORDER BY id
+ """
+
+ sql "DROP TABLE IF EXISTS ${tableName2}"
+
+ logger.info("All variant subcolumn analyzer tests completed!")
+}
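
The fix exercised by this suite depends on the FE resolving, per search field, which inverted index (and therefore which analyzer properties) applies; for variant subcolumns that resolution goes through the index's field_pattern glob (e.g. "string_*" covering "data.string_8"). The following is a rough Java sketch of such a lookup under assumed names (FieldPatternLookup, propertiesFor); it is not the actual FE code, and the real SearchPredicate/TSearchFieldBinding plumbing is not reproduced here.

    import java.util.Map;
    import java.util.Optional;

    final class FieldPatternLookup {
        // Hypothetical registry: field_pattern glob -> inverted index properties
        // (e.g. "string_*" -> {parser=unicode, lower_case=true}).
        private final Map<String, Map<String, String>> indexesByPattern;

        FieldPatternLookup(Map<String, Map<String, String>> indexesByPattern) {
            this.indexesByPattern = indexesByPattern;
        }

        // Resolve index properties for a variant subcolumn path such as "data.string_8".
        Optional<Map<String, String>> propertiesFor(String subcolumnPath) {
            String leaf = subcolumnPath.substring(subcolumnPath.lastIndexOf('.') + 1);
            return indexesByPattern.entrySet().stream()
                    .filter(e -> matchesGlob(e.getKey(), leaf))
                    .map(Map.Entry::getValue)
                    .findFirst();
        }

        // Translate a simple '*' glob into a regex; only '*' is treated as a wildcard.
        private static boolean matchesGlob(String glob, String name) {
            String regex = ("\\Q" + glob + "\\E").replace("*", "\\E.*\\Q");
            return name.matches(regex);
        }
    }
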
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]