This is an automated email from the ASF dual-hosted git repository.

airborne12 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 4d643725ef1 [fix](inverted index) resolve variant sub-column indexes for score() (#62992)
4d643725ef1 is described below

commit 4d643725ef1d0c6b1483ae7d0715050c9acbc43e
Author: Jack <[email protected]>
AuthorDate: Fri May 8 17:15:53 2026 +0800

    [fix](inverted index) resolve variant sub-column indexes for score() (#62992)
    
    ### What problem does this PR solve?
    
    Issue Number: N/A
    
    Related PR:
    
    Problem Summary:
    
    Fix `score()` query failing on variant sub-columns with:
    
    ```text
    Index statistics collection failed: Score query is not supported without inverted index for column=<variant.subcolumn>
    ```
    
    `MatchPredicateCollector::collect` previously used
    `TabletSchema::inverted_indexs(int32_t col_unique_id, const std::string&
    suffix_path)`, which only consults `_col_id_suffix_to_index`. Variant
    sub-column indexes can also live in:
    
    1. `_path_set_info_map` (`subcolumn_indexes` / `typed_path_set`), when
    the parent variant index is inherited by sub-columns or a typed path
    index is materialized.
    2. `_index_by_unique_id_with_pattern`, when the inverted index is
    created on the parent variant column with
    `PROPERTIES("field_pattern"="...")`.
    
    This PR fixes both paths:
    
    - Use the column-aware `TabletSchema::inverted_indexs(const
    TabletColumn&)` lookup so inherited / typed-path variant sub-column
    indexes are resolved.
    - Reuse the parent variant sub-column pattern matching logic from
    `variant_util` to resolve `MATCH_NAME` / `MATCH_NAME_GLOB` templates,
    then look up `inverted_index_by_field_pattern(parent_uid,
    matched_pattern)`.
    - Clone matched field-pattern index metadata and set the actual Lucene
    field suffix so score collection uses keys such as
    `<parent_uid>.<parent_col>.user.name`, while
    `CollectInfo::owned_index_meta` keeps the cloned metadata alive.
    
    Behaviour after this PR:
    
    | Scenario | Behaviour |
    | --- | --- |
    | Variant sub-column index is inherited or materialized in
    `_path_set_info_map` | Schema lookup succeeds through
    `inverted_indexs(const TabletColumn&)`; score collection uses the real
    sub-column Lucene field. |
    | Parent variant index uses exact `field_pattern`, e.g. `host` | Pattern
    lookup resolves the index and score collection uses
    `<parent_uid>.<parent_col>.host`. |
    | Parent variant index uses glob `field_pattern`, e.g. `user.*`, and
    slot path is `user.name` | Parent template matching returns `user.*`;
    index lookup uses the matched pattern; score collection uses
    `<parent_uid>.<parent_col>.user.name`. |
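    | Parent variant index is plain (no `field_pattern`) and the slot
    resolves to an extracted VARIANT placeholder sub-column | Parent's
    non-`field_pattern` indexes are cloned with the variant path as the
    index suffix; score collection uses `<parent_uid>.<parent_col>.<path>`. |
    | No index matches the sub-column | Existing unsupported-index error is
    preserved; BM25 score still requires an inverted index. |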
    
    ### Release note
    
    Fix `score()` queries on variant sub-columns whose inverted index is
    inherited, typed-path based, or defined through `field_pattern`.
---
 be/src/storage/predicate_collector.cpp             |  57 +-
 be/src/storage/predicate_collector.h               |   1 +
 .../compaction/collection_statistics_test.cpp      | 660 ++++++++++++++++++++-
 be/test/testutil/mock/mock_descriptors.h           |  32 +-
 .../inverted_index_p0/test_bm25_score.groovy       |  49 +-
 .../test_bm25_score_variant.groovy                 | 106 ++++
 6 files changed, 898 insertions(+), 7 deletions(-)

diff --git a/be/src/storage/predicate_collector.cpp 
b/be/src/storage/predicate_collector.cpp
index 8e319ae329f..fa8fc0117ce 100644
--- a/be/src/storage/predicate_collector.cpp
+++ b/be/src/storage/predicate_collector.cpp
@@ -19,6 +19,9 @@
 
 #include <glog/logging.h>
 
+#include <vector>
+
+#include "exec/common/variant_util.h"
 #include "exprs/vexpr.h"
 #include "exprs/vexpr_context.h"
 #include "exprs/vliteral.h"
@@ -91,7 +94,49 @@ Status MatchPredicateCollector::collect(RuntimeState* state, 
const TabletSchemaS
     }
 
     const auto& column = tablet_schema->column(col_idx);
-    auto index_metas = tablet_schema->inverted_indexs(sd->col_unique_id(), 
column.suffix_path());
+    auto index_metas = tablet_schema->inverted_indexs(column);
+    std::vector<std::shared_ptr<const TabletIndex>> owned_index_metas;
+    std::string index_suffix_path = column.suffix_path();
+
+    // Schema-only fallback for variant sub-columns. Collector runs at tablet
+    // level without segment context, so we cannot do nested-group inference
+    // or inherit_index runtime-type dispatch. Two paths cover what is
+    // resolvable from schema alone:
+    //   1. field_pattern templates (MATCH_NAME / MATCH_NAME_GLOB) via
+    //      generate_sub_column_info.
+    //   2. Plain parent inverted index when the schema column is the dynamic
+    //      path's VARIANT placeholder produced by _init_variant_columns. In
+    //      that state inverted_indexs(column) misses because
+    //      _path_set_info_map.subcolumn_indexes is only populated for typed
+    //      paths / field_pattern outputs, not for plain parent indexes added
+    //      by ALTER. Clone the parent's non-field-pattern indexes with the
+    //      variant path as suffix so segment-side BM25 statistics can be
+    //      collected.
+    if (index_metas.empty() && column.is_extracted_column()) {
+        TabletSchema::SubColumnInfo sub_column_info;
+        const std::string relative_path = 
column.path_info_ptr()->copy_pop_front().get_path();
+        if (variant_util::generate_sub_column_info(*tablet_schema, 
column.parent_unique_id(),
+                                                   relative_path, 
&sub_column_info) &&
+            !sub_column_info.indexes.empty()) {
+            index_suffix_path = sub_column_info.column.suffix_path();
+            for (auto& idx : sub_column_info.indexes) {
+                index_metas.push_back(idx.get());
+                owned_index_metas.emplace_back(std::move(idx));
+            }
+        } else if (column.is_variant_type()) {
+            const auto parent_indexes = 
tablet_schema->inverted_indexs(column.parent_unique_id());
+            for (const auto* index : parent_indexes) {
+                if (!index->field_pattern().empty()) {
+                    continue;
+                }
+                auto index_ptr = std::make_shared<TabletIndex>(*index);
+                index_ptr->set_escaped_escaped_index_suffix_path(
+                        column.path_info_ptr()->get_path());
+                index_metas.push_back(index_ptr.get());
+                owned_index_metas.emplace_back(std::move(index_ptr));
+            }
+        }
+    }
 
 #ifndef BE_TEST
     if (index_metas.empty()) {
@@ -117,7 +162,7 @@ Status MatchPredicateCollector::collect(RuntimeState* 
state, const TabletSchemaS
                                                                     
index_meta->properties());
 
         std::string field_name =
-                build_field_name(index_meta->col_unique_ids()[0], 
column.suffix_path());
+                build_field_name(index_meta->col_unique_ids()[0], 
index_suffix_path);
         std::wstring ws_field_name = StringHelper::to_wstring(field_name);
 
         auto iter = collect_infos->find(ws_field_name);
@@ -125,6 +170,12 @@ Status MatchPredicateCollector::collect(RuntimeState* 
state, const TabletSchemaS
             CollectInfo collect_info;
             collect_info.term_infos.insert(term_infos.begin(), 
term_infos.end());
             collect_info.index_meta = index_meta;
+            for (const auto& owned_index_meta : owned_index_metas) {
+                if (owned_index_meta.get() == index_meta) {
+                    collect_info.owned_index_meta = owned_index_meta;
+                    break;
+                }
+            }
             (*collect_infos)[ws_field_name] = std::move(collect_info);
         } else {
             iter->second.term_infos.insert(term_infos.begin(), 
term_infos.end());
@@ -260,4 +311,4 @@ SearchPredicateCollector::ClauseTypeCategory 
SearchPredicateCollector::get_claus
     }
 }
 
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/storage/predicate_collector.h 
b/be/src/storage/predicate_collector.h
index aa5a49344b9..c96e0af9c45 100644
--- a/be/src/storage/predicate_collector.h
+++ b/be/src/storage/predicate_collector.h
@@ -43,6 +43,7 @@ struct TermInfoComparer {
 
 struct CollectInfo {
     std::set<segment_v2::TermInfo, TermInfoComparer> term_infos;
+    std::shared_ptr<const TabletIndex> owned_index_meta;
     const TabletIndex* index_meta = nullptr;
 };
 using CollectInfoMap = std::unordered_map<std::wstring, CollectInfo>;
diff --git a/be/test/storage/compaction/collection_statistics_test.cpp 
b/be/test/storage/compaction/collection_statistics_test.cpp
index 92b1522f767..b21c2264bd9 100644
--- a/be/test/storage/compaction/collection_statistics_test.cpp
+++ b/be/test/storage/compaction/collection_statistics_test.cpp
@@ -25,6 +25,8 @@
 #include <string>
 
 #include "common/exception.h"
+#include "core/data_type/data_type_string.h"
+#include "exec/common/variant_util.h"
 #include "exprs/vexpr.h"
 #include "exprs/vexpr_context.h"
 #include "exprs/vliteral.h"
@@ -43,7 +45,11 @@ namespace collection_statistics {
 
 class MockVExpr : public VExpr {
 public:
-    MockVExpr(TExprNodeType::type node_type) : _mock_node_type(node_type) {}
+    MockVExpr(TExprNodeType::type node_type) : _mock_node_type(node_type) {
+        if (node_type == TExprNodeType::MATCH_PRED) {
+            _opcode = TExprOpcode::MATCH_PHRASE;
+        }
+    }
 
     TExprNodeType::type node_type() const override { return _mock_node_type; }
 
@@ -100,6 +106,7 @@ public:
     MockVLiteral(const std::string& value) : _value(value) {}
 
     std::string value() const override { return _value; }
+    std::string value(const DataTypeSerDe::FormatOptions& options) const 
override { return _value; }
     const std::string& expr_name() const override { return _value; }
     std::string debug_string() const override { return "MockVLiteral: " + 
_value; }
 
@@ -268,6 +275,7 @@ protected:
         index._col_unique_ids.push_back(1);
         std::map<std::string, std::string> properties;
         properties["parser"] = "standard";
+        properties["support_phrase"] = "true";
         index._properties = properties;
 
         tablet_schema->append_index(std::move(index));
@@ -614,6 +622,654 @@ TEST_F(CollectionStatisticsTest, 
CollectWithDoubleCastWrappedSlotRef) {
     EXPECT_TRUE(status.ok()) << status.msg();
 }
 
+// Regression for AIR-36: match score collection must resolve indexes for
+// variant sub-columns whose indexes live in _path_set_info_map (typed paths or
+// inherited sub-column indexes). The previous simple lookup using
+// inverted_indexs(col_unique_id, suffix_path) missed those indexes.
+TEST_F(CollectionStatisticsTest, ExtractCollectInfoForVariantSubcolumnIndex) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+
+    constexpr int32_t kVariantUid = 9001;
+
+    TabletColumn variant_col;
+    variant_col.set_unique_id(kVariantUid);
+    variant_col.set_name("v");
+    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    tablet_schema->append_column(variant_col);
+
+    TabletColumn sub_col;
+    sub_col.set_unique_id(-1);
+    sub_col.set_name("v.host");
+    sub_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    sub_col.set_parent_unique_id(kVariantUid);
+    PathInData path("v.host");
+    sub_col.set_path_info(path);
+    tablet_schema->append_column(sub_col);
+
+    auto sub_index = std::make_shared<TabletIndex>();
+    TabletIndexPB index_pb;
+    index_pb.set_index_id(2001);
+    index_pb.set_index_name("variant_subcolumn_idx");
+    index_pb.set_index_type(IndexType::INVERTED);
+    index_pb.add_col_unique_id(kVariantUid);
+    auto* props = index_pb.mutable_properties();
+    (*props)["parser"] = "standard";
+    (*props)["support_phrase"] = "true";
+    sub_index->init_from_pb(index_pb);
+
+    TabletSchema::PathsSetInfo path_set_info;
+    TabletIndexes sub_indexes = {sub_index};
+    path_set_info.subcolumn_indexes["host"] = sub_indexes;
+    std::unordered_map<int32_t, TabletSchema::PathsSetInfo> path_set_info_map;
+    path_set_info_map[kVariantUid] = std::move(path_set_info);
+    tablet_schema->set_path_set_info(std::move(path_set_info_map));
+
+    EXPECT_TRUE(tablet_schema->inverted_indexs(kVariantUid, "host").empty());
+
+    auto found = 
tablet_schema->inverted_indexs(tablet_schema->column(/*ordinal=*/1));
+    ASSERT_EQ(found.size(), 1u);
+    EXPECT_EQ(found[0]->index_name(), "variant_subcolumn_idx");
+
+    constexpr int kSlotId = 42;
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 
kVariantUid);
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto slot_ref =
+            std::make_shared<collection_statistics::MockVSlotRef>("v.host", 
SlotId(kSlotId));
+    auto literal = 
std::make_shared<collection_statistics::MockVLiteral>("foo");
+    match_expr->_children.push_back(slot_ref);
+    match_expr->_children.push_back(literal);
+
+    VExprContextSPtrs contexts;
+    contexts.push_back(std::make_shared<VExprContext>(match_expr));
+
+    std::unordered_map<std::wstring, CollectInfo> collect_infos;
+    auto status = stats_->extract_collect_info(runtime_state_.get(), contexts, 
tablet_schema,
+                                               &collect_infos);
+    ASSERT_TRUE(status.ok()) << status.msg();
+    ASSERT_EQ(collect_infos.size(), 1u);
+    auto it = 
collect_infos.find(StringHelper::to_wstring(std::to_string(kVariantUid) + 
".v.host"));
+    ASSERT_NE(it, collect_infos.end());
+    ASSERT_NE(it->second.index_meta, nullptr);
+    EXPECT_EQ(it->second.index_meta->index_name(), "variant_subcolumn_idx");
+}
+
+// Regression for score on a dynamic variant sub-column inherited from a plain
+// parent variant inverted index (no field_pattern template). Matches the
+// scan-time schema shape: _init_variant_columns materializes the accessed
+// path as an extracted VARIANT placeholder, so neither inverted_indexs(column)
+// nor generate_sub_column_info resolves the parent index. Collector clones
+// the parent's non-field-pattern indexes with the variant path as suffix.
+TEST_F(CollectionStatisticsTest, 
ExtractCollectInfoForVariantParentIndexWithoutTemplate) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+
+    constexpr int32_t kVariantUid = 9004;
+
+    TabletColumn variant_col;
+    variant_col.set_unique_id(kVariantUid);
+    variant_col.set_name("v");
+    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    tablet_schema->append_column(variant_col);
+
+    TabletColumn sub_col;
+    sub_col.set_unique_id(-1);
+    sub_col.set_name("v.key");
+    sub_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    sub_col.set_parent_unique_id(kVariantUid);
+    PathInData path("v.key");
+    sub_col.set_path_info(path);
+    tablet_schema->append_column(sub_col);
+
+    TabletIndexPB index_pb;
+    index_pb.set_index_id(2004);
+    index_pb.set_index_name("variant_parent_idx");
+    index_pb.set_index_type(IndexType::INVERTED);
+    index_pb.add_col_unique_id(kVariantUid);
+    auto* props = index_pb.mutable_properties();
+    (*props)["parser"] = "english";
+    (*props)["support_phrase"] = "true";
+
+    TabletIndex index;
+    index.init_from_pb(index_pb);
+    tablet_schema->append_index(std::move(index));
+
+    // Pre-conditions: column-aware lookup is empty (no inheritance 
pre-populated)
+    // and generate_sub_column_info returns false (no field_pattern template).
+    // The collector must still resolve through the VARIANT-placeholder branch.
+    
ASSERT_TRUE(tablet_schema->inverted_indexs(tablet_schema->column(/*ordinal=*/1)).empty());
+    ASSERT_EQ(tablet_schema->inverted_indexs(kVariantUid).size(), 1u);
+    TabletSchema::SubColumnInfo sub_column_info;
+    ASSERT_FALSE(variant_util::generate_sub_column_info(*tablet_schema, 
kVariantUid, "key",
+                                                        &sub_column_info));
+
+    constexpr int kSlotId = 45;
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 
kVariantUid, "v.key",
+                                                        {"key"});
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto cast_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::CAST_EXPR);
+    cast_expr->_data_type = std::make_shared<DataTypeString>();
+    auto slot_ref = 
std::make_shared<collection_statistics::MockVSlotRef>("v.key", SlotId(kSlotId));
+    auto literal = 
std::make_shared<collection_statistics::MockVLiteral>("abc");
+    cast_expr->_children.push_back(slot_ref);
+    match_expr->_children.push_back(cast_expr);
+    match_expr->_children.push_back(literal);
+
+    VExprContextSPtrs contexts;
+    contexts.push_back(std::make_shared<VExprContext>(match_expr));
+
+    std::unordered_map<std::wstring, CollectInfo> collect_infos;
+    auto status = stats_->extract_collect_info(runtime_state_.get(), contexts, 
tablet_schema,
+                                               &collect_infos);
+    ASSERT_TRUE(status.ok()) << status.msg();
+    ASSERT_EQ(collect_infos.size(), 1u);
+    auto it = 
collect_infos.find(StringHelper::to_wstring(std::to_string(kVariantUid) + 
".v.key"));
+    ASSERT_NE(it, collect_infos.end());
+    ASSERT_NE(it->second.index_meta, nullptr);
+    ASSERT_NE(it->second.owned_index_meta, nullptr);
+    EXPECT_EQ(it->second.index_meta->index_name(), "variant_parent_idx");
+}
+
+namespace {
+
+// Build a sub-column template for the parent variant column. pattern_type has 
no
+// public setter on TabletColumn, so construct through ColumnPB.
+TabletColumn make_subcolumn_template(const std::string& pattern, PatternTypePB 
pattern_type) {
+    ColumnPB column_pb;
+    column_pb.set_unique_id(-1);
+    column_pb.set_name(pattern);
+    column_pb.set_type("STRING");
+    column_pb.set_is_nullable(true);
+    column_pb.set_pattern_type(pattern_type);
+
+    TabletColumn templ;
+    templ.init_from_pb(column_pb);
+    return templ;
+}
+
+} // namespace
+
+TEST_F(CollectionStatisticsTest, 
ExtractCollectInfoForVariantFieldPatternIndex) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+
+    constexpr int32_t kVariantUid = 9002;
+
+    TabletColumn variant_col;
+    variant_col.set_unique_id(kVariantUid);
+    variant_col.set_name("meta");
+    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    TabletColumn host_template = make_subcolumn_template("host", 
PatternTypePB::MATCH_NAME);
+    variant_col.add_sub_column(host_template);
+    tablet_schema->append_column(variant_col);
+
+    TabletColumn sub_col;
+    sub_col.set_unique_id(-1);
+    sub_col.set_name("meta.host");
+    sub_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    sub_col.set_parent_unique_id(kVariantUid);
+    PathInData path("meta.host");
+    sub_col.set_path_info(path);
+    tablet_schema->append_column(sub_col);
+
+    TabletIndexPB index_pb;
+    index_pb.set_index_id(2002);
+    index_pb.set_index_name("variant_field_pattern_idx");
+    index_pb.set_index_type(IndexType::INVERTED);
+    index_pb.add_col_unique_id(kVariantUid);
+    auto* props = index_pb.mutable_properties();
+    (*props)["parser"] = "standard";
+    (*props)["support_phrase"] = "true";
+    (*props)["field_pattern"] = "host";
+
+    TabletIndex index;
+    index.init_from_pb(index_pb);
+    tablet_schema->append_index(std::move(index));
+
+    
ASSERT_TRUE(tablet_schema->inverted_indexs(tablet_schema->column(/*ordinal=*/1)).empty());
+    ASSERT_EQ(tablet_schema->inverted_index_by_field_pattern(kVariantUid, 
"host").size(), 1u);
+
+    constexpr int kSlotId = 43;
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 
kVariantUid, "meta.host",
+                                                        {"host"});
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto slot_ref =
+            std::make_shared<collection_statistics::MockVSlotRef>("meta.host", 
SlotId(kSlotId));
+    auto literal = 
std::make_shared<collection_statistics::MockVLiteral>("alpha");
+    match_expr->_children.push_back(slot_ref);
+    match_expr->_children.push_back(literal);
+
+    VExprContextSPtrs contexts;
+    contexts.push_back(std::make_shared<VExprContext>(match_expr));
+
+    std::unordered_map<std::wstring, CollectInfo> collect_infos;
+    auto status = stats_->extract_collect_info(runtime_state_.get(), contexts, 
tablet_schema,
+                                               &collect_infos);
+    ASSERT_TRUE(status.ok()) << status.msg();
+    ASSERT_EQ(collect_infos.size(), 1u);
+    auto it = collect_infos.find(
+            StringHelper::to_wstring(std::to_string(kVariantUid) + 
".meta.host"));
+    ASSERT_NE(it, collect_infos.end());
+    ASSERT_NE(it->second.index_meta, nullptr);
+    ASSERT_NE(it->second.owned_index_meta, nullptr);
+    EXPECT_EQ(it->second.index_meta->index_name(), 
"variant_field_pattern_idx");
+}
+
+// Regression: field_pattern="user.*" is registered under the pattern string,
+// while the query slot resolves to column_paths=["user", "name"]. The fallback
+// must match the parent variant's sub-column template first, then use the
+// matched pattern to fetch the index, and collect under the actual Lucene 
field.
+TEST_F(CollectionStatisticsTest, 
ExtractCollectInfoForVariantFieldPatternGlobIndex) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+
+    constexpr int32_t kVariantUid = 9003;
+
+    TabletColumn variant_col;
+    variant_col.set_unique_id(kVariantUid);
+    variant_col.set_name("meta");
+    variant_col.set_type(FieldType::OLAP_FIELD_TYPE_VARIANT);
+    TabletColumn glob_template = make_subcolumn_template("user.*", 
PatternTypePB::MATCH_NAME_GLOB);
+    variant_col.add_sub_column(glob_template);
+    tablet_schema->append_column(variant_col);
+
+    TabletColumn sub_col;
+    sub_col.set_unique_id(-1);
+    sub_col.set_name("meta.user.name");
+    sub_col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    sub_col.set_parent_unique_id(kVariantUid);
+    PathInData path("meta.user.name");
+    sub_col.set_path_info(path);
+    tablet_schema->append_column(sub_col);
+
+    TabletIndexPB index_pb;
+    index_pb.set_index_id(2003);
+    index_pb.set_index_name("variant_field_pattern_glob_idx");
+    index_pb.set_index_type(IndexType::INVERTED);
+    index_pb.add_col_unique_id(kVariantUid);
+    auto* props = index_pb.mutable_properties();
+    (*props)["parser"] = "standard";
+    (*props)["support_phrase"] = "true";
+    (*props)["field_pattern"] = "user.*";
+
+    TabletIndex index;
+    index.init_from_pb(index_pb);
+    tablet_schema->append_index(std::move(index));
+
+    
ASSERT_TRUE(tablet_schema->inverted_indexs(tablet_schema->column(/*ordinal=*/1)).empty());
+    ASSERT_TRUE(tablet_schema->inverted_index_by_field_pattern(kVariantUid, 
"user.name").empty());
+    ASSERT_EQ(tablet_schema->inverted_index_by_field_pattern(kVariantUid, 
"user.*").size(), 1u);
+    TabletSchema::SubColumnInfo sub_column_info;
+    ASSERT_TRUE(variant_util::generate_sub_column_info(*tablet_schema, 
kVariantUid, "user.name",
+                                                       &sub_column_info));
+    ASSERT_EQ(sub_column_info.indexes.size(), 1u);
+    EXPECT_EQ(sub_column_info.column.suffix_path(), "meta.user.name");
+    EXPECT_EQ(sub_column_info.indexes[0]->index_name(), 
"variant_field_pattern_glob_idx");
+
+    constexpr int kSlotId = 44;
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 
kVariantUid,
+                                                        "meta.user.name", 
{"user", "name"});
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto slot_ref = 
std::make_shared<collection_statistics::MockVSlotRef>("meta.user.name",
+                                                                          
SlotId(kSlotId));
+    auto literal = 
std::make_shared<collection_statistics::MockVLiteral>("alice");
+    match_expr->_children.push_back(slot_ref);
+    match_expr->_children.push_back(literal);
+
+    VExprContextSPtrs contexts;
+    contexts.push_back(std::make_shared<VExprContext>(match_expr));
+
+    std::unordered_map<std::wstring, CollectInfo> collect_infos;
+    auto status = stats_->extract_collect_info(runtime_state_.get(), contexts, 
tablet_schema,
+                                               &collect_infos);
+    ASSERT_TRUE(status.ok()) << status.msg();
+    ASSERT_EQ(collect_infos.size(), 1u);
+    auto it = collect_infos.find(
+            StringHelper::to_wstring(std::to_string(kVariantUid) + 
".meta.user.name"));
+    ASSERT_NE(it, collect_infos.end());
+    ASSERT_NE(it->second.index_meta, nullptr);
+    ASSERT_NE(it->second.owned_index_meta, nullptr);
+    EXPECT_EQ(it->second.index_meta->index_name(), 
"variant_field_pattern_glob_idx");
+}
+
+// E1: Match predicate whose left subtree contains no VSlotRef.
+// find_slot_ref recurses through children; when it returns nullptr the
+// collector reports INVERTED_INDEX_NOT_SUPPORTED.
+// Calls MatchPredicateCollector::collect() directly so coverage attribution
+// is not muddied by extract_collect_info's virtual-dispatch indirection.
+TEST_F(CollectionStatisticsTest, CollectMissingSlotRefReturnsError) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+    TabletColumn col;
+    col.set_unique_id(1001);
+    col.set_name("c");
+    col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    tablet_schema->append_column(col);
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto literal_left = 
std::make_shared<collection_statistics::MockVLiteral>("foo");
+    auto literal_right = 
std::make_shared<collection_statistics::MockVLiteral>("bar");
+    match_expr->_children.push_back(literal_left);
+    match_expr->_children.push_back(literal_right);
+
+    MatchPredicateCollector collector;
+    std::unordered_map<std::wstring, CollectInfo> collect_infos;
+    auto status =
+            collector.collect(runtime_state_.get(), tablet_schema, match_expr, 
&collect_infos);
+    ASSERT_FALSE(status.ok());
+    EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_NOT_SUPPORTED);
+    EXPECT_TRUE(status.msg().find("Cannot find slot reference") != 
std::string::npos);
+}
+
+// E2: SlotRef points to a slot_id absent from the runtime descriptor table.
+TEST_F(CollectionStatisticsTest, CollectMissingSlotDescriptorReturnsError) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+    TabletColumn col;
+    col.set_unique_id(1002);
+    col.set_name("c");
+    col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    tablet_schema->append_column(col);
+
+    constexpr int kAbsentSlotId = 99999;
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto slot_ref =
+            std::make_shared<collection_statistics::MockVSlotRef>("c", 
SlotId(kAbsentSlotId));
+    auto literal = std::make_shared<collection_statistics::MockVLiteral>("v");
+    match_expr->_children.push_back(slot_ref);
+    match_expr->_children.push_back(literal);
+
+    MatchPredicateCollector collector;
+    std::unordered_map<std::wstring, CollectInfo> collect_infos;
+    auto status =
+            collector.collect(runtime_state_.get(), tablet_schema, match_expr, 
&collect_infos);
+    ASSERT_FALSE(status.ok());
+    EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_NOT_SUPPORTED);
+    EXPECT_TRUE(status.msg().find("Cannot find slot descriptor") != 
std::string::npos);
+}
+
+// E3: SlotRef name does not exist in tablet_schema (field_index returns -1).
+TEST_F(CollectionStatisticsTest, CollectUnknownColumnNameReturnsError) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+    TabletColumn col;
+    col.set_unique_id(1003);
+    col.set_name("declared");
+    col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    tablet_schema->append_column(col);
+
+    constexpr int kSlotId = 50;
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 1003, 
"missing", {});
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto slot_ref =
+            std::make_shared<collection_statistics::MockVSlotRef>("missing", 
SlotId(kSlotId));
+    auto literal = std::make_shared<collection_statistics::MockVLiteral>("v");
+    match_expr->_children.push_back(slot_ref);
+    match_expr->_children.push_back(literal);
+
+    MatchPredicateCollector collector;
+    std::unordered_map<std::wstring, CollectInfo> collect_infos;
+    auto status =
+            collector.collect(runtime_state_.get(), tablet_schema, match_expr, 
&collect_infos);
+    ASSERT_FALSE(status.ok());
+    EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_NOT_SUPPORTED);
+    EXPECT_TRUE(status.msg().find("Cannot find column index") != 
std::string::npos);
+}
+
+// I1 + L3 + O1: Plain string column with a direct inverted index.
+// Direct hit produces a CollectInfo whose owned_index_meta is null
+// (the meta lives in the schema and is not cloned).
+TEST_F(CollectionStatisticsTest, CollectDirectIndexHitFromSchema) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+
+    constexpr int32_t kColUid = 1100;
+    TabletColumn col;
+    col.set_unique_id(kColUid);
+    col.set_name("note");
+    col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    tablet_schema->append_column(col);
+
+    TabletIndexPB index_pb;
+    index_pb.set_index_id(2100);
+    index_pb.set_index_name("note_idx");
+    index_pb.set_index_type(IndexType::INVERTED);
+    index_pb.add_col_unique_id(kColUid);
+    auto* props = index_pb.mutable_properties();
+    (*props)["parser"] = "english";
+    (*props)["support_phrase"] = "true";
+    TabletIndex index;
+    index.init_from_pb(index_pb);
+    tablet_schema->append_index(std::move(index));
+
+    constexpr int kSlotId = 60;
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 
kColUid, "note", {});
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto slot_ref = 
std::make_shared<collection_statistics::MockVSlotRef>("note", SlotId(kSlotId));
+    auto literal = 
std::make_shared<collection_statistics::MockVLiteral>("hello world");
+    match_expr->_children.push_back(slot_ref);
+    match_expr->_children.push_back(literal);
+
+    MatchPredicateCollector collector;
+    std::unordered_map<std::wstring, CollectInfo> collect_infos;
+    auto status =
+            collector.collect(runtime_state_.get(), tablet_schema, match_expr, 
&collect_infos);
+    ASSERT_TRUE(status.ok()) << status.msg();
+    ASSERT_EQ(collect_infos.size(), 1u);
+    auto it = 
collect_infos.find(StringHelper::to_wstring(std::to_string(kColUid)));
+    ASSERT_NE(it, collect_infos.end());
+    EXPECT_NE(it->second.index_meta, nullptr);
+    EXPECT_EQ(it->second.owned_index_meta, nullptr); // O1: schema-direct meta 
is not owned
+    EXPECT_FALSE(it->second.term_infos.empty());
+}
+
+// I2: MATCH on a plain STRING column that has no inverted index and is not an extracted
+// variant sub-column, so the variant fallback path does not apply
+// (column.is_extracted_column() is false). In BE_TEST builds the empty-index check is skipped,
+// so collect() returns OK with no CollectInfo emitted.
+TEST_F(CollectionStatisticsTest, CollectNotExtractedColumnSkipsFallback) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+
+    constexpr int32_t kColUid = 1200; // unique id shared by the column and its slot descriptor
+    TabletColumn col;
+    col.set_unique_id(kColUid);
+    col.set_name("plain");
+    col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    tablet_schema->append_column(col);
+    // Deliberately no append_index() call: the column stays unindexed.
+
+    constexpr int kSlotId = 70; // slot id shared by the descriptor and the MockVSlotRef below
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 
kColUid, "plain", {}); // {} = no column_paths
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto slot_ref = 
std::make_shared<collection_statistics::MockVSlotRef>("plain", SlotId(kSlotId));
+    auto literal = std::make_shared<collection_statistics::MockVLiteral>("v");
+    match_expr->_children.push_back(slot_ref); // child 0: the column reference
+    match_expr->_children.push_back(literal); // child 1: the search text "v"
+
+    MatchPredicateCollector collector;
+    std::unordered_map<std::wstring, CollectInfo> collect_infos; // output map filled by collect()
+    auto status =
+            collector.collect(runtime_state_.get(), tablet_schema, match_expr, 
&collect_infos);
+    ASSERT_TRUE(status.ok()) << status.msg(); // a missing index must not surface as an error here
+    EXPECT_TRUE(collect_infos.empty()); // nothing is collected for the unindexed column
+}
+
+// L1: The column has an INVERTED index, but its properties carry no "parser" entry, so no
+// analyzer is requested (should_analyzer returns false). The matching index_meta is iterated
+// but skipped before insertion; collect() still returns OK and collect_infos stays empty.
+TEST_F(CollectionStatisticsTest, CollectSkipsIndexWithoutAnalyzer) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+
+    constexpr int32_t kColUid = 1300; // unique id shared by the column, index and slot descriptor
+    TabletColumn col;
+    col.set_unique_id(kColUid);
+    col.set_name("kw");
+    col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    tablet_schema->append_column(col);
+
+    TabletIndexPB index_pb;
+    index_pb.set_index_id(2300);
+    index_pb.set_index_name("kw_idx");
+    index_pb.set_index_type(IndexType::INVERTED);
+    index_pb.add_col_unique_id(kColUid); // bind the index to the column above
+    // No "parser" property -> should_analyzer returns false
+    TabletIndex index;
+    index.init_from_pb(index_pb);
+    tablet_schema->append_index(std::move(index));
+
+    constexpr int kSlotId = 80; // slot id shared by the descriptor and the MockVSlotRef below
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 
kColUid, "kw", {}); // {} = no column_paths
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto slot_ref = 
std::make_shared<collection_statistics::MockVSlotRef>("kw", SlotId(kSlotId));
+    auto literal = std::make_shared<collection_statistics::MockVLiteral>("v");
+    match_expr->_children.push_back(slot_ref); // child 0: the column reference
+    match_expr->_children.push_back(literal); // child 1: the search text "v"
+
+    MatchPredicateCollector collector;
+    std::unordered_map<std::wstring, CollectInfo> collect_infos; // output map filled by collect()
+    auto status =
+            collector.collect(runtime_state_.get(), tablet_schema, match_expr, 
&collect_infos);
+    ASSERT_TRUE(status.ok()) << status.msg(); // skipping the index is not an error
+    EXPECT_TRUE(collect_infos.empty()); // the analyzer-less index contributed nothing
+}
+
+// L2: The index sets an analyzer ("parser"="english", so should_analyzer returns true) but does
+// not declare "support_phrase"="true". MockVExpr drives the MATCH_PHRASE opcode, so
+// is_need_similarity_score returns false, the index is skipped and collect_infos stays empty.
+TEST_F(CollectionStatisticsTest, CollectSkipsIndexWithoutSimilarityScore) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+
+    constexpr int32_t kColUid = 1350; // unique id shared by the column, index and slot descriptor
+    TabletColumn col;
+    col.set_unique_id(kColUid);
+    col.set_name("body");
+    col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    tablet_schema->append_column(col);
+
+    TabletIndexPB index_pb;
+    index_pb.set_index_id(2350);
+    index_pb.set_index_name("body_idx");
+    index_pb.set_index_type(IndexType::INVERTED);
+    index_pb.add_col_unique_id(kColUid); // bind the index to the column above
+    auto* props = index_pb.mutable_properties();
+    (*props)["parser"] = "english"; // should_analyzer == true
+    // Intentionally omit "support_phrase" -> is_need_similarity_score == false
+    TabletIndex index;
+    index.init_from_pb(index_pb);
+    tablet_schema->append_index(std::move(index));
+
+    constexpr int kSlotId = 85; // slot id shared by the descriptor and the MockVSlotRef below
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 
kColUid, "body", {}); // {} = no column_paths
+
+    auto match_expr = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+    auto slot_ref = 
std::make_shared<collection_statistics::MockVSlotRef>("body", SlotId(kSlotId));
+    auto literal = 
std::make_shared<collection_statistics::MockVLiteral>("hello");
+    match_expr->_children.push_back(slot_ref); // child 0: the column reference
+    match_expr->_children.push_back(literal); // child 1: the search text "hello"
+
+    MatchPredicateCollector collector;
+    std::unordered_map<std::wstring, CollectInfo> collect_infos; // output map filled by collect()
+    auto status =
+            collector.collect(runtime_state_.get(), tablet_schema, match_expr, 
&collect_infos);
+    ASSERT_TRUE(status.ok()) << status.msg(); // skipping the index is not an error
+    EXPECT_TRUE(collect_infos.empty()); // the index without phrase support contributed nothing
+}
+
+// L4: Two MATCH predicates ("alpha", then "beta") on the same indexed column are collected into
+// one shared map. Both resolve to the same field_name key, so the second insertion merges its
+// term_infos into the first entry instead of adding a second CollectInfo.
+TEST_F(CollectionStatisticsTest, CollectMergesTermsForSameFieldName) {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+
+    constexpr int32_t kColUid = 1400; // unique id shared by the column, index and slot descriptor
+    TabletColumn col;
+    col.set_unique_id(kColUid);
+    col.set_name("doc");
+    col.set_type(FieldType::OLAP_FIELD_TYPE_STRING);
+    tablet_schema->append_column(col);
+
+    TabletIndexPB index_pb;
+    index_pb.set_index_id(2400);
+    index_pb.set_index_name("doc_idx");
+    index_pb.set_index_type(IndexType::INVERTED);
+    index_pb.add_col_unique_id(kColUid); // bind the index to the column above
+    auto* props = index_pb.mutable_properties();
+    (*props)["parser"] = "english"; // analyzer configured
+    (*props)["support_phrase"] = "true"; // phrase support declared, unlike the skip cases
+    TabletIndex index;
+    index.init_from_pb(index_pb);
+    tablet_schema->append_index(std::move(index));
+
+    constexpr int kSlotId = 90; // slot id shared by the descriptor and every MockVSlotRef below
+    runtime_state_->_mock_desc_tbl->add_slot_descriptor(SlotId(kSlotId), 
kColUid, "doc", {}); // {} = no column_paths
+
+    auto build_match = [&](const std::string& term) { // fresh MATCH_PRED tree: slot "doc" + term
+        auto m = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::MATCH_PRED);
+        auto s = std::make_shared<collection_statistics::MockVSlotRef>("doc", 
SlotId(kSlotId));
+        auto l = std::make_shared<collection_statistics::MockVLiteral>(term);
+        m->_children.push_back(s); // child 0: the column reference
+        m->_children.push_back(l); // child 1: the search text
+        return m;
+    };
+
+    MatchPredicateCollector collector;
+    std::unordered_map<std::wstring, CollectInfo> collect_infos; // reused across both calls
+    auto first = collector.collect(runtime_state_.get(), tablet_schema, 
build_match("alpha"),
+                                   &collect_infos);
+    ASSERT_TRUE(first.ok()) << first.msg();
+    auto second = collector.collect(runtime_state_.get(), tablet_schema, 
build_match("beta"),
+                                    &collect_infos);
+    ASSERT_TRUE(second.ok()) << second.msg();
+    ASSERT_EQ(collect_infos.size(), 1u); // still one entry: the second call merged, not appended
+    auto it = 
collect_infos.find(StringHelper::to_wstring(std::to_string(kColUid))); // key: uid as wide string
+    ASSERT_NE(it, collect_infos.end());
+    EXPECT_GE(it->second.term_infos.size(), 2u); // both "alpha" and "beta" 
present
+}
+
+// Test-only subclass that exposes the protected helpers of PredicateCollector to the tests below.
+class TestablePredicateCollector : public MatchPredicateCollector {
+public:
+    using MatchPredicateCollector::build_field_name; // (unique id, suffix) -> field-name string
+    using MatchPredicateCollector::find_slot_ref; // locates the slot-ref node in an expr tree
+};
+
+// find_slot_ref: a null (default-constructed) shared_ptr returns nullptr (early-return branch).
+TEST_F(CollectionStatisticsTest, FindSlotRefHandlesNullExpr) {
+    TestablePredicateCollector collector;
+    VExprSPtr null_expr; // default-constructed, so it holds no expression
+    EXPECT_EQ(collector.find_slot_ref(null_expr), nullptr);
+}
+
+// find_slot_ref: when expr is a non-CAST wrapper (here a FUNCTION_CALL node) with a SLOT_REF
+// among its children, the recursive descent finds the slot via the for-loop body.
+TEST_F(CollectionStatisticsTest, FindSlotRefRecursesIntoChildren) {
+    TestablePredicateCollector collector;
+    auto wrapper = 
std::make_shared<collection_statistics::MockVExpr>(TExprNodeType::FUNCTION_CALL);
+    auto slot_ref = std::make_shared<collection_statistics::MockVSlotRef>("c", 
SlotId(99));
+    wrapper->_children.push_back(slot_ref); // the slot ref is the wrapper's only child
+    EXPECT_EQ(collector.find_slot_ref(wrapper), slot_ref.get()); // same object, not a copy
+}
+
+// find_slot_ref: a leaf that is not a slot ref (no children) returns nullptr after the for-loop.
+TEST_F(CollectionStatisticsTest, FindSlotRefReturnsNullForLeafNonSlot) {
+    TestablePredicateCollector collector;
+    auto literal = std::make_shared<collection_statistics::MockVLiteral>("x"); // childless literal
+    EXPECT_EQ(collector.find_slot_ref(literal), nullptr);
+}
+
+// build_field_name: a non-empty suffix is appended to the unique id with a dot separator.
+TEST_F(CollectionStatisticsTest, BuildFieldNameWithSuffix) {
+    TestablePredicateCollector collector;
+    EXPECT_EQ(collector.build_field_name(42, "a.b"), "42.a.b"); // dots inside the suffix are kept
+}
+
+// build_field_name: an empty suffix returns just the unique id as a string.
+TEST_F(CollectionStatisticsTest, BuildFieldNameWithoutSuffix) {
+    TestablePredicateCollector collector;
+    EXPECT_EQ(collector.build_field_name(42, ""), "42"); // no trailing dot is added
+}
+
 TEST(TermInfoComparerTest, OrdersByTermAndDedups) {
     using doris::TermInfoComparer;
     using doris::segment_v2::TermInfo;
@@ -651,4 +1307,4 @@ TEST(TermInfoComparerTest, OrdersByTermAndDedups) {
     EXPECT_THAT(ordered, ::testing::ElementsAre("apple", "banana", "cherry"));
 }
 
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/test/testutil/mock/mock_descriptors.h 
b/be/test/testutil/mock/mock_descriptors.h
index 4fec22bf7a1..cb8833cf8d8 100644
--- a/be/test/testutil/mock/mock_descriptors.h
+++ b/be/test/testutil/mock/mock_descriptors.h
@@ -20,6 +20,8 @@
 #include <gmock/gmock-function-mocker.h>
 #include <gmock/gmock.h>
 
+#include <memory>
+#include <unordered_map>
 #include <vector>
 
 #include "core/data_type/data_type.h"
@@ -106,13 +108,41 @@ public:
         _slot_descriptors[slot_id] = std::move(slot_desc);
     }
 
+    void add_slot_descriptor(SlotId slot_id, int32_t col_unique_id, const 
std::string& col_name,
+                             const std::vector<std::string>& column_paths) { // STRING slot
+        TTypeNode type_node; // the slot type is described by a single SCALAR node
+        type_node.__set_type(TTypeNodeType::SCALAR);
+        TScalarType scalar_type;
+        scalar_type.__set_type(TPrimitiveType::STRING);
+        type_node.__set_scalar_type(scalar_type);
+        TTypeDesc type_desc;
+        type_desc.types.push_back(type_node); // one-node type list: scalar STRING
+
+        TSlotDescriptor slot_desc; // only id, name, unique id and paths come from the arguments
+        slot_desc.__set_id(slot_id);
+        slot_desc.__set_parent(0); // fixed placeholder value
+        slot_desc.__set_slotType(type_desc);
+        slot_desc.__set_columnPos(0); // fixed placeholder value
+        slot_desc.__set_byteOffset(0); // fixed placeholder value
+        slot_desc.__set_nullIndicatorByte(0); // fixed placeholder value
+        slot_desc.__set_nullIndicatorBit(-1); // NOTE(review): presumably marks non-nullable - confirm
+        slot_desc.__set_colName(col_name);
+        slot_desc.__set_slotIdx(0); // fixed placeholder value
+        slot_desc.__set_isMaterialized(true);
+        slot_desc.__set_col_unique_id(col_unique_id);
+        slot_desc.__set_is_key(false);
+        slot_desc.__set_column_paths(column_paths); // callers in this patch pass {} for plain columns
+        slot_desc.__set_primitive_type(TPrimitiveType::STRING);
+        _slot_descriptors[slot_id] = 
std::make_unique<SlotDescriptor>(slot_desc); // replaces any entry already stored under slot_id
+    }
+
     SlotDescriptor* get_slot_descriptor(SlotId id) const override {
         auto it = _slot_descriptors.find(id);
         return it != _slot_descriptors.end() ? it->second.get() : nullptr;
     }
 
 private:
-    mutable std::unordered_map<SlotId, std::unique_ptr<MockSlopDescriptor>> 
_slot_descriptors;
+    mutable std::unordered_map<SlotId, std::unique_ptr<SlotDescriptor>> 
_slot_descriptors;
 };
 
 } // namespace doris
\ No newline at end of file
diff --git a/regression-test/suites/inverted_index_p0/test_bm25_score.groovy 
b/regression-test/suites/inverted_index_p0/test_bm25_score.groovy
index 2686011e89e..3a8ad125dc5 100644
--- a/regression-test/suites/inverted_index_p0/test_bm25_score.groovy
+++ b/regression-test/suites/inverted_index_p0/test_bm25_score.groovy
@@ -226,6 +226,53 @@ suite("test_bm25_score", "p0") {
         } finally {
         }
 
+        try { // score() on a variant sub-column whose index is declared with field_pattern
+            sql """ set enable_common_expr_pushdown = true; """
+            sql """ set enable_match_without_inverted_index = false; """ // presumably forces MATCH onto the inverted index - confirm
+            sql """ set default_variant_enable_typed_paths_to_sparse = false; 
"""
+            sql """ set default_variant_enable_doc_mode = false; """
+
+            sql "DROP TABLE IF EXISTS test_variant_field_pattern_score" // start from a clean table
+            sql """
+                CREATE TABLE test_variant_field_pattern_score (
+                    id INT,
+                    meta VARIANT<MATCH_NAME_GLOB 'user.*':text, 
PROPERTIES("variant_max_subcolumns_count"="0")>,
+                    INDEX idx_meta_user(meta) USING INVERTED PROPERTIES(
+                        "parser"="english",
+                        "support_phrase"="true",
+                        "field_pattern"="user.*"
+                    )
+                ) ENGINE=OLAP
+                DUPLICATE KEY(id)
+                DISTRIBUTED BY HASH(id) BUCKETS 1
+                PROPERTIES (
+                    "replication_allocation" = "tag.location.default: 1",
+                    "disable_auto_compaction" = "true"
+                )
+            """
+
+            sql """ insert into test_variant_field_pattern_score values(3, 
'{"other": "alice"}'); """ // row 3: "alice" sits under "other", outside the user.* pattern
+            sql """ sync """
+            sql """
+                insert into test_variant_field_pattern_score values
+                    (1, '{"user": {"name": "alice alpha"}}'),
+                    (2, '{"user": {"name": "bob beta"}}');
+            """ // rows 1 and 2 go in through a second, separate insert
+            sql """ sync """
+
+            def res = sql """
+                select id, score() as score
+                from test_variant_field_pattern_score
+                where cast(meta["user"]["name"] as string) match_phrase "alice"
+                order by score() desc
+                limit 10;
+            """
+            assertEquals(1, res.size()) // only one row has "alice" under user.name
+            assertEquals(1, res[0][0] as int) // and that row is id 1
+            assertTrue(Double.parseDouble(res[0][1].toString()) > 0.0) // its score() must be positive
+        } finally { // empty on purpose: the table is not dropped afterwards
+        }
+
         try {
             sql "DROP TABLE IF EXISTS t2"
             sql """ create table t2(a int, b int, s text) unique key(a) 
DISTRIBUTED BY HASH(a) buckets 1 PROPERTIES ("replication_allocation" = 
"tag.location.default: 1"); """
@@ -247,4 +294,4 @@ suite("test_bm25_score", "p0") {
         } finally {
         }
     }
-}
\ No newline at end of file
+}
diff --git 
a/regression-test/suites/inverted_index_p0/test_bm25_score_variant.groovy 
b/regression-test/suites/inverted_index_p0/test_bm25_score_variant.groovy
new file mode 100644
index 00000000000..885d311bdfc
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/test_bm25_score_variant.groovy
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_bm25_score_variant", "p0") { // score() over variant sub-columns: field_pattern index and plain parent index
+    if (isCloudMode()) { // the whole suite is skipped in cloud mode
+        return
+    }
+
+    sql """ set enable_common_expr_pushdown = true """
+    sql """ set enable_match_without_inverted_index = false """ // presumably forces MATCH onto the inverted index - confirm
+    sql """ set default_variant_enable_typed_paths_to_sparse = false """
+    sql """ set default_variant_enable_doc_mode = false """
+
+    // A1: field_pattern exact name (MATCH_NAME): the index targets only the "host" sub-column
+    try {
+        sql "DROP TABLE IF EXISTS test_bm25_score_variant_a1" // start from a clean table
+        sql """
+            CREATE TABLE test_bm25_score_variant_a1 (
+                id INT,
+                v variant<
+                    MATCH_NAME 'host' : text,
+                    PROPERTIES("variant_max_subcolumns_count"="0")
+                >,
+                INDEX idx_v_host (v) USING INVERTED PROPERTIES(
+                    "parser"="english",
+                    "support_phrase"="true",
+                    "field_pattern"="host"
+                )
+            ) ENGINE=OLAP DUPLICATE KEY(id)
+            DISTRIBUTED BY HASH(id) BUCKETS 1
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1",
+                "disable_auto_compaction" = "true"
+            )
+        """
+        sql """ insert into test_bm25_score_variant_a1 values
+                (1, '{"host":"alpha database server"}'),
+                (2, '{"host":"beta server cluster"}'),
+                (3, '{"other":"alpha"}')
+        """ // row 3 holds "alpha" under "other", which the query on v["host"] must not match
+        sql " sync "
+
+        def res = sql """
+            select id, score() as score
+            from test_bm25_score_variant_a1
+            where cast(v["host"] as string) match_phrase "alpha"
+            order by score() desc
+            limit 10
+        """
+        assertEquals(1, res.size()) // only one row has "alpha" under host
+        assertEquals(1, res[0][0] as int) // and that row is id 1
+        assertTrue(Double.parseDouble(res[0][1].toString()) > 0.0) // its score() must be positive
+    } finally { // empty on purpose: the table is not dropped afterwards
+    }
+
+    // C: plain parent inverted index (baseline; not the fallback path)
+    try {
+        sql "DROP TABLE IF EXISTS test_bm25_score_variant_c" // start from a clean table
+        sql """
+            CREATE TABLE test_bm25_score_variant_c (
+                id INT,
+                v VARIANT,
+                INDEX idx_v_plain (v) USING INVERTED PROPERTIES(
+                    "parser"="english",
+                    "support_phrase"="true"
+                )
+            ) ENGINE=OLAP DUPLICATE KEY(id)
+            DISTRIBUTED BY HASH(id) BUCKETS 1
+            PROPERTIES (
+                "replication_allocation" = "tag.location.default: 1",
+                "disable_auto_compaction" = "true"
+            )
+        """
+        sql """ insert into test_bm25_score_variant_c values
+                (1, '{"note":"latency spike at noon"}'),
+                (2, '{"note":"all green"}')
+        """ // only row 1 contains the term "latency"
+        sql " sync "
+
+        def res = sql """
+            select id, score() as score
+            from test_bm25_score_variant_c
+            where cast(v["note"] as string) match_phrase "latency"
+            order by score() desc
+            limit 10
+        """
+        assertEquals(1, res.size()) // only one row has "latency" under note
+        assertEquals(1, res[0][0] as int) // and that row is id 1
+        assertTrue(Double.parseDouble(res[0][1].toString()) > 0.0) // its score() must be positive
+    } finally { // empty on purpose: the table is not dropped afterwards
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to