This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f77e7b5f1a8 [fix](search) Fix MATCH_ALL_DOCS query failing in
multi-field search mode (#60784)
f77e7b5f1a8 is described below
commit f77e7b5f1a81802722442a7ff250ce7dab544e73
Author: Jack <[email protected]>
AuthorDate: Thu Feb 19 15:33:11 2026 +0800
[fix](search) Fix MATCH_ALL_DOCS query failing in multi-field search mode
(#60784)
### What problem does this PR solve?
Issue Number: close
Problem Summary:
When using `search('*', ...)` with multi-field options (`fields`
parameter), the query fails with:
```
only inverted index queries are supported
```
The root cause is in `SearchDslParser.java`: the multi-field parsing
methods (`parseDslMultiFieldMode` and `parseDslMultiFieldLuceneMode`)
collect field bindings by calling `collectFieldNames()` on the expanded
AST. When the query is `*` (match all), the AST node is `MATCH_ALL_DOCS`
which has no field set — by design it matches all documents regardless
of field. This caused `collectFieldNames` to return an empty set,
resulting in no field bindings. Without field bindings,
`RewriteSearchToSlots` couldn't create slot references, so the search
expression was never pushed down to the inverted index path, and BE fell
back to `execute_impl()` which returns the error.
**Fix**: After `collectFieldNames()`, if the result is empty, fall back
to using the original `fields` list as field bindings. This ensures the
push-down mechanism works for `MATCH_ALL_DOCS` queries.
**Reproducing queries** (from bug report):
```sql
select count(*) from wikipedia where search('*', '{"fields":["title",
"content"], "type": "best_fields", "default_operator":"AND","mode":"lucene",
"minimum_should_match": 0}');
select count(*) from wikipedia where search('*', '{"default_field":
"title", "default_operator":"AND","mode":"lucene", "minimum_should_match": 0}');
```
---
.../functions/scalar/SearchDslParser.java | 10 +++
.../functions/scalar/SearchDslParserTest.java | 85 ++++++++++++++++++++++
.../data/search/test_search_multi_field.out | 12 +++
.../suites/search/test_search_multi_field.groovy | 30 ++++++++
4 files changed, 137 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index fbaba2b6e5d..14e55fce15c 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -453,6 +453,11 @@ public class SearchDslParser {
// Extract field bindings from expanded AST
Set<String> fieldNames = collectFieldNames(expandedRoot);
+ // If no fields were collected (e.g., MATCH_ALL_DOCS query that matches all docs
+ // regardless of field), use the original fields list to ensure proper push-down
+ if (fieldNames.isEmpty()) {
+ fieldNames = new LinkedHashSet<>(fields);
+ }
List<QsFieldBinding> bindings = new ArrayList<>();
int slotIndex = 0;
for (String fieldName : fieldNames) {
@@ -533,6 +538,11 @@ public class SearchDslParser {
// Extract field bindings from expanded AST
Set<String> fieldNames = collectFieldNames(expandedRoot);
+ // If no fields were collected (e.g., MATCH_ALL_DOCS query that matches all docs
+ // regardless of field), use the original fields list to ensure proper push-down
+ if (fieldNames.isEmpty()) {
+ fieldNames = new LinkedHashSet<>(fields);
+ }
List<QsFieldBinding> bindings = new ArrayList<>();
int slotIndex = 0;
for (String fieldName : fieldNames) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
index 214f309bded..704874fa0d9 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
@@ -1949,4 +1949,89 @@ public class SearchDslParserTest {
Assertions.assertTrue(hasTerm, "Should contain TERM node for
'Dollar'");
Assertions.assertTrue(hasMatchAll, "Should contain MATCH_ALL_DOCS node
for '*'");
}
+
+ // ============ Tests for MATCH_ALL_DOCS in multi-field mode ============
+
+ @Test
+ public void testMultiFieldMatchAllDocsBestFieldsLuceneMode() {
+ // Test: "*" with best_fields + lucene mode should produce
MATCH_ALL_DOCS
+ // with field bindings for all specified fields (needed for push-down)
+ String dsl = "*";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"type\":\"best_fields\","
+ +
"\"default_operator\":\"AND\",\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.MATCH_ALL_DOCS,
plan.getRoot().getType());
+
+ // Must have field bindings for push-down to work
+ Assertions.assertNotNull(plan.getFieldBindings());
+ Assertions.assertFalse(plan.getFieldBindings().isEmpty(),
+ "MATCH_ALL_DOCS in multi-field mode must have field bindings
for push-down");
+ Assertions.assertEquals(2, plan.getFieldBindings().size());
+
+ // Verify field names
+ java.util.List<String> bindingNames = plan.getFieldBindings().stream()
+
.map(QsFieldBinding::getFieldName).collect(java.util.stream.Collectors.toList());
+ Assertions.assertTrue(bindingNames.contains("title"));
+ Assertions.assertTrue(bindingNames.contains("content"));
+ }
+
+ @Test
+ public void testMultiFieldMatchAllDocsCrossFieldsLuceneMode() {
+ // Test: "*" with cross_fields + lucene mode
+ String dsl = "*";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\","
+ +
"\"default_operator\":\"AND\",\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.MATCH_ALL_DOCS,
plan.getRoot().getType());
+
+ // Must have field bindings for push-down
+ Assertions.assertNotNull(plan.getFieldBindings());
+ Assertions.assertFalse(plan.getFieldBindings().isEmpty(),
+ "MATCH_ALL_DOCS in multi-field mode must have field bindings
for push-down");
+ Assertions.assertEquals(2, plan.getFieldBindings().size());
+ }
+
+ @Test
+ public void testMultiFieldMatchAllDocsStandardMode() {
+ // Test: "*" with multi-field standard mode (no lucene)
+ String dsl = "*";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"type\":\"best_fields\","
+ + "\"default_operator\":\"AND\"}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+
+ // Must have field bindings for push-down
+ Assertions.assertNotNull(plan.getFieldBindings());
+ Assertions.assertFalse(plan.getFieldBindings().isEmpty(),
+ "MATCH_ALL_DOCS in multi-field standard mode must have field
bindings for push-down");
+ Assertions.assertEquals(2, plan.getFieldBindings().size());
+ }
+
+ @Test
+ public void testSingleFieldMatchAllDocsLuceneMode() {
+ // Test: "*" with single default_field + lucene mode should have field
binding
+ String dsl = "*";
+ String options =
"{\"default_field\":\"title\",\"default_operator\":\"AND\","
+ + "\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.MATCH_ALL_DOCS,
plan.getRoot().getType());
+
+ // Must have field bindings for push-down
+ Assertions.assertNotNull(plan.getFieldBindings());
+ Assertions.assertFalse(plan.getFieldBindings().isEmpty(),
+ "MATCH_ALL_DOCS with default_field must have field bindings
for push-down");
+ Assertions.assertEquals(1, plan.getFieldBindings().size());
+ Assertions.assertEquals("title",
plan.getFieldBindings().get(0).getFieldName());
+ }
}
diff --git a/regression-test/data/search/test_search_multi_field.out
b/regression-test/data/search/test_search_multi_field.out
index e22811bddde..59a901665cc 100644
--- a/regression-test/data/search/test_search_multi_field.out
+++ b/regression-test/data/search/test_search_multi_field.out
@@ -122,3 +122,15 @@
1 machine learning basics
9 machine guide
+-- !multi_field_match_all_best_fields --
+9
+
+-- !multi_field_match_all_cross_fields --
+9
+
+-- !match_all_single_field --
+9
+
+-- !multi_field_match_all_standard --
+9
+
diff --git a/regression-test/suites/search/test_search_multi_field.groovy
b/regression-test/suites/search/test_search_multi_field.groovy
index cc94a31bc14..bd55a874f41 100644
--- a/regression-test/suites/search/test_search_multi_field.groovy
+++ b/regression-test/suites/search/test_search_multi_field.groovy
@@ -294,6 +294,36 @@ suite("test_search_multi_field") {
ORDER BY id
"""
+ // ============ Test 23: MATCH_ALL_DOCS (*) with best_fields + lucene mode ============
+ // Regression test for DORIS-24536: search('*', ...) with multi-field should not error
+ // "*" is a match-all query that should return all rows
+ qt_multi_field_match_all_best_fields """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*)
+ FROM ${tableName}
+ WHERE search('*',
'{"fields":["title","content"],"type":"best_fields","default_operator":"AND","mode":"lucene","minimum_should_match":0}')
+ """
+
+ // ============ Test 24: MATCH_ALL_DOCS (*) with cross_fields + lucene
mode ============
+ qt_multi_field_match_all_cross_fields """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*)
+ FROM ${tableName}
+ WHERE search('*',
'{"fields":["title","content"],"type":"cross_fields","default_operator":"AND","mode":"lucene","minimum_should_match":0}')
+ """
+
+ // ============ Test 25: MATCH_ALL_DOCS (*) with single default_field +
lucene mode ============
+ qt_match_all_single_field """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*)
+ FROM ${tableName}
+ WHERE search('*',
'{"default_field":"title","default_operator":"AND","mode":"lucene","minimum_should_match":0}')
+ """
+
+ // ============ Test 26: MATCH_ALL_DOCS (*) with best_fields standard mode
(no lucene) ============
+ qt_multi_field_match_all_standard """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*)
+ FROM ${tableName}
+ WHERE search('*',
'{"fields":["title","content"],"type":"best_fields","default_operator":"AND"}')
+ """
+
// Cleanup
sql "DROP TABLE IF EXISTS ${tableName}"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]