This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 939b1f9097b [refactor](search) Refactor SearchDslParser to
single-phase ANTLR parsing (#60635)
939b1f9097b is described below
commit 939b1f9097b9399077b0318fe99ecd77ce8bff0e
Author: Jack <[email protected]>
AuthorDate: Tue Feb 10 22:24:37 2026 +0800
[refactor](search) Refactor SearchDslParser to single-phase ANTLR parsing
(#60635)
### What problem does this PR solve?
Issue Number: close #None
Related PR: #None
Problem Summary:
The previous two-phase parsing architecture in SearchDslParser had a
critical bug where hand-written string preprocessing was mangling regex
patterns like `/(\\d{1,2}:\\d{2} [AP]M)/`. This refactoring eliminates
the preprocessing phase by making the ANTLR grammar natively support:
- **bareQuery**: Terms without a field prefix are now parsed directly by
ANTLR, and the visitor fills in the default field
- **Implicit conjunction**: Space-separated terms are joined using
`default_operator` (AND/OR); the grammar makes the `AND` token optional
- **Lucene mode semantics**: Multi-field expansion properly handles
`OCCUR_BOOLEAN` via a new `luceneMode` parameter
- **Explicit field preservation**: Fields not in the expansion list are
preserved correctly
### Release note
Refactor SearchDslParser to single-phase ANTLR parsing, fixing regex
pattern handling in search DSL.
---
.../apache/doris/nereids/search/SearchParser.g4 | 10 +-
.../functions/scalar/SearchDslParser.java | 1359 ++++++++++----------
.../functions/scalar/SearchDslParserTest.java | 315 ++++-
3 files changed, 991 insertions(+), 693 deletions(-)
diff --git
a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
index 0b3b9734883..cc5f6082cd6 100644
--- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
+++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/search/SearchParser.g4
@@ -22,12 +22,18 @@ options { tokenVocab=SearchLexer; }
search : clause EOF ;
clause : orClause ;
orClause : andClause (OR andClause)* ;
-andClause : notClause (AND notClause)* ;
+// AND is optional - space-separated terms use default_operator
+andClause : notClause (AND? notClause)* ;
notClause : NOT atomClause | atomClause ;
-atomClause : LPAREN clause RPAREN | fieldQuery ;
+// Note: fieldQuery is listed before bareQuery so ANTLR prioritizes
field:value over bare value.
+// This ensures "field:term" is parsed as fieldQuery, not bareQuery with
"field" as term.
+atomClause : LPAREN clause RPAREN | fieldQuery | bareQuery ;
// Support for variant subcolumn paths (e.g., field.subcolumn, field.sub1.sub2)
fieldQuery : fieldPath COLON searchValue ;
+
+// Bare query without field prefix - uses default_field
+bareQuery : searchValue ;
fieldPath : fieldSegment (DOT fieldSegment)* ;
fieldSegment : TERM | QUOTED ;
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index 82cbdcdf240..61e253d710e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -144,22 +144,22 @@ public class SearchDslParser {
/**
* Standard mode parsing (original behavior)
+ * Now uses single-phase parsing: ANTLR grammar natively supports bare
queries (without field prefix),
+ * and the visitor fills in the default field.
*/
private static QsPlan parseDslStandardMode(String dsl, String
defaultField, String defaultOperator) {
if (dsl == null || dsl.trim().isEmpty()) {
return new QsPlan(new QsNode(QsClauseType.TERM, "error",
"empty_dsl"), new ArrayList<>());
}
- // Expand simplified DSL if default field is provided
- String expandedDsl = dsl;
- if (defaultField != null && !defaultField.trim().isEmpty()) {
- expandedDsl = expandSimplifiedDsl(dsl.trim(), defaultField.trim(),
- normalizeDefaultOperator(defaultOperator));
- }
+ // Parse original DSL directly - no preprocessing needed
+ // The ANTLR grammar now supports bareQuery (without field prefix)
+ // and QsAstBuilder will use defaultField for bare queries
+ String trimmedDsl = dsl.trim();
try {
// Create ANTLR lexer and parser
- SearchLexer lexer = new
SearchLexer(CharStreams.fromString(expandedDsl));
+ SearchLexer lexer = new
SearchLexer(CharStreams.fromString(trimmedDsl));
CommonTokenStream tokens = new CommonTokenStream(lexer);
SearchParser parser = new SearchParser(tokens);
@@ -184,8 +184,8 @@ public class SearchDslParser {
throw new SearchDslSyntaxException("Invalid search DSL syntax:
parsing returned null");
}
- // Build AST using visitor pattern
- QsAstBuilder visitor = new QsAstBuilder();
+ // Build AST using visitor pattern with defaultField and
defaultOperator for bare queries
+ QsAstBuilder visitor = new QsAstBuilder(defaultField,
defaultOperator);
QsNode root = visitor.visit(tree);
// Extract field bindings
@@ -200,25 +200,30 @@ public class SearchDslParser {
} catch (SearchDslSyntaxException e) {
// Syntax error in DSL - user input issue
- LOG.error("Failed to parse search DSL: '{}' (expanded: '{}')",
dsl, expandedDsl, e);
+ LOG.error("Failed to parse search DSL: '{}' (defaultField={},
defaultOperator={})",
+ dsl, defaultField, defaultOperator, e);
throw new SearchDslSyntaxException("Invalid search DSL: " + dsl +
". " + e.getMessage(), e);
} catch (IllegalArgumentException e) {
// Invalid argument - user input issue
- LOG.error("Invalid argument in search DSL: '{}' (expanded: '{}')",
dsl, expandedDsl, e);
+ LOG.error("Invalid argument in search DSL: '{}' (defaultField={},
defaultOperator={})",
+ dsl, defaultField, defaultOperator, e);
throw new IllegalArgumentException("Invalid search DSL argument: "
+ dsl + ". " + e.getMessage(), e);
} catch (NullPointerException e) {
// Internal error - programming bug
- LOG.error("Internal error (NPE) while parsing search DSL: '{}'
(expanded: '{}')", dsl, expandedDsl, e);
+ LOG.error("Internal error (NPE) while parsing search DSL: '{}'
(defaultField={}, defaultOperator={})",
+ dsl, defaultField, defaultOperator, e);
throw new RuntimeException("Internal error while parsing search
DSL: " + dsl
+ ". This may be a bug. Details: " + e.getMessage(), e);
} catch (IndexOutOfBoundsException e) {
// Internal error - programming bug
- LOG.error("Internal error (IOOB) while parsing search DSL: '{}'
(expanded: '{}')", dsl, expandedDsl, e);
+ LOG.error("Internal error (IOOB) while parsing search DSL: '{}'
(defaultField={}, defaultOperator={})",
+ dsl, defaultField, defaultOperator, e);
throw new RuntimeException("Internal error while parsing search
DSL: " + dsl
+ ". This may be a bug. Details: " + e.getMessage(), e);
} catch (RuntimeException e) {
// Other runtime errors
- LOG.error("Unexpected error while parsing search DSL: '{}'
(expanded: '{}')", dsl, expandedDsl, e);
+ LOG.error("Unexpected error while parsing search DSL: '{}'
(defaultField={}, defaultOperator={})",
+ dsl, defaultField, defaultOperator, e);
throw new RuntimeException("Unexpected error parsing search DSL: "
+ dsl + ". " + e.getMessage(), e);
}
}
@@ -238,283 +243,6 @@ public class SearchDslParser {
+ ". Must be 'and' or 'or'");
}
- /**
- * Expand simplified DSL to full DSL format
- * <p>
- * Examples:
- * - "foo bar" + field="tags" + operator="and" → "tags:ALL(foo bar)"
- * - "foo* bar*" + field="tags" + operator="and" → "tags:foo* AND
tags:bar*"
- * - "foo OR bar" + field="tags" → "tags:foo OR tags:bar"
- * - "EXACT(foo bar)" + field="tags" → "tags:EXACT(foo bar)"
- *
- * @param dsl Simple DSL string
- * @param defaultField Default field name
- * @param defaultOperator "and" or "or"
- * @return Expanded full DSL
- */
- private static String expandSimplifiedDsl(String dsl, String defaultField,
String defaultOperator) {
- // 1. If DSL already contains field names (colon), return as-is
- if (containsFieldReference(dsl)) {
- return dsl;
- }
-
- // 2. Check if DSL starts with a function keyword (EXACT, ANY, ALL, IN)
- if (startsWithFunction(dsl)) {
- return defaultField + ":" + dsl;
- }
-
- // 3. Check for explicit boolean operators in DSL
- if (containsExplicitOperators(dsl)) {
- return addFieldPrefixToOperatorExpression(dsl, defaultField);
- }
-
- // 4. Tokenize and analyze terms
- List<String> terms = tokenizeDsl(dsl);
- if (terms.isEmpty()) {
- return defaultField + ":" + dsl;
- }
-
- // 5. Single term - simple case
- if (terms.size() == 1) {
- return defaultField + ":" + terms.get(0);
- }
-
- // 6. Multiple terms - check for wildcards
- boolean hasWildcard =
terms.stream().anyMatch(SearchDslParser::containsWildcard);
-
- if (hasWildcard) {
- // Wildcards cannot be tokenized - must create separate field
queries
- String operator = "and".equals(defaultOperator) ? " AND " : " OR ";
- return terms.stream()
- .map(term -> defaultField + ":" + term)
- .collect(java.util.stream.Collectors.joining(operator));
- } else {
- // Regular multi-term query - use ANY/ALL
- String clauseType = "and".equals(defaultOperator) ? "ALL" : "ANY";
- return defaultField + ":" + clauseType + "(" + dsl + ")";
- }
- }
-
- /**
- * Check if DSL contains field references (has colon not in quoted strings
or escaped)
- */
- private static boolean containsFieldReference(String dsl) {
- boolean inQuotes = false;
- boolean inRegex = false;
- for (int i = 0; i < dsl.length(); i++) {
- char c = dsl.charAt(i);
- // Handle escape sequences - skip the escaped character
- if (c == '\\' && i + 1 < dsl.length()) {
- i++; // Skip next character (it's escaped)
- continue;
- }
- if (c == '"') {
- inQuotes = !inQuotes;
- } else if (c == '/' && !inQuotes) {
- inRegex = !inRegex;
- } else if (c == ':' && !inQuotes && !inRegex) {
- return true;
- }
- }
- return false;
- }
-
- /**
- * Check if DSL starts with function keywords
- */
- private static boolean startsWithFunction(String dsl) {
- String upper = dsl.toUpperCase();
- return upper.startsWith("EXACT(")
- || upper.startsWith("ANY(")
- || upper.startsWith("ALL(")
- || upper.startsWith("IN(");
- }
-
- /**
- * Check if DSL contains explicit boolean operators (AND/OR/NOT)
- */
- private static boolean containsExplicitOperators(String dsl) {
- // Look for standalone AND/OR/NOT keywords (not part of field names)
- String upper = dsl.toUpperCase();
- return upper.matches(".*\\s+(AND|OR)\\s+.*")
- || upper.matches("^NOT\\s+.*")
- || upper.matches(".*\\s+NOT\\s+.*");
- }
-
- /**
- * Add field prefix to expressions with explicit operators
- * Example: "foo AND bar" → "field:foo AND field:bar"
- * Handles escape sequences properly (e.g., "First\ Value" stays as single
term)
- */
- private static String addFieldPrefixToOperatorExpression(String dsl,
String defaultField) {
- StringBuilder result = new StringBuilder();
- StringBuilder currentTerm = new StringBuilder();
- int i = 0;
-
- while (i < dsl.length()) {
- // Skip whitespace (but not escaped whitespace)
- while (i < dsl.length() && Character.isWhitespace(dsl.charAt(i))) {
- i++;
- }
- if (i >= dsl.length()) {
- break;
- }
-
- // Handle escape sequences - include both backslash and next char
- if (dsl.charAt(i) == '\\' && i + 1 < dsl.length()) {
- currentTerm.append(dsl.charAt(i));
- currentTerm.append(dsl.charAt(i + 1));
- i += 2;
- continue;
- }
-
- // Try to match operators
- String remaining = dsl.substring(i);
- String upperRemaining = remaining.toUpperCase();
-
- if (upperRemaining.startsWith("AND ") ||
upperRemaining.startsWith("AND\t")
- || (upperRemaining.equals("AND") && i + 3 >=
dsl.length())) {
- // Found AND operator
- if (currentTerm.length() > 0) {
- if (result.length() > 0) {
- result.append(" ");
- }
-
result.append(defaultField).append(":").append(currentTerm.toString().trim());
- currentTerm.setLength(0);
- }
- if (result.length() > 0) {
- result.append(" ");
- }
- result.append(dsl.substring(i, i + 3)); // Preserve original
case
- i += 3;
- continue;
- } else if (upperRemaining.startsWith("OR ") ||
upperRemaining.startsWith("OR\t")
- || (upperRemaining.equals("OR") && i + 2 >= dsl.length()))
{
- // Found OR operator
- if (currentTerm.length() > 0) {
- if (result.length() > 0) {
- result.append(" ");
- }
-
result.append(defaultField).append(":").append(currentTerm.toString().trim());
- currentTerm.setLength(0);
- }
- if (result.length() > 0) {
- result.append(" ");
- }
- result.append(dsl.substring(i, i + 2)); // Preserve original
case
- i += 2;
- continue;
- } else if (upperRemaining.startsWith("NOT ") ||
upperRemaining.startsWith("NOT\t")
- || (upperRemaining.equals("NOT") && i + 3 >=
dsl.length())) {
- // Found NOT operator
- if (currentTerm.length() > 0) {
- if (result.length() > 0) {
- result.append(" ");
- }
-
result.append(defaultField).append(":").append(currentTerm.toString().trim());
- currentTerm.setLength(0);
- }
- if (result.length() > 0) {
- result.append(" ");
- }
- result.append(dsl.substring(i, i + 3)); // Preserve original
case
- i += 3;
- continue;
- }
-
- // Not an operator, accumulate term
- currentTerm.append(dsl.charAt(i));
- i++;
- }
-
- // Add last term
- if (currentTerm.length() > 0) {
- if (result.length() > 0) {
- result.append(" ");
- }
-
result.append(defaultField).append(":").append(currentTerm.toString().trim());
- }
-
- return result.toString().trim();
- }
-
- /**
- * Tokenize DSL into terms (split by whitespace, respecting quotes,
escapes, and functions)
- */
- private static List<String> tokenizeDsl(String dsl) {
- List<String> terms = new ArrayList<>();
- StringBuilder currentTerm = new StringBuilder();
- boolean inQuotes = false;
- boolean inParens = false;
- int parenDepth = 0;
-
- for (int i = 0; i < dsl.length(); i++) {
- char c = dsl.charAt(i);
-
- if (c == '"' && (i == 0 || dsl.charAt(i - 1) != '\\')) {
- inQuotes = !inQuotes;
- currentTerm.append(c);
- } else if (c == '(' && !inQuotes) {
- parenDepth++;
- inParens = true;
- currentTerm.append(c);
- } else if (c == ')' && !inQuotes) {
- parenDepth--;
- if (parenDepth == 0) {
- inParens = false;
- }
- currentTerm.append(c);
- } else if (c == '\\' && i + 1 < dsl.length()) {
- // Escape sequence - include both backslash and next char in
term
- currentTerm.append(c);
- currentTerm.append(dsl.charAt(i + 1));
- i++; // Skip next character
- } else if (Character.isWhitespace(c) && !inQuotes && !inParens) {
- // End of term (only if not escaped - handled above)
- if (currentTerm.length() > 0) {
- terms.add(currentTerm.toString());
- currentTerm.setLength(0); // Reuse StringBuilder instead
of creating new one
- }
- } else {
- currentTerm.append(c);
- }
- }
-
- // Add last term
- if (currentTerm.length() > 0) {
- terms.add(currentTerm.toString());
- }
-
- return terms;
- }
-
- /**
- * Check if a term contains wildcard characters (* or ?)
- * Escaped wildcards (\* or \?) are not counted.
- */
- private static boolean containsWildcard(String term) {
- // Ignore wildcards in quoted strings or regex
- if (term.startsWith("\"") && term.endsWith("\"")) {
- return false;
- }
- if (term.startsWith("/") && term.endsWith("/")) {
- return false;
- }
- // Check for unescaped wildcards
- for (int i = 0; i < term.length(); i++) {
- char c = term.charAt(i);
- if (c == '\\' && i + 1 < term.length()) {
- // Skip escaped character
- i++;
- continue;
- }
- if (c == '*' || c == '?') {
- return true;
- }
- }
- return false;
- }
-
// ============ Common Helper Methods ============
/**
@@ -543,6 +271,33 @@ public class SearchDslParser {
}
}
+ /**
+ * Collect all field names from an AST node recursively.
+ * @param node The AST node to collect from
+ * @return Set of field names
+ */
+ private static Set<String> collectFieldNames(QsNode node) {
+ Set<String> fieldNames = new LinkedHashSet<>();
+ collectFieldNamesRecursive(node, fieldNames);
+ return fieldNames;
+ }
+
+ private static void collectFieldNamesRecursive(QsNode node, Set<String>
fieldNames) {
+ if (node == null) {
+ return;
+ }
+ // Add field name if it's a leaf node with a field
+ if (node.getField() != null && !node.getField().isEmpty()) {
+ fieldNames.add(node.getField());
+ }
+ // Recursively collect from children
+ if (node.getChildren() != null) {
+ for (QsNode child : node.getChildren()) {
+ collectFieldNamesRecursive(child, fieldNames);
+ }
+ }
+ }
+
/**
* Common ANTLR parsing helper with visitor pattern.
* Reduces code duplication across parsing methods.
@@ -626,26 +381,6 @@ public class SearchDslParser {
}
}
- /**
- * Expand a single item (term or function) across multiple fields with OR.
- * Example: "hello" + ["title", "content"] -> "(title:hello OR
content:hello)"
- * Example: "EXACT(foo)" + ["title", "content"] -> "(title:EXACT(foo) OR
content:EXACT(foo))"
- *
- * @param item The term or function to expand
- * @param fields List of field names
- * @return Expanded DSL string
- */
- private static String expandItemAcrossFields(String item, List<String>
fields) {
- if (fields.size() == 1) {
- return fields.get(0) + ":" + item;
- }
- return fields.stream()
- .map(field -> field + ":" + item)
- .collect(Collectors.joining(" OR ", "(", ")"));
- }
-
- // ============ Multi-Field Expansion Methods ============
-
/**
* Parse DSL in multi-field mode.
* Expansion behavior depends on the type option:
@@ -665,20 +400,66 @@ public class SearchDslParser {
}
validateFieldsList(fields);
- String normalizedOperator = normalizeDefaultOperator(defaultOperator);
- String expandedDsl;
- if (options.isCrossFieldsMode()) {
- // cross_fields: terms can be across different fields
- expandedDsl = expandMultiFieldDsl(dsl.trim(), fields,
normalizedOperator);
- } else if (options.isBestFieldsMode()) {
- // best_fields: all terms must be in the same field
- expandedDsl = expandMultiFieldDslBestFields(dsl.trim(), fields,
normalizedOperator);
- } else {
- // Should never happen due to setType() validation, but provide
fallback
- throw new IllegalStateException(
- "Invalid type value: '" + options.getType() + "'. Expected
'best_fields' or 'cross_fields'");
- }
- return parseWithVisitor(expandedDsl, parser -> new QsAstBuilder(),
dsl, "multi-field mode");
+ String trimmedDsl = dsl.trim();
+
+ try {
+ // Parse original DSL directly using first field as placeholder
for bare queries
+ // The AST will be expanded to all fields in post-processing
+ SearchLexer lexer = new
SearchLexer(CharStreams.fromString(trimmedDsl));
+ CommonTokenStream tokens = new CommonTokenStream(lexer);
+ SearchParser parser = new SearchParser(tokens);
+
+ parser.removeErrorListeners();
+ parser.addErrorListener(new
org.antlr.v4.runtime.BaseErrorListener() {
+ @Override
+ public void syntaxError(org.antlr.v4.runtime.Recognizer<?, ?>
recognizer,
+ Object offendingSymbol,
+ int line, int charPositionInLine,
+ String msg, org.antlr.v4.runtime.RecognitionException
e) {
+ throw new SearchDslSyntaxException("Syntax error at line "
+ line
+ + ":" + charPositionInLine + " " + msg);
+ }
+ });
+
+ ParseTree tree = parser.search();
+ if (tree == null) {
+ throw new SearchDslSyntaxException("Invalid search DSL syntax:
parsing returned null");
+ }
+
+ // Build AST using first field as placeholder for bare queries,
with default operator
+ QsAstBuilder visitor = new QsAstBuilder(fields.get(0),
defaultOperator);
+ QsNode root = visitor.visit(tree);
+
+ // Apply multi-field expansion based on type
+ QsNode expandedRoot;
+ if (options.isCrossFieldsMode()) {
+ // cross_fields: each term expands to (field1:term OR
field2:term)
+ expandedRoot = MultiFieldExpander.expandCrossFields(root,
fields);
+ } else if (options.isBestFieldsMode()) {
+ // best_fields: entire query copied per field, joined with OR
+ expandedRoot = MultiFieldExpander.expandBestFields(root,
fields);
+ } else {
+ throw new IllegalStateException(
+ "Invalid type value: '" + options.getType() + "'.
Expected 'best_fields' or 'cross_fields'");
+ }
+
+ // Extract field bindings from expanded AST
+ Set<String> fieldNames = collectFieldNames(expandedRoot);
+ List<QsFieldBinding> bindings = new ArrayList<>();
+ int slotIndex = 0;
+ for (String fieldName : fieldNames) {
+ bindings.add(new QsFieldBinding(fieldName, slotIndex++));
+ }
+
+ return new QsPlan(expandedRoot, bindings);
+
+ } catch (SearchDslSyntaxException e) {
+ LOG.error("Failed to parse search DSL in multi-field mode: '{}'",
dsl, e);
+ throw new SearchDslSyntaxException("Invalid search DSL: " + dsl +
". " + e.getMessage(), e);
+ } catch (RuntimeException e) {
+ LOG.error("Unexpected error while parsing search DSL in
multi-field mode: '{}'", dsl, e);
+ throw new RuntimeException("Unexpected error parsing search DSL: "
+ dsl + ". " + e.getMessage(), e);
+ }
}
/**
@@ -699,324 +480,67 @@ public class SearchDslParser {
}
validateFieldsList(fields);
- String normalizedOperator = normalizeDefaultOperator(defaultOperator);
- String expandedDsl;
- if (options.isCrossFieldsMode()) {
- // cross_fields: terms can be across different fields
- expandedDsl = expandMultiFieldDsl(dsl.trim(), fields,
normalizedOperator);
- } else if (options.isBestFieldsMode()) {
- // best_fields: all terms must be in the same field
- expandedDsl = expandMultiFieldDslBestFields(dsl.trim(), fields,
normalizedOperator);
- } else {
- // Should never happen due to setType() validation, but provide
fallback
- throw new IllegalStateException(
- "Invalid type value: '" + options.getType() + "'. Expected
'best_fields' or 'cross_fields'");
- }
- return parseWithVisitor(expandedDsl, parser -> new
QsLuceneModeAstBuilder(options),
- dsl, "multi-field Lucene mode");
- }
-
- /**
- * Expand simplified DSL to multi-field format.
- * Each term without field prefix is expanded to OR across all fields.
- *
- * @param dsl Simple DSL string
- * @param fields List of field names to search
- * @param defaultOperator "and" or "or" for joining term groups
- * @return Expanded full DSL
- */
- private static String expandMultiFieldDsl(String dsl, List<String> fields,
String defaultOperator) {
- // Note: fields validation is done by validateFieldsList() before
calling this method
- if (fields.size() == 1) {
- // Single field - delegate to existing method
- return expandSimplifiedDsl(dsl, fields.get(0), defaultOperator);
- }
-
- // 1. If DSL already contains field names, handle mixed case
- if (containsFieldReference(dsl)) {
- return expandOperatorExpressionAcrossFields(dsl, fields);
- }
-
- // 2. Check if DSL starts with a function keyword (EXACT, ANY, ALL, IN)
- if (startsWithFunction(dsl)) {
- // Expand function across fields: EXACT(foo) -> (f1:EXACT(foo) OR
f2:EXACT(foo))
- return expandFunctionAcrossFields(dsl, fields);
- }
-
- // 3. Check for explicit boolean operators in DSL
- if (containsExplicitOperators(dsl)) {
- return expandOperatorExpressionAcrossFields(dsl, fields);
- }
-
- // 4. Tokenize and analyze terms
- List<String> terms = tokenizeDsl(dsl);
- if (terms.isEmpty()) {
- return expandTermAcrossFields(dsl, fields);
- }
-
- // 5. Single term - expand across fields
- if (terms.size() == 1) {
- return expandTermAcrossFields(terms.get(0), fields);
- }
-
- // 6. Multiple terms - expand each across fields, join with operator
- String joinOperator = "and".equals(defaultOperator) ? " AND " : " OR ";
- StringBuilder result = new StringBuilder();
- for (int i = 0; i < terms.size(); i++) {
- if (i > 0) {
- result.append(joinOperator);
- }
- result.append(expandTermAcrossFields(terms.get(i), fields));
- }
- return result.toString();
- }
-
- /**
- * Expand multi-field DSL using best_fields semantics.
- * Each field is wrapped with all terms joined by the default operator,
then fields are ORed.
- *
- * Example: "machine learning" with fields ["title", "content"] and
default_operator "and"
- * Result: (title:machine AND title:learning) OR (content:machine AND
content:learning)
- *
- * @param dsl Simple DSL string
- * @param fields List of field names to search
- * @param defaultOperator "and" or "or" for joining terms within each field
- * @return Expanded full DSL with best_fields semantics
- */
- private static String expandMultiFieldDslBestFields(String dsl,
List<String> fields,
- String defaultOperator) {
- // Note: fields validation is done by validateFieldsList() before
calling this method
- if (fields.size() == 1) {
- // Single field - delegate to existing method
- return expandSimplifiedDsl(dsl, fields.get(0), defaultOperator);
- }
-
- // 1. Check for leading NOT - must use cross_fields semantics for
correct negation
- // "NOT hello" should expand to "NOT (title:hello OR content:hello)"
- // rather than "(NOT title:hello) OR (NOT content:hello)" which has
wrong semantics
String trimmedDsl = dsl.trim();
- if (trimmedDsl.toUpperCase().startsWith("NOT ")
- || trimmedDsl.toUpperCase().startsWith("NOT\t")) {
- // Use cross_fields expansion for leading NOT
- return expandOperatorExpressionAcrossFields(dsl, fields);
- }
-
- // 2. If DSL contains field references or explicit operators, apply
best_fields
- // by expanding the entire expression per field and ORing the results
- if (containsFieldReference(dsl) || containsExplicitOperators(dsl)) {
- return expandOperatorExpressionAcrossFieldsBestFields(dsl, fields,
defaultOperator);
- }
-
- // 3. Check if DSL starts with a function keyword (EXACT, ANY, ALL, IN)
- if (startsWithFunction(dsl)) {
- // For functions, use cross_fields approach (function applied to
each field)
- return expandFunctionAcrossFields(dsl, fields);
- }
-
- // 4. Tokenize and analyze terms
- List<String> terms = tokenizeDsl(dsl);
- if (terms.isEmpty()) {
- // Single term case - expand across fields with OR
- return expandTermAcrossFields(dsl, fields);
- }
-
- // 5. Single term - expand across fields with OR
- if (terms.size() == 1) {
- return expandTermAcrossFields(terms.get(0), fields);
- }
-
- // 6. Multiple terms - best_fields: each field with all terms, then OR
across fields
- String termOperator = "and".equals(defaultOperator) ? " AND " : " OR ";
- StringBuilder result = new StringBuilder();
- for (int fieldIdx = 0; fieldIdx < fields.size(); fieldIdx++) {
- if (fieldIdx > 0) {
- result.append(" OR ");
- }
+ try {
+ // Parse original DSL directly using first field as placeholder
for bare queries
+ SearchLexer lexer = new
SearchLexer(CharStreams.fromString(trimmedDsl));
+ CommonTokenStream tokens = new CommonTokenStream(lexer);
+ SearchParser parser = new SearchParser(tokens);
- String field = fields.get(fieldIdx);
- // Build: (field:term1 AND field:term2 AND ...)
- result.append("(");
- for (int termIdx = 0; termIdx < terms.size(); termIdx++) {
- if (termIdx > 0) {
- result.append(termOperator);
+ parser.removeErrorListeners();
+ parser.addErrorListener(new
org.antlr.v4.runtime.BaseErrorListener() {
+ @Override
+ public void syntaxError(org.antlr.v4.runtime.Recognizer<?, ?>
recognizer,
+ Object offendingSymbol,
+ int line, int charPositionInLine,
+ String msg, org.antlr.v4.runtime.RecognitionException
e) {
+ throw new SearchDslSyntaxException("Syntax error at line "
+ line
+ + ":" + charPositionInLine + " " + msg);
}
- result.append(field).append(":").append(terms.get(termIdx));
- }
- result.append(")");
- }
- return result.toString();
- }
-
- /**
- * Handle DSL with explicit operators using best_fields semantics.
- * For complex expressions, we group by field and OR across fields.
- */
- private static String
expandOperatorExpressionAcrossFieldsBestFields(String dsl,
- List<String> fields, String defaultOperator) {
- // For expressions with explicit operators, we apply the entire
expression to each field
- // and OR the results: (title:expr) OR (content:expr)
- StringBuilder result = new StringBuilder();
- for (int i = 0; i < fields.size(); i++) {
- if (i > 0) {
- result.append(" OR ");
- }
- String field = fields.get(i);
- // Expand the DSL for this single field
- String fieldDsl = expandSimplifiedDsl(dsl, field, defaultOperator);
- result.append("(").append(fieldDsl).append(")");
- }
- return result.toString();
- }
-
- /**
- * Expand a single term across multiple fields with OR.
- * Example: "hello" + ["title", "content"] -> "(title:hello OR
content:hello)"
- * Delegates to expandItemAcrossFields for DRY compliance.
- */
- private static String expandTermAcrossFields(String term, List<String>
fields) {
- return expandItemAcrossFields(term, fields);
- }
-
- /**
- * Expand a function call across multiple fields.
- * Example: "EXACT(foo bar)" + ["title", "content"] -> "(title:EXACT(foo
bar) OR content:EXACT(foo bar))"
- * Delegates to expandItemAcrossFields for DRY compliance.
- */
- private static String expandFunctionAcrossFields(String dsl, List<String>
fields) {
- return expandItemAcrossFields(dsl, fields);
- }
-
- /**
- * Handle DSL with explicit operators (AND/OR/NOT).
- * Each operand without field prefix is expanded across fields.
- * Example: "hello AND world" + ["title", "content"] ->
- * "(title:hello OR content:hello) AND (title:world OR
content:world)"
- */
- private static String expandOperatorExpressionAcrossFields(String dsl,
List<String> fields) {
- StringBuilder result = new StringBuilder();
- StringBuilder currentTerm = new StringBuilder();
- int i = 0;
-
- while (i < dsl.length()) {
- // Skip whitespace
- while (i < dsl.length() && Character.isWhitespace(dsl.charAt(i))) {
- i++;
- }
- if (i >= dsl.length()) {
- break;
- }
-
- // Handle escape sequences
- if (dsl.charAt(i) == '\\' && i + 1 < dsl.length()) {
- currentTerm.append(dsl.charAt(i));
- currentTerm.append(dsl.charAt(i + 1));
- i += 2;
- continue;
- }
+ });
- // Handle parentheses - include entire group as a term
- if (dsl.charAt(i) == '(') {
- int depth = 1;
- currentTerm.append('(');
- i++;
- while (i < dsl.length() && depth > 0) {
- char c = dsl.charAt(i);
- if (c == '(') {
- depth++;
- } else if (c == ')') {
- depth--;
- }
- currentTerm.append(c);
- i++;
- }
- continue;
+ ParseTree tree = parser.search();
+ if (tree == null) {
+ throw new SearchDslSyntaxException("Invalid search DSL syntax:
parsing returned null");
}
- // Try to match operators
- String remaining = dsl.substring(i);
- String upperRemaining = remaining.toUpperCase();
-
- // Check for AND operator
- if (matchesOperatorWord(upperRemaining, "AND")) {
- flushTermAcrossFields(result, currentTerm, fields);
- appendWithSpace(result, "AND");
- i += 3;
- continue;
- }
+ // Build AST using Lucene-mode visitor with first field as
placeholder for bare queries
+ // Use constructor with override to avoid mutating shared options
object (thread-safety)
+ QsLuceneModeAstBuilder visitor = new
QsLuceneModeAstBuilder(options, fields.get(0));
+ QsNode root = visitor.visit(tree);
- // Check for OR operator
- if (matchesOperatorWord(upperRemaining, "OR")) {
- flushTermAcrossFields(result, currentTerm, fields);
- appendWithSpace(result, "OR");
- i += 2;
- continue;
+ // Apply multi-field expansion based on type
+ // Pass luceneMode=true since this is Lucene mode parsing
+ QsNode expandedRoot;
+ if (options.isCrossFieldsMode()) {
+ // cross_fields: each term expands to
OCCUR_BOOLEAN(field1:term, field2:term)
+ expandedRoot = MultiFieldExpander.expandCrossFields(root,
fields, true);
+ } else if (options.isBestFieldsMode()) {
+ // best_fields: entire query copied per field, joined with
OCCUR_BOOLEAN
+ expandedRoot = MultiFieldExpander.expandBestFields(root,
fields, true);
+ } else {
+ throw new IllegalStateException(
+ "Invalid type value: '" + options.getType() + "'.
Expected 'best_fields' or 'cross_fields'");
}
- // Check for NOT operator
- if (matchesOperatorWord(upperRemaining, "NOT")) {
- flushTermAcrossFields(result, currentTerm, fields);
- appendWithSpace(result, "NOT");
- i += 3;
- continue;
+ // Extract field bindings from expanded AST
+ Set<String> fieldNames = collectFieldNames(expandedRoot);
+ List<QsFieldBinding> bindings = new ArrayList<>();
+ int slotIndex = 0;
+ for (String fieldName : fieldNames) {
+ bindings.add(new QsFieldBinding(fieldName, slotIndex++));
}
- // Accumulate term character
- currentTerm.append(dsl.charAt(i));
- i++;
- }
-
- // Flush final term
- flushTermAcrossFields(result, currentTerm, fields);
-
- return result.toString().trim();
- }
-
- /**
- * Check if the string starts with an operator word followed by whitespace
or end of string.
- */
- private static boolean matchesOperatorWord(String upper, String op) {
- if (!upper.startsWith(op)) {
- return false;
- }
- int opLen = op.length();
- // Must be followed by whitespace or end of string
- return upper.length() == opLen ||
Character.isWhitespace(upper.charAt(opLen));
- }
+ return new QsPlan(expandedRoot, bindings);
- /**
- * Flush accumulated term, expanding across fields if needed.
- */
- private static void flushTermAcrossFields(StringBuilder result,
StringBuilder term, List<String> fields) {
- String trimmed = term.toString().trim();
- if (!trimmed.isEmpty()) {
- // Check if term already has a field reference
- if (containsFieldReference(trimmed)) {
- appendWithSpace(result, trimmed);
- } else if (trimmed.startsWith("(") && trimmed.endsWith(")")) {
- // Parenthesized expression - recursively expand
- String inner = trimmed.substring(1, trimmed.length() -
1).trim();
- String expanded = expandOperatorExpressionAcrossFields(inner,
fields);
- appendWithSpace(result, "(" + expanded + ")");
- } else if (startsWithFunction(trimmed)) {
- // Function - expand across fields
- appendWithSpace(result, expandFunctionAcrossFields(trimmed,
fields));
- } else {
- // Regular term - expand across fields
- appendWithSpace(result, expandTermAcrossFields(trimmed,
fields));
- }
- term.setLength(0);
- }
- }
-
- /**
- * Append text to StringBuilder with a leading space if not empty.
- */
- private static void appendWithSpace(StringBuilder sb, String text) {
- if (sb.length() > 0) {
- sb.append(" ");
+ } catch (SearchDslSyntaxException e) {
+ LOG.error("Failed to parse search DSL in multi-field Lucene mode:
'{}'", dsl, e);
+ throw new SearchDslSyntaxException("Invalid search DSL: " + dsl +
". " + e.getMessage(), e);
+ } catch (RuntimeException e) {
+ LOG.error("Unexpected error while parsing search DSL in
multi-field Lucene mode: '{}'", dsl, e);
+ throw new RuntimeException("Unexpected error parsing search DSL: "
+ dsl + ". " + e.getMessage(), e);
}
- sb.append(text);
}
/**
@@ -1065,6 +589,50 @@ public class SearchDslParser {
private final Set<String> fieldNames = new LinkedHashSet<>();
// Context stack to track current field name during parsing
private String currentFieldName = null;
+ // Default field for bare queries (without field: prefix)
+ private final String defaultField;
+ // Default operator for implicit conjunction (space-separated terms):
"AND" or "OR"
+ private final String defaultOperator;
+
+ /**
+ * Creates a QsAstBuilder with no default field.
+ * Bare queries will throw an error.
+ */
+ public QsAstBuilder() {
+ this.defaultField = null;
+ this.defaultOperator = "OR";
+ }
+
+ /**
+ * Creates a QsAstBuilder with a default field for bare queries.
+ * @param defaultField The field to use for queries without explicit
field prefix
+ */
+ public QsAstBuilder(String defaultField) {
+ this.defaultField = defaultField;
+ this.defaultOperator = "OR";
+ }
+
+ /**
+ * Creates a QsAstBuilder with default field and default operator.
+ * @param defaultField The field to use for queries without explicit
field prefix
+ * @param defaultOperator The operator to use for implicit conjunction
("AND" or "OR")
+ * @throws IllegalArgumentException if defaultOperator is non-blank and is
neither "and" nor "or" (compared case-insensitively)
+ */
+ public QsAstBuilder(String defaultField, String defaultOperator) {
+ this.defaultField = defaultField;
+ // Validate default operator
+ if (defaultOperator != null && !defaultOperator.trim().isEmpty()) {
+ String normalized = defaultOperator.trim().toUpperCase();
+ if (!"AND".equals(normalized) && !"OR".equals(normalized)) {
+ throw new IllegalArgumentException(
+ "Invalid default operator: '" + defaultOperator
+ + "'. Must be 'and' or 'or'");
+ }
+ this.defaultOperator = normalized;
+ } else {
+ this.defaultOperator = "OR"; // Default to OR
+ }
+ }
public Set<String> getFieldNames() {
return Collections.unmodifiableSet(fieldNames);
@@ -1118,7 +686,22 @@ public class SearchDslParser {
}
children.add(child);
}
- return new QsNode(QsClauseType.AND, children);
+
+ // Check if there are explicit AND tokens
+ // If no explicit AND tokens, use the default operator for
implicit conjunction
+ List<org.antlr.v4.runtime.tree.TerminalNode> andTokens = ctx.AND();
+ boolean hasExplicitAnd = andTokens != null && !andTokens.isEmpty();
+
+ QsClauseType clauseType;
+ if (hasExplicitAnd) {
+ // Explicit AND - always use AND
+ clauseType = QsClauseType.AND;
+ } else {
+ // Implicit conjunction - use default operator
+ clauseType = "AND".equalsIgnoreCase(defaultOperator) ?
QsClauseType.AND : QsClauseType.OR;
+ }
+
+ return new QsNode(clauseType, children);
}
@Override
@@ -1149,14 +732,51 @@ public class SearchDslParser {
}
return result;
}
- if (ctx.fieldQuery() == null) {
- throw new RuntimeException("Invalid atom clause: missing field
query");
+ if (ctx.fieldQuery() != null) {
+ QsNode result = visit(ctx.fieldQuery());
+ if (result == null) {
+ throw new RuntimeException("Invalid field query");
+ }
+ return result;
}
- QsNode result = visit(ctx.fieldQuery());
- if (result == null) {
- throw new RuntimeException("Invalid field query");
+ if (ctx.bareQuery() != null) {
+ QsNode result = visit(ctx.bareQuery());
+ if (result == null) {
+ throw new RuntimeException("Invalid bare query");
+ }
+ return result;
+ }
+ throw new RuntimeException("Invalid atom clause: missing field or
bare query");
+ }
+
+ @Override
+ public QsNode visitBareQuery(SearchParser.BareQueryContext ctx) {
+ // Bare query - uses default field
+ if (defaultField == null || defaultField.isEmpty()) {
+ throw new SearchDslSyntaxException(
+ "No field specified and no default_field configured. "
+ + "Either use field:value syntax or set default_field in
options.");
+ }
+
+ fieldNames.add(defaultField);
+
+ // Set current field context to default field before visiting
search value
+ String previousFieldName = currentFieldName;
+ currentFieldName = defaultField;
+
+ try {
+ if (ctx.searchValue() == null) {
+ throw new RuntimeException("Invalid bare query: missing
search value");
+ }
+ QsNode result = visit(ctx.searchValue());
+ if (result == null) {
+ throw new RuntimeException("Invalid search value");
+ }
+ return result;
+ } finally {
+ // Restore previous context
+ currentFieldName = previousFieldName;
}
- return result;
}
@Override
@@ -1340,7 +960,16 @@ public class SearchDslParser {
private String getCurrentFieldName() {
// Use the current field name from parsing context
- return currentFieldName != null ? currentFieldName : "_all";
+ if (currentFieldName != null) {
+ return currentFieldName;
+ }
+ // Fall back to default field if set
+ if (defaultField != null && !defaultField.isEmpty()) {
+ return defaultField;
+ }
+ // This should not happen if visitBareQuery is called correctly
+ throw new SearchDslSyntaxException(
+ "No field name available. This indicates a parsing error.");
}
private String stripOuterQuotes(String text) {
@@ -1642,6 +1271,277 @@ public class SearchDslParser {
}
}
+ /**
+ * Multi-field AST expander.
+ * Transforms AST nodes with bareQuery (using placeholder field) into
proper multi-field queries.
+ * Supports two strategies (in Lucene mode the OR nodes are built as OCCUR_BOOLEAN instead):
+ * - cross_fields: each term expands to (field1:term OR field2:term)
+ * - best_fields: entire query copied per field, joined with OR
+ */
+ private static class MultiFieldExpander {
+
+ /**
+ * Expand AST using cross_fields strategy.
+ * Each leaf node becomes OR of that node across all fields.
+ * Example: "hello AND world" with fields=[title,content] becomes
+ * (title:hello OR content:hello) AND (title:world OR content:world)
+ *
+ * @param root The AST root node
+ * @param fields List of fields to expand across
+ * @return Expanded AST
+ */
+ public static QsNode expandCrossFields(QsNode root, List<String>
fields) {
+ return expandCrossFields(root, fields, false);
+ }
+
+ /**
+ * Expand AST using cross_fields strategy with optional Lucene mode.
+ */
+ public static QsNode expandCrossFields(QsNode root, List<String>
fields, boolean luceneMode) {
+ if (fields == null || fields.isEmpty()) {
+ return root;
+ }
+ if (fields.size() == 1) {
+ // Single field - just set the field on all leaf nodes
+ return setFieldOnLeaves(root, fields.get(0), fields);
+ }
+ return expandNodeCrossFields(root, fields, luceneMode);
+ }
+
+ /**
+ * Expand AST using best_fields strategy.
+ * Entire query is copied for each field, joined with OR.
+ * Example: "hello AND world" with fields=[title,content] becomes
+ * (title:hello AND title:world) OR (content:hello AND content:world)
+ *
+ * @param root The AST root node
+ * @param fields List of fields to expand across
+ * @return Expanded AST
+ */
+ public static QsNode expandBestFields(QsNode root, List<String>
fields) {
+ return expandBestFields(root, fields, false);
+ }
+
+ /**
+ * Expand AST using best_fields strategy with optional Lucene mode.
+ * @param root The AST root node
+ * @param fields List of fields to expand across
+ * @param luceneMode If true, use Lucene-style OCCUR_BOOLEAN; if
false, use standard OR
+ */
+ public static QsNode expandBestFields(QsNode root, List<String>
fields, boolean luceneMode) {
+ if (fields == null || fields.isEmpty()) {
+ return root;
+ }
+ if (fields.size() == 1) {
+ // Single field - just set the field on all leaf nodes
+ return setFieldOnLeaves(root, fields.get(0), fields);
+ }
+
+ // Use the explicit luceneMode parameter only - don't infer from
node properties
+ boolean isLuceneMode = luceneMode;
+
+ // Create a copy of the entire AST for each field
+ List<QsNode> fieldTrees = new ArrayList<>();
+ for (String field : fields) {
+ QsNode copy = deepCopyWithField(root, field, fields);
+ // In Lucene mode, set SHOULD on each field tree
+ if (isLuceneMode) {
+ copy.setOccur(QsOccur.SHOULD);
+ }
+ fieldTrees.add(copy);
+ }
+
+ // In Lucene mode, create OCCUR_BOOLEAN instead of OR
+ if (isLuceneMode) {
+ // Preserve minimum_should_match from root if it has one
+ Integer minShouldMatch = root.getMinimumShouldMatch();
+ if (minShouldMatch == null) {
+ // Default: at least 1 field should match
+ minShouldMatch = 1;
+ }
+ return new QsNode(QsClauseType.OCCUR_BOOLEAN, fieldTrees,
minShouldMatch);
+ } else {
+ // Standard mode: join with OR
+ return new QsNode(QsClauseType.OR, fieldTrees);
+ }
+ }
+
+ /**
+ * Recursively expand a node using cross_fields strategy.
+ * Always returns a new copy or new node structure, never the original
node.
+ */
+ private static QsNode expandNodeCrossFields(QsNode node, List<String>
fields, boolean luceneMode) {
+ // Check if this is a leaf node (no children)
+ if (isLeafNode(node)) {
+ // Check if the node has an explicit field that's NOT in the
fields list
+ // If so, don't expand but still return a copy
+ String nodeField = node.getField();
+ if (nodeField != null && !nodeField.isEmpty() &&
!fields.contains(nodeField)) {
+ // Explicit field not in expansion list - return a copy
preserving all fields
+ return new QsNode(
+ node.getType(),
+ node.getField(),
+ node.getValue(),
+ null,
+ node.getOccur(),
+ node.getMinimumShouldMatch()
+ );
+ }
+
+ // Expand leaf node across all fields
+ List<QsNode> expandedNodes = new ArrayList<>();
+ for (String field : fields) {
+ // Create complete copy with new field
+ QsNode copy = new QsNode(
+ node.getType(),
+ field,
+ node.getValue(),
+ null,
+ luceneMode ? QsOccur.SHOULD : null, // In Lucene
mode, set SHOULD
+ node.getMinimumShouldMatch()
+ );
+ expandedNodes.add(copy);
+ }
+
+ // In Lucene mode, wrap the copies in an OCCUR_BOOLEAN that inherits the original leaf's occur
+ // Otherwise create OR node
+ if (luceneMode) {
+ QsNode result = new QsNode(QsClauseType.OCCUR_BOOLEAN,
expandedNodes, null);
+ if (node.getOccur() != null) {
+ result.setOccur(node.getOccur());
+ }
+ return result;
+ } else {
+ return new QsNode(QsClauseType.OR, expandedNodes);
+ }
+ }
+
+ // Compound node - recursively expand children
+ List<QsNode> expandedChildren = new ArrayList<>();
+ if (node.getChildren() != null) {
+ for (QsNode child : node.getChildren()) {
+ expandedChildren.add(expandNodeCrossFields(child, fields,
luceneMode));
+ }
+ }
+
+ // Create new compound node with expanded children (always a copy)
+ QsNode result = new QsNode(
+ node.getType(),
+ node.getField(),
+ node.getValue(),
+ expandedChildren,
+ node.getOccur(),
+ node.getMinimumShouldMatch()
+ );
+ return result;
+ }
+
+ /**
+ * Check if a node is a leaf node (no children, representing a
term/phrase/etc.)
+ * A leaf node has no children or empty children list, regardless of
whether it has a value.
+ */
+ private static boolean isLeafNode(QsNode node) {
+ return node.getChildren() == null || node.getChildren().isEmpty();
+ }
+
+ /**
+ * Deep copy an AST node and set the field on leaf nodes.
+ * Preserves explicit fields that are not in the fields list.
+ * Always returns a new copy, never the original node.
+ */
+ private static QsNode deepCopyWithField(QsNode node, String field,
List<String> fields) {
+ if (isLeafNode(node)) {
+ // Check if the node has an explicit field that's NOT in the
fields list
+ String nodeField = node.getField();
+ String targetField;
+ if (nodeField != null && !nodeField.isEmpty() &&
!fields.contains(nodeField)) {
+ // Explicit field not in expansion list - preserve
original field
+ targetField = nodeField;
+ } else {
+ // Use new field
+ targetField = field;
+ }
+
+ // Create a complete copy of the leaf node
+ QsNode copy = new QsNode(
+ node.getType(),
+ targetField,
+ node.getValue(),
+ null, // children
+ node.getOccur(),
+ node.getMinimumShouldMatch()
+ );
+ return copy;
+ }
+
+ // Compound node - recursively copy children
+ List<QsNode> copiedChildren = new ArrayList<>();
+ if (node.getChildren() != null) {
+ for (QsNode child : node.getChildren()) {
+ copiedChildren.add(deepCopyWithField(child, field,
fields));
+ }
+ }
+
+ // Create a complete copy of the compound node
+ QsNode result = new QsNode(
+ node.getType(),
+ node.getField(),
+ node.getValue(),
+ copiedChildren,
+ node.getOccur(),
+ node.getMinimumShouldMatch()
+ );
+ return result;
+ }
+
+ /**
+ * Set field on leaf nodes (for single-field case).
+ * Preserves explicit fields that are not in the fields list.
+ * Always returns a new copy, never the original node.
+ */
+ private static QsNode setFieldOnLeaves(QsNode node, String field,
List<String> fields) {
+ if (isLeafNode(node)) {
+ // Check if the node has an explicit field that's NOT in the
fields list
+ String nodeField = node.getField();
+ String targetField;
+ if (nodeField != null && !nodeField.isEmpty() &&
!fields.contains(nodeField)) {
+ // Explicit field not in expansion list - preserve
original field
+ targetField = nodeField;
+ } else {
+ targetField = field;
+ }
+
+ // Create complete copy
+ return new QsNode(
+ node.getType(),
+ targetField,
+ node.getValue(),
+ null,
+ node.getOccur(),
+ node.getMinimumShouldMatch()
+ );
+ }
+
+ // Compound node - recursively process children
+ List<QsNode> updatedChildren = new ArrayList<>();
+ if (node.getChildren() != null) {
+ for (QsNode child : node.getChildren()) {
+ updatedChildren.add(setFieldOnLeaves(child, field,
fields));
+ }
+ }
+
+ // Create complete copy
+ return new QsNode(
+ node.getType(),
+ node.getField(),
+ node.getValue(),
+ updatedChildren,
+ node.getOccur(),
+ node.getMinimumShouldMatch()
+ );
+ }
+ }
+
/**
* Search options parsed from JSON.
* Supports all configuration in a single JSON object:
@@ -1851,16 +1751,14 @@ public class SearchDslParser {
return new QsPlan(new QsNode(QsClauseType.TERM, "error",
"empty_dsl"), new ArrayList<>());
}
- // Expand simplified DSL if default field is provided
- String expandedDsl = dsl;
- if (defaultField != null && !defaultField.trim().isEmpty()) {
- expandedDsl = expandSimplifiedDsl(dsl.trim(), defaultField.trim(),
- normalizeDefaultOperator(defaultOperator));
- }
+ // Parse original DSL directly - no preprocessing needed
+ // The ANTLR grammar now supports bareQuery (without field prefix)
+ // and QsLuceneModeAstBuilder will use defaultField from options for
bare queries
+ String trimmedDsl = dsl.trim();
try {
// Create ANTLR lexer and parser
- SearchLexer lexer = new
SearchLexer(CharStreams.fromString(expandedDsl));
+ SearchLexer lexer = new
SearchLexer(CharStreams.fromString(trimmedDsl));
CommonTokenStream tokens = new CommonTokenStream(lexer);
SearchParser parser = new SearchParser(tokens);
@@ -1899,28 +1797,25 @@ public class SearchDslParser {
} catch (SearchDslSyntaxException e) {
// Syntax error in DSL - user input issue
- LOG.error("Failed to parse search DSL in Lucene mode: '{}'
(expanded: '{}')", dsl, expandedDsl, e);
+ LOG.error("Failed to parse search DSL in Lucene mode: '{}'", dsl,
e);
throw new SearchDslSyntaxException("Invalid search DSL: " + dsl +
". " + e.getMessage(), e);
} catch (IllegalArgumentException e) {
// Invalid argument - user input issue
- LOG.error("Invalid argument in search DSL (Lucene mode): '{}'
(expanded: '{}')", dsl, expandedDsl, e);
+ LOG.error("Invalid argument in search DSL (Lucene mode): '{}'",
dsl, e);
throw new IllegalArgumentException("Invalid search DSL argument: "
+ dsl + ". " + e.getMessage(), e);
} catch (NullPointerException e) {
// Internal error - programming bug
- LOG.error("Internal error (NPE) while parsing search DSL in Lucene
mode: '{}' (expanded: '{}')",
- dsl, expandedDsl, e);
+ LOG.error("Internal error (NPE) while parsing search DSL in Lucene
mode: '{}'", dsl, e);
throw new RuntimeException("Internal error while parsing search
DSL: " + dsl
+ ". This may be a bug. Details: " + e.getMessage(), e);
} catch (IndexOutOfBoundsException e) {
// Internal error - programming bug
- LOG.error("Internal error (IOOB) while parsing search DSL in
Lucene mode: '{}' (expanded: '{}')",
- dsl, expandedDsl, e);
+ LOG.error("Internal error (IOOB) while parsing search DSL in
Lucene mode: '{}'", dsl, e);
throw new RuntimeException("Internal error while parsing search
DSL: " + dsl
+ ". This may be a bug. Details: " + e.getMessage(), e);
} catch (RuntimeException e) {
// Other runtime errors
- LOG.error("Unexpected error while parsing search DSL in Lucene
mode: '{}' (expanded: '{}')",
- dsl, expandedDsl, e);
+ LOG.error("Unexpected error while parsing search DSL in Lucene
mode: '{}'", dsl, e);
throw new RuntimeException("Unexpected error parsing search DSL: "
+ dsl + ". " + e.getMessage(), e);
}
}
@@ -1934,9 +1829,33 @@ public class SearchDslParser {
private final Set<String> fieldNames = new LinkedHashSet<>();
private final SearchOptions options;
private String currentFieldName = null;
+ // Override for default field - used in multi-field mode to avoid
mutating options
+ private final String overrideDefaultField;
public QsLuceneModeAstBuilder(SearchOptions options) {
this.options = options;
+ this.overrideDefaultField = null;
+ }
+
+ /**
+ * Constructor with override default field for multi-field mode.
+ * This avoids mutating the shared SearchOptions object.
+ * @param options Search options
+ * @param overrideDefaultField Field to use as default instead of
options.getDefaultField()
+ */
+ public QsLuceneModeAstBuilder(SearchOptions options, String
overrideDefaultField) {
+ this.options = options;
+ this.overrideDefaultField = overrideDefaultField;
+ }
+
+ /**
+ * Get the effective default field, considering override.
+ */
+ private String getEffectiveDefaultField() {
+ if (overrideDefaultField != null &&
!overrideDefaultField.isEmpty()) {
+ return overrideDefaultField;
+ }
+ return options != null ? options.getDefaultField() : null;
}
public Set<String> getFieldNames() {
@@ -2054,36 +1973,76 @@ public class SearchDslParser {
QsOccur defaultOccur, boolean introducedByOr) {
List<SearchParser.NotClauseContext> notClauses = ctx.notClause();
+ // Determine how to handle implicit operators
+ String defaultOperator = options.getDefaultOperator();
+ boolean useAndForImplicit =
"AND".equalsIgnoreCase(defaultOperator);
+
for (int i = 0; i < notClauses.size(); i++) {
- boolean introducedByAnd = (i > 0);
+ boolean introducedByAnd;
+ if (i > 0) {
+ // Check if there's an explicit AND before this notClause
+ // by walking ctx.children and finding the token
immediately before this notClause
+ introducedByAnd = hasExplicitAndBefore(ctx,
notClauses.get(i), useAndForImplicit);
+ } else {
+ introducedByAnd = false;
+ }
+
collectTermsFromNotClause(notClauses.get(i), terms,
defaultOccur, introducedByOr, introducedByAnd);
// After first term, all subsequent in same AND chain are
introducedByOr=false
introducedByOr = false;
}
}
+ /**
+ * Check if there's an explicit AND token before the target notClause.
+ * Walks ctx.children to find the position of target and checks the
preceding token.
+ * @param ctx The AndClauseContext containing the children
+ * @param target The target NotClauseContext to check
+ * @param implicitDefault Value to return if no explicit AND (use
default_operator)
+ * @return true if the sibling before target is an AND token, false if it is another
terminal token, implicitDefault otherwise
+ */
+ private boolean hasExplicitAndBefore(SearchParser.AndClauseContext ctx,
+ SearchParser.NotClauseContext target, boolean implicitDefault)
{
+ for (int j = 0; j < ctx.getChildCount(); j++) {
+ if (ctx.getChild(j) == target) {
+ // Found the target - check if the preceding sibling is an
AND token
+ if (j > 0 && ctx.getChild(j - 1) instanceof
org.antlr.v4.runtime.tree.TerminalNode) {
+ org.antlr.v4.runtime.tree.TerminalNode terminal =
+ (org.antlr.v4.runtime.tree.TerminalNode)
ctx.getChild(j - 1);
+ return terminal.getSymbol().getType() ==
SearchParser.AND;
+ }
+ // No explicit AND before this term - use default
+ return implicitDefault;
+ }
+ }
+ // Target not found (should not happen) - use default
+ return implicitDefault;
+ }
+
private void collectTermsFromNotClause(SearchParser.NotClauseContext
ctx, List<TermWithOccur> terms,
QsOccur defaultOccur, boolean introducedByOr, boolean
introducedByAnd) {
boolean isNegated = ctx.NOT() != null;
SearchParser.AtomClauseContext atomCtx = ctx.atomClause();
+ QsNode node;
if (atomCtx.clause() != null) {
// Parenthesized clause - visit recursively
- QsNode subNode = visit(atomCtx.clause());
- TermWithOccur term = new TermWithOccur(subNode, defaultOccur);
- term.introducedByOr = introducedByOr;
- term.introducedByAnd = introducedByAnd;
- term.isNegated = isNegated;
- terms.add(term);
+ node = visit(atomCtx.clause());
+ } else if (atomCtx.fieldQuery() != null) {
+ // Field query with explicit field prefix
+ node = visit(atomCtx.fieldQuery());
+ } else if (atomCtx.bareQuery() != null) {
+ // Bare query - uses default field
+ node = visit(atomCtx.bareQuery());
} else {
- // Field query
- QsNode node = visit(atomCtx.fieldQuery());
- TermWithOccur term = new TermWithOccur(node, defaultOccur);
- term.introducedByOr = introducedByOr;
- term.introducedByAnd = introducedByAnd;
- term.isNegated = isNegated;
- terms.add(term);
+ throw new RuntimeException("Invalid atom clause: missing field
or bare query");
}
+
+ TermWithOccur term = new TermWithOccur(node, defaultOccur);
+ term.introducedByOr = introducedByOr;
+ term.introducedByAnd = introducedByAnd;
+ term.isNegated = isNegated;
+ terms.add(term);
}
/**
@@ -2158,7 +2117,22 @@ public class SearchDslParser {
return children.get(0);
}
- return new QsNode(QsClauseType.AND, children);
+ // Check if there are explicit AND tokens
+ // If no explicit AND tokens, use the default operator for
implicit conjunction
+ List<org.antlr.v4.runtime.tree.TerminalNode> andTokens = ctx.AND();
+ boolean hasExplicitAnd = andTokens != null && !andTokens.isEmpty();
+
+ QsClauseType clauseType;
+ if (hasExplicitAnd) {
+ // Explicit AND - always use AND
+ clauseType = QsClauseType.AND;
+ } else {
+ // Implicit conjunction - use default operator from options
+ String defaultOperator = options.getDefaultOperator();
+ clauseType = "AND".equalsIgnoreCase(defaultOperator) ?
QsClauseType.AND : QsClauseType.OR;
+ }
+
+ return new QsNode(clauseType, children);
}
@Override
@@ -2180,7 +2154,44 @@ public class SearchDslParser {
if (ctx.clause() != null) {
return visit(ctx.clause());
}
- return visit(ctx.fieldQuery());
+ if (ctx.fieldQuery() != null) {
+ return visit(ctx.fieldQuery());
+ }
+ if (ctx.bareQuery() != null) {
+ return visit(ctx.bareQuery());
+ }
+ throw new RuntimeException("Invalid atom clause: missing field or
bare query");
+ }
+
+ @Override
+ public QsNode visitBareQuery(SearchParser.BareQueryContext ctx) {
+ // Bare query - uses effective default field (considering override)
+ String defaultField = getEffectiveDefaultField();
+ if (defaultField == null || defaultField.isEmpty()) {
+ throw new SearchDslSyntaxException(
+ "No field specified and no default_field configured. "
+ + "Either use field:value syntax or set default_field in
options.");
+ }
+
+ fieldNames.add(defaultField);
+
+ // Set current field context to default field before visiting
search value
+ String previousFieldName = currentFieldName;
+ currentFieldName = defaultField;
+
+ try {
+ if (ctx.searchValue() == null) {
+ throw new RuntimeException("Invalid bare query: missing
search value");
+ }
+ QsNode result = visit(ctx.searchValue());
+ if (result == null) {
+ throw new RuntimeException("Invalid search value");
+ }
+ return result;
+ } finally {
+ // Restore previous context
+ currentFieldName = previousFieldName;
+ }
}
@Override
@@ -2215,7 +2226,17 @@ public class SearchDslParser {
@Override
public QsNode visitSearchValue(SearchParser.SearchValueContext ctx) {
- String fieldName = currentFieldName != null ? currentFieldName :
"_all";
+ String fieldName = currentFieldName;
+ if (fieldName == null) {
+ // Fall back to effective default field (considering override)
+ String defaultField = getEffectiveDefaultField();
+ if (defaultField != null && !defaultField.isEmpty()) {
+ fieldName = defaultField;
+ } else {
+ throw new SearchDslSyntaxException(
+ "No field name available. This indicates a parsing
error.");
+ }
+ }
if (ctx.TERM() != null) {
return new QsNode(QsClauseType.TERM, fieldName,
unescapeTermValue(ctx.TERM().getText()));
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
index e45790b4efb..e7daf12f100 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
@@ -292,38 +292,68 @@ public class SearchDslParserTest {
@Test
public void testDefaultFieldWithMultiTermAnd() {
- // Test: "foo bar" + field="tags" + operator="and" → "tags:ALL(foo
bar)"
+ // Test: "foo bar" + field="tags" + operator="and" → "tags:foo AND
tags:bar"
+ // With single-phase parsing, multi-term queries are parsed as
separate terms
String dsl = "foo bar";
QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "and");
Assertions.assertNotNull(plan);
- Assertions.assertEquals(QsClauseType.ALL, plan.getRoot().getType());
- Assertions.assertEquals("tags", plan.getRoot().getField());
- Assertions.assertEquals("foo bar", plan.getRoot().getValue());
+ Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ QsNode firstChild = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.TERM, firstChild.getType());
+ Assertions.assertEquals("tags", firstChild.getField());
+ Assertions.assertEquals("foo", firstChild.getValue());
+
+ QsNode secondChild = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.TERM, secondChild.getType());
+ Assertions.assertEquals("tags", secondChild.getField());
+ Assertions.assertEquals("bar", secondChild.getValue());
}
@Test
public void testDefaultFieldWithMultiTermOr() {
- // Test: "foo bar" + field="tags" + operator="or" → "tags:ANY(foo bar)"
+ // Test: "foo bar" + field="tags" + operator="or" → "tags:foo OR
tags:bar"
+ // With single-phase parsing, multi-term queries are parsed as
separate terms
String dsl = "foo bar";
QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", "or");
Assertions.assertNotNull(plan);
- Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType());
- Assertions.assertEquals("tags", plan.getRoot().getField());
- Assertions.assertEquals("foo bar", plan.getRoot().getValue());
+ Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ QsNode firstChild = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.TERM, firstChild.getType());
+ Assertions.assertEquals("tags", firstChild.getField());
+ Assertions.assertEquals("foo", firstChild.getValue());
+
+ QsNode secondChild = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.TERM, secondChild.getType());
+ Assertions.assertEquals("tags", secondChild.getField());
+ Assertions.assertEquals("bar", secondChild.getValue());
}
@Test
public void testDefaultFieldWithMultiTermDefaultOr() {
- // Test: "foo bar" + field="tags" (no operator, defaults to OR) →
"tags:ANY(foo bar)"
+ // Test: "foo bar" + field="tags" (no operator, defaults to OR) →
"tags:foo OR tags:bar"
+ // With single-phase parsing, multi-term queries are parsed as
separate terms
String dsl = "foo bar";
QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null);
Assertions.assertNotNull(plan);
- Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType());
- Assertions.assertEquals("tags", plan.getRoot().getField());
- Assertions.assertEquals("foo bar", plan.getRoot().getValue());
+ Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ QsNode firstChild = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.TERM, firstChild.getType());
+ Assertions.assertEquals("tags", firstChild.getField());
+ Assertions.assertEquals("foo", firstChild.getValue());
+
+ QsNode secondChild = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.TERM, secondChild.getType());
+ Assertions.assertEquals("tags", secondChild.getField());
+ Assertions.assertEquals("bar", secondChild.getValue());
}
@Test
@@ -450,33 +480,44 @@ public class SearchDslParserTest {
@Test
public void testInvalidDefaultOperator() {
- // Test: invalid operator should throw exception
+ // Test: invalid operator should throw IllegalArgumentException
String dsl = "foo bar";
- IllegalArgumentException exception =
Assertions.assertThrows(IllegalArgumentException.class, () -> {
+ // Invalid operator should throw exception
+ Assertions.assertThrows(IllegalArgumentException.class, () -> {
SearchDslParser.parseDsl(dsl, "tags", "invalid");
});
+ }
- Assertions.assertTrue(exception.getMessage().contains("Invalid default
operator"));
- Assertions.assertTrue(exception.getMessage().contains("Must be 'and'
or 'or'"));
+ @Test
+ public void testDefaultFieldWithInFunction() {
+ // Test: "IN(value1 value2)" + field="tags" → "tags:IN(value1 value2)"
+ String dsl = "IN(value1 value2)";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.LIST, plan.getRoot().getType());
+ Assertions.assertEquals("tags", plan.getRoot().getField());
+ Assertions.assertEquals("IN(value1 value2)",
plan.getRoot().getValue());
}
@Test
public void testDefaultOperatorCaseInsensitive() {
// Test: operator should be case-insensitive
+ // With single-phase parsing, multi-term queries produce AND/OR nodes
String dsl = "foo bar";
// Test "AND"
QsPlan plan1 = SearchDslParser.parseDsl(dsl, "tags", "AND");
- Assertions.assertEquals(QsClauseType.ALL, plan1.getRoot().getType());
+ Assertions.assertEquals(QsClauseType.AND, plan1.getRoot().getType());
// Test "Or"
QsPlan plan2 = SearchDslParser.parseDsl(dsl, "tags", "Or");
- Assertions.assertEquals(QsClauseType.ANY, plan2.getRoot().getType());
+ Assertions.assertEquals(QsClauseType.OR, plan2.getRoot().getType());
// Test "aNd"
QsPlan plan3 = SearchDslParser.parseDsl(dsl, "tags", "aNd");
- Assertions.assertEquals(QsClauseType.ALL, plan3.getRoot().getType());
+ Assertions.assertEquals(QsClauseType.AND, plan3.getRoot().getType());
}
@Test
@@ -548,11 +589,12 @@ public class SearchDslParserTest {
@Test
public void testDefaultFieldWithNullOperator() {
// Test: null operator should default to OR
+ // With single-phase parsing, multi-term queries produce OR nodes
String dsl = "foo bar";
QsPlan plan = SearchDslParser.parseDsl(dsl, "tags", null);
Assertions.assertNotNull(plan);
- Assertions.assertEquals(QsClauseType.ANY, plan.getRoot().getType());
// Defaults to OR/ANY
+ Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType()); //
Defaults to OR
}
@Test
@@ -1079,9 +1121,10 @@ public class SearchDslParserTest {
@Test
public void testMultiFieldNotOperator() {
- // Test: "NOT hello" + fields=["title","content"]
+ // Test: "NOT hello" + fields=["title","content"] with cross_fields
type
+ // cross_fields: NOT (title:hello OR content:hello)
String dsl = "NOT hello";
- String options = "{\"fields\":[\"title\",\"content\"]}";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"type\":\"cross_fields\"}";
QsPlan plan = SearchDslParser.parseDsl(dsl, options);
Assertions.assertNotNull(plan);
@@ -1318,4 +1361,232 @@ public class SearchDslParserTest {
Assertions.assertEquals(planBest.getRoot().getType(), planCross.getRoot().getType());
Assertions.assertEquals(planBest.getRoot().getChildren().size(), planCross.getRoot().getChildren().size());
}
+
+ // =====================================================================
+ // Tests for bare query support (without field: prefix)
+ // These tests verify the single-phase parsing where ANTLR grammar
+ // natively supports bareQuery and visitor fills in default field
+ // =====================================================================
+
+ @Test
+ public void testBareTermWithDefaultField() {
+ // Bare term without field prefix - uses default_field
+ String dsl = "hello";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.TERM, plan.getRoot().getType());
+ Assertions.assertEquals("title", plan.getRoot().getField());
+ Assertions.assertEquals("hello", plan.getRoot().getValue());
+ }
+
+ @Test
+ public void testBareRegexWithDefaultField() {
+ // Bare regex without field prefix - uses default_field
+ String dsl = "/[a-z]+/";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.REGEXP, plan.getRoot().getType());
+ Assertions.assertEquals("title", plan.getRoot().getField());
+ Assertions.assertEquals("[a-z]+", plan.getRoot().getValue());
+ }
+
+ @Test
+ public void testBareTermNotWithRegex() {
+ // DORIS-24368 scenario: bare term NOT bare regex
+ // "Anthony NOT /(\\d{1,2}:\\d{2} [AP]M)/"
+ String dsl = "Anthony NOT /(\\d{1,2}:\\d{2} [AP]M)/";
+ String options = "{\"default_field\":\"title\",\"default_operator\":\"AND\",\"mode\":\"lucene\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ // Should parse correctly without mangling the regex
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType());
+ }
+
+ @Test
+ public void testMixedFieldAndBareQuery() {
+ // Mixed: explicit field + bare query
+ // "content:/[a-z]+/ AND hello" where hello uses default_field=title
+ String dsl = "content:/[a-z]+/ AND hello";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ // First child: content:/[a-z]+/
+ QsNode first = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.REGEXP, first.getType());
+ Assertions.assertEquals("content", first.getField());
+
+ // Second child: hello with default_field=title
+ QsNode second = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.TERM, second.getType());
+ Assertions.assertEquals("title", second.getField());
+ }
+
+ @Test
+ public void testRegexWithOperatorKeywordsInside() {
+ // Regex containing AND/OR keywords - should not be treated as operators
+ String dsl = "/pattern AND stuff/";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.REGEXP, plan.getRoot().getType());
+ Assertions.assertEquals("title", plan.getRoot().getField());
+ // The AND inside regex should be preserved literally
+ Assertions.assertEquals("pattern AND stuff", plan.getRoot().getValue());
+ }
+
+ @Test
+ public void testRegexWithSpacesAndColons() {
+ // Regex containing spaces and colons - should not break parsing
+ String dsl = "/\\d{1,2}:\\d{2} [AP]M/";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.REGEXP, plan.getRoot().getType());
+ Assertions.assertEquals("title", plan.getRoot().getField());
+ Assertions.assertEquals("\\d{1,2}:\\d{2} [AP]M", plan.getRoot().getValue());
+ }
+
+ @Test
+ public void testBarePhraseWithDefaultField() {
+ // Bare phrase without field prefix
+ String dsl = "\"hello world\"";
+ String options = "{\"default_field\":\"content\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.PHRASE, plan.getRoot().getType());
+ Assertions.assertEquals("content", plan.getRoot().getField());
+ Assertions.assertEquals("hello world", plan.getRoot().getValue());
+ }
+
+ @Test
+ public void testBarePrefixWithDefaultField() {
+ // Bare prefix query without field prefix
+ String dsl = "hello*";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.PREFIX, plan.getRoot().getType());
+ Assertions.assertEquals("title", plan.getRoot().getField());
+ Assertions.assertEquals("hello*", plan.getRoot().getValue());
+ }
+
+ @Test
+ public void testBareWildcardWithDefaultField() {
+ // Bare wildcard query without field prefix
+ String dsl = "h*llo";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.WILDCARD, plan.getRoot().getType());
+ Assertions.assertEquals("title", plan.getRoot().getField());
+ Assertions.assertEquals("h*llo", plan.getRoot().getValue());
+ }
+
+ @Test
+ public void testBareQueryWithoutDefaultFieldThrows() {
+ // Bare query without default_field should throw an error
+ String dsl = "hello";
+ String options = "{}"; // No default_field
+
+ Assertions.assertThrows(RuntimeException.class, () -> {
+ SearchDslParser.parseDsl(dsl, options);
+ });
+ }
+
+ @Test
+ public void testBareAndOperatorWithDefaultField() {
+ // "hello AND world" with default_field
+ String dsl = "hello AND world";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ for (QsNode child : plan.getRoot().getChildren()) {
+ Assertions.assertEquals(QsClauseType.TERM, child.getType());
+ Assertions.assertEquals("title", child.getField());
+ }
+ }
+
+ @Test
+ public void testBareOrOperatorWithDefaultField() {
+ // "hello OR world" with default_field
+ String dsl = "hello OR world";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OR, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ for (QsNode child : plan.getRoot().getChildren()) {
+ Assertions.assertEquals(QsClauseType.TERM, child.getType());
+ Assertions.assertEquals("title", child.getField());
+ }
+ }
+
+ @Test
+ public void testBareNotOperatorWithDefaultField() {
+ // "NOT hello" with default_field
+ String dsl = "NOT hello";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.NOT, plan.getRoot().getType());
+ Assertions.assertEquals(1, plan.getRoot().getChildren().size());
+
+ QsNode child = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.TERM, child.getType());
+ Assertions.assertEquals("title", child.getField());
+ }
+
+ @Test
+ public void testComplexBareQueryLuceneMode() {
+ // Complex bare query in Lucene mode
+ // "a AND b OR NOT c" with default_field
+ String dsl = "a AND b OR NOT c";
+ String options = "{\"default_field\":\"title\",\"mode\":\"lucene\",\"minimum_should_match\":0}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, plan.getRoot().getType());
+ }
+
+ @Test
+ public void testParenthesizedBareQuery() {
+ // "(hello OR world) AND foo" with default_field
+ String dsl = "(hello OR world) AND foo";
+ String options = "{\"default_field\":\"title\"}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
+ Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+ // First child should be OR
+ QsNode orNode = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals(QsClauseType.OR, orNode.getType());
+
+ // Second child should be TERM
+ QsNode termNode = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals(QsClauseType.TERM, termNode.getType());
+ Assertions.assertEquals("title", termNode.getField());
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]