This is an automated email from the ASF dual-hosted git repository.
andywebb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 9809766bd9f SOLR-17959: Add alwaysStopwords option to edismax (#3802)
9809766bd9f is described below
commit 9809766bd9fda66aa67992ab4036e00a15ea1adf
Author: Andy Webb <[email protected]>
AuthorDate: Sat Oct 25 16:19:59 2025 +0100
SOLR-17959: Add alwaysStopwords option to edismax (#3802)
---
changelog/unreleased/SOLR-17959-alwaysStopwords.yml | 9 +++++++++
.../org/apache/solr/search/ExtendedDismaxQParser.java | 17 +++++++++++++----
.../apache/solr/search/TestExtendedDismaxParser.java | 6 ++++++
.../modules/indexing-guide/pages/filters.adoc | 2 ++
.../modules/query-guide/pages/edismax-query-parser.adoc | 6 +++++-
5 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/changelog/unreleased/SOLR-17959-alwaysStopwords.yml
b/changelog/unreleased/SOLR-17959-alwaysStopwords.yml
new file mode 100644
index 00000000000..274ee48261e
--- /dev/null
+++ b/changelog/unreleased/SOLR-17959-alwaysStopwords.yml
@@ -0,0 +1,9 @@
+title: Add alwaysStopwords option to edismax so its "all stopwords" behaviour
can be controlled
+type: changed
+authors:
+ - name: Andy Webb
+links:
+ - name: SOLR-17959
+ url: https://issues.apache.org/jira/browse/SOLR-17959
+issues:
+ - 17959
diff --git
a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
index ad43dab593b..b1dcb910a9d 100644
--- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java
@@ -97,6 +97,9 @@ public class ExtendedDismaxQParser extends QParser {
/** If set to true, stopwords are removed from the query. */
public static String STOPWORDS = "stopwords";
+
+ /** If set to true, the stopword filter applies even if all terms are
stopwords. */
+ public static String ALWAYS_STOPWORDS = "alwaysStopwords";
}
private ExtendedDismaxConfiguration config;
@@ -416,7 +419,7 @@ public class ExtendedDismaxQParser extends QParser {
query = up.parse(mainUserQuery);
if (shouldRemoveStopFilter(config, query)) {
- // if the query was all stop words, remove none of them
+ // if the query was all stopwords, remove none of them (unless
alwaysStopwords is set)
up.setRemoveStopFilter(true);
query = up.parse(mainUserQuery);
}
@@ -425,6 +428,8 @@ public class ExtendedDismaxQParser extends QParser {
up.exceptions = false;
}
+ // query may have become empty if it only contained tokenising characters
or due to
+ // stopword removal if alwaysStopwords is set
if (query == null) {
return null;
}
@@ -447,11 +452,11 @@ public class ExtendedDismaxQParser extends QParser {
/**
* Determines if query should be re-parsed removing the stop filter.
*
- * @return true if there are stopwords configured and the parsed query was
empty false in any
- * other case.
+ * @return true if the stopwords option is enabled, the alwaysStopwords
option hasn't been set and
+ * the parsed query was empty; false in any other case.
*/
protected boolean shouldRemoveStopFilter(ExtendedDismaxConfiguration config,
Query query) {
- return config.stopwords && isEmpty(query);
+ return config.stopwords && !config.alwaysStopwords && isEmpty(query);
}
private String escapeUserQuery(List<Clause> clauses) {
@@ -1699,6 +1704,8 @@ public class ExtendedDismaxQParser extends QParser {
protected boolean stopwords;
+ protected boolean alwaysStopwords;
+
protected boolean mmAutoRelax;
protected String altQ;
@@ -1749,6 +1756,8 @@ public class ExtendedDismaxQParser extends QParser {
stopwords = solrParams.getBool(DMP.STOPWORDS, true);
+ alwaysStopwords = solrParams.getBool(DMP.ALWAYS_STOPWORDS, false);
+
mmAutoRelax = solrParams.getBool(DMP.MM_AUTORELAX, false);
altQ = solrParams.get(DisMaxParams.ALTQ);
diff --git
a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
index ed85c7bbf62..19b5117488f 100644
--- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java
@@ -379,6 +379,12 @@ public class TestExtendedDismaxParser extends
SolrTestCaseJ4 {
"q", "the big"),
oner);
+ // test that the stop filter is not applied when all query terms are stopwords
+ assertQ(req("defType", "edismax", "qf", "text_sw", "q", "the"), oner);
+
+ // test that the stop filter is still applied when all query terms are stopwords and
alwaysStopwords is set
+ assertQ(req("defType", "edismax", "qf", "text_sw", "q", "the",
"alwaysStopwords", "true"), nor);
+
// searching for a literal colon value when clearly not used for a field
assertQ(
"expected doc is missing (using standard)",
diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/filters.adoc
b/solr/solr-ref-guide/modules/indexing-guide/pages/filters.adoc
index ba90cb3c725..be8febb91a6 100644
--- a/solr/solr-ref-guide/modules/indexing-guide/pages/filters.adoc
+++ b/solr/solr-ref-guide/modules/indexing-guide/pages/filters.adoc
@@ -2941,6 +2941,8 @@ Spanish stemmer, Spanish words:
This filter discards, or _stops_ analysis of, tokens that are on the given
stop words list.
A standard stop words list is included in the Solr `conf` directory, named
`stopwords.txt`, which is appropriate for typical English language text.
+Note that the xref:query-guide:edismax-query-parser.adoc[eDisMax] query parser
disables the stop filter if all query terms are stop words unless its
`alwaysStopwords` option is enabled.
+
*Factory class:* `solr.StopFilterFactory`
*Arguments:*
diff --git
a/solr/solr-ref-guide/modules/query-guide/pages/edismax-query-parser.adoc
b/solr/solr-ref-guide/modules/query-guide/pages/edismax-query-parser.adoc
index 201507c4df1..b8cd45db46f 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/edismax-query-parser.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/edismax-query-parser.adoc
@@ -27,7 +27,7 @@ In addition to supporting all the DisMax query parser
parameters, Extended DisMa
* includes improved smart partial escaping in the case of syntax errors;
fielded queries, +/-, and phrase queries are still supported in this mode.
* improves proximity boosting by using word shingles; you do not need the
query to match all words in the document before proximity boosting is applied.
* includes advanced stopword handling: stopwords are not required in the
mandatory part of the query but are still used in the proximity boosting part.
-If a query consists of all stopwords, such as "to be or not to be", then all
words are required.
+If a query consists of all stopwords, such as "to be or not to be", then all
words are required. (This feature may be disabled - see `alwaysStopwords`
below.)
* includes improved boost function: in Extended DisMax, the `boost` function
is a multiplier xref:dismax-query-parser.adoc#bq-bf-shortcomings[rather than an
addend], improving your boost results; the additive boost functions of DisMax
(`bf` and `bq`) are also supported.
* supports pure negative nested queries: queries such as `+foo (-foo)` will
match all documents.
* lets you specify which fields the end user is allowed to query, and to
disallow direct fielded searches.
@@ -109,6 +109,10 @@ If not specified, `ps` is used.
A Boolean parameter indicating if the `StopFilterFactory` configured in the
query analyzer should be respected when parsing the query.
If this is set to `false`, then the `StopFilterFactory` in the query analyzer
is ignored.
+`alwaysStopwords`::
+A Boolean parameter indicating that the `StopFilterFactory` configured in the
query analyzer should always be respected even if all query terms are stopwords.
+This defaults to `false`, in which case if a query consists of all stopwords,
such as "to be or not to be", then all words are required.
+
`uf`::
Specifies which schema fields the end user is allowed to explicitly query and
to toggle whether embedded Solr queries are supported.
This parameter supports wildcards.