This is an automated email from the ASF dual-hosted git repository.
houston pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 70132a879ed SOLR-17447 : Support early terminating a search based on
maxHitsAllowed per shard (#2960)
70132a879ed is described below
commit 70132a879ed54f2c9330e74c005c27a679d3a6fc
Author: Siju Varghese <[email protected]>
AuthorDate: Thu Apr 10 09:41:01 2025 -0700
SOLR-17447 : Support early terminating a search based on maxHitsAllowed per
shard (#2960)
"terminateEarly", used by Spellcheck Collation, now uses maxHitsAllowed,
which uses the same EarlyTerminatingCollector under the hood.
Co-authored-by: Siju Varghese <[email protected]>
Co-authored-by: Houston Putman <[email protected]>
(cherry picked from commit 900bf3db86956e91879eb140101416ba79662bdb)
---
solr/CHANGES.txt | 2 +
.../solr/handler/component/QueryComponent.java | 47 +++++++++++
.../solr/handler/component/ResponseBuilder.java | 17 ++++
.../apache/solr/response/SolrQueryResponse.java | 12 +++
.../solr/search/EarlyTerminatingCollector.java | 33 ++++++--
.../search/EarlyTerminatingCollectorException.java | 21 ++++-
.../apache/solr/search/MultiThreadedSearcher.java | 27 ++++--
.../java/org/apache/solr/search/QueryCommand.java | 13 +++
.../java/org/apache/solr/search/QueryResult.java | 36 ++++++++
.../org/apache/solr/search/SolrIndexSearcher.java | 32 ++++----
.../solr/search/SolrMultiCollectorManager.java | 61 ++++++++++----
.../apache/solr/spelling/SpellCheckCollator.java | 34 ++++----
.../apache/solr/search/SolrIndexSearcherTest.java | 53 ++++++++++++
.../solr/search/TestEarlyTerminatingQueries.java | 96 ++++++++++++++++++++++
.../query-guide/pages/common-query-parameters.adoc | 20 +++++
.../apache/solr/common/params/CommonParams.java | 3 +
16 files changed, 440 insertions(+), 67 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b215f4ea0a6..a010d2a4972 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -23,6 +23,8 @@ New Features
* SOLR-17678: ReRank queries can now return the matchScore (original score) in
addition to the re-ranked score. (Siju Varghese, Houston Putman)
+* SOLR-17447 : Support terminating a search early based on maxHitsAllowed per
shard. (Siju Varghese, Houston Putman, David Smiley, Gus Heck)
+
Improvements
---------------------
* SOLR-15751: The v2 API now has parity with the v1 "COLSTATUS" and "segments"
APIs, which can be used to fetch detailed information about
diff --git
a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index fbd19cc99bc..e4b5a16b232 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -411,6 +411,17 @@ public class QueryComponent extends SearchComponent {
params.getBool(
CommonParams.SEGMENT_TERMINATE_EARLY,
CommonParams.SEGMENT_TERMINATE_EARLY_DEFAULT));
+ // max hits allowed per shard
+ final Integer maxHitsAllowed =
params.getInt(CommonParams.MAX_HITS_ALLOWED);
+
+ if (maxHitsAllowed != null) {
+ int maxHits = Math.max(maxHitsAllowed, cmd.getLen());
+ if (cmd.getMinExactCount() < Integer.MAX_VALUE) {
+ maxHits = Math.max(cmd.getMinExactCount(), maxHits);
+ }
+ cmd.setMaxHitsAllowed(maxHits);
+ }
+
//
// grouping / field collapsing
//
@@ -949,6 +960,8 @@ public class QueryComponent extends SearchComponent {
Float maxScore = null;
boolean thereArePartialResults = false;
Boolean segmentTerminatedEarly = null;
+ boolean maxHitsTerminatedEarly = false;
+ long approximateTotalHits = 0;
int failedShardCount = 0;
for (ShardResponse srsp : sreq.responses) {
SolrDocumentList docs = null;
@@ -984,6 +997,16 @@ public class QueryComponent extends SearchComponent {
if (rhste != null) {
nl.add(SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY, rhste);
}
+ final Object rhmhte =
+
responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY);
+ if (rhmhte != null) {
+
nl.add(SolrQueryResponse.RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY, rhmhte);
+ }
+ final Object rhath =
+
responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY);
+ if (rhath != null) {
+
nl.add(SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY, rhath);
+ }
docs =
(SolrDocumentList)
SolrResponseUtil.getSubsectionFromShardResponse(rb, srsp,
"response", false);
@@ -1043,6 +1066,19 @@ public class QueryComponent extends SearchComponent {
}
}
+ if (!maxHitsTerminatedEarly) {
+ if (Boolean.TRUE.equals(
+
responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY)))
{
+ maxHitsTerminatedEarly = true;
+ }
+ }
+ Object ath =
responseHeader.get(SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY);
+ if (ath == null) {
+ approximateTotalHits += numFound;
+ } else {
+ approximateTotalHits += ((Number) ath).longValue();
+ }
+
// calculate global maxScore and numDocsFound
if (docs.getMaxScore() != null) {
maxScore = maxScore == null ? docs.getMaxScore() : Math.max(maxScore,
docs.getMaxScore());
@@ -1185,6 +1221,17 @@ public class QueryComponent extends SearchComponent {
segmentTerminatedEarly);
}
}
+ if (maxHitsTerminatedEarly) {
+ rb.rsp
+ .getResponseHeader()
+
.add(SolrQueryResponse.RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY,
Boolean.TRUE);
+ if (approximateTotalHits > 0) {
+ rb.rsp
+ .getResponseHeader()
+ .add(
+ SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY,
approximateTotalHits);
+ }
+ }
}
/**
diff --git
a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
index 0216063d2f6..2029ffd0d61 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
@@ -455,6 +455,16 @@ public class ResponseBuilder {
getResults().docList =
new DocSlice(0, 0, new int[] {}, new float[] {}, 0, 0,
TotalHits.Relation.EQUAL_TO);
}
+ final Object partialResponseDetail = result.getPartialResultsDetails();
+ if (partialResponseDetail != null) {
+ rsp.addPartialResponseDetail(partialResponseDetail);
+ }
+ final Object approximateTotalHits = result.getApproximateTotalHits();
+ if (approximateTotalHits != null) {
+ rsp.getResponseHeader()
+ .add(
+ SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY,
approximateTotalHits);
+ }
}
final Boolean segmentTerminatedEarly = result.getSegmentTerminatedEarly();
if (segmentTerminatedEarly != null) {
@@ -463,6 +473,13 @@ public class ResponseBuilder {
SolrQueryResponse.RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY,
segmentTerminatedEarly);
}
+ final Boolean maxHitsTerminatedEarly = result.getMaxHitsTerminatedEarly();
+ if (maxHitsTerminatedEarly != null) {
+ rsp.getResponseHeader()
+ .add(
+ SolrQueryResponse.RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY,
+ maxHitsTerminatedEarly);
+ }
if (null != cursorMark) {
assert null != result.getNextCursorMark() : "using cursor but no next
cursor set";
this.setNextCursorMark(result.getNextCursorMark());
diff --git a/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
b/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
index 5676a63fe10..60aaf4be3ea 100644
--- a/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
+++ b/solr/core/src/java/org/apache/solr/response/SolrQueryResponse.java
@@ -68,6 +68,18 @@ public class SolrQueryResponse {
public static final String RESPONSE_HEADER_PARTIAL_RESULTS_DETAILS_KEY =
"partialResultsDetails";
public static final String RESPONSE_HEADER_SEGMENT_TERMINATED_EARLY_KEY =
"segmentTerminatedEarly";
+
+ /**
+ * Header to indicate that the search was terminated early because of hits
exceeding the query
+ * configured limit (<code>maxHitsAllowed</code>). Presence of this flag
also indicates the
+ * partialResults, however in the absence of <i>maxHitsTerminatedEarly</i>,
<i>partialResults</i>
+ * would be due to other limits like time/cpu.
+ */
+ public static final String RESPONSE_HEADER_MAX_HITS_TERMINATED_EARLY_KEY =
+ "maxHitsTerminatedEarly";
+
+ public static final String RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY =
"approximateTotalHits";
+
public static final String RESPONSE_HEADER_KEY = "responseHeader";
private static final String RESPONSE_KEY = "response";
diff --git
a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java
b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java
index c47abfcc52d..04af54b6046 100644
--- a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java
@@ -17,6 +17,7 @@
package org.apache.solr.search;
import java.io.IOException;
+import java.util.concurrent.atomic.LongAdder;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.FilterCollector;
@@ -29,11 +30,15 @@ import org.apache.lucene.search.LeafCollector;
*/
public class EarlyTerminatingCollector extends FilterCollector {
+ private final int chunkSize; // Check across threads only at a chunk size
+
private final int maxDocsToCollect;
- private int numCollected = 0;
+ private int numCollectedLocally = 0;
private int prevReaderCumulativeSize = 0;
private int currentReaderSize = 0;
+ private final LongAdder pendingDocsToCollect;
+ private boolean terminatedEarly = false;
/**
* Wraps a {@link Collector}, throwing {@link
EarlyTerminatingCollectorException} once the
@@ -43,11 +48,17 @@ public class EarlyTerminatingCollector extends
FilterCollector {
* @param maxDocsToCollect - the maximum number of documents to Collect
*/
public EarlyTerminatingCollector(Collector delegate, int maxDocsToCollect) {
+ this(delegate, maxDocsToCollect, null);
+ }
+
+ public EarlyTerminatingCollector(
+ Collector delegate, int maxDocsToCollect, LongAdder docsToCollect) {
super(delegate);
assert 0 < maxDocsToCollect;
assert null != delegate;
-
this.maxDocsToCollect = maxDocsToCollect;
+ this.pendingDocsToCollect = docsToCollect;
+ this.chunkSize = Math.min(100, maxDocsToCollect / 10);
}
@Override
@@ -60,12 +71,24 @@ public class EarlyTerminatingCollector extends
FilterCollector {
@Override
public void collect(int doc) throws IOException {
super.collect(doc);
- numCollected++;
- if (maxDocsToCollect <= numCollected) {
+ numCollectedLocally++;
+ terminatedEarly = numCollectedLocally >= maxDocsToCollect;
+ if (pendingDocsToCollect != null) {
+ pendingDocsToCollect.increment();
+ if (numCollectedLocally % chunkSize == 0) {
+ final long overallCollectedDocCount =
pendingDocsToCollect.intValue();
+ terminatedEarly = overallCollectedDocCount >= maxDocsToCollect;
+ }
+ }
+ if (terminatedEarly) {
throw new EarlyTerminatingCollectorException(
- numCollected, prevReaderCumulativeSize + (doc + 1));
+ maxDocsToCollect, prevReaderCumulativeSize + (doc + 1));
}
}
};
}
+
+ public Collector getDelegate() {
+ return super.in;
+ }
}
diff --git
a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java
b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java
index b6a04468612..8f4470060ef 100644
---
a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java
+++
b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java
@@ -16,19 +16,26 @@
*/
package org.apache.solr.search;
+import java.util.Locale;
+
/**
* Thrown by {@link EarlyTerminatingCollector} when the maximum to abort the
scoring / collection
* process early, when the specified maximum number of documents were
collected.
*/
public class EarlyTerminatingCollectorException extends RuntimeException {
private static final long serialVersionUID = 5939241340763428118L;
- private int numberScanned;
- private int numberCollected;
+ private final int numberScanned;
+ private final int numberCollected;
public EarlyTerminatingCollectorException(int numberCollected, int
numberScanned) {
+ super(
+ String.format(
+ Locale.ROOT,
+ "maxHitsAllowed reached: %d documents collected out of %d scanned",
+ numberCollected,
+ numberScanned));
assert numberCollected <= numberScanned : numberCollected + "<=" +
numberScanned;
assert 0 < numberCollected;
- assert 0 < numberScanned;
this.numberCollected = numberCollected;
this.numberScanned = numberScanned;
@@ -54,4 +61,12 @@ public class EarlyTerminatingCollectorException extends
RuntimeException {
public int getNumberCollected() {
return numberCollected;
}
+
+ public long getApproximateTotalHits(int maxDocId) {
+ if (numberScanned == maxDocId) {
+ return numberCollected;
+ } else {
+ return (long) (maxDocId * ((double) numberCollected) / ((double)
numberScanned));
+ }
+ }
}
diff --git
a/solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java
b/solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java
index c4bdd921e7a..80300d0bdb3 100644
--- a/solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java
@@ -52,7 +52,8 @@ public class MultiThreadedSearcher {
Query query,
boolean needTopDocs,
boolean needMaxScore,
- boolean needDocSet)
+ boolean needDocSet,
+ QueryResult queryResult)
throws IOException {
Collection<CollectorManager<Collector, Object>> collectors = new
ArrayList<>();
@@ -90,10 +91,17 @@ public class MultiThreadedSearcher {
@SuppressWarnings({"unchecked", "rawtypes"})
CollectorManager<Collector, Object>[] colls = collectors.toArray(new
CollectorManager[0]);
- SolrMultiCollectorManager manager = new SolrMultiCollectorManager(colls);
+ final SolrMultiCollectorManager manager = new
SolrMultiCollectorManager(cmd, colls);
Object[] ret;
try {
ret = searcher.search(query, manager);
+ } catch (EarlyTerminatingCollectorException ex) {
+ ret = manager.reduce();
+ queryResult.setMaxHitsTerminatedEarly(true);
+ queryResult.setPartialResults(Boolean.TRUE);
+ queryResult.setPartialResultsDetails(ex.getMessage());
+ queryResult.setApproximateTotalHits(
+ ex.getApproximateTotalHits(searcher.getIndexReader().maxDoc()));
} catch (Exception ex) {
if (ex instanceof RuntimeException
&& ex.getCause() != null
@@ -191,7 +199,7 @@ public class MultiThreadedSearcher {
final ScoreMode scoreMode;
private final Object[] result;
- public SearchResult(ScoreMode scoreMode, Object[] result) {
+ SearchResult(ScoreMode scoreMode, Object[] result) {
this.scoreMode = scoreMode;
this.result = result;
}
@@ -252,10 +260,14 @@ public class MultiThreadedSearcher {
MaxScoreCollector collector;
float maxScore = 0.0f;
- for (Iterator var4 = collectors.iterator();
- var4.hasNext();
+ for (Iterator collectorIterator = collectors.iterator();
+ collectorIterator.hasNext();
maxScore = Math.max(maxScore, collector.getMaxScore())) {
- collector = (MaxScoreCollector) var4.next();
+ Collector next = (Collector) collectorIterator.next();
+ if (next instanceof EarlyTerminatingCollector) {
+ next = ((EarlyTerminatingCollector) next).getDelegate();
+ }
+ collector = (MaxScoreCollector) next;
}
return new MaxScoreResult(maxScore);
@@ -325,6 +337,9 @@ public class MultiThreadedSearcher {
Collector collector;
for (Object o : collectors) {
collector = (Collector) o;
+ if (collector instanceof EarlyTerminatingCollector) {
+ collector = ((EarlyTerminatingCollector) collector).getDelegate();
+ }
if (collector instanceof TopDocsCollector) {
TopDocs td = ((TopDocsCollector) collector).topDocs(0, len);
assert td != null : Arrays.asList(topDocs);
diff --git a/solr/core/src/java/org/apache/solr/search/QueryCommand.java
b/solr/core/src/java/org/apache/solr/search/QueryCommand.java
index d985a8f17d8..fcd6373ece8 100755
--- a/solr/core/src/java/org/apache/solr/search/QueryCommand.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryCommand.java
@@ -43,6 +43,7 @@ public class QueryCommand {
private int minExactCount = Integer.MAX_VALUE;
private CursorMark cursorMark;
private boolean distribStatsDisabled;
+ private int maxHitsAllowed = Integer.MAX_VALUE;
public CursorMark getCursorMark() {
return cursorMark;
@@ -222,6 +223,10 @@ public class QueryCommand {
return (flags & SolrIndexSearcher.TERMINATE_EARLY) != 0;
}
+ public boolean shouldEarlyTerminateSearch() {
+ return getTerminateEarly() || getMaxHitsAllowed() < Integer.MAX_VALUE;
+ }
+
public QueryCommand setTerminateEarly(boolean segmentTerminateEarly) {
if (segmentTerminateEarly) {
return setFlags(SolrIndexSearcher.TERMINATE_EARLY);
@@ -270,4 +275,12 @@ public class QueryCommand {
public QueryResult search(SolrIndexSearcher searcher) throws IOException {
return searcher.search(this);
}
+
+ public int getMaxHitsAllowed() {
+ return maxHitsAllowed;
+ }
+
+ public void setMaxHitsAllowed(int maxHitsAllowed) {
+ this.maxHitsAllowed = maxHitsAllowed;
+ }
}
diff --git a/solr/core/src/java/org/apache/solr/search/QueryResult.java
b/solr/core/src/java/org/apache/solr/search/QueryResult.java
index 900a72214e7..e047914d44f 100755
--- a/solr/core/src/java/org/apache/solr/search/QueryResult.java
+++ b/solr/core/src/java/org/apache/solr/search/QueryResult.java
@@ -21,9 +21,13 @@ public class QueryResult {
// Object for back compatibility so that we render true not "true" in json
private Object partialResults;
+ private Object partialResultsDetails;
private Boolean segmentTerminatedEarly;
+ private Boolean terminatedEarly;
private DocListAndSet docListAndSet;
private CursorMark nextCursorMark;
+ private Boolean maxHitsTerminatedEarly;
+ private Long approximateTotalHits;
public Object groupedResults; // TODO: currently for testing
@@ -62,6 +66,14 @@ public class QueryResult {
this.partialResults = partialResults;
}
+ public Object getPartialResultsDetails() {
+ return partialResultsDetails;
+ }
+
+ public void setPartialResultsDetails(Object partialResultsDetails) {
+ this.partialResultsDetails = partialResultsDetails;
+ }
+
public Boolean getSegmentTerminatedEarly() {
return segmentTerminatedEarly;
}
@@ -85,4 +97,28 @@ public class QueryResult {
public CursorMark getNextCursorMark() {
return nextCursorMark;
}
+
+ public Boolean getTerminatedEarly() {
+ return terminatedEarly;
+ }
+
+ public void setTerminatedEarly(boolean terminatedEarly) {
+ this.terminatedEarly = terminatedEarly;
+ }
+
+ public Boolean getMaxHitsTerminatedEarly() {
+ return maxHitsTerminatedEarly;
+ }
+
+ public void setMaxHitsTerminatedEarly(Boolean maxHitsTerminatedEarly) {
+ this.maxHitsTerminatedEarly = maxHitsTerminatedEarly;
+ }
+
+ public Long getApproximateTotalHits() {
+ return approximateTotalHits;
+ }
+
+ public void setApproximateTotalHits(long approximateTotalHits) {
+ this.approximateTotalHits = approximateTotalHits;
+ }
}
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 707d1a1be53..ec270103a1b 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -288,9 +288,8 @@ public class SolrIndexSearcher extends IndexSearcher
implements Closeable, SolrI
}
}
- final boolean terminateEarly = cmd.getTerminateEarly();
- if (terminateEarly) {
- collector = new EarlyTerminatingCollector(collector, cmd.getLen());
+ if (cmd.shouldEarlyTerminateSearch()) {
+ collector = new EarlyTerminatingCollector(collector,
cmd.getMaxHitsAllowed());
}
final long timeAllowed = cmd.getTimeAllowed();
@@ -314,29 +313,33 @@ public class SolrIndexSearcher extends IndexSearcher
implements Closeable, SolrI
}
try {
- super.search(query, collector);
+ try {
+ super.search(query, collector);
+ } finally {
+ // The complete() method can use the collectors, so this needs to be
surrounded by the same
+ // catch logic that limit collecting
+ if (collector instanceof DelegatingCollector) {
+ ((DelegatingCollector) collector).complete();
+ }
+ }
} catch (TimeLimitingCollector.TimeExceededException
| ExitableDirectoryReader.ExitingReaderException
| CancellableCollector.QueryCancelledException x) {
log.warn("Query: [{}]; ", query, x);
qr.setPartialResults(true);
} catch (EarlyTerminatingCollectorException etce) {
- if (collector instanceof DelegatingCollector) {
- ((DelegatingCollector) collector).complete();
- }
- throw etce;
+ qr.setPartialResults(true);
+ qr.setMaxHitsTerminatedEarly(true);
+ qr.setPartialResultsDetails(etce.getMessage());
+
qr.setApproximateTotalHits(etce.getApproximateTotalHits(reader.maxDoc()));
} finally {
if (earlyTerminatingSortingCollector != null) {
qr.setSegmentTerminatedEarly(earlyTerminatingSortingCollector.terminatedEarly());
}
-
if (cmd.isQueryCancellable()) {
core.getCancellableQueryTracker().removeCancellableQuery(cmd.getQueryID());
}
}
- if (collector instanceof DelegatingCollector) {
- ((DelegatingCollector) collector).complete();
- }
return collector;
}
@@ -1981,13 +1984,12 @@ public class SolrIndexSearcher extends IndexSearcher
implements Closeable, SolrI
log.trace("MULTI-THREADED search, using CollectorManager int
getDocListNC");
final MultiThreadedSearcher.SearchResult searchResult =
new MultiThreadedSearcher(this)
- .searchCollectorManagers(len, cmd, query, true, needScores,
false);
+ .searchCollectorManagers(len, cmd, query, true, needScores,
false, qr);
scoreModeUsed = searchResult.scoreMode;
MultiThreadedSearcher.TopDocsResult topDocsResult =
searchResult.getTopDocsResult();
totalHits = topDocsResult.totalHits;
topDocs = topDocsResult.topDocs;
-
maxScore = searchResult.getMaxScore(totalHits);
}
@@ -2103,7 +2105,7 @@ public class SolrIndexSearcher extends IndexSearcher
implements Closeable, SolrI
MultiThreadedSearcher.SearchResult searchResult =
new MultiThreadedSearcher(this)
- .searchCollectorManagers(len, cmd, query, true, needScores,
true);
+ .searchCollectorManagers(len, cmd, query, true, needScores,
true, qr);
MultiThreadedSearcher.TopDocsResult topDocsResult =
searchResult.getTopDocsResult();
totalHits = topDocsResult.totalHits;
topDocs = topDocsResult.topDocs;
diff --git
a/solr/core/src/java/org/apache/solr/search/SolrMultiCollectorManager.java
b/solr/core/src/java/org/apache/solr/search/SolrMultiCollectorManager.java
index e2b7e6fe7af..390f409616d 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrMultiCollectorManager.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrMultiCollectorManager.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
+import java.util.concurrent.atomic.LongAdder;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
@@ -37,20 +38,43 @@ public class SolrMultiCollectorManager
implements CollectorManager<SolrMultiCollectorManager.Collectors,
Object[]> {
private final CollectorManager<Collector, ?>[] collectorManagers;
+ private LongAdder runningHits = null;
+ private int maxDocsToCollect;
+ private final List<Collectors> reducableCollectors = new ArrayList<>();
@SafeVarargs
@SuppressWarnings({"varargs", "unchecked"})
public SolrMultiCollectorManager(
+ QueryCommand queryCommand,
final CollectorManager<? extends Collector, ?>... collectorManagers) {
if (collectorManagers.length < 1) {
throw new IllegalArgumentException("There must be at least one
collector");
}
this.collectorManagers = (CollectorManager[]) collectorManagers;
+ if (queryCommand.shouldEarlyTerminateSearch()) {
+ runningHits = new LongAdder();
+ maxDocsToCollect = queryCommand.getMaxHitsAllowed();
+ }
+ }
+
+ // TODO: could Lucene's MultiCollector permit reuse of its logic?
+ public static ScoreMode scoreMode(Collector[] collectors) {
+ ScoreMode scoreMode = null;
+ for (Collector collector : collectors) {
+ if (scoreMode == null) {
+ scoreMode = collector.scoreMode();
+ } else if (scoreMode != collector.scoreMode()) {
+ return ScoreMode.COMPLETE;
+ }
+ }
+ return scoreMode;
}
@Override
public Collectors newCollector() throws IOException {
- return new Collectors();
+ final Collectors collector = new Collectors();
+ reducableCollectors.add(collector);
+ return collector;
}
@Override
@@ -59,24 +83,16 @@ public class SolrMultiCollectorManager
final Object[] results = new Object[collectorManagers.length];
for (int i = 0; i < collectorManagers.length; i++) {
final List<Collector> reducableCollector = new ArrayList<>(size);
- for (Collectors collectors : reducableCollectors)
+ for (Collectors collectors : reducableCollectors) {
reducableCollector.add(collectors.collectors[i]);
+ }
results[i] = collectorManagers[i].reduce(reducableCollector);
}
return results;
}
- // TODO: could Lucene's MultiCollector permit reuse of its logic?
- public static ScoreMode scoreMode(Collector[] collectors) {
- ScoreMode scoreMode = null;
- for (Collector collector : collectors) {
- if (scoreMode == null) {
- scoreMode = collector.scoreMode();
- } else if (scoreMode != collector.scoreMode()) {
- return ScoreMode.COMPLETE;
- }
- }
- return scoreMode;
+ public Object[] reduce() throws IOException {
+ return reduce(reducableCollectors);
}
/** Wraps multiple collectors for processing */
@@ -86,8 +102,13 @@ public class SolrMultiCollectorManager
private Collectors() throws IOException {
collectors = new Collector[collectorManagers.length];
- for (int i = 0; i < collectors.length; i++)
- collectors[i] = collectorManagers[i].newCollector();
+ for (int i = 0; i < collectors.length; i++) {
+ Collector collector = collectorManagers[i].newCollector();
+ if (runningHits != null) {
+ collector = new EarlyTerminatingCollector(collector,
maxDocsToCollect, runningHits);
+ }
+ collectors[i] = collector;
+ }
}
@Override
@@ -115,15 +136,19 @@ public class SolrMultiCollectorManager
throws IOException {
this.skipNonCompetitiveScores = skipNonCompetitiveScores;
leafCollectors = new LeafCollector[collectors.length];
- for (int i = 0; i < collectors.length; i++)
+ for (int i = 0; i < collectors.length; i++) {
leafCollectors[i] = collectors[i].getLeafCollector(context);
+ }
}
@Override
public final void setScorer(final Scorable scorer) throws IOException {
if (skipNonCompetitiveScores) {
- for (LeafCollector leafCollector : leafCollectors)
- if (leafCollector != null) leafCollector.setScorer(scorer);
+ for (LeafCollector leafCollector : leafCollectors) {
+ if (leafCollector != null) {
+ leafCollector.setScorer(scorer);
+ }
+ }
} else {
FilterScorable fScorer =
new FilterScorable(scorer) {
diff --git
a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
index a099cc17ddc..b65e7d115b2 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
@@ -32,14 +32,13 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.QueryComponent;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.search.EarlyTerminatingCollectorException;
import org.apache.solr.search.QueryLimits;
-import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -126,6 +125,9 @@ public class SpellCheckCollator {
params.set(CommonParams.Q, collationQueryStr);
params.remove(CommonParams.START);
params.set(CommonParams.ROWS, "" + docCollectionLimit);
+ if (docCollectionLimit > 0) {
+ params.set(CommonParams.MAX_HITS_ALLOWED, docCollectionLimit);
+ }
// we don't want any stored fields
params.set(CommonParams.FL, ID);
// we'll sort by doc id to ensure no scoring is done.
@@ -155,28 +157,20 @@ public class SpellCheckCollator {
checkResponse.setFilters(ultimateResponse.getFilters());
checkResponse.setQueryString(collationQueryStr);
checkResponse.components = Arrays.asList(queryComponent);
+ checkResponse.rsp.addResponseHeader(new SimpleOrderedMap<>());
try {
queryComponent.prepare(checkResponse);
- if (docCollectionLimit > 0) {
- int f = checkResponse.getFieldFlags();
- checkResponse.setFieldFlags(f |=
SolrIndexSearcher.TERMINATE_EARLY);
- }
queryComponent.process(checkResponse);
- hits = ((Number)
checkResponse.rsp.getToLog().get("hits")).longValue();
- } catch (EarlyTerminatingCollectorException etce) {
- assert (docCollectionLimit > 0);
- assert 0 < etce.getNumberScanned();
- assert 0 < etce.getNumberCollected();
-
- if (etce.getNumberScanned() == maxDocId) {
- hits = etce.getNumberCollected();
- } else {
- hits =
- (long)
- (((float) (maxDocId * etce.getNumberCollected()))
- / (float) etce.getNumberScanned());
- }
+ hits =
+ ((Number)
+ checkResponse
+ .rsp
+ .getResponseHeader()
+ .getOrDefault(
+
SolrQueryResponse.RESPONSE_HEADER_APPROXIMATE_TOTAL_HITS_KEY,
+ checkResponse.rsp.getToLog().get("hits")))
+ .longValue();
} catch (Exception e) {
log.warn(
"Exception trying to re-query to check if a spell check
possibility would return any hits.",
diff --git
a/solr/core/src/test/org/apache/solr/search/SolrIndexSearcherTest.java
b/solr/core/src/test/org/apache/solr/search/SolrIndexSearcherTest.java
index 23877720e7a..7af36024055 100644
--- a/solr/core/src/test/org/apache/solr/search/SolrIndexSearcherTest.java
+++ b/solr/core/src/test/org/apache/solr/search/SolrIndexSearcherTest.java
@@ -433,12 +433,65 @@ public class SolrIndexSearcherTest extends SolrTestCaseJ4
{
});
}
+ public void testMaxHitsAllowed() throws Exception {
+ h.getCore()
+ .withSearcher(
+ searcher -> {
+ // No max hits
+ QueryCommand cmd = createBasicQueryCommand(1000, 10, "field1_s",
"foo");
+ final QueryResult search = searcher.search(cmd);
+ assertEquals(NUM_DOCS, search.getDocList().matches());
+ assertEquals(10, search.getDocList().size());
+ assertFalse(search.isPartialResults());
+ assertNull(search.getTerminatedEarly());
+ assertNull(search.getMaxHitsTerminatedEarly());
+ return null;
+ });
+
+ h.getCore()
+ .withSearcher(
+ searcher -> {
+ // max hits > doc count
+ QueryCommand cmd = createBasicQueryCommand(1000, 10, 1000,
"field1_s", "foo");
+ final QueryResult search = searcher.search(cmd);
+ assertEquals(NUM_DOCS, search.getDocList().matches());
+ assertEquals(10, search.getDocList().size());
+ assertFalse(search.isPartialResults());
+ assertNull(search.getTerminatedEarly());
+ assertNull(search.getMaxHitsTerminatedEarly());
+ return null;
+ });
+
+ h.getCore()
+ .withSearcher(
+ searcher -> {
+ // max hits < doc count
+ QueryCommand cmd = createBasicQueryCommand(1000, 10, 20,
"field1_s", "foo");
+ final QueryResult search = searcher.search(cmd);
+ // in a single threaded search, the maxHitsAllowed will be exact
+ assertEquals(20, search.getDocList().matches());
+ assertEquals(10, search.getDocList().size());
+ assertTrue(search.isPartialResults());
+ assertNull(search.getTerminatedEarly());
+ assertEquals(Boolean.TRUE, search.getMaxHitsTerminatedEarly());
+ return null;
+ });
+ }
+
private QueryCommand createBasicQueryCommand(
int minExactCount, int length, String field, String q) {
+ return createBasicQueryCommand(minExactCount, length, 0, field, q);
+ }
+
+ private QueryCommand createBasicQueryCommand(
+ int minExactCount, int length, int maxHitsAllowed, String field, String
q) {
QueryCommand cmd = new QueryCommand();
cmd.setMinExactCount(minExactCount);
cmd.setLen(length);
cmd.setFlags(SolrIndexSearcher.NO_CHECK_QCACHE |
SolrIndexSearcher.NO_SET_QCACHE);
+ if (maxHitsAllowed > 0) {
+ cmd.setMaxHitsAllowed(maxHitsAllowed);
+ }
cmd.setQuery(new TermQuery(new Term(field, q)));
return cmd;
}
diff --git
a/solr/core/src/test/org/apache/solr/search/TestEarlyTerminatingQueries.java
b/solr/core/src/test/org/apache/solr/search/TestEarlyTerminatingQueries.java
new file mode 100644
index 00000000000..e4f31b4c22c
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestEarlyTerminatingQueries.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import java.util.Locale;
+import org.apache.lucene.tests.util.TestUtil;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.util.ThreadCpuTimer;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestEarlyTerminatingQueries extends SolrCloudTestCase { // cloud-mode check of maxHitsAllowed early termination and the response headers it sets
+
+ private static final String COLLECTION = "test";
+
+ @BeforeClass
+ public static void setupCluster() throws Exception {
+ System.setProperty(ThreadCpuTimer.ENABLE_CPU_TIME, "true"); // NOTE(review): no cpuAllowed query is issued in this class - confirm this property is needed
+ configureCluster(1).addConfig("conf",
configset("cloud-minimal")).configure();
+ SolrClient solrClient = cluster.getSolrClient();
+ CollectionAdminRequest.Create create =
+ CollectionAdminRequest.createCollection(COLLECTION, "conf", 3, 2); // 3 shards, 2 replicas per shard (matches clusterShape(3, 6) below)
+ create.process(solrClient);
+ waitForState("active", COLLECTION, clusterShape(3, 6));
+ for (int j = 0; j < 100; j++) { // 100 docs over 3 shards: at least one shard holds far more than the 5 hits allowed in the test below
+ solrClient.add(
+ COLLECTION,
+ sdoc(
+ "id",
+ "id-" + j,
+ "val_i",
+ j % 5,
+ "text",
+ TestUtil.randomHtmlishString(random(), 100)));
+ }
+ solrClient.commit(COLLECTION);
+ }
+
+ @Test
+ public void testMaxHitsEarlyTermination() throws Exception {
+ SolrClient solrClient = cluster.getSolrClient();
+ QueryResponse rsp = solrClient.query(COLLECTION, params("q", "*:*")); // baseline: no limit requested, so no partialResults header is expected
+ assertNull("should have full results",
rsp.getHeader().get("partialResults"));
+
+ rsp =
+ solrClient.query(
+ COLLECTION,
+ params(
+ "q",
+ "id:*",
+ "sort",
+ "id asc",
+ "maxHitsAllowed",
+ "5", // limit is applied independently on each shard, not to the collection as a whole
+ "rows",
+ "5",
+ "multiThreaded",
+ "false")); // single-threaded search per shard; with multi-threaded search the limit is only best-effort
+ assertEquals(
+ "should have partial results for maxHitsAllowed",
+ Boolean.TRUE,
+ rsp.getHeader().get("partialResults"));
+ assertEquals(
+ "should have maxHitsTerminatedEarly response header for
maxHitsAllowed",
+ Boolean.TRUE,
+ rsp.getHeader().get("maxHitsTerminatedEarly"));
+ assertNotNull(
+ "should have approximateTotalHits response header for maxHitsAllowed",
+ rsp.getHeader().get("approximateTotalHits"));
+ assertTrue(
+ String.format(
+ Locale.ROOT,
+ "approximateTotalHits (%s) response header should be greater than
numFound (%d)",
+ rsp.getHeader().get("approximateTotalHits"),
+ rsp.getResults().getNumFound()),
+ ((Number) rsp.getHeader().get("approximateTotalHits")).longValue()
+ > rsp.getResults().getNumFound());
+ }
+}
diff --git
a/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc
b/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc
index bc8cd641690..eca63e25d36 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/common-query-parameters.adoc
@@ -399,6 +399,26 @@ For example, setting `cpuAllowed=500` gives a limit of at
most 500 ms of CPU tim
All other considerations regarding partial results listed for the
`timeAllowed` parameter apply here, too.
+
+== maxHitsAllowed Parameter
+
+[%autowidth,frame=none]
+|===
+|Optional |Default: none
+|===
+
+This parameter specifies the max number of hits a searcher will iterate
through.
+Searchers will arbitrarily ignore any number of additional hits beyond this
value.
+In a multi-sharded collection, this parameter will be used independently
**per-shard**.
+When utilizing multi-threaded search, the limit will be best-effort and
collectors may iterate through a few more documents than specified in
`maxHitsAllowed`.
+
+The intention of this feature is to favor speed over perfect relevancy &
recall.
+The trade-off is that if one shard contains many relevant hits and another
contains a few less-relevant hits, the less-relevant hits from the second shard
may be returned instead of the more relevant hits that were clustered in the
first shard.
+
+This parameter works in conjunction with other parameters that can terminate
a search early, e.g., `timeAllowed`.
+If the search is terminated early because it exceeded `maxHitsAllowed`, a
`maxHitsTerminatedEarly` header is set in the response, along with
`partialResults`, to indicate this.
+The `partialResults` flag could be set in the absence of the `maxHitsAllowed`
parameter due to other limits like `timeAllowed` or `cpuAllowed`.
+
== memAllowed Parameter
This parameter specifies the amount of memory (a float value, in MiB) allowed
for a search thread to allocate
diff --git
a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
index 3055708d29e..1ba04ea2156 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java
@@ -183,6 +183,9 @@ public interface CommonParams {
*/
String MEM_ALLOWED = "memAllowed";
+ /** The max hits to be collected per shard. */
+ String MAX_HITS_ALLOWED = "maxHitsAllowed";
+
/** Is the query cancellable? */
String IS_QUERY_CANCELLABLE = "canCancel";