mikemccand commented on a change in pull request #854: Shared PQ Based Early Termination for Concurrent Search URL: https://github.com/apache/lucene-solr/pull/854#discussion_r323175031
########## File path: lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java ########## @@ -279,7 +297,130 @@ public void collect(int doc) throws IOException { } - private static final ScoreDoc[] EMPTY_SCOREDOCS = new ScoreDoc[0]; + /* + * Collects hits into a local queue until the requested number of hits are collected + * globally. Post that, a global calibration step is performed + */ + public static class EarlyTerminatingFieldCollector extends TopFieldCollector { + + final Sort sort; + final FieldValueHitQueue<Entry> queue; + final EarlyTerminatingFieldCollectorManager earlyTerminatingFieldCollectorManager; + private final AtomicInteger globalNumberOfHits; + private boolean addedSelfToGlobalQueue; + + //TODO: Refactor this to make an interface only for field collector uses + public EarlyTerminatingFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, int totalHitsThreshold, + EarlyTerminatingFieldCollectorManager collectorManager, AtomicInteger globalNumberOfHits) { + super(queue, numHits, totalHitsThreshold, sort.needsScores()); + this.sort = sort; + this.queue = queue; + this.earlyTerminatingFieldCollectorManager = collectorManager; + this.globalNumberOfHits = globalNumberOfHits; + } + + @Override + public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { + docBase = context.docBase; + + final LeafFieldComparator[] comparators = queue.getComparators(context); + final int[] reverseMul = queue.getReverseMul(); + final Sort indexSort = context.reader().getMetaData().getSort(); + final boolean canEarlyTerminate = canEarlyTerminate(sort, indexSort); + + return new EarlyTerminatingMultiComparatorLeafCollector(comparators, reverseMul, this) { + + boolean collectedAllCompetitiveHits = false; + + @Override + public void setScorer(Scorable scorer) throws IOException { + super.setScorer(scorer); + updateMinCompetitiveScore(scorer); + } + + @Override + public void collect(int doc) throws IOException { + + if 
(globalNumberOfHits.incrementAndGet() > numHits) { + if (addedSelfToGlobalQueue == false) { + Entry returnedEntry = earlyTerminatingFieldCollectorManager.addCollectorToGlobalQueue(earlyTerminatingFieldCollector, docBase); + + if (returnedEntry != null) { + filterCompetitiveHit(returnedEntry.doc, false, returnedEntry.values); + + if (queue.size() > 0) { + Entry entry = queue.pop(); + + while (entry != null) { + filterCompetitiveHit(entry.doc, false, entry.values); + entry = queue.pop(); + } + } + } + addedSelfToGlobalQueue = true; + } + + filterCompetitiveHit(doc, true, comparator.leafValue(doc)); + } else { + // Startup transient: queue hasn't gathered numHits yet + int slot = totalHits; + ++totalHits; + + comparator.copy(slot, doc); + add(slot, doc, comparator.leafValue(doc)); + } + } + + private void filterCompetitiveHit(int doc, boolean doEarlyTermination, Object value) throws IOException { + if (collectedAllCompetitiveHits || earlyTerminatingFieldCollectorManager.compareAndUpdateBottom(docBase, doc, value) <= 0) { + // since docs are visited in doc Id order, if compare is 0, it means + // this document is larger than anything else in the queue, and + // therefore not competitive. 
+ if (canEarlyTerminate) { + if ((globalNumberOfHits.getAcquire() > totalHitsThreshold) && doEarlyTermination) { + totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO; + throw new CollectionTerminatedException(); + } else { + collectedAllCompetitiveHits = true; + } + } else if (totalHitsRelation == Relation.EQUAL_TO) { + // we just reached totalHitsThreshold, we can start setting the min + // competitive score now + updateMinCompetitiveScore(scorer); + } + return; + } + + updateMinCompetitiveScore(scorer); + } + }; + } + + @Override + public void postProcess() { + if (addedSelfToGlobalQueue == false) { + Entry returnedEntry = earlyTerminatingFieldCollectorManager.addCollectorToGlobalQueue(this, docBase); + + if (returnedEntry != null) { + if (returnedEntry != null) { Review comment: You can remove one of these `if (returnedEntry != null) {`. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org