LuXugang commented on code in PR #12405:
URL: https://github.com/apache/lucene/pull/12405#discussion_r1275868914
##########
lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java:
##########
@@ -329,87 +324,65 @@ private void updateSkipInterval(boolean success) {
* value is 5, we will use a range on [MIN_VALUE, 4].
*/
private void encodeBottom() {
- if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO && isBottomMinOrMax() ==
false) {
- byte[] bottom = new byte[bytesCount];
- encodeBottom(bottom);
- if (reverse == false) {
- NumericUtils.subtract(bytesCount, 0, bottom, deltaOne,
maxValueAsBytes);
- } else {
- NumericUtils.add(bytesCount, 0, bottom, deltaOne, minValueAsBytes);
+ if (reverse == false) {
+ encodeBottom(maxValueAsBytes);
+ if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO) {
+ NumericUtils.nextDown(maxValueAsBytes);
}
} else {
- encodeBottom(reverse == false ? maxValueAsBytes : minValueAsBytes);
+ encodeBottom(minValueAsBytes);
+ if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO) {
+ NumericUtils.nextUp(minValueAsBytes);
+ }
}
}
- private class CompetitiveIterator extends DocIdSetIterator {
-
- private final LeafReaderContext context;
- private final int maxDoc;
- private final String field;
- private int doc = -1;
- private DocIdSetIterator docsWithDocValue;
- private DocIdSetIterator docsWithPoint;
- private final boolean skipWithDocValues;
-
- CompetitiveIterator(LeafReaderContext context, String field, boolean
skipWithDocValues) {
- this.context = context;
- this.maxDoc = context.reader().maxDoc();
- this.field = field;
- this.skipWithDocValues = skipWithDocValues;
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(docID() + 1);
- }
-
- @Override
- public int advance(int target) throws IOException {
- if (target >= maxDoc) {
- return doc = NO_MORE_DOCS;
- } else if (docsWithPoint != null) {
- assert hitsThresholdReached == true;
- return doc = docsWithPoint.advance(target);
- } else if (docsWithDocValue != null) {
- assert hitsThresholdReached == true;
- return doc = docsWithDocValue.advance(target);
- } else {
- return doc = target;
+ /**
+ * If {@link NumericComparator#pruning} equals {@link
Pruning#GREATER_THAN_OR_EQUAL_TO}, we
+ * could better tune the {@link
NumericLeafComparator#maxValueAsBytes}/{@link
+ * NumericLeafComparator#minValueAsBytes}. For instance, if the sort is
ascending and top value
+ * is 3, we will use a range on [4, MAX_VALUE].
+ */
+ private void encodeTop() {
+ if (reverse == false) {
+ encodeTop(minValueAsBytes);
+ if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO) {
Review Comment:
Top values could still be competitive when the queue is not full. Use this condition instead:
`if (pruning == Pruning.GREATER_THAN_OR_EQUAL_TO && queueFull)`
See this case:
```java
public void testSortOptimizationEqualValues1() throws IOException{
final Directory dir = newDirectory();
IndexWriterConfig config = new
IndexWriterConfig().setCodec(TestUtil.getDefaultCodec());
final IndexWriter writer = new IndexWriter(dir, config);
final int numDocs = atLeast(TEST_NIGHTLY ? 50_000 : 10_000);
for (int i = 1; i <= numDocs; ++i) {
final Document doc = new Document();
doc.add(new NumericDocValuesField("my_field1", 100)); // all docs have
the same value of my_field1
doc.add(new IntPoint("my_field1", 100));
writer.addDocument(doc);
}
writer.flush();
final IndexReader reader = DirectoryReader.open(writer);
writer.close();
IndexSearcher searcher = newSearcher(reader, random().nextBoolean(),
random().nextBoolean(), false);
final int numHits = 3;
final int totalHitsThreshold = 3;
final int afterValue = 100;
final int afterDocID = 10 + random().nextInt(1000);
final SortField sortField = new SortField("my_field1",
SortField.Type.INT);
final Sort sort = new Sort(sortField);
FieldDoc after = new FieldDoc(afterDocID, Float.NaN, new Integer[]
{afterValue});
CollectorManager<TopFieldCollector, TopFieldDocs> manager =
TopFieldCollector.createSharedManager(sort, numHits, after, totalHitsThreshold);
TopDocs topDocs1 = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs1.scoreDocs.length, numHits);
// without optimization
sortField.setOptimizeSortWithPoints(false);
manager = TopFieldCollector.createSharedManager(sort, numHits, after,
totalHitsThreshold);
TopDocs topDocs2 = searcher.search(new MatchAllDocsQuery(), manager);
assertEquals(topDocs1.scoreDocs.length, topDocs2.scoreDocs.length);
reader.close();
dir.close();
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]