[GitHub] [lucene-solr] mayya-sharipova commented on a change in pull request #2063: LUCENE-9599 Make comparator aware of index sorting

GitBox Fri, 06 Nov 2020 06:56:46 -0800


mayya-sharipova commented on a change in pull request #2063:
URL: https://github.com/apache/lucene-solr/pull/2063#discussion_r518803324




##########
File path: lucene/core/src/java/org/apache/lucene/search/FieldComparator.java
##########
@@ -387,125 +346,225 @@ protected SortedDocValues 
getSortedDocValues(LeafReaderContext context, String f
     
     @Override
     public LeafFieldComparator getLeafComparator(LeafReaderContext context) 
throws IOException {
-      termsIndex = getSortedDocValues(context, field);
-      currentReaderGen++;
+      return new TermOrdValLeafComparator(context);
+    }
 
-      if (topValue != null) {
-        // Recompute topOrd/SameReader
-        int ord = termsIndex.lookupTerm(topValue);
-        if (ord >= 0) {
-          topSameReader = true;
-          topOrd = ord;
-        } else {
-          topSameReader = false;
-          topOrd = -ord-2;
+    @Override
+    public BytesRef value(int slot) {
+      return values[slot];
+    }
+
+    @Override
+    public int compareValues(BytesRef val1, BytesRef val2) {
+      if (val1 == null) {
+        if (val2 == null) {
+          return 0;
         }
-      } else {
-        topOrd = missingOrd;
-        topSameReader = true;
+        return missingSortCmp;
+      } else if (val2 == null) {
+        return -missingSortCmp;
       }
-      //System.out.println("  getLeafComparator topOrd=" + topOrd + " 
topSameReader=" + topSameReader);
+      return val1.compareTo(val2);
+    }
 
-      if (bottomSlot != -1) {
-        // Recompute bottomOrd/SameReader
-        setBottom(bottomSlot);
+    /**
+     * Leaf comparator for {@link TermOrdValComparator} that provides skipping 
functionality when index is sorted
+     */
+    public class TermOrdValLeafComparator implements LeafFieldComparator {
+      private final SortedDocValues termsIndex;
+      private boolean indexSort = false; // true if a query sort is a part of 
the index sort
+      private DocIdSetIterator competitiveIterator;
+      private boolean collectedAllCompetitiveHits = false;
+      private boolean iteratorUpdated = false;
+
+      public TermOrdValLeafComparator(LeafReaderContext context) throws 
IOException {
+        termsIndex = getSortedDocValues(context, field);
+        currentReaderGen++;
+        if (topValue != null) {
+          // Recompute topOrd/SameReader
+          int ord = termsIndex.lookupTerm(topValue);
+          if (ord >= 0) {
+            topSameReader = true;
+            topOrd = ord;
+          } else {
+            topSameReader = false;
+            topOrd = -ord-2;
+          }
+        } else {
+          topOrd = missingOrd;
+          topSameReader = true;
+        }
+        if (bottomSlot != -1) {
+          // Recompute bottomOrd/SameReader
+          setBottom(bottomSlot);
+        }
+        this.competitiveIterator = 
DocIdSetIterator.all(context.reader().maxDoc());
       }
 
-      return this;
-    }
-    
-    @Override
-    public void setBottom(final int bottom) throws IOException {
-      bottomSlot = bottom;
+      protected SortedDocValues getSortedDocValues(LeafReaderContext context, 
String field) throws IOException {
+        return DocValues.getSorted(context.reader(), field);
+      }
 
-      bottomValue = values[bottomSlot];
-      if (currentReaderGen == readerGen[bottomSlot]) {
-        bottomOrd = ords[bottomSlot];
-        bottomSameReader = true;
-      } else {
-        if (bottomValue == null) {
-          // missingOrd is null for all segments
-          assert ords[bottomSlot] == missingOrd;
-          bottomOrd = missingOrd;
+      @Override
+      public void setBottom(final int slot) throws IOException {
+        bottomSlot = slot;
+        bottomValue = values[bottomSlot];
+        if (currentReaderGen == readerGen[bottomSlot]) {
+          bottomOrd = ords[bottomSlot];
           bottomSameReader = true;
-          readerGen[bottomSlot] = currentReaderGen;
         } else {
-          final int ord = termsIndex.lookupTerm(bottomValue);
-          if (ord < 0) {
-            bottomOrd = -ord - 2;
-            bottomSameReader = false;
-          } else {
-            bottomOrd = ord;
-            // exact value match
+          if (bottomValue == null) {
+            // missingOrd is null for all segments
+            assert ords[bottomSlot] == missingOrd;
+            bottomOrd = missingOrd;
             bottomSameReader = true;
-            readerGen[bottomSlot] = currentReaderGen;            
-            ords[bottomSlot] = bottomOrd;
+            readerGen[bottomSlot] = currentReaderGen;
+          } else {
+            final int ord = termsIndex.lookupTerm(bottomValue);
+            if (ord < 0) {
+              bottomOrd = -ord - 2;
+              bottomSameReader = false;
+            } else {
+              bottomOrd = ord;
+              // exact value match
+              bottomSameReader = true;
+              readerGen[bottomSlot] = currentReaderGen;
+              ords[bottomSlot] = bottomOrd;
+            }
           }
         }
       }
-    }
 
-    @Override
-    public void setTopValue(BytesRef value) {
-      // null is fine: it means the last doc of the prior
-      // search was missing this value
-      topValue = value;
-      //System.out.println("setTopValue " + topValue);
-    }
+      @Override
+      public int compareBottom(int doc) throws IOException {
+        assert bottomSlot != -1;
+        int docOrd = getOrdForDoc(doc);
+        if (docOrd == -1) {
+          docOrd = missingOrd;
+        }
+        int result;
+        if (bottomSameReader) {
+          // ord is precisely comparable, even in the equal case
+          result = bottomOrd - docOrd;
+        } else if (bottomOrd >= docOrd) {
+          // the equals case always means bottom is > doc
+          // (because we set bottomOrd to the lower bound in setBottom):
+          result = 1;
+        } else {
+          result = -1;
+        }
+        // for the index sort case, if we encounter a first doc that is 
non-competitive,
+        // and the hits threshold is reached, we can update the iterator to 
skip the rest of docs
+        if (indexSort && (reverse ? result >= 0 : result <= 0)) {
+          collectedAllCompetitiveHits = true;
+          if (hitsThresholdReached && iteratorUpdated == false) {
+            competitiveIterator = DocIdSetIterator.empty();
+            iteratorUpdated = true;

Review comment:
       `iteratorUpdated` variable removed in c52d9a7.    
   `iteratorUpdated`, similarly to early termination in `TopFieldCollector`, is 
not strictly necessary; it is just an extra precaution for cases where some 
BulkScorers don't use the iterators from collectors. 
   
   > Could we instead create a class constant EMPTY iterator
   
   We can't use a static iterator: once we advance it, it becomes invalid for 
all other instances. 




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

[GitHub] [lucene-solr] mayya-sharipova commented on a change in pull request #2063: LUCENE-9599 Make comparator aware of index sorting

Reply via email to