jpountz commented on code in PR #13221: URL: https://github.com/apache/lucene/pull/13221#discussion_r1604500178
########## lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java: ########## @@ -405,5 +395,278 @@ public int advance(int target) throws IOException { protected abstract long bottomAsComparableLong(); protected abstract long topAsComparableLong(); + + class DisjunctionBuildVisitor extends RangeVisitor { + + final Deque<DisiAndMostCompetitiveValue> disis = new ArrayDeque<>(); + // most competitive entry stored last. + final Consumer<DisiAndMostCompetitiveValue> adder = + reverse == false ? disis::addFirst : disis::addLast; + + final int minBlockLength = minBlockLength(); + + final LSBRadixSorter sorter = new LSBRadixSorter(); + int[] docs = IntsRef.EMPTY_INTS; + int index = 0; + int blockMaxDoc = -1; + boolean docsInOrder = true; + long blockMinValue = Long.MAX_VALUE; + long blockMaxValue = Long.MIN_VALUE; + + private DisjunctionBuildVisitor() { + super(minValueAsLong, maxValueAsLong, maxDocVisited); + } + + @Override + public void grow(int count) { + docs = ArrayUtil.grow(docs, index + count + 1); + } + + @Override + protected void consumeDoc(int doc) { + docs[index++] = doc; + if (doc >= blockMaxDoc) { + blockMaxDoc = doc; + } else { + docsInOrder = false; + } + } + + void intersectLeaves(PointValues.PointTree pointTree) throws IOException { + PointValues.Relation r = + compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); + switch (r) { + case CELL_INSIDE_QUERY, CELL_CROSSES_QUERY -> { + if (pointTree.moveToChild()) { + do { + intersectLeaves(pointTree); + } while (pointTree.moveToSibling()); + pointTree.moveToParent(); + } else { + if (r == PointValues.Relation.CELL_CROSSES_QUERY) { + pointTree.visitDocValues(this); + } else { + pointTree.visitDocIDs(this); + } + updateMinMax( + sortableBytesToLong(pointTree.getMinPackedValue()), + sortableBytesToLong(pointTree.getMaxPackedValue())); + } + } + case CELL_OUTSIDE_QUERY -> {} + default -> throw new IllegalStateException("unreachable code"); + } + } + + void updateMinMax(long leafMinValue, long leafMaxValue) throws IOException { + this.blockMinValue = Math.min(blockMinValue, leafMinValue); + this.blockMaxValue = Math.max(blockMaxValue, leafMaxValue); + if (index >= minBlockLength) { + update(); + this.blockMinValue = Long.MAX_VALUE; + this.blockMaxValue = Long.MIN_VALUE; + } + } + + void update() throws IOException { + if (blockMinValue > blockMaxValue) { + return; + } + long mostCompetitiveValue = + reverse == false + ? Math.max(blockMinValue, minValueAsLong) + : Math.min(blockMaxValue, maxValueAsLong); + + if (docsInOrder == false) { + sorter.sort(PackedInts.bitsRequired(blockMaxDoc), docs, index); + } + docs[index] = DocIdSetIterator.NO_MORE_DOCS; + DocIdSetIterator iter = new IntArrayDocIdSet(docs, index).iterator(); + adder.accept(new DisiAndMostCompetitiveValue(iter, mostCompetitiveValue)); + docs = IntsRef.EMPTY_INTS; + index = 0; + blockMaxDoc = -1; + docsInOrder = true; + } + + DocIdSetIterator generateCompetitiveIterator() throws IOException { + intersectLeaves(pointValues.getPointTree()); + update(); + + if (disis.isEmpty()) { + return DocIdSetIterator.empty(); + } + assert assertMostCompetitiveValuesSorted(disis); + + PriorityQueue<DisiAndMostCompetitiveValue> disjunction = + new PriorityQueue<>(disis.size()) { + @Override + protected boolean lessThan( + DisiAndMostCompetitiveValue a, DisiAndMostCompetitiveValue b) { + return a.disi.docID() < b.disi.docID(); + } + }; + disjunction.addAll(disis); + + return new CompetitiveIterator(maxDoc, disis, disjunction); + } + + /** + * Used for assert. When reverse is false, smaller values are more competitive, so + * mostCompetitiveValues should be in desc order. + */ + private boolean assertMostCompetitiveValuesSorted(Deque<DisiAndMostCompetitiveValue> deque) { + long lastValue = reverse == false ? Long.MAX_VALUE : Long.MIN_VALUE; + for (DisiAndMostCompetitiveValue value : deque) { + if (reverse == false) { + assert value.mostCompetitiveValue <= lastValue + : deque.stream().map(d -> d.mostCompetitiveValue).toList().toString(); + } else { + assert value.mostCompetitiveValue >= lastValue + : deque.stream().map(d -> d.mostCompetitiveValue).toList().toString(); + } + lastValue = value.mostCompetitiveValue; + } + return true; + } + + private int minBlockLength() { + // bottom value can be much more competitive than thresholdAsLong, recompute the cost. + int cost = + Math.toIntExact( Review Comment: This sounds too fragile to me, I'd rather check very specific conditions. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org