jpountz commented on a change in pull request #857: LUCENE-8968: Improve performance of WITHIN and DISJOINT queries for Shape queries URL: https://github.com/apache/lucene-solr/pull/857#discussion_r321278855
########## File path: lucene/sandbox/src/java/org/apache/lucene/document/ShapeQuery.java ########## @@ -378,44 +249,328 @@ protected Scorer getIntersectsScorer(ShapeQuery query, LeafReader reader, Weight final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]); return new ConstantScoreScorer(weight, boost, scoreMode, iterator); } - + DocIdSetBuilder docIdSetBuilder = new DocIdSetBuilder(reader.maxDoc(), values, query.getField()); + IntersectVisitor visitor = getIntersectVisitor(query, docIdSetBuilder); values.intersect(visitor); DocIdSetIterator iterator = docIdSetBuilder.build().iterator(); return new ConstantScoreScorer(weight, boost, scoreMode, iterator); } - /** returns a Scorer for all other (non INTERSECT) queries */ - protected Scorer getScorer(ShapeQuery query, Weight weight, - FixedBitSet intersect, FixedBitSet disjoint, final float boost, ScoreMode scoreMode) throws IOException { - values.intersect(visitor); - if (disjointVisitor != null) { - values.intersect(disjointVisitor); - } - DocIdSetIterator iterator; - if (query.queryRelation == ShapeField.QueryRelation.DISJOINT) { - disjoint.andNot(intersect); - iterator = new BitSetIterator(disjoint, cost()); - } else if (query.queryRelation == ShapeField.QueryRelation.WITHIN) { - intersect.andNot(disjoint); - iterator = new BitSetIterator(intersect, cost()); + private Scorer getDisjointScorer(LeafReader reader, Weight weight, final float boost, ScoreMode scoreMode) throws IOException { + if (values.getDocCount() == reader.maxDoc()) { + // We need to visit all docs in the normal visitor so if + // we have all documents in this segment then use the + // inverse visitor + final FixedBitSet result = new FixedBitSet(reader.maxDoc()); + result.set(0, reader.maxDoc()); + values.intersect(getInverseDisjointVisitor(query, result)); + final DocIdSetIterator iterator = new BitSetIterator(result, cost()); + return new ConstantScoreScorer(weight, boost, scoreMode, iterator); } else { - iterator = new BitSetIterator(intersect, cost()); + FixedBitSet intersects = new FixedBitSet(reader.maxDoc()); + FixedBitSet disjoint = new FixedBitSet(reader.maxDoc()); + IntersectVisitor visitor = getDisjointVisitor(query, intersects, disjoint); + values.intersect(visitor); + disjoint.andNot(intersects); + DocIdSetIterator iterator = new BitSetIterator(disjoint, cost()); + return new ConstantScoreScorer(weight, boost, scoreMode, iterator); + } + } + + private Scorer getWithinScorer(LeafReader reader, Weight weight, final float boost, ScoreMode scoreMode) throws IOException { + if (values.getDocCount() == reader.maxDoc()) { + // We need to visit all docs in the normal visitor so if + // we have all documents in this segment then use the + // inverse visitor + final FixedBitSet result = new FixedBitSet(reader.maxDoc()); + result.set(0, reader.maxDoc()); + values.intersect(getInverseWithinVisitor(query, result)); + final DocIdSetIterator iterator = new BitSetIterator(result, cost()); + return new ConstantScoreScorer(weight, boost, scoreMode, iterator); + } else { + FixedBitSet within = new FixedBitSet(reader.maxDoc()); + FixedBitSet notWithin = new FixedBitSet(reader.maxDoc()); + IntersectVisitor visitor = getWithinVisitor(query, within, notWithin); + values.intersect(visitor); + within.andNot(notWithin); + DocIdSetIterator iterator = new BitSetIterator(within, cost()); + return new ConstantScoreScorer(weight, boost, scoreMode, iterator); } - return new ConstantScoreScorer(weight, boost, scoreMode, iterator); } @Override public long cost() { if (cost == -1) { // Computing the cost may be expensive, so only do it if necessary - if (queryRelation == ShapeField.QueryRelation.DISJOINT) { - cost = values.estimatePointCount(disjointVisitor); - } else { - cost = values.estimatePointCount(visitor); - } + cost = values.estimatePointCount(getEstimateVisitor(query, query.getQueryRelation())); assert cost >= 0; } return cost; } } + + /** create a visitor for calculating point count estimates for the provided relation */ + private static IntersectVisitor getEstimateVisitor(final ShapeQuery query, final QueryRelation relation) { + return new IntersectVisitor() { + @Override + public void visit(int docID) { + throw new UnsupportedOperationException(); + } + + @Override + public void visit(int docID, byte[] t) { + throw new UnsupportedOperationException(); + } + + @Override + public Relation compare(byte[] minTriangle, byte[] maxTriangle) { + return query.relateRangeToQuery(minTriangle, maxTriangle, relation); + } + }; + } + + /** create a visitor that adds documents that match the query using a sparse bitset. (Used by INTERSECT) */ + private static IntersectVisitor getIntersectVisitor(final ShapeQuery query, final DocIdSetBuilder result) { + return new IntersectVisitor() { + final int[] scratchTriangle = new int[6]; + DocIdSetBuilder.BulkAdder adder; + + @Override + public void grow(int count) { + adder = result.grow(count); + } + + @Override + public void visit(int docID) { + adder.add(docID); + } + + @Override + public void visit(int docID, byte[] t) { + if (query.queryMatches(t, scratchTriangle, QueryRelation.INTERSECTS)) { + visit(docID); + } + } + + @Override + public void visit(DocIdSetIterator iterator, byte[] t) throws IOException { + if (query.queryMatches(t, scratchTriangle, QueryRelation.INTERSECTS)) { + int docID; + while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + visit(docID); + } + } + } + + @Override + public Relation compare(byte[] minTriangle, byte[] maxTriangle) { + return query.relateRangeToQuery(minTriangle, maxTriangle, ShapeField.QueryRelation.INTERSECTS); + } + }; + } + + /** create a visitor that clears documents that do NOT match the polygon query; used with INTERSECTS */ + private static IntersectVisitor getInverseIntersectVisitor(final ShapeQuery query, final FixedBitSet result, final int[] cost) { + return new IntersectVisitor() { + int[] scratchTriangle = new int[6]; Review comment: make it final? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org