iprithv commented on code in PR #15991: URL: https://github.com/apache/lucene/pull/15991#discussion_r3226675904
########## lucene/core/src/test/org/apache/lucene/search/BasePrimarySortFilterTestCase.java: ########## @@ -0,0 +1,413 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.MockAnalyzer; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; +import org.junit.Assert; + +/** + * Base test case for {@link PrimarySortAlignable} filter queries used with {@link + * FilteredOnPrimaryIndexSortFieldQuery}. Subclasses supply the index schema, filter query, and + * expected hit counts; common tests verify rewrite, hit count, multi-segment correctness vs a + * simpler boolean shape, two FILTER clauses, MUST_NOT interaction, and (when {@link + * #densePrimarySortBulkChecksOrNull()} is non-null) bulk-scorer narrowing, cost, and recorded + * scoring windows. + * + * <p>Concrete implementations exist for each supported filter type (numeric range, point range, + * sorted-set range, term query, etc.). Shared helpers live in {@link RecordingMatchAllQuery}. + */ +public abstract class BasePrimarySortFilterTestCase extends LuceneTestCase { + + /** Number of documents indexed; randomized per test run in {@link #setUp()}. */ + protected int numDocs; + + @Override + public void setUp() throws Exception { + super.setUp(); + numDocs = TestUtil.nextInt(random(), 50, 200); + } + + /** + * When the index maps doc {@code n} to the {@code n}-th sort key in order (so the filter's dense + * doc-id interval is known), return the interval and expected stats for bulk-scorer checks. + * + * <p>{@link #testBulkScorerNarrowingCostAndRecording} assumes a force-merged single segment. + */ + protected record DensePrimarySortBulkChecks( + int denseMinDocInclusive, int denseMaxDocExclusive, int expectedMatchingDocs) {} + + /** Returns {@code null} if bulk-scorer slice tests do not apply to this fixture. */ + protected DensePrimarySortBulkChecks densePrimarySortBulkChecksOrNull() { + return null; + } + + /** Returns the index sort for the primary sort field under test. */ + protected abstract Sort buildIndexSort(); + + /** Adds a single document at logical position {@code i} (0-based) to the writer. */ + protected abstract void addDocument(IndexWriter writer, int i) throws IOException; + + /** + * Returns the FILTER query that selects a contiguous sub-range of the sorted index. The query + * must implement {@link PrimarySortAlignable}. + */ + protected abstract Query buildFilterQuery(); + + /** Expected number of documents matched by {@link #buildFilterQuery()}. */ + protected abstract int expectedFilteredHitCount(); + + /** + * If the filter type supports a second, wider range for the two-filter intersection test, return + * it here. Return {@code null} to skip {@link #testTwoOptimizableFilters()}. + */ + protected Query buildWiderFilterQuery() { + return null; + } + + // ---- index helpers ---- + + private IndexWriterConfig buildIndexWriterConfig() { + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + iwc.setIndexSort(buildIndexSort()); + iwc.setMergePolicy(newMergePolicy()); + return iwc; + } + + // ---- tests ---- + + /** The filter query must implement {@link PrimarySortAlignable}. */ + public void testFilterImplementsPrimarySortAlignable() { + Query filter = buildFilterQuery(); + assertTrue( + filter.getClass().getSimpleName() + " must implement PrimarySortAlignable", + filter instanceof PrimarySortAlignable); + } + + /** + * BooleanQuery with the filter as FILTER rewrites to {@link FilteredOnPrimaryIndexSortFieldQuery} + * (possibly wrapped, e.g. in {@link ConstantScoreQuery}). + * + * <p>Uses {@link RecordingMatchAllQuery} as the MUST clause: {@link MatchAllDocsQuery} + FILTER + * is rewritten to {@link ConstantScoreQuery} earlier in {@link BooleanQuery#rewrite}, which + * prevents the primary-sort optimization from applying at the query shape we need to assert here. + */ + public void testRewriteProducesFilteredQuery() throws IOException { + try (Directory dir = newDirectory()) { + buildAndPopulateIndex(dir, true); + try (DirectoryReader reader = DirectoryReader.open(dir)) { + IndexSearcher searcher = newSearcher(reader); + Query query = buildBooleanQuery(new RecordingMatchAllQuery(), buildFilterQuery()); + Query rewritten = searcher.rewrite(query); + assertNotNull( + "expected FilteredOnPrimaryIndexSortFieldQuery in rewrite chain", + unwrapFilteredOnPrimaryIndexSortFieldQuery(rewritten)); + } + } + } + + /** Filtered search returns exactly the expected number of hits. */ + public void testFilteredHitCount() throws IOException { Review Comment: sure, done. Thanks! ########## lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java: ########## @@ -544,6 +545,44 @@ private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext context) th return null; } + DocIdRange getDenseDocIdRangeForPrimarySort(LeafReaderContext context) throws IOException { + IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context); + if (itAndCount == null || itAndCount.count() < 0) { + return null; + } + return new DocIdRange(itAndCount.minDoc(), itAndCount.maxDoc()); + } + + @Override + public DocIdRange denseDocIdRangeOrNull(LeafReaderContext context) throws IOException { + return getDenseDocIdRangeForPrimarySort(context); + } + + /** + * Dense doc-id span for the same primary-sort fast path as this query, for sorted-numeric bounds + * {@code field}, {@code lowerValue}, and {@code upperValue} (same semantics as this class's + * constructor). Used when the filter query is not an {@code + * IndexSortSortedNumericDocValuesRangeQuery} instance but matches those bounds (e.g. {@code + * SortedNumericDocValuesRangeQuery} or a 1D {@link PointRangeQuery}). + * + * <p>This allocates a short-lived query instance internally. It is called once per leaf at weight + * creation time, not on a hot scoring path. + * + * @param context the leaf reader context + * @param field the sorted-numeric field name (must be the primary index sort field) + * @param lowerValue inclusive lower bound + * @param upperValue inclusive upper bound + * @return the dense doc-id range, or {@code null} if a safe range cannot be determined + * @lucene.internal + */ + public static DocIdRange denseDocIdRangeOrNullForSortedNumericBounds( Review Comment: sure, moved it. thanks! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
