benwtrent commented on issue #15707:
URL: https://github.com/apache/lucene/issues/15707#issuecomment-3899517617
OK, I have a test that reproduces the failure, though I am not 100% sure it
fully represents our use case.
Our test uses a compound field as the index sort. This sorting by many
various fields might be construed as not really sorting effectively by a single
field depending on its cardinality and distribution. Basically, the index could
be considered not sorted by the field at all. This is hypothetical. I need
@romseygeek to help me here ;).
Here is a test that repeatably fails with the competitive iterator, but
passes without it:
```
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
/**
 * Nightly timing comparison of a STRING sort over two doc-values fields that hold identical
 * values: one added with {@code SortedDocValuesField.indexedField} and one added as a plain
 * {@code SortedDocValuesField}.
 *
 * <p>Every document matches the query, and the sort value alternates between {@code "a"} and
 * {@code "z"} from one document to the next. The test fails when the median search time on the
 * {@code indexedField} variant exceeds the median on the plain variant by more than {@link
 * #MAX_SLOWDOWN_RATIO}.
 */
public class TestTermOrdValComparatorSkipperNightly extends LuceneTestCase {

  /** Sort field added through {@code SortedDocValuesField.indexedField}. */
  private static final String SORT_FIELD_SKIP = "meta.file.skip";

  /** Sort field added as a plain {@code SortedDocValuesField}. */
  private static final String SORT_FIELD_NO_SKIP = "meta.file.noskip";

  /** Field and value shared by every document, so the term query matches all of them. */
  private static final String QUERY_FIELD = "process.name";

  private static final String QUERY_VALUE = "kernel";

  private static final int NUM_DOCS = 200_000;
  private static final int NUM_HITS = 10;
  private static final int TOTAL_HITS_THRESHOLD = 1;

  /** Untimed searches run per sort before measuring. */
  private static final int WARMUP_ITERS = 2;

  /** Timed searches per sort; the median of these is compared. */
  private static final int MEASURE_ITERS = 5;

  /** Largest accepted ratio of (median time on skip field) / (median time on no-skip field). */
  private static final double MAX_SLOWDOWN_RATIO = 1.2d;

  /**
   * Builds the index, warms up both sorts, then asserts that the median search time of the sort
   * on {@link #SORT_FIELD_SKIP} is at most {@link #MAX_SLOWDOWN_RATIO} times the median of the
   * sort on {@link #SORT_FIELD_NO_SKIP}.
   */
  @Nightly
  public void testSkipperRegressionOnUncorrelatedDocOrder() throws Exception {
    try (Directory directory = newDirectory()) {
      indexAlternatingValues(directory);

      try (IndexReader indexReader = DirectoryReader.open(directory)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        Term matchAllTerm = new Term(QUERY_FIELD, QUERY_VALUE);
        Query matchAll = new TermQuery(matchAllTerm);

        Sort skipSort = buildSort(SORT_FIELD_SKIP);
        Sort plainSort = buildSort(SORT_FIELD_NO_SKIP);

        // Warm up both variants before timing either one.
        warmup(indexSearcher, matchAll, skipSort);
        warmup(indexSearcher, matchAll, plainSort);

        long skipMedianNanos = measure(indexSearcher, matchAll, skipSort);
        long plainMedianNanos = measure(indexSearcher, matchAll, plainSort);

        double ratio = (double) skipMedianNanos / plainMedianNanos;
        String failureMessage = "skipping-enabled sort should not be much slower; ratio=" + ratio;
        assertTrue(failureMessage, ratio <= MAX_SLOWDOWN_RATIO);
      }
    }
  }

  /**
   * Writes {@link #NUM_DOCS} documents into {@code directory} and force-merges to one segment.
   * Even-numbered documents carry the value {@code "a"} and odd-numbered ones {@code "z"}, stored
   * identically in both sort fields.
   */
  private static void indexAlternatingValues(Directory directory) throws IOException {
    IndexWriterConfig writerConfig = newIndexWriterConfig();
    BytesRef low = new BytesRef("a");
    BytesRef high = new BytesRef("z");

    try (IndexWriter indexWriter = new IndexWriter(directory, writerConfig)) {
      for (int docIndex = 0; docIndex < NUM_DOCS; docIndex++) {
        BytesRef sortValue;
        if (docIndex % 2 == 0) {
          sortValue = low;
        } else {
          sortValue = high;
        }

        Document document = new Document();
        document.add(new StringField(QUERY_FIELD, QUERY_VALUE, Field.Store.NO));
        document.add(new SortedDocValuesField(SORT_FIELD_NO_SKIP, sortValue));
        document.add(SortedDocValuesField.indexedField(SORT_FIELD_SKIP, sortValue));
        indexWriter.addDocument(document);
      }
      indexWriter.forceMerge(1);
    }
  }

  /** Returns a sort on {@code field} as a STRING, with score as the secondary key. */
  private static Sort buildSort(String field) {
    return new Sort(new SortField(field, SortField.Type.STRING), SortField.FIELD_SCORE);
  }

  /** Runs the search {@link #WARMUP_ITERS} times without recording any timing. */
  private static void warmup(IndexSearcher searcher, Query query, Sort sort) throws IOException {
    for (int remaining = WARMUP_ITERS; remaining > 0; remaining--) {
      searcher.search(
          query, new TopFieldCollectorManager(sort, NUM_HITS, null, TOTAL_HITS_THRESHOLD));
    }
  }

  /**
   * Times {@link #MEASURE_ITERS} searches and returns the median elapsed time in nanoseconds.
   * The collector manager is built before the clock starts, so only the search call is timed.
   */
  private static long measure(IndexSearcher searcher, Query query, Sort sort)
      throws IOException {
    long[] elapsedNanos = new long[MEASURE_ITERS];
    for (int run = 0; run < elapsedNanos.length; run++) {
      TopFieldCollectorManager collectorManager =
          new TopFieldCollectorManager(sort, NUM_HITS, null, TOTAL_HITS_THRESHOLD);
      long startedAt = System.nanoTime();
      searcher.search(query, collectorManager);
      long finishedAt = System.nanoTime();
      elapsedNanos[run] = finishedAt - startedAt;
    }
    Arrays.sort(elapsedNanos);
    int middle = elapsedNanos.length / 2;
    return elapsedNanos[middle];
  }
}
```
Run with:
```
./gradlew :lucene:core:test --tests
"org.apache.lucene.search.TestTermOrdValComparatorSkipperNightly"
-Ptests.nightly=true
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]