OK I opened & fixed https://issues.apache.org/jira/browse/LUCENE-1744.
Thanks Shalin! Mike On Wed, Jul 15, 2009 at 7:04 AM, Michael McCandless<luc...@mikemccandless.com> wrote: > OK this is a bug in BooleanScorer2! I'll open it shortly... thanks Shalin! > > Mike > > On Wed, Jul 15, 2009 at 6:32 AM, Michael > McCandless<luc...@mikemccandless.com> wrote: >> I'll look into this... >> >> Mike >> >> On Wed, Jul 15, 2009 at 3:55 AM, Shalin Shekhar >> Mangar<shalinman...@gmail.com> wrote: >>> Hello, >>> >>> Over in Solr land, I'm facing a problem while upgrading the lucene version >>> to trunk. Solr has a QueryElevationComponent which is used to boost certain >>> documents to the top. It pre-processes the query to add a few boolean >>> clauses of its own and uses a FieldComparator for the sorting part. This >>> worked fine before the upgrade. There's a test which fixes the position of >>> two docs and then sorts on score ascending. After the upgrade, the score asc >>> does not seem to take effect and documents are sorted by score descending. >>> >>> I've tried to remove the solr baggage in the following code. Changing the >>> score sort to ascending/descending gives the exact same order of the >>> results. Any ideas on what may be the problem? 
>>> >>> package org.apache.solr; >>> >>> import org.apache.lucene.analysis.WhitespaceAnalyzer; >>> import org.apache.lucene.document.Document; >>> import org.apache.lucene.document.Field; >>> import org.apache.lucene.index.IndexReader; >>> import org.apache.lucene.index.IndexWriter; >>> import org.apache.lucene.index.Term; >>> import org.apache.lucene.search.*; >>> import org.apache.lucene.store.RAMDirectory; >>> import org.junit.Test; >>> >>> import java.io.IOException; >>> import java.util.HashMap; >>> import java.util.Map; >>> >>> public class TestSort { >>> >>> private final Map<String, Integer> priority = new HashMap<String, >>> Integer>(); >>> >>> @Test >>> public void testSorting() throws IOException { >>> RAMDirectory directory = new RAMDirectory(); >>> IndexWriter writer = new IndexWriter(directory, new >>> WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); >>> writer.setMaxBufferedDocs(2); >>> writer.setMergeFactor(1000); >>> writer.addDocument(adoc("id", "a", "title", "ipod", "str_s", "a")); >>> writer.addDocument(adoc("id", "b", "title", "ipod ipod", "str_s", "b")); >>> writer.addDocument(adoc("id", "c", "title", "ipod ipod ipod", "str_s", >>> "c")); >>> writer.addDocument(adoc("id", "x", "title", "boosted", "str_s", "x")); >>> writer.addDocument(adoc("id", "y", "title", "boosted boosted", "str_s", >>> "y")); >>> writer.addDocument(adoc("id", "z", "title", "boosted boosted boosted", >>> "str_s", "z")); >>> writer.close(); >>> >>> IndexSearcher searcher = new IndexSearcher(directory, true); >>> BooleanQuery newq = new BooleanQuery(false); >>> TermQuery query = new TermQuery(new Term("title", "ipod")); >>> >>> newq.add(query, BooleanClause.Occur.SHOULD); >>> newq.add(getElevatedQuery("id", "a", "id", "x"), >>> BooleanClause.Occur.SHOULD); >>> >>> Sort sort = new Sort(new SortField[]{ >>> new SortField("id", new ElevationComparatorSource(priority), >>> false), >>> new SortField(null, SortField.SCORE, true) >>> }); >>> TopDocsCollector 
topCollector = TopFieldCollector.create(sort, 50, >>> false, true, true, true); >>> searcher.search(newq, null, topCollector); >>> >>> TopDocs topDocs = topCollector.topDocs(0, 10); >>> int nDocsReturned = topDocs.scoreDocs.length; >>> >>> int[] ids = new int[nDocsReturned]; >>> float[] scores = new float[nDocsReturned]; >>> Document[] documents = new Document[nDocsReturned]; >>> for (int i = 0; i < nDocsReturned; i++) { >>> ScoreDoc scoreDoc = topDocs.scoreDocs[i]; >>> ids[i] = scoreDoc.doc; >>> scores[i] = scoreDoc.score; >>> documents[i] = searcher.doc(ids[i]); >>> System.out.println("documents[i] = " + documents[i]); >>> System.out.println("scores[i] = " + scores[i]); >>> } >>> >>> searcher.close(); >>> } >>> >>> private Query getElevatedQuery(String... vals) { >>> BooleanQuery q = new BooleanQuery(false); >>> q.setBoost(0); >>> int max = (vals.length / 2) + 5; >>> for (int i = 0; i < vals.length - 1; i += 2) { >>> q.add(new TermQuery(new Term(vals[i], vals[i + 1])), >>> BooleanClause.Occur.SHOULD); >>> priority.put(vals[i + 1], max--); >>> } >>> return q; >>> } >>> >>> private Document adoc(String... 
vals) { >>> Document doc = new Document(); >>> for (int i = 0; i < vals.length - 2; i += 2) { >>> doc.add(new Field(vals[i], vals[i + 1], Field.Store.YES, >>> Field.Index.ANALYZED)); >>> } >>> return doc; >>> } >>> } >>> >>> class ElevationComparatorSource extends FieldComparatorSource { >>> private final Map<String, Integer> priority; >>> >>> public ElevationComparatorSource(final Map<String, Integer> boosts) { >>> this.priority = boosts; >>> } >>> >>> public FieldComparator newComparator(final String fieldname, final int >>> numHits, int sortPos, boolean reversed) throws IOException { >>> return new FieldComparator() { >>> >>> FieldCache.StringIndex idIndex; >>> private final int[] values = new int[numHits]; >>> int bottomVal; >>> >>> public int compare(int slot1, int slot2) { >>> return values[slot2] - values[slot1]; // values will be small >>> enough that there is no overflow concern >>> } >>> >>> public void setBottom(int slot) { >>> bottomVal = values[slot]; >>> } >>> >>> private int docVal(int doc) throws IOException { >>> String id = idIndex.lookup[idIndex.order[doc]]; >>> Integer prio = priority.get(id); >>> return prio == null ? 
0 : prio.intValue(); >>> } >>> >>> public int compareBottom(int doc) throws IOException { >>> return docVal(doc) - bottomVal; >>> } >>> >>> public void copy(int slot, int doc) throws IOException { >>> values[slot] = docVal(doc); >>> } >>> >>> public void setNextReader(IndexReader reader, int docBase, int >>> numSlotsFull) throws IOException { >>> idIndex = FieldCache.DEFAULT.getStringIndex(reader, fieldname); >>> } >>> >>> public int sortType() { >>> return SortField.CUSTOM; >>> } >>> >>> public Comparable value(int slot) { >>> return values[slot]; >>> } >>> }; >>> } >>> } >>> >>> With Lucene trunk: >>> >>> Sort: new SortField(null, SortField.SCORE, true) >>> >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:a> >>> stored/uncompressed,indexed,tokenized<title:ipod>> >>> scores[i] = 1.4054651 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:x> >>> stored/uncompressed,indexed,tokenized<title:boosted>> >>> scores[i] = 0.0 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:b> >>> stored/uncompressed,indexed,tokenized<title:ipod ipod>> >>> scores[i] = 0.6211337 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:c> >>> stored/uncompressed,indexed,tokenized<title:ipod ipod ipod>> >>> scores[i] = 0.6085842 >>> >>> Sort: new SortField(null, SortField.SCORE, false) >>> >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:a> >>> stored/uncompressed,indexed,tokenized<title:ipod>> >>> scores[i] = 1.4054651 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:x> >>> stored/uncompressed,indexed,tokenized<title:boosted>> >>> scores[i] = 0.0 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:b> >>> stored/uncompressed,indexed,tokenized<title:ipod ipod>> >>> scores[i] = 0.6211337 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:c> >>> stored/uncompressed,indexed,tokenized<title:ipod ipod ipod>> >>> scores[i] = 0.6085842 >>> >>> With Lucene 
r779312: >>> >>> Sort: new SortField(null, SortField.SCORE, true) >>> >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:a> >>> stored/uncompressed,indexed,tokenized<title:ipod>> >>> scores[i] = 1.4054651 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:x> >>> stored/uncompressed,indexed,tokenized<title:boosted>> >>> scores[i] = 0.0 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:c> >>> stored/uncompressed,indexed,tokenized<title:ipod ipod ipod>> >>> scores[i] = 0.6085842 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:b> >>> stored/uncompressed,indexed,tokenized<title:ipod ipod>> >>> scores[i] = 0.6211337 >>> >>> Sort: new SortField(null, SortField.SCORE, false) >>> >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:a> >>> stored/uncompressed,indexed,tokenized<title:ipod>> >>> scores[i] = 1.4054651 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:x> >>> stored/uncompressed,indexed,tokenized<title:boosted>> >>> scores[i] = 0.0 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:b> >>> stored/uncompressed,indexed,tokenized<title:ipod ipod>> >>> scores[i] = 0.6211337 >>> documents[i] = Document<stored/uncompressed,indexed,tokenized<id:c> >>> stored/uncompressed,indexed,tokenized<title:ipod ipod ipod>> >>> scores[i] = 0.6085842 >>> >>> -- >>> Regards, >>> Shalin Shekhar Mangar. >>> >> > --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org