TestCustomSearcherSort.java

Bernhard Messer Tue, 15 Nov 2005 11:02:13 -0800

Yonik,

TestCustomSearcherSort.java, which you added a few days ago, shows that the author is Martin Seitz from T-Systems, and it doesn't have the Apache license agreement in its header. Is it OK to have this test in SVN?


Bernhard


[EMAIL PROTECTED] wrote:

Author: yonik
Date: Thu Nov 10 19:13:10 2005
New Revision: 332431

URL: http://svn.apache.org/viewcvs?rev=332431&view=rev
Log:
break sorting ties by index order: LUCENE-456

Added:
   
lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java
Modified:
   lucene/java/trunk/CHANGES.txt
   
lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java

Modified: lucene/java/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?rev=332431&r1=332430&r2=332431&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Thu Nov 10 19:13:10 2005
@@ -245,6 +245,10 @@
    change the sort order when sorting by string for documents without
    a value for the sort field.
    (Luc Vanlerberghe via Yonik, LUCENE-453)
+
+16. Fixed a sorting problem with MultiSearchers that can lead to
+    missing or duplicate docs due to equal docs sorting in an arbitrary order.
+    (Yonik Seeley, LUCENE-456)
        
Optimizations

Modified: 
lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java?rev=332431&r1=332430&r2=332431&view=diff
==============================================================================
--- 
lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java 
(original)
+++ 
lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java 
Thu Nov 10 19:13:10 2005
@@ -157,6 +157,11 @@
                                c = -c;
                        }
                }
-               return c > 0;
+
+    // avoid random sort order that could lead to duplicates (bug #31241):
+    if (c == 0)
+      return docA.doc > docB.doc;
+
+    return c > 0;
        }
}

Added: 
lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java?rev=332431&view=auto
==============================================================================
--- 
lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java 
(added)
+++ 
lucene/java/trunk/src/test/org/apache/lucene/search/TestCustomSearcherSort.java 
Thu Nov 10 19:13:10 2005
@@ -0,0 +1,268 @@
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
+import java.util.Map;
+import java.util.Random;
+import java.util.TreeMap;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.DateTools;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * Unit test for sorting code.
+ *
+ * @author  Martin Seitz (T-Systems)
+ */
+
+public class TestCustomSearcherSort
+extends TestCase
+implements Serializable {
+
+    private Directory index = null;
+    private Query query = null;
+    // reduced from 20000 to 2000 to speed up test...
+    private final static int INDEX_SIZE = 2000;
+
+       public TestCustomSearcherSort (String name) {
+               super (name);
+       }
+
+       public static void main (String[] argv) {
+           TestRunner.run (suite());
+       }
+
+       public static Test suite() {
+               return new TestSuite (TestCustomSearcherSort.class);
+       }
+
+
+       // create an index for testing
+       private Directory getIndex()
+       throws IOException {
+               RAMDirectory indexStore = new RAMDirectory ();
+               IndexWriter writer = new IndexWriter (indexStore, new 
StandardAnalyzer(), true);
+               RandomGen random = new RandomGen();
+               for (int i=0; i<INDEX_SIZE; ++i) { // don't decrease; if to low 
the problem doesn't show up
+               Document doc = new Document();
+                   if((i%5)!=0) { // some documents must not have an entry in 
the first sort field
+                       doc.add (new Field("publicationDate_", 
random.getLuceneDate(), Field.Store.YES, Field.Index.UN_TOKENIZED));
+                   }

+                   if((i%7)==0) { // some documents to match the query (see below)
+                       doc.add (new Field("content", "test", Field.Store.YES, Field.Index.TOKENIZED));

+                   }
+                   // every document has a defined 'mandant' field
+                   doc.add(new Field("mandant", Integer.toString(i%3), 
Field.Store.YES, Field.Index.UN_TOKENIZED));
+                   writer.addDocument (doc);
+               }
+               writer.optimize ();
+               writer.close ();
+           return indexStore;
+       }
+
+       /**

+        * Create index and query for test cases.
+        */

+       public void setUp() throws Exception {
+               index = getIndex();
+           query = new TermQuery( new Term("content", "test"));
+       }
+
+       /**

+        * Run the test using two CustomSearcher instances.
+        */

+       public void testFieldSortCustomSearcher() throws Exception {
+           log("Run testFieldSortCustomSearcher");
+               // define the sort criteria
+           Sort custSort = new Sort(new SortField[] {

+               new SortField("publicationDate_"),
+               SortField.FIELD_SCORE

+           });
+           Searcher searcher = new CustomSearcher (index, 2);
+           // search and check hits
+               matchHits(searcher, custSort);
+       }
+       /**

+        * Run the test using one CustomSearcher wrapped by a MultiSearcher.
+        */

+       public void testFieldSortSingleSearcher() throws Exception {
+           log("Run testFieldSortSingleSearcher");
+               // define the sort criteria
+           Sort custSort = new Sort(new SortField[] {

+               new SortField("publicationDate_"),
+               SortField.FIELD_SCORE

+           });

+           Searcher searcher =
+               new MultiSearcher(new Searchable[] {

+                       new CustomSearcher (index, 2)});
+           // search and check hits
+               matchHits(searcher, custSort);
+       }
+       /**

+        * Run the test using two CustomSearcher instances.
+        */

+       public void testFieldSortMultiCustomSearcher() throws Exception {
+           log("Run testFieldSortMultiCustomSearcher");
+               // define the sort criteria
+           Sort custSort = new Sort(new SortField[] {

+               new SortField("publicationDate_"),
+               SortField.FIELD_SCORE

+           });

+           Searcher searcher =
+               new MultiSearcher(new Searchable[] {

+                       new CustomSearcher (index, 0),
+                       new CustomSearcher (index, 2)});
+           // search and check hits
+               matchHits(searcher, custSort);
+       }
+
+
+       // make sure the documents returned by the search match the expected 
list
+       private void matchHits (Searcher searcher, Sort sort)
+       throws IOException {
+           // make a query without sorting first
+               Hits hitsByRank = searcher.search(query);
+               checkHits(hitsByRank, "Sort by rank: "); // check for duplicates
+        Map resultMap = new TreeMap();
+        // store hits in TreeMap - TreeMap does not allow duplicates; existing 
entries are silently overwritten
+        for(int hitid=0;hitid<hitsByRank.length(); ++hitid) {
+            resultMap.put(
+                    new Integer(hitsByRank.id(hitid)),  // Key:   Lucene 
Document ID
+                    new Integer(hitid));                               // 
Value: Hits-Objekt Index
+        }

+
+        // now make a query using the sort criteria

+               Hits resultSort = searcher.search (query, sort);
+               checkHits(resultSort, "Sort by custom criteria: "); // check 
for duplicates
+               
+        String lf = System.getProperty("line.separator", "\n");
+        // besides the sorting both sets of hits must be identical
+        for(int hitid=0;hitid<resultSort.length(); ++hitid) {
+            Integer idHitDate = new Integer(resultSort.id(hitid)); // document 
ID from sorted search
+            if(!resultMap.containsKey(idHitDate)) {
+                log("ID "+idHitDate+" not found. Possibliy a duplicate.");
+            }
+            assertTrue(resultMap.containsKey(idHitDate)); // same ID must be 
in the Map from the rank-sorted search
+            // every hit must appear once in both result sets --> remove it 
from the Map.
+            // At the end the Map must be empty!
+            resultMap.remove(idHitDate);
+        }
+        if(resultMap.size()==0) {
+            log("All hits matched");
+        } else {
+        log("Couldn't match "+resultMap.size()+" hits.");
+        }
+        assertEquals(resultMap.size(), 0);
+       }
+
+       /**
+        * Check the hits for duplicates.
+        * @param hits
+        */
+    private void checkHits(Hits hits, String prefix) {
+        if(hits!=null) {
+            Map idMap = new TreeMap();
+            for(int docnum=0;docnum<hits.length();++docnum) {
+                Integer luceneId = null;
+                try {
+                    luceneId = new Integer(hits.id(docnum));
+                    if(idMap.containsKey(luceneId)) {
+                        StringBuffer message = new StringBuffer(prefix);
+                        message.append("Duplicate key for hit index = ");
+                        message.append(docnum);
+                        message.append(", previous index = ");
+                        
message.append(((Integer)idMap.get(luceneId)).toString());
+                        message.append(", Lucene ID = ");
+                        message.append(luceneId);
+                        log(message.toString());

+                    } else {
+                        idMap.put(luceneId, new Integer(docnum));

+                    }
+                } catch(IOException ioe) {
+                    StringBuffer message = new StringBuffer(prefix);
+                    message.append("Error occurred for hit index = ");
+                    message.append(docnum);
+                    message.append(" (");
+                    message.append(ioe.getMessage());
+                    message.append(")");
+                    log(message.toString());
+                }
+            }
+        }
+    }

+
+    // Simply write to console - choosen to be independant of log4j etc
+    private void log(String message) {

+        System.out.println(message);
+    }

+
+    public class CustomSearcher extends IndexSearcher {

+        private int switcher;
+        /**
+         * @param directory
+         * @throws IOException
+         */
+        public CustomSearcher(Directory directory, int switcher) throws 
IOException {
+            super(directory);
+            this.switcher = switcher;
+        }
+        /**
+         * @param r
+         */
+        public CustomSearcher(IndexReader r, int switcher) {
+            super(r);
+            this.switcher = switcher;
+        }
+        /**
+         * @param path
+         * @throws IOException
+         */
+        public CustomSearcher(String path, int switcher) throws IOException {
+            super(path);
+            this.switcher = switcher;
+        }
+        /* (non-Javadoc)
+         * @see 
org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query, 
org.apache.lucene.search.Filter, int, org.apache.lucene.search.Sort)
+         */
+        public TopFieldDocs search(Query query, Filter filter, int nDocs,
+                Sort sort) throws IOException {
+            BooleanQuery bq = new BooleanQuery();
+            bq.add(query, BooleanClause.Occur.MUST);
+            bq.add(new TermQuery(new Term("mandant", 
Integer.toString(switcher))), BooleanClause.Occur.MUST);
+            return super.search(bq, filter, nDocs, sort);
+        }
+        /* (non-Javadoc)
+         * @see 
org.apache.lucene.search.Searchable#search(org.apache.lucene.search.Query, 
org.apache.lucene.search.Filter, int)
+         */
+        public TopDocs search(Query query, Filter filter, int nDocs)
+        throws IOException {
+            BooleanQuery bq = new BooleanQuery();
+            bq.add(query, BooleanClause.Occur.MUST);
+            bq.add(new TermQuery(new Term("mandant", 
Integer.toString(switcher))), BooleanClause.Occur.MUST);
+            return super.search(bq, filter, nDocs);
+        }
+    }
+    private class RandomGen {
+        private Random random = new Random(0); // to generate some arbitrary 
contents
+           private Calendar base = new GregorianCalendar(1980, 1, 1);
+
+           // Just to generate some different Lucene Date strings
+        private String getLuceneDate() {
+           return DateTools.timeToString(base.getTimeInMillis() + 
random.nextInt() - Integer.MIN_VALUE, DateTools.Resolution.DAY);
+        }
+    }
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Re: svn commit: r332431 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java src/test/org/apache/lucene/search/TestCustomSearcherSort.java

Reply via email to