Hello all,

The attached patch enables you to start the web search interface even while
a fetching/indexing task is still in progress. Also attached is a patch that allows you to
remove duplicates in the same situation.


Best regards,

Sami Siren
Index: DeleteDuplicates.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/indexer/DeleteDuplicates.java,v
retrieving revision 1.11
diff -r1.11 DeleteDuplicates.java
12a13
> import java.util.Vector;
239c240,241
<     IndexReader[] readers = new IndexReader[directories.length];
---
>     Vector vReaders=new Vector();
>     //IndexReader[] readers = new IndexReader[directories.length];
242,246c244,254
<       File indexDir = new File(directories[i], "index");
<       IndexReader reader = IndexReader.open(indexDir);
<       if (reader.hasDeletions()) {
<         LOG.info("Clearing old deletions in " + indexDir);
<         reader.undeleteAll();
---
>       File indexDone = new File(directories[i], IndexSegment.DONE_NAME);
>       if(indexDone.exists() && indexDone.isFile()){
>         File indexDir = new File(directories[i], "index");
> 
>               IndexReader reader = IndexReader.open(indexDir);
>         if (reader.hasDeletions()) {
>           LOG.info("Clearing old deletions in " + indexDir);
>           reader.undeleteAll();
>         }
>         maxDoc += reader.maxDoc();
>         vReaders.add(reader);
248,249c256,260
<       maxDoc += reader.maxDoc();
<       readers[i] = reader;
---
>     }
> 
>     IndexReader[] readers=new IndexReader[vReaders.size()];
>     for(int i = 0; vReaders.size()>0; i++) {
>       readers[i]=(IndexReader)vReaders.remove(0);
Index: FetchedSegments.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/searcher/FetchedSegments.java,v
retrieving revision 1.4
diff -r1.4 FetchedSegments.java
64c64,67
<             segments.put(segmentDir.getName(), new Segment(segmentDir));
---
>             File indexdone = new File(segmentDir, IndexSegment.DONE_NAME);
>             if(indexdone.exists() && indexdone.isFile()) {
>               segments.put(segmentDir.getName(), new Segment(segmentDir));
>             }
Index: NutchBean.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/searcher/NutchBean.java,v
retrieving revision 1.7
diff -r1.7 NutchBean.java
10a11,12
> import java.util.Vector;
> import net.nutch.indexer.IndexSegment;
71c73,88
<       indexSearcher = new IndexSearcher(segmentsDir.listFiles());
---
>       
>       Vector vDirs=new Vector();
>       File [] directories = segmentsDir.listFiles();
>       for(int i = 0; i < segmentsDir.listFiles().length; i++) {
>         File indexdone = new File(directories[i], IndexSegment.DONE_NAME);
>         if(indexdone.exists() && indexdone.isFile()) {
>           vDirs.add(directories[i]);
>         }
>       }
>       
>       directories = new File[ vDirs.size() ];
>       for(int i = 0; vDirs.size()>0; i++) {
>         directories[i]=(File)vDirs.remove(0);
>       }
>       
>       indexSearcher = new IndexSearcher(directories);

Reply via email to