The attached patch lets you start the web search interface while a
fetching/indexing task is still running. Also attached is a patch that lets you
remove duplicates in the same situation.
Best regards,
Sami Siren
Index: DeleteDuplicates.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/indexer/DeleteDuplicates.java,v
retrieving revision 1.11
diff -r1.11 DeleteDuplicates.java
12a13
> import java.util.Vector;
239c240,241
< IndexReader[] readers = new IndexReader[directories.length];
---
> Vector vReaders=new Vector();
> //IndexReader[] readers = new IndexReader[directories.length];
242,246c244,254
< File indexDir = new File(directories[i], "index");
< IndexReader reader = IndexReader.open(indexDir);
< if (reader.hasDeletions()) {
< LOG.info("Clearing old deletions in " + indexDir);
< reader.undeleteAll();
---
> File indexDone = new File(directories[i], IndexSegment.DONE_NAME);
> if(indexDone.exists() && indexDone.isFile()){
> File indexDir = new File(directories[i], "index");
>
> IndexReader reader = IndexReader.open(indexDir);
> if (reader.hasDeletions()) {
> LOG.info("Clearing old deletions in " + indexDir);
> reader.undeleteAll();
> }
> maxDoc += reader.maxDoc();
> vReaders.add(reader);
248,249c256,260
< maxDoc += reader.maxDoc();
< readers[i] = reader;
---
> }
>
> IndexReader[] readers=new IndexReader[vReaders.size()];
> for(int i = 0; vReaders.size()>0; i++) {
> readers[i]=(IndexReader)vReaders.remove(0);
Index: FetchedSegments.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/searcher/FetchedSegments.java,v
retrieving revision 1.4
diff -r1.4 FetchedSegments.java
64c64,67
< segments.put(segmentDir.getName(), new Segment(segmentDir));
---
> File indexdone = new File(segmentDir, IndexSegment.DONE_NAME);
> if(indexdone.exists() && indexdone.isFile()) {
> segments.put(segmentDir.getName(), new Segment(segmentDir));
> }
Index: NutchBean.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/searcher/NutchBean.java,v
retrieving revision 1.7
diff -r1.7 NutchBean.java
10a11,12
> import java.util.Vector;
> import net.nutch.indexer.IndexSegment;
71c73,88
< indexSearcher = new IndexSearcher(segmentsDir.listFiles());
---
>
> Vector vDirs=new Vector();
> File [] directories = segmentsDir.listFiles();
> for(int i = 0; i < segmentsDir.listFiles().length; i++) {
> File indexdone = new File(directories[i], IndexSegment.DONE_NAME);
> if(indexdone.exists() && indexdone.isFile()) {
> vDirs.add(directories[i]);
> }
> }
>
> directories = new File[ vDirs.size() ];
> for(int i = 0; vDirs.size()>0; i++) {
> directories[i]=(File)vDirs.remove(0);
> }
>
> indexSearcher = new IndexSearcher(directories);
