Author: ab
Date: Tue Apr 24 15:13:53 2007
New Revision: 532105

URL: http://svn.apache.org/viewvc?view=rev&rev=532105
Log:
Prevent NPE when working with small, possibly empty indexes.

Modified:
    lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?view=diff&rev=532105&r1=532104&r2=532105
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Tue Apr 24 15:13:53 2007
@@ -158,19 +158,28 @@
   public class DDRecordReader implements RecordReader {
     private IndexReader indexReader;
-    private int maxDoc;
-    private int doc;
+    private int maxDoc = 0;
+    private int doc = 0;
     private Text index;
     public DDRecordReader(FileSplit split, JobConf job, Text index) throws IOException {
-      indexReader = IndexReader.open(new FsDirectory(FileSystem.get(job), split.getPath(), false, job));
-      maxDoc = indexReader.maxDoc();
+      try {
+        indexReader = IndexReader.open(new FsDirectory(FileSystem.get(job), split.getPath(), false, job));
+        maxDoc = indexReader.maxDoc();
+      } catch (IOException ioe) {
+        LOG.warn("Can't open index at " + split + ", skipping. (" + ioe.getMessage() + ")");
+        indexReader = null;
+      }
       this.index = index;
     }
     public boolean next(Writable key, Writable value) throws IOException {
+
+      // skip empty indexes
+      if (indexReader == null || maxDoc <= 0)
+        return false;
       // skip deleted documents
       while (indexReader.isDeleted(doc) && doc < maxDoc) doc++;