Author: cutting
Date: Tue Aug 23 21:14:18 2005
New Revision: 239523

URL: http://svn.apache.org/viewcvs?rev=239523&view=rev
Log:
Limit number of inlinks to avoid out-of-memory exceptions.
Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java?rev=239523&r1=239522&r2=239523&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/LinkDb.java Tue Aug 23 21:14:18 2005
@@ -36,6 +36,7 @@
   public static String CURRENT_NAME = "current";
 
   private int maxAnchorLength;
+  private int maxInlinks;
 
   public LinkDb() {
     super(null);
@@ -48,6 +49,7 @@
 
   public void configure(JobConf job) {
     maxAnchorLength = job.getInt("db.max.anchor.length", 100);
+    maxInlinks = job.getInt("db.max.inlinks", 100000);
   }
 
   public void map(WritableComparable key, Writable value,
@@ -72,13 +74,12 @@
   public void reduce(WritableComparable key, Iterator values,
                      OutputCollector output, Reporter reporter)
     throws IOException {
-    Inlinks result = null;
+    Inlinks result = new Inlinks();
     while (values.hasNext()) {
       Inlinks inlinks = (Inlinks)values.next();
-      if (result == null) {
-        result = inlinks;
-      } else {
-        result.add(inlinks);
+      int end = Math.min(maxInlinks - result.size(), inlinks.size());
+      for (int i = 0; i < end; i++) {
+        result.add(inlinks.get(i));
       }
     }
     output.collect(key, result);