Author: jerome Date: Thu Apr 6 03:49:40 2006 New Revision: 391958 URL: http://svn.apache.org/viewcvs?rev=391958&view=rev Log: NUTCH-244: db.max.outlinks.per.page may now be negative, meaning there is no limit on the number of outlinks processed per page
Modified: lucene/nutch/trunk/conf/nutch-default.xml lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java Modified: lucene/nutch/trunk/conf/nutch-default.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=391958&r1=391957&r2=391958&view=diff ============================================================================== --- lucene/nutch/trunk/conf/nutch-default.xml (original) +++ lucene/nutch/trunk/conf/nutch-default.xml Thu Apr 6 03:49:40 2006 @@ -255,6 +255,8 @@ <name>db.max.outlinks.per.page</name> <value>100</value> <description>The maximum number of outlinks that we'll process for a page. + If this value is nonnegative (>=0), at most db.max.outlinks.per.page outlinks + will be processed for a page; otherwise, all outlinks will be processed. </description> </property> Modified: lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java?rev=391958&r1=391957&r2=391958&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/parse/ParseData.java Thu Apr 6 03:49:40 2006 @@ -119,12 +119,15 @@ int totalOutlinks = in.readInt(); // read outlinks int maxOutlinksPerPage = this.conf.getInt("db.max.outlinks.per.page", 100); - int outlinksToRead = Math.min(maxOutlinksPerPage, totalOutlinks); + int outlinksToRead = totalOutlinks; + if (maxOutlinksPerPage >= 0) { + outlinksToRead = Math.min(maxOutlinksPerPage, totalOutlinks); + } outlinks = new Outlink[outlinksToRead]; for (int i = 0; i < outlinksToRead; i++) { outlinks[i] = Outlink.read(in); } - for (int i = maxOutlinksPerPage; i < totalOutlinks; i++) { + for (int i = 
outlinksToRead; i < totalOutlinks; i++) { Outlink.skip(in); } Modified: lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java?rev=391958&r1=391957&r2=391958&view=diff ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java Thu Apr 6 03:49:40 2006 @@ -51,4 +51,31 @@ WritableTestUtils.testWritable(r, conf); } + public void testMaxOutlinks() throws Exception { + Outlink[] outlinks = new Outlink[128]; + for (int i=0; i<outlinks.length; i++) { + outlinks[i] = new Outlink("http://outlink.com/" + i, "Outlink" + i, conf); + } + ParseData original = new ParseData(ParseStatus.STATUS_SUCCESS, + "Max Outlinks Title", + outlinks, + new Metadata()); + Configuration conf = NutchConfiguration.create(); + // No Outlinks + conf.setInt("db.max.outlinks.per.page", 0); + ParseData data = (ParseData) WritableTestUtils.writeRead(original, conf); + assertEquals(0, data.getOutlinks().length); + // Only 100 Outlinks + conf.setInt("db.max.outlinks.per.page", 100); + data = (ParseData) WritableTestUtils.writeRead(original, conf); + assertEquals(100, data.getOutlinks().length); + // 256 Outlinks + conf.setInt("db.max.outlinks.per.page", 256); + data = (ParseData) WritableTestUtils.writeRead(original, conf); + assertEquals(outlinks.length, data.getOutlinks().length); + // All Outlinks + conf.setInt("db.max.outlinks.per.page", -1); + data = (ParseData) WritableTestUtils.writeRead(original, conf); + assertEquals(outlinks.length, data.getOutlinks().length); + } } Modified: lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java?rev=391958&r1=391957&r2=391958&view=diff 
============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java (original) +++ lucene/nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java Thu Apr 6 03:49:40 2006 @@ -31,6 +31,14 @@ /** Utility method for testing writables. */ public static void testWritable(Writable before, Configuration conf) throws Exception { + TestCase.assertEquals(before, writeRead(before, conf)); + } + + + /** Utility method for testing writables. */ + public static Writable writeRead(Writable before, Configuration conf) + throws Exception { + DataOutputBuffer dob = new DataOutputBuffer(); before.write(dob); @@ -42,8 +50,7 @@ ((Configurable)after).setConf(conf); } after.readFields(dib); - - TestCase.assertEquals(before, after); + return after; } - + }