Author: ab
Date: Tue Oct 31 13:32:51 2006
New Revision: 469660
URL: http://svn.apache.org/viewvc?view=rev&rev=469660
Log:
When jobtracker is 'local' generate only one partition. This should fix
NUTCH-361 and NUTCH-136.
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?view=diff&rev=469660&r1=469659&r2=469660
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Tue Oct 31 13:32:51 2006
@@ -307,18 +307,20 @@
// map to inverted subset due for fetch, sort by link count
JobConf job = new NutchJob(getConf());
job.setJobName("generate: select " + segment);
-
+
if (numLists == -1) { // for politeness make
numLists = job.getNumMapTasks(); // a partition per fetch task
}
-
+ if ("local".equals(job.get("mapred.job.tracker")) && numLists != 1) {
+ // override
+ LOG.info("Generator: jobtracker is 'local', generating exactly one partition.");
+ numLists = 1;
+ }
job.setLong("crawl.gen.curTime", curTime);
job.setLong("crawl.topN", topN);
job.setInputPath(new Path(dbDir, CrawlDatum.DB_DIR_NAME));
job.setInputFormat(SequenceFileInputFormat.class);
- job.setInputKeyClass(Text.class);
- job.setInputValueClass(CrawlDatum.class);
job.setMapperClass(Selector.class);
job.setPartitionerClass(Selector.class);
@@ -342,8 +344,6 @@
job.setInputPath(tempDir);
job.setInputFormat(SequenceFileInputFormat.class);
- job.setInputKeyClass(FloatWritable.class);
- job.setInputValueClass(SelectorEntry.class);
job.setMapperClass(SelectorInverseMapper.class);
job.setPartitionerClass(PartitionUrlByHost.class);
-------------------------------------------------------------------------
Using Tomcat but need to do more? Need to support web services, security?
Get stuff done quickly with pre-integrated technology to make your job easier
Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642
_______________________________________________
Nutch-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs