Author: ab
Date: Fri Oct 9 15:56:02 2009
New Revision: 823600

URL: http://svn.apache.org/viewvc?rev=823600&view=rev
Log:
NUTCH-679 Fetcher2 implementing Tool.

Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=823600&r1=823599&r2=823600&view=diff ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Fri Oct 9 15:56:02 2009 @@ -26,6 +26,8 @@ * NUTCH-756 - CrawlDatum.set() does not reset Metadata if it is null (Julien Nioche via ab) +* NUTCH-679 - Fetcher2 implementing Tool (Julien Nioche via ab) + Release 1.0 - 2009-03-23 1. NUTCH-474 - Fetcher2 crawlDelay and blocking fix (Dogacan Guney via ab) Modified: lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=823600&r1=823599&r2=823600&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/fetcher/Fetcher.java Fri Oct 9 15:56:02 2009 @@ -35,6 +35,8 @@ import org.apache.hadoop.conf.*; import org.apache.hadoop.mapred.*; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; import org.apache.nutch.crawl.CrawlDatum; import org.apache.nutch.crawl.NutchWritable; @@ -83,7 +85,7 @@ * * @author Andrzej Bialecki */ -public class Fetcher extends Configured implements +public class Fetcher extends Configured implements Tool, MapRunnable<Text, CrawlDatum, Text, NutchWritable> { public static final int PERM_REFRESH_TIME = 5; @@ -972,19 +974,22 @@ /** Run the fetcher. 
*/ public static void main(String[] args) throws Exception { + int res = ToolRunner.run(NutchConfiguration.create(), new Fetcher(), args); + System.exit(res); + } + + public int run(String[] args) throws Exception { String usage = "Usage: Fetcher <segment> [-threads n] [-noParsing]"; if (args.length < 1) { System.err.println(usage); - System.exit(-1); + return -1; } Path segment = new Path(args[0]); - Configuration conf = NutchConfiguration.create(); - - int threads = conf.getInt("fetcher.threads.fetch", 10); + int threads = getConf().getInt("fetcher.threads.fetch", 10); boolean parsing = true; for (int i = 1; i < args.length; i++) { // parse command line @@ -993,13 +998,17 @@ } else if (args[i].equals("-noParsing")) parsing = false; } - conf.setInt("fetcher.threads.fetch", threads); + getConf().setInt("fetcher.threads.fetch", threads); if (!parsing) { - conf.setBoolean("fetcher.parse", parsing); + getConf().setBoolean("fetcher.parse", parsing); + } + try { + fetch(segment, threads, parsing); + return 0; + } catch (Exception e) { + LOG.fatal("Fetcher: " + StringUtils.stringifyException(e)); + return -1; } - Fetcher fetcher = new Fetcher(conf); // make a Fetcher - - fetcher.fetch(segment, threads, parsing); // run the Fetcher }