Author: ab
Date: Sat Sep 23 12:00:13 2006
New Revision: 449289

URL: http://svn.apache.org/viewvc?view=rev&rev=449289
Log:
NUTCH-337: obey "fetcher.parse" property if -noParsing is not specified.

Modified:
    lucene/nutch/branches/branch-0.8/CHANGES.txt
    lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java
    lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java

Modified: lucene/nutch/branches/branch-0.8/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/CHANGES.txt?view=diff&rev=449289&r1=449288&r2=449289
==============================================================================
--- lucene/nutch/branches/branch-0.8/CHANGES.txt (original)
+++ lucene/nutch/branches/branch-0.8/CHANGES.txt Sat Sep 23 12:00:13 2006
@@ -35,6 +35,9 @@
 11. NUTCH-336 - Differentiate between newly discovered pages and newly
     injected pages (Chris Schneider via ab) NOTE: this changes the
     scoring API, filter implementations need to be updated.
+
+12. NUTCH-337 - Fetcher ignores the fetcher.parse value (Stefan Groschupf
+    via ab)
 
 Release 0.8 - 2006-07-25
 

Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java?view=diff&rev=449289&r1=449288&r2=449289
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/crawl/Crawl.java Sat Sep 23 12:00:13 2006
@@ -108,7 +108,7 @@
       Path segment =
         new Generator(job).generate(crawlDb, segments, -1,
                                      topN, System.currentTimeMillis());
-      new Fetcher(job).fetch(segment, threads, Fetcher.isParsing(job));  // fetch it
+      new Fetcher(job).fetch(segment, threads);  // fetch it
       if (!Fetcher.isParsing(job)) {
         new ParseSegment(job).parse(segment);    // parse it, if needed
       }

Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java?view=diff&rev=449289&r1=449288&r2=449289
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/fetcher/Fetcher.java Sat Sep 23 12:00:13 2006
@@ -405,7 +405,7 @@
 
   }
 
-  public void fetch(Path segment, int threads, boolean parsing)
+  public void fetch(Path segment, int threads)
     throws IOException {
 
     if (LOG.isInfoEnabled()) {
@@ -418,7 +418,6 @@
 
     job.setInt("fetcher.threads.fetch", threads);
     job.set(SEGMENT_NAME_KEY, segment.getName());
-    job.setBoolean("fetcher.parse", parsing);
 
     // for politeness, don't permit parallel execution of a single task
     job.setSpeculativeExecution(false);
@@ -469,7 +468,7 @@
     }
     Fetcher fetcher = new Fetcher(conf);          // make a Fetcher
 
-    fetcher.fetch(segment, threads, parsing);              // run the Fetcher
+    fetcher.fetch(segment, threads);              // run the Fetcher
 
   }
 }