Author: jnioche
Date: Mon Jul 7 12:38:23 2014
New Revision: 1608431
URL: http://svn.apache.org/r1608431
Log:
NUTCH-578 URL fetched with 403 is generated over and over again
Modified: nutch/trunk/CHANGES.txt nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1608431&r1=1608430&r2=1608431&view=diff ============================================================================== --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Mon Jul 7 12:38:23 2014 @@ -2,6 +2,8 @@ Nutch Change Log Nutch Current Development +* NUTCH-578 URL fetched with 403 is generated over and over again (snagel) + * NUTCH-1776 Log incorrect plugin.folder file path (Diaa via snagel) * NUTCH-1566 bin/nutch to allow whitespace in paths (tejasp, snagel) Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1608431&r1=1608430&r2=1608431&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Mon Jul 7 12:38:23 2014 @@ -274,6 +274,8 @@ public class CrawlDbReducer implements R result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED); } else { result.setStatus(CrawlDatum.STATUS_DB_GONE); + result = schedule.setPageGoneSchedule(key, result, prevFetchTime, + prevModifiedTime, fetch.getFetchTime()); } break;