Author: markus
Date: Wed Oct 16 14:56:37 2013
New Revision: 1532785

URL: http://svn.apache.org/r1532785
Log:
NUTCH-1656 ParseMeta not passed to CrawlDatum for not_modified

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1532785&r1=1532784&r2=1532785&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Oct 16 14:56:37 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Development Trunk
 
+* NUTCH-1656 ParseMeta not passed to CrawlDatum for not_modified (markus)
+
 * NUTCH-1606 Check that Factory classes use the cache in a thread safe way (jnioche)
 
 * NUTCH-1653 AbstractScoringFilter (jnioche)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1532785&r1=1532784&r2=1532785&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Wed Oct 16 14:56:37 2013
@@ -243,12 +243,15 @@ public class CrawlDbReducer implements R
           else result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
         }
         result.setSignature(signature);
-        if (metaFromParse != null) {
-          for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
-            result.getMetaData().put(e.getKey(), e.getValue());
-          }
-        }
       }
+
+      // https://issues.apache.org/jira/browse/NUTCH-1656
+      if (metaFromParse != null) {
+        for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
+          result.getMetaData().put(e.getKey(), e.getValue());
+        }
+      }
+
       // if fetchInterval is larger than the system-wide maximum, trigger
       // an unconditional recrawl. This prevents the page to be stuck at
       // NOTMODIFIED state, when the old fetched copy was already removed with