Author: markus
Date: Wed Oct 16 14:56:37 2013
New Revision: 1532785

URL: http://svn.apache.org/r1532785
Log:
NUTCH-1656 ParseMeta not passed to CrawlDatum for not_modified

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1532785&r1=1532784&r2=1532785&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Oct 16 14:56:37 2013
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Development Trunk
 
+* NUTCH-1656 ParseMeta not passed to CrawlDatum for not_modified (markus)
+
 * NUTCH-1606 Check that Factory classes use the cache in a thread safe way (jnioche)
 
 * NUTCH-1653 AbstractScoringFilter (jnioche)

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1532785&r1=1532784&r2=1532785&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Wed Oct 16 14:56:37 2013
@@ -243,12 +243,15 @@ public class CrawlDbReducer implements R
           else result.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
         }
         result.setSignature(signature);
-        if (metaFromParse != null) {
-            for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
-              result.getMetaData().put(e.getKey(), e.getValue());
-            }
-          }
       }
+
+      // https://issues.apache.org/jira/browse/NUTCH-1656
+      if (metaFromParse != null) {
+        for (Entry<Writable, Writable> e : metaFromParse.entrySet()) {
+          result.getMetaData().put(e.getKey(), e.getValue());
+        }
+      }
+
       // if fetchInterval is larger than the system-wide maximum, trigger
       // an unconditional recrawl. This prevents the page to be stuck at
       // NOTMODIFIED state, when the old fetched copy was already removed with


Reply via email to