add hint and log warning that fetcher.store.robotstxt works only in combination with fetcher.store.content
Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/33cdca76
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/33cdca76
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/33cdca76

Branch: refs/heads/master
Commit: 33cdca76ac91a63445d4e761081e8124a23413af
Parents: 264eea0
Author: Sebastian Nagel <sna...@apache.org>
Authored: Fri Aug 19 15:32:34 2016 +0200
Committer: Sebastian Nagel <sna...@apache.org>
Committed: Fri Aug 19 15:32:34 2016 +0200

----------------------------------------------------------------------
 conf/nutch-default.xml                               | 6 ++++--
 src/java/org/apache/nutch/fetcher/FetcherThread.java | 6 +++++-
 2 files changed, 9 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/nutch/blob/33cdca76/conf/nutch-default.xml
----------------------------------------------------------------------
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 8c329bc..ec9d2d4 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -985,8 +985,10 @@
 <property>
   <name>fetcher.store.robotstxt</name>
   <value>false</value>
-  <description>If true, fetcher will store the robots.txt response
-  content and status for debugging or archival purposes.
+  <description>If true (and fetcher.store.content is also true),
+  fetcher will store the robots.txt response content and status for
+  debugging or archival purposes. The robots.txt is added to the
+  content/ folder of the fetched segment.
   </description>
 </property>
 

http://git-wip-us.apache.org/repos/asf/nutch/blob/33cdca76/src/java/org/apache/nutch/fetcher/FetcherThread.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java
index 6024b8d..449e220 100644
--- a/src/java/org/apache/nutch/fetcher/FetcherThread.java
+++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java
@@ -192,7 +192,11 @@ public class FetcherThread extends Thread {
     outlinksDepthDivisor = conf.getInt(
         "fetcher.follow.outlinks.depth.divisor", 2);
     if (conf.getBoolean("fetcher.store.robotstxt", false)) {
-      robotsTxtContent = new LinkedList<Content>();
+      if (storingContent) {
+        robotsTxtContent = new LinkedList<Content>();
+      } else {
+        LOG.warn("Ignoring fetcher.store.robotstxt because not storing content (fetcher.store.content)!");
+      }
     }
   }
 