Author: lewismc Date: Thu Apr 23 23:55:09 2015 New Revision: 1675735 URL: http://svn.apache.org/r1675735 Log: Add back in NUTCH-1927 property to nutch-default as removed during commit @1675022
Modified: nutch/trunk/conf/nutch-default.xml Modified: nutch/trunk/conf/nutch-default.xml URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1675735&r1=1675734&r2=1675735&view=diff ============================================================================== --- nutch/trunk/conf/nutch-default.xml (original) +++ nutch/trunk/conf/nutch-default.xml Thu Apr 23 23:55:09 2015 @@ -118,6 +118,15 @@ </property> <property> + <name>http.robot.rules.whitelist</name> + <value></value> + <description>Comma separated list of hostnames or IP addresses to ignore + robot rules parsing for. Use with care and only if you are explicitly + allowed by the site owner to ignore the site's robots.txt! + </description> +</property> + +<property> <name>http.robots.403.allow</name> <value>true</value> <description>Some servers return HTTP status 403 (Forbidden) if