Author: jnioche
Date: Fri Jun 27 07:38:45 2014
New Revision: 1605978

URL: http://svn.apache.org/r1605978
Log:
NUTCH-385 Improve description of thread related configuration for Fetcher

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/conf/nutch-default.xml

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1605978&r1=1605977&r2=1605978&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Jun 27 07:38:45 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-385 Improve description of thread related configuration for Fetcher (jnioche,lufeng)
+
 * NUTCH-1633 slf4j is provided by hadoop and should not be included in the job file (kaveh minooie via jnioche)
 
 * NUTCH-1787 update and complete API doc overview page (snagel)

Modified: nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1605978&r1=1605977&r2=1605978&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Fri Jun 27 07:38:45 2014
@@ -684,7 +684,10 @@
   <name>fetcher.server.delay</name>
   <value>5.0</value>
   <description>The number of seconds the fetcher will delay between 
-   successive requests to the same server.</description>
+   successive requests to the same server. Note that this might get
+   overridden by a Crawl-Delay from a robots.txt and is used ONLY if 
+   fetcher.threads.per.queue is set to 1.
+   </description>
 </property>
 
 <property>
@@ -692,7 +695,7 @@
   <value>0.0</value>
   <description>The minimum number of seconds the fetcher will delay between 
   successive requests to the same server. This value is applicable ONLY
-  if fetcher.threads.per.host is greater than 1 (i.e. the host blocking
+  if fetcher.threads.per.queue is greater than 1 (i.e. the host blocking
   is turned off).</description>
 </property>
 
@@ -723,7 +726,11 @@
   <name>fetcher.threads.per.queue</name>
   <value>1</value>
   <description>This number is the maximum number of threads that
-    should be allowed to access a queue at one time.
+    should be allowed to access a queue at one time. Setting it to 
+    a value > 1 will cause the Crawl-Delay value from robots.txt to
+    be ignored and the value of fetcher.server.min.delay to be used
+    as a delay between successive requests to the same server instead 
+    of fetcher.server.delay.
    </description>
 </property>
 


Reply via email to