Author: jnioche
Date: Fri Jun 27 07:49:05 2014
New Revision: 1605979

URL: http://svn.apache.org/r1605979
Log:
NUTCH-385 Improve description of thread related configuration for Fetcher

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/conf/nutch-default.xml

Modified: nutch/branches/2.x/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1605979&r1=1605978&r2=1605979&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Jun 27 07:49:05 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-385 Improve description of thread related configuration for Fetcher (jnioche,lufeng)
+
 * NUTCH-1798 Crawl script not calling index command correctly (Aaron Bedward via jnioche)
 
 * NUTCH-1769 REST API refactoring (Fjodor Vershinin via lewismc)

Modified: nutch/branches/2.x/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/branches/2.x/conf/nutch-default.xml?rev=1605979&r1=1605978&r2=1605979&view=diff
==============================================================================
--- nutch/branches/2.x/conf/nutch-default.xml (original)
+++ nutch/branches/2.x/conf/nutch-default.xml Fri Jun 27 07:49:05 2014
@@ -607,7 +607,10 @@
   <name>fetcher.server.delay</name>
   <value>5.0</value>
   <description>The number of seconds the fetcher will delay between 
-   successive requests to the same server.</description>
+   successive requests to the same server. Note that this might get
+   overridden by a Crawl-Delay from a robots.txt and is used ONLY if 
+   fetcher.threads.per.queue is set to 1.
+   </description>
 </property>
 
 <property>
@@ -615,7 +618,7 @@
   <value>0.0</value>
   <description>The minimum number of seconds the fetcher will delay between 
   successive requests to the same server. This value is applicable ONLY
-  if fetcher.threads.per.host is greater than 1 (i.e. the host blocking
+  if fetcher.threads.per.queue is greater than 1 (i.e. the host blocking
   is turned off).</description>
 </property>
 
@@ -646,7 +649,12 @@
   <name>fetcher.threads.per.queue</name>
   <value>1</value>
   <description>This number is the maximum number of threads that
-    should be allowed to access a queue at one time.</description>
+    should be allowed to access a queue at one time. Setting it to 
+    a value > 1 will cause the Crawl-Delay value from robots.txt to
+    be ignored and the value of fetcher.server.min.delay to be used
+    as a delay between successive requests to the same server instead 
+    of fetcher.server.delay.
+   </description>
 </property>
 
 <property>


Reply via email to