Author: lewismc
Date: Thu Oct 22 03:47:04 2015
New Revision: 1709943

URL: http://svn.apache.org/viewvc?rev=1709943&view=rev
Log:
NUTCH-2148 Review and update mapred --> mapreduce config params in crawl script

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/bin/crawl

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1709943&r1=1709942&r2=1709943&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Thu Oct 22 03:47:04 2015
@@ -2,6 +2,8 @@ Nutch Change Log
     
 Nutch Current Development 1.11-SNAPSHOT
 
+* NUTCH-2128 Review and update mapred --> mapreduce config params in crawl script (lewismc)
+
 * NUTCH-2141 Change the InteractiveSelenium plugin handler Interface to return page content
   (Balaji Gurumurthy via mattmann)
 

Modified: nutch/trunk/src/bin/crawl
URL: http://svn.apache.org/viewvc/nutch/trunk/src/bin/crawl?rev=1709943&r1=1709942&r2=1709943&view=diff
==============================================================================
--- nutch/trunk/src/bin/crawl (original)
+++ nutch/trunk/src/bin/crawl Thu Oct 22 03:47:04 2015
@@ -109,7 +109,7 @@ fi
 numSlaves=1
 
 # and the total number of available tasks
-# sets Hadoop parameter "mapred.reduce.tasks"
+# sets Hadoop parameter "mapreduce.job.reduces"
 numTasks=`expr $numSlaves \* 2`
 
 # number of urls to fetch in one iteration
@@ -135,7 +135,7 @@ fi
 
 # note that some of the options listed here could be set in the
 # corresponding hadoop site xml param file
-commonOptions="-D mapred.reduce.tasks=$numTasks -D mapred.child.java.opts=-Xmx1000m -D mapred.reduce.tasks.speculative.execution=false -D mapred.map.tasks.speculative.execution=false -D mapred.compress.map.output=true"
+commonOptions="-D mapreduce.job.reduces=$numTasks -D mapred.child.java.opts=-Xmx1000m -D mapreduce.reduce.speculative=false -D mapreduce.map.speculative=false -D mapreduce.map.output.compress=true"
 
  # check that hadoop can be found on the path
 if [ $mode = "distributed" ]; then
@@ -232,7 +232,7 @@ do
   echo "Parsing : $SEGMENT"
   # enable the skipping of records for the parsing so that a dodgy document
   # so that it does not fail the full task
-  skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D mapred.skip.map.max.skip.records=1"
+  skipRecordsOptions="-D mapreduce.task.skip.start.attempts=2 -D mapreduce.map.skip.maxrecords=1"
   __bin_nutch parse $commonOptions $skipRecordsOptions "$CRAWL_PATH"/segments/$SEGMENT
 
   # updatedb with this segment


Reply via email to