Author: jnioche
Date: Fri Aug 29 11:19:37 2014
New Revision: 1621284

URL: http://svn.apache.org/r1621284
Log:
NUTCH-1828 bin/crawl : incorrect handling of nutch errors

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/bin/crawl

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1621284&r1=1621283&r2=1621284&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Aug 29 11:19:37 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1828 bin/crawl : incorrect handling of nutch errors (Mathieu Bouchard via jnioche)
+
 * NUTCH-1775 IndexingFilter: document origin of passed CrawlDatum (snagel)
 
 * NUTCH-1693 TextMD5Signature computed on textual content (Tien Nguyen Manh, markus via snagel)

Modified: nutch/trunk/src/bin/crawl
URL: http://svn.apache.org/viewvc/nutch/trunk/src/bin/crawl?rev=1621284&r1=1621283&r2=1621284&view=diff
==============================================================================
--- nutch/trunk/src/bin/crawl (original)
+++ nutch/trunk/src/bin/crawl Fri Aug 29 11:19:37 2014
@@ -92,9 +92,10 @@ fi
 
 # initial injection
 "$bin/nutch" inject "$CRAWL_PATH"/crawldb "$SEEDDIR"
+RETCODE=$?
 
-if [ $? -ne 0 ]
-  then exit $?
+if [ $RETCODE -ne 0 ]
+  then exit $RETCODE
 fi
 
 
@@ -111,9 +112,10 @@ do
 
   echo "Generating a new segment"
   "$bin/nutch" generate $commonOptions "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments -topN $sizeFetchlist -numFetchers $numSlaves -noFilter
+  RETCODE=$?
 
-  if [ $? -ne 0 ]
-  then exit $?
+  if [ $RETCODE -ne 0 ]
+  then exit $RETCODE
   fi
 
   # capture the name of the segment
@@ -131,9 +133,10 @@ do
   # fetching the segment
   echo "Fetching : $SEGMENT"
   "$bin/nutch" fetch $commonOptions -D fetcher.timelimit.mins=$timeLimitFetch "$CRAWL_PATH"/segments/$SEGMENT -noParsing -threads $numThreads
+  RETCODE=$?
 
-  if [ $? -ne 0 ]
-  then exit $?
+  if [ $RETCODE -ne 0 ]
+  then exit $RETCODE
   fi
 
   # parsing the segment
@@ -142,47 +145,53 @@ do
   # so that it does not fail the full task
   skipRecordsOptions="-D mapred.skip.attempts.to.start.skipping=2 -D mapred.skip.map.max.skip.records=1"
   "$bin/nutch" parse $commonOptions $skipRecordsOptions "$CRAWL_PATH"/segments/$SEGMENT
+  RETCODE=$?
 
-  if [ $? -ne 0 ]
-  then exit $?
+  if [ $RETCODE -ne 0 ]
+  then exit $RETCODE
   fi
 
   # updatedb with this segment
   echo "CrawlDB update"
   "$bin/nutch" updatedb $commonOptions "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments/$SEGMENT
+  RETCODE=$?
 
-  if [ $? -ne 0 ]
-  then exit $?
+  if [ $RETCODE -ne 0 ]
+  then exit $RETCODE
   fi
 
   # note that the link inversion - indexing routine can be done within the main loop
   # on a per segment basis
   echo "Link inversion"
   "$bin/nutch" invertlinks "$CRAWL_PATH"/linkdb "$CRAWL_PATH"/segments/$SEGMENT
+  RETCODE=$?
 
-  if [ $? -ne 0 ]
-  then exit $?
+  if [ $RETCODE -ne 0 ]
+  then exit $RETCODE
   fi
 
   echo "Dedup on crawldb"
   $bin/nutch dedup $CRAWL_PATH/crawldb
+  RETCODE=$?
 
-  if [ $? -ne 0 ]
-  then exit $?
+  if [ $RETCODE -ne 0 ]
+  then exit $RETCODE
   fi
 
   echo "Indexing $SEGMENT on SOLR index -> $SOLRURL"
   "$bin/nutch" index -D solr.server.url=$SOLRURL "$CRAWL_PATH"/crawldb -linkdb "$CRAWL_PATH"/linkdb "$CRAWL_PATH"/segments/$SEGMENT
+  RETCODE=$?
 
-  if [ $? -ne 0 ]
-  then exit $?
+  if [ $RETCODE -ne 0 ]
+  then exit $RETCODE
   fi
 
   echo "Cleanup on SOLR index -> $SOLRURL"
   "$bin/nutch" clean -D solr.server.url=$SOLRURL "$CRAWL_PATH"/crawldb
+  RETCODE=$?
 
-  if [ $? -ne 0 ]
-  then exit $?
+  if [ $RETCODE -ne 0 ]
+  then exit $RETCODE
   fi
 
 done