I am trying to automatically restart my spider when the scraping is completed,
more specifically when the response status is bad. For example, I have
this code:

#!/usr/bin/python -tt
# -*- coding: utf-8 -*-
> from scrapy.selector import Selector
> from scrapy.contrib.spiders import CrawlSpider
> from scrapy.http import Request
> from urlparse import urljoin
> from bs4 import BeautifulSoup
> from scrapy.spider import BaseSpider
> from scrapy import signals
> from scrapy.xlib.pydispatch import dispatcher
> from datetime import datetime
> import re
> class level1(BaseSpider):
>     # Crawling Start
>     CrawlSpider.started_on = datetime.now()
>
>     name = "level1"
>     base_domain = 'http://www.google.com'
>
>     DOWNLOAD_DELAY = 3
>
>     restart=False
>
>     handle_httpstatus_list = [404, 302, 503, 999, 200] #add any other code you need
>
>     # Call sendEmail class
>     email = sendEmail()
>
>
>     # Call log settings
>     saveLog = runlog()
>
>
>     # Init
>     def __init__(self, url='', child='', parent=''):
>         self.start_urls = [url]
>         self.child = child
>         self.parent = parent
>
>         #run baby, run :)
>         super(level1, self).__init__(self.start_urls)
>
>
>         # On Spider Closed
>         dispatcher.connect(self.spider_closed, signals.spider_closed)
>
>     def spider_closed(self, reason):
>         if self.restart:
>             print "we need to retry"
>             super(level1, self).__init__(self.start_urls)
>         else:
>             print "ok"
>             # parsing time
>             work_time = datetime.now() - CrawlSpider.started_on
>
>             # Correct Finished
>             if reason == "finished":
>                 print "finished"
>
>     def parse(self, response):
>
>         if response.status == 503:
>             self.restart = True
>         if response.status == 999:
>             self.restart = True
>
>
>         if str(response.status) == "200":
>             # Selector
>             sel = Selector(response)
>             todo
>
>
In the spider_closed method, I try to restart my spider when the response
status is bad, but it does not work.

How can I resolve this?


Thanks in advance.
 

-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.

Reply via email to