I am trying to automatically restart my spider when the scraping is completed, in particular when the response status is bad. For example, I have this code:
#!/usr/bin/python -tt# -*- coding: utf-8 -*- > from scrapy.selector import Selectorfrom scrapy.contrib.spiders import > CrawlSpiderfrom scrapy.http import Requestfrom urlparse import urljoinfrom > bs4 import BeautifulSoupfrom scrapy.spider import BaseSpiderfrom scrapy > import signalsfrom scrapy.xlib.pydispatch import dispatcherfrom datetime > import datetimeimport re > class level1(BaseSpider): > # Crawling Start > CrawlSpider.started_on = datetime.now() > > name = "level1" > base_domain = 'http://www.google.com' > > DOWNLOAD_DELAY = 3 > > restart=False > > handle_httpstatus_list = [404, 302, 503, 999, 200] #add any other code > you need > > # Call sendEmail class > email = sendEmail() > > > # Call log settings > saveLog = runlog() > > > # Init > def __init__(self, url='', child='', parent=''): > self.start_urls = [url] > self.child = child > self.parent = parent > > #run baby, run :) > super(level1, self).__init__(self.start_urls) > > > # On Spider Closed > dispatcher.connect(self.spider_closed, signals.spider_closed) > > def spider_closed(self, reason): > if self.restart: > print "we need to retry" > super(level1, self).__init__(self.start_urls) > else: > print "ok" > # parsing time > work_time = datetime.now() - CrawlSpider.started_on > > # Correct Finished > if reason == "finished": > print "finished" > > def parse(self, response): > > if response.status == 503: > self.restart = True > if response.status == 999: > self.restart = True > > > if str(response.status) == "200": > # Selector > sel = Selector(response) > todo > > In the spider_closed method, I try to restart my spider when the response status is bad, but it does not work. How can I resolve this? Thanks in advance. -- You received this message because you are subscribed to the Google Groups "scrapy-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. 
Visit this group at http://groups.google.com/group/scrapy-users. For more options, visit https://groups.google.com/d/optout.
