python core.py
Traceback (most recent call last):
  File "core.py", line 39, in <module>
    crawler = Crawler(settings)
  File "/home/hardik/Install/envs/webparse/local/lib/python2.7/site-packages/scrapy/crawler.py", line 32, in __init__
    self.spidercls.update_settings(self.settings)
AttributeError: 'Settings' object has no attribute 'update_settings'
Please tell me how to set Crawler settings correctly.
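The traceback hints at the cause: in Scrapy 1.0 and later, Crawler.__init__ takes a spider class as its first argument and the settings second, so passing a bare Settings object makes Scrapy try to call update_settings on it. A minimal sketch of the newer signature, with MySpider standing in as a placeholder spider class:

from scrapy.crawler import Crawler
from scrapy.settings import Settings

settings = Settings()
settings.set("USER_AGENT", "my-bot (+http://example.com/bot)")
# Scrapy >= 1.0: spider class first, settings second
crawler = Crawler(MySpider, settings)  # MySpider is a placeholder

The full script that produces the error: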
import logging
import scrapy
logger = logging.getLogger('mycustomlogger')
# import the spiders you want to run
from spiders.faballey import FaballeySpider
from spiders.bewakoof import BewakoofSpider
# scrapy api imports
from scrapy import signals
from twisted.internet import reactor
from scrapy.crawler import Crawler, CrawlerProcess
from scrapy.settings import Settings
# spider classes to run
TO_CRAWL = [FaballeySpider, BewakoofSpider]
# spider instances that are still running
RUNNING_CRAWLERS = []
def spider_closing(spider):
    """
    Runs on the spider_closed signal; stops the reactor once
    every spider has finished.
    """
    logger.info("Spider closed: %s", spider)
    RUNNING_CRAWLERS.remove(spider)
    if not RUNNING_CRAWLERS:
        reactor.stop()
# start logger (log.start() comes from the deprecated scrapy.log module)
# log.start(loglevel=log.DEBUG)
# set up the crawler and start to crawl one spider at a time
for spider in TO_CRAWL:
    settings = Settings()
    # crawl responsibly
    settings.set("USER_AGENT", "Aniket Jagani (+http://aniketjagani.github.io)", priority='cmdline')
    crawler = Crawler(settings)
    crawler_obj = spider()
    RUNNING_CRAWLERS.append(crawler_obj)
    # stop the reactor when the last spider closes
    crawler.signals.connect(spider_closing, signal=signals.spider_closed)
    crawler.configure()
    crawler.crawl(crawler_obj)
    crawler.start()
# blocks process; so always keep as the last statement
reactor.run()
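Alternatively, the CrawlerProcess already imported above may be the simpler route for running several spiders: it takes the settings in its constructor, builds a Crawler for each spider class passed to crawl(), and runs the Twisted reactor itself, so the manual signal wiring and reactor.run() become unnecessary. A sketch reusing the spider list and user-agent from the script above:

from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings

settings = Settings()
settings.set("USER_AGENT", "Aniket Jagani (+http://aniketjagani.github.io)", priority='cmdline')

process = CrawlerProcess(settings)
for spider_cls in TO_CRAWL:
    # crawl() takes the spider *class*; the process instantiates it
    process.crawl(spider_cls)
# blocks until every spider has finished, then stops the reactor
process.start()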