python core.py
Traceback (most recent call last):
  File "core.py", line 39, in <module>
    crawler = Crawler(settings)
  File "/home/hardik/Install/envs/webparse/local/lib/python2.7/site-packages/scrapy/crawler.py", line 32, in __init__
    self.spidercls.update_settings(self.settings)
AttributeError: 'Settings' object has no attribute 'update_settings'
Please tell me how to set Crawler settings correctly.
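The traceback hints at the cause: in Scrapy 1.0 and later, Crawler.__init__ takes a spider class as its first argument and the settings second, so passing a bare Settings object makes Scrapy try to call update_settings on it. A minimal sketch of the newer signature, with MySpider standing in as a placeholder spider class:

from scrapy.crawler import Crawler
from scrapy.settings import Settings

settings = Settings()
settings.set("USER_AGENT", "my-bot (+http://example.com/bot)")
# Scrapy >= 1.0: spider class first, settings second
crawler = Crawler(MySpider, settings)  # MySpider is a placeholder

The full script that produces the error: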
import logging
import scrapy
logger = logging.getLogger('mycustomlogger')
# import the spiders you want to run
from spiders.faballey import FaballeySpider
from spiders.bewakoof import BewakoofSpider
# scrapy api imports
from scrapy import signals
from twisted.internet import reactor
from scrapy.crawler import Crawler, CrawlerProcess
from scrapy.settings import Settings
# spider classes to run
TO_CRAWL = [FaballeySpider, BewakoofSpider]
# spider instances that are still running
RUNNING_CRAWLERS = []
def spider_closing(spider):
    """
    Runs on the spider_closed signal; stops the reactor once
    every spider has finished.
    """
    logger.info("Spider closed: %s", spider)
    RUNNING_CRAWLERS.remove(spider)
    if not RUNNING_CRAWLERS:
        reactor.stop()
# start logger (log.start() comes from the deprecated scrapy.log module)
# log.start(loglevel=log.DEBUG)
# set up the crawler and start to crawl one spider at a time
for spider in TO_CRAWL:
    settings = Settings()
    # crawl responsibly
    settings.set("USER_AGENT", "Aniket Jagani (+http://aniketjagani.github.io)", priority='cmdline')
    crawler = Crawler(settings)
    crawler_obj = spider()
    RUNNING_CRAWLERS.append(crawler_obj)
    # stop the reactor when the last spider closes
    crawler.signals.connect(spider_closing, signal=signals.spider_closed)
    crawler.configure()
    crawler.crawl(crawler_obj)
    crawler.start()
# blocks process; so always keep as the last statement
reactor.run()
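Alternatively, the CrawlerProcess already imported above may be the simpler route for running several spiders: it takes the settings in its constructor, builds a Crawler for each spider class passed to crawl(), and runs the Twisted reactor itself, so the manual signal wiring and reactor.run() become unnecessary. A sketch reusing the spider list and user-agent from the script above:

from scrapy.crawler import CrawlerProcess
from scrapy.settings import Settings

settings = Settings()
settings.set("USER_AGENT", "Aniket Jagani (+http://aniketjagani.github.io)", priority='cmdline')

process = CrawlerProcess(settings)
for spider_cls in TO_CRAWL:
    # crawl() takes the spider *class*; the process instantiates it
    process.crawl(spider_cls)
# blocks until every spider has finished, then stops the reactor
process.start()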