# -*- coding: utf-8 -*-
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor
from toster.items import DjangoItem
class DjangoSpider(CrawlSpider):
    """Crawl the Django tag question listing on www.toster.ru.

    Starts from the tag's question listing, follows the pagination links
    matched by ``rules`` and hands every followed page to ``parse_item``.
    """

    name = "django"
    allowed_domains = ["www.toster.ru"]
    start_urls = [
        'http://www.toster.ru/tag/django/questions',
    ]
    rules = [
        Rule(
            LinkExtractor(
                # Raw string so the regex escapes reach the regex engine
                # unchanged instead of being treated as (invalid) string
                # escape sequences.
                allow=[r'/tag/django/questions\?page=\d'],
            ),
            callback='parse_item',
            follow=True,
        ),
    ]
    # NOTE(review): only responses for links matched by the rule above are
    # routed to parse_item; presumably the start URL itself is not -- confirm
    # against the CrawlSpider docs (parse_start_url) if page 1 is needed.

    def parse_item(self, response):
        """Yield one ``DjangoItem`` per ``div.thing`` element in *response*.

        Each item carries two fields, both lists of extracted strings:
        ``title`` (text of the heading link) and ``url`` (``href`` values).
        """
        for selector in response.css('div.thing'):
            # Was ``TosterItem()``, a name that is never imported or defined;
            # the item class imported by this module is ``DjangoItem``.
            item = DjangoItem()
            # The XPath must be relative: a leading "/" is evaluated from the
            # document root, not from the current ``div.thing`` element.
            item['title'] = selector.xpath('div/div/h2/a/text()').extract()
            # NOTE(review): this only matches <a> elements that are direct
            # children of ``div.thing`` -- verify against the page markup.
            item['url'] = selector.xpath('a/@href').extract()
            yield item
toster/items.py:
from scrapy import Field, Item
class DjangoItem(Item):
    """Container for one scraped entry: its title and its URL."""

    # Title of the scraped entry.
    title = Field()

    # Link associated with the scraped entry.
    url = Field()
--
You received this message because you are subscribed to the Google Groups
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.