# -*- coding: utf-8 -*-
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors import LinkExtractor
from toster.items import DjangoItem


class DjangoSpider(CrawlSpider):
    """Crawl the Django tag question listing on toster.ru.

    Starting from the tag's question list, the spider follows pagination
    links (``?page=N``) and yields one ``DjangoItem`` for every
    ``div.thing`` element found on each followed page.
    """

    name = "django"
    allowed_domains = ["www.toster.ru"]
    start_urls = [
        'http://www.toster.ru/tag/django/questions',
    ]

    # NOTE(review): CrawlSpider only runs ``callback`` on responses reached
    # through a rule; the start URL response itself is not passed to
    # ``parse_item`` unless ``parse_start_url`` is overridden -- confirm
    # whether the first listing page should be scraped as well.
    rules = [
        Rule(
            # Raw string so the regex escapes ``\?`` and ``\d`` are not
            # treated as (invalid) Python string escapes.
            LinkExtractor(allow=[r'/tag/django/questions\?page=\d']),
            callback='parse_item',
            follow=True,
        ),
    ]

    def parse_item(self, response):
        """Yield a ``DjangoItem`` for each question block on a listing page.

        :param response: the response for a followed pagination page.
        :returns: a generator of ``DjangoItem`` objects whose ``title`` and
            ``url`` fields hold the lists returned by ``extract()``.
        """
        for question in response.css('div.thing'):
            # The item class imported by this module is DjangoItem; the
            # previously used ``TosterItem`` name was never defined here.
            item = DjangoItem()
            # A leading "/" makes an XPath absolute to the whole document
            # even when evaluated on a sub-selector, so the path is anchored
            # with "./" to search inside the current question block.
            item['title'] = question.xpath('./div/div/h2/a/text()').extract()
            # NOTE(review): this only matches an <a> that is a direct child
            # of div.thing -- verify against the page markup.
            item['url'] = question.xpath('a/@href').extract()

            yield item


toster/items.py:

from scrapy import Field, Item


class DjangoItem(Item):
    """Scrapy item holding the two fields the spider fills in per question."""

    # Populated by the spider from the question block's link text.
    title = Field()
    # Populated by the spider from an ``href`` attribute in the block.
    url = Field()
    

-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.

Reply via email to