from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from craigslist_sample.items import CraigslistSampleItem


class MySpider(CrawlSpider):
    """Crawl spider that collects title/link pairs from the Wikipedia portal.

    The spider starts at ``http://www.wikipedia.org/`` and, for every node
    matched by the portal XPath in :meth:`parse_items`, builds one
    ``CraigslistSampleItem`` carrying a ``title`` and a ``link`` field.
    """

    name = "giidli"
    allowed_domains = []
    start_urls = ["http://www.wikipedia.org/"]

    # The single link-following rule is intentionally left disabled, exactly
    # as in the original code.
    # NOTE(review): nothing else in this class references ``parse_items``, so
    # with the rule commented out the callback is presumably never invoked --
    # confirm whether the rule should be re-enabled.
    rules = (
        # Rule(
        #     SgmlLinkExtractor(
        #         allow=(),
        #         restrict_xpaths=(
        #             '//*[@id="www-wikipedia-org"]/div[6]/div[3]/div',
        #         ),
        #     ),
        #     callback="parse_items",
        #     follow=True,
        # ),
    )

    def parse_items(self, response):
        """Build one item per portal entry found in ``response``.

        Args:
            response: The downloaded page handed to the callback.

        Returns:
            A list of ``CraigslistSampleItem`` objects. Each item's
            ``title`` and ``link`` hold the values extracted from the
            entry's text span and its anchor ``href`` respectively.
        """
        hxs = HtmlXPathSelector(response)
        # The id value must be a quoted XPath string literal; without the
        # inner quotes the predicate compares @id against a child element
        # named ``www-wikipedia-org`` and matches nothing useful.
        titles = hxs.xpath('//*[@id="www-wikipedia-org"]/div[6]/div[3]/div')
        items = []
        for title in titles:
            item = CraigslistSampleItem()
            item["title"] = title.xpath('a/div[2]/span[1]/text()').extract()
            item["link"] = title.xpath('a/@href').extract()
            items.append(item)
        return items

-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to scrapy-users+unsubscribe@googlegroups.com.
To post to this group, send email to scrapy-users@googlegroups.com.
Visit this group at https://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.

Reply via email to