There has to be something really simple that I'm missing. I'm trying to get 
it to crawl more than one page, but I'm using a section of the page as a 
starting point for testing. I can't get it to crawl anything beyond the 
index page. What am I doing wrong?

import scrapy
from scrapy.contrib.spiders import CrawlSpider, Rule
from africanstudies.items import AfricanstudiesItem
from scrapy.contrib.linkextractors import LinkExtractor

class DmozSpider(CrawlSpider):
    """Crawl northwestern.edu/african-studies pages and extract titled sections.

    Why the original only scraped the index page: ``CrawlSpider`` uses its own
    ``parse`` method internally to apply ``rules`` and follow links.  Overriding
    ``parse`` (as the original code did) silently disables rule-based crawling,
    so no links were ever followed.  The extraction logic now lives in
    ``parse_item`` and is attached to a ``Rule`` instead.
    """

    name = "africanstudies"
    allowed_domains = ["northwestern.edu"]
    start_urls = [
        # Trailing ';' removed -- it was an email-paste artifact and a SyntaxError.
        "http://www.northwestern.edu/african-studies/about/",
    ]

    # Follow every link within allowed_domains and hand each fetched page to
    # parse_item.  follow=True keeps crawling links found on callback pages too.
    rules = (
        Rule(LinkExtractor(), callback="parse_item", follow=True),
    )

    def parse_item(self, response):
        """Yield one AfricanstudiesItem per matching section of the response.

        NOTE(review): the XPath expressions are position-based and fragile --
        confirm //div[2]/div[1] still matches the target layout on sub-pages.
        """
        for sel in response.xpath('//div[2]/div[1]'):
            item = AfricanstudiesItem()
            item['url'] = response.url
            item['title'] = sel.xpath('div[3]/*[@id="green_title"]/text()').extract()
            item['desc'] = sel.xpath('div[4]/*').extract()
            yield item


-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.

Reply via email to