Hi,

Try defining a "parse_start_url" method on your spider. CrawlSpider only applies the rule callbacks to the links it extracts, so the responses for start_urls themselves are passed to parse_start_url instead; pointing it at your callback makes the first page get parsed as well:

class InfojobsSpider(CrawlSpider):
    name = "pg"
    allowed_domains = ["craigslist.org"]
    start_urls = [
        "http://sfbay.craigslist.org/npo/"
    ]
    rules = (
        Rule(SgmlLinkExtractor(allow=("index.*?html",), restrict_xpaths=('//a[@class="button next"]')), callback='parse_response', follow=True),
    )

    def parse_response(self, response):
        print response.url
        items=[]
        hxs = HtmlXPathSelector(response)
        titles = hxs.select('//span[@class="pl"]')
        for title in titles:
            item= PaginationItem()
            item['title'] = title.select('a/text()').extract()
            print item['title']
            items.append(item)
        return items

    parse_start_url = parse_response
On Wednesday, April 9, 2014 8:33:42 AM UTC+2, masroor javed wrote:
>
> Hi guys, please help me sort out my problem.
>
> On Tuesday, April 8, 2014 2:45:43 PM UTC+5:30, masroor javed wrote:
>>
>> ya sure.
>>
>> from scrapy.contrib.spiders import CrawlSpider, Rule
>> from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
>> from scrapy.selector import HtmlXPathSelector
>> from scrapy.http import Request
>> from pagination.items import PaginationItem
>> class InfojobsSpider(CrawlSpider):
>>     name = "pg"
>>     allowed_domains = ["craigslist.org"]
>>     start_urls = [
>>         "http://sfbay.craigslist.org/npo/"
>>     ]
>>     rules = (
>>         Rule(SgmlLinkExtractor(allow=("index.*?html",),restrict_xpaths=('//a[@class="button next"]')), callback='parse_response', follow=True),
>>     )
>>     def parse_response(self, response):
>>         print response.url
>>         items=[]
>>         hxs = HtmlXPathSelector(response)
>>         titles = hxs.select('//span[@class="pl"]')
>>         for title in titles:
>>             item= PaginationItem()
>>             item['title'] = title.select('a/text()').extract()
>>             print item['title']
>>             items.append(item)
>>         return items
>>
>> NOTE: the titles are coming only from the second page.
>> Please let me know how to extract the titles from the starting page.
>>
>>

--
You received this message because you are subscribed to the Google Groups "scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to scrapy-users+unsubscr...@googlegroups.com.
To post to this group, send email to scrapy-users@googlegroups.com.
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.