Hi, For this notice:
scrapy_demo\spiders\test.py:43: ScrapyDeprecationWarning: Call to deprecated function select. Use .xpath() instead.

Just replace `select` with `xpath`.

Regards.
---------
Lhassan Baazzi | Web Developer PHP / Python - Symfony - JS - Scrapy
Email/Gtalk: [email protected] - Skype: baazzilhassan - Twitter: @baazzilhassan <http://twitter.com/baazzilhassan>
Blog: http://blog.jbinfo.io/

2014-09-02 9:45 GMT+01:00 james josh <[email protected]>:
> I am debugging this code and get different job counts; but it's not
> giving me all the jobs as they are spread across multiple pages.
>
> I also get the following error, but I'm not sure what to do about it:
>
> scrapy_demo\spiders\test.py:43: ScrapyDeprecationWarning: Call to deprecated
> function select. Use .xpath() instead.
>
>     next_page = None
>     if hxs.select('//div[@class="paggingNext"]/a[@class="blue"]/@href').extract():
>         next_page = hxs.select('//div[@class="paggingNext"]/a[@class="blue"]/@href').extract()[0]
>     if next_page:
>         yield Request(urlparse.urljoin(response.url, next_page), self.parse)
>
>
>     from scrapy.spider import BaseSpider
>     from scrapy.selector import HtmlXPathSelector
>     import urlparse
>     from scrapy.http.request import Request
>     from scrapy.contrib.spiders import CrawlSpider, Rule
>     from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
>     from scrapy.item import Item, Field
>
>     class ScrapyDemoSpiderItem(Item):
>         link = Field()
>         title = Field()
>         city = Field()
>         salary = Field()
>         content = Field()
>
>     class ScrapyDemoSpider(BaseSpider):
>         name = 'eujobs77'
>         allowed_domains = ['eujobs77.com']
>         start_urls = ['http://www.eujobs77.com/jobs']
>
>         def parse(self, response):
>             hxs = HtmlXPathSelector(response)
>             listings = hxs.select('//div[@class="jobSearchBrowse jobSearchBrowsev1"]')
>             links = []
>
>             # scrap listings page to get listing links
>             for listing in listings:
>                 link = listing.select('//h2[@class="jobtitle"]/a[@class="blue"]/@href').extract()
>                 links.extend(link)
>
>             # parse listing url to get content of the listing page
>             for link in links:
>                 item = ScrapyDemoSpiderItem()
>                 item['link'] = link
>                 yield Request(urlparse.urljoin(response.url, link),
>                               meta={'item': item}, callback=self.parse_listing_page)
>
>             # get next button link
>             next_page = None
>             if hxs.select('//div[@class="paggingNext"]/@href').extract():
>                 next_page = hxs.select('//div[@class="paggingNext"]/@href').extract()
>             if next_page:
>                 yield Request(urlparse.urljoin(response.url, next_page), self.parse)
>
>         # scrap listing page to get content
>         def parse_listing_page(self, response):
>             hxs = HtmlXPathSelector(response)
>             item = response.request.meta['item']
>             item['link'] = response.url
>             item['title'] = hxs.select("//h1[@id='share_jobtitle']/text()").extract()
>             item['city'] = hxs.select("//html/body/div[3]/div[3]/div[2]/div[1]/div[3]/ul/li[1]/div[2]/text()").extract()
>             item['salary'] = hxs.select("//html/body/div[3]/div[3]/div[2]/div[1]/div[3]/ul/li[3]/div[2]/text()").extract()
>             item['content'] = hxs.select("//div[@class='detailTxt deneL']/text()").extract()
>
>             yield item
>
> --
> You received this message because you are subscribed to the Google Groups
> "scrapy-users" group.
> To unsubscribe from this group and stop receiving emails from it, send an
> email to [email protected].
> To post to this group, send email to [email protected].
> Visit this group at http://groups.google.com/group/scrapy-users.
> For more options, visit https://groups.google.com/d/optout.

--
You received this message because you are subscribed to the Google Groups "scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.
