Hi, I am new to Scrapy. I just want to know how to call a function and pass two or three values back in return. I have some spider code; please let me know how to solve this.
Steps: 1. I want to scrape all page links (following pagination) along with the stand number. 2. Hit all the links and extract the website URL. 3. The total should be 3 values: titlename, standnumber and website url. My spider code is: import re import sys import unicodedata from string import join from scrapy.contrib.spiders import CrawlSpider, Rule from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor from scrapy.selector import HtmlXPathSelector from scrapy.http import Request from pagitest.items import PagitestItem from urlparse import urlparse from urlparse import urljoin class InfojobsSpider(CrawlSpider): USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:29.0) Gecko/20100101 Firefox/29.0" name = "info" allowed_domains = ["infosec.co.uk"] start_urls = [ "http://www.infosec.co.uk/exhibitor-directory/" ] rules = ( Rule(SgmlLinkExtractor(allow=(r'exhibitor\W+directory'),restrict_xpaths=('//li[@class="gButton"]/a')), callback='parse_item', follow=True), ) def parse_item(self, response): items=[] hxs = HtmlXPathSelector(response) data = hxs.select('//div[@class="listItemDetail exhibitorDetail"]') for titlename in data: titleurl=titlename.select('h3[@class="name"]/a/@href').extract() for titleurls in titleurl: preg=re.match('^http',titleurls) if preg: titleurls=titleurls else: titleurls="http://www.infosec.co.uk"+titleurls yield Request(url=titleurls,callback=self.getwebsitename) def getwebsitename(self,response): hxs= HtmlXPathSelector(response) websites= hxs.select('//li[@class="web"]/a/@href').extract() for websitename in websites: return websites -- You received this message because you are subscribed to the Google Groups "scrapy-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. Visit this group at http://groups.google.com/group/scrapy-users. For more options, visit https://groups.google.com/d/optout.
