Hi,

I am new here in scrapy.
I just want to know how to call a function and pass the two or three value 
in return.
I have a spider code please let me know how to solve it.

Step:
1. i want to scrap all page links with pagination and and stand number.
2. hit all the links and want to extract website url
3. Total value should b 3 means titlename, standnumber and website url.

my spider code is

import re
import sys
import unicodedata
from string import join
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
from scrapy.selector import HtmlXPathSelector
from scrapy.http import Request
from pagitest.items import PagitestItem
from urlparse import urlparse
from urlparse import urljoin
class InfojobsSpider(CrawlSpider):
USER_AGENT = "Mozilla/5.0 (Windows NT 6.1; rv:29.0) Gecko/20100101 
Firefox/29.0"
name = "info"
allowed_domains = ["infosec.co.uk"]
start_urls = [
"http://www.infosec.co.uk/exhibitor-directory/";
]
rules = (
Rule(SgmlLinkExtractor(allow=(r'exhibitor\W+directory'),restrict_xpaths=('//li[@class="gButton"]/a')),
 
callback='parse_item', follow=True),
)
def parse_item(self, response):
items=[]
hxs = HtmlXPathSelector(response)
data = hxs.select('//div[@class="listItemDetail exhibitorDetail"]')
for titlename in data:
titleurl=titlename.select('h3[@class="name"]/a/@href').extract()
for titleurls in titleurl:
preg=re.match('^http',titleurls)
if preg:
titleurls=titleurls
else:
titleurls="http://www.infosec.co.uk"+titleurls
yield Request(url=titleurls,callback=self.getwebsitename)
 
def getwebsitename(self,response):
hxs= HtmlXPathSelector(response)
websites= hxs.select('//li[@class="web"]/a/@href').extract()
for websitename in websites:
return websites

-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.

Reply via email to