Hello guys, I am new to Scrapy, and my script cannot request any more URLs. Can anyone help me? Thanks.
My code is shown as follows: My test2.py file: > from scrapy.contrib.spiders import CrawlSpider, Rule > from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor > from try_yhd.items import TryYhdItem > from scrapy.selector import Selector > > class MySpider(CrawlSpider): > name = "yhdspider" > allowed_domains = ["yhd.com"] > start_urls = ["http://item.yhd.com/item/30838751",] > rules = [Rule(SgmlLinkExtractor(allow=['^http://item.yhd.com/item/\d+' > ,]), > callback="parse_items",follow = True), > ] > > def parse_items(self,response): > print "Hello this is the url %s" % response.url > hxs = Selector(response) > # items = [] > # find the price and product id. > item = TryYhdItem() > item['url'] = response.url > item['price']= hxs.xpath("//span[@id='current_price']").extract() > item['productId']= hxs.xpath("//p[@class='product_id']/text()"). > extract() > item['title'] = hxs.xpath("//h1[@id = 'productMainName']").extract > () > yield item > > > my middlewares.py file: > SPIDER_MIDDLEWARES = { 'try_yhd.middlewares.CustomSpiderMiddleware': 543, #'scrapy.contrib.spidermiddleware.offsite.OffsiteMiddleware': None, } my items.py file # -*- coding: utf-8 -*- # Define here the models for your scraped items # # See documentation in: # http://doc.scrapy.org/en/latest/topics/items.html from scrapy.item import Item, Field import scrapy class TryYhdItem(Item): # define the fields for your item here like: price = Field() productId = Field() url = Field() title = Field() -- You received this message because you are subscribed to the Google Groups "scrapy-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. Visit this group at http://groups.google.com/group/scrapy-users. For more options, visit https://groups.google.com/d/optout.
