Hi, I am new to scrapy and trying to learn. I have used my friend Google a lot, but I am currently stuck. The pages I need to scrape require authentication to get hold of price information. I would appreciate all the help I can get on this problem. Best regards, Tobias Gårdner
The login page can be found here: http://www.vortexparts.eu/account/login The output I get looks like this: >>>>> start_request <<<<< 2015-01-15 16:35:14+0100 [vortex3] DEBUG: Crawled (200) <GET http://www.vortexparts.eu/account/login> (referer: None) >>>>> login_parse: http://www.vortexparts.eu/account/login <<<<< >>>>> login token: [u'-pJRTcvZyxvZhljpVTVuMSqJo-klwQHfhDAElxBaGb4'] <<<<< 2015-01-15 16:35:14+0100 [vortex3] DEBUG: Redirecting (302) to <GET http://www.vortexparts.eu/account/login> from <POST http://www.vortexparts.eu/changeUserOption> And my spider and items code can be found below. # File: vortex_spider_public.py from scrapy.contrib.spiders import CrawlSpider, Rule from scrapy.contrib.linkextractors import LinkExtractor from scrapy.selector import Selector from scrapy.http import FormRequest, Request from vortex.items import VortexCategoryItem, VortexProductItem class vortexSpiderPublic(CrawlSpider): name = "vortex3" allowed_domains = ["vortexparts.eu"] login_url = "http://www.vortexparts.eu/account/login" start_urls = ["http://www.vortexparts.eu/category/above-ground-pool-parts"] rules = ( Rule(LinkExtractor(allow=(r"vortexparts.eu/account/login")), callback='login_parse', follow=True), Rule(LinkExtractor(allow=(r"product/"), deny=('/login',)), callback="parse_items", follow= True), ) def start_requests(self): print ">>>>> start_request <<<<<" yield Request(self.login_url, callback=self.login_parse) def login_parse(self, response): print ">>>>> login_parse: %s <<<<<" % response.url print ">>>>> login token: %s <<<<<" % response.xpath('//input[@id="login__token"]/@value').extract() return FormRequest.from_response(response, formdata={'login[_username]': "[email protected]", \ 'login[_password]': "xxx", \ 'login[_token]': response.xpath('//input[@id="login__token"]/@value').extract()}, \ callback=self.after_login) def after_login(self, response): print ">>>>> after_login: %s <<<<<" % response.url if "Bad credentials" in response.body: print ">>>>> 
login failed! <<<<<" return else: if "Tobias" in response.body: print ">>>>> login succeeded! Found first name Tobias in the body! <<<<<" else: print ">>>>> login succeeded! BUT did NOT find first name Tobias in the body! <<<<<" return [Request(url=u) for u in self.start_urls] def parse_items(self, response): productItem = VortexProductItem() productItem ["url"] = response.url productItem ["categories"] = response.xpath('//ul[@id="breadcrumbs"]/li/a/@href').extract() return(productItem) Ange koden här... # file: items.py import scrapy class VortexProductItem(scrapy.Item): url = scrapy.Field() categories = scrapy.Field() pass -- You received this message because you are subscribed to the Google Groups "scrapy-users" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. Visit this group at http://groups.google.com/group/scrapy-users. For more options, visit https://groups.google.com/d/optout.
