I'm trying to scrape a 3-level page. Every page has a datatable with a
"show all" select option. On every new page I enter, I need to select that
option and return the response to the same function. I've added the block
of code marked below, but it does not work. What am I doing wrong?
import scrapy

from ..items import BrItem  # adjust to wherever BrItem lives in your project


class BrSpider(scrapy.Spider):
    name = "acre"
    allowed_domains = [""]
    start_urls = [
        ""
    ]
    count = 0

    def parse(self, response):
        # Level 1: follow every city link on the start page
        for cidx in response.xpath(
            "//div[@class='main-container']//div[@class='conteudo']"
            "//div/p[not(@class) and not(@style)][2]//a"
        ):
            title = cidx.xpath('text()').extract_first()
            url = cidx.xpath('@href').extract_first()
            urlx = response.urljoin(url)
            yield scrapy.Request(urlx, callback=self.parse_cidade,
                                 meta={'titulo': title})

    def parse_cidade(self, response):
        #### this is the block I've added ####
        self.count = self.count + 1
        if self.count == 1:
            # Submit the datatable length select with -1 ("show all")
            # and send the new response back to this same callback
            yield scrapy.FormRequest.from_response(
                response,
                formdata={'filterable-datatable_length': '-1'},
                callback=self.parse_cidade
            )
        if self.count == 2:
            #### end of the added block ####
            # Tables have either 5 or 4 header columns; pick the parser accordingly
            tds = response.xpath(
                "count(//table[@id='filterable-datatable']//thead//th)"
            ).extract()
            if tds[0] == "5.0":
                for r in self.parse_one(response):
                    yield r
            if tds[0] == "4.0":
                for r in self.parse_two(response):
                    yield r

    def parse_one(self, response):
        for row in response.xpath(
            "//table[@id='filterable-datatable']//tbody//tr[position()<last()]"
        ):
            item = BrItem()
            item['distrito'] = row.xpath("td[1]//p//a/text()").extract()
            item['tipo'] = row.xpath("td[2]//p//a/text()").extract()
            item['nome'] = row.xpath("td[3]//p//a/text()").extract()
            item['trecho'] = row.xpath("td[4]//p//a/text()").extract()
            item['cep'] = row.xpath("td[5]//p//a/text()").extract()
            yield item

    def parse_two(self, response):
        # Level 2: follow the link in the 4th column down to the detail table
        for row in response.xpath(
            "//table[@id='filterable-datatable']//tbody//tr"
        ):
            localidade = row.xpath("td[1]//p//a/text()").extract()
            tipolocal = row.xpath("td[2]//p//a/text()").extract()
            bairro = row.xpath("td[3]//p//a/text()").extract()
            urly = row.xpath("td[4]//p//a/@href").extract()[0]
            urlz = response.urljoin(urly)
            yield scrapy.Request(urlz, callback=self.parse_two_two,
                                 meta={'localidade': localidade,
                                       'tipolocal': tipolocal,
                                       'bairro': bairro})

    def parse_two_two(self, response):
        # Level 3: final table, combined with the metadata carried in meta
        for row in response.xpath(
            "//table[@id='filterable-datatable']//tbody//tr"
        ):
            item = BrItem()
            item['localidade'] = response.meta['localidade']
            item['tipolocalidade'] = response.meta['tipolocal']
            item['bairro'] = response.meta['bairro']
            item['tipo'] = row.xpath("td[1]//p//a/text()").extract()
            item['nome'] = row.xpath("td[2]//p//a/text()").extract()
            item['trecho'] = row.xpath("td[3]//p//a/text()").extract()
            item['cep'] = row.xpath("td[4]//p//a/text()").extract()
            yield item
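To make the intent clearer: what I want, per city page, is to submit the length
select ("show all", value -1) once and then parse the expanded table from the new
response in the same callback. A minimal sketch of that idea, using a per-request
flag in request.meta instead of the class-level counter (untested; the 'expanded'
key is just a name I made up):

    def parse_cidade(self, response):
        # 'expanded' is a flag I set myself on the follow-up request;
        # it is not something Scrapy provides.
        if not response.meta.get('expanded'):
            yield scrapy.FormRequest.from_response(
                response,
                formdata={'filterable-datatable_length': '-1'},
                callback=self.parse_cidade,
                meta={'expanded': True, 'titulo': response.meta.get('titulo')},
            )
            return
        # ... from here on, response should be the "show all" version ...

The idea being that the flag travels with each request, so the parallel city pages
don't share state the way a single counter on the spider does.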
Also posted on:
http://stackoverflow.com/questions/36520795/select-a-dropdown-option-and-retrieve-the-response-to-the-same-function-with-scr