I'm trying to scrape a 3-level page. Every page has a datatable with a
"show all" select option. On every new page I enter, I need to select that
option and return the response to the same function. I've added the block
of code marked below, but it does not work. What am I doing wrong?
import scrapy

from ..items import BrItem  # adjust to wherever BrItem lives in your project


class BrSpider(scrapy.Spider):
    name = "acre"
    allowed_domains = [""]
    start_urls = [
        ""
    ]
    count = 0

    def parse(self, response):
        # Level 1: follow every city link on the start page
        for cidx in response.xpath(
            "//div[@class='main-container']//div[@class='conteudo']"
            "//div/p[not(@class) and not(@style)][2]//a"
        ):
            title = cidx.xpath('text()').extract_first()
            url = cidx.xpath('@href').extract_first()
            urlx = response.urljoin(url)
            yield scrapy.Request(urlx, callback=self.parse_cidade,
                                 meta={'titulo': title})

    def parse_cidade(self, response):
        #### this is the block I've added ####
        self.count = self.count + 1
        if self.count == 1:
            # Submit the datatable length select with -1 ("show all")
            # and send the new response back to this same callback
            yield scrapy.FormRequest.from_response(
                response,
                formdata={'filterable-datatable_length': '-1'},
                callback=self.parse_cidade
            )
        if self.count == 2:
            #### end of the added block ####
            # Tables have either 5 or 4 header columns; pick the parser accordingly
            tds = response.xpath(
                "count(//table[@id='filterable-datatable']//thead//th)"
            ).extract()
            if tds[0] == "5.0":
                for r in self.parse_one(response):
                    yield r
            if tds[0] == "4.0":
                for r in self.parse_two(response):
                    yield r

    def parse_one(self, response):
        for row in response.xpath(
            "//table[@id='filterable-datatable']//tbody//tr[position()<last()]"
        ):
            item = BrItem()
            item['distrito'] = row.xpath("td[1]//p//a/text()").extract()
            item['tipo'] = row.xpath("td[2]//p//a/text()").extract()
            item['nome'] = row.xpath("td[3]//p//a/text()").extract()
            item['trecho'] = row.xpath("td[4]//p//a/text()").extract()
            item['cep'] = row.xpath("td[5]//p//a/text()").extract()
            yield item

    def parse_two(self, response):
        # Level 2: follow the link in the 4th column down to the detail table
        for row in response.xpath(
            "//table[@id='filterable-datatable']//tbody//tr"
        ):
            localidade = row.xpath("td[1]//p//a/text()").extract()
            tipolocal = row.xpath("td[2]//p//a/text()").extract()
            bairro = row.xpath("td[3]//p//a/text()").extract()
            urly = row.xpath("td[4]//p//a/@href").extract()[0]
            urlz = response.urljoin(urly)
            yield scrapy.Request(urlz, callback=self.parse_two_two,
                                 meta={'localidade': localidade,
                                       'tipolocal': tipolocal,
                                       'bairro': bairro})

    def parse_two_two(self, response):
        # Level 3: final table, combined with the metadata carried in meta
        for row in response.xpath(
            "//table[@id='filterable-datatable']//tbody//tr"
        ):
            item = BrItem()
            item['localidade'] = response.meta['localidade']
            item['tipolocalidade'] = response.meta['tipolocal']
            item['bairro'] = response.meta['bairro']
            item['tipo'] = row.xpath("td[1]//p//a/text()").extract()
            item['nome'] = row.xpath("td[2]//p//a/text()").extract()
            item['trecho'] = row.xpath("td[3]//p//a/text()").extract()
            item['cep'] = row.xpath("td[4]//p//a/text()").extract()
            yield item
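To make the intent clearer: what I want, per city page, is to submit the length
select ("show all", value -1) once and then parse the expanded table from the new
response in the same callback. A minimal sketch of that idea, using a per-request
flag in request.meta instead of the class-level counter (untested; the 'expanded'
key is just a name I made up):

    def parse_cidade(self, response):
        # 'expanded' is a flag I set myself on the follow-up request;
        # it is not something Scrapy provides.
        if not response.meta.get('expanded'):
            yield scrapy.FormRequest.from_response(
                response,
                formdata={'filterable-datatable_length': '-1'},
                callback=self.parse_cidade,
                meta={'expanded': True, 'titulo': response.meta.get('titulo')},
            )
            return
        # ... from here on, response should be the "show all" version ...

The idea being that the flag travels with each request, so the parallel city pages
don't share state the way a single counter on the spider does.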
Also posted on:
http://stackoverflow.com/questions/36520795/select-a-dropdown-option-and-retrieve-the-response-to-the-same-function-with-scr