Hello
i have a data base with an url list
i would like to crawl each url for find the h1 and update the row (in data 
base) with the value of the h1
but i don't understand how i can recover in the update request the value of 
the url
I tried url[0] but it say to me that the url is not defined
thanks in advance for your help
regards


class H1searchSpider(BaseSpider):
    name = "h1search"
    
    def start_requests(self):
        self.db = MySQLdb.connect(host="localhost", user="root", passwd="",db
="crawler_engine", charset = 'utf8', use_unicode = False)
        cur = self.db.cursor()
        cur.execute("select url from urls")
        for url in cur.fetchall():
            yield Request(url[0])
            
    def parse(self, response):
        hxs = HtmlXPathSelector(response)
        item = DmozItem()
        item['h1'] = hxs.select('//h1').extract()
        cursor = self.db.cursor()
        for j in range(len(item['h1'])):
            cursor = self.db.cursor()
            sql = "update urls set h1 = '%s' where url = '%s'" % (item['h1'
][0], url[0])
            cursor.execute(sql)
            self.db.commit()
        return item



-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to