hello, i have this code:

#!/usr/local/bin/python
# -*- coding: utf-8 -*-

import re
import urllib2
import BeautifulSoup

origin_site = 
'http://DOMAIN.TLD/index.php?id=annuaire_assos&theme=0&rech=&num_page='

pages = range(1,3)

for page_no in pages:
        print '====== %s' % page_no
        req = ('%s%s' % (origin_site, page_no))
        user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
        headers = { 'User-Agent' : user_agent }
        items = []
        try:
                urllib2.urlopen(req)
        except urllib2.URLError, e:
                pass
        else:
                # do something with the page
                doc = urllib2.urlopen(req)
                soup = BeautifulSoup.BeautifulSoup(doc)
                infoblock = soup.findAll('tr', { "class" : "menu2" })
                for item in infoblock:
                        soup = BeautifulSoup.BeautifulSoup(str(item))
                        for tag in soup.recursiveChildGenerator():
                                if isinstance(tag,BeautifulSoup.Tag) and 
tag.name in ('td'):
                                        if tag.string is not None:
                                                assoc_name = (tag.string)
                                if isinstance(tag,BeautifulSoup.Tag) and 
tag.name in ('u'):
                                        if tag.string is not None:
                                                assoc_theme = (tag.string)

                        get_onclick = soup('a')[0]['onclick'] # get the 
'onclick' attribute
                        print assoc_name, get_onclick, assoc_theme


this returns the following:

Amiral 
window.open('http://DOMAIN.TLD/extranet/associations/detail-assos.php?id=3815','','toolbar=0,menubar=0,location=0,scrollbars=1,top=80,left=400,width=500,height=400');return
false Culture

how do i extract from the get_onclick the
'http://DOMAIN.TLD/extranet/associations/detail-assos.php?id=3815'
correctly?

Any advise much appreciated.



-- 
%>>> "".join( [ {'*':'@','^':'.'}.get(c,None) or
chr(97+(ord(c)-83)%26) for c in ",adym,*)&uzq^zqf" ] )
_______________________________________________
Tutor maillist  -  Tutor@python.org
To unsubscribe or change subscription options:
http://mail.python.org/mailman/listinfo/tutor

Reply via email to