Le 9 mai 09 à 22:11, Frank Niessink a écrit :
> > A quick and dirty script could do that, I guess.
>
> Uservoice has a simple uploading format:
> "title,description,nr_of_votes". I downloaded the XML export of our
> data and played around with it. Unfortunately I got stuck trying to
> distinguish between different types of tracker items. It seems the XML
> file doesn't contain the type of a tracker item. It's almost too weird
> to be true.
>
I said dirty; who needs XML? :)
The attached script should do the job (but see the FIXME inside). It depends on
BeautifulSoup.
from BeautifulSoup import BeautifulSoup
import urllib2, re
def main():
    """Scrape the SourceForge tracker listing and write one CSV row per item.

    Starting from the hard-coded tracker URL, every listing page is fetched
    and each table row that has priority-styled cells (``class="pN"``) is
    treated as a tracker item. For each item the details page is downloaded
    to extract the description. A row ``title,description,priority`` is
    written to standard output, and the "Next" link is followed until no
    such link remains.

    Rows are written with the ``csv`` module so that commas and quotes inside
    a title or description are quoted instead of corrupting the columns, and
    text is encoded as UTF-8 so non-ASCII characters do not raise
    ``UnicodeEncodeError`` when the output is piped to a file.
    """
    # Function-scope imports keep this block self-contained.
    import csv
    import sys
    import urlparse

    base_url = 'http://sourceforge.net/'
    url = 'https://sourceforge.net/tracker/?group_id=130831&atid=719137&status=1'
    priority_class = re.compile(r'p\d')

    # FIXME: are HTML tags allowed in the description? The output assumes
    # the importer accepts standard CSV quoting and UTF-8 -- TODO confirm.
    # Python 2's csv module needs byte strings, hence the explicit encoding.
    writer = csv.writer(sys.stdout, lineterminator='\n')

    while url is not None:
        soup = _fetch_soup(url)

        for row in soup('tr'):
            cols = row('td', attrs={'class': priority_class})
            if not cols:
                # Header rows and layout tables have no priority cells.
                continue

            link = cols[1]('a')[0]
            # .string is None when the anchor holds nested markup; fall back
            # to an empty title rather than emitting the text "None".
            title = unicode(link.string or u'')
            priority = int(cols[6].string.strip())

            details_soup = _fetch_soup(urlparse.urljoin(base_url, link['href']))
            details_div = details_soup('div', attrs={'class': 'yui-g box'})[0]
            # Drop the ad snippets embedded in the description paragraph.
            pieces = [
                unicode(node)
                for node in details_div('p')[0].contents
                if unicode(node).find('google_ad_') == -1
            ]
            description = (
                u''.join(pieces).strip().replace('\n', '<br />').replace('\r', '')
            )

            writer.writerow([
                title.encode('utf-8'),
                description.encode('utf-8'),
                priority,
            ])

        next_links = [
            anchor
            for anchor in soup.findAll('a')
            if anchor.string and anchor.string.startswith('Next')
        ]
        if next_links:
            url = urlparse.urljoin(base_url, next_links[0]['href'])
        else:
            # No "Next" link: this was the last listing page.
            url = None


def _fetch_soup(url):
    """Download *url* and return its content parsed by BeautifulSoup."""
    response = urllib2.urlopen(url)
    try:
        return BeautifulSoup(response.read())
    finally:
        # Close the connection even if reading or parsing fails.
        response.close()
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()