(duck)

542 comp.lang.python rtfm

467 comp.lang.python shut+up

263 comp.lang.perl rtfm

45 comp.lang.perl shut+up




Code:

import urllib2
import re
import time

def fillurlfmt(args):
    """Build one query record from a (urlfmt, ggroup, gkw) triple.

    The URL template is filled with the keyword first and the group
    second. Returns a dict with the keys 'group', 'keyword' and 'url'.
    """
    template, group_name, keyword = args
    query_url = template % (keyword, group_name)
    return dict(group=group_name, keyword=keyword, url=query_url)

def consqurls(args):
    """Build the Google Groups search query records for one newsgroup.

    args is a (ggroup, gkeywords) pair: the newsgroup name and an iterable
    of search keywords. Returns a list holding one dict per keyword, each
    produced by fillurlfmt (keys 'group', 'keyword' and 'url').
    """
    ggroup, gkeywords = args
    # The first %s (as_q) receives the keyword, the second (as_ugroup) the
    # group name. The as_min*/as_max* parameters are fixed to 1999..2009.
    urlfmt = 'http://groups.google.com/groups/search?as_q=%s&as_epq=&as_oq=&as_eq=&num=10&scoring=&lr=&as_sitesearch=&as_drrb=q&as_qdr=&as_mind=1&as_minm=1&as_miny=1999&as_maxd=1&as_maxm=1&as_maxy=2009&as_ugroup=%s&as_usubject=&as_uauthors=&safe=off'
    # A list comprehension always yields a real list, which flatten_list
    # relies on (it only descends into list instances).
    return [fillurlfmt((urlfmt, ggroup, gkw)) for gkw in gkeywords]

def flatten_list(x):
    """Return a new flat list with every nested list in x expanded in order.

    Only list instances are descended into; other elements (tuples,
    dicts, strings, ...) are kept as single items.
    """
    flat = []
    for item in x:
        # Recurse into nested lists, wrap anything else as a one-item list.
        expanded = flatten_list(item) if isinstance(item, list) else [item]
        flat += expanded
    return flat

def ggsearch(urldict):
    """Fetch urldict['url'] and return the response body with the query data.

    Returns a new dict containing every entry of urldict plus a 'result'
    key holding the raw response body. If urldict itself has a 'result'
    key, that value overrides the fetched body.

    Errors raised by urllib2 while opening or reading the URL propagate to
    the caller.
    """
    opener = urllib2.build_opener()
    # Send a browser-like User-agent header with the request.
    opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.20) Gecko/20081217 (CK-IBM) Firefox/2.0.0.20')]
    # Short pause before every request.
    time.sleep(0.1)
    urlf = opener.open(urldict['url'])
    try:
        resdict = {'result': urlf.read()}
    finally:
        # Release the connection even when read() fails.
        urlf.close()
    resdict.update(urldict)
    return resdict

def extrclosure(resregexp, groupno):
    """Return a function that extracts one regex group from a result dict.

    resregexp is a compiled regular expression and groupno the group to
    extract. The returned function searches resdict['result'], replaces
    that entry in place with the matched group and returns the same dict.

    The returned function raises ValueError when the pattern does not
    occur in resdict['result'].
    """
    def extrres(resdict):
        txtgr = resregexp.search(resdict['result'])
        if txtgr is None:
            # search() returns None on no match; fail with a clear message
            # instead of an AttributeError on None.group.
            raise ValueError(
                'pattern %r not found in result text' % (resregexp.pattern,))
        resdict['result'] = txtgr.group(groupno)
        return resdict
    return extrres

def delcomma(x):
    """Strip every comma from x['result'] in place and return the same dict."""
    pieces = x['result'].split(',')
    x['result'] = ''.join(pieces)
    return x

if __name__ == "__main__":
    # Query Google Groups for every keyword in every newsgroup and print
    # one "<hit count> <group> <keyword>" line per combination.
    gkeywords = ['rtfm', 'shut+up']
    ggroups = ['comp.lang.python', 'comp.lang.perl']
    params = [(ggroup, gkeywords) for ggroup in ggroups]
    # consqurls yields one list of query dicts per group; merge them.
    qurls = flatten_list([consqurls(param) for param in params])
    gresults = [ggsearch(qurl) for qurl in qurls]
    # Raw string: same pattern value as before, without relying on
    # unrecognised string escapes such as "\<". Group 1 is the hit count.
    resre = re.compile(r'Results \<b\>1\</b\> - \<b\>.+?\</b\> of about \<b\>(.+?)\</b\>')
    gextrsearchresult = extrclosure(resre, 1)
    gresults = [gextrsearchresult(gresult) for gresult in gresults]
    # Drop thousands separators from the extracted counts.
    gresults = [delcomma(gresult) for gresult in gresults]
    for el in gresults:
        # Parenthesised single-argument print gives the same output as the
        # Python 2 print statement used before.
        print('%s %s %s' % (el['result'], el['group'], el['keyword']))
        print('')


This was inspired by http://mail.python.org/pipermail/python-list/2002-November/172466.html

Regards,
mk

--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to