I wrote a small script to compute the h-index of an author. It is modeled after ActiveState's Google search recipe: http://code.activestate.com/recipes/523047/
# Example use:
#   hindex i daubechies
# Result:
#   49
# The script:
#!/usr/bin/python
"""Estimate an author's h-index from one page of Google Scholar results.

The command-line arguments are used as the search terms.  Only the first
RESULT_LIMIT results are requested, so the reported value can never exceed
RESULT_LIMIT.
"""
import re
import sys

try:  # Python 2 module names, as used by the original script
    import httplib
    from urllib import urlencode
except ImportError:  # the same stdlib modules under their Python 3 names
    import http.client as httplib
    from urllib.parse import urlencode

SCHOLAR_HOST = 'scholar.google.com'
RESULT_LIMIT = 100
HEADERS = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
# Capture digits only, so text following the number cannot break int().
CITED_BY = re.compile(r'Cited by (\d+)')


def h_index(cites):
    """Return the largest h such that h of the counts in *cites* are >= h."""
    h = 0
    for cite in sorted(cites, reverse=True):
        # Counts are descending: once one fails, every later one fails too.
        if cite <= h:
            break
        h += 1
    return h


def citation_counts(records):
    """Return the "Cited by N" numbers found in *records*.

    Each record is converted with str() before searching; records without
    a "Cited by <digits>" marker are skipped.
    """
    counts = []
    for record in records:
        match = CITED_BY.search(str(record))
        if match is not None:
            counts.append(int(match.group(1)))
    return counts


def scholar_url(terms, limit=RESULT_LIMIT):
    """Return the request path and query string for searching *terms*."""
    # urlencode() does the escaping itself and turns spaces into '+'.
    # Joining with '+' beforehand would send a literal '%2B' instead.
    return '/scholar?' + urlencode({'q': ' '.join(terms), 'num': limit})


def fetch_results(terms, limit=RESULT_LIMIT):
    """Request the result page and return (status, reason, raw body)."""
    conn = httplib.HTTPConnection(SCHOLAR_HOST)
    try:
        # A GET request carries no body, hence None rather than {}.
        conn.request('GET', scholar_url(terms, limit), None, HEADERS)
        resp = conn.getresponse()
        return resp.status, resp.reason, resp.read()
    finally:
        conn.close()


def scholar_records(html):
    """Return the <p class="g"> elements of *html*."""
    # Imported here so the helpers above can be used without the
    # third-party parser installed.
    from BeautifulSoup import BeautifulSoup
    soup = BeautifulSoup(html)
    return soup('p', {'class': 'g'})


def main(argv=None):
    """Print the h-index for the search terms; return a process exit code."""
    terms = sys.argv[1:] if argv is None else argv
    if not terms:
        sys.stderr.write('usage: hindex TERM [TERM ...]\n')
        return 2
    status, reason, body = fetch_results(terms)
    if status != 200:
        # Report the failure instead of printing a misleading h-index of 0.
        sys.stderr.write('Error: \n%s %s\n' % (status, reason))
        return 1
    html = body.decode('ascii', 'ignore')
    print(h_index(citation_counts(scholar_records(html))))
    return 0


if __name__ == '__main__':
    sys.exit(main())

# --
# http://mail.python.org/mailman/listinfo/python-list