Am 21.11.2010 11:01, schrieb Petar Milin:

On 20/11/10 22:34, Chris Rebert wrote:
On Thu, Nov 18, 2010 at 3:26 AM, neocortex<pmi...@gmail.com>  wrote:
The library doesn't seem to have built-in support for filtering by
language (and Google lacks a search query-string-based operator for
that), but it looks like you could implement that feature by adding an
"lr" parameter with an appropriate value to the query `args`
dictionary. See the "lr?" entry under "Web Search Specific Arguments"
on http://code.google.com/apis/websearch/docs/reference.html, and
lines 68 & 102 of pygoogle.py.
 From those lines, it can be concluded that lr=lang_?? is not
supported, unfortunately.
Right; that's why I said "you could implement that feature". Pretty
easily it would seem.
Thanks for believing in me ( ;-) ), but I am a newbie in the Python world, although with some experience in other programming languages. So, if I read pygoogle.py correctly, I should add an lr parameter in __init__ and then after lines 68 and 102?
Correct.
I just did not read source code enough to realize the parameter is needed
twice. You just could hack that out, too :-)
Normally (what is normal, anyway?) it's good practice to create a
constructor (__init__) in Python which has the most common options
as keyword arguments with default values assigned, like so:

class Foo:
    # Illustrative example: the options are keyword arguments with defaults,
    # so callers can override only the ones they care about.
    def __init__(self,lr='lang_en',url='http://some.of.this'):
        self.lr = lr    # stored as given; defaults to 'lang_en'
        self.url = url  # stored as given; defaults to the placeholder URL
    ...

Thanks again! You guys here are very kind and helpful!
Best,
Petar


#!/usr/bin/python
"""
Google AJAX Search Module
http://code.google.com/apis/ajaxsearch/documentation/reference.html
"""
try:
    import simplejson as json
except:
    import json
import urllib

__author__ = "Kiran Bandla"
__version__ = "0.1"

# Base endpoint; the url-encoded query string is appended directly to it.
URL = 'http://ajax.googleapis.com/ajax/services/search/web?'

# ---------------------------------------------------------------------------
# Web Search Specific Arguments
# http://code.google.com/apis/ajaxsearch/documentation/reference.html#_fonje_web
# ---------------------------------------------------------------------------

# SAFE
# This optional argument supplies the search safety level which may be one of:
#     * safe=active   - enables the highest level of safe search filtering
#     * safe=moderate - enables moderate safe search filtering (default)
#     * safe=off      - disables safe search filtering
SAFE_ACTIVE = "active"
SAFE_MODERATE = "moderate"
SAFE_OFF = "off"

# FILTER
# This optional argument controls turning on or off the duplicate content
# filter:
#     * filter=0 - Turns off the duplicate content filter
#     * filter=1 - Turns on the duplicate content filter (default)
FILTER_OFF = 0
FILTER_ON = 1

# ---------------------------------------------------------------------------
# Standard URL Arguments
# http://code.google.com/apis/ajaxsearch/documentation/reference.html#_fonje_args
# ---------------------------------------------------------------------------

# RSZ
# This optional argument supplies the number of results that the application
# would like to receive.
# A value of small indicates a small result set size or 4 results.
# A value of large indicates a large result set or 8 results. If this
# argument is not supplied, a value of small is assumed.
RSZ_SMALL = "small"
RSZ_LARGE = "large"


class pygoogle:
    """Small client for the Google AJAX web search endpoint.

    An instance holds one query plus the request options (``pages``,
    ``filter``, ``rsz``, ``safe``, ``lr``).  The options are plain attributes
    and may be changed after construction; each public method issues fresh
    HTTP requests against ``URL``.
    """

    def __init__(self, query, pages=10, lr='lang_en'):
        """Store the query and the default request options.

        query -- the search string, sent as the ``q`` argument.
        pages -- number of result pages to fetch (default 10).
        lr    -- value of the ``lr`` (language restriction) argument;
                 defaults to ``'lang_en'`` so searches are English by default.
        """
        self.pages = pages          # Number of pages. default 10
        self.query = query
        # Controls turning on or off the duplicate content filter. On = 1.
        self.filter = FILTER_ON
        self.rsz = RSZ_LARGE        # Results per page. small = 4 / large = 8
        self.safe = SAFE_OFF        # SafeBrowsing - active/moderate/off
        self.lr = lr                # Language restriction, e.g. 'lang_en'

    @staticmethod
    def _extract_results(data):
        """Return the non-empty result entries of one decoded response.

        Yields an empty list when ``responseData`` (or its ``results``) is
        missing or null, so callers do not crash with a TypeError on an
        error response.
        """
        response_data = data.get('responseData') or {}
        return [entry for entry in (response_data.get('results') or []) if entry]

    @staticmethod
    def _clean_snippet(text):
        """Return a result snippet as plain text.

        Removes a leading/trailing ``<b>...</b>`` ellipsis marker, drops the
        remaining ``<b>``/``</b>`` tags and decodes ``&#39;``.  The marker is
        removed as a whole substring: ``str.strip("<b>...</b>")`` would treat
        it as a set of characters and eat leading/trailing 'b', '.', '/' etc.
        from the actual text.
        """
        marker = "<b>...</b>"
        text = text.strip()
        if text.startswith(marker):
            text = text[len(marker):]
        if text.endswith(marker):
            text = text[:-len(marker)]
        text = text.replace("<b>", '').replace("</b>", '')
        return text.replace("&#39;", "'").strip()

    def _fetch(self, args):
        """Send one request built from ``args`` and return the decoded JSON."""
        response = urllib.urlopen(URL + urllib.urlencode(args))
        try:
            return json.loads(response.read())
        finally:
            # Always release the connection, even if reading/decoding fails.
            response.close()

    def __search__(self, print_results=False):
        """Fetch ``self.pages`` pages and return the decoded responses.

        When ``print_results`` is true, the title, cleaned snippet and URL of
        every result on each successful (status 200) page are printed too.
        """
        results = []
        # 'start' is a result offset, so it advances by the page size.
        page_size = 4 if self.rsz == RSZ_SMALL else 8
        for page in range(0, self.pages):
            args = {'q': self.query,
                    'v': '1.0',
                    'start': page * page_size,
                    'rsz': self.rsz,
                    'safe': self.safe,
                    'filter': self.filter,
                    'lr': self.lr,
                    }
            data = self._fetch(args)
            if print_results and data.get('responseStatus') == 200:
                for result in self._extract_results(data):
                    print('[%s]' % (urllib.unquote(result['titleNoFormatting'])))
                    print(self._clean_snippet(result['content']))
                    print(urllib.unquote(result['unescapedUrl']) + '\n')
            results.append(data)
        return results

    def search(self):
        """Returns a dict of Title/URLs.

        Results sharing a title overwrite each other; the last one wins.
        """
        results = {}
        for data in self.__search__():
            for result in self._extract_results(data):
                title = urllib.unquote(result['titleNoFormatting'])
                results[title] = urllib.unquote(result['unescapedUrl'])
        return results

    def search_page_wise(self):
        """Returns a dict of page-wise urls (page index -> list of URLs).

        This method always requests large pages with safe search off and the
        duplicate filter on, regardless of the instance's rsz/safe/filter
        attributes.
        """
        results = {}
        large_page_size = 8     # RSZ_LARGE yields 8 results per page
        for page in range(0, self.pages):
            args = {'q': self.query,
                    'v': '1.0',
                    # Offset of the first result of this page; using the bare
                    # page index here made consecutive pages overlap.
                    'start': page * large_page_size,
                    'rsz': RSZ_LARGE,
                    'safe': SAFE_OFF,
                    'filter': FILTER_ON,
                    'lr': self.lr,
                    }
            data = self._fetch(args)
            results[page] = [urllib.unquote(result['unescapedUrl'])
                             for result in self._extract_results(data)]
        return results

    def get_urls(self):
        """Returns list of result URLs"""
        results = []
        for data in self.__search__():
            for result in self._extract_results(data):
                results.append(urllib.unquote(result['unescapedUrl']))
        return results

    def get_result_count(self):
        """Returns the number of results

        Fetches a single page and reads the estimated result count from its
        cursor.  Best effort: on any failure the error is printed and 0 is
        returned.  ``self.pages`` is restored afterwards.
        """
        saved_pages = self.pages
        self.pages = 1          # one page is enough to read the cursor
        result_count = 0
        try:
            first_page = self.__search__()[0]
            result_count = first_page['responseData']['cursor']['estimatedResultCount']
        except Exception as e:
            print(e)
        finally:
            self.pages = saved_pages
        return result_count

    def display_results(self):
        """Prints results (for command line)"""
        self.__search__(True)

    
if __name__ == "__main__":
    # Command-line use: all arguments together form the search query.  The
    # estimated hit count is printed first, then the first page of results.
    import sys

    search_terms = ' '.join(sys.argv[1:])
    searcher = pygoogle(search_terms)
    estimated_total = searcher.get_result_count()
    print('*Found %s results*' % (estimated_total))
    searcher.pages = 1      # only the first page is displayed
    searcher.display_results()

<<attachment: stefan_sonnenberg.vcf>>

-- 
http://mail.python.org/mailman/listinfo/python-list

Reply via email to