Hi folks,

Has anyone seen 'Googlewhack Adventure'?

http://www.davegorman.com/googlewhack.htm

I wrote a script to generate Googlewhacks - thought I'd share it with you. I'd better stop running it, as I fear Google may ban my IP for making 20 searches a second.


Will McGugan

import random
import urllib2
import threading

# Number of worker threads started at the bottom of the script.
WHACKER_THREADS = 20

random.seed()

# Load the candidate words once, then close the file straight away instead of
# leaving the handle open for the life of the process.  Blank lines are
# dropped so that an empty string is never picked as a search term.
_word_file = open( "word.lst" )
try:
    wordlist = []
    for line in _word_file:
        word = line.rstrip()
        if word:
            wordlist.append( word )
finally:
    _word_file.close()

# Results file, opened in append mode so earlier finds are kept.  It stays
# open on purpose: the worker threads write to it for as long as they run.
whacks = open( "whacks.txt", "a" )


class WhackerThread( threading.Thread ):
    """Worker thread that looks for Googlewhacks until the process is killed.

    Each iteration picks two random entries from the module-level ``wordlist``,
    fetches the Google results page for the pair and prints a one-line verdict.
    A pair is reported as a whack when the page contains the
    "Results 1 - 1 of 1" marker, none of the ``excluded`` substrings, and both
    words pass the dictionary lookup.  Whacks are also appended to the
    module-level ``whacks`` file.
    """

    # Substrings that, when present in the Google results page, cause the pair
    # to be reported as "probably a word list" and skipped.
    excluded = "/dict .lst word.lst .txt words".split()

    def run(self):
        """Search for random word pairs forever; never returns normally."""

        # Local import: only this method needs it.  quote_plus makes sure a
        # word containing '&', '#', '+', spaces, etc. cannot corrupt the query
        # string.
        import urllib

        def check_word( word ):
            """Return True unless the dictionary page contains "Did You Mean"."""
            url = ( "http://dictionary.reference.com/search?q=%s"
                    % urllib.quote_plus( word ) )
            dict_page = urllib2.urlopen( url ).read()
            return "Did You Mean" not in dict_page

        def is_excluded(page):
            """Return True if any ``excluded`` substring occurs in *page*."""
            for marker in WhackerThread.excluded:
                if marker in page:
                    return True
            return False

        # The opener does not change between iterations, so build it once per
        # thread.  The User-agent header is replaced with a browser-like value.
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]

        while True:
            word_a = random.choice( wordlist )
            word_b = random.choice( wordlist )
            words = word_a + " " + word_b

            google_url = (
                "http://www.google.com/search?hl=en&q=%s+%s&btnG=Google+Search"
                % ( urllib.quote_plus( word_a ), urllib.quote_plus( word_b ) )
            )
            google_page = opener.open( google_url ).read()

            if is_excluded( google_page ):
                print( words + " (probably a word list)" )
                continue

            if "Results <b>1</b> - <b>1</b> of <b>1</b>" in google_page:
                # Exactly one hit: only now spend requests on the dictionary.
                if not check_word( word_a ):
                    print( "%s (%s is not in dicionary.com)" % (words, word_a) )
                elif not check_word( word_b ):
                    print( "%s (%s is not in dicionary.com)" % (words, word_b) )
                else:
                    print( words + " WHACK!" )
                    # One write call per record, so text and newline are not
                    # emitted as separate writes while other threads log too.
                    whacks.write( words + "\n" )
                    whacks.flush()
            else:
                print( words + "(no whack)" )

# Construct every worker first, then start them one after another.
Threads = []
for _ in xrange(WHACKER_THREADS):
    Threads.append( WhackerThread() )

for whacker_thread in Threads:
    whacker_thread.start()
-- 
http://mail.python.org/mailman/listinfo/python-list

Reply via email to