# Original python-list question (preserved from the post): "I wrote this
# function which, after reading lines from a file, splits them and finds
# the word occurrences through a hash table... for some reason this is
# quite slow... can someone help me make it faster?"
#
# Fixes applied below:
#  * The original `spl_set` was written as a regex pattern
#    ('[",;<>{}_&?!():-[\.=+*\t\n\r]+') but used for plain `in` membership,
#    so the regex metacharacters '[', ']', '+', '\' leaked into the strip
#    set and the intended '.' was stored as the two chars '\' and '.'.
#  * Python-2-only `dict.has_key` and `string.lower(w)` replaced with the
#    `in`-free `dict.get` idiom and `str.lower()`.
#  * All trailing special characters are stripped (the original removed at
#    most one), via `str.rstrip`, which also handles empty tokens safely.
#  * The unused `cnt` counter and the dead `wordlist = []` init are gone.

# Characters treated as trailing punctuation to strip from each word.
_SPECIAL_CHARS = '",;<>{}_&?!():-[].=+*\t\n\r'


def create_words(lines):
    """Print per-line word frequencies for an iterable of text lines.

    For each line, words are obtained with ``str.split()``, lowercased,
    and stripped of trailing punctuation (see ``_SPECIAL_CHARS``).  The
    counts for that line are then printed, one ``"word count"`` pair per
    ``print`` call, in sorted word order.

    Args:
        lines: iterable of strings (e.g. from ``file.readlines()``).

    Returns:
        None; output goes to stdout.
    """
    for content in lines:
        counts = {}
        for word in content.split():
            # Lowercase first, then drop any run of trailing punctuation.
            word = word.lower().rstrip(_SPECIAL_CHARS)
            if word:
                # Single dict operation per word -- this is the main
                # speed win over the original has_key/lookup/store trio.
                counts[word] = counts.get(word, 0) + 1
        for word in sorted(counts):
            # The original printed an explicit '\n' on top of print's own
            # newline, producing a blank line after each pair; kept as-is.
            print(word + ' ' + str(counts[word]) + '\n')


if __name__ == "__main__":
    import sys

    # NOTE(review): the original snippet read `open(filename)` with
    # `filename` never defined in the post, and never actually called
    # create_words; taking the path from argv is the assumed intent.
    with open(sys.argv[1]) as f:
        create_words(f.readlines())
-- http://mail.python.org/mailman/listinfo/python-list