You're making a new countDict for each line read from the file... is
that what you meant to do?  Or are you trying to count word occurrences
across the whole file?

--

In general, any time string manipulation is going slowly, ask yourself,
"Can I use the re module for this?"

# Disclaimer: this code is untested and probably contains typos.

import re
# Matches runs of ASCII letters, digits and underscores.  re.I makes the
# letter range match both upper and lower case.
word_finder = re.compile('[a-z0-9_]+', re.I)


def count_words(string, word_finder=word_finder):
  """Count how many times each word occurs in ``string``.

  A "word" is whatever ``word_finder`` matches; with the default pattern
  that is a run of ASCII letters, digits and underscores.  Matches are
  used as dictionary keys exactly as they appear in the text, so "The"
  and "the" are counted separately.

  Args:
    string: The text to scan.
    word_finder: Compiled regular expression (anything providing
      ``finditer``) used to locate words.  It is bound as a default
      argument so the function reads it as a local name rather than
      doing a global lookup on every call.

  Returns:
    A dict mapping each distinct word to its number of occurrences.
    Empty input yields an empty dict.
  """
  countDict = {}
  for match in word_finder.finditer(string):
    word = match.group(0)
    # dict.get supplies 0 the first time a word is seen.
    countDict[word] = countDict.get(word, 0) + 1
  return countDict

# Print a per-line word tally for the file named by `filename`, which must
# be defined by the surrounding script.
# The `with` block closes the file even if counting or printing raises.
with open(filename) as f:
  # Iterate the file object directly: it yields lines lazily, and the
  # deprecated xreadlines() no longer exists in Python 3.
  for i, line in enumerate(f):
    countDict = count_words(line)
    # enumerate() starts at 0, so the first line is reported as "Line 0".
    # A single parenthesised argument prints identically on Python 2 and 3.
    print("Line %s" % i)
    for word in sorted(countDict):
      print("  %s %s" % (word, countDict[word]))

-- 
http://mail.python.org/mailman/listinfo/python-list

Reply via email to