import sys
def moreCommonWord( x, y ):
    '''Comparison function for list.sort() over two-item sequences.

    Returns a negative, zero or positive number in the style of cmp().
    Item 1 is compared first and the result is negated, so the larger
    value sorts earlier; when item 1 is equal, item 0 decides in
    ascending order.
    '''
    if x[1] != y[1]:
        # Multiplying by -1 flips cmp()'s sign to get descending order.
        return cmp( x[1], y[1] ) * -1
    return cmp( x[0], y[0] )
If you want to keep this, use:
def moreCommonWord(x, y):
    '''Comparison function for list.sort() over two-item sequences.

    Sequences with a larger item 1 sort first; when item 1 is equal,
    item 0 decides in ascending order.
    '''
    if x[1] == y[1]:
        # Tie on item 1: fall back to ascending order of item 0.
        return cmp(x[0], y[0])
    # Operands are swapped (y before x) so the larger item 1 comes first.
    return cmp(y[1], x[1])
... I don't like type-based names (Charles Simonyi never convinced me), so instead of:
> wordsDic = {}
I'd write:
    corpus = {}
...
# Read all of inFile at once, split it on whitespace, and keep a running
# count per word in wordsDic.
for word in inFile.read().split():
    if wordsDic.has_key( word ):
        wordsDic[word] = wordsDic[word] + 1
    else:
        # First occurrence of this word.
        wordsDic[word] = 1
inFile.close()
How about:
    for line in inFile:
        for word in line.split():
            try:
                corpus[word] += 1
            except KeyError:
                corpus[word] = 1
...
# Take the (key, value) pairs of wordsDic and sort them in place, using
# moreCommonWord as the comparison function.
wordsLst = wordsDic.items()
wordsLst.sort( moreCommonWord )
OK, here I'm going to get version specific.
For Python 2.4 and later:
    words = sorted((-freq, word) for word, freq in corpus.iteritems())
For at least Python 2.2:
    words = [(-freq, word) for word, freq in corpus.iteritems()]
    words.sort()
Before Python 2.2:
    words = corpus.items()
    words.sort(moreCommonWord)
# Write one line per pair: pair[1] right-justified to 7 columns, then
# " : ", then pair[0].
for pair in wordsLst:
    outFile.write( str( pair[1] ).rjust( 7 ) + " : " + str( pair[0] ) + "\n" )
outFile.close()
Before Python 2.2 (because there words holds (word, frequency) pairs rather than (-frequency, word)):
    for word, frequency in words:
        print >>outFile, '%7d : %s' % (frequency, word)
For Python 2.2 and later:
    for negfrequency, word in words:
        print >>outFile, '%7d : %s' % (-negfrequency, word)
So, with all my prejudices in place and python 2.4 on my box, I'd lift a few things to functions:
def refcount(corpus, infile):
    '''Add the words found in infile to the counters held in corpus.

    corpus maps each word to the number of times it has been seen and
    is updated in place.  infile is iterated line by line and each
    line is split on whitespace.
    '''
    for text in infile:
        for token in text.split():
            # A word not seen before starts from zero.
            corpus[token] = corpus.get(token, 0) + 1
def main(sources, output=None):
    '''Count words in sources and report frequencies to output.

    sources is a sequence of file names; each is opened for reading and
    its words are tallied into one shared table.  output is the file
    object the report is printed to; with the default None the
    ``print >>`` statement writes to sys.stdout.

    One line is printed per distinct word, in the form
    '%7d : %s' % (frequency, word), most frequent word first and words
    of equal frequency in ascending order.
    '''
    corpus = {}
    for source in sources:
        f = open(source)
        try:
            refcount(corpus, f)
        finally:
            # Close the source even when counting fails part-way.
            f.close()
    # Sorting (-frequency, word) tuples ascending yields descending
    # frequency, with ties broken by ascending word.
    for negfrequency, word in sorted((-frequency, word)
                                     for word, frequency
                                     in corpus.iteritems()):
        print >>output, '%7d : %s' % (-negfrequency, word)
if __name__ == '__main__':
    import sys

    # Usage:
    #   script SOURCE              report goes to standard output
    #   script SOURCE... OUTPUT    report is written to OUTPUT
    #
    # The last argument is only treated as the output file when at least
    # one source precedes it.  Otherwise a lone file name would be opened
    # with 'w' and truncated, and nothing would be read.
    if len(sys.argv) < 3:
        main(sys.argv[1:])
    else:
        output = open(sys.argv[-1], 'w')
        try:
            main(sys.argv[1:-1], output)
        finally:
            # Close the report file even if counting or printing fails.
            output.close()
--Scott David Daniels [EMAIL PROTECTED] -- http://mail.python.org/mailman/listinfo/python-list