Did Steve's question about order ever get answered?
I think he wanted something like this?

fmt -1 file_name | sort | uniq -c | sort -dk2 | sort -srnk1

Cheers,
Alan Isaac

PS Here's another Python implementation, which adds a couple of
features: minimum frequency and minimum size requirements.
(It also reports word counts.) Public domain.

import sys,string

# Characters trimmed from both ends of every whitespace-separated token.
chars2strip = string.punctuation

# Only words at least this long are entered in the frequency table.
WORDSIZE_MIN = 3
# Only words seen at least this many times are listed in the reports.
FREQ_MIN = 2

RULE = "================================================="


def count_words(lines, wordsize_min=WORDSIZE_MIN, strip_chars=chars2strip):
    """Tally the words found in an iterable of text lines.

    Each line is split on whitespace and every token has ``strip_chars``
    removed from both ends.  Tokens that end up empty (pure punctuation)
    are ignored entirely.

    Returns a tuple ``(ct_allwords, ct_words, word_hash)``:
      * ``ct_allwords`` -- number of non-empty words seen;
      * ``ct_words``    -- number of those with ``len >= wordsize_min``;
      * ``word_hash``   -- dict mapping each such word to its frequency
                           (case-sensitive).
    """
    word_hash = {}
    ct_allwords = 0
    ct_words = 0
    for line in lines:
        # split() with no argument already ignores leading/trailing
        # whitespace, so no separate strip of the line is needed.
        for word in line.split():
            word = word.strip(strip_chars)
            if not word:
                continue
            ct_allwords += 1
            if len(word) >= wordsize_min:
                ct_words += 1
                word_hash[word] = word_hash.get(word, 0) + 1
    return ct_allwords, ct_words, word_hash


def alpha_order(word_hash, freq_min=FREQ_MIN):
    """Return ``(word, freq)`` pairs with ``freq >= freq_min``, sorted by word."""
    return [(word, word_hash[word])
            for word in sorted(word_hash)
            if word_hash[word] >= freq_min]


def occurrence_order(word_hash, freq_min=FREQ_MIN):
    """Return ``(word, freq)`` pairs with ``freq >= freq_min``, most frequent first.

    Ties on frequency are broken case-insensitively by word; the final
    case-sensitive key makes the order independent of dict iteration order.
    """
    ranked = sorted(word_hash.items(),
                    key=lambda item: (-item[1], item[0].lower(), item[0]))
    return [(word, freq) for word, freq in ranked if freq >= freq_min]


def print_banner(title_line):
    """Print ``title_line`` framed above and below by a rule line."""
    print(RULE)
    print(title_line)
    print(RULE)


def print_report(ct_allwords, ct_words, word_hash):
    """Print the totals, the alphabetical listing and the frequency listing."""
    print_banner("=============== WORD COUNT ======================")

    print("Total number of words: %d" % (ct_allwords))
    print("Total number of words (len >= %d): %d" % (WORDSIZE_MIN, ct_words))

    print_banner("=============== ALPHA ORDER =====================")

    for key, freq in alpha_order(word_hash):
        print("%24s %6d" % (key, freq))

    print_banner("============ OCCURRENCE ORDER ===================")

    for word, freq in occurrence_order(word_hash):
        print("%7d   %s" % (freq, word))


if __name__ == "__main__":
    # Read the text from standard input and write the report to stdout.
    CT_ALLWORDS, CT_WORDS, word_hash = count_words(sys.stdin)
    print_report(CT_ALLWORDS, CT_WORDS, word_hash)



Reply via email to