Here's a method for inputting text with four keys: the digits 1, 2, 3, and the backspace key. It uses experimental probabilities of word occurrence (stored in the file ".wordlist") to speed up input. By default, it generates some default SWAG probabilities from the lengths of the words in /usr/share/dict/words; it worked a lot better for me when I merged data into the .wordlist file from much of my recent email, by the following means.
cat priv/mail/outmail[efgh]* | tr -c 'a-zA-Z' '^V^J' | sort | uniq -c | awk '{print $2, $1}' >x cat .wordlist x | sort -f > .wrodlist.new mv .wrodlist.new .wordlist Someone somewhere claimed that three is, in some sense, the optimal number of alternatives for things like this; the time to navigate to your goal is jointly proportional to the number of points at which you must make a choice and the number of alternatives you must examine at each point; the number of points at which you must make a choice is roughly the logarithm of the total number of choices (e.g. the number of words in English) to the base of the number of choices at each point. Taking N as the total number of choices and n as the number at each point, the number of times you need to make a choice is ln(N)/ln(n), and the total time to pick one of the N items is (n ln(N)/ln(n)). I can't differentiate that at 12:30 AM, unfortunately, but I'm told it has a minimum at n=e, and is smaller at n=3 than where n is any other whole number. That's why I picked three alternatives. Even with the probabilities from my old email, I can't input text nearly as fast with this method as I can with, say, Tegic T9. I tried a certain short phrase; keyboarding it with QWERTY, it took me about 3 seconds; with T9, I entered it in 25 seconds; with the traditional keypad entry "press 2 once for A, twice for B, three times for C", it took 37 seconds; with this 4-key method, it took me 148 seconds with the probabilities guessed from /usr/share/dict/words and 116 seconds with word probabilities from my email. Conditional probabilities based on previous words might help some. #!/usr/bin/python import os, sys, random, bisect, math def chomp(line): if line.endswith('\n'): return chomp(line[:-1]) return line def make_initial_wordlist(filename): # what initial distribution should we assume? Presumably English # is pretty well huffman-coded, so a word's frequency should # decrease exponentially with its length. I don't know the base # of the exponent, but some random web page asserts that English # has about 2.62 bits per letter, so I guesstimate that a word's # frequency should be proportional to 2^(-2.62 * len), or # (2^-2.62)^len, or e^((ln 2) * -2.62 * len). infile = open('/usr/share/dict/words') filename_new = "%s.new" % filename outfile = open(filename_new, 'w') freqfudgefactor = math.log(2) * -2.62 for line in infile.xreadlines(): word = chomp(line) freqguess = math.exp(freqfudgefactor * len(word)) * 200 outfile.write('%s %.10f\n' % (word, freqguess)) outfile.close() os.rename(filename_new, filename) # only works in Unix! def read_wordlist(): class wordlist: def __init__(self, filename): self.filename = filename infile = open(filename) lastword = None self.words = [] self.probs = [] self.cumprobs = [0] for line in infile.xreadlines(): word, prob = line.split() prob = float(prob) if word != lastword: self.words.append(word) self.probs.append(prob) self.cumprobs.append(self.cumprobs[-1] + prob) else: # dup self.probs[-1] += prob self.cumprobs[-1] += prob lastword = word del self.cumprobs[0:1] infile.close() def start(self): return 0 def end(self): return len(self.words) def get(self, ii): self.probs[ii] += 1 return self.words[ii] def write(self): filename_new = '%s.new' % self.filename outfile = open(filename_new, 'w') for ii in xrange(self.end()): outfile.write('%s %.10f\n' % (self.words[ii], self.probs[ii])) outfile.close() os.rename(filename_new, self.filename) def divide(self, start, end): startprob = self.cumprobs[start] endprob = self.cumprobs[end] first = bisect.bisect_left(self.cumprobs, (startprob + startprob + endprob)/3.0, start, end) second = bisect.bisect_left(self.cumprobs, (startprob + endprob + endprob)/3.0, start, end) # this is a dumb kludge: if second == end and end - start > 1: second -= 1 if first >= second: first = second if first == second and second - start > 1: first -= 1 return first, second filename = '.wordlist' if not os.path.exists(filename): print "initializing wordlist..." make_initial_wordlist(filename) return wordlist(filename) class cursor: def __init__(self, wordlist): self.stack = [(wordlist.start(), wordlist.end()-1)] self.wordlist = wordlist def _opts(self): start, end = self.stack[-1] first, second = self.wordlist.divide(start, end) return [start, first, second, end] def word(self, ii): return self.wordlist.words[ii] def options(self): return '[%s 1 %s 2 %s 3 %s]' % tuple(map(self.word, self._opts())) def shrink(self, whichpart): opts = self._opts() self.stack.append((opts[whichpart-1], opts[whichpart])) def expand(self): if len(self.stack) > 1: self.stack.pop() def done(self): opts = self._opts() return opts[0] == opts[-1] def get(self): return self.wordlist.get(self.stack[-1][0]) def dump(self): opts = self._opts() return '%s %s %s' % (opts, map(self.word, opts), [self.wordlist.cumprobs[ii] for ii in opts]) def main(): print "reading wordlist..." wordlist = read_wordlist() os.system('stty cbreak -echo') backspace, delete = '\b', '\177' try: while 1: c = cursor(wordlist) msg = '' while not c.done(): msg += c.options() sys.stdout.write(msg) char = sys.stdin.read(1) sys.stdout.write((backspace + ' ' + backspace) * len(msg)) msg = '' if char in '123': c.shrink(int(char)) elif char in [backspace, delete]: c.expand() elif char == '?': msg = c.dump() + ' ' else: msg = "unknown char '" + char + "' " sys.stdout.write(c.get() + ' ') finally: sys.stdout.write("exiting...") sys.stdout.flush() os.system('stty -cbreak echo') wordlist.write() if __name__ == '__main__': main() -- <[EMAIL PROTECTED]> Kragen Sitaker <http://www.pobox.com/~kragen/> Edsger Wybe Dijkstra died in August of 2002. The world has lost a great man. See http://advogato.org/person/raph/diary.html?start=252 and http://www.kode-fu.com/geek/2002_08_04_archive.shtml for details.