/* find mattress words: words consisting of two halves in which the first
 * part can be transformed by a one-to-one mapping of letters to become
 * the second half.  e.g. m -> r, a -> e, t -> s. 
 *
 * filters for "interesting" words, where there is at least one
 * repeated letter in each half of the word.
 *
 * Some of the more interesting ones: Zeffirelli, Weisenheimer, needlessly,
 * Shakespearizes, donnybrook, Appomattox, anastomose.  It finds 720 words
 * in a 173528-word word list I have.  */

#include <stdio.h>
#include <string.h>
#include <ctype.h>

/* Capacity in bytes of each line buffer below. */
#define wordsiz 256

/* word holds the current input line as read by fgets(); modword receives
 * the canonicalized form of its two halves, written by canonicalize(). */
char word[wordsiz];
char modword[wordsiz];

/* Canonicalizes the first len bytes of input in such a way that cryptograms
 * (strings related by a one-to-one substitution of characters) get the same
 * output.
 *
 * Each byte is case-folded with tolower() and replaced by the 1-based rank
 * in which its value first appeared, so "matt" and "ress" both become
 * {1, 2, 3, 3}.  Exactly len bytes are written to output; no terminating
 * NUL is appended.  A len of zero or less writes nothing.
 *
 * Returns 1 if the canonicalization was "interesting" --- i.e. some
 * character occurred more than once --- and 0 otherwise. */
int canonicalize(char *input, char *output, int len) {
  int ii, interesting = 0, distinct = 0;
  /* Indexed by (case-folded) byte value; 0 means "not seen yet".  Kept
   * unsigned so ranks above CHAR_MAX are stored without overflow. */
  unsigned char corresp[256] = { 0 };
  for (ii = 0; ii < len; ii++) {
    /* Convert through unsigned char: plain char may be signed, and a
     * negative value is both an invalid argument to tolower() and an
     * out-of-bounds index into corresp. */
    unsigned char c = (unsigned char)tolower((unsigned char)input[ii]);
    if (!corresp[c]) corresp[c] = (unsigned char)++distinct;
    else interesting = 1;
    output[ii] = (char)corresp[c];
  }
  return interesting;
}

/* Reads one word per line from stdin and echoes to stdout every word of
 * even length whose two halves canonicalize to the same pattern, i.e. the
 * second half is a one-to-one letter substitution of the first.
 *
 * Returns 0 at end of input, or 1 after reporting on stderr a line that
 * does not fit in the word buffer. */
int main(void) {
  int len;
  while (fgets(word, wordsiz, stdin)) {
    size_t linelen = strlen(word);
    if (linelen > 0 && word[linelen - 1] == '\n') {
      word[--linelen] = '\0';  /* strip the newline */
    } else if (!feof(stdin)) {
      /* No newline and input remains: the line was cut off by the buffer. */
      fprintf(stderr, "Word too long: %s...\n", word);
      return 1;
    }
    /* Otherwise this is a final line with no trailing newline; it is
     * processed like any other word instead of being rejected. */
    len = (int)linelen;
    if (len % 2) continue;  /* can't split word in half */
    len /= 2;
    /* remove this conditional to print uninteresting words like 'adherent'
     * too.  Checking only the first half suffices: if the halves match,
     * the second half repeats a letter wherever the first does. */
    if (!canonicalize(word, modword, len)) continue;
    canonicalize(word + len, modword + len, len);
    /* The newline was stripped above, so puts() restores it on output. */
    if (!memcmp(modword, modword + len, len)) puts(word);
  }
  return 0;
}

/*
 * --
 * <[EMAIL PROTECTED]>       Kragen Sitaker     <http://www.pobox.com/~kragen/>
 * The Internet stock bubble didn't burst on 1999-11-08.  Hurrah!
 * <URL:http://www.pobox.com/~kragen/bubble.html>
 * The power didn't go out on 2000-01-01 either.  :)
 *
 * Reply via email to
 */