Gilles,
Thank you for the patch. I tested the updated version of 3.1.5 and
it works great! (The problems with endings in Russian disappeared.)
I'll test the program thoroughly tomorrow.
The only thing that surprised me was that the words "highness", "likeness",
and "witness" were already in the English word list, so there was no need
for the high/P, like/P, and wit/P forms at all. Has anybody checked the
list for redundancy?
- Alexander
------------------------
Gilles Detillieux <[EMAIL PROTECTED]> wrote:
Subject: Re: [htdig] a bug? (reposted) - PATCH for htfuzzy 3.1.5
[...]
>Please give this a shot and let me know how it works out.
>
>--- htfuzzy/Endings.cc.orig Wed Sep 1 14:48:32 1999
>+++ htfuzzy/Endings.cc Wed Jun 13 16:47:49 2001
>@@ -20,6 +20,7 @@
> static char RCSid[] = "$Id: Endings.cc,v 1.2.2.1 1999/09/01 19:50:59 grdetil Exp $";
> #endif
>
>+#include "StringList.h"
> #include "Endings.h"
> #include "htfuzzy.h"
> #include <Configuration.h>
>@@ -72,53 +73,55 @@ Endings::getWords(char *w, List &words)
> String word = w;
> word.lowercase();
>
>- if (root2word->Get(word, data) == OK)
>+ //
>+ // Look for word's root(s). Some words may have more than one root,
>+ // so handle them all. Whether or not a word has a root, it's assumed
>+ // to be root in itself.
>+ //
>+ if (word2root->Get(word, data) == OK)
> {
>- //
>- // Found the root's permutations
>- //
>- char *token = strtok(data.get(), " ");
>- while (token)
>- {
>- if (mystrcasecmp(token, w) != 0)
>- {
>- words.Add(new String(token));
>- }
>- token = strtok(0, " ");
>- }
>+ word << ' ' << data;
> }
>- else
>+ StringList roots(word, " ");
>+ Object *root;
>+ roots.Start_Get();
>+ while ((root = roots.Get_Next()) != 0)
> {
>- if (word2root->Get(word, data) == OK)
>- {
>- //
>- // Found the root of the word. We'll add it to the list already
>- //
>- word = data;
>- words.Add(new String(word));
>- }
>- else
>+ //
>+ // Found a root. Look for new words that have this root.
>+ //
>+ word = ((String *)root)->get();
>+ if (root2word->Get(word, data) == OK)
> {
>- //
>- // The root wasn't found. This could mean that the word
>- // is already the root.
>- //
>+ word << ' ' << data;
> }
>-
>- if (root2word->Get(word, data) == OK)
>+ //
>+ // Iterate through the root's permutations
>+ //
>+ char *token = strtok(word.get(), " ");
>+ while (token)
> {
>- //
>- // Found the root's permutations
>- //
>- char *token = strtok(data.get(), " ");
>- while (token)
>+ if (mystrcasecmp(token, w) != 0)
> {
>- if (mystrcasecmp(token, w) != 0)
>+ //
>+ // This permutation isn't the original word, so we add it
>+ // to the list if it's not already there.
>+ //
>+ Object *obj;
>+ words.Start_Get();
>+ while((obj = words.Get_Next()) != 0)
>+ {
>+ if (mystrcasecmp(token, ((String *)obj)->get()) == 0)
>+ {
>+ break;
>+ }
>+ }
>+ if (obj == 0)
> {
> words.Add(new String(token));
> }
>- token = strtok(0, " ");
> }
>+ token = strtok(0, " ");
> }
> }
> }
>--- htfuzzy/EndingsDB.cc.orig Mon Mar 29 09:59:43 1999
>+++ htfuzzy/EndingsDB.cc Wed Jun 13 13:57:54 2001
>@@ -189,7 +189,14 @@ Endings::createRoot(Dictionary &rules, c
> //
> for (int i = 0; i < wordList.Count(); i++)
> {
>- w2r->Put(((String *)wordList[i])->get(), word, strlen(input));
>+ //
>+ // Append to existing record if there is one.
>+ //
>+ data = "";
>+ if (w2r->Get(((String *)wordList[i])->get(), data) == OK)
>+ data << ' ';
>+ data << word;
>+ w2r->Put(((String *)wordList[i])->get(), data);
> }
> }
>
>--- installdir/english.0.orig Tue Feb 16 23:03:57 1999
>+++ installdir/english.0 Wed Jun 13 14:40:07 2001
>@@ -41246,7 +41246,7 @@ hierology
> hierophant
> hifalutin
> higgle/DGRS
>-high/PRTY
>+high/RTY
> highball
> highbinder
> highborn
>@@ -48349,7 +48349,7 @@ lii
> likability
> likable/P
> likasi
>-like/DGJPRSTY
>+like/DGJRSTY
> likeable
> likelihood/SU
> likely/PRT
>@@ -86638,7 +86638,7 @@ wist
> wistaria
> wisteria
> wistful/PY
>-wit/MPS
>+wit/MS
> witan
> witch/GS
> witchcraft
--
Gilles R. Detillieux E-mail: <[EMAIL PROTECTED]>
Spinal Cord Research Centre WWW: http://www.scrc.umanitoba.ca/~grdetil
Dept. Physiology, U. of Manitoba Phone: (204)789-3766
Winnipeg, MB R3E 3J7 (Canada) Fax: (204)789-3930
_______________________________________________
htdig-general mailing list <[EMAIL PROTECTED]>
To unsubscribe, send a message to <[EMAIL PROTECTED]> with a
subject of unsubscribe
FAQ: http://htdig.sourceforge.net/FAQ.html