CVS update: /hu/src/2.0.1/Patches/

timar Sun, 04 Dec 2005 00:16:28 -0800

User: timar   
Date: 05/12/04 00:16:28

Added:
 /hu/src/2.0.1/Patches/
  althyph.diff


Log:
 hyphenation of duplicated two-letter consonants

File Changes:

Directory: /hu/src/2.0.1/Patches/
=================================

File [added]: althyph.diff
Url: 
http://hu.openoffice.org/source/browse/hu/src/2.0.1/Patches/althyph.diff?rev=1.1&content-type=text/vnd.viewcvs-markup
Added lines: 779
----------------
diff -u -r 
lingucomponent/source/hyphenator.old/altlinuxhyph/hyphen/hyphenimp.cxx 
lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx
--- lingucomponent/source/hyphenator.old/altlinuxhyph/hyphen/hyphenimp.cxx      
2005-09-07 21:39:42.000000000 +0200
+++ lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.cxx  
2005-12-04 05:16:32.616893216 +0100
@@ -101,6 +101,12 @@
 using namespace com::sun::star::linguistic2;
 using namespace linguistic;
 
+// values assigned to capitalization types
+#define CAPTYPE_UNKNOWN 0
+#define CAPTYPE_NOCAP   1
+#define CAPTYPE_INITCAP 2
+#define CAPTYPE_ALLCAP  3
+#define CAPTYPE_MIXED   4
 
 ///////////////////////////////////////////////////////////////////////////
 
@@ -325,8 +331,11 @@
        SvtPathOptions aPathOpt;
 
        int nHyphenationPos = -1;
+        int nHyphenationPosAlt = -1;
+        int nHyphenationPosAltHyph = -1;
        int wordlen;
        char *hyphens;
+       char *hyphword;
         char *lcword;
         int k = 0;
 
@@ -383,6 +392,9 @@
             aEnc = aDicts[k].aEnc;
             pCC =  aDicts[k].apCC;
 
+            sal_uInt16 ct = CAPTYPE_UNKNOWN;
+            ct = capitalType(aWord, pCC);
+
             // first convert any smart quotes or apostrophes to normal ones
            OUStringBuffer rBuf(aWord);
             sal_Int32 nc = rBuf.getLength();
@@ -403,6 +415,11 @@
            wordlen = encWord.getLength();
             lcword = new char[wordlen+1];
            hyphens = new char[wordlen+5];
+           hyphword = new char[255];
+
+            char ** alt = NULL; // alternations (possible replacements at 
hyphenation points)
+            int * pos = NULL; // array of [hyphenation point] minus [deletion 
position] 
+            int * cut = NULL; // length of deletions in original word
 
             // copy converted word into simple char buffer
             strcpy(lcword,encWord.getStr());
@@ -411,54 +428,115 @@
             int n = wordlen-1;
            while((n >=0) && (lcword[n] == '.')) n--;
             n++;
-            // fprintf(stderr,"hyphenate... %s\n",lcword); fflush(stderr);
             if (n > 0) {
-              if (hnj_hyphen_hyphenate(dict, lcword, n, hyphens))
+              if (hnj_hyphen_hyphenate_alt(dict, lcword, n, hyphens, hyphword, 
&alt, &pos, &cut))
               {
                  //whoops something did not work
                  delete[] hyphens;
+                  delete[] hyphword;
                   delete[] lcword;
+                  if (alt) {
+                    for(int j = 0; j < n; j++) {
+                        if (alt[j]) free(alt[j]);
+                    }
+                    free(alt);
+                  }
+                  if (pos) free(pos);
+                  if (cut) free(cut);
                  return NULL;
               }
             }
+
             // now backfill hyphens[] for any removed trailing periods
             for (int c = n; c < wordlen; c++) hyphens[c] = '0';
             hyphens[wordlen] = '\0';
  
-            //fprintf(stderr,"... %s\n",hyphens); fflush(stderr);
-           OUStringBuffer  hyphenatedWordBuffer;
-            OUString hyphenatedWord;
            INT32 Leading =  GetPosInWordToCheck( aWord, nMaxLeading );
 
            for (INT32 i = 0; i < encWord.getLength(); i++)
            {
-               hyphenatedWordBuffer.append(aWord[i]);
+                int leftalt = 0;
                 BOOL hit = (wordlen >= minLen);
-                hit = hit && (hyphens[i]&1) && (i < Leading);
-                hit = hit && (i >= (minLead-1) );
-                hit = hit && ((wordlen - i - 1) >= minTrail);
-               if (hit)
-               {
+                if (!alt || !alt[i] || (i >= n)) {
+                    hit = hit && (hyphens[i]&1) && (i < Leading);
+                    hit = hit && (i >= (minLead-1) );
+                    hit = hit && ((wordlen - i - 1) >= minTrail);
+                } else {
+                    char * hyphpos = strchr(alt[i], '=');
+                    if (hyphpos) leftalt = hyphpos - alt[i];
+                    hit = hit && (hyphens[i]&1) && ((i + leftalt - pos[i]) < 
Leading);
+                    hit = hit && ((i + leftalt - pos[i]) >= (minLead-1) );
+                    hit = hit && ((wordlen - i - 1 + strlen(alt[i]) - leftalt 
- 1) >= minTrail);
+                }
+               if (hit) {
                    nHyphenationPos = i;
-                   hyphenatedWordBuffer.append(sal_Unicode('='));
-               }
+                    if (alt && (i < n) && alt[i]) {
+                        nHyphenationPosAlt = i - pos[i];
+                        nHyphenationPosAltHyph = i + leftalt - pos[i];
+                    }
+                }
             }
 
-            hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
-           // fprintf(stderr,"result is %s\n",OU2A(hyphenatedWord));
-            // fflush(stderr);
-            if (nHyphenationPos  == -1)
+            if (nHyphenationPos  == -1) {
                 xRes = NULL;
-            else
-            {
-                xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale ), 
nHyphenationPos,  
-                                      aWord, nHyphenationPos );
-            }
+             } else {
+                if (alt && alt[nHyphenationPos]) {
+                    // remove equal sign
+                    char * s = alt[nHyphenationPos];
+                    int eq = 0;
+                    for (; *s; s++) {
+                        if (*s == '=') eq = 1;
+                        if (eq) *s = *(s + 1);
+                    }
+                    OUString altHyphlow(alt[nHyphenationPos], 
strlen(alt[nHyphenationPos]), aEnc);
+                    OUString altHyph;
+                    switch (ct) {
+                          case CAPTYPE_ALLCAP:
+                            {
+                               altHyph = makeUpperCase(altHyphlow, pCC);
+                               break;
+                             } 
+                          case CAPTYPE_INITCAP:
+                            {
+                               if (nHyphenationPosAlt == 0) {
+                                    altHyph = makeInitCap(altHyphlow, pCC);
+                               } else {
+                                    altHyph = altHyphlow;
+                               }
+                               break;
+                             } 
+                           default:
+                            { 
+                               altHyph = altHyphlow;
+                               break;
+                             }
+                   }
+                    
+                    // handle shortening
+                    int nPos = (nHyphenationPosAltHyph < nHyphenationPos) ?
+                        nHyphenationPosAltHyph : nHyphenationPos;
+                    // hyphenate word with alternations
+                   xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale 
), nPos,  
+                       aWord.replaceAt(nHyphenationPosAlt + 1, 
cut[nHyphenationPos], altHyph), 
+                        nHyphenationPosAltHyph);
+                } else {
+                   xRes = new HyphenatedWord( aWord, LocaleToLanguage( aLocale 
), nHyphenationPos,
+                                      aWord, nHyphenationPos);
+                }
+           }
 
-            delete[] hyphens;
-             delete[] lcword;
-            return xRes;
-            
+            delete[] hyphword;
+            delete[] lcword;
+           delete[] hyphens;
+            if (alt) {
+                for(int j = 0; j < n; j++) {
+                    if (alt[j]) free(alt[j]);
+                }
+                free(alt);
+            }
+            if (pos) free(pos);
+            if (cut) free(cut);
+           return xRes;
        }
         return NULL;
 }
@@ -622,6 +700,29 @@
 
 }
 
+sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * 
pCC)
+{
+        sal_Int32 tlen = aTerm.getLength();
+        if ((pCC) && (tlen)) {
+              String aStr(aTerm);
+              sal_Int32 nc = 0;
+              for (sal_Int32 tindex = 0; tindex < tlen;  tindex++) {
+                  if (pCC->getCharacterType(aStr,tindex) & 
+                       ::com::sun::star::i18n::KCharacterType::UPPER) nc++;
+             }
+
+              if (nc == 0) return (sal_uInt16) CAPTYPE_NOCAP;
+
+              if (nc == tlen) return (sal_uInt16) CAPTYPE_ALLCAP;
+
+              if ((nc == 1) && (pCC->getCharacterType(aStr,0) & 
+                      ::com::sun::star::i18n::KCharacterType::UPPER)) 
+                   return (sal_uInt16) CAPTYPE_INITCAP;
+
+              return (sal_uInt16) CAPTYPE_MIXED;
+       }
+        return (sal_uInt16) CAPTYPE_UNKNOWN;
+}
 
 OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * 
pCC)
 {
@@ -630,6 +731,29 @@
         return aTerm;
 }
 
+OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * 
pCC)
+{
+        if (pCC)
+              return pCC->toUpper_rtl(aTerm, 0, aTerm.getLength());
+        return aTerm;
+}
+
+
+OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * 
pCC)
+{
+        sal_Int32 tlen = aTerm.getLength();
+        if ((pCC) && (tlen)) {
+              OUString bTemp = aTerm.copy(0,1);
+              if (tlen > 1)
+                   return ( pCC->toUpper_rtl(bTemp, 0, 1) 
+                             + pCC->toLower_rtl(aTerm,1,(tlen-1)) );
+
+             return pCC->toUpper_rtl(bTemp, 0, 1);
+       }
+        return aTerm;
+}
+
+
 
 
 Reference< XInterface > SAL_CALL Hyphenator_CreateInstance( 
@@ -844,3 +968,9 @@
 
 
 ///////////////////////////////////////////////////////////////////////////
+
+#undef CAPTYPE_UNKNOWN
+#undef CAPTYPE_NOCAP
+#undef CAPTYPE_INITCAP
+#undef CAPTYPE_ALLCAP
+#undef CAPTYPE_MIXED
diff -u -r 
lingucomponent/source/hyphenator.old/altlinuxhyph/hyphen/hyphenimp.hxx 
lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.hxx
--- lingucomponent/source/hyphenator.old/altlinuxhyph/hyphen/hyphenimp.hxx      
2005-09-07 21:40:02.000000000 +0200
+++ lingucomponent/source/hyphenator/altlinuxhyph/hyphen/hyphenimp.hxx  
2005-12-04 04:24:56.913511664 +0100
@@ -217,8 +217,10 @@
 
 
 private:
+        sal_uInt16 SAL_CALL capitalType(const OUString&, CharClass *);
         OUString SAL_CALL makeLowerCase(const OUString&, CharClass *);
-
+        OUString SAL_CALL makeUpperCase(const OUString&, CharClass *);
+        OUString SAL_CALL makeInitCap(const OUString&, CharClass *);
 };
 
 inline OUString Hyphenator::getImplementationName_Static() throw()
diff -u -r 
lingucomponent/source/hyphenator.old/altlinuxhyph/hyphtabs/substrings.pl 
lingucomponent/source/hyphenator/altlinuxhyph/hyphtabs/substrings.pl
--- lingucomponent/source/hyphenator.old/altlinuxhyph/hyphtabs/substrings.pl    
2003-03-26 14:02:13.000000000 +0100
+++ lingucomponent/source/hyphenator/altlinuxhyph/hyphtabs/substrings.pl        
2005-11-29 12:18:31.000000000 +0100
@@ -8,14 +8,31 @@
 
 while (<HYPH>)
 {
+    $pat =~ s/%.*$//g;
     if (/^\%/) {
        #comment, ignore
-    } elsif (/^(.+)\%/) {
+    } elsif (/^(.+)\/([^,]+),([0-9]+),([0-9]+)$/) {
         $origpat = $1;
        $pat = $1;
+        $repl = $2;
+        $beg = $3;
+        $len = $4;
        $pat =~ s/\d//g;
        push @patlist, $pat;
-       $pattab{$pat} = $origpat;       
+       $pattab{$pat} = $origpat;
+        $repltab{$pat} = $repl;
+        $replbeg{$pat} = $beg - 1;
+        $repllen{$pat} = $len;
+    } elsif (/^(.+)\/(.+)$/) {
+        $origpat = $1;
+       $pat = $1;
+        $repl = $2;
+       $pat =~ s/\d//g;
+       push @patlist, $pat;
+       $pattab{$pat} = $origpat;
+        $repltab{$pat} = $repl;
+        $replbeg{$pat} = 0;
+        $repllen{$pat} = length($pat);
     } elsif (/^(.+)$/) {
        $origpat = $1;
        $pat = $1;
@@ -40,11 +57,20 @@
                    $ss = substr ($pat, 0, $i);
                    print "$ss+$pattab{$subpat}\n";
                    push @newpatlist, $newpat;
+                   if (defined $repltab{$subpat}) {
+                        $newrepltab{$newpat} = $repltab{$subpat};
+                        $newreplbeg{$newpat} = $replbeg{$subpat} + $i;
+                        $newrepllen{$newpat} = $repllen{$subpat};
+                    }
                } else {
                    $tmp =  $newpattab{$newpat};
                    $newpattab{$newpat} =
                        combine ($newpattab{$newpat}, $pattab{$subpat});
                    print "$tmp + $pattab{$subpat} -> $newpattab{$newpat}\n";
+#                  if (defined $newrepltab{$newpat}) {
+#                        $newrepltab{$newpat} = $repltab{$subpat};
+#                        $newreplbeg{$newpat} = $replbeg{$subpat} + $i;
+#                        $newrepllen{$newpat} = $repllen{$subpat};
                }
            }
        }
@@ -52,7 +78,11 @@
 }
 
 foreach $pat (@newpatlist) {
-    print OUT $newpattab{$pat}."\n";
+    if (defined $newrepltab{$pat}) {
+        print OUT 
$newpattab{$pat}."/".$newrepltab{$pat}.",".($newreplbeg{$pat}+1).",".$newrepllen{$pat}."\n";
+    } else {
+        print OUT $newpattab{$pat}."\n";
+    }
 }
 
 #convert 'n1im' to 0n1i0m0 expresed as a list
@@ -91,6 +121,11 @@
 #              print ("$i $j $subexp[$j] $exp[2 * $i + $j]\n");
                if ($subexp[$j] > $exp[2 * $i + $j]) {
                    $exp[2 * $i + $j] = $subexp[$j];
+                    if (defined $newrepltab{$pat2} && !defined 
$newrepltab{$pat1}) {
+                        $newrepltab{$pat1} = $newrepltab{$pat2};
+                        $newreplbeg{$pat1} = $newreplbeg{$pat2} + $i;
+                        $newrepllen{$pat1} = $newrepllen{$pat2};
+                    }
                }
            }
            print ("$pat1 includes $pat2 at pos $i\n");
diff -u -r lingucomponent/source/hyphenator.old/altlinuxhyph/libhnj/AUTHORS 
lingucomponent/source/hyphenator/altlinuxhyph/libhnj/AUTHORS
--- lingucomponent/source/hyphenator.old/altlinuxhyph/libhnj/AUTHORS    
2003-03-26 14:02:16.000000000 +0100
+++ lingucomponent/source/hyphenator/altlinuxhyph/libhnj/AUTHORS        
2005-12-04 05:00:15.000000000 +0100
@@ -1,3 +1,6 @@
+Alternation extension: Németh László <nemeth at ooo>
+
+---
 This is part of libhnj library, but it is heavily modified, so write
 bug reports to me:
 Peter Novodvorsky <[EMAIL PROTECTED]>
diff -u -r lingucomponent/source/hyphenator.old/altlinuxhyph/libhnj/hyphen.c 
lingucomponent/source/hyphenator/altlinuxhyph/libhnj/hyphen.c
--- lingucomponent/source/hyphenator.old/altlinuxhyph/libhnj/hyphen.c   
2003-03-26 14:02:18.000000000 +0100
+++ lingucomponent/source/hyphenator/altlinuxhyph/libhnj/hyphen.c       
2005-12-03 14:27:08.000000000 +0100
@@ -1,4 +1,4 @@
-/* LibHnj is dual licensed under LGPL and MPL. Boilerplate for both
+/* Libhnj is dual licensed under LGPL and MPL. Boilerplate for both
  * licenses follows.
  */
 
@@ -61,6 +61,14 @@
   return new;
 }
 
+/* remove cross-platform text line end characters */
+void hnj_strchomp(char * s)
+{
+  int k = strlen(s);
+  if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
+  if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
+}
+
 /* a little bit of a hash table implementation. This simply maps strings
    to state numbers */
 
@@ -86,7 +94,6 @@
 {
   const char *p;
   unsigned int h=0, g;
-
   for(p = s; *p != '\0'; p += 1) {
     h = ( h << 4 ) + *p;
     if ( ( g = h & 0xf0000000 ) ) {
@@ -148,7 +155,6 @@
 {
   int i;
   HashEntry *e;
-
   i = hnj_string_hash (key) % HASH_SIZE;
   for (e = hashtab->entries[i]; e; e = e->next)
     if (!strcmp (key, e->key))
@@ -176,6 +182,7 @@
                                  sizeof(HyphenState));
     }
   dict->states[dict->num_states].match = NULL;
+  dict->states[dict->num_states].repl = NULL;
   dict->states[dict->num_states].fallback_state = -1;
   dict->states[dict->num_states].num_trans = 0;
   dict->states[dict->num_states].trans = NULL;
@@ -228,9 +235,12 @@
   HyphenDict *dict;
   HashTab *hashtab;
   FILE *f;
-  char buf[80];
-  char word[80];
-  char pattern[80];
+  char buf[MAX_CHARS];
+  char word[MAX_CHARS];
+  char pattern[MAX_CHARS];
+  char * repl;
+  signed char replindex;
+  signed char replcut;
   int state_num, last_state;
   int i, j;
   char ch;
@@ -251,6 +261,7 @@
   dict->num_states = 1;
   dict->states = hnj_malloc (sizeof(HyphenState));
   dict->states[0].match = NULL;
+  dict->states[0].repl = NULL;
   dict->states[0].fallback_state = -1;
   dict->states[0].num_trans = 0;
   dict->states[0].trans = NULL;
@@ -268,6 +279,25 @@
        {
          j = 0;
          pattern[j] = '0';
+          repl = strchr(buf, '/');
+          replindex = 0;
+          replcut = 0;
+          if (repl) {
+            char * index = strchr(repl + 1, ',');
+            *repl = '\0';
+            if (index) {
+                char * index2 = strchr(index + 1, ',');
+                *index = '\0';
+                *index2 = '\0';
+                replindex = (signed char) atoi(index + 1) - 1;
+                replcut = (signed char) atoi(index2 + 1);                
+            } else {
+                hnj_strchomp(repl + 1);
+                replindex = 0;
+                replcut = strlen(buf);
+            }
+            repl = hnj_strdup(repl + 1);
+          }
          for (i = 0; ((buf[i] > ' ') || (buf[i] < 0)); i++)
            {
              if (buf[i] >= '0' && buf[i] <= '9')
@@ -281,18 +311,26 @@
          word[j] = '\0';
          pattern[j + 1] = '\0';
 
+          i = 0;
          /* Optimize away leading zeroes */
-         for (i = 0; pattern[i] == '0'; i++);
+         if (!repl) for (; pattern[i] == '0'; i++);
 
 #ifdef VERBOSE
-         printf ("word %s pattern %s, j = %d\n", word, pattern + i, j);
+         printf ("word %s pattern %s, j = %d  repl: %s\n", word, pattern + i, 
j, repl);
 #endif
          found = hnj_hash_lookup (hashtab, word);
          state_num = hnj_get_state (dict, hashtab, word);
          dict->states[state_num].match = hnj_strdup (pattern + i);
+         dict->states[state_num].repl = repl;
+         dict->states[state_num].replindex = replindex;
+          if (!replcut) {
+            dict->states[state_num].replcut = strlen(word);
+          } else {
+            dict->states[state_num].replcut = replcut;
+          }
 
          /* now, put in the prefix transitions */
-         for (; found < 0 ;j--)
+          for (; found < 0 ;j--)
            {
              last_state = state_num;
              ch = word[j - 1];
@@ -326,14 +364,14 @@
   for (i = 0; i < HASH_SIZE; i++)
     for (e = hashtab->entries[i]; e; e = e->next)
       {
-       for (j = 1; 1; j++)
-         {
+       if (*(e->key)) for (j = 1; 1; j++)
+         {          
            state_num = hnj_hash_lookup (hashtab, e->key + j);
            if (state_num >= 0)
              break;
          }
-        // KBH: FIXME state 0 fallback_state should always be -1?
-       if (e->val) 
+        /* KBH: FIXME state 0 fallback_state should always be -1? */
+       if (e->val)
          dict->states[e->val].fallback_state = state_num;
       }
 #ifdef VERBOSE
@@ -365,6 +403,8 @@
       hstate = &dict->states[state_num];
       if (hstate->match)
        hnj_free (hstate->match);
+      if (hstate->repl)
+       hnj_free (hstate->repl);
       if (hstate->trans)
        hnj_free (hstate->trans);
     }
@@ -419,8 +459,8 @@
        {
 
          if (state == -1) {
-            // return 1;
-           // KBH: FIXME shouldn't this be as follows?
+            /* return 1; */
+           /*  KBH: FIXME shouldn't this be as follows? */
             state = 0;
             goto try_next_letter;
           }          
@@ -454,7 +494,8 @@
         elimination of trailing zeroes from the match. Leading zeroes
         have already been optimized. */
       match = dict->states[state].match;
-      if (match)
+      /* replacing rules not handled by hyphen_hyphenate() */
+      if (match && !dict->states[state].repl)
        {
          offset = i + 1 - strlen (match);
 #ifdef VERBOSE
@@ -469,9 +510,9 @@
              hyphens[offset + k] = match[k];
        }
 
-      // KBH: we need this to make sure we keep looking in a word
-      // for patterns even if the current character is not known in state 0
-      // since patterns for hyphenation may occur anywhere in the word
+      /* KBH: we need this to make sure we keep looking in a word */
+      /* for patterns even if the current character is not known in state 0 */
+      /* since patterns for hyphenation may occur anywhere in the word */
       try_next_letter: ;
 
     }
@@ -497,3 +538,194 @@
     hnj_free (prep_word);
   return 0;    
 }
+
+int hnj_hyphen_hyphenate_alt (HyphenDict *dict,
+                          const char *word, int word_size, char * hyphens,
+                          char *hyphword, char *** alt, int ** pos, int ** cut)
+{
+  char prep_word_buf[MAX_WORD];
+  char *prep_word;
+  int i, j, k;
+  int state;
+  char ch;
+  HyphenState *hstate;
+  char *match;
+  char *repl;
+  signed char replindex;
+  signed char replcut;
+  int offset;
+  int matchlen[MAX_CHARS];
+  int matchindex[MAX_CHARS];
+  char * matchrepl[MAX_CHARS];
+  int isrepl = 0;
+  int nHyphCount;
+
+  if (word_size + 3 < MAX_WORD)
+    prep_word = prep_word_buf;
+  else
+    prep_word = hnj_malloc (word_size + 3);
+
+  j = 0;
+  prep_word[j++] = '.';
+  
+  for (i = 0; i < word_size; i++)
+      prep_word[j++] = word[i];
+      
+  for (i = 0; i < j; i++)                                                      
 
+    hyphens[i] = '0';    
+  
+  prep_word[j++] = '.';
+
+  prep_word[j] = '\0';
+#ifdef VERBOSE
+  printf ("prep_word = %s\n", prep_word);
+#endif
+
+  /* now, run the finite state machine */
+  state = 0;
+  for (i = 0; i < j; i++)
+    {
+      ch = prep_word[i];
+      for (;;)
+       {
+
+         if (state == -1) {
+            /* return 1; */
+           /*  KBH: FIXME shouldn't this be as follows? */
+            state = 0;
+            goto try_next_letter;
+          }          
+
+#ifdef VERBOSE
+         char *state_str;
+         state_str = get_state_str (state);
+
+         for (k = 0; k < i - strlen (state_str); k++)
+           putchar (' ');
+         printf ("%s", state_str);
+#endif
+
+         hstate = &dict->states[state];
+         for (k = 0; k < hstate->num_trans; k++)
+           if (hstate->trans[k].ch == ch)
+             {
+               state = hstate->trans[k].new_state;
+               goto found_state;
+             }
+         state = hstate->fallback_state;
+#ifdef VERBOSE
+         printf (" falling back, fallback_state %d\n", state);
+#endif
+       }
+    found_state:
+#ifdef VERBOSE
+      printf ("found state %d\n",state);
+#endif
+      /* Additional optimization is possible here - especially,
+        elimination of trailing zeroes from the match. Leading zeroes
+        have already been optimized. */
+      match = dict->states[state].match;
+      repl = dict->states[state].repl;
+      replindex = dict->states[state].replindex;
+      replcut = dict->states[state].replcut;
+      /* unlike hnj_hyphen_hyphenate(), replacing rules are handled here */
+      if (match)
+       {
+         offset = i + 1 - strlen (match);
+#ifdef VERBOSE
+         for (k = 0; k < offset; k++)
+           putchar (' ');
+         printf ("%s (%s)\n", match, repl);
+#endif
+          if (repl) {
+            if (!isrepl) for(; isrepl < word_size; isrepl++) {
+                matchrepl[isrepl] = NULL;
+                matchindex[isrepl] = -1;
+            }
+            matchlen[offset + replindex] = replcut;
+          }
+         /* This is a linear search because I tried a binary search and
+            found it to be just a teeny bit slower. */
+         for (k = 0; match[k]; k++) {
+           if ((hyphens[offset + k] < match[k])) {
+             hyphens[offset + k] = match[k];
+              if (match[k]&1) {
+                matchrepl[offset + k] = repl;
+                if (repl && (k >= replindex) && (k <= replindex + replcut)) {
+                    matchindex[offset + replindex] = offset + k;
+                }
+              }
+            }
+          }
+          
+       }
+
+      /* KBH: we need this to make sure we keep looking in a word */
+      /* for patterns even if the current character is not known in state 0 */
+      /* since patterns for hyphenation may occur anywhere in the word */
+      try_next_letter: ;
+
+    }
+#ifdef VERBOSE
+  for (i = 0; i < j; i++)
+    putchar (hyphens[i]);
+  putchar ('\n');
+#endif
+
+  for (i = 0; i < j - 4; i++)
+#if 0
+    if (hyphens[i + 1] & 1)
+      hyphens[i] = '-';
+#else
+    hyphens[i] = hyphens[i + 1];
+#endif
+  hyphens[0] = '0';
+  for (; i < word_size; i++)
+    hyphens[i] = '0';
+  hyphens[word_size] = '\0';
+
+  if (prep_word != prep_word_buf)
+    hnj_free (prep_word);
+        
+       /* now create a new char string showing hyphenation positions */
+       /* count the hyphens and allocate space for the new hyphenated string */
+       nHyphCount = 0;
+       for (i = 0; i < word_size; i++)
+          if (hyphens[i]&1)
+             nHyphCount++;
+       j = 0;
+       for (i = 0; i < word_size; i++) {
+           if (isrepl && (matchindex[i] >= 0) && matchrepl[matchindex[i]]) {
+                if (alt && pos && cut) {
+                    if (!*alt && !*pos && !*cut) {
+                        int k;
+                        *alt = (char **) malloc(sizeof(char *) * word_size);
+                        *pos = (int *) malloc(sizeof(int) * word_size);
+                        *cut = (int *) malloc(sizeof(int) * word_size);
+                        for (k = 0; k < word_size; k++) {
+                            (*alt)[k] = NULL;
+                            (*pos)[k] = 0;
+                            (*cut)[k] = 0;
+                        }
+                    }
+                    (*alt)[matchindex[i] - 1] = 
hnj_strdup(matchrepl[matchindex[i]]);
+                    (*pos)[matchindex[i] - 1] = matchindex[i] - i;
+                    (*cut)[matchindex[i] - 1] = matchlen[i];
+                }
+                strcpy(hyphword +j, matchrepl[matchindex[i]]);
+                j += strlen(matchrepl[matchindex[i]]);
+                i += matchlen[i] - 1;
+              }
+
+          else {
+                 hyphword[j++] = word[i];
+
+          if (hyphens[i]&1)
+         {
+               hyphword[j++] = '=';
+         }
+          }
+       }
+       hyphword[j] = '\0';
+  return 0;
+}
diff -u -r lingucomponent/source/hyphenator.old/altlinuxhyph/libhnj/hyphen.h 
lingucomponent/source/hyphenator/altlinuxhyph/libhnj/hyphen.h
--- lingucomponent/source/hyphenator.old/altlinuxhyph/libhnj/hyphen.h   
2003-03-26 14:02:19.000000000 +0100
+++ lingucomponent/source/hyphenator/altlinuxhyph/libhnj/hyphen.h       
2005-12-01 09:26:38.000000000 +0100
@@ -54,6 +54,9 @@
 
 struct _HyphenState {
   char *match;
+  char *repl;
+  signed char replindex;
+  signed char replcut;
   int fallback_state;
   int num_trans;
   HyphenTrans *trans;
@@ -70,6 +73,10 @@
                           const char *word, int word_size,
                           char *hyphens);
 
+int hnj_hyphen_hyphenate_alt (HyphenDict *dict,
+                          const char *word, int word_size, char * hyphens,
+                          char *hyphenated_word, char *** alt, int ** pos, int 
** cut);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */




---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

CVS update: /hu/src/2.0.1/Patches/

Reply via email to