-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

This patch enables hunspell to handle compound words whose members
carry two levels of inflection.

For example, in Kichwa:
wiñana -> wiñashka (two levels of inflection) + compounds =
wiñashka+kuna+ntin+mi


Note: the file lingucomponent/unxlngx6.pro/lib/libspelllo.so must be
removed before rebuilding, or else this patch won't be picked up.

Perhaps there is a better way to make the build process aware of the
need to rebuild libspelllo.so?

As you've already figured out, this is one of my first patches o_O, so beware.

Arno

- -----

$ hunspell -d qu_EC -m
wiñashkakunantinmi
wiñashkakunantinmi  pa:wiñashka st:wiñana # stem # Perfect 3rd person
singular pa:kuna st:kuna pa:ntin st:ntin pa:mi
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iEYEARECAAYFAk5v6pkACgkQEMIGVCc8BjDLHwCggGEL66N/53csBRyuPDP+ITGs
/CoAoMqx7TgMYWhnBr5GliItDLUHSlqc
=X0J+
-----END PGP SIGNATURE-----
>From ecea3f9e53793140e10a4e6b79d7ef5cb3714f19 Mon Sep 17 00:00:00 2001
From: Arno Teigseth <arnot...@gmail.com>
Date: Tue, 13 Sep 2011 18:34:16 -0500
Subject: [PATCH] Add twofold affix+compound support to hunspell, as fixed in the official hunspell: https://sourceforge.net/tracker/index.php?func=detail&aid=3288562&group_id=143754&atid=756395

---
 hunspell/hunspell-twoaffixcompound.patch |   80 ++++++++++++++++++++++++++++++
 hunspell/makefile.mk                     |    1 +
 2 files changed, 81 insertions(+), 0 deletions(-)
 create mode 100644 hunspell/hunspell-twoaffixcompound.patch

diff --git a/hunspell/hunspell-twoaffixcompound.patch b/hunspell/hunspell-twoaffixcompound.patch
new file mode 100644
index 0000000..71881a2
--- /dev/null
+++ b/hunspell/hunspell-twoaffixcompound.patch
@@ -0,0 +1,80 @@
+--- misc/hunspell-1.3.2/src/hunspell/affixmgr.cxx	2010-06-17 15:56:41.000000000 +0200
++++ misc/build/hunspell-1.3.2/src/hunspell/affixmgr.cxx	2011-02-10 20:47:22.000000000 +0100
+@@ -48,6 +48,7 @@
+   compoundroot = FLAG_NULL; // compound word signing flag
+   compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
+   compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
++  compoundmoresuffixes = 0; // allow more suffixes within compound words
+   checkcompounddup = 0; // forbid double words in compounds
+   checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
+   checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
+@@ -404,6 +405,10 @@
+           }
+        }
+ 
++       if (strncmp(line,"COMPOUNDMORESUFFIXES",20) == 0) {
++                   compoundmoresuffixes = 1;
++       }
++
+        if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
+                    checkcompounddup = 1;
+        }
+@@ -1626,8 +1631,9 @@
+             if (onlycpdrule) break;
+             if (compoundflag && 
+              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
+-                if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
+-                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
++                if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
++                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) || 
++                        (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
+                     sfx->getCont() &&
+                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
+                             sfx->getContLen())) || (compoundend &&
+@@ -1640,9 +1646,11 @@
+             if (rv ||
+               (((wordnum == 0) && compoundbegin &&
+                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) || // twofold suffixes + compound
+                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
+               ((wordnum > 0) && compoundmiddle &&
+                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) || // twofold suffixes + compound
+                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
+               ) checked_prefix = 1;
+         // else check forbiddenwords and needaffix
+@@ -2118,8 +2126,9 @@
+             if (onlycpdrule) break;
+             if (compoundflag &&
+              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
+-                if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
+-                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
++                if (((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
++                        FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                        (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundflag)))) && !hu_mov_rule &&
+                     sfx->getCont() &&
+                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag, 
+                             sfx->getContLen())) || (compoundend &&
+@@ -2132,9 +2141,11 @@
+             if (rv ||
+               (((wordnum == 0) && compoundbegin &&
+                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundbegin))) ||  // twofold suffix+compound
+                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
+               ((wordnum > 0) && compoundmiddle &&
+                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
++                (compoundmoresuffixes && (rv = suffix_check_twosfx(st, i, 0, NULL, compoundmiddle))) ||  // twofold suffix+compound
+                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
+               ) {
+                 // char * p = prefix_check_morph(st, i, 0, compound);
+--- misc/hunspell-1.3.2/src/hunspell/affixmgr.hxx	2010-06-17 15:56:41.000000000 +0200
++++ misc/build/hunspell-1.3.2/src/hunspell/affixmgr.hxx	2011-02-10 20:47:22.000000000 +0100
+@@ -41,6 +41,7 @@
+   FLAG                compoundroot;
+   FLAG                compoundforbidflag;
+   FLAG                compoundpermitflag;
++  int                 compoundmoresuffixes;
+   int                 checkcompounddup;
+   int                 checkcompoundrep;
+   int                 checkcompoundcase;
+
diff --git a/hunspell/makefile.mk b/hunspell/makefile.mk
index bf6f0b9..8cf2e4e 100644
--- a/hunspell/makefile.mk
+++ b/hunspell/makefile.mk
@@ -41,6 +41,7 @@ TARFILE_MD5=3121aaf3e13e5d88dfff13fb4a5f1ab8
 ADDITIONAL_FILES+=config.h
 
 PATCH_FILES=\
+    hunspell-twoaffixcompound.patch \
     hunspell-static.patch \
     hunspell-wntconfig.patch \
     hunspell-solaris.patch \
-- 
1.7.4.1

Attachment: twofoldaffix_compound.patch.sig
Description: Binary data

_______________________________________________
LibreOffice mailing list
LibreOffice@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/libreoffice

Reply via email to