Revision: 7387
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7387&view=rev
Author:   dominikoeo
Date:     2012-06-17 19:00:35 +0000 (Sun, 17 Jun 2012)
Log Message:
-----------
[br] updated Breton dictionary to use Apertium svn r38896
     and small changes to Perl script that creates the dictionary
     for LanguageTool.

Revision Links:
--------------
    http://languagetool.svn.sourceforge.net/languagetool/?rev=38896&view=rev

Modified Paths:
--------------
    trunk/JLanguageTool/src/resource/br/breton.dict
    trunk/JLanguageTool/src/resource/br/create-lexicon.pl

Modified: trunk/JLanguageTool/src/resource/br/breton.dict
===================================================================
(Binary files differ)

Modified: trunk/JLanguageTool/src/resource/br/create-lexicon.pl
===================================================================
--- trunk/JLanguageTool/src/resource/br/create-lexicon.pl       2012-06-17 17:44:08 UTC (rev 7386)
+++ trunk/JLanguageTool/src/resource/br/create-lexicon.pl       2012-06-17 19:00:35 UTC (rev 7387)
@@ -15,10 +15,14 @@
 #
 # 1) Download the Apertium Breton dictionary:
 #    $ svn co https://apertium.svn.sourceforge.net/svnroot/apertium/trunk/apertium-br-fr
+#    $ cd apertium-br-fr/
 # 2) Install Apertium tools:
 #    $ sudo apt-get install lttoolbox
+# 3) Download morfologik-stemming-1.4.0.zip from
+#    http://sourceforge.net/projects/morfologik/files/morfologik-stemming/1.4.0/
+#    $ unzip morfologik-stemming-1.4.0.zip
+#    This creates morfologik-stemming-nodict-1.4.0.jar
 # 3) Run the script:
-#    $ cd apertium-br-fr/
 #    $ ./create-lexicon.pl
 #
 # Author: Dominique Pelle <dominique.pe...@gmail.com>
@@ -50,6 +54,7 @@
   "Alamaned",
   "Amerikaned",
   "Angled",
+  "Barbared",           "Varbared",       "Parbared",
   "Bretoned",           "Vretoned",       "Pretoned",
   "Brezhoned",          "Vrezhoned",      "Prezhoned",
   "Eskimoed",
@@ -224,6 +229,8 @@
   "deuñvien",           "zeuñvien",       "teuñvien",
   "dezvarnourien",      "zezvarnourien",  "tezvarnourien",
   "diazezerien",        "ziazezerien",    "tiazezerien",
+  "diazezourien",       "ziazezourien",   "tiazezourien",
+  "diazezourion",       "ziazezourion",   "tiazezourion",
   "dibaberien",         "zibaberien",     "tibaberien",
   "dibennerien",        "zibennerien",    "tibennerien",
   "dibunerien",         "zibunerien",     "tibunerien",
@@ -297,9 +304,11 @@
   "gouerien",           "c’houerien",     "kouerien",
   "gouizieien",         "c’houizieien",   "kouizieien",
   "gourdonerien",       "c’hourdonerien", "kourdonerien",
+  "gourenerien",        "c’hourenerien",  "kourenerien",
   "goved",              "c’hoved",        "koved",
   "gwazed",             "wazed",          "kwazed",
   "gwenanerien",        "wenanerien",     "kwenanerien",
+  "gwarded",            "warded",         "kwarded",
   "gwerzherien",        "werzherien",     "kwerzherien",
   "gwiaderien",         "wiaderien",      "kwiaderien",
   "gwiaderion",         "wiaderion",      "kwiaderion",
@@ -489,6 +498,8 @@
   "mistri-skol",        "vistri-skol",
   "mistri-vicherour",   "vistri-vicherour",
   "monitourien",        "vonitourien",
+  "moraerien",          "voraerien",
+  "moraerion",          "voraerion",
   "morlaeron",          "vorlaeron",
   "moruteaerien",       "voruteaerien",
   "mouezhierien",       "vouezhierien",
@@ -909,7 +920,9 @@
     }
 
     my ($first_letter_lemma) = $lemma =~ /^(gw|[ktpgdbm]).*/i;
-    my ($first_letter_word)  = $word  =~ /^([kg]w|c’h|[gdbzfktvpw]).*/i;
+    $first_letter_lemma = "" unless (defined $first_letter_lemma);
+    my ($first_letter_word) = $word  =~ /^([kg]w|c’h|[gdbzfktvpw]).*/i;
+    $first_letter_word = "" unless (defined $first_letter_word);
     $first_letter_lemma = lc $first_letter_lemma;
     $first_letter_word  = lc $first_letter_word;
 
@@ -981,6 +994,14 @@
 }
 print "handled [$out_count] words, unhandled [$err_count] words\n";
 
+# Adding missing words in dictionary.
+# kiz exists only in expressions in Apertium (which is OK) but
+# for LanguageTool, it's easier to make it a normal word so we
+# don't give false positive on "war ho c'hiz", etc.
+print OUT "kiz\tkiz\tN f s\n";
+print OUT "c’hiz\tkiz\tN f s M:0a:2:\n";
+print OUT "giz\tkiz\tN f s M:1:1a:\n";
+
 print "Lemma words missing from dictionary:\n";
 foreach (sort keys %all_lemmas) { print "$_\n" unless (exists $all_words{$_}); }
 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to