Revision: 7387
          http://languagetool.svn.sourceforge.net/languagetool/?rev=7387&view=rev
Author:   dominikoeo
Date:     2012-06-17 19:00:35 +0000 (Sun, 17 Jun 2012)
Log Message:
-----------
[br] Updated the Breton dictionary to use Apertium svn r38896 and made small changes to the Perl script that creates the dictionary for LanguageTool.
Revision Links: -------------- http://languagetool.svn.sourceforge.net/languagetool/?rev=38896&view=rev Modified Paths: -------------- trunk/JLanguageTool/src/resource/br/breton.dict trunk/JLanguageTool/src/resource/br/create-lexicon.pl Modified: trunk/JLanguageTool/src/resource/br/breton.dict =================================================================== (Binary files differ) Modified: trunk/JLanguageTool/src/resource/br/create-lexicon.pl =================================================================== --- trunk/JLanguageTool/src/resource/br/create-lexicon.pl 2012-06-17 17:44:08 UTC (rev 7386) +++ trunk/JLanguageTool/src/resource/br/create-lexicon.pl 2012-06-17 19:00:35 UTC (rev 7387) @@ -15,10 +15,14 @@ # # 1) Download the Apertium Breton dictionary: # $ svn co https://apertium.svn.sourceforge.net/svnroot/apertium/trunk/apertium-br-fr +# $ cd apertium-br-fr/ # 2) Install Apertium tools: # $ sudo apt-get install lttoolbox +# 3) Download morfologik-stemming-1.4.0.zip from +# http://sourceforge.net/projects/morfologik/files/morfologik-stemming/1.4.0/ +# $ unzip morfologik-stemming-1.4.0.zip +# This creates morfologik-stemming-nodict-1.4.0.jar # 3) Run the script: -# $ cd apertium-br-fr/ # $ ./create-lexicon.pl # # Author: Dominique Pelle <dominique.pe...@gmail.com> @@ -50,6 +54,7 @@ "Alamaned", "Amerikaned", "Angled", + "Barbared", "Varbared", "Parbared", "Bretoned", "Vretoned", "Pretoned", "Brezhoned", "Vrezhoned", "Prezhoned", "Eskimoed", @@ -224,6 +229,8 @@ "deuñvien", "zeuñvien", "teuñvien", "dezvarnourien", "zezvarnourien", "tezvarnourien", "diazezerien", "ziazezerien", "tiazezerien", + "diazezourien", "ziazezourien", "tiazezourien", + "diazezourion", "ziazezourion", "tiazezourion", "dibaberien", "zibaberien", "tibaberien", "dibennerien", "zibennerien", "tibennerien", "dibunerien", "zibunerien", "tibunerien", @@ -297,9 +304,11 @@ "gouerien", "c’houerien", "kouerien", "gouizieien", "c’houizieien", "kouizieien", "gourdonerien", "c’hourdonerien", 
"kourdonerien", + "gourenerien", "c’hourenerien", "kourenerien", "goved", "c’hoved", "koved", "gwazed", "wazed", "kwazed", "gwenanerien", "wenanerien", "kwenanerien", + "gwarded", "warded", "kwarded", "gwerzherien", "werzherien", "kwerzherien", "gwiaderien", "wiaderien", "kwiaderien", "gwiaderion", "wiaderion", "kwiaderion", @@ -489,6 +498,8 @@ "mistri-skol", "vistri-skol", "mistri-vicherour", "vistri-vicherour", "monitourien", "vonitourien", + "moraerien", "voraerien", + "moraerion", "voraerion", "morlaeron", "vorlaeron", "moruteaerien", "voruteaerien", "mouezhierien", "vouezhierien", @@ -909,7 +920,9 @@ } my ($first_letter_lemma) = $lemma =~ /^(gw|[ktpgdbm]).*/i; - my ($first_letter_word) = $word =~ /^([kg]w|c’h|[gdbzfktvpw]).*/i; + $first_letter_lemma = "" unless (defined $first_letter_lemma); + my ($first_letter_word) = $word =~ /^([kg]w|c’h|[gdbzfktvpw]).*/i; + $first_letter_word = "" unless (defined $first_letter_word); $first_letter_lemma = lc $first_letter_lemma; $first_letter_word = lc $first_letter_word; @@ -981,6 +994,14 @@ } print "handled [$out_count] words, unhandled [$err_count] words\n"; +# Adding missing words in dictionary. +# kiz exists only in expressions in Apertium (which is OK) but +# for LanguageTool, it's easier to make it a normal word so we +# don't give false positive on "war ho c'hiz", etc. +print OUT "kiz\tkiz\tN f s\n"; +print OUT "c’hiz\tkiz\tN f s M:0a:2:\n"; +print OUT "giz\tkiz\tN f s M:1:1a:\n"; + print "Lemma words missing from dictionary:\n"; foreach (sort keys %all_lemmas) { print "$_\n" unless (exists $all_words{$_}); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. 
Discussions will include endpoint security, mobile security and the latest in malware threats.
http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs