This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch opennlp-2.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/opennlp-2.x by this push:
     new 117d27f7 OPENNLP-1782: Add tagging examples to verify French POS model 
(#863)
117d27f7 is described below

commit 117d27f7c453780a109ee832d45eb953e72e6f1a
Author: meriam2303 <[email protected]>
AuthorDate: Tue Nov 11 14:49:32 2025 +0100

    OPENNLP-1782: Add tagging examples to verify French POS model (#863)
    
    * adds French sample sentence and POS tags, incl. Arabic + Maghrebi stub 
examples for existing tests
    
    * adds French constant
    
    * inits French resources for test context
    ---------
    
    Co-authored-by: Richard Zowalla <[email protected]>
    (cherry picked from commit 237a7713cc6d59e914741f58395fa4068ef311e6)
---
 .../java/opennlp/tools/postag/POSTaggerMEIT.java    | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java 
b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java
index d901654a..931e5c6f 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMEIT.java
@@ -43,6 +43,7 @@ public class POSTaggerMEIT {
   private static final String GERMAN = "de";
   private static final String POLISH = "pl";
   private static final String PORTUGUESE = "pt";
+  private static final String FRENCH = "fr";
 
   private static final Map<String, Tokenizer> TOKENIZERS = new HashMap<>();
   private static final Map<String, POSTagger> TAGGERS = new HashMap<>();
@@ -51,7 +52,7 @@ public class POSTaggerMEIT {
 
   @BeforeAll
   public static void initResources() throws IOException {
-    List<String> langs = List.of(CATALAN, ENGLISH, GERMAN, POLISH, PORTUGUESE);
+    final List<String> langs = List.of(CATALAN, ENGLISH, FRENCH, GERMAN, 
POLISH, PORTUGUESE);
     for (String langCode: langs) {
       TOKENIZERS.put(langCode, new ThreadSafeTokenizerME(langCode));
       TAGGERS.put(langCode, new ThreadSafePOSTaggerME(langCode));
@@ -142,7 +143,7 @@ public class POSTaggerMEIT {
         "Un gran embossament d'aire fred es comença a despenjar cap al centre 
d'Europa.",
           // OpenNLP, different at: idx pos 2, 3, 5, and 13(+14) -> however, 
only pos 5 is "wrong" (ref)
           new String[]{"DET", "ADJ", "NOUN", "ADP", "NOUN", "ADJ", "PRON", 
"VERB", "ADP", "VERB", "NOUN",
-              "ADP+DET", "NOUN", "ADP", "PROPN", "PUNCT"})
+              "ADP+DET", "NOUN", "ADP", "PROPN", "PUNCT"}),
       // REFERENCE ("gold"):
       // "DET", "ADJ", "NOUN", "ADP", "NOUN", "ADJ", "PRON", "VERB", "ADP", 
"VERB", "NOUN", "ADP+DET",
         // "NOUN", "ADP", "PROPN", "PUNCT"})
@@ -152,6 +153,22 @@ public class POSTaggerMEIT {
         // "NOUN", "PROPN", "PROPN", "PUNCT"
         // ok! ,  ok! ,  ??? ,  ???   ,  ok!  ,  ok! ,  ok!  ,  ok!  ,  ok! ,  
ok!  ,  ok!  ,  ok!  +  ok! ,
         // ok!  ,  ???   ,  ok!   ,  ok!
+      // via @meriam2303, original by Guillaume Musso:
+      // La jeune fille et la nuit, S.469 
+      Arguments.of(FRENCH, 0,
+            "Vivre avec elle me faisait souffrir, mais vivre sans elle 
m'aurait tué.",
+            new String[] {"VERB", "ADP", "PRON", "PRON", "VERB", "VERB", 
"PUNCT", "CCONJ", "VERB",
+                "ADP", "PRON", "PRON", "AUX", "VERB", "PUNCT"})
+      // via @meriam2303, original by Hind Choueykh Ben Salah
+      // التجريد في الشّعر العربي , S. 42
+      //Arguments.of(ARABIC,0,
+      //"عشق أبو نواس جارية تدعى جنان",
+      //new String[]{"VERB","PROPN","NOUN","VERB","PROPN"})  
+      // via @meriam2303, original by Mohamed Laarousi Elmetoui
+      // التوت المر , S.7
+      //Arguments.of(MAGHREBI_ARABIC_FRENCH,0,
+      //"Wassa3 belek ya baba...",
+      //new String[]{"VERB","NOUN","INTJ","NOUN","PUNCT"})    
     );
   }
 }

Reply via email to