This is an automated email from the ASF dual-hosted git repository.

humbedooh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp-addons.git

commit 7b9875b1cf6fd36b517414f34296de3ef88f057f
Author: Martin Wiesner <[email protected]>
AuthorDate: Wed Jan 22 11:34:08 2025 +0100

    OPENNLP-1698: Switch to extjwnl in jwnl-addon
    - migrates jwnl-addon to net.sf.extjwnl (2.0.5)
    - simplifies constructor of JWNLLemmatizer
    - adds new unit test: JWNLLemmatizerTest with line coverage > 90%
---
 jwnl-addon/pom.xml                                 |  27 +++++-
 .../opennlp/jwnl/lemmatizer/JWNLLemmatizer.java    | 106 ++++++---------------
 .../jwnl/lemmatizer/JWNLLemmatizerTest.java        |  77 +++++++++++++++
 jwnl-addon/src/test/resources/log4j2.xml           |  37 +++++++
 pom.xml                                            |   1 -
 5 files changed, 168 insertions(+), 80 deletions(-)

diff --git a/jwnl-addon/pom.xml b/jwnl-addon/pom.xml
index 3a39da5..2511645 100644
--- a/jwnl-addon/pom.xml
+++ b/jwnl-addon/pom.xml
@@ -33,6 +33,11 @@
   <packaging>jar</packaging>
   <name>Apache OpenNLP JWNL Addon</name>
 
+  <properties>
+    <extjwnl.version>2.0.5</extjwnl.version>
+    <wn-data.version>1.2</wn-data.version>
+  </properties>
+
   <dependencies>
     <dependency>
       <groupId>org.apache.opennlp</groupId>
@@ -40,10 +45,17 @@
     </dependency>
 
     <dependency>
-      <groupId>net.sf.jwordnet</groupId>
-      <artifactId>jwnl</artifactId>
-      <version>1.3.3</version>
-      <scope>compile</scope>
+      <groupId>net.sf.extjwnl</groupId>
+      <artifactId>extjwnl</artifactId>
+      <version>${extjwnl.version}</version>
+    </dependency>
+
+    <dependency>
+      <groupId>net.sf.extjwnl</groupId>
+      <artifactId>extjwnl-data-wn31</artifactId>
+      <version>${wn-data.version}</version>
+      <optional>true</optional>
+      <scope>runtime</scope>
     </dependency>
     
     <dependency>
@@ -60,6 +72,13 @@
       <groupId>org.junit.jupiter</groupId>
       <artifactId>junit-jupiter-params</artifactId>
     </dependency>
+
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-slf4j2-impl</artifactId>
+      <version>${log4j2.version}</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 
   <build>
diff --git a/jwnl-addon/src/main/java/opennlp/jwnl/lemmatizer/JWNLLemmatizer.java b/jwnl-addon/src/main/java/opennlp/jwnl/lemmatizer/JWNLLemmatizer.java
index ba55a0f..b85b2b8 100644
--- a/jwnl-addon/src/main/java/opennlp/jwnl/lemmatizer/JWNLLemmatizer.java
+++ b/jwnl-addon/src/main/java/opennlp/jwnl/lemmatizer/JWNLLemmatizer.java
@@ -17,92 +17,51 @@
 
 package opennlp.jwnl.lemmatizer;
 
-import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
-import net.didion.jwnl.JWNLException;
-import net.didion.jwnl.data.Adjective;
-import net.didion.jwnl.data.FileDictionaryElementFactory;
-import net.didion.jwnl.data.IndexWord;
-import net.didion.jwnl.data.POS;
-import net.didion.jwnl.data.PointerType;
-import net.didion.jwnl.data.VerbFrame;
-import net.didion.jwnl.dictionary.FileBackedDictionary;
-import net.didion.jwnl.dictionary.MorphologicalProcessor;
-import net.didion.jwnl.dictionary.file_manager.FileManager;
-import net.didion.jwnl.dictionary.file_manager.FileManagerImpl;
-import net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor;
-import net.didion.jwnl.dictionary.morph.DetachSuffixesOperation;
-import net.didion.jwnl.dictionary.morph.LookupExceptionsOperation;
-import net.didion.jwnl.dictionary.morph.LookupIndexWordOperation;
-import net.didion.jwnl.dictionary.morph.Operation;
-import net.didion.jwnl.dictionary.morph.TokenizerOperation;
-import 
net.didion.jwnl.princeton.data.PrincetonWN17FileDictionaryElementFactory;
-import net.didion.jwnl.princeton.file.PrincetonRandomAccessDictionaryFile;
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.IndexWord;
+import net.sf.extjwnl.data.POS;
+import net.sf.extjwnl.dictionary.Dictionary;
+import net.sf.extjwnl.dictionary.MorphologicalProcessor;
+
 import opennlp.tools.lemmatizer.Lemmatizer;
 
+/**
+ * A {@link Lemmatizer} implementation based on extJWNL
+ * and underlying WordNet resources.
+ *
+ * @see Dictionary
+ * @see MorphologicalProcessor
+ * @see POS
+ */
 public class JWNLLemmatizer implements Lemmatizer {
 
-  private net.didion.jwnl.dictionary.Dictionary dict;
-  private MorphologicalProcessor morphy;
+  private final MorphologicalProcessor morphy;
 
   /**
-   * Creates JWNL dictionary and morphological processor objects in
-   * JWNLemmatizer constructor. It also loads the JWNL configuration into the
-   * constructor. 
-   * <p>
-   * Constructor code based on Apache OpenNLP JWNLDictionary class. 
+   * Initializes a {@link JWNLLemmatizer} instance.
+   * Loads {@link Dictionary JWNL dictionary} and {@link MorphologicalProcessor} objects.
+   * It also loads the JWNL configuration.
    * 
-   * @param wnDirectory
-   * @throws IOException
+   * @throws JWNLException Thrown if errors occurred ramping up the WordNet resources.
    */
-  public JWNLLemmatizer(String wnDirectory) throws IOException {
+  public JWNLLemmatizer() throws JWNLException {
     super();
-    PointerType.initialize();
-    Adjective.initialize();
-    VerbFrame.initialize();
-    Map<POS, String[][]> suffixMap = new HashMap<>();
-    suffixMap.put(POS.NOUN, new String[][] { { "s", "" }, { "ses", "s" },
-        { "xes", "x" }, { "zes", "z" }, { "ches", "ch" }, { "shes", "sh" },
-        { "men", "man" }, { "ies", "y" } });
-    suffixMap.put(POS.VERB, new String[][] { { "s", "" }, { "ies", "y" },
-        { "es", "e" }, { "es", "" }, { "ed", "e" }, { "ed", "" },
-        { "ing", "e" }, { "ing", "" } });
-    suffixMap.put(POS.ADJECTIVE, new String[][] { { "er", "" }, { "est", "" },
-        { "er", "e" }, { "est", "e" } });
-    DetachSuffixesOperation tokDso = new DetachSuffixesOperation(suffixMap);
-    tokDso.addDelegate(DetachSuffixesOperation.OPERATIONS, new Operation[] {
-        new LookupIndexWordOperation(), new LookupExceptionsOperation() });
-    TokenizerOperation tokOp = new TokenizerOperation(new String[] { " ", "-" 
});
-    tokOp.addDelegate(TokenizerOperation.TOKEN_OPERATIONS,
-        new Operation[] { new LookupIndexWordOperation(),
-            new LookupExceptionsOperation(), tokDso });
-    DetachSuffixesOperation morphDso = new DetachSuffixesOperation(suffixMap);
-    morphDso.addDelegate(DetachSuffixesOperation.OPERATIONS, new Operation[] {
-        new LookupIndexWordOperation(), new LookupExceptionsOperation() });
-    Operation[] operations = { new LookupExceptionsOperation(), morphDso, 
tokOp };
-    morphy = new DefaultMorphologicalProcessor(operations);
-    FileManager manager = new FileManagerImpl(wnDirectory,
-        PrincetonRandomAccessDictionaryFile.class);
-    FileDictionaryElementFactory factory = new 
PrincetonWN17FileDictionaryElementFactory();
-    FileBackedDictionary.install(manager, morphy, factory, true);
-    dict = net.didion.jwnl.dictionary.Dictionary.getInstance();
+    Dictionary dict = Dictionary.getDefaultResourceInstance();
     morphy = dict.getMorphologicalProcessor();
   }
 
   /**
-   * It takes a word and a POS tag and obtains a word's lemma from WordNet.
+   * Takes a word and a POS tag and obtains a word's lemma from WordNet.
    * 
-   * @param word
-   * @param postag
-   * @return lemma
+   * @param word The word to find the corresponding lemma for.
+   * @param postag The POS tag associated with the {@code word}.
+   * @return lemma The lemma as provided by WordNet, or {@code null} if not found.
    */
   public String lemmatize(String word, String postag) {
     String constantTag = "NNP";
-    IndexWord baseForm;
     String lemma;
     try {
       POS pos;
@@ -117,18 +76,15 @@ public class JWNLLemmatizer implements Lemmatizer {
       } else {
         pos = POS.ADVERB;
       }
-      baseForm = morphy.lookupBaseForm(pos, word);
+      IndexWord baseForm = morphy.lookupBaseForm(pos, word);
       if (baseForm != null) {
-        lemma = baseForm.getLemma().toString();
+        lemma = baseForm.getLemma();
+      } else if (postag.startsWith(constantTag)) {
+        lemma = word;
+      } else {
+        lemma= word.toLowerCase();
       }
-      else if (baseForm == null && postag.startsWith(constantTag)) {
-          lemma = word;
-        }
-        else {
-          lemma= word.toLowerCase();
-        }
     } catch (JWNLException e) {
-      e.printStackTrace();
       return null;
     }
     return lemma;
diff --git a/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java b/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java
new file mode 100644
index 0000000..e417830
--- /dev/null
+++ b/jwnl-addon/src/test/java/opennlp/jwnl/lemmatizer/JWNLLemmatizerTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.jwnl.lemmatizer;
+
+import java.util.List;
+import java.util.stream.Stream;
+
+import net.sf.extjwnl.JWNLException;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class JWNLLemmatizerTest {
+
+  // SUT
+  private JWNLLemmatizer lemmatizer;
+
+  @BeforeEach
+  public void setUp() throws JWNLException {
+    lemmatizer = new JWNLLemmatizer();
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideData")
+  public void testLemmatize(String word, String posTag, String expectedLemma) {
+    String lemma = lemmatizer.lemmatize(word, posTag);
+    assertNotNull(lemma);
+    assertEquals(expectedLemma, lemma);
+  }
+
+  @ParameterizedTest
+  @MethodSource("provideData")
+  public void testLemmatizeArray(String word, String posTag, String 
expectedLemma) {
+    String[] lemma = lemmatizer.lemmatize(new String[]{word}, new 
String[]{posTag});
+    assertNotNull(lemma);
+    assertEquals(1, lemma.length);
+    assertEquals(expectedLemma, lemma[0]);
+  }
+
+  @Test
+  public void testLemmatizeList() {
+    assertThrows(UnsupportedOperationException.class, () -> {
+      lemmatizer.lemmatize(List.of("mouse"), List.of("NN"));
+    });
+  }
+
+  private static Stream<Arguments> provideData() {
+    return Stream.of(
+        Arguments.of("the", "DT", "the"),
+        Arguments.of("cats", "NN", "cat"),
+        Arguments.of("saw", "VB", "see"),
+        Arguments.of("best", "JJS", "good"),
+        Arguments.of("upside", "RB", "upside")
+    );
+  }
+}
diff --git a/jwnl-addon/src/test/resources/log4j2.xml b/jwnl-addon/src/test/resources/log4j2.xml
new file mode 100644
index 0000000..8b6b24c
--- /dev/null
+++ b/jwnl-addon/src/test/resources/log4j2.xml
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<Configuration>
+    <Appenders>
+        <Console name="STDOUT" target="SYSTEM_OUT">
+            <!--
+                The pattern can be adjusted as needed, see https://logging.apache.org/log4j/2.x/manual/layouts.html
+            -->
+            <PatternLayout pattern="%m%n"/>
+        </Console>
+    </Appenders>
+
+    <Loggers>
+        <Logger name="opennlp.jwnl.lemmatizer" level="warn"/>
+        <Logger name="opennlp.tools" level="warn"/>
+        <Root level="INFO">
+            <AppenderRef ref="STDOUT"/>
+        </Root>
+    </Loggers>
+</Configuration>
diff --git a/pom.xml b/pom.xml
index 77eb51f..5c46bf7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -342,7 +342,6 @@
             <plugin>
                 <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-compiler-plugin</artifactId>
-                <version>3.13.0</version>
                 <configuration>
                     <release>${java.version}</release>
                     <compilerArgument>-Xlint</compilerArgument>

Reply via email to