This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1593-Provide-tests-for-all-Summarizer-implementations
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git

commit 79475212de77db80d0e30f0a1a47c4483dc48233
Author: Martin Wiesner <[email protected]>
AuthorDate: Wed Jul 10 10:00:34 2024 +0200

    OPENNLP-1593 Provide tests for all Summarizer implementations
    - provides JUnit tests for LexicalChainingSummarizer and TextRankSummarizer
    - improves the existing code so that summarization works, fixes a bug in LexicalChainingSummarizer#summarize
    - improves the code quality
    - adds JavaDoc where possible
---
 .../lexicalchaining/LexicalChainingSummarizer.java | 10 ++--
 .../opennlp/summarization/meta/MetaSummarizer.java |  5 +-
 .../summarization/textrank/TextRankSummarizer.java | 19 +++++---
 ...arizerTest.java => AbstractSummarizerTest.java} | 29 +++++-------
 .../LexicalChainingSummarizerTest.java             | 43 +++++++++++++++++
 .../summarization/meta/MetaSummarizerTest.java     | 55 ++++------------------
 .../textrank/TextRankSummarizerTest.java           | 42 +++++++++++++++++
 7 files changed, 130 insertions(+), 73 deletions(-)

diff --git 
a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java
 
b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java
index 513fb54..f243d69 100755
--- 
a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java
+++ 
b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java
@@ -34,6 +34,9 @@ import opennlp.summarization.Summarizer;
  * that share a word that are very closely related. Thus, the longest chain 
represents the most important
  * topic and so forth. A summary can then be formed by identifying the most 
important lexical chains
  * and "pulling" out sentences from them.
+ *
+ * @see LexicalChain
+ * @see Summarizer
  */
 public class LexicalChainingSummarizer implements Summarizer {
 
@@ -186,11 +189,12 @@ public class LexicalChainingSummarizer implements 
Summarizer {
     List<Sentence> summ = new ArrayList<>();
     StringBuilder sb = new StringBuilder();
     for (int i = 0; i < lc.size(); i++) {
-      for (int j = 0; j < lc.size(); j++) {
-        Sentence candidate = lc.get(i).sentences.get(j);
+      LexicalChain chain = lc.get(i);
+      for (int j = 0; j < chain.sentences.size(); j++) {
+        Sentence candidate = chain.sentences.get(j);
         if (!summ.contains(candidate)) {
           summ.add(candidate);
-          sb.append(candidate.getStringVal());
+          sb.append(candidate.getStringVal()).append(" ");
           summSize += candidate.getWordCnt();
           break;
         }
diff --git 
a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java 
b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
index bed3c2d..7fa1155 100644
--- a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
+++ b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java
@@ -35,8 +35,11 @@ import opennlp.summarization.DocProcessor;
 
 /**
  * A summarizer that combines results from the text rank algorithm and the 
lexical chaining algorithm.
- * It runs both algorithm and uses the lexical chains to identify the main 
topics and relative importance
+ * It runs both algorithms and uses the lexical chains to identify the main 
topics and relative importance
  * and the text rank to pick sentences from lexical chains.
+ *
+ * @see TextRankSummarizer
+ * @see LexicalChainingSummarizer
  */
 public class MetaSummarizer implements Summarizer {
 
diff --git 
a/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
 
b/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
index 47c2514..765bb94 100755
--- 
a/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
+++ 
b/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java
@@ -27,10 +27,18 @@ import java.util.ArrayList;
 import java.util.Hashtable;
 import java.util.List;
 
-/*
- * A wrapper around the text rank algorithm.  This class
- * a) Sets up the data for the TextRank class
- * b) Takes the ranked sentences and does some basic rearranging (e.g. 
ordering) to provide a more reasonable summary.
+/**
+ * A wrapper {@link Summarizer} implementation around the {@link TextRank text 
rank} algorithm.
+ * <p>
+ * This implementation:
+ * <ol>
+ * <li>sets up the data for the {@link TextRank} class</li>
+ * <li>takes the ranked sentences and conducts rearranging (e.g. ordering) to 
provide
+ * a more reasonable summary.</li>
+ * </ol>
+ *
+ * @see TextRank
+ * @see Summarizer
  */
 public class TextRankSummarizer implements Summarizer {
 
@@ -98,7 +106,6 @@ public class TextRankSummarizer implements Summarizer {
     return null;
   }
 
-  //Returns the summary as a string.
   @Override
   public String summarize(String article, int maxWords) {
     List<Sentence> sentences = docProcessor.getSentencesFromStr(article);
@@ -112,7 +119,7 @@ public class TextRankSummarizer implements Summarizer {
     int i = 0;
     while (b.length() < maxWords && i < scores.size()) {
       String sent = sentences.get(scores.get(i).getSentId()).getStringVal();
-      b.append(sent).append(scores.get(i));
+      b.append(sent); //.append(scores.get(i));
       i++;
     }
     return b.toString();
diff --git 
a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java 
b/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java
similarity index 75%
copy from 
summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
copy to 
summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java
index bd69886..ce7bc50 100644
--- 
a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
+++ b/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java
@@ -15,14 +15,12 @@
  * limitations under the License.
  */
 
-package opennlp.summarization.meta;
+package opennlp.summarization;
 
-import opennlp.summarization.Summarizer;
 import opennlp.summarization.lexicalchaining.OpenNLPPOSTagger;
 import opennlp.summarization.preprocess.DefaultDocProcessor;
 
 import org.junit.jupiter.api.BeforeAll;
-import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.ValueSource;
 import org.slf4j.Logger;
@@ -34,26 +32,23 @@ import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
-public class MetaSummarizerTest {
+public abstract class AbstractSummarizerTest {
 
-  private static final Logger log = 
LoggerFactory.getLogger(MetaSummarizerTest.class);
-  
-  private static DefaultDocProcessor docProcessor;
-  private static OpenNLPPOSTagger posTagger;
+  private static final Logger log = 
LoggerFactory.getLogger(AbstractSummarizerTest.class);
 
-  // SUT
-  private Summarizer metaSummarizer;
+  protected static DefaultDocProcessor docProcessor;
+  protected static OpenNLPPOSTagger posTagger;
 
   @BeforeAll
   static void initEnv() throws IOException {
-    docProcessor = new 
DefaultDocProcessor(MetaSummarizerTest.class.getResourceAsStream("/en-sent.bin"));
-    posTagger = new OpenNLPPOSTagger(docProcessor, 
MetaSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin"));
+    docProcessor = new 
DefaultDocProcessor(AbstractSummarizerTest.class.getResourceAsStream("/en-sent.bin"));
+    posTagger = new OpenNLPPOSTagger(docProcessor, 
AbstractSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin"));
   }
 
-  @BeforeEach
-  void setUp() {
-    metaSummarizer = new MetaSummarizer(docProcessor, posTagger);
-  }
+  /**
+   * @return Obtains the {@link Summarizer} under test.
+   */
+  public abstract Summarizer getSummarizer();
 
   @ParameterizedTest(name = "news story {index}")
   @ValueSource(strings = {
@@ -69,7 +64,7 @@ public class MetaSummarizerTest {
   })
   public void testSummarize(String filename) {
     String article = docProcessor.docToString(filename);
-    String summary = metaSummarizer.summarize(article, 20);
+    String summary = getSummarizer().summarize(article, 20);
     assertNotNull(summary);
     assertFalse(summary.isBlank());
     assertTrue(summary.length() > 20);
diff --git 
a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java
 
b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java
new file mode 100644
index 0000000..5d23bef
--- /dev/null
+++ 
b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.summarization.lexicalchaining;
+
+import opennlp.summarization.AbstractSummarizerTest;
+import opennlp.summarization.Summarizer;
+
+import org.junit.jupiter.api.BeforeEach;
+
+/**
+ * Tests the implementation of {@link LexicalChainingSummarizer}.
+ */
+public class LexicalChainingSummarizerTest extends AbstractSummarizerTest {
+
+  // SUT
+  private Summarizer lexicalChainSummarizer;
+
+  @BeforeEach
+  void setUp() {
+    lexicalChainSummarizer = new LexicalChainingSummarizer(docProcessor, 
posTagger);
+  }
+
+  @Override
+  public Summarizer getSummarizer() {
+    return lexicalChainSummarizer;
+  }
+
+}
diff --git 
a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java 
b/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
index bd69886..2a80782 100644
--- 
a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
+++ 
b/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java
@@ -17,64 +17,27 @@
 
 package opennlp.summarization.meta;
 
+import opennlp.summarization.AbstractSummarizerTest;
 import opennlp.summarization.Summarizer;
-import opennlp.summarization.lexicalchaining.OpenNLPPOSTagger;
-import opennlp.summarization.preprocess.DefaultDocProcessor;
 
-import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.ValueSource;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertNotNull;
-import static org.junit.jupiter.api.Assertions.assertTrue;
-
-public class MetaSummarizerTest {
-
-  private static final Logger log = 
LoggerFactory.getLogger(MetaSummarizerTest.class);
-  
-  private static DefaultDocProcessor docProcessor;
-  private static OpenNLPPOSTagger posTagger;
+/**
+ * Tests the implementation of {@link MetaSummarizer}.
+ */
+public class MetaSummarizerTest extends AbstractSummarizerTest {
 
   // SUT
   private Summarizer metaSummarizer;
 
-  @BeforeAll
-  static void initEnv() throws IOException {
-    docProcessor = new 
DefaultDocProcessor(MetaSummarizerTest.class.getResourceAsStream("/en-sent.bin"));
-    posTagger = new OpenNLPPOSTagger(docProcessor, 
MetaSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin"));
-  }
-
   @BeforeEach
   void setUp() {
     metaSummarizer = new MetaSummarizer(docProcessor, posTagger);
   }
 
-  @ParameterizedTest(name = "news story {index}")
-  @ValueSource(strings = {
-          "/meta/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story",
-          "/meta/0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story",
-          "/meta/0a3040b6c1bba95efca727158f128a19c44ec8ba.story",
-          "/meta/0a3479b53796863a664c32ca20d8672583335d2a.story",
-          "/meta/0a3639cb86487e72e2ba084211f99799918aedf8.story",
-          "/meta/0a4092bef1801863296777ebcfeceb1aec23c78f.story",
-          "/meta/0a5458d3427b290524a8df11d8503a5b57b32747.story",
-          "/meta/0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story",
-          "/meta/0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story"
-  })
-  public void testSummarize(String filename) {
-    String article = docProcessor.docToString(filename);
-    String summary = metaSummarizer.summarize(article, 20);
-    assertNotNull(summary);
-    assertFalse(summary.isBlank());
-    assertTrue(summary.length() > 20);
-    if (log.isDebugEnabled()) {
-      log.debug(summary);
-    }
+  @Override
+  public Summarizer getSummarizer() {
+    return metaSummarizer;
   }
+
 }
diff --git 
a/summarizer/src/test/java/opennlp/summarization/textrank/TextRankSummarizerTest.java
 
b/summarizer/src/test/java/opennlp/summarization/textrank/TextRankSummarizerTest.java
new file mode 100644
index 0000000..31f89f6
--- /dev/null
+++ 
b/summarizer/src/test/java/opennlp/summarization/textrank/TextRankSummarizerTest.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.summarization.textrank;
+
+import opennlp.summarization.AbstractSummarizerTest;
+import opennlp.summarization.Summarizer;
+
+import org.junit.jupiter.api.BeforeEach;
+
+/**
+ * Tests the implementation of {@link TextRankSummarizer}.
+ */
+public class TextRankSummarizerTest extends AbstractSummarizerTest {
+
+  // SUT
+  private Summarizer textRankSummarizer;
+
+  @BeforeEach
+  void setUp() {
+    textRankSummarizer = new TextRankSummarizer(docProcessor);
+  }
+
+  @Override
+  public Summarizer getSummarizer() {
+    return textRankSummarizer;
+  }
+}

Reply via email to