This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1593-Provide-tests-for-all-Summarizer-implementations
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit 79475212de77db80d0e30f0a1a47c4483dc48233 Author: Martin Wiesner <[email protected]> AuthorDate: Wed Jul 10 10:00:34 2024 +0200 OPENNLP-1593 Provide tests for all Summarizer implementations - provides JUnit tests for LexicalChainingSummarizer and TextRankSummarizer - improves the existing code so that summarization works, fixes a bug in LexicalChainingSummarizer#summarize - improves the code quality - adds JavaDoc where possible --- .../lexicalchaining/LexicalChainingSummarizer.java | 10 ++-- .../opennlp/summarization/meta/MetaSummarizer.java | 5 +- .../summarization/textrank/TextRankSummarizer.java | 19 +++++--- ...arizerTest.java => AbstractSummarizerTest.java} | 29 +++++------- .../LexicalChainingSummarizerTest.java | 43 +++++++++++++++++ .../summarization/meta/MetaSummarizerTest.java | 55 ++++------------------ .../textrank/TextRankSummarizerTest.java | 42 +++++++++++++++++ 7 files changed, 130 insertions(+), 73 deletions(-) diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java index 513fb54..f243d69 100755 --- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java +++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java @@ -34,6 +34,9 @@ import opennlp.summarization.Summarizer; * that share a word that are very closely related. Thus, the longest chain represents the most important * topic and so forth. A summary can then be formed by identifying the most important lexical chains * and "pulling" out sentences from them. 
+ * + * @see LexicalChain + * @see Summarizer */ public class LexicalChainingSummarizer implements Summarizer { @@ -186,11 +189,12 @@ public class LexicalChainingSummarizer implements Summarizer { List<Sentence> summ = new ArrayList<>(); StringBuilder sb = new StringBuilder(); for (int i = 0; i < lc.size(); i++) { - for (int j = 0; j < lc.size(); j++) { - Sentence candidate = lc.get(i).sentences.get(j); + LexicalChain chain = lc.get(i); + for (int j = 0; j < chain.sentences.size(); j++) { + Sentence candidate = chain.sentences.get(j); if (!summ.contains(candidate)) { summ.add(candidate); - sb.append(candidate.getStringVal()); + sb.append(candidate.getStringVal()).append(" "); summSize += candidate.getWordCnt(); break; } diff --git a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java index bed3c2d..7fa1155 100644 --- a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java +++ b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java @@ -35,8 +35,11 @@ import opennlp.summarization.DocProcessor; /** * A summarizer that combines results from the text rank algorithm and the lexical chaining algorithm. - * It runs both algorithm and uses the lexical chains to identify the main topics and relative importance + * It runs both algorithms and uses the lexical chains to identify the main topics and relative importance * and the text rank to pick sentences from lexical chains. 
+ * + * @see TextRankSummarizer + * @see LexicalChainingSummarizer */ public class MetaSummarizer implements Summarizer { diff --git a/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java b/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java index 47c2514..765bb94 100755 --- a/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java +++ b/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java @@ -27,10 +27,18 @@ import java.util.ArrayList; import java.util.Hashtable; import java.util.List; -/* - * A wrapper around the text rank algorithm. This class - * a) Sets up the data for the TextRank class - * b) Takes the ranked sentences and does some basic rearranging (e.g. ordering) to provide a more reasonable summary. +/** + * A wrapper {@link Summarizer} implementation around the {@link TextRank text rank} algorithm. + * <p> + * This implementation: + * <ol> + * <li>sets up the data for the {@link TextRank} class</li> + * <li>takes the ranked sentences and conducts rearranging (e.g. ordering) to provide + * a more reasonable summary.</li> + * </ol> + * + * @see TextRank + * @see Summarizer */ public class TextRankSummarizer implements Summarizer { @@ -98,7 +106,6 @@ public class TextRankSummarizer implements Summarizer { return null; } - //Returns the summary as a string. 
@Override public String summarize(String article, int maxWords) { List<Sentence> sentences = docProcessor.getSentencesFromStr(article); @@ -112,7 +119,7 @@ public class TextRankSummarizer implements Summarizer { int i = 0; while (b.length() < maxWords && i < scores.size()) { String sent = sentences.get(scores.get(i).getSentId()).getStringVal(); - b.append(sent).append(scores.get(i)); + b.append(sent); //.append(scores.get(i)); i++; } return b.toString(); diff --git a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java b/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java similarity index 75% copy from summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java copy to summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java index bd69886..ce7bc50 100644 --- a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java +++ b/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java @@ -15,14 +15,12 @@ * limitations under the License. 
*/ -package opennlp.summarization.meta; +package opennlp.summarization; -import opennlp.summarization.Summarizer; import opennlp.summarization.lexicalchaining.OpenNLPPOSTagger; import opennlp.summarization.preprocess.DefaultDocProcessor; import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; import org.slf4j.Logger; @@ -34,26 +32,23 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; -public class MetaSummarizerTest { +public abstract class AbstractSummarizerTest { - private static final Logger log = LoggerFactory.getLogger(MetaSummarizerTest.class); - - private static DefaultDocProcessor docProcessor; - private static OpenNLPPOSTagger posTagger; + private static final Logger log = LoggerFactory.getLogger(AbstractSummarizerTest.class); - // SUT - private Summarizer metaSummarizer; + protected static DefaultDocProcessor docProcessor; + protected static OpenNLPPOSTagger posTagger; @BeforeAll static void initEnv() throws IOException { - docProcessor = new DefaultDocProcessor(MetaSummarizerTest.class.getResourceAsStream("/en-sent.bin")); - posTagger = new OpenNLPPOSTagger(docProcessor, MetaSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin")); + docProcessor = new DefaultDocProcessor(AbstractSummarizerTest.class.getResourceAsStream("/en-sent.bin")); + posTagger = new OpenNLPPOSTagger(docProcessor, AbstractSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin")); } - @BeforeEach - void setUp() { - metaSummarizer = new MetaSummarizer(docProcessor, posTagger); - } + /** + * @return Obtains the {@link Summarizer} under test. 
+ */ + public abstract Summarizer getSummarizer(); @ParameterizedTest(name = "news story {index}") @ValueSource(strings = { @@ -69,7 +64,7 @@ public class MetaSummarizerTest { }) public void testSummarize(String filename) { String article = docProcessor.docToString(filename); - String summary = metaSummarizer.summarize(article, 20); + String summary = getSummarizer().summarize(article, 20); assertNotNull(summary); assertFalse(summary.isBlank()); assertTrue(summary.length() > 20); diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java new file mode 100644 index 0000000..5d23bef --- /dev/null +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.summarization.lexicalchaining; + +import opennlp.summarization.AbstractSummarizerTest; +import opennlp.summarization.Summarizer; + +import org.junit.jupiter.api.BeforeEach; + +/** + * Tests the implementation of {@link LexicalChainingSummarizer}. 
+ */ +public class LexicalChainingSummarizerTest extends AbstractSummarizerTest { + + // SUT + private Summarizer lexicalChainSummarizer; + + @BeforeEach + void setUp() { + lexicalChainSummarizer = new LexicalChainingSummarizer(docProcessor, posTagger); + } + + @Override + public Summarizer getSummarizer() { + return lexicalChainSummarizer; + } + +} diff --git a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java b/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java index bd69886..2a80782 100644 --- a/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java +++ b/summarizer/src/test/java/opennlp/summarization/meta/MetaSummarizerTest.java @@ -17,64 +17,27 @@ package opennlp.summarization.meta; +import opennlp.summarization.AbstractSummarizerTest; import opennlp.summarization.Summarizer; -import opennlp.summarization.lexicalchaining.OpenNLPPOSTagger; -import opennlp.summarization.preprocess.DefaultDocProcessor; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.io.IOException; - -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; - -public class MetaSummarizerTest { - - private static final Logger log = LoggerFactory.getLogger(MetaSummarizerTest.class); - - private static DefaultDocProcessor docProcessor; - private static OpenNLPPOSTagger posTagger; +/** + * Tests the implementation of {@link MetaSummarizer}. 
+ */ +public class MetaSummarizerTest extends AbstractSummarizerTest { // SUT private Summarizer metaSummarizer; - @BeforeAll - static void initEnv() throws IOException { - docProcessor = new DefaultDocProcessor(MetaSummarizerTest.class.getResourceAsStream("/en-sent.bin")); - posTagger = new OpenNLPPOSTagger(docProcessor, MetaSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin")); - } - @BeforeEach void setUp() { metaSummarizer = new MetaSummarizer(docProcessor, posTagger); } - @ParameterizedTest(name = "news story {index}") - @ValueSource(strings = { - "/meta/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story", - "/meta/0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story", - "/meta/0a3040b6c1bba95efca727158f128a19c44ec8ba.story", - "/meta/0a3479b53796863a664c32ca20d8672583335d2a.story", - "/meta/0a3639cb86487e72e2ba084211f99799918aedf8.story", - "/meta/0a4092bef1801863296777ebcfeceb1aec23c78f.story", - "/meta/0a5458d3427b290524a8df11d8503a5b57b32747.story", - "/meta/0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story", - "/meta/0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story" - }) - public void testSummarize(String filename) { - String article = docProcessor.docToString(filename); - String summary = metaSummarizer.summarize(article, 20); - assertNotNull(summary); - assertFalse(summary.isBlank()); - assertTrue(summary.length() > 20); - if (log.isDebugEnabled()) { - log.debug(summary); - } + @Override + public Summarizer getSummarizer() { + return metaSummarizer; } + } diff --git a/summarizer/src/test/java/opennlp/summarization/textrank/TextRankSummarizerTest.java b/summarizer/src/test/java/opennlp/summarization/textrank/TextRankSummarizerTest.java new file mode 100644 index 0000000..31f89f6 --- /dev/null +++ b/summarizer/src/test/java/opennlp/summarization/textrank/TextRankSummarizerTest.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.summarization.textrank; + +import opennlp.summarization.AbstractSummarizerTest; +import opennlp.summarization.Summarizer; + +import org.junit.jupiter.api.BeforeEach; + +/** + * Tests the implementation of {@link TextRankSummarizer}. + */ +public class TextRankSummarizerTest extends AbstractSummarizerTest { + + // SUT + private Summarizer textRankSummarizer; + + @BeforeEach + void setUp() { + textRankSummarizer = new TextRankSummarizer(docProcessor); + } + + @Override + public Summarizer getSummarizer() { + return textRankSummarizer; + } +}
