This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch OPENNLP-1594-Add-stricter-tests-for-Summarizer-component in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit 976f251606221335a3c0f22971f885bcc80ea785 Author: Martin Wiesner <[email protected]> AuthorDate: Thu Jul 11 11:09:09 2024 +0200 OPENNLP-1594 Add stricter tests for Summarizer component - adds further, stricter tests - clarifies, at API level, the semantics and constraints of parameters - separates tests so that each test class has a clear responsibility for its class under test - removes binary model files from test/resources folder - improves / enhances the JavaDoc further --- summarizer/pom.xml | 10 +- .../java/opennlp/summarization/DocProcessor.java | 14 +- .../src/main/java/opennlp/summarization/Score.java | 8 +- .../main/java/opennlp/summarization/Sentence.java | 80 +++++---- .../java/opennlp/summarization/Summarizer.java | 9 +- .../LexChainingKeywordExtractor.java | 45 +++-- .../lexicalchaining/LexicalChain.java | 18 +- .../lexicalchaining/LexicalChainingSummarizer.java | 200 ++++++++++++--------- .../lexicalchaining/NounPOSTagger.java | 124 +++++++++++++ .../lexicalchaining/OpenNLPPOSTagger.java | 92 ---------- .../summarization/lexicalchaining/POSTagger.java | 25 ++- .../WordRelationshipDetermination.java | 21 +-- .../summarization/lexicalchaining/WordnetWord.java | 69 ++++++- .../opennlp/summarization/meta/MetaSummarizer.java | 6 +- .../preprocess/DefaultDocProcessor.java | 156 +++++++++------- .../summarization/preprocess/IDFWordWeight.java | 13 +- .../summarization/preprocess/StopWords.java | 2 +- .../summarization/preprocess/WordWeight.java | 9 +- .../opennlp/summarization/textrank/TextRank.java | 100 ++++++----- .../summarization/textrank/TextRankSummarizer.java | 2 +- .../summarization/AbstractSummarizerTest.java | 28 +-- .../java/opennlp/summarization/SentenceTest.java | 104 +++++++++++ .../lexicalchaining/AbstractLexicalChainTest.java | 40 +++++ .../lexicalchaining/LexChainTest.java | 109 ----------- .../LexChainingKeywordExtractorTest.java | 68 ++++--- .../LexicalChainingSummarizerNewsTest.java} | 37 ++-- 
.../LexicalChainingSummarizerTest.java | 55 ++++-- .../lexicalchaining/NounPOSTaggerTest.java | 104 +++++++++++ .../WordRelationshipDeterminationTest.java | 63 +++++++ .../lexicalchaining/WordnetWordTest.java | 107 +++++++++++ .../preprocess/DefaultDocProcessorTest.java | 116 ++++++++++++ summarizer/src/test/resources/{meta => }/Notes.txt | 0 summarizer/src/test/resources/en-pos-maxent.bin | Bin 1175564 -> 0 bytes summarizer/src/test/resources/en-sent.bin | Bin 20317 -> 0 bytes summarizer/src/test/resources/{meta => }/idf.csv | 0 .../0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story | 0 .../0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story | 0 .../0a3040b6c1bba95efca727158f128a19c44ec8ba.story | 0 .../0a3479b53796863a664c32ca20d8672583335d2a.story | 0 .../0a3639cb86487e72e2ba084211f99799918aedf8.story | 0 .../0a4092bef1801863296777ebcfeceb1aec23c78f.story | 0 .../0a4324d4a5effa420aa95bb058314eab35c73852.story | 0 .../0a5458d3427b290524a8df11d8503a5b57b32747.story | 0 .../0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story | 0 .../0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story | 0 45 files changed, 1277 insertions(+), 557 deletions(-) diff --git a/summarizer/pom.xml b/summarizer/pom.xml index 19237f3..2c4da8f 100644 --- a/summarizer/pom.xml +++ b/summarizer/pom.xml @@ -31,10 +31,18 @@ <name>Apache OpenNLP Summarizer</name> <properties> + <wordnet.version>2.4.0</wordnet.version> <wordnet-dict.version>3.1</wordnet-dict.version> <maven.download.plugin>1.9.0</maven.download.plugin> </properties> + <repositories> + <repository> + <id>maven.aksw.org</id> + <url>https://maven.aksw.org/repository/internal/</url> + <releases/> + </repository> + </repositories> <dependencies> <dependency> @@ -45,7 +53,7 @@ <dependency> <groupId>edu.mit</groupId> <artifactId>jwi</artifactId> - <version>2.2.3</version> + <version>${wordnet.version}</version> </dependency> <dependency> diff --git a/summarizer/src/main/java/opennlp/summarization/DocProcessor.java 
b/summarizer/src/main/java/opennlp/summarization/DocProcessor.java index 65a992f..756744f 100644 --- a/summarizer/src/main/java/opennlp/summarization/DocProcessor.java +++ b/summarizer/src/main/java/opennlp/summarization/DocProcessor.java @@ -31,12 +31,20 @@ import opennlp.tools.stemmer.Stemmer; public interface DocProcessor { /** - * Extracts sentences from a string representing an article. + * Extracts {@link Sentence sentences} from a string representing an article. + * + * @param text The text to process; if {@code null} or empty, an empty list is returned. + * + * @return The resulting list of detected {@link Sentence sentences}. */ - List<Sentence> getSentencesFromStr(String text); + List<Sentence> getSentences(String text); /** - * Parses out words from a specified {@link String sent}. + * Extracts words from a specified {@link String sent}. + * + * @param sent The sentence to process; if {@code null} or empty, an zero length array is returned. + * + * @return An array of tokens (words) contained in the given {@code sent}. */ String[] getWords(String sent); diff --git a/summarizer/src/main/java/opennlp/summarization/Score.java b/summarizer/src/main/java/opennlp/summarization/Score.java index 76a2694..80751d6 100755 --- a/summarizer/src/main/java/opennlp/summarization/Score.java +++ b/summarizer/src/main/java/opennlp/summarization/Score.java @@ -18,14 +18,15 @@ package opennlp.summarization; /** - * Stores the score of a sentence for ranking sentences within a document. + * Encapsulates the score of a sentence for the purpose of ranking sentences within a document. 
*/ public class Score implements Comparable<Score> { private int sentId; private double score; - public Score() { - score = 0; + public Score(int sentId, double score) { + this.sentId = sentId; + this.score = score; } public int getSentId() { @@ -46,7 +47,6 @@ public class Score implements Comparable<Score> { @Override public int compareTo(Score o) { - if (o.score > score) return 1; else if (o.score < score) return -1; return 0; diff --git a/summarizer/src/main/java/opennlp/summarization/Sentence.java b/summarizer/src/main/java/opennlp/summarization/Sentence.java index a158199..e59d809 100755 --- a/summarizer/src/main/java/opennlp/summarization/Sentence.java +++ b/summarizer/src/main/java/opennlp/summarization/Sentence.java @@ -32,7 +32,8 @@ import opennlp.tools.stemmer.PorterStemmer; public class Sentence { private static final String SPACE = " "; - private final List<Sentence> links; + private final List<Sentence> links = new ArrayList<>(); + // sentId is always position of sentence in doc. private int sentId; private String stringVal; @@ -43,23 +44,32 @@ public class Sentence { private double wordWt = 0; private int wordCnt; - public Sentence() { - links = new ArrayList<>(); - } + /** + * Instantiates a plain {@link Sentence} via a set of parameters. + * + * @param id A numeric identifier with a postive value. + * @param stringVal The string representation of the sentence. + * @param paragraph TODO clarify exact meaning. + * @param paraPos TODO clarify exact meaning. + * @throws IllegalArgumentException Thrown if parameters are invalid. 
+ */ + public Sentence(int id, String stringVal, int paragraph, int paraPos) { + if (id < 0) throw new IllegalArgumentException("Parameter 'id' cannot be negative"); + if (stringVal == null || stringVal.isBlank()) + throw new IllegalArgumentException("Parameter 'stringVal' must not be null"); + if (paragraph < 0) throw new IllegalArgumentException("Parameter 'paragraph' cannot be negative"); + if (paraPos < 0) throw new IllegalArgumentException("Parameter 'paraPos' cannot be negative"); - public Sentence(int id) { - this(); this.sentId = id; - } + setParagraph(paragraph); + setStringVal(stringVal); + setParaPos(paraPos); + }; public int getSentId() { return sentId; } - public void setSentId(int sentId) { - this.sentId = sentId; - } - public Score getPageRankScore() { return pageRankScore; } @@ -113,38 +123,21 @@ public class Sentence { return this.links; } - public double getWordWt() { + public double getWordWeight() { return wordWt; } - public void setWordWt(double wordWt) { + public void setWordWeight(double wordWt) { this.wordWt = wordWt; } public int getWordCnt() { - return wordCnt == 0 ? this.getStringVal().split("\\s+").length : wordCnt; - } - - // Should add an article id to the sentence class. For now returns true if the ids are the same. - - @Override - public final boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof Sentence sentence)) return false; - - return sentId == sentence.sentId; - } - - @Override - public int hashCode() { - return Objects.hash(sentId); - } - - @Override - public String toString() { - return this.stringVal;//+ "("+ this.paragraph +", "+this.paraPos+")"; + return wordCnt; } + /** + * @return Applies stemming to each word and returns a fully-stemmed representation of a sentence. 
+ */ public String stem() { PorterStemmer stemmer = new PorterStemmer(); StopWords sw = StopWords.getInstance(); @@ -167,4 +160,23 @@ public class Sentence { } return b.toString(); } + + // Should add an article id to the sentence class. For now returns true if the ids are the same. + @Override + public final boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Sentence sentence)) return false; + + return sentId == sentence.sentId; + } + + @Override + public int hashCode() { + return Objects.hash(sentId); + } + + @Override + public String toString() { + return this.stringVal;//+ "("+ this.paragraph +", "+this.paraPos+")"; + } } diff --git a/summarizer/src/main/java/opennlp/summarization/Summarizer.java b/summarizer/src/main/java/opennlp/summarization/Summarizer.java index e3ae124..8271868 100644 --- a/summarizer/src/main/java/opennlp/summarization/Summarizer.java +++ b/summarizer/src/main/java/opennlp/summarization/Summarizer.java @@ -17,15 +17,18 @@ package opennlp.summarization; +/** + * Describes the API of a component which summarizes the content of news, articles or books. + */ public interface Summarizer { /** - * Summarizes a given {@code article}. The length of the summary is + * Summarizes a given {@code text}. The length of the summary is * influenced by the specified {@code maxWords} parameter. * - * @param article The text to summarize. Must not be {@code null} and not be blank. + * @param text The content to summarize. Must not be {@code null} and not be blank. * @param maxWords The maximum number of words. Must be larger than {@code zero}. * @return The summary or an {@code empty} String if no summary could be derived. 
*/ - String summarize(String article, int maxWords); + String summarize(String text, int maxWords); } diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractor.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractor.java index a313928..10820cd 100644 --- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractor.java +++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractor.java @@ -22,20 +22,45 @@ import java.util.Collections; import java.util.List; /** - * Uses the lexical chaining algorithm to extract keywords. + * Uses the {@link LexicalChain lexical chaining} algorithm to extract keywords. + * + * @see LexicalChain */ public class LexChainingKeywordExtractor { - // Simple logic to pull out the keyword based on longest lexical chains.. - public List<String> getKeywords(List<LexicalChain> lexicalChains, int noOfKeywords) { - Collections.sort(lexicalChains); - List<String> ret = new ArrayList<>(); - for (int i = 0; i < Math.min(lexicalChains.size(), noOfKeywords); i++) { - List<Word> words = lexicalChains.get(i).getWord(); - if (!words.isEmpty() && !ret.contains(words.get(0).getLexicon())) { - ret.add(words.get(0).getLexicon()); + /** + * Extracts keywords from a list of {@link LexicalChain lexical chains}, limited by {@code noOfKeywords}. + * + * @param lexicalChains The {@link LexicalChain lexical chains} to process. Must not be {@code null}. + * @param noOfKeywords The upper limit of keywords. Must be greater than {@code zero}. + * + * @return The extracted keywords as a list. Guaranteed to be not {@code null}. + * + * @throws IllegalArgumentException Thrown if parameters are invalid. + * @implNote This operation is based on longest lexical chains. 
+ */ + public List<String> extractKeywords(List<LexicalChain> lexicalChains, int noOfKeywords) { + if (lexicalChains == null) { + throw new IllegalArgumentException("Parameter 'lexicalChains' must not be null."); + } + if (noOfKeywords <= 0) { + throw new IllegalArgumentException("Parameter 'noOfKeywords' must be greater than 0."); + } + if (lexicalChains.isEmpty()) { + return Collections.emptyList(); + } else { + Collections.sort(lexicalChains); + List<String> ret = new ArrayList<>(); + for (int i = 0; i < Math.min(lexicalChains.size(), noOfKeywords); i++) { + List<Word> words = lexicalChains.get(i).getWords(); + if (!words.isEmpty()) { + Word w = words.get(0); + if (!ret.contains(w.getLexicon())) { + ret.add(w.getLexicon()); + } + } } + return ret; } - return ret; } } diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChain.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChain.java index 3da83e3..612465c 100644 --- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChain.java +++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChain.java @@ -22,17 +22,19 @@ import java.util.List; import opennlp.summarization.Sentence; +/** + * Represents a lexical chain. 
+ */ public class LexicalChain implements Comparable<LexicalChain> { - final List<Word> word; - final List<Sentence> sentences; + + final List<Word> words = new ArrayList<>(); + final List<Sentence> sentences = new ArrayList<>(); int start, last; int score; int occurrences = 1; public LexicalChain() { - word = new ArrayList<>(); - sentences = new ArrayList<>(); } public double score() { @@ -40,7 +42,7 @@ public class LexicalChain implements Comparable<LexicalChain> { } public int length() { - return word.size(); + return words.size(); } public float homogeneity() { @@ -48,7 +50,7 @@ public class LexicalChain implements Comparable<LexicalChain> { } public void addWord(Word w) { - word.add(w); + words.add(w); } public void addSentence(Sentence sent) { @@ -56,8 +58,8 @@ public class LexicalChain implements Comparable<LexicalChain> { sentences.add(sent); } - public List<Word> getWord() { - return word; + public List<Word> getWords() { + return words; } public List<Sentence> getSentences() { diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java index f243d69..53e480b 100755 --- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java +++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java @@ -17,7 +17,7 @@ package opennlp.summarization.lexicalchaining; -import java.io.InputStream; +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Hashtable; @@ -26,15 +26,19 @@ import java.util.List; import opennlp.summarization.DocProcessor; import opennlp.summarization.Sentence; import opennlp.summarization.Summarizer; +import opennlp.tools.postag.POSModel; /** - * Implements the algorithm outlined in - "Summarization Using Lexical Chains" by R. Berzilay et al. 
- * <p> + * Implements a {@link Summarizer summarization} algorithm outlined in: <br/> + * <a href="https://aclanthology.org/W97-0703.pdf"> + * "Summarization Using Lexical Chains"</a>, by Regina Berzilay and Michael Elhadad. + * <br/><br/> * The algorithm is based on extracting so-called lexical chains - a set of sentences in the article - * that share a word that are very closely related. Thus, the longest chain represents the most important + * that share a {@link Word} that are very closely related. Thus, the longest chain represents the most important * topic and so forth. A summary can then be formed by identifying the most important lexical chains * and "pulling" out sentences from them. * + * @see Word * @see LexicalChain * @see Summarizer */ @@ -44,87 +48,122 @@ public class LexicalChainingSummarizer implements Summarizer { private final DocProcessor docProcessor; private final WordRelationshipDetermination wordRel; - public LexicalChainingSummarizer(DocProcessor dp, OpenNLPPOSTagger posTagger) { - docProcessor = dp; - tagger = posTagger; - wordRel = new WordRelationshipDetermination(); + /** + * Instantiates a {@link LexicalChainingSummarizer}. + * + * @param docProcessor The {@link DocProcessor} to use at runtime. Must not be {@code null}. + * @param languageCode An ISO-language code for obtaining a {@link POSModel}. + * Must not be {@code null}. + * + * @throws IllegalArgumentException Thrown if parameters are invalid. + */ + public LexicalChainingSummarizer(DocProcessor docProcessor, String languageCode) throws IOException { + this(docProcessor, new NounPOSTagger(languageCode)); } - public LexicalChainingSummarizer(DocProcessor dp, InputStream posModelFile) throws Exception { - this(dp, new OpenNLPPOSTagger(dp, posModelFile)); + /** + * Instantiates a {@link LexicalChainingSummarizer}. + * + * @param docProcessor The {@link DocProcessor} to use at runtime. Must not be {@code null}. + * @param posTagger The {@link NounPOSTagger} to use at runtime. 
Must not be {@code null}. + * + * @throws IllegalArgumentException Thrown if parameters are invalid. + */ + public LexicalChainingSummarizer(DocProcessor docProcessor, NounPOSTagger posTagger) { + if (docProcessor == null) throw new IllegalArgumentException("Parameter 'docProcessor' must not be null!"); + if (posTagger == null) throw new IllegalArgumentException("Parameter 'posTagger' must not be null!"); + + this.docProcessor = docProcessor; + tagger = posTagger; + wordRel = new WordRelationshipDetermination(); } - //Build Lexical chains.. - public List<LexicalChain> buildLexicalChains(String article, List<Sentence> sent) { - // POS tag article - Hashtable<String, List<LexicalChain>> chains = new Hashtable<>(); - List<LexicalChain> lc = new ArrayList<>(); - // Build lexical chains - // For each sentence - for (Sentence currSent : sent) { - String taggedSent = tagger.getTaggedString(currSent.getStringVal()); - List<String> nouns = tagger.getWordsOfType(taggedSent, POSTagger.NOUN); - // For each noun - for (String noun : nouns) { - int chainsAddCnt = 0; - // Loop through each LC - for (LexicalChain l : lc) { - try { - WordRelation rel = wordRel.getRelation(l, noun, (currSent.getSentId() - l.start) > 7); - // Is the noun an exact match to one of the current LCs (Strong relation) - // Add sentence to chain - if (rel.relation() == WordRelation.STRONG_RELATION) { - addToChain(rel.dest(), l, chains, currSent); - if (currSent.getSentId() - l.last > 10) { - l.occurrences++; - l.start = currSent.getSentId(); - } - chainsAddCnt++; - } else if (rel.relation() == WordRelation.MED_RELATION) { - // Add sentence to chain if it is 7 sentences away from start of chain - addToChain(rel.dest(), l, chains, currSent); - chainsAddCnt++; - //If greater than 7 we will add it but call it a new occurrence of the lexical chain... 
- if (currSent.getSentId() - l.start > 7) { - l.occurrences++; - l.start = currSent.getSentId(); - } - } else if (rel.relation() == WordRelation.WEAK_RELATION) { - if (currSent.getSentId() - l.start <= 3) { + /** + * Constructs a list of {@link LexicalChain lexical chains} from specified sentences. + * + * @param article TODO unused parameter -> remove it?! + * @param sentences The list of {@link Sentence sentences} to build lexical chains from. + * Must not be {@code null}. + * @return The result list of {@link LexicalChain lexical chains}. Guaranteed to be not {@code null}. + * @throws IllegalArgumentException Thrown if parameters are invalid. + */ + public List<LexicalChain> buildLexicalChains(String article, List<Sentence> sentences) { + if (sentences == null) throw new IllegalArgumentException("Parameter 'sentences' must not be null!"); + else { + if (sentences.isEmpty()) { + return Collections.emptyList(); + } + Hashtable<String, List<LexicalChain>> chains = new Hashtable<>(); + List<LexicalChain> lc = new ArrayList<>(); + // Build lexical chains + // For each sentence + for (Sentence currSent : sentences) { + // POS tag article + String taggedSent = tagger.getTaggedString(currSent.getStringVal().replace(".", " .")); + List<String> nouns = tagger.getWordsOfType(docProcessor.getWords(taggedSent), POSTagger.NOUN); + // For each noun + for (String noun : nouns) { + int chainsAddCnt = 0; + // Loop through each LC + for (LexicalChain l : lc) { + try { + WordRelation rel = wordRel.getRelation(l, noun, (currSent.getSentId() - l.start) > 7); + // Is the noun an exact match to one of the current LCs (Strong relation) + // Add sentence to chain + if (rel.relation() == WordRelation.STRONG_RELATION) { addToChain(rel.dest(), l, chains, currSent); + if (currSent.getSentId() - l.last > 10) { + l.occurrences++; + l.start = currSent.getSentId(); + } chainsAddCnt++; + } else if (rel.relation() == WordRelation.MED_RELATION) { + // Add sentence to chain if it is 7 sentences away 
from start of chain + addToChain(rel.dest(), l, chains, currSent); + chainsAddCnt++; + // If greater than 7 we will add it but call it a new occurrence of the lexical chain... + if (currSent.getSentId() - l.start > 7) { + l.occurrences++; + l.start = currSent.getSentId(); + } + } else if (rel.relation() == WordRelation.WEAK_RELATION) { + if (currSent.getSentId() - l.start <= 3) { + addToChain(rel.dest(), l, chains, currSent); + chainsAddCnt++; + } } + } catch (Exception ex) { + throw new RuntimeException(ex); } - } catch (Exception ex) { + // add sentence and update last occurrence.. + //chaincnt++ + // else 1 hop-relation in Wordnet (weak relation) + // Add sentence to chain if it is 3 sentences away from start of chain + //chaincnt++ + // End loop LC } - // add sentence and update last occurrence.. - //chaincnt++ - // else 1 hop-relation in Wordnet (weak relation) - // Add sentence to chain if it is 3 sentences away from start of chain - //chaincnt++ - // End loop LC - } - //Could not add the word to any existing list. Start a new lexical chain with the word. - if (chainsAddCnt == 0) { - List<Word> senses = wordRel.getWordSenses(noun); - for (Word w : senses) { - LexicalChain newLc = new LexicalChain(); - newLc.start = currSent.getSentId(); - addToChain(w, newLc, chains, currSent); - lc.add(newLc); + // Could not add the word to any existing list. Start a new lexical chain with the word. + if (chainsAddCnt == 0) { + List<Word> senses = wordRel.getWordSenses(noun); + for (Word w : senses) { + LexicalChain newLc = new LexicalChain(); + newLc.start = currSent.getSentId(); + addToChain(w, newLc, chains, currSent); + lc.add(newLc); + } } + if (lc.size() > 20) + purge(lc, currSent.getSentId(), sentences.size()); } - if (lc.size() > 20) - purge(lc, currSent.getSentId(), sent.size()); + //End sentence } - //End sentence - } // disambiguateAndCleanChains(lc, chains); - // Calculate score - // Length of chain * homogeneity - //sort LC by strength. 
- return lc; + // Calculate score + // Length of chain * homogeneity + //sort LC by strength. + return lc; + } } /* @@ -132,7 +171,7 @@ public class LexicalChainingSummarizer implements Summarizer { * Takes care to only remove small chains that were added "long back" */ private void purge(List<LexicalChain> lc, int sentId, int totSents) { - //Do nothing for the first 50 sentences. + //Do nothing for the first 20 sentences. if (lc.size() < 20) return; Collections.sort(lc); @@ -146,12 +185,12 @@ public class LexicalChainingSummarizer implements Summarizer { LexicalChain l = lc.get(i); if (l.score() < cutOff && (sentId - l.last) > totSents / 3)// && containsAllWords(words, l.word)) toRem.add(l); - //A different sense and added long back. - else if (words.containsKey(l.getWord().get(0).getLexicon()) && (sentId - l.start) > totSents / 10) + // A different sense and added long back. + else if (words.containsKey(l.getWords().get(0).getLexicon()) && (sentId - l.start) > totSents / 10) toRem.add(l); else { - //Check if this is from a word with different sense.. - for (Word w : l.word) + // Check if this is from a word with different sense.. 
+ for (Word w : l.words) words.put(w.getLexicon(), Boolean.TRUE); } } @@ -169,9 +208,7 @@ public class LexicalChainingSummarizer implements Summarizer { return ret; } - private void addToChain(Word noun, LexicalChain l, - Hashtable<String, List<LexicalChain>> chains, Sentence sent) { - + private void addToChain(Word noun, LexicalChain l, Hashtable<String, List<LexicalChain>> chains, Sentence sent) { l.addWord(noun); l.addSentence(sent); l.last = sent.getSentId(); @@ -182,14 +219,13 @@ public class LexicalChainingSummarizer implements Summarizer { @Override public String summarize(String article, int maxWords) { - List<Sentence> sent = docProcessor.getSentencesFromStr(article); + List<Sentence> sent = docProcessor.getSentences(article); List<LexicalChain> lc = buildLexicalChains(article, sent); Collections.sort(lc); int summSize = 0; List<Sentence> summ = new ArrayList<>(); StringBuilder sb = new StringBuilder(); - for (int i = 0; i < lc.size(); i++) { - LexicalChain chain = lc.get(i); + for (LexicalChain chain : lc) { for (int j = 0; j < chain.sentences.size(); j++) { Sentence candidate = chain.sentences.get(j); if (!summ.contains(candidate)) { diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/NounPOSTagger.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/NounPOSTagger.java new file mode 100644 index 0000000..2acc60b --- /dev/null +++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/NounPOSTagger.java @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.summarization.lexicalchaining; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Hashtable; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import opennlp.tools.postag.POSModel; +import opennlp.tools.postag.POSTaggerME; +import opennlp.tools.tokenize.WhitespaceTokenizer; +import opennlp.tools.util.DownloadUtil; + +/** + * A {@link POSTagger} wrapper implementation that relies on an OpenNLP {@link POSTaggerME}. + * + * @see POSTagger + * @see POSTaggerME + */ +public class NounPOSTagger implements POSTagger { + + public static final String[] TAGS_NOUNS = {"NOUN", "NN", "NNS", "NNP", "NNPS"}; + private static final Set<String> EOS_CHARS = Set.of(".", "?", "!"); + + private final POSTaggerME tagger; + private final Map<Integer, String[]> tagMap = new Hashtable<>(); + + /** + * Instantiates a {@link NounPOSTagger} for a POS model for the specified {@code languageCode}. + * + * @param languageCode An ISO-language code for obtaining a {@link POSModel}. + * Must not be {@code null}. + * @throws IOException Thrown if IO errors occurred. + * @throws IllegalArgumentException Thrown if parameters are invalid. 
+ */ + public NounPOSTagger(String languageCode) throws IOException { + if (languageCode == null || languageCode.isBlank()) + throw new IllegalArgumentException("Parameter 'languageCode' must not be null"); + // init Tag map + tagMap.put(POSTagger.NOUN, TAGS_NOUNS); + POSModel posModel = DownloadUtil.downloadModel(languageCode, DownloadUtil.ModelType.POS, POSModel.class); + tagger = new POSTaggerME(posModel); + } + + /** + * @return {@code true} if the type string belongs to one of the (noun) tags for the type, + * {@code false} otherwise. + */ + public boolean isType(String typeStr, int type) { + boolean ret = false; + String[] tags = tagMap.get(type); + if (tags != null) { + for (String tag : tags) { + if (typeStr.equalsIgnoreCase(tag)) { + ret = true; + break; + } + } + return ret; + } else { + return false; + } + } + + /** + * {@inheritDoc} + */ + @Override + public String getTaggedString(String input) { + if (input == null) throw new IllegalArgumentException("Parameter 'input' must not be null"); + + String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(input); + String[] tags = tagger.tag(tokens); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < tokens.length; i++) { + sb.append(tokens[i]).append("/").append(tags[i]); + // whitespace appending only for non-EOS / PUNCT tokens, skipping for actual EOS tokens + if (! 
(EOS_CHARS.contains(tokens[i]) && tokens.length == i + 1)) { + sb.append(" "); + } + } + return sb.toString(); + } + + /** + * {@inheritDoc} + */ + @Override + public List<String> getWordsOfType(String[] tokens, int type) { + if (tokens == null) throw new IllegalArgumentException("Parameter 'tokens' must not be null"); + if (type < 0 || type > PRONOUN) throw new IllegalArgumentException("Parameter 'type' must be in range [0, 4]"); + + List<String> ret = new ArrayList<>(); + for (String t : tokens) { + String[] wordPlusType = t.split("/"); + if (wordPlusType.length == 2) { + if (isType(wordPlusType[1], type)) + ret.add(wordPlusType[0]); + } else { + throw new IllegalArgumentException("Token '" + t + "' is not tagged correctly!"); + } + } + // log.info(ret.toString()); + return ret; + } +} diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/OpenNLPPOSTagger.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/OpenNLPPOSTagger.java deleted file mode 100644 index 39edde3..0000000 --- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/OpenNLPPOSTagger.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package opennlp.summarization.lexicalchaining; - -import java.io.BufferedInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; -import java.util.Hashtable; -import java.util.List; - -import opennlp.summarization.DocProcessor; -import opennlp.tools.postag.POSModel; -import opennlp.tools.postag.POSTaggerME; -import opennlp.tools.tokenize.WhitespaceTokenizer; - -public class OpenNLPPOSTagger implements POSTagger { - - private final POSTaggerME tagger; - private final DocProcessor dp; - private final String[] nounTags = {"NOUN", "NN", "NNS", "NNP", "NNPS"}; - private Hashtable<Integer, String[]> tagMap; - - public OpenNLPPOSTagger(DocProcessor dp, InputStream posModelFile) throws IOException { - this.dp = dp; - initTagMap(); - - try (InputStream modelIn = new BufferedInputStream(posModelFile)) { - POSModel model = new POSModel(modelIn); - tagger = new POSTaggerME(model); - } - } - - private void initTagMap() { - tagMap = new Hashtable<>(); - tagMap.put(POSTagger.NOUN, nounTags); - } - - // Returns true if the type string belongs to one of the tags for the type - public boolean isType(String typeStr, int type) { - boolean ret = false; - String[] tags = tagMap.get(type); - for (String tag : tags) { - if (typeStr.equalsIgnoreCase(tag)) { - ret = true; - break; - } - } - return ret; - } - - @Override - public String getTaggedString(String input) { - String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(input); - String[] tags = tagger.tag(tokens); - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < tokens.length; i++) { - sb.append(tokens[i]).append("/").append(tags[i]).append(" "); - } - return sb.toString(); - } - - @Override - public List<String> getWordsOfType(String sent, int type) { - List<String> ret = new ArrayList<>(); - String[] tokens = dp.getWords(sent); - for (String t : tokens) { - String[] wordPlusType = t.split("/"); - if (wordPlusType.length == 2) { - if (isType(wordPlusType[1], type)) - 
ret.add(wordPlusType[0]); - } - } - // log.info(ret.toString()); - return ret; - } -} diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/POSTagger.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/POSTagger.java index d6b5d2d..af468ed 100644 --- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/POSTagger.java +++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/POSTagger.java @@ -19,6 +19,10 @@ package opennlp.summarization.lexicalchaining; import java.util.List; +/** + * A basic POS tagger which describes functionality to tag text and + * filter tokens for certain word classes. + */ public interface POSTagger { //Tagger types.. @@ -28,7 +32,26 @@ public interface POSTagger { int ADVERB = 3; int PRONOUN = 4; + /** + * Tags a given {@code input} text so that word classes are appended to each token. + * + * @param input The text to process. Must not be {@code null}. If empty, an empty String is returned. + * @return The POS tagged text. May be empty. + * @throws IllegalArgumentException Thrown if parameters are invalid. + */ String getTaggedString(String input); - List<String> getWordsOfType(String sent, int type); + /** + * Extracts words from POS-tagged {@code tokens} which equal a certain word class ({@code type}). + * + * @param tokens An array of words to filter for their word class ({@code type}). Must not be {@code null}. + * Must be in a tagged form, that is, separated into {@code token/word-class} pairs. + * @param type One of the supported types: {@link #NOUN}, {@link #VERB}, {@link #ADJECTIVE}, + * {@link #ADVERB}, or {@link #PRONOUN}. Must not be less than {@code zero} + * and not be more than {@link #PRONOUN}. + * @return A list of words that match the given {@code type}. May be empty, yet guaranteed to be non-{@code null}. + * + * @throws IllegalArgumentException Thrown if parameters are invalid. 
+ */ + List<String> getWordsOfType(String[] tokens, int type); } diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java index ebe352f..59b8a76 100644 --- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java +++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordRelationshipDetermination.java @@ -58,7 +58,7 @@ public class WordRelationshipDetermination { try { DICTIONARY.open(); } catch (IOException e) { - e.printStackTrace(); + throw new RuntimeException(e); } } @@ -130,10 +130,7 @@ public class WordRelationshipDetermination { WordnetWord ww = (WordnetWord) w; IWord syn; if ((syn = this.isSynonym(noun, w)) != null) { - ret = new WordnetWord(); - ret.lexicon = noun; - ret.id = syn.getID(); - ret.wordSense = syn.getSenseKey(); + ret = new WordnetWord(noun, syn.getSenseKey(), syn.getID()); } //Construct an IWord object representing word associated with wordID @@ -156,10 +153,7 @@ public class WordRelationshipDetermination { ISynset s = this.DICTIONARY.getSynset(id); IWord mat = inSynset(s, idxNoun); if (mat != null) { - ret = new WordnetWord(); - ret.lexicon = noun; - ret.id = mat.getID(); - ret.wordSense = mat.getSenseKey(); + ret = new WordnetWord(noun, mat.getSenseKey(), mat.getID()); break; } } @@ -175,7 +169,7 @@ public class WordRelationshipDetermination { */ public WordRelation getRelation(LexicalChain l, String noun, boolean checkMed) { WordRelation ret = new WordRelation(null, null, WordRelation.NO_RELATION); - for (Word w : l.word) { + for (Word w : l.words) { //Exact match is a string relation. 
if (w.getLexicon().equalsIgnoreCase(noun)) { ret = new WordRelation(w, w, WordRelation.STRONG_RELATION); @@ -199,15 +193,12 @@ public class WordRelationshipDetermination { // openDict(); List<IWordID> wordIDs = this.DICTIONARY.getIndexWord(noun, POS.NOUN).getWordIDs(); for (IWordID wid : wordIDs) { - Word w = new WordnetWord(); - w.setLexicon(noun); - w.setID(wid); + Word w = new WordnetWord(noun, wid); ret.add(w); } } catch (Exception ex) { //Not in dictionary - Word w = new WordnetWord(); - w.setLexicon(noun); + Word w = new WordnetWord(noun); ret.add(w); } return ret; diff --git a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordnetWord.java b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordnetWord.java index a110719..0cf026d 100644 --- a/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordnetWord.java +++ b/summarizer/src/main/java/opennlp/summarization/lexicalchaining/WordnetWord.java @@ -18,6 +18,7 @@ package opennlp.summarization.lexicalchaining; import java.util.Hashtable; import java.util.List; +import java.util.Objects; import edu.mit.jwi.item.IPointer; import edu.mit.jwi.item.ISenseKey; @@ -25,16 +26,58 @@ import edu.mit.jwi.item.ISynset; import edu.mit.jwi.item.ISynsetID; import edu.mit.jwi.item.IWordID; +/** + * A {@link Word} implementation based on Wordnet concepts. + */ public class WordnetWord implements Word { - final Hashtable<IPointer, List<ISynsetID>> rels; - String lexicon; - ISenseKey wordSense; - IWordID id; - //Cache.. + + private String lexicon; + private IWordID id; + private ISenseKey wordSense; + + final Hashtable<IPointer, List<ISynsetID>> rels = new Hashtable<>(); + // Cache.. ISynset synonyms; - public WordnetWord() { - rels = new Hashtable<>(); + /** + * Instantiates a {@link WordnetWord} via its lexicon term. + * + * @param lexicon Must not be {@code null} and not be an empty string. + * @throws IllegalArgumentException Thrown if parameters are invalid. 
+ */ + public WordnetWord(String lexicon) { + if (lexicon == null || lexicon.isBlank()) throw new IllegalArgumentException("parameter 'lexicon' must not be null or empty"); + setLexicon(lexicon); + } + + /** + * Instantiates a {@link WordnetWord} via its lexicon term and a {@link IWordID}. + * + * @param lexicon Must not be {@code null} and not be an empty string. + * @param id A unique identifier sufficient to retrieve a particular word from the Wordnet database. + * Must not be {@code null}. + * @throws IllegalArgumentException Thrown if parameters are invalid. + */ + public WordnetWord(String lexicon, IWordID id) { + this(lexicon); + if (id == null) throw new IllegalArgumentException("parameter 'id' must not be null"); + setID(id); + } + + /** + * Instantiates a {@link WordnetWord} via its lexicon term and a {@link IWordID}. + * + * @param lexicon Must not be {@code null} and not be an empty string. + * @param wordSense A sense key is a unique string that identifies a Wordnet word. + * Must not be {@code null}. + * @param id A unique identifier sufficient to retrieve a particular word from the Wordnet database. + * Must not be {@code null}. + * @throws IllegalArgumentException Thrown if parameters are invalid. 
+ */ + public WordnetWord(String lexicon, ISenseKey wordSense, IWordID id) { + this(lexicon, id); + if (wordSense == null) throw new IllegalArgumentException("parameter 'wordSense' must not be null"); + setSense(wordSense); } @Override @@ -72,8 +115,18 @@ public class WordnetWord implements Word { return this.lexicon; } + @Override + public final boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof WordnetWord that)) return false; + + return Objects.equals(lexicon, that.lexicon) && Objects.equals(id, that.id); + } + @Override public int hashCode() { - return toString().hashCode(); + int result = Objects.hashCode(lexicon); + result = 31 * result + Objects.hashCode(id); + return result; } } diff --git a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java index 7fa1155..c52d4be 100644 --- a/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java +++ b/summarizer/src/main/java/opennlp/summarization/meta/MetaSummarizer.java @@ -28,7 +28,7 @@ import opennlp.summarization.Sentence; import opennlp.summarization.Summarizer; import opennlp.summarization.lexicalchaining.LexicalChain; import opennlp.summarization.lexicalchaining.LexicalChainingSummarizer; -import opennlp.summarization.lexicalchaining.OpenNLPPOSTagger; +import opennlp.summarization.lexicalchaining.NounPOSTagger; import opennlp.summarization.textrank.TextRankSummarizer; import opennlp.summarization.DocProcessor; @@ -47,7 +47,7 @@ public class MetaSummarizer implements Summarizer { private final TextRankSummarizer textRank; private final LexicalChainingSummarizer lcs; - public MetaSummarizer(DocProcessor docProcessor, OpenNLPPOSTagger posTagger) { + public MetaSummarizer(DocProcessor docProcessor, NounPOSTagger posTagger) { dp = docProcessor; textRank = new TextRankSummarizer(dp); lcs = new LexicalChainingSummarizer(dp, posTagger); @@ -117,7 +117,7 @@ public class MetaSummarizer 
implements Summarizer { @Override public String summarize(String article, int maxWords) { // Build lexical Chains.. - List<Sentence> sent = dp.getSentencesFromStr(article); + List<Sentence> sent = dp.getSentences(article); List<Score> finalSc = rankSentences(article, sent, maxWords); StringBuilder sb = new StringBuilder(); diff --git a/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java b/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java index c185361..a638d68 100755 --- a/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java +++ b/summarizer/src/main/java/opennlp/summarization/preprocess/DefaultDocProcessor.java @@ -17,8 +17,7 @@ package opennlp.summarization.preprocess; -import java.io.BufferedInputStream; -import java.io.FileReader; +import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.LineNumberReader; @@ -28,8 +27,6 @@ import java.util.List; import java.util.ArrayList; import java.util.Locale; import java.util.Hashtable; -import java.util.logging.Level; -import java.util.logging.Logger; import java.util.regex.Pattern; import opennlp.summarization.Sentence; @@ -38,6 +35,7 @@ import opennlp.tools.sentdetect.SentenceDetectorME; import opennlp.tools.sentdetect.SentenceModel; import opennlp.tools.stemmer.PorterStemmer; import opennlp.tools.stemmer.Stemmer; +import opennlp.tools.util.DownloadUtil; /** * Parses a document to sentences. @@ -53,16 +51,21 @@ public class DefaultDocProcessor implements DocProcessor { private static final int SENTENCE_FRAG = OPEN_NLP; private final Stemmer stemmer; - private SentenceModel sentModel; - - public DefaultDocProcessor(InputStream fragModelFile) { + private final SentenceModel sentModel; + + /** + * Instantiates a {@link DocProcessor} for a Sentence detection model for the specified {@code languageCode}. 
+ * + * @param languageCode An ISO-language code for obtaining a {@link SentenceModel}. + * Must not be {@code null} and not be blank. + * @throws IOException Thrown if IO errors occurred. + * @throws IllegalArgumentException Thrown if parameters are invalid. + */ + public DefaultDocProcessor(String languageCode) throws IOException { + if (languageCode == null || languageCode.isBlank()) + throw new IllegalArgumentException("Parameter 'languageCode' must not be null or blank"); stemmer = new PorterStemmer(); - - try (InputStream modelIn = new BufferedInputStream(fragModelFile)) { - sentModel = new SentenceModel(modelIn); - } catch (Exception ex) { - Logger.getAnonymousLogger().info("Error while parsing.. Ignoring the line and marching on.. " + ex.getMessage()); - } + sentModel = DownloadUtil.downloadModel(languageCode, DownloadUtil.ModelType.SENTENCE_DETECTOR, SentenceModel.class); } // Str - Document or para @@ -81,8 +84,8 @@ public class DefaultDocProcessor implements DocProcessor { for (int end = iterator.next(); end != BreakIterator.DONE; start = end, end = iterator.next()) { String sentence = str.substring(start, end);//str.substring(oldSentEndIdx, sentEndIdx).trim(); - //Add the sentence as-is; do any processing at the word level - //To lower case and trim all punctuations + // Add the sentence as-is; do any processing at the word level + // To lower case and trim all punctuations sentences.add(sentence); wrdItr.setText(sentence); StringBuilder procSent = new StringBuilder(); @@ -93,12 +96,12 @@ public class DefaultDocProcessor implements DocProcessor { String word = sentence.substring(wrdStrt, wrdEnd);//words[i].trim(); word = word.replace(REGEX, ""); - //Skip stop words and stem the word + // Skip stop words and stem the word if (sw.isStopWord(word)) continue; String stemedWrd = stemmer.stem(word).toString(); - //update iidx by adding the current sentence to the list + // update iidx by adding the current sentence to the list if (iidx != null) { if 
(stemedWrd.length() > 1) { List<Integer> sentList = iidx.get(stemedWrd); @@ -107,7 +110,7 @@ public class DefaultDocProcessor implements DocProcessor { } sentList.add(sentCnt); - //Save it back + // Save it back iidx.put(stemedWrd, sentList); } } @@ -121,60 +124,77 @@ public class DefaultDocProcessor implements DocProcessor { } - public String docToString(String fileName) { - StringBuilder docBuffer = new StringBuilder(); - - try (InputStream in = DefaultDocProcessor.class.getResourceAsStream(fileName); - LineNumberReader lnr = new LineNumberReader(new InputStreamReader(in))) { - String nextLine; - - while ((nextLine = lnr.readLine()) != null) { - String trimmedLine = nextLine.trim(); - if (!trimmedLine.isEmpty()) { - docBuffer.append(REPLACEMENT_PATTERN.matcher(trimmedLine).replaceAll("")).append(" "); + /** + * Reads a document's content from a file. + * + * @param fileName The path relative file reference of the resource to read in. + * If {@code null} or empty, an empty String is returned. + * @return A string representation of the file's contents. 
+ */ + public String docToString(String fileName) throws IOException { + if (fileName == null || fileName.isBlank()) { + return ""; + } else { + StringBuilder docBuffer = new StringBuilder(); + try (InputStream in = DefaultDocProcessor.class.getResourceAsStream(fileName); + LineNumberReader lnr = new LineNumberReader(new InputStreamReader(in))) { + String nextLine; + + while ((nextLine = lnr.readLine()) != null) { + String trimmedLine = nextLine.trim(); + if (!trimmedLine.isEmpty()) { + docBuffer.append(REPLACEMENT_PATTERN.matcher(trimmedLine).replaceAll("")).append(" "); + } + } + } - } catch (Exception ex) { - Logger.getLogger(DefaultDocProcessor.class.getName()).log(Level.SEVERE, null, ex); + return docBuffer.toString(); } - return docBuffer.toString(); } - //List of sentences form a document - public List<Sentence> docToSentList(String fileName) { - List<Sentence> sentList = new ArrayList<>(); - - try (LineNumberReader lnr = new LineNumberReader(new FileReader(fileName))) { - String nextLine; - int paraNo = 0; - int sentNo = 0; - while ((nextLine = lnr.readLine()) != null) { - String trimmedLine = nextLine.trim(); - if (!trimmedLine.isEmpty()) { - List<String> sents = new ArrayList<>(); - List<String> cleanedSents = new ArrayList<>(); - this.getSentences(trimmedLine, sents, null, cleanedSents); - int paraPos = 1; - for (String sen : sents) { - Sentence s = new Sentence(); - s.setSentId(sentNo++); - s.setParagraph(paraNo); - s.setStringVal(sen); - s.setParaPos(paraPos++); - sentList.add(s); + /** + * Reads a document's content from a file. + * + * @param fileName The path relative file reference of the resource to read in. + * If {@code null} or empty, an empty List is returned. + * @return A list of {@link Sentence sentences} representing the file's contents. 
+ */ + public List<Sentence> docToSentences(String fileName) throws IOException { + if (fileName == null || fileName.isBlank()) { + return Collections.emptyList(); + } else { + List<Sentence> sentList = new ArrayList<>(); + try (InputStream in = DefaultDocProcessor.class.getResourceAsStream(fileName); + LineNumberReader lnr = new LineNumberReader(new InputStreamReader(in))) { + String nextLine; + int paraNo = 0; + int sentNo = 0; + while ((nextLine = lnr.readLine()) != null) { + String trimmedLine = nextLine.trim(); + if (!trimmedLine.isEmpty()) { + List<String> sents = new ArrayList<>(); + List<String> cleanedSents = new ArrayList<>(); + this.getSentences(trimmedLine, sents, null, cleanedSents); + int paraPos = 1; + for (String sen : sents) { + Sentence s = new Sentence(sentNo++, sen, paraNo, paraPos++); + sentList.add(s); + } + paraNo++; } - paraNo++; } } - - } catch (Exception ex) { - Logger.getLogger(DefaultDocProcessor.class.getName()).log(Level.SEVERE, null, ex); + return sentList; } - return sentList; } + /** + * {@inheritDoc} + */ @Override - public List<Sentence> getSentencesFromStr(String text) { + public List<Sentence> getSentences(String text) { + if (text == null || text.isBlank()) { + return Collections.emptyList(); + } List<Sentence> ret = new ArrayList<>(); List<String> sentStrs = new ArrayList<>(); List<String> cleanedSents = new ArrayList<>(); @@ -188,24 +208,28 @@ public class DefaultDocProcessor implements DocProcessor { Collections.addAll(sentStrs, sentences); } int sentNo = 0; - for (String sen : sentStrs) { - Sentence s = new Sentence(); - s.setSentId(sentNo); - s.setParagraph(1); - s.setStringVal(sen); - s.setParaPos(sentNo); + Sentence s = new Sentence(sentNo, sen, 1, sentNo); ret.add(s); sentNo++; } return ret; } + /** + * {@inheritDoc} + */ @Override public String[] getWords(String sent) { + if (sent == null || sent.isBlank()) { + return new String[0]; + } return sent.trim().split("\\s+"); } + /** + * {@inheritDoc} + */ @Override public 
Stemmer getStemmer() { return stemmer; diff --git a/summarizer/src/main/java/opennlp/summarization/preprocess/IDFWordWeight.java b/summarizer/src/main/java/opennlp/summarization/preprocess/IDFWordWeight.java index 8b88cd6..b6eef0b 100755 --- a/summarizer/src/main/java/opennlp/summarization/preprocess/IDFWordWeight.java +++ b/summarizer/src/main/java/opennlp/summarization/preprocess/IDFWordWeight.java @@ -17,6 +17,7 @@ package opennlp.summarization.preprocess; +import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.Hashtable; @@ -29,12 +30,17 @@ import java.io.LineNumberReader; * @see WordWeight */ public class IDFWordWeight implements WordWeight { + private static IDFWordWeight instance; final Hashtable<String, Double> idf; public IDFWordWeight(String fileName) { idf = new Hashtable<>(); - load(fileName); + try { + load(fileName); + } catch (IOException e) { + throw new RuntimeException("Could not load the file with IDF", e); + } } public static IDFWordWeight getInstance(String fileName) { @@ -58,7 +64,7 @@ public class IDFWordWeight implements WordWeight { * Loads the IDF for words from given file. The file is required to have a simple format - * word, IDF. 
*/ - private void load(String fileName) { + private void load(String fileName) throws IOException { try (InputStream in = IDFWordWeight.class.getResourceAsStream(fileName); LineNumberReader lnr = new LineNumberReader(new InputStreamReader(in))) { @@ -72,9 +78,6 @@ public class IDFWordWeight implements WordWeight { idf.put(word, idfVal); } } - } catch (Exception ex) { - System.err.println("Could not load the file with IDF"); - ex.printStackTrace(); } } } diff --git a/summarizer/src/main/java/opennlp/summarization/preprocess/StopWords.java b/summarizer/src/main/java/opennlp/summarization/preprocess/StopWords.java index deb338d..c558dee 100755 --- a/summarizer/src/main/java/opennlp/summarization/preprocess/StopWords.java +++ b/summarizer/src/main/java/opennlp/summarization/preprocess/StopWords.java @@ -215,7 +215,7 @@ public class StopWords { h.add("your"); h.add("yours"); h.add("yourself"); - h.add("yourselves "); + h.add("yourselves"); } public static StopWords getInstance() { diff --git a/summarizer/src/main/java/opennlp/summarization/preprocess/WordWeight.java b/summarizer/src/main/java/opennlp/summarization/preprocess/WordWeight.java index 97866aa..1998434 100755 --- a/summarizer/src/main/java/opennlp/summarization/preprocess/WordWeight.java +++ b/summarizer/src/main/java/opennlp/summarization/preprocess/WordWeight.java @@ -17,7 +17,14 @@ package opennlp.summarization.preprocess; +/** + * Represents a type which can compute the weight of a word in a certain context, e.g. a sentence or a text. + */ public interface WordWeight { - double getWordWeight(String s); + /** + * @param token The input token (word) to get a weight for. Must not be {@code null}. + * @return The associated weight for the specified {@code token}. 
+ */ + double getWordWeight(String token); } diff --git a/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java b/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java index 3ead306..fc359e7 100755 --- a/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java +++ b/summarizer/src/main/java/opennlp/summarization/textrank/TextRank.java @@ -30,46 +30,74 @@ import opennlp.summarization.preprocess.StopWords; import opennlp.summarization.preprocess.WordWeight; /** - * Implements the TextRank algorithm by Mihalcea et al. - * <p> + * Implements the TextRank algorithm by Rada Mihalcea and Paul Tarau: <br/> + * <a href="https://aclanthology.org/W04-3252/">TextRank: Bringing Order into Text</a> + * <br/><br/> * This basically applies the page rank algorithm to a graph where each sentence is a node * and a connection between sentences indicates that a word is shared between them. + * <p> * It returns a ranking of sentences where the highest rank means most important etc. * Currently, only stemming is done to the words; a more sophisticated way might use a * resource like Wordnet to match synonyms etc. */ public class TextRank { + private static final int NO_OF_IT = 100; // DAMPING FACTOR.. private static final double DF = 0.15; private static final boolean HIGHER_TITLE_WEIGHT = true; private static final double TITLE_WRD_WT = 2d; + + private final DocProcessor docProc; private final StopWords sw; private final WordWeight wordWt; + private final double maxErr = 0.1; private final double title_wt = 0; - private String article; - private Hashtable<Integer, List<Integer>> links; + + private Hashtable<Integer, List<Integer>> links = new Hashtable<>(); private List<String> sentences = new ArrayList<>(); private List<String> processedSent = new ArrayList<>(); - private DocProcessor docProc; + /** + * Instantiates a {@link TextRank} with the specified {@link DocProcessor}. + * + * @param dp A valid {@link DocProcessor}. 
Must not be {@code null}. + * + * @throws IllegalArgumentException Thrown if parameters are invalid. + */ public TextRank(DocProcessor dp) { - sw = new StopWords(); - setLinks(new Hashtable<>()); - processedSent = new ArrayList<>(); - docProc = dp; - wordWt = IDFWordWeight.getInstance("/meta/idf.csv"); + this(dp, new StopWords(), IDFWordWeight.getInstance("/idf.csv")); } - public TextRank(StopWords sw, WordWeight wordWts) { - this.sw = sw; - this.wordWt = wordWts; + /** + * Instantiates a {@link TextRank} with the specified {@link DocProcessor}. + * + * @param dp A valid {@link DocProcessor}. Must not be {@code null}. + * @param stopWords The {@link StopWords} instance to use. Must not be {@code null}. + * @param wordWeights The {@link WordWeight} instance to use. Must not be {@code null}. + * + * @throws IllegalArgumentException Thrown if parameters are invalid. + */ + public TextRank(DocProcessor dp, StopWords stopWords, WordWeight wordWeights) { + if (dp == null) throw new IllegalArgumentException("parameter 'dp' must not be null"); + if (stopWords == null) throw new IllegalArgumentException("parameter 'stopWords' must not be null"); + if (wordWeights == null) throw new IllegalArgumentException("parameter 'wordWeights' must not be null"); + this.docProc = dp; + this.sw = stopWords; + this.wordWt = wordWeights; } - // Returns similarity of two sentences. Wrd wts contains tf-idf of the words.. - public double getWeightedSimilarity(String sent1, String sent2, - Hashtable<String, Double> wrdWts) { + /** + * Computes the similarity of two sentences. + * + * @param sent1 The first sentence. If {@code null} or empty the computation will result in {@code 0.0}. + * @param sent2 The second sentence. If {@code null} or empty the computation will result in {@code 0.0}. + * @param wrdWts The mapping table containing tf-idf of the words. + * @return The computed similarity. If no similarity exists, the resulting value equals {@code 0.0}. 
+ */ + public double getWeightedSimilarity(String sent1, String sent2, Hashtable<String, Double> wrdWts) { + String[] words1 = docProc.getWords(sent1); String[] words2 = docProc.getWords(sent2); double wordsInCommon = 0; @@ -97,13 +125,17 @@ public class TextRank { return (wordsInCommon) / (words1.length + words2.length); } - // Gets the current score from the list of scores passed ... + /** + * @param scores A list of {@link Score} instances. + * @param id The sentence id to check for. + * @return Gets the element from {@code scores} that matches the passed sentence {@code id}. + */ public double getScoreFrom(List<Score> scores, int id) { for (Score s : scores) { if (s.getSentId() == id) return s.getScore(); } - return 1; + return 1; // Why is the default score "1" here? } // This method runs the page rank algorithm for the sentences. @@ -114,9 +146,7 @@ public class TextRank { List<Score> currWtScores = new ArrayList<>(); // Start with equal weights for all sentences for (int i = 0; i < rawScores.size(); i++) { - Score ns = new Score(); - ns.setSentId(rawScores.get(i).getSentId()); - ns.setScore((1 - title_wt) / (rawScores.size()));// this.getSimilarity(); + Score ns = new Score(rawScores.get(i).getSentId(), (1 - title_wt) / (rawScores.size())); // this.getSimilarity(); currWtScores.add(ns); } // currWtScores.get(0).score = this.title_wt; @@ -129,8 +159,6 @@ public class TextRank { // Update the scores for the current iteration.. 
for (Score rs : rawScores) { int sentId = rs.getSentId(); - Score ns = new Score(); - ns.setSentId(sentId); List<Integer> neighbors = getLinks().get(sentId); double sum = 0; @@ -145,7 +173,7 @@ public class TextRank { sum += wij / sigmawjk * txtRnkj; } } - ns.setScore((1d - DF) + sum * DF);// * rs.score + Score ns = new Score(sentId, (1d - DF) + sum * DF); // * rs.score totErr += ns.getScore() - getScoreFrom(rawScores, sentId); newWtScores.add(ns); } @@ -169,8 +197,7 @@ public class TextRank { for (int i = 0; i < sentences.size(); i++) { String nextSent = sentences.get(i); String[] words = docProc.getWords(nextSent); - Score s = new Score(); - s.setSentId(i); + Score s = new Score(i, 0d); for (String word : words) { String currWrd = docProc.getStemmer().stem(word).toString(); //stemmer.toString(); @@ -220,7 +247,7 @@ public class TextRank { this.sentences = sentences; this.processedSent = processedSent; - Hashtable<String, Double> wrdWts = toWordWtHashtable(this.wordWt, iidx);// new + Hashtable<String, Double> wrdWts = toWordWtHashtable(this.wordWt, iidx); // new if (HIGHER_TITLE_WEIGHT && !getSentences().isEmpty()) { String sent = getSentences().get(0); @@ -250,14 +277,6 @@ public class TextRank { this.sentences = sentences; } - public String getArticle() { - return article; - } - - public void setArticle(String article) { - this.article = article; - } - public Hashtable<Integer, List<Integer>> getLinks() { return links; } @@ -265,14 +284,5 @@ public class TextRank { private void setLinks(Hashtable<Integer, List<Integer>> links) { this.links = links; } -} -/* - * public double getScore(String sent1, String sent2, boolean toPrint) { - * String[] words1 = sent1.split("\\s+"); String[] words2 = sent2.split("\\s+"); - * double wordsInCommon = 0; for(int i=0;i< words1.length;i++) { for(int - * j=0;j<words2.length;j++) { if(!sw.isStopWord(words1[i]) && - * !words1[i].trim().isEmpty() && words1[i].equals(words2[j])) { wordsInCommon+= - * wordWt.getWordWeight(words1[i]); 
} } } return ((double)wordsInCommon) / - * (Math.log(1+words1.length) + Math.log(1+words2.length)); } - */ \ No newline at end of file +} diff --git a/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java b/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java index 765bb94..3c1a3e3 100755 --- a/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java +++ b/summarizer/src/main/java/opennlp/summarization/textrank/TextRankSummarizer.java @@ -108,7 +108,7 @@ public class TextRankSummarizer implements Summarizer { @Override public String summarize(String article, int maxWords) { - List<Sentence> sentences = docProcessor.getSentencesFromStr(article); + List<Sentence> sentences = docProcessor.getSentences(article); List<Score> scores = rankSentences(article, sentences, maxWords); return scores2String(sentences, scores, maxWords); } diff --git a/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java b/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java index ce7bc50..ec31f79 100644 --- a/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java +++ b/summarizer/src/test/java/opennlp/summarization/AbstractSummarizerTest.java @@ -17,7 +17,7 @@ package opennlp.summarization; -import opennlp.summarization.lexicalchaining.OpenNLPPOSTagger; +import opennlp.summarization.lexicalchaining.NounPOSTagger; import opennlp.summarization.preprocess.DefaultDocProcessor; import org.junit.jupiter.api.BeforeAll; @@ -37,12 +37,12 @@ public abstract class AbstractSummarizerTest { private static final Logger log = LoggerFactory.getLogger(AbstractSummarizerTest.class); protected static DefaultDocProcessor docProcessor; - protected static OpenNLPPOSTagger posTagger; + protected static NounPOSTagger posTagger; @BeforeAll static void initEnv() throws IOException { - docProcessor = new 
DefaultDocProcessor(AbstractSummarizerTest.class.getResourceAsStream("/en-sent.bin")); - posTagger = new OpenNLPPOSTagger(docProcessor, AbstractSummarizerTest.class.getResourceAsStream("/en-pos-maxent.bin")); + docProcessor = new DefaultDocProcessor("en"); + posTagger = new NounPOSTagger("en"); } /** @@ -52,17 +52,17 @@ public abstract class AbstractSummarizerTest { @ParameterizedTest(name = "news story {index}") @ValueSource(strings = { - "/meta/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story", - "/meta/0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story", - "/meta/0a3040b6c1bba95efca727158f128a19c44ec8ba.story", - "/meta/0a3479b53796863a664c32ca20d8672583335d2a.story", - "/meta/0a3639cb86487e72e2ba084211f99799918aedf8.story", - "/meta/0a4092bef1801863296777ebcfeceb1aec23c78f.story", - "/meta/0a5458d3427b290524a8df11d8503a5b57b32747.story", - "/meta/0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story", - "/meta/0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story" + "/news/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story", + "/news/0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story", + "/news/0a3040b6c1bba95efca727158f128a19c44ec8ba.story", + "/news/0a3479b53796863a664c32ca20d8672583335d2a.story", + "/news/0a3639cb86487e72e2ba084211f99799918aedf8.story", + "/news/0a4092bef1801863296777ebcfeceb1aec23c78f.story", + "/news/0a5458d3427b290524a8df11d8503a5b57b32747.story", + "/news/0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story", + "/news/0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story" }) - public void testSummarize(String filename) { + public void testSummarize(String filename) throws IOException { String article = docProcessor.docToString(filename); String summary = getSummarizer().summarize(article, 20); assertNotNull(summary); diff --git a/summarizer/src/test/java/opennlp/summarization/SentenceTest.java b/summarizer/src/test/java/opennlp/summarization/SentenceTest.java new file mode 100644 index 0000000..28f9fc1 --- /dev/null +++ 
b/summarizer/src/test/java/opennlp/summarization/SentenceTest.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package opennlp.summarization; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.NullAndEmptySource; +import org.junit.jupiter.params.provider.ValueSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class SentenceTest { + + private static final String SENTENCE = "This example is available in many tests."; + + // SUT + private Sentence sentence; + + @BeforeEach + public void setUp() { + sentence = new Sentence(0, SENTENCE, 0, 0); + } + + @ParameterizedTest + @ValueSource(strings = {"\t", "\n", " "}) + @NullAndEmptySource + public void testConstructInvalid1(String input) { + assertThrows(IllegalArgumentException.class, () -> new Sentence(0, input, 0, 0)); + } + + @ParameterizedTest + @ValueSource(ints = {Integer.MIN_VALUE, -42, -1}) + public void 
testConstructInvalid2(int input) { + assertThrows(IllegalArgumentException.class, () -> new Sentence(input, SENTENCE, 0, 0)); + } + + @ParameterizedTest + @ValueSource(ints = {Integer.MIN_VALUE, -42, -1}) + public void testConstructInvalid3(int input) { + assertThrows(IllegalArgumentException.class, () -> new Sentence(0, SENTENCE, input, 0)); + } + + @ParameterizedTest + @ValueSource(ints = {Integer.MIN_VALUE, -42, -1}) + public void testConstructInvalid4(int input) { + assertThrows(IllegalArgumentException.class, () -> new Sentence(0, SENTENCE, 0, input)); + } + + @Test + public void testSentenceIdentity() { + assertEquals(0, sentence.getSentId()); + assertEquals(0, sentence.getParagraph()); + assertEquals(0, sentence.getParaPos()); + assertEquals(SENTENCE, sentence.getStringVal()); + } + + @Test + public void testStem() { + String stemmed = sentence.stem(); + assertNotNull(stemmed); + assertFalse(stemmed.isBlank()); + assertEquals("Thi exampl avail mani test ", stemmed); + } + + @Test + public void testGetWrdCnt() { + int wordCountWithoutStopwords = sentence.getWordCnt(); + assertEquals(5, wordCountWithoutStopwords); + } + + @Test + public void testHashcode() { + int hash = sentence.hashCode(); + assertEquals(hash, new Sentence(0, SENTENCE, 0, 0).hashCode()); + } + + @Test + public void testEquals() { + assertEquals(sentence, new Sentence(0, SENTENCE, 0, 0)); + } + + @Test + public void testToString() { + assertEquals(sentence.toString(), new Sentence(0, SENTENCE, 0, 0).toString()); + } +} diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/AbstractLexicalChainTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/AbstractLexicalChainTest.java new file mode 100644 index 0000000..b2bca3c --- /dev/null +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/AbstractLexicalChainTest.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package opennlp.summarization.lexicalchaining; + +import opennlp.summarization.preprocess.DefaultDocProcessor; +import org.junit.jupiter.api.BeforeAll; + +public abstract class AbstractLexicalChainTest { + + protected static final String ARTICLE = + "US President Barack Obama has welcomed an agreement between the US and Russia under which Syria's chemical weapons must be destroyed or removed by mid-2014 as an \"important step\"." + + "But a White House statement cautioned that the US expected Syria to live up to its public commitments. " + + "The US-Russian framework document stipulates that Syria must provide details of its stockpile within a week. " + + "If Syria fails to comply, the deal could be enforced by a UN resolution. " + + "China, France, the UK, the UN and Nato have all expressed satisfaction at the agreement. 
" + + "In Beijing, Foreign Minister Wang Yi said on Sunday that China welcomes the general agreement between the US and Russia."; + + protected static DefaultDocProcessor dp; + protected static LexicalChainingSummarizer lcs; + + @BeforeAll + static void initEnv() throws Exception { + dp = new DefaultDocProcessor("en"); + lcs = new LexicalChainingSummarizer(dp, "en"); + } +} diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java deleted file mode 100644 index 8655922..0000000 --- a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainTest.java +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package opennlp.summarization.lexicalchaining; - -import opennlp.summarization.Sentence; -import opennlp.summarization.preprocess.DefaultDocProcessor; - -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import java.util.Collections; -import java.util.Hashtable; -import java.util.List; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; -import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; - -class LexChainTest { - - private static final String ARTICLE = - "US President Barack Obama has welcomed an agreement between the US and Russia under which Syria's chemical weapons must be destroyed or removed by mid-2014 as an \"important step\"." - + "But a White House statement cautioned that the US expected Syria to live up to its public commitments. " - + "The US-Russian framework document stipulates that Syria must provide details of its stockpile within a week. " - + "If Syria fails to comply, the deal could be enforced by a UN resolution. " - + "China, France, the UK, the UN and Nato have all expressed satisfaction at the agreement. 
" - + "In Beijing, Foreign Minister Wang Yi said on Sunday that China welcomes the general agreement between the US and Russia."; - - private static DefaultDocProcessor dp; - private static LexicalChainingSummarizer lcs; - - @BeforeAll - static void initEnv() throws Exception { - dp = new DefaultDocProcessor(LexChainTest.class.getResourceAsStream("/en-sent.bin")); - lcs = new LexicalChainingSummarizer(dp, LexChainTest.class.getResourceAsStream("/en-pos-maxent.bin")); - } - - @Test - void testBuildLexicalChains() { - List<Sentence> sent = dp.getSentencesFromStr(ARTICLE); - assertNotNull(sent); - List<LexicalChain> vh = lcs.buildLexicalChains(ARTICLE, sent); - assertNotNull(vh); - Collections.sort(vh); - assertTrue(!vh.isEmpty()); - - List<Sentence> s = dp.getSentencesFromStr(ARTICLE); - Hashtable<String, Boolean> comp = new Hashtable<>(); - - for (int i = vh.size() - 1; i >= Math.max(vh.size() - 50, 0); i--) { - LexicalChain lc = vh.get(i); - - if (!(comp.containsKey(lc.getWord().get(0).getLexicon()))) { - comp.put(lc.getWord().get(0).getLexicon(), Boolean.TRUE); - /* - for(int j=0;j<lc.getWord().size();j++) - System.out.print(lc.getWord().get(j) + " -- "); - */ - - assertEquals(1.0d, lc.score()); - /* - for(Sentence sid : lc.getSentences()) { - //if(sid>=0 && sid<s.size()) - System.out.println(sid); - } - */ - } - } - - } - - @Test - void testGetRelation() { - try { - WordRelationshipDetermination lcs = new WordRelationshipDetermination(); - LexicalChain l = new LexicalChain(); - List<Word> words = lcs.getWordSenses("music"); - - l.addWord(words.get(0)); - // int rel = lcs.getRelation(l, "nation"); - WordRelation rel2 = lcs.getRelation(l, "tune", true); - WordRelation rel3 = lcs.getRelation(l, "vocal", true); - assertEquals(1, rel2.relation()); - assertEquals(1, rel3.relation()); - // assertEquals(rel, LexicalChainingSummarizer.STRONG_RELATION); - assertEquals(WordRelation.MED_RELATION, rel2.relation()); - assertEquals(WordRelation.MED_RELATION, rel3.relation()); - 
} catch (Exception e) { - fail(e.getLocalizedMessage()); - } - } - -} diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java index 1bb476a..66fa5d9 100644 --- a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexChainingKeywordExtractorTest.java @@ -17,43 +17,69 @@ package opennlp.summarization.lexicalchaining; +import java.util.Collections; import java.util.List; +import opennlp.summarization.Sentence; + import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; - -import opennlp.summarization.Sentence; -import opennlp.summarization.preprocess.DefaultDocProcessor; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; -class LexChainingKeywordExtractorTest { +class LexChainingKeywordExtractorTest extends AbstractLexicalChainTest { - private static final String ARTICLE = - "US President Barack Obama has welcomed an agreement between the US and Russia under which Syria's chemical weapons must be destroyed or removed by mid-2014 as an \"important step\"." - + "But a White House statement cautioned that the US expected Syria to live up to its public commitments. " - + "The US-Russian framework document stipulates that Syria must provide details of its stockpile within a week. " - + "If Syria fails to comply, the deal could be enforced by a UN resolution. 
" - + "China, France, the UK, the UN and Nato have all expressed satisfaction at the agreement. " - + "In Beijing, Foreign Minister Wang Yi said on Sunday that China welcomes the general agreement between the US and Russia."; + private static List<LexicalChain> chains; - private static DefaultDocProcessor dp; - private static LexicalChainingSummarizer lcs; + // SUT + private LexChainingKeywordExtractor keywordExtractor; @BeforeAll static void initEnv() throws Exception { - dp = new DefaultDocProcessor(LexChainingKeywordExtractorTest.class.getResourceAsStream("/en-sent.bin")); - lcs = new LexicalChainingSummarizer(dp, LexChainingKeywordExtractorTest.class.getResourceAsStream("/en-pos-maxent.bin")); + AbstractLexicalChainTest.initEnv(); + // Prep + List<Sentence> sent = dp.getSentences(ARTICLE); + assertNotNull(sent); + assertFalse(sent.isEmpty()); + chains = lcs.buildLexicalChains(ARTICLE, sent); + assertNotNull(chains); + assertFalse(chains.isEmpty()); } - @Test - void testGetKeywords() { - List<Sentence> sent = dp.getSentencesFromStr(ARTICLE); - List<LexicalChain> vh = lcs.buildLexicalChains(ARTICLE, sent); - LexChainingKeywordExtractor ke = new LexChainingKeywordExtractor(); - List<String> keywords = ke.getKeywords(vh, 5); + @BeforeEach + public void setUp() { + keywordExtractor = new LexChainingKeywordExtractor(); + } + + @ParameterizedTest + @ValueSource(ints = {1, 5, 42, Integer.MAX_VALUE}) + void testExtractKeywords(int noOfKeywords) { + List<String> keywords = keywordExtractor.extractKeywords(chains, noOfKeywords); assertNotNull(keywords); assertFalse(keywords.isEmpty()); } + + @Test + void testExtractKeywordsWithEmptyInput() { + List<String> keywords = keywordExtractor.extractKeywords(Collections.emptyList(), 5); + assertNotNull(keywords); + assertTrue(keywords.isEmpty()); + } + + @Test + void testExtractKeywordsInvalid1() { + assertThrows(IllegalArgumentException.class, () -> keywordExtractor.extractKeywords(null, 5)); + } + + @ParameterizedTest + 
@ValueSource(ints = {Integer.MIN_VALUE, -1, 0}) + void testExtractKeywordsInvalid2(int noOfKeywords) { + assertThrows(IllegalArgumentException.class, () -> keywordExtractor.extractKeywords(chains, noOfKeywords)); + } } diff --git a/summarizer/src/test/java/opennlp/summarization/preprocess/DocProcessorTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerNewsTest.java similarity index 54% rename from summarizer/src/test/java/opennlp/summarization/preprocess/DocProcessorTest.java rename to summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerNewsTest.java index ce31c26..8f82065 100644 --- a/summarizer/src/test/java/opennlp/summarization/preprocess/DocProcessorTest.java +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerNewsTest.java @@ -15,32 +15,33 @@ * limitations under the License. */ -package opennlp.summarization.preprocess; +package opennlp.summarization.lexicalchaining; -import java.util.List; +import opennlp.summarization.AbstractSummarizerTest; +import opennlp.summarization.Summarizer; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.Test; - -import opennlp.summarization.Sentence; +import org.junit.jupiter.api.BeforeEach; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; -class DocProcessorTest { +/** + * Tests the implementation of {@link LexicalChainingSummarizer}. 
+ */ +public class LexicalChainingSummarizerNewsTest extends AbstractSummarizerTest { - private static DefaultDocProcessor dp; + // SUT + private LexicalChainingSummarizer lexicalChainSummarizer; - @BeforeAll - static void initEnv() { - dp = new DefaultDocProcessor(DocProcessorTest.class.getResourceAsStream("/en-sent.bin")); + @BeforeEach + void setUp() { + lexicalChainSummarizer = new LexicalChainingSummarizer(docProcessor, posTagger); } - @Test - void testGetSentencesFromStr() { - String sent = "This is a sentence, with some punctuations; to test if the sentence breaker can handle it! Is every thing working OK ? Yes."; - List<Sentence> doc = dp.getSentencesFromStr(sent); - //dp.docToString(fileName); - assertEquals(doc.size(), 3); + @Override + public Summarizer getSummarizer() { + return lexicalChainSummarizer; } - + } diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java index 5d23bef..435c727 100644 --- a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizerTest.java @@ -17,27 +17,58 @@ package opennlp.summarization.lexicalchaining; -import opennlp.summarization.AbstractSummarizerTest; -import opennlp.summarization.Summarizer; +import java.util.Collections; +import java.util.Hashtable; +import java.util.List; +import java.util.Map; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; -/** - * Tests the implementation of {@link LexicalChainingSummarizer}. 
- */ -public class LexicalChainingSummarizerTest extends AbstractSummarizerTest { +import opennlp.summarization.Sentence; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +class LexicalChainingSummarizerTest extends AbstractLexicalChainTest { - // SUT - private Summarizer lexicalChainSummarizer; + private List<Sentence> sent; @BeforeEach void setUp() { - lexicalChainSummarizer = new LexicalChainingSummarizer(docProcessor, posTagger); + sent = dp.getSentences(ARTICLE); + assertNotNull(sent); } - @Override - public Summarizer getSummarizer() { - return lexicalChainSummarizer; + @Test + void testBuildLexicalChains() { + List<LexicalChain> vh = lcs.buildLexicalChains(ARTICLE, sent); + assertNotNull(vh); + Collections.sort(vh); + assertFalse(vh.isEmpty()); + + Map<String, Boolean> comp = new Hashtable<>(); + + for (int i = vh.size() - 1; i >= Math.max(vh.size() - 50, 0); i--) { + LexicalChain lc = vh.get(i); + Word w = lc.getWords().get(0); + if (!(comp.containsKey(w.getLexicon()))) { + comp.put(w.getLexicon(), Boolean.TRUE); + /* + for(int j=0;j<lc.getWord().size();j++) + System.out.print(lc.getWord().get(j) + " -- "); + */ + + // assertEquals(1.0d, lc.score()); + /* + System.out.println(lc + ": "); + for(Sentence sid : lc.getSentences()) { + //if(sid>=0 && sid<s.size()) + System.out.println("\t" + sid + " [" + lc.score() + "]"); + } + */ + } + } } } diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/NounPOSTaggerTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/NounPOSTaggerTest.java new file mode 100644 index 0000000..2822299 --- /dev/null +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/NounPOSTaggerTest.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package opennlp.summarization.lexicalchaining; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EmptySource; +import org.junit.jupiter.params.provider.ValueSource; + +import java.io.IOException; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class NounPOSTaggerTest { + + private static final String UNTAGGED_SENTENCE = "This is a test ."; + private static final String[] TOKENS_SENTENCE = {"This", "is", "a", "test", "."}; + private static final String[] TOKENS_TAGGED_SENTENCE = {"This/PRON", "is/AUX", "a/DET", "test/NOUN", "./PUNCT"}; + + private static POSTagger tagger; // SUT + + @BeforeAll + public static void initResources() throws IOException { + tagger = new NounPOSTagger("en"); + } + + @Test + void testConstructWithInvalidResource() { + assertThrows(IllegalArgumentException.class, () -> new NounPOSTagger(null)); + } + + @Test + void testGetTaggedString() { + String tagged = tagger.getTaggedString(UNTAGGED_SENTENCE); + assertNotNull(tagged); + assertEquals("This/PRON is/AUX 
a/DET test/NOUN ./PUNCT", tagged); + } + + @Test + void testGetTaggedStringInvalid1() { + assertThrows(IllegalArgumentException.class, () -> tagger.getTaggedString(null)); + } + + @ParameterizedTest + @ValueSource(strings = {"\t", "\n", " "}) + @EmptySource + void testGetTaggedStringInvalid2(String input) { + String tagged = tagger.getTaggedString(input); + assertNotNull(tagged); + } + + @Test + void testGetWordsOfTypeWithTags() { + List<String> filteredByType = tagger.getWordsOfType(TOKENS_TAGGED_SENTENCE, POSTagger.NOUN); + assertNotNull(filteredByType); + assertEquals(1, filteredByType.size()); + assertEquals("test", filteredByType.get(0)); + } + + @Test + void testGetWordsOfTypeWithoutTags() { + assertThrows(IllegalArgumentException.class, () -> + tagger.getWordsOfType(TOKENS_SENTENCE, POSTagger.NOUN)); + } + + @ParameterizedTest + @ValueSource(ints = {POSTagger.ADJECTIVE, POSTagger.ADVERB, POSTagger.VERB}) + void testGetWordsOfTypeWithNonMatchingType(int type) { + List<String> filteredByType = tagger.getWordsOfType(TOKENS_TAGGED_SENTENCE, type); + assertNotNull(filteredByType); + assertEquals(0, filteredByType.size()); + } + + @ParameterizedTest + @ValueSource(ints = {Integer.MIN_VALUE, -1, 5, Integer.MAX_VALUE}) + void testGetWordsOfTypeWithInvalidType(int type) { + assertThrows(IllegalArgumentException.class, () -> + tagger.getWordsOfType(TOKENS_TAGGED_SENTENCE, type)); + } + + @Test + void testGetWordsOfTypeWithInvalidInput() { + assertThrows(IllegalArgumentException.class, () -> + tagger.getWordsOfType(null, POSTagger.NOUN)); + } +} diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/WordRelationshipDeterminationTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/WordRelationshipDeterminationTest.java new file mode 100644 index 0000000..bd8845f --- /dev/null +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/WordRelationshipDeterminationTest.java @@ -0,0 +1,63 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package opennlp.summarization.lexicalchaining; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +public class WordRelationshipDeterminationTest { + + // SUT + private WordRelationshipDetermination wrd; + + @BeforeEach + public void setUp() { + wrd = new WordRelationshipDetermination(); + } + + @Test + void testGetWordSenses() { + LexicalChain l = new LexicalChain(); + List<Word> words = wrd.getWordSenses("music"); + assertNotNull(words); + assertFalse(words.isEmpty()); + l.addWord(words.get(0)); + } + + @Test + void testGetRelation() { + LexicalChain l = new LexicalChain(); + List<Word> words = wrd.getWordSenses("music"); + assertNotNull(words); + assertFalse(words.isEmpty()); + l.addWord(words.get(0)); + // int rel = lcs.getRelation(l, "nation"); + WordRelation rel2 = wrd.getRelation(l, "tune", true); + WordRelation rel3 = wrd.getRelation(l, "vocal", true); + assertEquals(1, rel2.relation()); + assertEquals(1, rel3.relation()); + // 
assertEquals(rel, LexicalChainingSummarizer.STRONG_RELATION); + assertEquals(WordRelation.MED_RELATION, rel2.relation()); + assertEquals(WordRelation.MED_RELATION, rel3.relation()); + } +} diff --git a/summarizer/src/test/java/opennlp/summarization/lexicalchaining/WordnetWordTest.java b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/WordnetWordTest.java new file mode 100644 index 0000000..ab25c21 --- /dev/null +++ b/summarizer/src/test/java/opennlp/summarization/lexicalchaining/WordnetWordTest.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package opennlp.summarization.lexicalchaining; + +import edu.mit.jwi.item.ISynsetID; +import edu.mit.jwi.item.IWordID; +import edu.mit.jwi.item.POS; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.NullAndEmptySource; +import org.junit.jupiter.params.provider.ValueSource; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class WordnetWordTest { + + private WordRelationshipDetermination wrd; + + // SUT + private Word word; + + @BeforeEach + public void setUp() { + wrd = new WordRelationshipDetermination(); + List<Word> words = wrd.getWordSenses("music"); + assertNotNull(words); + assertFalse(words.isEmpty()); + word = words.get(0); + assertNotNull(word); + } + + @ParameterizedTest + @ValueSource(strings = {"\t", "\n", " "}) + @NullAndEmptySource + public void testConstructInvalid1(String input) { + assertThrows(IllegalArgumentException.class, () -> new WordnetWord(input, new DummyWordID())); + } + + @Test + public void testConstructInvalid2() { + assertThrows(IllegalArgumentException.class, () -> new WordnetWord("music", null)); + } + + @Test + public void testSentenceIdentity() { + assertEquals("music", word.getLexicon()); + assertEquals("WID-07034009-N-01-music", word.getID().toString()); + } + + @Test + public void testHashcode() { + int hash = word.hashCode(); + assertEquals(hash, wrd.getWordSenses("music").get(0).hashCode()); + } + + @Test + public void testEquals() { + assertEquals(word, wrd.getWordSenses("music").get(0)); + } + + @Test + public void testToString() { + assertEquals(word.toString(), wrd.getWordSenses("music").get(0).toString()); + } + + private static class DummyWordID implements 
IWordID { + @Override + public ISynsetID getSynsetID() { + return null; + } + + @Override + public int getWordNumber() { + return 0; + } + + @Override + public String getLemma() { + return ""; + } + + @Override + public POS getPOS() { + return null; + } + } +} diff --git a/summarizer/src/test/java/opennlp/summarization/preprocess/DefaultDocProcessorTest.java b/summarizer/src/test/java/opennlp/summarization/preprocess/DefaultDocProcessorTest.java new file mode 100644 index 0000000..dc95239 --- /dev/null +++ b/summarizer/src/test/java/opennlp/summarization/preprocess/DefaultDocProcessorTest.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package opennlp.summarization.preprocess; + +import java.io.IOException; +import java.util.List; + +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import opennlp.summarization.Sentence; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.NullAndEmptySource; +import org.junit.jupiter.params.provider.ValueSource; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class DefaultDocProcessorTest { + + private static DefaultDocProcessor dp; + + @BeforeAll + static void initEnv() throws IOException { + dp = new DefaultDocProcessor("en"); + } + + @Test + void testGetSentences() { + String sent = "This is a sentence, with some punctuations; to test if the sentence breaker can handle it! Is every thing working OK ? Yes."; + List<Sentence> doc = dp.getSentences(sent); + assertNotNull(doc); + assertEquals(3, doc.size()); + } + + @ParameterizedTest + @ValueSource(strings = {"\t", "\n", " "}) + @NullAndEmptySource + void testGetSentencesInvalid(String input) { + List<Sentence> doc = dp.getSentences(input); + assertNotNull(doc); + assertEquals(0, doc.size()); + } + + @Test + void testGetWords() { + String sent = "This is a sentence, with some punctuations; to test if the sentence breaker can handle it! Is every thing working OK ? Yes."; + List<Sentence> doc = dp.getSentences(sent); + assertNotNull(doc); + assertEquals(3, doc.size()); + for (Sentence sentence : doc) { + String[] words = dp.getWords(sentence.getStringVal()); + assertNotNull(words); + assertTrue(words.length > 0); + assertTrue(words.length >= sentence.getWordCnt()); // due to stop words not counted, this must hold. 
+ } + } + + @ParameterizedTest + @ValueSource(strings = {"\t", "\n", " "}) + @NullAndEmptySource + void testGetWordsInvalid(String input) { + String[] words = dp.getWords(input); + assertNotNull(words); + assertEquals(0, words.length); + } + + @Test + void testDocToString() throws IOException { + String content = dp.docToString("/news/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story"); + assertNotNull(content); + assertFalse(content.isEmpty()); + } + + @ParameterizedTest + @ValueSource(strings = {"\t", "\n", " "}) + @NullAndEmptySource + void testDocToStringInvalid(String input) throws IOException { + String content = dp.docToString(input); + assertNotNull(content); + assertTrue(content.isEmpty()); + } + + @Test + void testDocToSentences() throws IOException { + List<Sentence> content = dp.docToSentences("/news/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story"); + assertNotNull(content); + assertFalse(content.isEmpty()); + } + + @ParameterizedTest + @ValueSource(strings = {"\t", "\n", " "}) + @NullAndEmptySource + void testDocToSentencesInvalid(String input) throws IOException { + List<Sentence> content = dp.docToSentences(input); + assertNotNull(content); + assertTrue(content.isEmpty()); + } +} diff --git a/summarizer/src/test/resources/meta/Notes.txt b/summarizer/src/test/resources/Notes.txt similarity index 100% rename from summarizer/src/test/resources/meta/Notes.txt rename to summarizer/src/test/resources/Notes.txt diff --git a/summarizer/src/test/resources/en-pos-maxent.bin b/summarizer/src/test/resources/en-pos-maxent.bin deleted file mode 100644 index 168f259..0000000 Binary files a/summarizer/src/test/resources/en-pos-maxent.bin and /dev/null differ diff --git a/summarizer/src/test/resources/en-sent.bin b/summarizer/src/test/resources/en-sent.bin deleted file mode 100644 index d3a2779..0000000 Binary files a/summarizer/src/test/resources/en-sent.bin and /dev/null differ diff --git a/summarizer/src/test/resources/meta/idf.csv 
b/summarizer/src/test/resources/idf.csv similarity index 100% rename from summarizer/src/test/resources/meta/idf.csv rename to summarizer/src/test/resources/idf.csv diff --git a/summarizer/src/test/resources/meta/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story b/summarizer/src/test/resources/news/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story similarity index 100% rename from summarizer/src/test/resources/meta/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story rename to summarizer/src/test/resources/news/0a2035f3f73b06a5150a6f01cffdf45d027bbbed.story diff --git a/summarizer/src/test/resources/meta/0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story b/summarizer/src/test/resources/news/0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story similarity index 100% rename from summarizer/src/test/resources/meta/0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story rename to summarizer/src/test/resources/news/0a2278bec4a80aec1bc3e9e7a9dac10ac1b6425b.story diff --git a/summarizer/src/test/resources/meta/0a3040b6c1bba95efca727158f128a19c44ec8ba.story b/summarizer/src/test/resources/news/0a3040b6c1bba95efca727158f128a19c44ec8ba.story similarity index 100% rename from summarizer/src/test/resources/meta/0a3040b6c1bba95efca727158f128a19c44ec8ba.story rename to summarizer/src/test/resources/news/0a3040b6c1bba95efca727158f128a19c44ec8ba.story diff --git a/summarizer/src/test/resources/meta/0a3479b53796863a664c32ca20d8672583335d2a.story b/summarizer/src/test/resources/news/0a3479b53796863a664c32ca20d8672583335d2a.story similarity index 100% rename from summarizer/src/test/resources/meta/0a3479b53796863a664c32ca20d8672583335d2a.story rename to summarizer/src/test/resources/news/0a3479b53796863a664c32ca20d8672583335d2a.story diff --git a/summarizer/src/test/resources/meta/0a3639cb86487e72e2ba084211f99799918aedf8.story b/summarizer/src/test/resources/news/0a3639cb86487e72e2ba084211f99799918aedf8.story similarity index 100% rename from 
summarizer/src/test/resources/meta/0a3639cb86487e72e2ba084211f99799918aedf8.story rename to summarizer/src/test/resources/news/0a3639cb86487e72e2ba084211f99799918aedf8.story diff --git a/summarizer/src/test/resources/meta/0a4092bef1801863296777ebcfeceb1aec23c78f.story b/summarizer/src/test/resources/news/0a4092bef1801863296777ebcfeceb1aec23c78f.story similarity index 100% rename from summarizer/src/test/resources/meta/0a4092bef1801863296777ebcfeceb1aec23c78f.story rename to summarizer/src/test/resources/news/0a4092bef1801863296777ebcfeceb1aec23c78f.story diff --git a/summarizer/src/test/resources/meta/0a4324d4a5effa420aa95bb058314eab35c73852.story b/summarizer/src/test/resources/news/0a4324d4a5effa420aa95bb058314eab35c73852.story similarity index 100% rename from summarizer/src/test/resources/meta/0a4324d4a5effa420aa95bb058314eab35c73852.story rename to summarizer/src/test/resources/news/0a4324d4a5effa420aa95bb058314eab35c73852.story diff --git a/summarizer/src/test/resources/meta/0a5458d3427b290524a8df11d8503a5b57b32747.story b/summarizer/src/test/resources/news/0a5458d3427b290524a8df11d8503a5b57b32747.story similarity index 100% rename from summarizer/src/test/resources/meta/0a5458d3427b290524a8df11d8503a5b57b32747.story rename to summarizer/src/test/resources/news/0a5458d3427b290524a8df11d8503a5b57b32747.story diff --git a/summarizer/src/test/resources/meta/0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story b/summarizer/src/test/resources/news/0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story similarity index 100% rename from summarizer/src/test/resources/meta/0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story rename to summarizer/src/test/resources/news/0a5691b8fe654b6b2cdace5ab87aff2ee4c23577.story diff --git a/summarizer/src/test/resources/meta/0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story b/summarizer/src/test/resources/news/0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story similarity index 100% rename from 
summarizer/src/test/resources/meta/0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story rename to summarizer/src/test/resources/news/0a6790f886a42a76945d4a21ed27c4ebd9ca1025.story
