mawiesne commented on code in PR #158:
URL: https://github.com/apache/opennlp-sandbox/pull/158#discussion_r1684872405


##########
summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java:
##########
@@ -44,95 +48,128 @@ public class LexicalChainingSummarizer implements 
Summarizer {
   private final DocProcessor docProcessor;
   private final WordRelationshipDetermination wordRel;
 
-  public LexicalChainingSummarizer(DocProcessor dp, OpenNLPPOSTagger 
posTagger) {
-    docProcessor = dp;
-    tagger = posTagger;
-    wordRel = new WordRelationshipDetermination();
+  /**
+   * Instantiates a {@link LexicalChainingSummarizer}.
+   *
+   * @param docProcessor The {@link DocProcessor} to use at runtime. Must not 
be {@code null}.
+   * @param languageCode An ISO-language code for obtaining a {@link POSModel}.
+   *                     Must not be {@code null}.
+   *
+   * @throws IllegalArgumentException Thrown if parameters are invalid.
+   */
+  public LexicalChainingSummarizer(DocProcessor docProcessor, String 
languageCode) throws IOException {
+    this(docProcessor, new NounPOSTagger(languageCode));
   }
 
-  public LexicalChainingSummarizer(DocProcessor dp, InputStream posModelFile) 
throws Exception {
-    this(dp, new OpenNLPPOSTagger(dp, posModelFile));
+  /**
+   * Instantiates a {@link LexicalChainingSummarizer}.
+   *
+   * @param docProcessor The {@link DocProcessor} to use at runtime. Must not 
be {@code null}.
+   * @param posTagger The {@link NounPOSTagger} to use at runtime. Must not be 
{@code null}.
+   *
+   * @throws IllegalArgumentException Thrown if parameters are invalid.
+   */
+  public LexicalChainingSummarizer(DocProcessor docProcessor, NounPOSTagger 
posTagger) {
+    if (docProcessor == null) throw new IllegalArgumentException("Parameter 
'docProcessor' must not be null!");
+    if (posTagger == null) throw new IllegalArgumentException("Parameter 
'posTagger' must not be null!");
+
+    this.docProcessor = docProcessor;
+    tagger = posTagger;
+    wordRel = new WordRelationshipDetermination();
   }
 
-  //Build Lexical chains..
-  public List<LexicalChain> buildLexicalChains(String article, List<Sentence> 
sent) {
-    // POS tag article
-    Hashtable<String, List<LexicalChain>> chains = new Hashtable<>();
-    List<LexicalChain> lc = new ArrayList<>();
-    // Build lexical chains
-    // For each sentence
-    for (Sentence currSent : sent) {
-      String taggedSent = tagger.getTaggedString(currSent.getStringVal());
-      List<String> nouns = tagger.getWordsOfType(taggedSent, POSTagger.NOUN);
-      //       For each noun
-      for (String noun : nouns) {
-        int chainsAddCnt = 0;
-        //  Loop through each LC
-        for (LexicalChain l : lc) {
-          try {
-            WordRelation rel = wordRel.getRelation(l, noun, 
(currSent.getSentId() - l.start) > 7);
-            //  Is the noun an exact match to one of the current LCs (Strong 
relation)
-            //  Add sentence to chain
-            if (rel.relation() == WordRelation.STRONG_RELATION) {
-              addToChain(rel.dest(), l, chains, currSent);
-              if (currSent.getSentId() - l.last > 10) {
-                l.occurrences++;
-                l.start = currSent.getSentId();
-              }
-              chainsAddCnt++;
-            } else if (rel.relation() == WordRelation.MED_RELATION) {
-              //  Add sentence to chain if it is 7 sentences away from start 
of chain
-              addToChain(rel.dest(), l, chains, currSent);
-              chainsAddCnt++;
-              //If greater than 7 we will add it but call it a new occurrence 
of the lexical chain...
-              if (currSent.getSentId() - l.start > 7) {
-                l.occurrences++;
-                l.start = currSent.getSentId();
-              }
-            } else if (rel.relation() == WordRelation.WEAK_RELATION) {
-              if (currSent.getSentId() - l.start <= 3) {
+  /**
+   * Constructs a list of {@link LexicalChain lexical chains} from specified 
sentences.
+   *
+   * @param sentences The list of {@link Sentence sentences} to build lexical 
chains from.
+   *                  Must not be {@code null}.
+   * @return The result list of {@link LexicalChain lexical chains}. 
Guaranteed to be not {@code null}.
+   * @throws IllegalArgumentException Thrown if parameters are invalid.
+   */
+  public List<LexicalChain> buildLexicalChains(List<Sentence> sentences) {
+    if (sentences == null) throw new IllegalArgumentException("Parameter 
'sentences' must not be null!");
+    else {
+      if (sentences.isEmpty()) {
+        return Collections.emptyList();
+      }
+      Hashtable<String, List<LexicalChain>> chains = new Hashtable<>();
+      List<LexicalChain> lc = new ArrayList<>();
+      // Build lexical chains
+      // For each sentence
+      for (Sentence currSent : sentences) {
+        // POS tag article
+        String taggedSent = 
tagger.getTaggedString(currSent.getStringVal().replace(".", " ."));
+        List<String> nouns = 
tagger.getWordsOfType(docProcessor.getWords(taggedSent), POSTagger.NOUN);
+        //     For each noun
+        for (String noun : nouns) {
+          int chainsAddCnt = 0;
+          //  Loop through each LC
+          for (LexicalChain l : lc) {
+            try {
+              WordRelation rel = wordRel.getRelation(l, noun, 
(currSent.getSentId() - l.start) > 7);

Review Comment:
   Interesting idea. Follow-up improvement as this issue is mainly about test 
aspects.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to