mawiesne commented on code in PR #158:
URL: https://github.com/apache/opennlp-sandbox/pull/158#discussion_r1677375794
##########
summarizer/src/main/java/opennlp/summarization/lexicalchaining/LexicalChainingSummarizer.java:
##########
@@ -44,95 +48,130 @@ public class LexicalChainingSummarizer implements
Summarizer {
private final DocProcessor docProcessor;
private final WordRelationshipDetermination wordRel;
- public LexicalChainingSummarizer(DocProcessor dp, OpenNLPPOSTagger
posTagger) {
- docProcessor = dp;
- tagger = posTagger;
- wordRel = new WordRelationshipDetermination();
+ /**
+ * Instantiates a {@link LexicalChainingSummarizer}.
+ *
+ * @param docProcessor The {@link DocProcessor} to use at runtime. Must not
be {@code null}.
+ * @param languageCode An ISO-language code for obtaining a {@link POSModel}.
+ * Must not be {@code null}.
+ *
+ * @throws IllegalArgumentException Thrown if parameters are invalid.
+ */
+ public LexicalChainingSummarizer(DocProcessor docProcessor, String
languageCode) throws IOException {
+ this(docProcessor, new NounPOSTagger(languageCode));
}
- public LexicalChainingSummarizer(DocProcessor dp, InputStream posModelFile)
throws Exception {
- this(dp, new OpenNLPPOSTagger(dp, posModelFile));
+ /**
+ * Instantiates a {@link LexicalChainingSummarizer}.
+ *
+ * @param docProcessor The {@link DocProcessor} to use at runtime. Must not
be {@code null}.
+ * @param posTagger The {@link NounPOSTagger} to use at runtime. Must not be
{@code null}.
+ *
+ * @throws IllegalArgumentException Thrown if parameters are invalid.
+ */
+ public LexicalChainingSummarizer(DocProcessor docProcessor, NounPOSTagger
posTagger) {
+ if (docProcessor == null) throw new IllegalArgumentException("Parameter
'docProcessor' must not be null!");
+ if (posTagger == null) throw new IllegalArgumentException("Parameter
'posTagger' must not be null!");
+
+ this.docProcessor = docProcessor;
+ tagger = posTagger;
+ wordRel = new WordRelationshipDetermination();
}
- //Build Lexical chains..
- public List<LexicalChain> buildLexicalChains(String article, List<Sentence>
sent) {
- // POS tag article
- Hashtable<String, List<LexicalChain>> chains = new Hashtable<>();
- List<LexicalChain> lc = new ArrayList<>();
- // Build lexical chains
- // For each sentence
- for (Sentence currSent : sent) {
- String taggedSent = tagger.getTaggedString(currSent.getStringVal());
- List<String> nouns = tagger.getWordsOfType(taggedSent, POSTagger.NOUN);
- // For each noun
- for (String noun : nouns) {
- int chainsAddCnt = 0;
- // Loop through each LC
- for (LexicalChain l : lc) {
- try {
- WordRelation rel = wordRel.getRelation(l, noun,
(currSent.getSentId() - l.start) > 7);
- // Is the noun an exact match to one of the current LCs (Strong
relation)
- // Add sentence to chain
- if (rel.relation() == WordRelation.STRONG_RELATION) {
- addToChain(rel.dest(), l, chains, currSent);
- if (currSent.getSentId() - l.last > 10) {
- l.occurrences++;
- l.start = currSent.getSentId();
- }
- chainsAddCnt++;
- } else if (rel.relation() == WordRelation.MED_RELATION) {
- // Add sentence to chain if it is 7 sentences away from start
of chain
- addToChain(rel.dest(), l, chains, currSent);
- chainsAddCnt++;
- //If greater than 7 we will add it but call it a new occurrence
of the lexical chain...
- if (currSent.getSentId() - l.start > 7) {
- l.occurrences++;
- l.start = currSent.getSentId();
- }
- } else if (rel.relation() == WordRelation.WEAK_RELATION) {
- if (currSent.getSentId() - l.start <= 3) {
+ /**
+ * Constructs a list of {@link LexicalChain lexical chains} from specified
sentences.
+ *
+ * @param article TODO unused parameter -> remove it?!
Review Comment:
It seems this was an orphaned relic from the past. I've checked and removed this
parameter.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]