This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new 17a35ca OPENNLP-789: Extend JavaDoc for WSD component (#200)
17a35ca is described below
commit 17a35cacc7c4741c602d1691aa1ebf208114ee09
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Dec 20 12:51:02 2024 +0100
OPENNLP-789: Extend JavaDoc for WSD component (#200)
---
opennlp-wsd/pom.xml | 1 +
.../disambiguator/AbstractWSDisambiguator.java | 8 +-
.../opennlp/tools/disambiguator/Disambiguator.java | 43 ++--
.../tools/disambiguator/FeaturesExtractor.java | 51 ++--
.../disambiguator/IMSWSDContextGenerator.java | 13 +-
.../disambiguator/IMSWSDSequenceValidator.java | 13 +
.../java/opennlp/tools/disambiguator/Lesk.java | 269 +++++++--------------
.../tools/disambiguator/LeskParameters.java | 23 +-
.../main/java/opennlp/tools/disambiguator/MFS.java | 4 -
.../java/opennlp/tools/disambiguator/SynNode.java | 13 +-
.../tools/disambiguator/WSDContextGenerator.java | 6 +-
.../tools/disambiguator/WSDDefaultParameters.java | 47 ++--
.../tools/disambiguator/WSDEvaluationMonitor.java | 5 +-
.../opennlp/tools/disambiguator/WSDEvaluator.java | 21 +-
.../opennlp/tools/disambiguator/WSDHelper.java | 4 +
.../java/opennlp/tools/disambiguator/WSDModel.java | 12 +
.../opennlp/tools/disambiguator/WSDParameters.java | 22 +-
.../opennlp/tools/disambiguator/WSDSample.java | 10 +-
.../disambiguator/WSDisambiguatorFactory.java | 11 +
.../java/opennlp/tools/disambiguator/WTDIMS.java | 18 +-
.../java/opennlp/tools/disambiguator/WordPOS.java | 17 +-
.../opennlp/tools/disambiguator/WordSense.java | 50 +++-
22 files changed, 358 insertions(+), 303 deletions(-)
diff --git a/opennlp-wsd/pom.xml b/opennlp-wsd/pom.xml
index daed4ee..25111cd 100644
--- a/opennlp-wsd/pom.xml
+++ b/opennlp-wsd/pom.xml
@@ -48,6 +48,7 @@
<groupId>net.sf.extjwnl</groupId>
<artifactId>extjwnl-data-wn31</artifactId>
<version>1.2</version>
+ <scope>runtime</scope>
</dependency>
<dependency>
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/AbstractWSDisambiguator.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/AbstractWSDisambiguator.java
index de8cbad..bff03d0 100644
---
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/AbstractWSDisambiguator.java
+++
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/AbstractWSDisambiguator.java
@@ -33,9 +33,13 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * A base implementation of {@link Disambiguator}
+ * A base implementation of {@link Disambiguator}.
+ * <p>
+ * Hint:<br/>
+ * Examples on how to use different implementation approaches are provided
+ * in the corresponding tests.
*
- * @implNote Examples on how to use each approach are provided in the test
section.
+ * @implNote For the moment the source of sense definitions is from WordNet.
*
* @see Disambiguator
* @see WSDParameters
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Disambiguator.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Disambiguator.java
index ecabbdf..0cda028 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Disambiguator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Disambiguator.java
@@ -24,21 +24,29 @@ import opennlp.tools.util.Span;
import java.util.List;
/**
- * A word sense disambiguator that determines which sense of a word is meant in
- * a particular context. It is a classification task, where the classes are the
- * different senses of the ambiguous word. Disambiguation can be achieved in
- * either supervised or un-supervised approaches. A {@link Disambiguator}
returns
- * a sense ID.
+ * Describes a word sense disambiguator that determines which sense of a word
is
+ * meant in a particular context.
+ * It is a classification task, where the classes are the different senses of
+ * the ambiguous word. Disambiguation can be achieved in either supervised or
+ * un-supervised approaches. A {@link Disambiguator} returns a sense ID.
* <p>
* <b>How it works:</b><br/>
- * Just supply the context as an array of tokens and the
- * index of the target word to the disambiguate method.
+ * Just supply the {@code context} as an array of tokens and the index of the
+ * {@code target word} to the disambiguate method.
* <p>
- * Otherwise, for multiple words, you can set a word span instead of simply one
- * index. For the moment the source of sense definitions is from WordNet.
+ * Otherwise, for multiple words, you can set a word {@link Span} instead of
+ * a single target index.
*/
public interface Disambiguator {
+ /**
+ * Conducts disambiguation for a {@link WSDSample} context.
+ *
+ * @param sample The {@link WSDSample} containing the word and POS tags
to use.
+ * @return The sense of the {@code sample} to disambiguate.
+ */
+ String disambiguate(WSDSample sample);
+
/**
* Conducts disambiguation for a single word located at {@code
ambiguousTokenIndex}.
*
@@ -47,26 +55,27 @@ public interface Disambiguator {
* @param lemmas The lemmas of ALL the words in the context.
* @param ambiguousTokenIndex The index of the word to disambiguate.
* Must not be less or equal to zero.
- * @return The senses of the word to disambiguate.
+ * @return The sense of the word to disambiguate.
*/
String disambiguate(String[] tokenizedContext, String[] tokenTags,
String[] lemmas, int ambiguousTokenIndex);
/**
- * Conducts disambiguation for a single word located at {@code
ambiguousTokenIndex}.
+ * Conducts disambiguation for all words located at {@code
ambiguousTokenSpan}.
*
* @param tokenizedContext The text containing the word to disambiguate.
* @param tokenTags The tags corresponding to the context.
* @param lemmas The lemmas of ALL the words in the context.
* @param ambiguousTokenSpan The {@link Span} of the word(s) to
disambiguate.
* Must not be {@code null}.
- * @return The senses of the word to disambiguate
+ * @return A list of senses, one for each word of the context
+ * that is to be disambiguated.
*/
List<String> disambiguate(String[] tokenizedContext, String[] tokenTags,
String[] lemmas, Span ambiguousTokenSpan);
/**
- * Conducts disambiguation for all the words of the context.
+ * Conducts disambiguation for all the words of the {@code tokenizedContext}.
*
* @param tokenizedContext The text containing the word to disambiguate.
* @param tokenTags The tags corresponding to the context.
@@ -77,12 +86,4 @@ public interface Disambiguator {
List<String> disambiguate(String[] tokenizedContext, String[] tokenTags,
String[] lemmas);
- /**
- * Conducts disambiguation for a {@link WSDSample} context.
- *
- * @param sample The {@link WSDSample} containing the word and POS tags
to use.
- * @return The senses of the {@code sample} to disambiguate.
- */
- String disambiguate(WSDSample sample);
-
}
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
index 3008297..2ce9fe6 100644
---
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
+++
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
@@ -40,9 +40,12 @@ import java.util.Map;
* </ul>
*
* The first methods serve to extract the features for the IMS algorithm. Three
- * families of features are to be extracted: - PoS of Surrounding Words: it
- * requires one parameter: "Window size" - Surrounding Words: no parameters are
- * required - Local Collocations: it requires one parameter: "the n-gram"
+ * families of features are to be extracted:
+ * <ul>
+ * <li>PoS of Surrounding Words: it requires one parameter: "Window
size"</li>
+ * <li>Surrounding Words: no parameters are required</li>
+ * <li>Local Collocations: it requires one parameter: the "n-gram"
number</li>
+ * </ul>
*
* @see WTDIMS
* @see <a href="https://aclanthology.org/P10-4014.pdf">
@@ -52,7 +55,7 @@ import java.util.Map;
public class FeaturesExtractor {
/*
- * Extracts POS tags of surrounding words of a given WTDIMS instance.
+ * Extracts POS tags of surrounding words of a given wordToDisambiguate
instance.
*/
private String[] extractPosOfSurroundingWords(WTDIMS wordToDisambiguate, int
windowSize) {
@@ -75,7 +78,7 @@ public class FeaturesExtractor {
}
/*
- * Extracts surrounding lemmas of a given WTDIMS instance.
+ * Extracts surrounding lemmas of a given wordToDisambiguate instance.
* Irrelevant stop words are skipped.
*/
private String[] extractSurroundingWords(WTDIMS wordToDisambiguate) {
@@ -131,13 +134,13 @@ public class FeaturesExtractor {
}
/**
- * Generates the full list of surrounding words, from the
- * training data. These data will be later used for the generation of the
- * features qualified of "Surrounding words".
+ * Generates the full list of surrounding words for the specified
+ * {@code trainingData}.
+ * These data will be used for the generation of features
+ * qualified for "Surrounding words".
*
- * @param trainingData
- * list of the training samples (type {@link WTDIMS}
- * @return the list of all the surrounding words from all the training data
+ * @param trainingData A list of the training samples (type {@link WTDIMS}).
+ * @return A list of all the surrounding words for the {@code trainingData}.
*/
public List<String> extractTrainingSurroundingWords(List<WTDIMS>
trainingData) {
@@ -155,31 +158,43 @@ public class FeaturesExtractor {
}
/**
- * This method generates the different set of features related to the IMS
- * approach and store them in the corresponding attributes of the {@link
WTDIMS}.
+ * Generates the different set of features related to the IMS
+ * approach and puts them in the corresponding attributes of
+ * the {@link WTDIMS word to disambiguate} object.
*
* @param wtd The {@link WTDIMS word to disambiguate}.
* @param windowSize The parameter required to generate the features
qualified of
* "PoS of Surrounding Words".
* @param ngram The parameter required to generate the features qualified of
* "Local Collocations".
+ *
+ * @throws IllegalArgumentException Thrown if parameters were invalid.
*/
public void extractIMSFeatures(WTDIMS wtd, int windowSize, int ngram) {
+ if (wtd == null) {
+ throw new IllegalArgumentException("WTD must not be null");
+ }
wtd.setPosOfSurroundingWords(extractPosOfSurroundingWords(wtd,
windowSize));
wtd.setSurroundingWords(extractSurroundingWords(wtd));
wtd.setLocalCollocations(extractLocalCollocations(wtd, ngram));
}
/**
- * This generates the context of IMS. It supposes that the features have
- * already been extracted and stored in the {@link WTDIMS} object, therefore
it
- * doesn't require any parameters.
+ * Generates the context for the {@link WTDIMS word to disambiguate}.
+ *
+ * @implNote It is assumed that the features have already been extracted and
+ * wrapped in the {@link WTDIMS word to disambiguate}.
+ * Therefore, it doesn't require any parameters.
*
- * @param wtd The {@link WTDIMS wtd to disambiguate}.
+ * @param wtd The {@link WTDIMS word to disambiguate}.
* @param listSurrWords The full list of surrounding words of the training
data.
+ *
+ * @throws IllegalArgumentException Thrown if parameters were invalid.
*/
public void serializeIMSFeatures(WTDIMS wtd, List<String> listSurrWords) {
-
+ if (wtd == null) {
+ throw new IllegalArgumentException("WTD must not be null");
+ }
String[] posOfSurroundingWords = wtd.getPosOfSurroundingWords();
List<String> surroundingWords = new ArrayList<>(
Arrays.asList(wtd.getSurroundingWords()));
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
index fe89933..76d50c8 100644
---
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
+++
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java
@@ -24,12 +24,17 @@ import java.util.List;
import java.util.Set;
/**
- * The default Context Generator of the IMS approach.
+ * The default Context Generator of the
+ * <a href="https://aclanthology.org/P10-4014.pdf"> IMS (It Makes Sense)</a>
approach.
*
* @see WSDContextGenerator
*/
public class IMSWSDContextGenerator implements WSDContextGenerator {
+ /*
+ * Extracts POS tags of surrounding words for the word at the specified index
+ * within the windowSize.
+ */
private String[] extractPosOfSurroundingWords(int index, String[] tags, int
windowSize) {
String[] windowTags = new String[2 * windowSize + 1];
@@ -96,6 +101,9 @@ public class IMSWSDContextGenerator implements
WSDContextGenerator {
return res;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public String[] getContext(int index, String[] tokens,
String[] tags, String[] lemmas, int ngram, int windowSize, List<String>
model) {
@@ -131,6 +139,9 @@ public class IMSWSDContextGenerator implements
WSDContextGenerator {
return serializedFeatures;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public String[] getContext(WSDSample sample, int ngram, int windowSize,
List<String> model) {
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java
index 0818447..c910fea 100644
---
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java
+++
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java
@@ -17,6 +17,10 @@
package opennlp.tools.disambiguator;
+/**
+ * A {@link opennlp.tools.util.SequenceValidator} variant to check whether
+ * IMS-based sequence outcomes are valid or not.
+ */
public class IMSWSDSequenceValidator {
private boolean validOutcome(String outcome, String prevOutcome) {
@@ -43,6 +47,15 @@ public class IMSWSDSequenceValidator {
return validOutcome(outcome, prevOutcome);
}
+ /**
+ * Checks whether the provided {@code outcome} is valid in the context of
+ * the sequence {@code s}.
+ *
+ * @param outcome The candidate result to check.
+ * @param s The tokens that cover the sequence {@code outcome}
+ * shall be valid for.
+ * @return {@code true} if {@code outcome} is valid, {@code false} otherwise.
+ */
public boolean validSequence(String outcome, String[] s) {
return validOutcome(outcome, s);
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
index 19e7ac7..ea2a8e4 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java
@@ -36,7 +36,22 @@ import opennlp.tools.tokenize.Tokenizer;
* The main idea is to check for word overlaps in the sense definitions
* of the surrounding context. An overlap is when two words have similar stems.
* The more overlaps a word has the higher its score. Different variations of
- * the approach are included in this class.
+ * the approach are included in this class, as defined in {@link
LeskParameters.LeskType}.
+ * <p>
+ * Ten features are possible for Lesk.
+ * <ul>
+ * <li>0: Synonyms</li>
+ * <li>1: Hypernyms</li>
+ * <li>2: Hyponyms</li>
+ * <li>3: Meronyms</li>
+ * <li>4: Holonyms</li>
+ * <li>5: Entailments</li>
+ * <li>6: Coordinate Terms</li>
+ * <li>7: Causes</li>
+ * <li>8: Attributes</li>
+ * <li>9: Pertainyms</li>
+ * </ul>
+ * Those are defined via {@link LeskParameters#features}.
*
* @see Disambiguator
* @see LeskParameters
@@ -90,7 +105,7 @@ public class Lesk extends AbstractWSDisambiguator {
if (params.areValid()) {
this.params = (LeskParameters) params;
} else {
- throw new IllegalArgumentException("wrong params");
+ throw new IllegalArgumentException("Detected incorrect LeskParameter
values!");
}
}
}
@@ -105,10 +120,9 @@ public class Lesk extends AbstractWSDisambiguator {
/**
* The basic Lesk method where the entire context is considered for overlaps
- *
- * @param sample
- * the word sample to disambiguate
- * @return The array of WordSenses with their scores
+ *
+ * @param sample The {@link WSDSample} to disambiguate.
+ * @return The list of {@link WordSense word senses} with their scores.
*/
public List<WordSense> basic(WSDSample sample) {
@@ -150,10 +164,9 @@ public class Lesk extends AbstractWSDisambiguator {
/**
* The basic Lesk method but applied to a default context windows
- *
- * @param sample
- * the word sample to disambiguate
- * @return The array of WordSenses with their scores
+ *
+ * @param sample The {@link WSDSample} to disambiguate.
+ * @return The list of {@link WordSense word senses} with their scores.
*/
public List<WordSense> basicContextual(WSDSample sample) {
@@ -207,10 +220,9 @@ public class Lesk extends AbstractWSDisambiguator {
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps across the entire context The
scoring
* function uses linear weights.
- *
- * @param sample
- * the word sample to disambiguate
- * @return the list of WordSenses with their scores
+ *
+ * @param sample The {@link WSDSample} to disambiguate.
+ * @return The list of {@link WordSense word senses} with their scores.
*/
public List<WordSense> extended(WSDSample sample) {
params.setWinBSize(0);
@@ -222,10 +234,9 @@ public class Lesk extends AbstractWSDisambiguator {
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps in a default context window The
* scoring function uses linear weights.
- *
- * @param sample
- * the word sample to disambiguate
- * @return the list of WordSenses with their scores
+ *
+ * @param sample The {@link WSDSample} to disambiguate.
+ * @return The list of {@link WordSense word senses} with their scores.
*/
public List<WordSense> extendedContextual(WSDSample sample) {
List<WordSense> scoredSenses;
@@ -234,81 +245,65 @@ public class Lesk extends AbstractWSDisambiguator {
} else {
scoredSenses = basicContextual(sample);
}
- for (WordSense wordSense : scoredSenses) {
-
- if (getParams().getFeatures()[0]) {
- wordSense.setScore(wordSense.getScore() + getParams().depth_weight
- * assessSynonyms(wordSense.getNode().getSynonyms(), contextWords));
+ for (WordSense ws : scoredSenses) {
+ final SynNode synNode = ws.getNode();
+ final Synset synset = synNode.synset;
+ if (params.getFeatures()[0]) {
+ ws.setScore(ws.getScore() + params.depth_weight
+ * assessSynonyms(synNode.getSynonyms(), contextWords));
}
-
- if (getParams().getFeatures()[1]) {
- fathomHypernyms(wordSense, wordSense.getNode().synset, contextWords,
+ if (params.getFeatures()[1]) {
+ fathomHypernyms(ws, synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
-
- if (getParams().getFeatures()[2]) {
- fathomHyponyms(wordSense, wordSense.getNode().synset, contextWords,
+ if (params.getFeatures()[2]) {
+ fathomHyponyms(ws, synset, contextWords,
params.depth, params.depth, params.depth_weight);
}
-
- if (getParams().getFeatures()[3]) {
- fathomMeronyms(wordSense, wordSense.getNode().synset, contextWords,
+ if (params.getFeatures()[3]) {
+ fathomMeronyms(ws, synset, contextWords,
params.depth, params.depth, params.depth_weight);
-
}
-
- if (getParams().getFeatures()[4]) {
- fathomHolonyms(wordSense, wordSense.getNode().synset, contextWords,
+ if (params.getFeatures()[4]) {
+ fathomHolonyms(ws, synset, contextWords,
params.depth, params.depth, params.depth_weight);
-
}
-
- if (getParams().getFeatures()[5]) {
- fathomEntailments(wordSense, wordSense.getNode().synset, contextWords,
+ if (params.getFeatures()[5]) {
+ fathomEntailments(ws, synset, contextWords,
params.depth, params.depth, params.depth_weight);
-
}
- if (getParams().getFeatures()[6]) {
- fathomCoordinateTerms(wordSense, wordSense.getNode().synset,
+ if (params.getFeatures()[6]) {
+ fathomCoordinateTerms(ws, synset,
contextWords, params.depth, params.depth, params.depth_weight);
-
}
- if (getParams().getFeatures()[7]) {
- fathomCauses(wordSense, wordSense.getNode().synset, contextWords,
+ if (params.getFeatures()[7]) {
+ fathomCauses(ws, synset, contextWords,
params.depth, params.depth, params.depth_weight);
-
}
- if (getParams().getFeatures()[8]) {
- fathomAttributes(wordSense, wordSense.getNode().synset, contextWords,
+ if (params.getFeatures()[8]) {
+ fathomAttributes(ws, synset, contextWords,
params.depth, params.depth, params.depth_weight);
-
}
- if (getParams().getFeatures()[9]) {
- fathomPertainyms(wordSense, wordSense.getNode().synset, contextWords,
+ if (params.getFeatures()[9]) {
+ fathomPertainyms(ws, synset, contextWords,
params.depth, params.depth, params.depth_weight);
-
}
-
}
-
return scoredSenses;
-
}
/**
* An extended version of the Lesk approach that takes into consideration
* semantically related feature overlaps in all the context. The scoring
* function uses exponential weights.
- *
- * @param sample the word sample to disambiguate
- *
- * @return A list of {@link WordSense word senses} with their scores.
+ *
+ * @param sample The {@link WSDSample} to disambiguate.
+ * @return The list of {@link WordSense word senses} with their scores.
*/
public List<WordSense> extendedExponential(WSDSample sample) {
params.setWinBSize(0);
params.setWinFSize(0);
return extendedExponentialContextual(sample);
-
}
/**
@@ -316,9 +311,8 @@ public class Lesk extends AbstractWSDisambiguator {
* semantically related feature overlaps in a custom window in the context.
* The scoring function uses exponential weights.
*
- * @param sample
- * the word sample to disambiguate
- * @return the list of WordSenses with their scores
+ * @param sample The {@link WSDSample} to disambiguate.
+ * @return The list of {@link WordSense word senses} with their scores.
*/
public List<WordSense> extendedExponentialContextual(WSDSample sample) {
List<WordSense> scoredSenses;
@@ -328,74 +322,53 @@ public class Lesk extends AbstractWSDisambiguator {
scoredSenses = basicContextual(sample);
}
- for (WordSense wordSense : scoredSenses) {
-
+ for (WordSense ws : scoredSenses) {
+ final SynNode synNode = ws.getNode();
+ final Synset synset = synNode.synset;
if (params.features[0]) {
- wordSense.setScore(wordSense.getScore() + Math.pow(
- assessSynonyms(wordSense.getNode().getSynonyms(), contextWords),
- params.iexp));
+ ws.setScore(ws.getScore() + Math.pow(assessSynonyms(
+ synNode.getSynonyms(), contextWords), params.iexp));
}
-
if (params.features[1]) {
- fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
- contextWords, params.depth, params.depth, params.iexp,
params.dexp);
+ fathomHypernymsExponential(ws, synset, contextWords,
+ params.depth, params.depth, params.iexp, params.dexp);
}
-
if (params.features[2]) {
- fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
- contextWords, params.depth, params.depth, params.iexp,
params.dexp);
+ fathomHyponymsExponential(ws, synset, contextWords,
+ params.depth, params.depth, params.iexp, params.dexp);
}
-
if (params.features[3]) {
- fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
- contextWords, params.depth, params.depth, params.iexp,
params.dexp);
-
+ fathomMeronymsExponential(ws, synset, contextWords,
+ params.depth, params.depth, params.iexp, params.dexp);
}
-
if (params.features[4]) {
- fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
- contextWords, params.depth, params.depth, params.iexp,
params.dexp);
-
+ fathomHolonymsExponential(ws, synset, contextWords,
+ params.depth, params.depth, params.iexp, params.dexp);
}
-
if (params.features[5]) {
- fathomEntailmentsExponential(wordSense, wordSense.getNode().synset,
- contextWords, params.depth, params.depth, params.iexp,
params.dexp);
+ fathomEntailmentsExponential(ws, synset, contextWords,
+ params.depth, params.depth, params.iexp, params.dexp);
}
-
if (params.features[6]) {
- fathomCoordinateTermsExponential(wordSense, wordSense.getNode().synset,
- contextWords, params.depth, params.depth, params.iexp,
params.dexp);
-
+ fathomCoordinateTermsExponential(ws, synset, contextWords,
+ params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[7]) {
- fathomCausesExponential(wordSense, wordSense.getNode().synset,
- contextWords, params.depth, params.depth, params.iexp,
params.dexp);
-
+ fathomCausesExponential(ws, synset, contextWords,
+ params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[8]) {
- fathomAttributesExponential(wordSense, wordSense.getNode().synset,
- contextWords, params.depth, params.depth, params.iexp,
params.dexp);
-
+ fathomAttributesExponential(ws, synset, contextWords,
+ params.depth, params.depth, params.iexp, params.dexp);
}
if (params.features[9]) {
- fathomPertainymsExponential(wordSense, wordSense.getNode().synset,
- contextWords, params.depth, params.depth, params.iexp,
params.dexp);
+ fathomPertainymsExponential(ws, synset, contextWords,
+ params.depth, params.depth, params.iexp, params.dexp);
}
}
return scoredSenses;
}
- /**
- * Recursively score the hypernym tree linearly.
- *
- * @param wordSense
- * @param child
- * @param relvWords
- * @param depth
- * @param maxDepth
- * @param depthScoreWeight
- */
private void fathomHypernyms(WordSense wordSense, Synset child,
List<WordPOS> relvWords,
int depth, int maxDepth, double
depthScoreWeight) {
if (depth == 0)
@@ -415,17 +388,6 @@ public class Lesk extends AbstractWSDisambiguator {
}
}
- /**
- * Recursively score the hypernym tree exponentially.
- *
- * @param wordSense
- * @param child
- * @param relvWords
- * @param depth
- * @param maxDepth
- * @param intersectionExponent
- * @param depthScoreExponent
- */
private void fathomHypernymsExponential(WordSense wordSense, Synset child,
List<WordPOS> relvWords,
int depth, int maxDepth, double
intersectionExponent,
double depthScoreExponent) {
@@ -446,16 +408,6 @@ public class Lesk extends AbstractWSDisambiguator {
}
}
- /**
- * Recursively score the hyponym tree linearly.
- *
- * @param wordSense
- * @param child
- * @param relvWords
- * @param depth
- * @param maxDepth
- * @param depthScoreWeight
- */
private void fathomHyponyms(WordSense wordSense, Synset child, List<WordPOS>
relvWords,
int depth, int maxDepth, double
depthScoreWeight) {
if (depth == 0)
@@ -476,17 +428,6 @@ public class Lesk extends AbstractWSDisambiguator {
}
}
- /**
- * Recursively score the hyponym tree exponentially.
- *
- * @param wordSense
- * @param child
- * @param relvWords
- * @param depth
- * @param maxDepth
- * @param intersectionExponent
- * @param depthScoreExponent
- */
private void fathomHyponymsExponential(WordSense wordSense, Synset child,
List<WordPOS> relvWords,
int depth, int maxDepth, double
intersectionExponent, double depthScoreExponent) {
if (depth == 0)
@@ -508,16 +449,6 @@ public class Lesk extends AbstractWSDisambiguator {
}
}
- /**
- * Recursively score the meronym tree linearly.
- *
- * @param wordSense
- * @param child
- * @param relvWords
- * @param depth
- * @param maxDepth
- * @param depthScoreWeight
- */
private void fathomMeronyms(WordSense wordSense, Synset child, List<WordPOS>
relvWords,
int depth, int maxDepth, double
depthScoreWeight) {
if (depth == 0)
@@ -539,17 +470,6 @@ public class Lesk extends AbstractWSDisambiguator {
}
}
- /**
- * Recursively score the meronym tree exponentially.
- *
- * @param wordSense
- * @param child
- * @param relvWords
- * @param depth
- * @param maxDepth
- * @param intersectionExponent
- * @param depthScoreExponent
- */
private void fathomMeronymsExponential(WordSense wordSense, Synset child,
List<WordPOS> relvWords,
int depth, int maxDepth, double
intersectionExponent, double depthScoreExponent) {
if (depth == 0)
@@ -569,16 +489,6 @@ public class Lesk extends AbstractWSDisambiguator {
}
}
- /**
- * Recursively score the holonym tree linearly.
- *
- * @param wordSense
- * @param child
- * @param relvWords
- * @param depth
- * @param maxDepth
- * @param depthScoreWeight
- */
private void fathomHolonyms(WordSense wordSense, Synset child, List<WordPOS>
relvWords,
int depth, int maxDepth, double
depthScoreWeight) {
if (depth == 0)
@@ -598,17 +508,6 @@ public class Lesk extends AbstractWSDisambiguator {
}
}
- /**
- * Recursively score the holonym tree exponentially.
- *
- * @param wordSense
- * @param child
- * @param relvWords
- * @param depth
- * @param maxDepth
- * @param intersectionExponent
- * @param depthScoreExponent
- */
private void fathomHolonymsExponential(WordSense wordSense, Synset child,
List<WordPOS> relvWords,
int depth, int maxDepth, double
intersectionExponent, double depthScoreExponent) {
if (depth == 0)
@@ -829,7 +728,7 @@ public class Lesk extends AbstractWSDisambiguator {
}
- /**
+ /*
* Checks if the feature should be counted in the score.
*
* @param featureSynsets
@@ -839,7 +738,7 @@ public class Lesk extends AbstractWSDisambiguator {
private int assessFeature(List<Synset> featureSynsets, List<WordPOS>
relevantWords) {
int count = 0;
for (Synset synset : featureSynsets) {
- SynNode subNode = new SynNode(synset, relevantWords);
+ final SynNode subNode = new SynNode(synset, relevantWords);
String[] tokenizedSense = tokenizer.tokenize(subNode.getGloss());
List<WordPOS> relvSenseWords =
WSDHelper.getAllRelevantWords(tokenizedSense);
@@ -855,7 +754,7 @@ public class Lesk extends AbstractWSDisambiguator {
return count;
}
- /**
+ /*
* Checks if the synonyms should be counted in the score.
*
* @param synonyms
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
index a3d275f..deaba4f 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java
@@ -20,14 +20,14 @@
package opennlp.tools.disambiguator;
/**
- * Lesk specific parameter set.
+ * Lesk specific {@link WSDParameters parameter set}.
*
* @see WSDParameters
*/
public class LeskParameters extends WSDParameters {
/**
- * Enum of all types of implemented variations of Lesk
+ * Enum of all types of implemented variations of Lesk.
*/
public enum LeskType {
LESK_BASIC, LESK_BASIC_CTXT, LESK_EXT, LESK_EXT_CTXT, LESK_EXT_EXP,
LESK_EXT_EXP_CTXT
@@ -77,7 +77,7 @@ public class LeskParameters extends WSDParameters {
return type;
}
- public void setType(LeskType type) {
+ void setType(LeskType type) {
this.type = type;
}
@@ -85,7 +85,7 @@ public class LeskParameters extends WSDParameters {
return winFSize;
}
- public void setWinFSize(int winFSize) {
+ void setWinFSize(int winFSize) {
this.winFSize = winFSize;
}
@@ -93,7 +93,7 @@ public class LeskParameters extends WSDParameters {
return winBSize;
}
- public void setWinBSize(int winBSize) {
+ void setWinBSize(int winBSize) {
this.winBSize = winBSize;
}
@@ -101,7 +101,7 @@ public class LeskParameters extends WSDParameters {
return depth;
}
- public void setDepth(int depth) {
+ void setDepth(int depth) {
this.depth = depth;
}
@@ -109,7 +109,7 @@ public class LeskParameters extends WSDParameters {
return depth_weight;
}
- public void setDepth_weight(double depth_weight) {
+ void setDepth_weight(double depth_weight) {
this.depth_weight = depth_weight;
}
@@ -117,7 +117,7 @@ public class LeskParameters extends WSDParameters {
return iexp;
}
- public void setIexp(double iexp) {
+ void setIexp(double iexp) {
this.iexp = iexp;
}
@@ -125,7 +125,7 @@ public class LeskParameters extends WSDParameters {
return dexp;
}
- public void setDexp(double dexp) {
+ void setDexp(double dexp) {
this.dexp = dexp;
}
@@ -138,7 +138,7 @@ public class LeskParameters extends WSDParameters {
}
/**
- * Sets default parameters
+ * Activates default Lesk feature parameters.
*/
void setDefaults() {
setType(LeskParameters.DFLT_LESK_TYPE);
@@ -153,6 +153,9 @@ public class LeskParameters extends WSDParameters {
{ true, true, true, true, true, true, true, true, true, true };
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public boolean areValid() {
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java
index 966c3a5..1d51f3b 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java
@@ -42,10 +42,6 @@ public class MFS extends AbstractWSDisambiguator {
public static final String NONESENSE = "nonesense";
- public MFS() {
- super();
- }
-
/**
* Extracts the most frequent sense for a specified {@link WSDSample}.
*
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
index 28517fb..87641a0 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
@@ -34,6 +34,9 @@ import opennlp.tools.tokenize.Tokenizer;
/**
* Convenience class to access some features via {@link Synset synsets}.
+ * A {@link Synset} represents a concept, and contains a set of words, each of
which
+ * has a sense for that concept. Each element is thus synonymous with the other
+ * words contained in the Synset.
*
* @see Synset
*/
@@ -80,6 +83,9 @@ public class SynNode {
this.parent = parent;
}
+ /**
+ * @return Retrieves the associated {@link WordPOS elements}.
+ */
public List<WordPOS> getSenseRelevantWords() {
return senseRelevantWords;
}
@@ -246,10 +252,10 @@ public class SynNode {
}
/**
- * Gets the senses of the nodes
+ * Retrieves associated {@link WordSense senses} for specified {@code nodes}.
*
- * @param nodes
- * @return senses from the nodes
+ * @param nodes A list of {@link SynNode nodes} to score.
+ * @return The {@link WordSense senses} obtained for the {@code nodes}.
*/
public static List<WordSense> updateSenses(List<SynNode> nodes) {
List<WordSense> scoredSenses = new ArrayList<>();
@@ -262,6 +268,5 @@ public class SynNode {
scoredSenses.add(wordSense);
}
return scoredSenses;
-
}
}
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java
index 14ba92f..10581ed 100644
---
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java
+++
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java
@@ -23,7 +23,7 @@ import java.util.List;
import java.util.regex.Pattern;
/**
- * Interface for {@link AbstractWSDisambiguator} context generators.
+ * Describes a context generator for word sense disambiguation.
*/
public interface WSDContextGenerator {
@@ -39,7 +39,7 @@ public interface WSDContextGenerator {
* @param ngram The ngram to consider for context. Must be greater than
{@code 0}.
* @param windowSize The context window. Must be greater than {@code 0}.
* @param model The list of unigrams.
- * @return The IMS context of the word to disambiguate.
+ * @return The context of the word to disambiguate at {@code index} in
{@code toks}.
*/
String[] getContext(int index, String[] toks, String[] tags, String[] lemmas,
int ngram, int windowSize, List<String> model);
@@ -51,7 +51,7 @@ public interface WSDContextGenerator {
* @param ngram The ngram to consider for context. Must be greater than
{@code 0}.
* @param windowSize The context window. Must be greater than {@code 0}.
* @param model The list of unigrams.
- * @return The IMS context of the word to disambiguate.
+ * @return The context of the word to disambiguate at {@code index} in
{@code sample}.
*/
String[] getContext(WSDSample sample, int ngram, int windowSize,
List<String> model);
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
index 9f6c28f..5fce6df 100644
---
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
+++
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java
@@ -23,32 +23,35 @@ import java.io.File;
import java.nio.file.Path;
/**
- * This class contains the parameters for the IMS approach as well as the
+ * Defines the parameters for the <a
href="https://aclanthology.org/P10-4014.pdf">
+ * IMS (It Makes Sense)</a> approach, as well as the
* directories containing the files used
+ *
+ * @see WSDParameters
*/
public class WSDDefaultParameters extends WSDParameters {
- protected String languageCode;
- protected int windowSize;
- protected int ngram;
+ public static final int DFLT_WIN_SIZE = 3;
+ public static final int DFLT_NGRAM = 2;
+ public static final String DFLT_LANG_CODE = "en";
+ public static final SenseSource DFLT_SOURCE = SenseSource.WORDNET;
- protected Path trainingDataDir;
+ private final Path trainingDataDir;
- protected static final int DFLT_WIN_SIZE = 3;
- protected static final int DFLT_NGRAM = 2;
- protected static final String DFLT_LANG_CODE = "en";
- protected static final SenseSource DFLT_SOURCE = SenseSource.WORDNET;
+ private final String languageCode;
+ protected int windowSize;
+ protected int ngram;
/**
* Initializes a new set of {@link WSDDefaultParameters}.
- * The default language used is <i>English</i>.
+ * The default language used is '<i>en</i>' (English).
*
- * @param windowSize the size of the window used for the extraction of the
features
- * qualified of Surrounding Words
- * @param ngram the number words used for the extraction of features
qualified of
- * Local Collocations
- * @param senseSource the source of the training data
- * @param trainingDataDir The {@link Path} where to place or lookup trained
models.
+ * @param windowSize The size of the window used for the extraction of the
features
+ * qualified of Surrounding Words.
+ * @param ngram The number of words used for the extraction of features
qualified of
+ * Local Collocations.
+ * @param senseSource The {@link SenseSource source} of the training data
+ * @param trainingDataDir The {@link Path} where to store or read trained
models from.
*/
public WSDDefaultParameters(int windowSize, int ngram, SenseSource
senseSource, Path trainingDataDir) {
this.languageCode = DFLT_LANG_CODE;
@@ -65,8 +68,10 @@ public class WSDDefaultParameters extends WSDParameters {
/**
* Initializes a new set of {@link WSDDefaultParameters}.
- * The default language used is <i>English</i>, the window size is {@link
#DFLT_WIN_SIZE},
+ * The default language used is '<i>en</i>' (English), the window size is
{@link #DFLT_WIN_SIZE},
* and the ngram length is initialized as {@link #DFLT_NGRAM}.
+ *
+ * @implNote The training directory will be unset.
*/
public WSDDefaultParameters() {
this(DFLT_WIN_SIZE, DFLT_NGRAM, DFLT_SOURCE, null);
@@ -74,7 +79,7 @@ public class WSDDefaultParameters extends WSDParameters {
/**
* Initializes a new set of {@link WSDDefaultParameters}.
- * The default language used is <i>English</i>, the window size is {@link
#DFLT_WIN_SIZE},
+ * The default language used is '<i>en</i>' (English), the window size is
{@link #DFLT_WIN_SIZE},
* and the ngram length is initialized as {@link #DFLT_NGRAM}.
*
* @param trainingDataDir The {@link Path} where to place or lookup trained
models.
@@ -95,10 +100,16 @@ public class WSDDefaultParameters extends WSDParameters {
return ngram;
}
+ /**
+ * @return The {@link Path} where to place or lookup trained models. May be
{@code null}!
+ */
public Path getTrainingDataDirectory() {
return trainingDataDir;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public boolean areValid() {
return true;
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
index ddc2cac..1775e5c 100644
---
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
+++
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
@@ -20,7 +20,10 @@ package opennlp.tools.disambiguator;
import opennlp.tools.util.eval.EvaluationMonitor;
/**
- * A marker interface for evaluating {@link AbstractWSDisambiguator
disambiguators}.
+ * A marker interface for evaluating {@link Disambiguator disambiguators}.
+ *
+ * @see WSDSample
+ * @see EvaluationMonitor
*/
public interface WSDEvaluationMonitor extends EvaluationMonitor<WSDSample> {
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
index 6b71c79..4e0d502 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
@@ -43,18 +43,19 @@ public class WSDEvaluator extends Evaluator<WSDSample> {
private final Disambiguator disambiguator;
/**
- * Initializes the current instance with the given {@link
AbstractWSDisambiguator}.
+ * Initializes a {@link WSDEvaluator} with the given {@link Disambiguator}.
*
- * @param disambiguator
- * the {@link AbstractWSDisambiguator} to evaluate.
- * @param listeners
- * evaluation sample listeners
+ * @param disambiguator The {@link Disambiguator} to evaluate.
+ * @param listeners The (optional) {@link WSDEvaluationMonitor evaluation
sample listeners}.
*/
public WSDEvaluator(Disambiguator disambiguator, WSDEvaluationMonitor...
listeners) {
super(listeners);
this.disambiguator = disambiguator;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
protected WSDSample processSample(WSDSample ref) {
@@ -82,20 +83,16 @@ public class WSDEvaluator extends Evaluator<WSDSample> {
}
/**
- * Retrieves the WSD accuracy.
- * <p>
- * This is defined as: WSD accuracy = correctly disambiguated / total words
+ * @implNote WSD accuracy = correctly disambiguated / total words
*
- * @return the WSD accuracy
+ * @return Retrieves the WSD accuracy.
*/
public double getAccuracy() {
return accuracy.mean();
}
/**
- * Retrieves the total number of words considered in the evaluation.
- *
- * @return the word count
+ * @return Retrieves the total number of words considered in the evaluation.
*/
public long getWordCount() {
return accuracy.count();
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
index d462947..3613c0d 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java
@@ -42,6 +42,10 @@ import opennlp.tools.tokenize.ThreadSafeTokenizerME;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.DownloadUtil;
+/**
+ * A helper class that loads and organizes resources, and provides helper
methods
+ * to avoid multiple copies of dealing with certain resources.
+ */
public class WSDHelper {
private static final Logger LOG = LoggerFactory.getLogger(WSDHelper.class);
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
index 984c41b..87bc3d2 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java
@@ -159,11 +159,17 @@ public class WSDModel extends BaseModel {
return getManifestProperty(WORDTAG);
}
+ /**
+ * {@inheritDoc}
+ */
@Override
protected Class<? extends BaseToolFactory> getDefaultFactory() {
return WSDisambiguatorFactory.class;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
protected void validateArtifactMap() throws InvalidFormatException {
super.validateArtifactMap();
@@ -186,11 +192,17 @@ public class WSDModel extends BaseModel {
this.contextEntries = Arrays.asList(surroundings.split(","));
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public int hashCode() {
return Objects.hash(artifactMap.get(MANIFEST_ENTRY),
artifactMap.get(WSD_MODEL_ENTRY));
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public boolean equals(Object obj) {
if (obj == this) {
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
index 36f0abf..912f6a5 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
@@ -20,8 +20,7 @@
package opennlp.tools.disambiguator;
/**
- * Disambiguation Parameters
- *
+ * Describes a set of parameters to configure word sense disambiguation.
*/
public abstract class WSDParameters {
@@ -32,19 +31,24 @@ public abstract class WSDParameters {
protected SenseSource senseSource;
/**
- * @return if the disambiguation type is coarse grained or fine-grained
+ * Initializes a default set of {@link WSDParameters} and chooses
+ * the {@link SenseSource#WORDNET} by default.
*/
+ public WSDParameters() {
+ this.senseSource = SenseSource.WORDNET;
+ }
+ /**
+ * @return if the disambiguation type is coarse grained or fine-grained
+ */
public SenseSource getSenseSource() {
return senseSource;
}
- public WSDParameters() {
- this.senseSource = SenseSource.WORDNET;
- }
-
- /*
- * @return checks if the parameters are valid or not
+ /**
+ * Checks if the parameters are valid or not.
+ *
+ * @return {@code true} if valid, {@code false} otherwise.
*/
public abstract boolean areValid();
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
index 5360a1d..e6984ff 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java
@@ -217,6 +217,9 @@ public class WSDSample implements Sample {
return ref;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public boolean equals(Object obj) {
if (this == obj) {
@@ -230,6 +233,9 @@ public class WSDSample implements Sample {
}
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public int hashCode() {
int result = Objects.hash(targetPosition);
@@ -238,6 +244,9 @@ public class WSDSample implements Sample {
return result;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public String toString() {
StringBuilder result = new StringBuilder();
@@ -255,7 +264,6 @@ public class WSDSample implements Sample {
// get rid of last space
result.setLength(result.length() - 1);
}
-
return result.toString();
}
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
index 5a656b7..f48d628 100644
---
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
+++
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java
@@ -21,6 +21,9 @@ import opennlp.tools.util.BaseToolFactory;
import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.ext.ExtensionLoader;
+/**
+ * Implements a word sense disambiguation related {@link BaseToolFactory}.
+ */
public class WSDisambiguatorFactory extends BaseToolFactory {
/**
@@ -58,11 +61,19 @@ public class WSDisambiguatorFactory extends BaseToolFactory
{
}
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public void validateArtifactMap() throws InvalidFormatException {
// no additional artifacts
}
+ /**
+ * @implNote By default, an {@link IMSWSDContextGenerator} will be
instantiated.
+ *
+ * @return Retrieves the active {@link WSDContextGenerator}.
+ */
public WSDContextGenerator getContextGenerator() {
// default can be IMS
return new IMSWSDContextGenerator();
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
index d3c5dea..496d543 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java
@@ -123,7 +123,7 @@ public class WTDIMS {
return sentence;
}
- public void setSentence(String[] sentence) {
+ private void setSentence(String[] sentence) {
this.sentence = sentence;
}
@@ -131,7 +131,7 @@ public class WTDIMS {
return posTags;
}
- public void setPosTags(String[] posTags) {
+ private void setPosTags(String[] posTags) {
this.posTags = posTags;
}
@@ -139,7 +139,7 @@ public class WTDIMS {
return wordIndex;
}
- public void setWordIndex(int wordIndex) {
+ private void setWordIndex(int wordIndex) {
this.wordIndex = wordIndex;
}
@@ -147,7 +147,7 @@ public class WTDIMS {
return lemmas;
}
- public void setLemmas(String[] lemmas) {
+ private void setLemmas(String[] lemmas) {
this.lemmas = lemmas;
}
@@ -163,7 +163,7 @@ public class WTDIMS {
return senseIDs;
}
- public void setSenseIDs(String[] senseIDs) {
+ private void setSenseIDs(String[] senseIDs) {
this.senseIDs = senseIDs;
}
@@ -201,7 +201,7 @@ public class WTDIMS {
return posOfSurroundingWords;
}
- public void setPosOfSurroundingWords(String[] posOfSurroundingWords) {
+ protected void setPosOfSurroundingWords(String[] posOfSurroundingWords) {
this.posOfSurroundingWords = posOfSurroundingWords;
}
@@ -209,7 +209,7 @@ public class WTDIMS {
return surroundingWords;
}
- public void setSurroundingWords(String[] surroundingWords) {
+ protected void setSurroundingWords(String[] surroundingWords) {
this.surroundingWords = surroundingWords;
}
@@ -217,7 +217,7 @@ public class WTDIMS {
return localCollocations;
}
- public void setLocalCollocations(String[] localCollocations) {
+ protected void setLocalCollocations(String[] localCollocations) {
this.localCollocations = localCollocations;
}
@@ -225,7 +225,7 @@ public class WTDIMS {
return this.features;
}
- public void setFeatures(String[] features) {
+ protected void setFeatures(String[] features) {
this.features = features;
}
diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
index 91c2d48..118c306 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
@@ -30,7 +30,12 @@ import net.sf.extjwnl.data.Synset;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-// TODO extend Word instead
+/**
+ * A container class that holds a token (word) and its related POS tag.
+ * It provides methods to get {@link Synset synsets} or stem forms for it.
+ *
+ * @see POS
+ */
public class WordPOS {
private static final Logger LOG = LoggerFactory.getLogger(WordPOS.class);
@@ -45,6 +50,7 @@ public class WordPOS {
*
* @param word The token to use. It must not be {@code null}.
* @param tag The POS tag to use. It must not be {@code null}.
+ *
* @throws IllegalArgumentException Thrown if parameters are invalid.
*/
public WordPOS(String word, String tag) {
@@ -56,6 +62,7 @@ public class WordPOS {
*
* @param word The token to use. It must not be {@code null} and not be
empty.
* @param pos The {@link POS pos tag} to use. It must not be {@code null}.
+ *
* @throws IllegalArgumentException Thrown if parameters are invalid.
*/
public WordPOS(String word, POS pos) {
@@ -108,7 +115,13 @@ public class WordPOS {
return Collections.emptyList();
}
- // uses Stemming to check if two words are equivalent
+ /**
+ * Applies stemming to check whether {@code wordToCompare} is equivalent
+ * to the current {@link WordPOS} instance.
+ *
+ * @param wordToCompare The {@link WordPOS} instance to compare against.
+ */
+ // TODO check the performance of the current implementation (!)
public boolean isStemEquivalent(WordPOS wordToCompare) {
// check if there is intersection in the stems;
List<String> listToCompare = wordToCompare.getStems();
diff --git
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
index da52e9a..91a6863 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
@@ -19,51 +19,95 @@
package opennlp.tools.disambiguator;
+/**
+ * Represents a type that associates {@link WSDSample samples} with a
+ * {@link SynNode syn node} which holds one or more senses.
+ * Elements of this type can be scored and thereby be ranked. This is
+ * why {@link WordSense} implements {@link Comparable}.
+ *
+ * @see SynNode
+ * @see WSDSample
+ */
public class WordSense implements Comparable<WordSense> {
- private final SynNode node;
private final int id;
+ private final SynNode node;
private WSDSample sample;
private double score;
+ /**
+ * Initializes a {@link WordSense} via a numerical {@code id} and
+ * a {@link SynNode} instance.
+ *
+ * @param id Must be a positive number.
+ * @param node The {@link SynNode node} to link senses to.
+ */
public WordSense(int id, SynNode node) {
this.id = id;
this.node = node;
}
+ /**
+ * Initializes a {@link WordSense} via a {@link WSDSample} and
+ * a {@link SynNode} instance.
+ *
+ * @param sample The {@link WSDSample word sample} to associate.
+ * @param node The {@link SynNode node} to link senses to.
+ */
public WordSense(WSDSample sample, SynNode node) {
this(sample.getSenseID(), node);
this.sample = sample;
}
+ /**
+ * @return Retrieves the numerical identifier.
+ */
public int getId() {
return id;
}
+ /**
+ * @return Retrieves the gloss available via WordNet.
+ */
public String getGloss() {
return node.getGloss();
}
+ /**
+ * @return Retrieves the {@link SynNode syn node} instance.
+ */
public SynNode getNode() {
return node;
}
+ /**
+ * @return Retrieves the {@link WSDSample word sample} instance.
+ */
public WSDSample getWSDSample() {
return sample;
}
- public void setWSDSample(WSDSample sample) {
+ void setWSDSample(WSDSample sample) {
this.sample = sample;
}
+ /**
+ * @return Retrieves the numerical score. The value is {@code 0.0} if no score was set.
+ */
public double getScore() {
return score;
}
- public void setScore(double score) {
+ /**
+ * @param score The score to assign. No restrictions on this parameter.
+ */
+ void setScore(double score) {
this.score = score;
}
+ /**
+ * {@inheritDoc}
+ */
@Override
public int compareTo(WordSense o) {
return Double.compare(this.score, o.score);