This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch OPENNLP-789-Extend-JavaDoc-for-WSD-component in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit ef94ce615bbe34d21b4aeb89177e3ca3847e888a Author: Martin Wiesner <[email protected]> AuthorDate: Thu Dec 19 16:44:04 2024 +0100 OPENNLP-789 Extend JavaDoc for WSD component --- .../disambiguator/AbstractWSDisambiguator.java | 8 +- .../opennlp/tools/disambiguator/Disambiguator.java | 43 ++-- .../tools/disambiguator/FeaturesExtractor.java | 51 ++-- .../disambiguator/IMSWSDContextGenerator.java | 13 +- .../disambiguator/IMSWSDSequenceValidator.java | 13 + .../java/opennlp/tools/disambiguator/Lesk.java | 269 +++++++-------------- .../tools/disambiguator/LeskParameters.java | 23 +- .../main/java/opennlp/tools/disambiguator/MFS.java | 4 - .../java/opennlp/tools/disambiguator/SynNode.java | 13 +- .../tools/disambiguator/WSDContextGenerator.java | 6 +- .../tools/disambiguator/WSDDefaultParameters.java | 47 ++-- .../tools/disambiguator/WSDEvaluationMonitor.java | 5 +- .../opennlp/tools/disambiguator/WSDEvaluator.java | 21 +- .../opennlp/tools/disambiguator/WSDHelper.java | 4 + .../java/opennlp/tools/disambiguator/WSDModel.java | 12 + .../opennlp/tools/disambiguator/WSDParameters.java | 22 +- .../opennlp/tools/disambiguator/WSDSample.java | 10 +- .../disambiguator/WSDisambiguatorFactory.java | 11 + .../java/opennlp/tools/disambiguator/WTDIMS.java | 18 +- .../java/opennlp/tools/disambiguator/WordPOS.java | 17 +- .../opennlp/tools/disambiguator/WordSense.java | 50 +++- 21 files changed, 357 insertions(+), 303 deletions(-) diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/AbstractWSDisambiguator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/AbstractWSDisambiguator.java index de8cbad..bff03d0 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/AbstractWSDisambiguator.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/AbstractWSDisambiguator.java @@ -33,9 +33,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * A base implementation of {@link Disambiguator} + * A base implementation of {@link 
Disambiguator}. + * <p> + * Hint:<br/> + * Examples on how to use different implementation approaches are provided + * in the corresponding tests. * - * @implNote Examples on how to use each approach are provided in the test section. + * @implNote For the moment the source of sense definitions is from WordNet. * * @see Disambiguator * @see WSDParameters diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Disambiguator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Disambiguator.java index ecabbdf..0cda028 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Disambiguator.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Disambiguator.java @@ -24,21 +24,29 @@ import opennlp.tools.util.Span; import java.util.List; /** - * A word sense disambiguator that determines which sense of a word is meant in - * a particular context. It is a classification task, where the classes are the - * different senses of the ambiguous word. Disambiguation can be achieved in - * either supervised or un-supervised approaches. A {@link Disambiguator} returns - * a sense ID. + * Describes a word sense disambiguator that determines which sense of a word is + * meant in a particular context. + * It is a classification task, where the classes are the different senses of + * the ambiguous word. Disambiguation can be achieved in either supervised or + * un-supervised approaches. A {@link Disambiguator} returns a sense ID. * <p> * <b>How it works:</b><br/> - * Just supply the context as an array of tokens and the - * index of the target word to the disambiguate method. + * Just supply the {@code context} as an array of tokens and the index of the + * {@code target word} to the disambiguate method. * <p> - * Otherwise, for multiple words, you can set a word span instead of simply one - * index. For the moment the source of sense definitions is from WordNet. 
+ * Otherwise, for multiple words, you can set a word {@link Span} instead of + * a single target index. */ public interface Disambiguator { + /** + * Conducts disambiguation for a {@link WSDSample} context. + * + * @param sample The {@link WSDSample} containing the word and POS tags to use. + * @return The sense of the {@code sample} to disambiguate. + */ + String disambiguate(WSDSample sample); + /** * Conducts disambiguation for a single word located at {@code ambiguousTokenIndex}. * @@ -47,26 +55,27 @@ public interface Disambiguator { * @param lemmas The lemmas of ALL the words in the context. * @param ambiguousTokenIndex The index of the word to disambiguate. * Must not be less or equal to zero. - * @return The senses of the word to disambiguate. + * @return The sense of the word to disambiguate. */ String disambiguate(String[] tokenizedContext, String[] tokenTags, String[] lemmas, int ambiguousTokenIndex); /** - * Conducts disambiguation for a single word located at {@code ambiguousTokenIndex}. + * Conducts disambiguation for all word located at {@code ambiguousTokenSpan}. * * @param tokenizedContext The text containing the word to disambiguate. * @param tokenTags The tags corresponding to the context. * @param lemmas The lemmas of ALL the words in the context. * @param ambiguousTokenSpan The {@link Span} of the word(s) to disambiguate. * Must not be {@code null}. - * @return The senses of the word to disambiguate + * @return A List of senses, each corresponding to the senses of each word of + * the context which are to be disambiguated. */ List<String> disambiguate(String[] tokenizedContext, String[] tokenTags, String[] lemmas, Span ambiguousTokenSpan); /** - * Conducts disambiguation for all the words of the context. + * Conducts disambiguation for all the words of the {@code tokenizedContext}. * * @param tokenizedContext The text containing the word to disambiguate. * @param tokenTags The tags corresponding to the context. 
@@ -77,12 +86,4 @@ public interface Disambiguator { List<String> disambiguate(String[] tokenizedContext, String[] tokenTags, String[] lemmas); - /** - * Conducts disambiguation for a {@link WSDSample} context. - * - * @param sample The {@link WSDSample} containing the word and POS tags to use. - * @return The senses of the {@code sample} to disambiguate. - */ - String disambiguate(WSDSample sample); - } diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java index 3008297..2ce9fe6 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java @@ -40,9 +40,12 @@ import java.util.Map; * </ul> * * The first methods serve to extract the features for the IMS algorithm. Three - * families of features are to be extracted: - PoS of Surrounding Words: it - * requires one parameter: "Window size" - Surrounding Words: no parameters are - * required - Local Collocations: it requires one parameter: "the n-gram" + * families of features are to be extracted: + * <ul> + * <li>PoS of Surrounding Words: it requires one parameter: "Window size"</li> + * <li>Surrounding Words: no parameters are required</li> + * <li>Local Collocations: it requires one parameter: the "n-gram" number</li> + * </ul> * * @see WTDIMS * @see <a href="https://aclanthology.org/P10-4014.pdf"> @@ -52,7 +55,7 @@ import java.util.Map; public class FeaturesExtractor { /* - * Extracts POS tags of surrounding words of a given WTDIMS instance. + * Extracts POS tags of surrounding words of a given wordToDisambiguate instance. */ private String[] extractPosOfSurroundingWords(WTDIMS wordToDisambiguate, int windowSize) { @@ -75,7 +78,7 @@ public class FeaturesExtractor { } /* - * Extracts surrounding lemmas of a given WTDIMS instance. 
+ * Extracts surrounding lemmas of a given wordToDisambiguate instance. * Irrelevant stop words are skipped. */ private String[] extractSurroundingWords(WTDIMS wordToDisambiguate) { @@ -131,13 +134,13 @@ public class FeaturesExtractor { } /** - * Generates the full list of surrounding words, from the - * training data. These data will be later used for the generation of the - * features qualified of "Surrounding words". + * Generates the full list of surrounding words for the specified + * {@code trainingData}. + * These data will be used for the generation of features + * qualified for "Surrounding words". * - * @param trainingData - * list of the training samples (type {@link WTDIMS} - * @return the list of all the surrounding words from all the training data + * @param trainingData A list of the training samples (type {@link WTDIMS}. + * @return A list of all the surrounding words for the {@code trainingData}. */ public List<String> extractTrainingSurroundingWords(List<WTDIMS> trainingData) { @@ -155,31 +158,43 @@ public class FeaturesExtractor { } /** - * This method generates the different set of features related to the IMS - * approach and store them in the corresponding attributes of the {@link WTDIMS}. + * Generates the different set of features related to the IMS + * approach and puts them in the corresponding attributes of + * the {@link WTDIMS word to disambiguate} object. * * @param wtd The {@link WTDIMS word to disambiguate}. * @param windowSize The parameter required to generate the features qualified of * "PoS of Surrounding Words". * @param ngram The parameter required to generate the features qualified of * "Local Collocations". + * + * @throws IllegalArgumentException Thrown if parameters were invalid. 
*/ public void extractIMSFeatures(WTDIMS wtd, int windowSize, int ngram) { + if (wtd == null) { + throw new IllegalArgumentException("WTD must not be null"); + } wtd.setPosOfSurroundingWords(extractPosOfSurroundingWords(wtd, windowSize)); wtd.setSurroundingWords(extractSurroundingWords(wtd)); wtd.setLocalCollocations(extractLocalCollocations(wtd, ngram)); } /** - * This generates the context of IMS. It supposes that the features have - * already been extracted and stored in the {@link WTDIMS} object, therefore it - * doesn't require any parameters. + * Generates the context for the {@link WTDIMS word to disambiguate}. + * + * @implNote It is assumed that the features have already been extracted and + * wrapped in the {@link WTDIMS word to disambiguate}. + * Therefore, it doesn't require any parameters. * - * @param wtd The {@link WTDIMS wtd to disambiguate}. + * @param wtd The {@link WTDIMS word to disambiguate}. * @param listSurrWords The full list of surrounding words of the training data. + * + * @throws IllegalArgumentException Thrown if parameters were invalid. */ public void serializeIMSFeatures(WTDIMS wtd, List<String> listSurrWords) { - + if (wtd == null) { + throw new IllegalArgumentException("WTD must not be null"); + } String[] posOfSurroundingWords = wtd.getPosOfSurroundingWords(); List<String> surroundingWords = new ArrayList<>( Arrays.asList(wtd.getSurroundingWords())); diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java index fe89933..76d50c8 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDContextGenerator.java @@ -24,12 +24,17 @@ import java.util.List; import java.util.Set; /** - * The default Context Generator of the IMS approach. 
+ * The default Context Generator of the + * <a href="https://aclanthology.org/P10-4014.pdf"> IMS (It Makes Sense)</a> approach. * * @see WSDContextGenerator */ public class IMSWSDContextGenerator implements WSDContextGenerator { + /* + * Extracts POS tags of surrounding words for the word at the specified index + * within the windowSize. + */ private String[] extractPosOfSurroundingWords(int index, String[] tags, int windowSize) { String[] windowTags = new String[2 * windowSize + 1]; @@ -96,6 +101,9 @@ public class IMSWSDContextGenerator implements WSDContextGenerator { return res; } + /** + * {@inheritDoc} + */ @Override public String[] getContext(int index, String[] tokens, String[] tags, String[] lemmas, int ngram, int windowSize, List<String> model) { @@ -131,6 +139,9 @@ public class IMSWSDContextGenerator implements WSDContextGenerator { return serializedFeatures; } + /** + * {@inheritDoc} + */ @Override public String[] getContext(WSDSample sample, int ngram, int windowSize, List<String> model) { diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java index 0818447..c910fea 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/IMSWSDSequenceValidator.java @@ -17,6 +17,10 @@ package opennlp.tools.disambiguator; +/** + * A {@link opennlp.tools.util.SequenceValidator} variant to check whether + * IMS-based sequence outcomes are valid or not. + */ public class IMSWSDSequenceValidator { private boolean validOutcome(String outcome, String prevOutcome) { @@ -43,6 +47,15 @@ public class IMSWSDSequenceValidator { return validOutcome(outcome, prevOutcome); } + /** + * Validates the provided {@code outcome} is valid in the context of + * the sequence {@code s}. + * + * @param outcome The candidate result to check. 
+ * @param s The tokens that cover the sequence {@code outcome} + * shall be valid for. + * @return {@code True} if {@code outcome} is valid, {@code false} otherwise. + */ public boolean validSequence(String outcome, String[] s) { return validOutcome(outcome, s); } diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java index 19e7ac7..ea2a8e4 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Lesk.java @@ -36,7 +36,22 @@ import opennlp.tools.tokenize.Tokenizer; * The main idea is to check for word overlaps in the sense definitions * of the surrounding context. An overlap is when two words have similar stems. * The more overlaps a word has the higher its score. Different variations of - * the approach are included in this class. + * the approach are included in this class, as defined in {@link LeskParameters.LeskType}. + * <p> + * Ten features are possible for Lesk. + * <ul> + * <li>0: Synonyms</li> + * <li>1: Hypernyms</li> + * <li>2: Hyponyms</li> + * <li>3: Meronyms</li> + * <li>4: Holonyms</li> + * <li>5: Entailments</li> + * <li>6: Coordinate Terms</li> + * <li>7: Causes</li> + * <li>8: Attributes</li> + * <li>9: Pertainyms</li> + * </ul> + * Those are defined via {@link LeskParameters#features}. 
* * @see Disambiguator * @see LeskParameters @@ -90,7 +105,7 @@ public class Lesk extends AbstractWSDisambiguator { if (params.areValid()) { this.params = (LeskParameters) params; } else { - throw new IllegalArgumentException("wrong params"); + throw new IllegalArgumentException("Detected incorrect LeskParameter values!"); } } } @@ -105,10 +120,9 @@ public class Lesk extends AbstractWSDisambiguator { /** * The basic Lesk method where the entire context is considered for overlaps - * - * @param sample - * the word sample to disambiguate - * @return The array of WordSenses with their scores + * + * @param sample The {@link WSDSample} to disambiguate. + * @return The list of {@link WordSense word senses} with their scores. */ public List<WordSense> basic(WSDSample sample) { @@ -150,10 +164,9 @@ public class Lesk extends AbstractWSDisambiguator { /** * The basic Lesk method but applied to a default context windows - * - * @param sample - * the word sample to disambiguate - * @return The array of WordSenses with their scores + * + * @param sample The {@link WSDSample} to disambiguate. + * @return The list of {@link WordSense word senses} with their scores. */ public List<WordSense> basicContextual(WSDSample sample) { @@ -207,10 +220,9 @@ public class Lesk extends AbstractWSDisambiguator { * An extended version of the Lesk approach that takes into consideration * semantically related feature overlaps across the entire context The scoring * function uses linear weights. - * - * @param sample - * the word sample to disambiguate - * @return the list of WordSenses with their scores + * + * @param sample The {@link WSDSample} to disambiguate. + * @return The list of {@link WordSense word senses} with their scores. 
*/ public List<WordSense> extended(WSDSample sample) { params.setWinBSize(0); @@ -222,10 +234,9 @@ public class Lesk extends AbstractWSDisambiguator { * An extended version of the Lesk approach that takes into consideration * semantically related feature overlaps in a default context window The * scoring function uses linear weights. - * - * @param sample - * the word sample to disambiguate - * @return the list of WordSenses with their scores + * + * @param sample The {@link WSDSample} to disambiguate. + * @return The list of {@link WordSense word senses} with their scores. */ public List<WordSense> extendedContextual(WSDSample sample) { List<WordSense> scoredSenses; @@ -234,81 +245,65 @@ public class Lesk extends AbstractWSDisambiguator { } else { scoredSenses = basicContextual(sample); } - for (WordSense wordSense : scoredSenses) { - - if (getParams().getFeatures()[0]) { - wordSense.setScore(wordSense.getScore() + getParams().depth_weight - * assessSynonyms(wordSense.getNode().getSynonyms(), contextWords)); + for (WordSense ws : scoredSenses) { + final SynNode synNode = ws.getNode(); + final Synset synset = synNode.synset; + if (params.getFeatures()[0]) { + ws.setScore(ws.getScore() + params.depth_weight + * assessSynonyms(synNode.getSynonyms(), contextWords)); } - - if (getParams().getFeatures()[1]) { - fathomHypernyms(wordSense, wordSense.getNode().synset, contextWords, + if (params.getFeatures()[1]) { + fathomHypernyms(ws, synset, contextWords, params.depth, params.depth, params.depth_weight); } - - if (getParams().getFeatures()[2]) { - fathomHyponyms(wordSense, wordSense.getNode().synset, contextWords, + if (params.getFeatures()[2]) { + fathomHyponyms(ws, synset, contextWords, params.depth, params.depth, params.depth_weight); } - - if (getParams().getFeatures()[3]) { - fathomMeronyms(wordSense, wordSense.getNode().synset, contextWords, + if (params.getFeatures()[3]) { + fathomMeronyms(ws, synset, contextWords, params.depth, params.depth, params.depth_weight); 
- } - - if (getParams().getFeatures()[4]) { - fathomHolonyms(wordSense, wordSense.getNode().synset, contextWords, + if (params.getFeatures()[4]) { + fathomHolonyms(ws, synset, contextWords, params.depth, params.depth, params.depth_weight); - } - - if (getParams().getFeatures()[5]) { - fathomEntailments(wordSense, wordSense.getNode().synset, contextWords, + if (params.getFeatures()[5]) { + fathomEntailments(ws, synset, contextWords, params.depth, params.depth, params.depth_weight); - } - if (getParams().getFeatures()[6]) { - fathomCoordinateTerms(wordSense, wordSense.getNode().synset, + if (params.getFeatures()[6]) { + fathomCoordinateTerms(ws, synset, contextWords, params.depth, params.depth, params.depth_weight); - } - if (getParams().getFeatures()[7]) { - fathomCauses(wordSense, wordSense.getNode().synset, contextWords, + if (params.getFeatures()[7]) { + fathomCauses(ws, synset, contextWords, params.depth, params.depth, params.depth_weight); - } - if (getParams().getFeatures()[8]) { - fathomAttributes(wordSense, wordSense.getNode().synset, contextWords, + if (params.getFeatures()[8]) { + fathomAttributes(ws, synset, contextWords, params.depth, params.depth, params.depth_weight); - } - if (getParams().getFeatures()[9]) { - fathomPertainyms(wordSense, wordSense.getNode().synset, contextWords, + if (params.getFeatures()[9]) { + fathomPertainyms(ws, synset, contextWords, params.depth, params.depth, params.depth_weight); - } - } - return scoredSenses; - } /** * An extended version of the Lesk approach that takes into consideration * semantically related feature overlaps in all the context. The scoring * function uses exponential weights. - * - * @param sample the word sample to disambiguate - * - * @return A list of {@link WordSense word senses} with their scores. + * + * @param sample The {@link WSDSample} to disambiguate. + * @return The list of {@link WordSense word senses} with their scores. 
*/ public List<WordSense> extendedExponential(WSDSample sample) { params.setWinBSize(0); params.setWinFSize(0); return extendedExponentialContextual(sample); - } /** @@ -316,9 +311,8 @@ public class Lesk extends AbstractWSDisambiguator { * semantically related feature overlaps in a custom window in the context. * The scoring function uses exponential weights. * - * @param sample - * the word sample to disambiguate - * @return the list of WordSenses with their scores + * @param sample The {@link WSDSample} to disambiguate. + * @return The list of {@link WordSense word senses} with their scores. */ public List<WordSense> extendedExponentialContextual(WSDSample sample) { List<WordSense> scoredSenses; @@ -328,74 +322,53 @@ public class Lesk extends AbstractWSDisambiguator { scoredSenses = basicContextual(sample); } - for (WordSense wordSense : scoredSenses) { - + for (WordSense ws : scoredSenses) { + final SynNode synNode = ws.getNode(); + final Synset synset = synNode.synset; if (params.features[0]) { - wordSense.setScore(wordSense.getScore() + Math.pow( - assessSynonyms(wordSense.getNode().getSynonyms(), contextWords), - params.iexp)); + ws.setScore(ws.getScore() + Math.pow(assessSynonyms( + synNode.getSynonyms(), contextWords), params.iexp)); } - if (params.features[1]) { - fathomHypernymsExponential(wordSense, wordSense.getNode().synset, - contextWords, params.depth, params.depth, params.iexp, params.dexp); + fathomHypernymsExponential(ws, synset, contextWords, + params.depth, params.depth, params.iexp, params.dexp); } - if (params.features[2]) { - fathomHyponymsExponential(wordSense, wordSense.getNode().synset, - contextWords, params.depth, params.depth, params.iexp, params.dexp); + fathomHyponymsExponential(ws, synset, contextWords, + params.depth, params.depth, params.iexp, params.dexp); } - if (params.features[3]) { - fathomMeronymsExponential(wordSense, wordSense.getNode().synset, - contextWords, params.depth, params.depth, params.iexp, params.dexp); - + 
fathomMeronymsExponential(ws, synset, contextWords, + params.depth, params.depth, params.iexp, params.dexp); } - if (params.features[4]) { - fathomHolonymsExponential(wordSense, wordSense.getNode().synset, - contextWords, params.depth, params.depth, params.iexp, params.dexp); - + fathomHolonymsExponential(ws, synset, contextWords, + params.depth, params.depth, params.iexp, params.dexp); } - if (params.features[5]) { - fathomEntailmentsExponential(wordSense, wordSense.getNode().synset, - contextWords, params.depth, params.depth, params.iexp, params.dexp); + fathomEntailmentsExponential(ws, synset, contextWords, + params.depth, params.depth, params.iexp, params.dexp); } - if (params.features[6]) { - fathomCoordinateTermsExponential(wordSense, wordSense.getNode().synset, - contextWords, params.depth, params.depth, params.iexp, params.dexp); - + fathomCoordinateTermsExponential(ws, synset, contextWords, + params.depth, params.depth, params.iexp, params.dexp); } if (params.features[7]) { - fathomCausesExponential(wordSense, wordSense.getNode().synset, - contextWords, params.depth, params.depth, params.iexp, params.dexp); - + fathomCausesExponential(ws, synset, contextWords, + params.depth, params.depth, params.iexp, params.dexp); } if (params.features[8]) { - fathomAttributesExponential(wordSense, wordSense.getNode().synset, - contextWords, params.depth, params.depth, params.iexp, params.dexp); - + fathomAttributesExponential(ws, synset, contextWords, + params.depth, params.depth, params.iexp, params.dexp); } if (params.features[9]) { - fathomPertainymsExponential(wordSense, wordSense.getNode().synset, - contextWords, params.depth, params.depth, params.iexp, params.dexp); + fathomPertainymsExponential(ws, synset, contextWords, + params.depth, params.depth, params.iexp, params.dexp); } } return scoredSenses; } - /** - * Recursively score the hypernym tree linearly. 
- * - * @param wordSense - * @param child - * @param relvWords - * @param depth - * @param maxDepth - * @param depthScoreWeight - */ private void fathomHypernyms(WordSense wordSense, Synset child, List<WordPOS> relvWords, int depth, int maxDepth, double depthScoreWeight) { if (depth == 0) @@ -415,17 +388,6 @@ public class Lesk extends AbstractWSDisambiguator { } } - /** - * Recursively score the hypernym tree exponentially. - * - * @param wordSense - * @param child - * @param relvWords - * @param depth - * @param maxDepth - * @param intersectionExponent - * @param depthScoreExponent - */ private void fathomHypernymsExponential(WordSense wordSense, Synset child, List<WordPOS> relvWords, int depth, int maxDepth, double intersectionExponent, double depthScoreExponent) { @@ -446,16 +408,6 @@ public class Lesk extends AbstractWSDisambiguator { } } - /** - * Recursively score the hyponym tree linearly. - * - * @param wordSense - * @param child - * @param relvWords - * @param depth - * @param maxDepth - * @param depthScoreWeight - */ private void fathomHyponyms(WordSense wordSense, Synset child, List<WordPOS> relvWords, int depth, int maxDepth, double depthScoreWeight) { if (depth == 0) @@ -476,17 +428,6 @@ public class Lesk extends AbstractWSDisambiguator { } } - /** - * Recursively score the hyponym tree exponentially. - * - * @param wordSense - * @param child - * @param relvWords - * @param depth - * @param maxDepth - * @param intersectionExponent - * @param depthScoreExponent - */ private void fathomHyponymsExponential(WordSense wordSense, Synset child, List<WordPOS> relvWords, int depth, int maxDepth, double intersectionExponent, double depthScoreExponent) { if (depth == 0) @@ -508,16 +449,6 @@ public class Lesk extends AbstractWSDisambiguator { } } - /** - * Recursively score the meronym tree linearly. 
- * - * @param wordSense - * @param child - * @param relvWords - * @param depth - * @param maxDepth - * @param depthScoreWeight - */ private void fathomMeronyms(WordSense wordSense, Synset child, List<WordPOS> relvWords, int depth, int maxDepth, double depthScoreWeight) { if (depth == 0) @@ -539,17 +470,6 @@ public class Lesk extends AbstractWSDisambiguator { } } - /** - * Recursively score the meronym tree exponentially. - * - * @param wordSense - * @param child - * @param relvWords - * @param depth - * @param maxDepth - * @param intersectionExponent - * @param depthScoreExponent - */ private void fathomMeronymsExponential(WordSense wordSense, Synset child, List<WordPOS> relvWords, int depth, int maxDepth, double intersectionExponent, double depthScoreExponent) { if (depth == 0) @@ -569,16 +489,6 @@ public class Lesk extends AbstractWSDisambiguator { } } - /** - * Recursively score the holonym tree linearly. - * - * @param wordSense - * @param child - * @param relvWords - * @param depth - * @param maxDepth - * @param depthScoreWeight - */ private void fathomHolonyms(WordSense wordSense, Synset child, List<WordPOS> relvWords, int depth, int maxDepth, double depthScoreWeight) { if (depth == 0) @@ -598,17 +508,6 @@ public class Lesk extends AbstractWSDisambiguator { } } - /** - * Recursively score the holonym tree exponentially. - * - * @param wordSense - * @param child - * @param relvWords - * @param depth - * @param maxDepth - * @param intersectionExponent - * @param depthScoreExponent - */ private void fathomHolonymsExponential(WordSense wordSense, Synset child, List<WordPOS> relvWords, int depth, int maxDepth, double intersectionExponent, double depthScoreExponent) { if (depth == 0) @@ -829,7 +728,7 @@ public class Lesk extends AbstractWSDisambiguator { } - /** + /* * Checks if the feature should be counted in the score. 
* * @param featureSynsets @@ -839,7 +738,7 @@ public class Lesk extends AbstractWSDisambiguator { private int assessFeature(List<Synset> featureSynsets, List<WordPOS> relevantWords) { int count = 0; for (Synset synset : featureSynsets) { - SynNode subNode = new SynNode(synset, relevantWords); + final SynNode subNode = new SynNode(synset, relevantWords); String[] tokenizedSense = tokenizer.tokenize(subNode.getGloss()); List<WordPOS> relvSenseWords = WSDHelper.getAllRelevantWords(tokenizedSense); @@ -855,7 +754,7 @@ public class Lesk extends AbstractWSDisambiguator { return count; } - /** + /* * Checks if the synonyms should be counted in the score. * * @param synonyms diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java index a3d275f..deaba4f 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/LeskParameters.java @@ -20,14 +20,14 @@ package opennlp.tools.disambiguator; /** - * Lesk specific parameter set. + * Lesk specific {@link WSDParameters parameter set}. * * @see WSDParameters */ public class LeskParameters extends WSDParameters { /** - * Enum of all types of implemented variations of Lesk + * Enum of all types of implemented variations of Lesk. 
*/ public enum LeskType { LESK_BASIC, LESK_BASIC_CTXT, LESK_EXT, LESK_EXT_CTXT, LESK_EXT_EXP, LESK_EXT_EXP_CTXT @@ -77,7 +77,7 @@ public class LeskParameters extends WSDParameters { return type; } - public void setType(LeskType type) { + void setType(LeskType type) { this.type = type; } @@ -85,7 +85,7 @@ public class LeskParameters extends WSDParameters { return winFSize; } - public void setWinFSize(int winFSize) { + void setWinFSize(int winFSize) { this.winFSize = winFSize; } @@ -93,7 +93,7 @@ public class LeskParameters extends WSDParameters { return winBSize; } - public void setWinBSize(int winBSize) { + void setWinBSize(int winBSize) { this.winBSize = winBSize; } @@ -101,7 +101,7 @@ public class LeskParameters extends WSDParameters { return depth; } - public void setDepth(int depth) { + void setDepth(int depth) { this.depth = depth; } @@ -109,7 +109,7 @@ public class LeskParameters extends WSDParameters { return depth_weight; } - public void setDepth_weight(double depth_weight) { + void setDepth_weight(double depth_weight) { this.depth_weight = depth_weight; } @@ -117,7 +117,7 @@ public class LeskParameters extends WSDParameters { return iexp; } - public void setIexp(double iexp) { + void setIexp(double iexp) { this.iexp = iexp; } @@ -125,7 +125,7 @@ public class LeskParameters extends WSDParameters { return dexp; } - public void setDexp(double dexp) { + void setDexp(double dexp) { this.dexp = dexp; } @@ -138,7 +138,7 @@ public class LeskParameters extends WSDParameters { } /** - * Sets default parameters + * Activates default Lesk feature parameters. 
*/ void setDefaults() { setType(LeskParameters.DFLT_LESK_TYPE); @@ -153,6 +153,9 @@ public class LeskParameters extends WSDParameters { { true, true, true, true, true, true, true, true, true, true }; } + /** + * {@inheritDoc} + */ @Override public boolean areValid() { diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java index 966c3a5..1d51f3b 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/MFS.java @@ -42,10 +42,6 @@ public class MFS extends AbstractWSDisambiguator { public static final String NONESENSE = "nonesense"; - public MFS() { - super(); - } - /** * Extracts the most frequent sense for a specified {@link WSDSample}. * diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java index 28517fb..87641a0 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java @@ -34,6 +34,9 @@ import opennlp.tools.tokenize.Tokenizer; /** * Convenience class to access some features via {@link Synset synsets}. + * A {@link Synset} represents a concept, and contains a set of words, each of which + * has a sense for that concept. Each element is thus synonymous with the other + * words contained in the Synset. * * @see Synset */ @@ -80,6 +83,9 @@ public class SynNode { this.parent = parent; } + /** + * @return Retrieves the associated {@link WordPOS elements}. + */ public List<WordPOS> getSenseRelevantWords() { return senseRelevantWords; } @@ -246,10 +252,10 @@ public class SynNode { } /** - * Gets the senses of the nodes + * Retrieves associated {@link WordSense senses} for specified {@code nodes}. * - * @param nodes - * @return senses from the nodes + * @param nodes A list of {@link SynNode nodes} to score. 
+ * @return The {@link WordSense senses} obtained for the {@code nodes}. */ public static List<WordSense> updateSenses(List<SynNode> nodes) { List<WordSense> scoredSenses = new ArrayList<>(); @@ -262,6 +268,5 @@ public class SynNode { scoredSenses.add(wordSense); } return scoredSenses; - } } diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java index 14ba92f..10581ed 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDContextGenerator.java @@ -23,7 +23,7 @@ import java.util.List; import java.util.regex.Pattern; /** - * Interface for {@link AbstractWSDisambiguator} context generators. + * Describes a context generator for word sense disambiguation. */ public interface WSDContextGenerator { @@ -39,7 +39,7 @@ public interface WSDContextGenerator { * @param ngram The ngram to consider for context. Must be greater than {@code 0}. * @param windowSize The context window. Must be greater than {@code 0}. * @param model The list of unigrams. - * @return The IMS context of the word to disambiguate. + * @return The context of the word to disambiguate at {@code index} in {@code toks}. */ String[] getContext(int index, String[] toks, String[] tags, String[] lemmas, int ngram, int windowSize, List<String> model); @@ -51,7 +51,7 @@ public interface WSDContextGenerator { * @param ngram The ngram to consider for context. Must be greater than {@code 0}. * @param windowSize The context window. Must be greater than {@code 0}. * @param model The list of unigrams. - * @return The IMS context of the word to disambiguate. + * @return The context of the word to disambiguate at {@code index} in {@code sample}. 
*/ String[] getContext(WSDSample sample, int ngram, int windowSize, List<String> model); diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java index 9f6c28f..5fce6df 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDDefaultParameters.java @@ -23,32 +23,35 @@ import java.io.File; import java.nio.file.Path; /** - * This class contains the parameters for the IMS approach as well as the + * Defines the parameters for the <a href="https://aclanthology.org/P10-4014.pdf"> + * IMS (It Makes Sense)</a> approach, as well as the * directories containing the files used + * + * @see WSDParameters */ public class WSDDefaultParameters extends WSDParameters { - protected String languageCode; - protected int windowSize; - protected int ngram; + public static final int DFLT_WIN_SIZE = 3; + public static final int DFLT_NGRAM = 2; + public static final String DFLT_LANG_CODE = "en"; + public static final SenseSource DFLT_SOURCE = SenseSource.WORDNET; - protected Path trainingDataDir; + private final Path trainingDataDir; - protected static final int DFLT_WIN_SIZE = 3; - protected static final int DFLT_NGRAM = 2; - protected static final String DFLT_LANG_CODE = "en"; - protected static final SenseSource DFLT_SOURCE = SenseSource.WORDNET; + private final String languageCode; + protected int windowSize; + protected int ngram; /** * Initializes a new set of {@link WSDDefaultParameters}. - * The default language used is <i>English</i>. + * The default language used is '<i>en</i>' (English). 
* - * @param windowSize the size of the window used for the extraction of the features - * qualified of Surrounding Words - * @param ngram the number words used for the extraction of features qualified of - * Local Collocations - * @param senseSource the source of the training data - * @param trainingDataDir The {@link Path} where to place or lookup trained models. + * @param windowSize The size of the window used for the extraction of the features + * qualified of Surrounding Words. + * @param ngram The number of words used for the extraction of features qualified of + * Local Collocations. + * @param senseSource The {@link SenseSource source} of the training data + * @param trainingDataDir The {@link Path} where to store or read trained models from. */ public WSDDefaultParameters(int windowSize, int ngram, SenseSource senseSource, Path trainingDataDir) { this.languageCode = DFLT_LANG_CODE; @@ -65,8 +68,10 @@ public class WSDDefaultParameters extends WSDParameters { /** * Initializes a new set of {@link WSDDefaultParameters}. - * The default language used is <i>English</i>, the window size is {@link #DFLT_WIN_SIZE}, + * The default language used is '<i>en</i>' (English), the window size is {@link #DFLT_WIN_SIZE}, * and the ngram length is initialized as {@link #DFLT_NGRAM}. + * + * @implNote The training directory will be unset. */ public WSDDefaultParameters() { this(DFLT_WIN_SIZE, DFLT_NGRAM, DFLT_SOURCE, null); @@ -74,7 +79,7 @@ public class WSDDefaultParameters extends WSDParameters { /** * Initializes a new set of {@link WSDDefaultParameters}. - * The default language used is <i>English</i>, the window size is {@link #DFLT_WIN_SIZE}, + * The default language used is '<i>en</i>' (English), the window size is {@link #DFLT_WIN_SIZE}, * and the ngram length is initialized as {@link #DFLT_NGRAM}. * * @param trainingDataDir The {@link Path} where to place or lookup trained models. 
@@ -95,10 +100,16 @@ public class WSDDefaultParameters extends WSDParameters { return ngram; } + /** + * @return The {@link Path} where to place or lookup trained models. May be {@code null}! + */ public Path getTrainingDataDirectory() { return trainingDataDir; } + /** + * {@inheritDoc} + */ @Override public boolean areValid() { return true; diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java index ddc2cac..1775e5c 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java @@ -20,7 +20,10 @@ package opennlp.tools.disambiguator; import opennlp.tools.util.eval.EvaluationMonitor; /** - * A marker interface for evaluating {@link AbstractWSDisambiguator disambiguators}. + * A marker interface for evaluating {@link Disambiguator disambiguators}. + * + * @see WSDSample + * @see EvaluationMonitor */ public interface WSDEvaluationMonitor extends EvaluationMonitor<WSDSample> { diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java index 6b71c79..4e0d502 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java @@ -43,18 +43,19 @@ public class WSDEvaluator extends Evaluator<WSDSample> { private final Disambiguator disambiguator; /** - * Initializes the current instance with the given {@link AbstractWSDisambiguator}. + * Initializes a {@link WSDEvaluator} with the given {@link Disambiguator}. * - * @param disambiguator - * the {@link AbstractWSDisambiguator} to evaluate. - * @param listeners - * evaluation sample listeners + * @param disambiguator The {@link Disambiguator} to evaluate. 
+ * @param listeners The (optional) {@link WSDEvaluationMonitor evaluation sample listeners}. */ public WSDEvaluator(Disambiguator disambiguator, WSDEvaluationMonitor... listeners) { super(listeners); this.disambiguator = disambiguator; } + /** + * {@inheritDoc} + */ @Override protected WSDSample processSample(WSDSample ref) { @@ -82,20 +83,16 @@ public class WSDEvaluator extends Evaluator<WSDSample> { } /** - * Retrieves the WSD accuracy. - * <p> - * This is defined as: WSD accuracy = correctly disambiguated / total words + * @implNote WSD accuracy = correctly disambiguated / total words * - * @return the WSD accuracy + * @return Retrieves the WSD accuracy. */ public double getAccuracy() { return accuracy.mean(); } /** - * Retrieves the total number of words considered in the evaluation. - * - * @return the word count + * @return Retrieves the total number of words considered in the evaluation */ public long getWordCount() { return accuracy.count(); diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java index d462947..3613c0d 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDHelper.java @@ -42,6 +42,10 @@ import opennlp.tools.tokenize.ThreadSafeTokenizerME; import opennlp.tools.tokenize.Tokenizer; import opennlp.tools.util.DownloadUtil; +/** + * A helper class that loads and organizes resources, and provides helper methods + * to avoid multiple copies of dealing with certain resources. 
+ */ public class WSDHelper { private static final Logger LOG = LoggerFactory.getLogger(WSDHelper.class); diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java index 984c41b..87bc3d2 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDModel.java @@ -159,11 +159,17 @@ public class WSDModel extends BaseModel { return getManifestProperty(WORDTAG); } + /** + * {@inheritDoc} + */ @Override protected Class<? extends BaseToolFactory> getDefaultFactory() { return WSDisambiguatorFactory.class; } + /** + * {@inheritDoc} + */ @Override protected void validateArtifactMap() throws InvalidFormatException { super.validateArtifactMap(); @@ -186,11 +192,17 @@ public class WSDModel extends BaseModel { this.contextEntries = Arrays.asList(surroundings.split(",")); } + /** + * {@inheritDoc} + */ @Override public int hashCode() { return Objects.hash(artifactMap.get(MANIFEST_ENTRY), artifactMap.get(WSD_MODEL_ENTRY)); } + /** + * {@inheritDoc} + */ @Override public boolean equals(Object obj) { if (obj == this) { diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java index 36f0abf..912f6a5 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java @@ -20,8 +20,7 @@ package opennlp.tools.disambiguator; /** - * Disambiguation Parameters - * + * Describes a set of parameters to configure word sense disambiguation. 
*/ public abstract class WSDParameters { @@ -32,19 +31,24 @@ public abstract class WSDParameters { protected SenseSource senseSource; /** - * @return if the disambiguation type is coarse grained or fine-grained + * Initializes a default set of {@link WSDParameters} and chooses + * the {@link SenseSource#WORDNET} by default. */ + public WSDParameters() { + this.senseSource = SenseSource.WORDNET; + } + /** + * @return if the disambiguation type is coarse grained or fine-grained + */ public SenseSource getSenseSource() { return senseSource; } - public WSDParameters() { - this.senseSource = SenseSource.WORDNET; - } - - /* - * @return checks if the parameters are valid or not + /** + * Checks if the parameters are valid or not. + * + * @return {@code true} if valid, {@code false} otherwise. */ public abstract boolean areValid(); diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java index 5360a1d..e6984ff 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDSample.java @@ -217,6 +217,9 @@ public class WSDSample implements Sample { return ref; } + /** + * {@inheritDoc} + */ @Override public boolean equals(Object obj) { if (this == obj) { @@ -230,6 +233,9 @@ public class WSDSample implements Sample { } } + /** + * {@inheritDoc} + */ @Override public int hashCode() { int result = Objects.hash(targetPosition); @@ -238,6 +244,9 @@ public class WSDSample implements Sample { return result; } + /** + * {@inheritDoc} + */ @Override public String toString() { StringBuilder result = new StringBuilder(); @@ -255,7 +264,6 @@ public class WSDSample implements Sample { // get rid of last space result.setLength(result.length() - 1); } - return result.toString(); } diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java 
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java index 5a656b7..f48d628 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguatorFactory.java @@ -21,6 +21,9 @@ import opennlp.tools.util.BaseToolFactory; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.ext.ExtensionLoader; +/** + * Implements a word sense disambiguation related {@link BaseToolFactory}. + */ public class WSDisambiguatorFactory extends BaseToolFactory { /** @@ -58,11 +61,19 @@ public class WSDisambiguatorFactory extends BaseToolFactory { } } + /** + * {@inheritDoc} + */ @Override public void validateArtifactMap() throws InvalidFormatException { // no additional artifacts } + /** + * @implNote By default, an {@link IMSWSDContextGenerator} will be instantiated. + * + * @return Retrieves the active {@link WSDContextGenerator}. + */ public WSDContextGenerator getContextGenerator() { // default can be IMS return new IMSWSDContextGenerator(); diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java index d3c5dea..496d543 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WTDIMS.java @@ -123,7 +123,7 @@ public class WTDIMS { return sentence; } - public void setSentence(String[] sentence) { + private void setSentence(String[] sentence) { this.sentence = sentence; } @@ -131,7 +131,7 @@ public class WTDIMS { return posTags; } - public void setPosTags(String[] posTags) { + private void setPosTags(String[] posTags) { this.posTags = posTags; } @@ -139,7 +139,7 @@ public class WTDIMS { return wordIndex; } - public void setWordIndex(int wordIndex) { + private void setWordIndex(int wordIndex) { this.wordIndex = wordIndex; } @@ -147,7 +147,7 @@ public class WTDIMS { 
return lemmas; } - public void setLemmas(String[] lemmas) { + private void setLemmas(String[] lemmas) { this.lemmas = lemmas; } @@ -163,7 +163,7 @@ public class WTDIMS { return senseIDs; } - public void setSenseIDs(String[] senseIDs) { + private void setSenseIDs(String[] senseIDs) { this.senseIDs = senseIDs; } @@ -201,7 +201,7 @@ public class WTDIMS { return posOfSurroundingWords; } - public void setPosOfSurroundingWords(String[] posOfSurroundingWords) { + protected void setPosOfSurroundingWords(String[] posOfSurroundingWords) { this.posOfSurroundingWords = posOfSurroundingWords; } @@ -209,7 +209,7 @@ public class WTDIMS { return surroundingWords; } - public void setSurroundingWords(String[] surroundingWords) { + protected void setSurroundingWords(String[] surroundingWords) { this.surroundingWords = surroundingWords; } @@ -217,7 +217,7 @@ public class WTDIMS { return localCollocations; } - public void setLocalCollocations(String[] localCollocations) { + protected void setLocalCollocations(String[] localCollocations) { this.localCollocations = localCollocations; } @@ -225,7 +225,7 @@ public class WTDIMS { return this.features; } - public void setFeatures(String[] features) { + protected void setFeatures(String[] features) { this.features = features; } diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java index 91c2d48..118c306 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java @@ -30,7 +30,12 @@ import net.sf.extjwnl.data.Synset; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -// TODO extend Word instead +/** + * A container class that holds a token (word) and its related POS tag. + * It provides methods to get {@link Synset synsets} or stem forms for it. 
+ * + * @see POS + */ public class WordPOS { private static final Logger LOG = LoggerFactory.getLogger(WordPOS.class); @@ -45,6 +50,7 @@ public class WordPOS { * * @param word The token to use. It must not be {@code null}. * @param tag The POS tag to use. It must not be {@code null}. + * * @throws IllegalArgumentException Thrown if parameters are invalid. */ public WordPOS(String word, String tag) { @@ -56,6 +62,7 @@ public class WordPOS { * * @param word The token to use. It must not be {@code null} and not be empty. * @param pos The {@link POS pos tag} to use. It must not be {@code null}. + * * @throws IllegalArgumentException Thrown if parameters are invalid. */ public WordPOS(String word, POS pos) { @@ -108,7 +115,13 @@ public class WordPOS { return Collections.emptyList(); } - // uses Stemming to check if two words are equivalent + /** + * Applies stemming to check whether {@code wordToCompare} is equivalent + * to the current {@link WordPOS} instance. + * + * @param wordToCompare The {@link WordPOS} instance to compare against. + */ + // TODO check the performance of the current implementation (!) public boolean isStemEquivalent(WordPOS wordToCompare) { // check if there is intersection in the stems; List<String> listToCompare = wordToCompare.getStems(); diff --git a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java index da52e9a..91a6863 100644 --- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java +++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java @@ -19,51 +19,95 @@ package opennlp.tools.disambiguator; +/** + * Represents a type that associates {@link WSDSample samples} with a + * {@link SynNode syn node} which holds one or more senses. + * Elements of this type can be scored and thereby be ranked. This is + * why {@link WordSense} implements {@link Comparable}. 
+ * + * @see SynNode + * @see WSDSample + */ public class WordSense implements Comparable<WordSense> { - private final SynNode node; private final int id; + private final SynNode node; private WSDSample sample; private double score; + /** + * Initializes a {@link WordSense} via a numerical {@code id} and + * a {@link SynNode} instance. + * + * @param id Must be a positive number. + * @param node The {@link SynNode node} to link senses to. + */ public WordSense(int id, SynNode node) { this.id = id; this.node = node; } + /** + * Initializes a {@link WordSense} via a {@link WSDSample} and + * a {@link SynNode} instance. + * + * @param sample The {@link WSDSample word sample} to associate. + * @param node The {@link SynNode node} to link senses to. + */ public WordSense(WSDSample sample, SynNode node) { this(sample.getSenseID(), node); this.sample = sample; } + /** + * @return Retrieves the numerical identifier. + */ public int getId() { return id; } + /** + * @return Retrieves the gloss available via WordNet. + */ public String getGloss() { return node.getGloss(); } + /** + * @return Retrieves the {@link SynNode syn node} instance. + */ public SynNode getNode() { return node; } + /** + * @return Retrieves the {@link WSDSample word sample} instance. + */ public WSDSample getWSDSample() { return sample; } - public void setWSDSample(WSDSample sample) { + void setWSDSample(WSDSample sample) { this.sample = sample; } + /** + * @return Retrieves the numerical score. The value might be undefined, aka not set. + */ public double getScore() { return score; } - public void setScore(double score) { + /** + * @param score The score to assign. No restrictions on this parameter. + */ + void setScore(double score) { this.score = score; } + /** + * {@inheritDoc} + */ @Override public int compareTo(WordSense o) { return Double.compare(this.score, o.score);
