This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch extract-constants-and-enhance-javadoc-of-opennlp-coref in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit 652eec7d3b589cdf7df9ac846fc775e01aad0c54 Author: Martin Wiesner <[email protected]> AuthorDate: Sun Jul 20 21:17:26 2025 +0200 extracts constants of frequently used POS tag strings extracts constants of model names simplifies code in AbstractLinker adds missing @Override annotations enhances JavaDoc along the path fixes typos --- .../cmdline/coref/CoreferenceConverterTool.java | 1 - .../tools/cmdline/coref/TrainingParams.java | 2 + .../java/opennlp/tools/coref/DiscourseElement.java | 2 +- .../opennlp/tools/coref/linker/AbstractLinker.java | 48 +++++++++++----------- .../opennlp/tools/coref/linker/DefaultLinker.java | 18 ++++---- .../java/opennlp/tools/coref/linker/Linker.java | 7 +--- .../opennlp/tools/coref/linker/LinkerMode.java | 6 ++- .../opennlp/tools/coref/mention/HeadFinder.java | 14 ++++--- .../java/opennlp/tools/coref/mention/Mention.java | 5 +-- .../tools/coref/mention/MentionContext.java | 4 +- .../opennlp/tools/coref/mention/MentionFinder.java | 29 +++++++------ .../tools/coref/mention/PTBMentionFinder.java | 14 +++---- .../java/opennlp/tools/coref/mention/Parse.java | 8 ++-- .../coref/mention/ShallowParseMentionFinder.java | 7 +++- .../tools/coref/resolver/AbstractResolver.java | 3 +- .../tools/coref/resolver/CommonNounResolver.java | 12 +++--- .../resolver/DefaultNonReferentialResolver.java | 12 +++--- .../tools/coref/resolver/DefiniteNounResolver.java | 6 ++- .../opennlp/tools/coref/resolver/IsAResolver.java | 2 +- .../tools/coref/resolver/MaxentResolver.java | 10 +++-- .../coref/resolver/NonReferentialResolver.java | 10 +++-- .../tools/coref/resolver/PerfectResolver.java | 10 ++--- .../tools/coref/resolver/PluralNounResolver.java | 12 +++--- .../coref/resolver/PluralPronounResolver.java | 8 ++-- .../tools/coref/resolver/ProperNounResolver.java | 10 +++-- .../opennlp/tools/coref/resolver/Resolver.java | 5 +++ .../tools/coref/resolver/ResolverUtils.java | 8 ++-- .../coref/resolver/SingularPronounResolver.java | 14 ++++--- .../coref/resolver/SpeechPronounResolver.java | 24 ++++++----- .../main/java/opennlp/tools/coref/sim/Context.java | 9 ++-- .../main/java/opennlp/tools/coref/sim/Gender.java | 2 + .../main/java/opennlp/tools/coref/sim/Number.java | 2 + .../java/opennlp/tools/coref/sim/TrainModel.java | 2 +- .../tools/formats/muc/MucCorefContentHandler.java | 18 ++++---- .../tools/lang/english/TreebankNameFinder.java | 22 ++++++---- .../tools/coref/linker/AbstractLinkerTest.java | 2 +- 36 files changed, 208 insertions(+), 160 deletions(-) diff --git a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferenceConverterTool.java b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferenceConverterTool.java index 4920d18..512a7ea 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferenceConverterTool.java +++ b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferenceConverterTool.java @@ -20,7 +20,6 @@ package opennlp.tools.cmdline.coref; import opennlp.tools.cmdline.AbstractConverterTool; import opennlp.tools.coref.CorefSample; import opennlp.tools.coref.CorefSampleStreamFactory; -import opennlp.tools.postag.POSSample; /** * Tool to convert multiple data formats into native OpenNLP Coref training format. diff --git a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/TrainingParams.java b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/TrainingParams.java index efd9034..c54104d 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/TrainingParams.java +++ b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/TrainingParams.java @@ -18,11 +18,13 @@ package opennlp.tools.cmdline.coref; import opennlp.tools.cmdline.params.BasicTrainingParams; +import opennlp.tools.commons.Internal; /** * TrainingParameters for Co-Referencer * <p> * Note: Do not use this class, internal use only! */ +@Internal interface TrainingParams extends BasicTrainingParams { } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseElement.java b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseElement.java index a74ae01..c7ecc81 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseElement.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseElement.java @@ -48,7 +48,7 @@ public abstract class DiscourseElement { /** * @return Retrieves an {@link Iterator} over the mentions which iterates through them - * based on which were most recently mentioned.. + * based on which were most recently mentioned. */ public Iterator<MentionContext> getRecentMentions() { return new ReverseListIterator<>(extents); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/AbstractLinker.java b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/AbstractLinker.java index aae7cb3..5b020c1 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/AbstractLinker.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/AbstractLinker.java @@ -19,6 +19,9 @@ package opennlp.tools.coref.linker; import java.io.IOException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import opennlp.tools.coref.DiscourseEntity; import opennlp.tools.coref.DiscourseModel; import opennlp.tools.coref.mention.HeadFinder; @@ -28,10 +31,9 @@ import opennlp.tools.coref.mention.MentionFinder; import opennlp.tools.coref.mention.Parse; import opennlp.tools.coref.resolver.AbstractResolver; import opennlp.tools.coref.sim.Gender; +import opennlp.tools.coref.sim.GenderEnum; import opennlp.tools.coref.sim.Number; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import opennlp.tools.coref.sim.NumberEnum; /** * Provides a default implementation of many of the methods in {@link Linker} that @@ -151,43 +153,43 @@ public abstract class AbstractLinker implements Linker { } /** - * Updates the specified {@link DiscourseModel} with the specified mention as coreferent with the specified e. + * Updates the specified {@link DiscourseModel} with the specified mention as coreferent + * with the specified {@code entity 'e'}. + * * @param dm The {@link DiscourseModel}. * @param m The {@link MentionContext mention} to be added to the specified {@code entity 'e'}. * @param e The {@link DiscourseEntity} which is mentioned by the specified mention. * @param useDiscourseModel Whether the mentions should be kept as an e or simply co-indexed. */ protected void updateExtent(DiscourseModel dm, MentionContext m, DiscourseEntity e, boolean useDiscourseModel) { + final NumberEnum n = m.getNumber(); + final GenderEnum g = m.getGender(); + final double gProb = m.getGenderProb(); + final double nProb = m.getNumberProb(); if (useDiscourseModel) { if (e != null) { logger.debug("Adding extent: {}", m.toText()); - if (e.getGenderProbability() < m.getGenderProb()) { - e.setGender(m.getGender()); - e.setGenderProbability(m.getGenderProb()); + if (e.getGenderProbability() < gProb) { + e.setGender(g); + e.setGenderProbability(gProb); } - if (e.getNumberProbability() < m.getNumberProb()) { - e.setNumber(m.getNumber()); - e.setNumberProbability(m.getNumberProb()); + if (e.getNumberProbability() < nProb) { + e.setNumber(n); + e.setNumberProbability(nProb); } e.addMention(m); dm.mentionEntity(e); } else { - logger.debug("Creating Extent: {} {} {}", m.toText(), m.getGender(), m.getNumber()); - e = new DiscourseEntity(m, m.getGender(), m.getGenderProb(), m.getNumber(), m.getNumberProb()); + logger.debug("Creating Extent: {} {} {}", m.toText(), g, n); + e = new DiscourseEntity(m, g, gProb, n, nProb); dm.addEntity(e); } } else { + DiscourseEntity newEntity = new DiscourseEntity(m, g, gProb, n, nProb); + dm.addEntity(newEntity); if (e != null) { - DiscourseEntity newEntity = - new DiscourseEntity(m, m.getGender(), m.getGenderProb(), m.getNumber(), m.getNumberProb()); - dm.addEntity(newEntity); newEntity.setId(e.getId()); } - else { - DiscourseEntity newEntity = - new DiscourseEntity(m, m.getGender(), m.getGenderProb(), m.getNumber(), m.getNumberProb()); - dm.addEntity(newEntity); - } } } @@ -269,10 +271,10 @@ public abstract class AbstractLinker implements Linker { contexts[mi].setId(mentions[mi].getId()); mentionInSentenceIndex++; if (mode != LinkerMode.SIM) { - Gender g = computeGender(contexts[mi]); - contexts[mi].setGender(g.getType(),g.getConfidence()); + Gender g = computeGender(contexts[mi]); + contexts[mi].setGender(g.getType(), g.getConfidence()); Number n = computeNumber(contexts[mi]); - contexts[mi].setNumber(n.getType(),n.getConfidence()); + contexts[mi].setNumber(n.getType(), n.getConfidence()); } } } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/DefaultLinker.java b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/DefaultLinker.java index d113b33..83a0070 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/DefaultLinker.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/DefaultLinker.java @@ -52,7 +52,7 @@ import org.slf4j.LoggerFactory; * <p> * Non-constituent entities such as pre-nominal named-entities and sub entities in simple coordinated * noun phases will be created. This {@link Linker} requires that named-entity information also be provided. - * This information can be added to the parse using the -parse option with EnglishNameFinder. + * This information can be added to the parse using the {@code -parse} option with EnglishNameFinder. * * @see Linker */ @@ -170,14 +170,14 @@ public class DefaultLinker extends AbstractLinker { } else { NonReferentialResolver nrr = new FixedNonReferentialResolver(fixedNonReferentialProbability); resolvers = new MaxentResolver[] { - new SingularPronounResolver(corefProject, ResolverMode.TEST,nrr), - new ProperNounResolver(corefProject, ResolverMode.TEST,nrr), - new DefiniteNounResolver(corefProject, ResolverMode.TEST,nrr), - new IsAResolver(corefProject, ResolverMode.TEST,nrr), - new PluralPronounResolver(corefProject, ResolverMode.TEST,nrr), - new PluralNounResolver(corefProject, ResolverMode.TEST,nrr), - new CommonNounResolver(corefProject, ResolverMode.TEST,nrr), - new SpeechPronounResolver(corefProject, ResolverMode.TEST,nrr) + new SingularPronounResolver(corefProject, ResolverMode.TEST, nrr), + new ProperNounResolver(corefProject, ResolverMode.TEST, nrr), + new DefiniteNounResolver(corefProject, ResolverMode.TEST, nrr), + new IsAResolver(corefProject, ResolverMode.TEST, nrr), + new PluralPronounResolver(corefProject, ResolverMode.TEST, nrr), + new PluralNounResolver(corefProject, ResolverMode.TEST, nrr), + new CommonNounResolver(corefProject, ResolverMode.TEST, nrr), + new SpeechPronounResolver(corefProject, ResolverMode.TEST, nrr) }; } if (LinkerMode.EVAL == mode) { diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/Linker.java b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/Linker.java index 973c184..ee6073d 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/Linker.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/Linker.java @@ -66,7 +66,6 @@ public interface Linker { */ String PRONOUN_MODIFIER = "np"; - /** * Indicated that the specified mentions can be used to train this linker. * This requires that the coreference relationship between the mentions have been labeled @@ -85,12 +84,12 @@ public interface Linker { DiscourseEntity[] getEntities(Mention[] mentions); /** - * Creates mention contexts for the specified mention exents. + * Creates mention contexts for the specified mention extents. * These are used to compute coreference features over. * * @param mentions The mention of a document. * - * @return mention contexts for the specified mention exents. + * @return mention contexts for the specified mention extents. */ MentionContext[] constructMentionContexts(Mention[] mentions); @@ -110,8 +109,6 @@ public interface Linker { MentionFinder getMentionFinder(); /** - * Returns the head finder associated with this linker. - * * @return The head finder associated with this linker. */ HeadFinder getHeadFinder(); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/LinkerMode.java b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/LinkerMode.java index 086404c..b392813 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/LinkerMode.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/LinkerMode.java @@ -19,7 +19,7 @@ package opennlp.tools.coref.linker; /** - * Enumeration of modes in which a linker can run. + * Enumeration of modes in which a {@link Linker} can run. */ public enum LinkerMode { @@ -33,7 +33,9 @@ public enum LinkerMode { */ TRAIN, - /** Evaluation mode, used to evaluate identified coreference relationships based on annotated text. */ + /** + * Evaluation mode, used to evaluate identified coreference relationships based on annotated text. + */ EVAL, /** diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/HeadFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/HeadFinder.java index 5820c68..9ee9c99 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/HeadFinder.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/HeadFinder.java @@ -19,13 +19,15 @@ package opennlp.tools.coref.mention; /** * Interface for finding head words in noun phrases and head noun-phrases in parses. + * + * @see Parse */ public interface HeadFinder { /** - * Returns the child parse which contains the lexical head of the specified parse. + * Returns the child parse which contains the lexical head of the specified {@link Parse}. * - * @param parse The parse in which to find the head. + * @param parse The {@link Parse} in which to find the head. * @return The parse containing the lexical head of the specified parse. If no head is * available or the constituent has no subcomponents that are eligible heads then null is returned. */ @@ -40,18 +42,18 @@ public interface HeadFinder { int getHeadIndex(Parse parse); /** - * Returns the parse bottom-most head of a <code>Parse</code>. If no + * Returns the parse bottom-most head of a {@link Parse}. If no * head is available which is a child of <code>p</code> then <code>p</code> is returned. * - * @param p Parse to find the head of. + * @param p The {@link Parse} to find the head of. * @return bottom-most head of p. */ Parse getLastHead(Parse p); /** - * Returns head token for the specified np parse. + * Returns head token for the specified np {@link Parse}. * - * @param np The noun parse to get head from. + * @param np The noun {@link Parse} to get head from. * @return head token parse. */ Parse getHeadToken(Parse np); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Mention.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Mention.java index 1101c54..c41cf2e 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Mention.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Mention.java @@ -36,8 +36,8 @@ public class Mention implements Comparable<Mention> { protected final String type; /** - * The entity id indicating which entity this extent belongs to. This is only - * used when training a coreference classifier. + * The entity id indicating which entity this extent belongs to. + * This is only used when training a coreference classifier. */ private int id; @@ -126,7 +126,6 @@ public class Mention implements Comparable<Mention> { return id; } - @Override public int compareTo(Mention e) { return span.compareTo(e.span); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java index d8db7ee..69c8f97 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java @@ -19,7 +19,6 @@ package opennlp.tools.coref.mention; import java.util.List; -import opennlp.tools.coref.resolver.AbstractResolver; import opennlp.tools.coref.sim.Context; import opennlp.tools.coref.sim.GenderEnum; import opennlp.tools.coref.sim.NumberEnum; @@ -36,7 +35,8 @@ public class MentionContext extends Context { private static final Logger logger = LoggerFactory.getLogger(MentionContext.class); /** - * The index of first token which is not part of a descriptor. This is 0 if no descriptor is present. + * The index of first token which is not part of a descriptor. + * This is 0 if no descriptor is present. */ private final int nonDescriptorStart; diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionFinder.java index 047f258..5e158db 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionFinder.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionFinder.java @@ -20,46 +20,49 @@ package opennlp.tools.coref.mention; /** * Specifies the interface that Objects which determine the space of - * mentions for coreference should implement. + * {@link Mention mentions} for coreference should implement. + * + * @see Mention */ public interface MentionFinder { /** - * Specifies whether pre-nominal named-entities should be collected as mentions. + * Specifies whether pre-nominal named-entities should be collected as {@link Mention mentions}. * - * @param collectPrenominalNamedEntities true if pre-nominal named-entities - * should be collected; false otherwise. + * @param collectPrenominalNamedEntities {@code true} if pre-nominal named-entities + * should be collected; {@code false} otherwise. */ void setPrenominalNamedEntityCollection(boolean collectPrenominalNamedEntities); /** - * Returns whether this mention finder collects pre-nominal named-entities as mentions. + * Returns whether this mention finder collects pre-nominal named-entities as {@link Mention mentions}. * - * @return true if this mention finder collects pre-nominal named-entities as mentions + * @return {@code true} if this mention finder collects pre-nominal named-entities as mentions. */ boolean isPrenominalNamedEntityCollection(); /** - * Returns whether this mention finder collects coordinated noun phrases as mentions. + * Returns whether this mention finder collects coordinated noun phrases as {@link Mention mentions}. * - * @return true if this mention finder collects coordinated noun phrases as mentions; false otherwise. + * @return {@code true} if this mention finder collects coordinated noun phrases as mentions; + * {@code false} otherwise. */ boolean isCoordinatedNounPhraseCollection(); /** - * Specifies whether coordinated noun phrases should be collected as mentions. + * Specifies whether coordinated noun phrases should be collected as {@link Mention mentions}. * - * @param collectCoordinatedNounPhrases true if coordinated noun phrases should be collected; - * false otherwise. + * @param collectCoordinatedNounPhrases {@code true} if coordinated noun phrases should be collected; + * {@code false} otherwise. */ void setCoordinatedNounPhraseCollection(boolean collectCoordinatedNounPhrases); /** - * Returns an array of mentions. + * Returns an array of {@link Mention mentions}. * * @param parse A top level parse from which mentions are gathered. * - * @return an array of mentions which implement the <code>Extent</code> interface. + * @return An array of {@link Mention mentions} which implement the <code>Extent</code> interface. */ Mention[] getMentions(Parse parse); } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBMentionFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBMentionFinder.java index 4793024..29a18d8 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBMentionFinder.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBMentionFinder.java @@ -18,7 +18,10 @@ package opennlp.tools.coref.mention; /** - * Finds mentions from Penn Treebank style parses. + * A {@link MentionFinder} implementation that finds mentions from Penn Treebank style parses. + * + * @see AbstractMentionFinder + * @see MentionFinder */ public class PTBMentionFinder extends AbstractMentionFinder { @@ -35,10 +38,10 @@ public class PTBMentionFinder extends AbstractMentionFinder { } /** - * Retrives the one and only existing instance. + * Retrieves the one and only existing instance. * - * @param hf - * @return the one and only existing instance + * @param hf A valid {@link HeadFinder} to assign if no instance exists. + * @return The one and only existing {@link PTBMentionFinder} instance. */ public static PTBMentionFinder getInstance(HeadFinder hf) { if (instance == null) { @@ -50,9 +53,6 @@ public class PTBMentionFinder extends AbstractMentionFinder { return instance; } - - - /* private boolean isTraceNp(Parse np){ List sc = np.getSyntacticChildren(); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Parse.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Parse.java index c0b6c81..67f8d03 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Parse.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/Parse.java @@ -109,28 +109,28 @@ public interface Parse extends Comparable<Parse> { /** * Specifies whether this parse is a named-entity. * - * @return {@code True} if this parse is a named-entity; {@code false} otherwise. + * @return {@code true} if this parse is a named-entity; {@code false} otherwise. */ boolean isNamedEntity(); /** * Specifies whether this parse is a noun phrase. * - * @return {@code True} if this parse is a noun phrase; {@code false} otherwise. + * @return {@code true} if this parse is a noun phrase; {@code false} otherwise. */ boolean isNounPhrase(); /** * Specifies whether this parse is a sentence. * - * @return {@code True} if this parse is a sentence; {@code false} otherwise. + * @return {@code true} if this parse is a sentence; {@code false} otherwise. */ boolean isSentence(); /** * Specifies whether this parse is a coordinated noun phrase. * - * @return {@code True} if this parse is a coordinated noun phrase; {@code false} otherwise. + * @return {@code true} if this parse is a coordinated noun phrase; {@code false} otherwise. */ boolean isCoordinatedNounPhrase(); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java index eff4068..e652eda 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java @@ -18,7 +18,10 @@ package opennlp.tools.coref.mention; /** - * Finds mentions from shallow np-chunking based parses. + * A {@link MentionFinder} implementation that finds mentions from shallow np-chunking based parses. + * + * @see AbstractMentionFinder + * @see MentionFinder */ public class ShallowParseMentionFinder extends AbstractMentionFinder { @@ -32,7 +35,7 @@ public class ShallowParseMentionFinder extends AbstractMentionFinder { /** * @param hf A valid {@link HeadFinder} to assign if no instance exists. - * @return Retrieves the one and only existing instance. + * @return Retrieves the one and only existing {@link ShallowParseMentionFinder} instance. */ public static ShallowParseMentionFinder getInstance(HeadFinder hf) { if (instance == null) { diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java index 197570f..d0c84c3 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java @@ -165,6 +165,7 @@ public abstract class AbstractResolver implements Resolver { mention.getIndexSpan().getEnd() <= cec.getIndexSpan().getEnd(); } + @Override public DiscourseEntity retain(MentionContext mention, DiscourseModel dm) { int ei = 0; if (mention.getId() == -1) { @@ -200,6 +201,6 @@ public abstract class AbstractResolver implements Resolver { return fs.toString(); } - + @Override public void train() throws IOException {} } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/CommonNounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/CommonNounResolver.java index c8f5a05..00e8357 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/CommonNounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/CommonNounResolver.java @@ -28,18 +28,21 @@ import opennlp.tools.coref.mention.MentionContext; * Resolves coreference between common nouns. * * @see MaxentResolver + * @see Resolver */ public class CommonNounResolver extends MaxentResolver { + private static final String MODEL_NAME = "cmodel"; + public CommonNounResolver(String modelDirectory, ResolverMode m) throws IOException { - super(modelDirectory,"cmodel", m, 80, true); + super(modelDirectory, MODEL_NAME, m, 80, true); showExclusions = false; preferFirstReferent = true; } public CommonNounResolver(String modelDirectory, ResolverMode m, NonReferentialResolver nrr) throws IOException { - super(modelDirectory,"cmodel", m, 80, true, nrr); + super(modelDirectory, MODEL_NAME, m, 80, true, nrr); showExclusions = false; preferFirstReferent = true; } @@ -54,12 +57,11 @@ public class CommonNounResolver extends MaxentResolver { return features; } + @Override public boolean canResolve(MentionContext mention) { String firstTok = mention.getFirstTokenText().toLowerCase(); String firstTokTag = mention.getFirstToken().getSyntacticType(); - boolean rv = mention.getHeadTokenTag().equals("NN") - && !ResolverUtils.definiteArticle(firstTok, firstTokTag); - return rv; + return mention.getHeadTokenTag().equals(NN) && !ResolverUtils.definiteArticle(firstTok, firstTokTag); } @Override diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java index cc7df52..6b2fce6 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java @@ -22,7 +22,6 @@ import java.io.BufferedReader; import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -36,6 +35,9 @@ import java.util.ArrayList; import java.util.List; import java.util.zip.GZIPInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import opennlp.tools.coref.mention.MentionContext; import opennlp.tools.coref.mention.Parse; import opennlp.tools.ml.maxent.GISModel; @@ -47,8 +49,6 @@ import opennlp.tools.ml.model.FileEventStream; import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.util.ObjectStreamUtils; import opennlp.tools.util.TrainingParameters; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Default implementation of the {@link NonReferentialResolver} interface. @@ -97,7 +97,7 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver { List<String> features = getFeatures(mention); double r = model.eval(features.toArray(new String[0]))[nonRefIndex]; if (DEBUG) { - logger.debug(this + " {} -> null {} {}", mention.toText(), r, features); + logger.debug("{} {} -> null {} {}", this, mention.toText(), r, features); } return r; } @@ -121,9 +121,9 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver { } /** - * Returns a list of features used to predict whether the specified mention is non-referential. * @param mention The mention under consideration. - * @return a list of features used to predict whether the specified mention is non-referential. + * @return Retrieves a list of features used to predict whether the + * specified mention is non-referential. */ protected List<String> getNonReferentialFeatures(MentionContext mention) { List<String> features = new ArrayList<>(); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefiniteNounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefiniteNounResolver.java index 683e0c1..25a86c2 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefiniteNounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefiniteNounResolver.java @@ -31,14 +31,16 @@ import opennlp.tools.coref.mention.MentionContext; */ public class DefiniteNounResolver extends MaxentResolver { + private static final String MODEL_NAME = "defmodel"; + public DefiniteNounResolver(String modelDirectory, ResolverMode m) throws IOException { - super(modelDirectory, "defmodel", m, 80); + super(modelDirectory, MODEL_NAME, m, 80); // preferFirstReferent = true; } public DefiniteNounResolver(String modelDirectory, ResolverMode m, NonReferentialResolver nrr) throws IOException { - super(modelDirectory, "defmodel", m, 80,nrr); + super(modelDirectory, MODEL_NAME, m, 80,nrr); // preferFirstReferent = true; } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java index 4406d1e..5ba4a5d 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java @@ -55,7 +55,7 @@ public class IsAResolver extends MaxentResolver { @Override public boolean canResolve(MentionContext ec) { - if (ec.getHeadTokenTag().startsWith("NN")) { + if (ec.getHeadTokenTag().startsWith(NN)) { return ec.getPreviousToken() != null && predicativePattern.matcher(ec.getPreviousToken().toString()).matches(); } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java index 6dd2d30..20086ca 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java @@ -106,8 +106,11 @@ public abstract class MaxentResolver extends AbstractResolver { * Creates a maximum-entropy-based resolver which will look the specified number of * entities back for a referent. This constructor is only used for unit testing. * - * @param numberOfEntitiesBack - * @param preferFirstReferent + * @param numberOfEntitiesBack The number of sentences back this resolver should look for a referent. + * @param preferFirstReferent If {@code true}, this designates that the resolver should use the first + * referent encountered which it more preferable than non-reference. + * When {@code false} all non-excluded referents within this resolvers + * range are considered. */ protected MaxentResolver(int numberOfEntitiesBack, boolean preferFirstReferent) { super(numberOfEntitiesBack); @@ -163,7 +166,8 @@ public abstract class MaxentResolver extends AbstractResolver { * @param numberEntitiesBack The number of entities back in the text that this resolver will look for a referent. * @throws IOException If the model file is not found or can not be written to. */ - public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack) throws IOException { + public MaxentResolver(String modelDirectory, String modelName, ResolverMode mode, int numberEntitiesBack) + throws IOException { this(modelDirectory, modelName, mode, numberEntitiesBack, false); } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/NonReferentialResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/NonReferentialResolver.java index d365a85..1a7b910 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/NonReferentialResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/NonReferentialResolver.java @@ -23,9 +23,11 @@ import opennlp.tools.coref.mention.MentionContext; /** * Provides the interface for an object to provide a resolver with a non-referential - * probability. Non-referential resolvers compute the probability that a particular mention refers - * to no antecedent. This probability can then compete with the probability that + * probability. Non-referential resolvers compute the probability that a particular mention refers + * to no antecedent. This probability can then compete with the probability that * a mention refers with a specific antecedent. + * + * @see MentionContext */ public interface NonReferentialResolver { @@ -40,13 +42,13 @@ public interface NonReferentialResolver { /** * Designates that the specified mention be used for training. * - * @param mention The mention to be used. The mention id is used to determine + * @param mention The mention to be used. The mention id is used to determine * whether this mention is referential or non-referential. */ void addEvent(MentionContext mention); /** - * Trains a model based on the events given to this resolver via #addEvent. + * Trains a model based on the events given to this resolver via {@link #addEvent}. * * @throws IOException When the model can not be written out. */ diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PerfectResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PerfectResolver.java index 5d3053d..28d32b2 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PerfectResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PerfectResolver.java @@ -23,22 +23,22 @@ import opennlp.tools.coref.mention.MentionContext; /** * Resolver used in training to update the discourse model based on the coreference annotation. + * + * @see AbstractResolver + * @see Resolver */ -public class PerfectResolver extends AbstractResolver { +public class PerfectResolver extends AbstractResolver { public PerfectResolver() { super(0); } + @Override public boolean canResolve(MentionContext ec) { return true; } @Override - protected boolean outOfRange(MentionContext ec, DiscourseEntity de) { - return false; - } - public DiscourseEntity resolve(MentionContext ec, DiscourseModel dm) { return null; } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralNounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralNounResolver.java index ae815ea..f7d7aee 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralNounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralNounResolver.java @@ -28,17 +28,20 @@ import opennlp.tools.coref.mention.MentionContext; * Resolves coreference between plural nouns. * * @see MaxentResolver + * @see Resolver */ public class PluralNounResolver extends MaxentResolver { + private static final String MODEL_NAME = "plmodel"; + public PluralNounResolver(String modelDirectory, ResolverMode m) throws IOException { - super(modelDirectory, "plmodel", m, 80, true); + super(modelDirectory, MODEL_NAME, m, 80, true); showExclusions = false; } public PluralNounResolver(String modelDirectory, ResolverMode m, NonReferentialResolver nrr) throws IOException { - super(modelDirectory, "plmodel", m, 80, true, nrr); + super(modelDirectory, MODEL_NAME, m, 80, true, nrr); showExclusions = false; } @@ -58,8 +61,7 @@ public class PluralNounResolver extends MaxentResolver { public boolean canResolve(MentionContext mention) { String firstTok = mention.getFirstTokenText().toLowerCase(); String firstTokTag = mention.getFirstToken().getSyntacticType(); - return mention.getHeadTokenTag().equals("NNS") - && !ResolverUtils.definiteArticle(firstTok, firstTokTag); + return NNS.equals(mention.getHeadTokenTag()) && !ResolverUtils.definiteArticle(firstTok, firstTokTag); } @Override @@ -69,7 +71,7 @@ public class PluralNounResolver extends MaxentResolver { } else { MentionContext cec = entity.getLastExtent(); - return (!cec.getHeadTokenTag().equals("NNS") || super.excluded(mention, entity)); + return (!NNS.equals(cec.getHeadTokenTag()) || super.excluded(mention, entity)); } } } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java index a72ea5f..b7a6c17 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java @@ -28,18 +28,20 @@ import opennlp.tools.coref.mention.MentionContext; * Resolves coreference between plural pronouns and their referents. * * @see MaxentResolver + * @see Resolver */ public class PluralPronounResolver extends MaxentResolver { + private static final String MODEL_NAME = "tmodel"; private static final int NUM_SENTS_BACK_PRONOUNS = 2; public PluralPronounResolver(String modelDirectory, ResolverMode m) throws IOException { - super(modelDirectory, "tmodel", m, 30); + super(modelDirectory, MODEL_NAME, m, 30); } public PluralPronounResolver(String modelDirectory, ResolverMode m, NonReferentialResolver nrr) throws IOException { - super(modelDirectory, "tmodel", m, 30, nrr); + super(modelDirectory, MODEL_NAME, m, 30, nrr); } @Override @@ -89,7 +91,7 @@ public class PluralPronounResolver extends MaxentResolver { @Override public boolean canResolve(MentionContext mention) { String tag = mention.getHeadTokenTag(); - return tag != null && tag.startsWith("PRP") + return tag != null && tag.startsWith(PRP) && ResolverUtils.PLURAL_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches(); } } diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java index 3c96b08..7fbf558 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java @@ -40,16 +40,18 @@ import org.slf4j.LoggerFactory; * Resolves coreference between proper nouns. * * @see MaxentResolver + * @see Resolver */ public class ProperNounResolver extends MaxentResolver { private static final Logger logger = LoggerFactory.getLogger(ProperNounResolver.class); + private static final String MODEL_NAME = "pnmodel"; private static Map<String, Set<String>> acroMap; private static boolean acroMapLoaded = false; public ProperNounResolver(String modelDirectory, ResolverMode m) throws IOException { - super(modelDirectory,"pnmodel", m, 500); + super(modelDirectory, MODEL_NAME, m, 500); if (!acroMapLoaded) { initAcronyms(modelDirectory + "/acronyms"); acroMapLoaded = true; @@ -59,7 +61,7 @@ public class ProperNounResolver extends MaxentResolver { public ProperNounResolver(String modelDirectory, ResolverMode m,NonReferentialResolver nonRefResolver) throws IOException { - super(modelDirectory,"pnmodel", m, 500,nonRefResolver); + super(modelDirectory, MODEL_NAME, m, 500,nonRefResolver); if (!acroMapLoaded) { initAcronyms(modelDirectory + "/acronyms"); acroMapLoaded = true; @@ -69,7 +71,7 @@ public class ProperNounResolver extends MaxentResolver { @Override public boolean canResolve(MentionContext mention) { - return (mention.getHeadTokenTag().startsWith("NNP") || mention.getHeadTokenTag().startsWith("CD")); + return (mention.getHeadTokenTag().startsWith(NNP) || mention.getHeadTokenTag().startsWith("CD")); } private void initAcronyms(String name) { @@ -137,7 +139,7 @@ public class ProperNounResolver extends MaxentResolver { for (Iterator<MentionContext> ei = entity.getMentions(); ei.hasNext();) { MentionContext xec = ei.next(); - if (xec.getHeadTokenTag().startsWith("NNP")) { + if (xec.getHeadTokenTag().startsWith(NNP)) { // || initialCaps.matcher(xec.headToken.toString()).find()) { logger.debug("Kept {} with {}", xec.toText(), xec.getHeadTokenTag()); return false; diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/Resolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/Resolver.java index 62f0b89..7bc7129 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/Resolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/Resolver.java @@ -28,6 +28,11 @@ import opennlp.tools.coref.mention.MentionContext; */ public interface Resolver { + String NN = "NN"; // tag for singular noun + String NNP = "NNP"; // tag for proper noun + String NNS = "NNS"; // tag for plural nouns + String PRP = "PRP"; // tag for pronoun + /** * Returns {@code true} if this resolver is able to resolve the referring expression of the same type * as the specified mention. diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java index aa22f79..5bed71b 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java @@ -373,7 +373,7 @@ public class ResolverUtils { String type; for (int i = start; i < end;i++) { type = mtokens[start].getSyntacticType(); - if (type.startsWith("NNP")) { + if (type.startsWith(Resolver.NNP)) { break; } start++; @@ -412,7 +412,7 @@ public class ResolverUtils { for (Iterator<MentionContext> ei = de.getMentions(); ei.hasNext();) { MentionContext xec = ei.next(); String xecHeadTag = xec.getHeadTokenTag(); - if (xecHeadTag.startsWith("NNP") || INITIAL_CAPS.matcher(xec.getHeadTokenText()).find()) { + if (xecHeadTag.startsWith(Resolver.NNP) || INITIAL_CAPS.matcher(xec.getHeadTokenText()).find()) { return xec; } } @@ -461,12 +461,12 @@ public class ResolverUtils { public static List<String> getPronounMatchFeatures(MentionContext mention, DiscourseEntity entity) { boolean foundCompatiblePronoun = false; boolean foundIncompatiblePronoun = false; - if (mention.getHeadTokenTag().startsWith("PRP")) { + if (mention.getHeadTokenTag().startsWith(Resolver.PRP)) { Map<String, String> pronounMap = getPronounFeatureMap(mention.getHeadTokenText()); logger.debug("PronounMap: {}", pronounMap); for (Iterator<MentionContext> mi = entity.getMentions();mi.hasNext();) { MentionContext candidateMention = mi.next(); - if (candidateMention.getHeadTokenTag().startsWith("PRP")) { + if (candidateMention.getHeadTokenTag().startsWith(Resolver.PRP)) { if (mention.getHeadTokenText().equalsIgnoreCase(candidateMention.getHeadTokenText())) { foundCompatiblePronoun = true; break; diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java index 0a1b635..f338f7b 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java @@ -32,19 +32,21 @@ import org.slf4j.LoggerFactory; * This class resolver singular pronouns such as "he", "she", "it" and their various forms. * * @see MaxentResolver + * @see Resolver */ public class SingularPronounResolver extends MaxentResolver { private static final Logger logger = LoggerFactory.getLogger(SingularPronounResolver.class); + private static final String MODEL_NAME = "pmodel"; public SingularPronounResolver(String modelDirectory, ResolverMode m) throws IOException { - super(modelDirectory, "pmodel", m, 30); + super(modelDirectory, MODEL_NAME, m, 30); this.numSentencesBack = 2; } - public SingularPronounResolver(String modelDirectory, ResolverMode m, - NonReferentialResolver nonReferentialResolver) throws IOException { - super(modelDirectory, "pmodel", m, 30,nonReferentialResolver); + public SingularPronounResolver(String modelDirectory, ResolverMode m, NonReferentialResolver nrr) + throws IOException { + super(modelDirectory, MODEL_NAME, m, 30, nrr); this.numSentencesBack = 2; } @@ -52,7 +54,7 @@ public class SingularPronounResolver extends MaxentResolver { public boolean canResolve(MentionContext mention) { logger.debug("CanResolve: ec=({}) {}", mention.getId(), mention.toText()); String tag = mention.getHeadTokenTag(); - return tag != null && tag.startsWith("PRP") + return tag != null && tag.startsWith(PRP) && ResolverUtils.SINGULAR_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches(); } @@ -113,7 +115,7 @@ public class SingularPronounResolver extends MaxentResolver { for (Iterator<MentionContext> ei = entity.getMentions(); ei.hasNext();) { MentionContext entityMention = ei.next(); String tag = entityMention.getHeadTokenTag(); - if (tag != null && tag.startsWith("PRP") + if (tag != null && tag.startsWith(PRP) && ResolverUtils.SINGULAR_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches()) { if (mentionGender == null) { //lazy initialization mentionGender = ResolverUtils.getPronounGender(mention.getHeadTokenText()); diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java index 030b803..2bb5d08 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java @@ -31,13 +31,15 @@ import org.slf4j.LoggerFactory; * Resolves pronouns specific to quoted speech such as "you", "me", and "I". * * @see MaxentResolver + * @see Resolver */ public class SpeechPronounResolver extends MaxentResolver { private static final Logger logger = LoggerFactory.getLogger(SpeechPronounResolver.class); + private static final String MODEL_NAME = "fmodel"; public SpeechPronounResolver(String modelDirectory, ResolverMode m) throws IOException { - super(modelDirectory, "fmodel", m, 30); + super(modelDirectory, MODEL_NAME, m, 30); this.numSentencesBack = 0; showExclusions = false; preferFirstReferent = true; @@ -45,7 +47,7 @@ public class SpeechPronounResolver extends MaxentResolver { public SpeechPronounResolver(String modelDirectory, ResolverMode m, NonReferentialResolver nrr) throws IOException { - super(modelDirectory, "fmodel", m, 30, nrr); + super(modelDirectory, MODEL_NAME, m, 30, nrr); showExclusions = false; preferFirstReferent = true; } @@ -57,10 +59,10 @@ public class SpeechPronounResolver extends MaxentResolver { features.addAll(ResolverUtils.getPronounMatchFeatures(mention,entity)); List<String> contexts = ResolverUtils.getContextFeatures(mention); MentionContext cec = entity.getLastExtent(); - if (mention.getHeadTokenTag().startsWith("PRP") && cec.getHeadTokenTag().startsWith("PRP")) { + if (mention.getHeadTokenTag().startsWith(PRP) && cec.getHeadTokenTag().startsWith(PRP)) { features.add(mention.getHeadTokenText() + "," + cec.getHeadTokenText()); } - else if (mention.getHeadTokenText().startsWith("NNP")) { + else if (mention.getHeadTokenText().startsWith(NNP)) { features.addAll(contexts); features.add(mention.getNameType() + "," + cec.getHeadTokenText()); } @@ -82,9 +84,9 @@ public class SpeechPronounResolver extends MaxentResolver { @Override public boolean canResolve(MentionContext mention) { String tag = mention.getHeadTokenTag(); - boolean fpp = tag != null && tag.startsWith("PRP") + boolean fpp = tag != null && tag.startsWith(PRP) && ResolverUtils.SPEECH_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches(); - boolean pn = tag != null && tag.startsWith("NNP"); + boolean pn = tag != null && tag.startsWith(NNP); return (fpp || pn); } @@ -97,8 +99,8 @@ public class SpeechPronounResolver extends MaxentResolver { if (!canResolve(cec)) { return true; } - if (mention.getHeadTokenTag().startsWith("NNP")) { //mention is a propernoun - if (cec.getHeadTokenTag().startsWith("NNP")) { + if (mention.getHeadTokenTag().startsWith(NNP)) { //mention is a proper noun + if (cec.getHeadTokenTag().startsWith(NNP)) { return true; // both NNP } else { @@ -108,13 +110,13 @@ public class SpeechPronounResolver extends MaxentResolver { return !canResolve(cec); } } - else if (mention.getHeadTokenTag().startsWith("PRP")) { // mention is a speech pronoun + else if (mention.getHeadTokenTag().startsWith(PRP)) { // mention is a speech pronoun // cec can be either a speech pronoun or a proper noun - if (cec.getHeadTokenTag().startsWith("NNP")) { + if (cec.getHeadTokenTag().startsWith(NNP)) { //exclude antecedents not in the same sentence when they are not pronoun return (mention.getSentenceNumber() - cec.getSentenceNumber() != 0); } - else if (cec.getHeadTokenTag().startsWith("PRP")) { + else if (cec.getHeadTokenTag().startsWith(PRP)) { return false; } else { diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java index fc130c7..9bcc234 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java @@ -28,6 +28,7 @@ import opennlp.tools.coref.dictionary.DictionaryFactory; import opennlp.tools.coref.mention.HeadFinder; import opennlp.tools.coref.mention.Mention; import opennlp.tools.coref.mention.Parse; +import opennlp.tools.coref.resolver.Resolver; import opennlp.tools.util.Span; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -77,7 +78,7 @@ public class Context extends Mention { tokens = tokenList.toArray(new Parse[0]); this.headTokenTag = headToken.getSyntacticType(); this.headTokenText = headToken.toString(); - if (headTokenTag.startsWith("NN") && !headTokenTag.startsWith("NNP")) { + if (headTokenTag.startsWith(Resolver.NN) && !headTokenTag.startsWith(Resolver.NNP)) { this.synsets = getSynsetSet(this); } else { @@ -138,10 +139,10 @@ public class Context extends Mention { Dictionary dict = DictionaryFactory.getDictionary(); logger.debug("{} lemmas for {}", lemmas.length, c.getHeadTokenText()); for (String lemma : lemmas) { - String senseKey = dict.getSenseKey(lemma, "NN", 0); + String senseKey = dict.getSenseKey(lemma, Resolver.NN, 0); if (senseKey != null) { synsetSet.add(senseKey); - String[] synsets = dict.getParentSenseKeys(lemma, "NN", 0); + String[] synsets = dict.getParentSenseKeys(lemma, Resolver.NN, 0); synsetSet.addAll(Arrays.asList(synsets)); } } @@ -150,7 +151,7 @@ public class Context extends Mention { private static String[] getLemmas(Context c) { String word = c.headTokenText.toLowerCase(); - return DictionaryFactory.getDictionary().getLemmas(word,"NN"); + return DictionaryFactory.getDictionary().getLemmas(word, Resolver.NN); } /** diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Gender.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Gender.java index cdff989..0a04bf5 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Gender.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Gender.java @@ -19,6 +19,8 @@ package opennlp.tools.coref.sim; /** * Class which models the gender of an entity and the confidence of that association. + * + * @see GenderEnum */ public class Gender { diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Number.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Number.java index 68a9e20..4d8e47c 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Number.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Number.java @@ -18,6 +18,8 @@ package opennlp.tools.coref.sim; /** * Class which models the number of an entity and the confidence of that association. + * + * @see NumberEnum */ public class Number { private final NumberEnum type; diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/TrainModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/TrainModel.java index a9b6136..0061a8a 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/TrainModel.java +++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/TrainModel.java @@ -35,7 +35,7 @@ public interface TrainModel<T>{ * <p> * Incompatible extents are chosen at random from the set of extents which don't meet these criteria. * - * @param extents + * @param extents The {@link Context extents} to set and process. */ void setExtents(Context[] extents); } diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucCorefContentHandler.java b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucCorefContentHandler.java index baed3f6..063ffee 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucCorefContentHandler.java +++ b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucCorefContentHandler.java @@ -31,6 +31,10 @@ import opennlp.tools.util.Span; // Take care for special @ sign handling (identifies a table or something else that should be ignored) class MucCorefContentHandler extends SgmlParser.ContentHandler { + private static final String ID = "ID"; + private static final String REF = "REF"; + private static final String MIN = "MIN"; + public static class CorefMention { Span span; int id; @@ -58,11 +62,11 @@ class MucCorefContentHandler extends SgmlParser.ContentHandler { private RawCorefSample sample; /** - * Initializes a {@link MucCorefContentHandler}. + * Initializes a {@link MucCorefContentHandler} with the specified parameters. * * @param tokenizer The {@link Tokenizer} to use. Must not be {@code null}. - * @param samples The {@link List< RawCorefSample > samples} as input. - * Must not be {@code null}. + * @param samples The {@link RawCorefSample samples} as input. + * Must not be {@code null}. */ MucCorefContentHandler(Tokenizer tokenizer, List<RawCorefSample> samples) { this.tokenizer = tokenizer; @@ -72,7 +76,7 @@ class MucCorefContentHandler extends SgmlParser.ContentHandler { /** * Resolves an id via the references to the root {@code id}. * - * @param id the id or reference to be resolved + * @param id the id or reference to be resolved. * * @return the resolved {@code id} or {@code -1} if id cannot be resolved. */ @@ -104,8 +108,8 @@ class MucCorefContentHandler extends SgmlParser.ContentHandler { if (COREF_ELEMENT.equals(name)) { int beginOffset = text.size(); - String idString = attributes.get("ID"); - String refString = attributes.get("REF"); + String idString = attributes.get(ID); + String refString = attributes.get(REF); int id; if (idString != null) { @@ -124,7 +128,7 @@ class MucCorefContentHandler extends SgmlParser.ContentHandler { // throw invalid format exception ... } - mentionStack.push(new CorefMention(new Span(beginOffset, beginOffset), id, attributes.get("MIN"))); + mentionStack.push(new CorefMention(new Span(beginOffset, beginOffset), id, attributes.get(MIN))); } } diff --git a/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java b/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java index 7d4739a..ff9fb28 100644 --- a/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java +++ b/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java @@ -23,15 +23,15 @@ import java.io.IOException; import java.io.InputStreamReader; import java.nio.file.FileSystems; -import opennlp.tools.coref.sim.SimilarityModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import opennlp.tools.namefind.NameFinderEventStream; import opennlp.tools.namefind.NameFinderME; import opennlp.tools.namefind.TokenNameFinderModel; import opennlp.tools.parser.Parse; import opennlp.tools.tokenize.SimpleTokenizer; import opennlp.tools.util.Span; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Class is used to create a name finder for English. @@ -42,6 +42,8 @@ import org.slf4j.LoggerFactory; public class TreebankNameFinder { private static final Logger logger = LoggerFactory.getLogger(TreebankNameFinder.class); + private static final String SYMBOL_LT = "<"; + private static final String SYMBOL_GT = ">"; public static String[] NAME_TYPES = {"person", "organization", "location", "date", "time", "percentage", "money"}; @@ -90,11 +92,13 @@ public class TreebankNameFinder { } /** - * Adds sgml style name tags to the specified input buffer and outputs this information to stdout. + * Adds sgml style name {@code tags} to the specified {@code input} buffer and outputs this information to stdout. + * * @param finders The name finders to be used. * @param tags The tag names for the corresponding name finder. - * @param input The input reader. - * @throws IOException + * @param input The {@link BufferedReader input reader}. + * + * @throws IOException Thrown if IO errors occurred. */ private static void processText(TreebankNameFinder[] finders, String[] tags, BufferedReader input) throws IOException { @@ -124,7 +128,7 @@ public class TreebankNameFinder { || nameOutcomes[fi][ti].equals(NameFinderME.OTHER)) && (nameOutcomes[fi][ti - 1].equals(NameFinderME.START) || nameOutcomes[fi][ti - 1].equals(NameFinderME.CONTINUE))) { - output.append("</").append(tags[fi]).append(">"); + output.append("</").append(tags[fi]).append(SYMBOL_GT); } } } @@ -134,7 +138,7 @@ public class TreebankNameFinder { //check for start tags for (int fi = 0, fl = finders.length; fi < fl; fi++) { if (nameOutcomes[fi][ti].equals(NameFinderME.START)) { - output.append("<").append(tags[fi]).append(">"); + output.append(SYMBOL_LT).append(tags[fi]).append(SYMBOL_GT); } } output.append(tokens[ti]); @@ -144,7 +148,7 @@ public class TreebankNameFinder { for (int fi = 0, fl = finders.length; fi < fl; fi++) { if (nameOutcomes[fi][tokens.length - 1].equals(NameFinderME.START) || nameOutcomes[fi][tokens.length - 1].equals(NameFinderME.CONTINUE)) { - output.append("</").append(tags[fi]).append(">"); + output.append("</").append(tags[fi]).append(SYMBOL_GT); } } } diff --git a/opennlp-coref/src/test/java/opennlp/tools/coref/linker/AbstractLinkerTest.java b/opennlp-coref/src/test/java/opennlp/tools/coref/linker/AbstractLinkerTest.java index 925fc0e..824dcc8 100644 --- a/opennlp-coref/src/test/java/opennlp/tools/coref/linker/AbstractLinkerTest.java +++ b/opennlp-coref/src/test/java/opennlp/tools/coref/linker/AbstractLinkerTest.java @@ -54,7 +54,7 @@ public abstract class AbstractLinkerTest extends AbstractCorefTest { static void showEntities(DiscourseEntity[] entities) { for (int ei = 0, en = entities.length; ei < en; ei++) { - logger.debug(ei + " " + entities[ei]); + logger.debug("{} {}", ei, entities[ei]); } } }
