This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new 6d40375 OPENNLP-1614: Clean plain Sys.out logging from Coref
component (#169)
6d40375 is described below
commit 6d40375434f6eeb1e25a1cd15f8eff16d8ca944d
Author: Martin Wiesner <[email protected]>
AuthorDate: Thu Oct 10 09:15:20 2024 +0200
OPENNLP-1614: Clean plain Sys.out logging from Coref component (#169)
- removes all Sys.out/err logging and e.printStackTrace calls from Coref
component
- introduces proper (slf4j) logging instead
- adds @Override annotation where appropriate
- drops commented / orphaned code
- applies code cosmetics: removing useless brackets
---
.../tools/cmdline/coref/CoreferencerTool.java | 26 ++--
.../java/opennlp/tools/coref/AbstractParse.java | 1 -
.../main/java/opennlp/tools/coref/CorefParse.java | 7 +-
.../java/opennlp/tools/coref/DiscourseModel.java | 15 ++-
.../tools/coref/dictionary/DictionaryFactory.java | 10 +-
.../tools/coref/dictionary/JWNLDictionary.java | 20 +--
.../opennlp/tools/coref/linker/AbstractLinker.java | 65 +++++-----
.../opennlp/tools/coref/linker/DefaultLinker.java | 7 +-
.../tools/coref/mention/AbstractMentionFinder.java | 101 ++++++---------
.../tools/coref/mention/MentionContext.java | 128 ++++---------------
.../opennlp/tools/coref/mention/PTBHeadFinder.java | 63 +++++-----
.../coref/mention/ShallowParseMentionFinder.java | 38 +-----
.../tools/coref/resolver/AbstractResolver.java | 6 +-
.../resolver/DefaultNonReferentialResolver.java | 3 +-
.../opennlp/tools/coref/resolver/IsAResolver.java | 138 ++++-----------------
.../tools/coref/resolver/MaxentResolver.java | 38 +++---
.../coref/resolver/PluralPronounResolver.java | 13 +-
.../tools/coref/resolver/ProperNounResolver.java | 13 +-
.../tools/coref/resolver/ResolverUtils.java | 52 ++++----
.../coref/resolver/SingularPronounResolver.java | 22 ++--
.../coref/resolver/SpeechPronounResolver.java | 11 +-
.../main/java/opennlp/tools/coref/sim/Context.java | 6 +-
.../java/opennlp/tools/coref/sim/GenderModel.java | 25 ++--
.../tools/coref/sim/MaxentCompatibilityModel.java | 12 +-
.../java/opennlp/tools/coref/sim/NumberModel.java | 7 +-
.../opennlp/tools/coref/sim/SimilarityModel.java | 58 ++++-----
.../formats/muc/MucMentionInserterStream.java | 27 ++--
.../tools/lang/english/TreebankNameFinder.java | 33 +++--
28 files changed, 374 insertions(+), 571 deletions(-)
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
index 6827d16..9cab3ee 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
@@ -44,8 +44,13 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
public class CoreferencerTool extends BasicCmdLineTool {
+ private static final Logger logger =
LoggerFactory.getLogger(CoreferencerTool.class);
+
static class CorefParse {
private final Map<Parse, Integer> parseMap;
@@ -131,9 +136,8 @@ public class CoreferencerTool extends BasicCmdLineTool {
String line;
while ((line = lineStream.read()) != null) {
- if (line.equals("")) {
- DiscourseEntity[] entities =
- treebankLinker.getEntities(document.toArray(new Mention[0]));
+ if (line.isEmpty()) {
+ DiscourseEntity[] entities =
treebankLinker.getEntities(document.toArray(new Mention[0]));
//showEntities(entities);
new CorefParse(parses, entities).show();
sentenceNumber = 0;
@@ -143,31 +147,25 @@ public class CoreferencerTool extends BasicCmdLineTool {
else {
Parse p = Parse.parseParse(line);
parses.add(p);
- Mention[] extents =
- treebankLinker.getMentionFinder().getMentions(new
DefaultParse(p,sentenceNumber));
- //construct new parses for mentions which don't have constituents.
+ Mention[] extents =
treebankLinker.getMentionFinder().getMentions(new
DefaultParse(p,sentenceNumber));
+ // construct new parses for mentions which don't have constituents.
for (Mention extent : extents) {
- //System.err.println("PennTreebankLiner.main: "+ei+"
"+extents[ei]);
-
+ logger.debug("Constructing new parse for: {}", extent);
if (extent.getParse() == null) {
- //not sure how to get head index, but it's not used at this
point.
+ // not sure how to get head index, but it's not used at this
point.
Parse snp = new Parse(p.getText(), extent.getSpan(), "NML",
1.0, 0);
p.insert(snp);
extent.setParse(new DefaultParse(snp, sentenceNumber));
}
-
}
document.addAll(Arrays.asList(extents));
sentenceNumber++;
}
-
perfMon.incrementCounter();
}
- }
- catch (IOException e) {
+ } catch (IOException e) {
CmdLineUtil.handleStdinIoError(e);
}
-
perfMon.stopAndPrintFinalResult();
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/AbstractParse.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/AbstractParse.java
index 74ef1e8..56ab639 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/AbstractParse.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/AbstractParse.java
@@ -49,7 +49,6 @@ public abstract class AbstractParse implements Parse {
while (!parts.isEmpty()) {
List<Parse> newParts = new ArrayList<>();
for (Parse cp : parts) {
- //System.err.println("AbstractParse.getNounPhrases
"+parts.get(pi).getClass());
if (cp.isNounPhrase()) {
nps.add(cp);
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/CorefParse.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/CorefParse.java
index 3c3f579..c0b3885 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/CorefParse.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/CorefParse.java
@@ -26,6 +26,8 @@ import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.chunking.Parser;
import opennlp.tools.util.Span;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* A container class encapsulating results of a co-reference parse operation.
@@ -35,6 +37,8 @@ import opennlp.tools.util.Span;
*/
public class CorefParse {
+ private static final Logger logger =
LoggerFactory.getLogger(CorefParse.class);
+
private final Map<Parse, Integer> parseMap;
private final List<Parse> parses;
@@ -47,7 +51,7 @@ public class CorefParse {
MentionContext mc = mi.next();
Parse mentionParse = ((DefaultParse) mc.getParse()).getParse();
parseMap.put(mentionParse, ei + 1);
- //System.err.println("CorefParse: "+mc.getParse().hashCode()+" -> "+
(ei+1));
+ logger.debug("CorefParse: {} -> {}", mc.getParse().hashCode(), ei+1);
}
}
}
@@ -79,7 +83,6 @@ public class CorefParse {
if (parseMap.containsKey(p)) {
System.out.print("#" + parseMap.get(p));
}
- //System.out.print(p.hashCode()+"-"+parseMap.containsKey(p));
System.out.print(" ");
}
Parse[] children = p.getChildren();
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseModel.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseModel.java
index 1ae6bb8..d2b3da3 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/DiscourseModel.java
@@ -22,6 +22,8 @@ import java.util.Iterator;
import java.util.List;
import opennlp.tools.coref.mention.MentionContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Represents the {@link DiscourseElement elements} which are part of a
discourse.
@@ -31,6 +33,8 @@ import opennlp.tools.coref.mention.MentionContext;
*/
public class DiscourseModel {
+ private static final Logger logger =
LoggerFactory.getLogger(DiscourseModel.class);
+
private final List<DiscourseEntity> entities;
private int nextEntityId = 1;
@@ -43,16 +47,15 @@ public class DiscourseModel {
}
/**
- * Indicates that the specified entity has been mentioned.
+ * Indicates that the specified {@link DiscourseEntity} has been mentioned.
*
* @param e The entity which has been mentioned.
*/
public void mentionEntity(DiscourseEntity e) {
if (entities.remove(e)) {
- entities.add(0,e);
- }
- else {
- System.err.println("DiscourseModel.mentionEntity: failed to remove " +
e);
+ entities.add(0, e);
+ } else {
+ logger.warn("Failed to remove {}", e);
}
}
@@ -93,8 +96,8 @@ public class DiscourseModel {
for (Iterator<MentionContext> ei = e2.getMentions(); ei.hasNext();) {
e1.addMention(ei.next());
}
- //System.err.println("DiscourseModel.mergeEntities: removing "+e2);
entities.remove(e2);
+ logger.debug("Removed entity during entity merge operation: {}", e2);
}
/**
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/dictionary/DictionaryFactory.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/dictionary/DictionaryFactory.java
index 1e2ac3a..b35ec8f 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/dictionary/DictionaryFactory.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/dictionary/DictionaryFactory.java
@@ -18,6 +18,9 @@
package opennlp.tools.coref.dictionary;
import net.sf.extjwnl.JWNLException;
+import opennlp.tools.coref.linker.AbstractLinker;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
@@ -28,6 +31,8 @@ import java.io.IOException;
*/
public class DictionaryFactory {
+ private static final Logger logger =
LoggerFactory.getLogger(DictionaryFactory.class);
+
private static Dictionary dictionary;
/**
@@ -37,9 +42,8 @@ public class DictionaryFactory {
if (dictionary == null) {
try {
dictionary = new JWNLDictionary();
- }
- catch (IOException | JWNLException e) {
- System.err.println(e);
+ } catch (JWNLException e) {
+ logger.error(e.getLocalizedMessage(), e);
}
}
return dictionary;
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/dictionary/JWNLDictionary.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/dictionary/JWNLDictionary.java
index 8236fc0..07abe85 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/dictionary/JWNLDictionary.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/dictionary/JWNLDictionary.java
@@ -17,7 +17,6 @@
package opennlp.tools.coref.dictionary;
-import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -27,19 +26,23 @@ import net.sf.extjwnl.data.POS;
import net.sf.extjwnl.data.Pointer;
import net.sf.extjwnl.data.PointerType;
import net.sf.extjwnl.data.Synset;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
- * An implementation of the Dictionary interface using the JWNL library.
+ * An implementation of the {@link Dictionary} using the JWNL library.
*
* @see Dictionary
*/
public class JWNLDictionary implements Dictionary {
+ private static final Logger logger =
LoggerFactory.getLogger(JWNLDictionary.class);
+
private final net.sf.extjwnl.dictionary.Dictionary dict;
private final net.sf.extjwnl.dictionary.MorphologicalProcessor morphy;
private static final String[] EMPTY = new String[0];
- public JWNLDictionary() throws IOException, JWNLException {
+ public JWNLDictionary() throws JWNLException {
dict = net.sf.extjwnl.dictionary.Dictionary.getDefaultResourceInstance();
morphy = dict.getMorphologicalProcessor();
}
@@ -67,22 +70,22 @@ public class JWNLDictionary implements Dictionary {
return lemmas.toArray(new String[0]);
}
catch (JWNLException e) {
- e.printStackTrace();
+ logger.warn("Problem retrieving lemmas for word '{}' (tag='{}'): {}",
word, tag, e.getMessage());
return null;
}
}
@Override
- public String getSenseKey(String lemma, String pos,int sense) {
+ public String getSenseKey(String lemma, String pos, int sense) {
try {
- IndexWord iw = dict.getIndexWord(POS.NOUN,lemma);
+ IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
if (iw == null) {
return null;
}
return String.valueOf(iw.getSynsetOffsets()[sense]);
}
catch (JWNLException e) {
- e.printStackTrace();
+ logger.warn("Problem retrieving sense key for lemma '{}': {}", lemma,
e.getMessage());
return null;
}
}
@@ -97,6 +100,7 @@ public class JWNLDictionary implements Dictionary {
return iw.getSynsetOffsets().length;
}
catch (JWNLException e) {
+ logger.warn("Problem retrieving number of senses for lemma '{}': {}",
lemma, e.getMessage());
return 0;
}
}
@@ -127,7 +131,7 @@ public class JWNLDictionary implements Dictionary {
}
}
catch (JWNLException e) {
- e.printStackTrace();
+ logger.warn("Problem retrieving parent sense keys for lemma '{}'
(pos='{}'): {}", lemma, pos, e.getMessage());
return null;
}
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/AbstractLinker.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/AbstractLinker.java
index e4f770d..7ca95b7 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/AbstractLinker.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/AbstractLinker.java
@@ -30,12 +30,17 @@ import opennlp.tools.coref.resolver.AbstractResolver;
import opennlp.tools.coref.sim.Gender;
import opennlp.tools.coref.sim.Number;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* Provides a default implementation of many of the methods in {@link Linker}
that
* most implementations of {@link Linker} will want to extend.
*/
public abstract class AbstractLinker implements Linker {
+ private static final Logger logger =
LoggerFactory.getLogger(AbstractLinker.class);
+
/** The mention finder used to find mentions. */
protected MentionFinder mentionFinder;
@@ -97,7 +102,6 @@ public abstract class AbstractLinker implements Linker {
* @param discourseModel The discourse model of existing entities.
*/
protected void resolve(MentionContext mention, DiscourseModel
discourseModel) {
- //System.err.println("AbstractLinker.resolve: "+mode+"("+econtext.id+")
"+econtext.toText());
boolean validEntity = true; // true if we should add this entity to the dm
boolean canResolve = false;
@@ -119,7 +123,7 @@ public abstract class AbstractLinker implements Linker {
//eval.update(rde == entities[ri], ri, entities[ri], rde);
}
else {
- System.err.println("AbstractLinker.Unknown mode: " + mode);
+ logger.warn("Invalid linker mode '{}' detected during resolve in
AbstractLinker", mode);
}
if (ri == SINGULAR_PRONOUN && entities[ri] == null) {
validEntity = false;
@@ -130,7 +134,8 @@ public abstract class AbstractLinker implements Linker {
}
}
if (!canResolve && removeUnresolvedMentions) {
- //System.err.println("No resolver for: "+econtext.toText()
+ // What is / was econtext here ?
+ //logger.debug("No resolver for: "+econtext.toText()
// + " head="+econtext.headTokenText+" "+econtext.headTokenTag);
validEntity = false;
}
@@ -148,45 +153,40 @@ public abstract class AbstractLinker implements Linker {
/**
* Updates the specified discourse model with the specified mention as
coreferent with the specified entity.
* @param dm The discourse model
- * @param mention The mention to be added to the specified entity.
+ * @param m The mention to be added to the specified entity.
* @param entity The entity which is mentioned by the specified mention.
- * @param useDiscourseModel Whether the mentions should be kept as an entiy
or simply co-indexed.
+ * @param useDiscourseModel Whether the mentions should be kept as an entity
or simply co-indexed.
*/
- protected void updateExtent(DiscourseModel dm, MentionContext mention,
DiscourseEntity entity,
+ protected void updateExtent(DiscourseModel dm, MentionContext m,
DiscourseEntity entity,
boolean useDiscourseModel) {
if (useDiscourseModel) {
if (entity != null) {
- //System.err.println("AbstractLinker.updateExtent: addingExtent:
- // "+econtext.toText());
- if (entity.getGenderProbability() < mention.getGenderProb()) {
- entity.setGender(mention.getGender());
- entity.setGenderProbability(mention.getGenderProb());
+ logger.debug("Adding extent: {}", m.toText());
+ if (entity.getGenderProbability() < m.getGenderProb()) {
+ entity.setGender(m.getGender());
+ entity.setGenderProbability(m.getGenderProb());
}
- if (entity.getNumberProbability() < mention.getNumberProb()) {
- entity.setNumber(mention.getNumber());
- entity.setNumberProbability(mention.getNumberProb());
+ if (entity.getNumberProbability() < m.getNumberProb()) {
+ entity.setNumber(m.getNumber());
+ entity.setNumberProbability(m.getNumberProb());
}
- entity.addMention(mention);
+ entity.addMention(m);
dm.mentionEntity(entity);
- }
- else {
- //System.err.println("AbstractLinker.updateExtent: creatingExtent:
- // "+econtext.toText()+" "+econtext.gender+" "+econtext.number);
- entity = new DiscourseEntity(mention, mention.getGender(),
mention.getGenderProb(),
- mention.getNumber(), mention.getNumberProb());
+ } else {
+ logger.debug("Creating Extent: {} {} {}", m.toText(), m.getGender(),
m.getNumber());
+ entity = new DiscourseEntity(m, m.getGender(), m.getGenderProb(),
m.getNumber(), m.getNumberProb());
dm.addEntity(entity);
}
- }
- else {
+ } else {
if (entity != null) {
- DiscourseEntity newEntity = new DiscourseEntity(mention,
mention.getGender(),
- mention.getGenderProb(), mention.getNumber(),
mention.getNumberProb());
+ DiscourseEntity newEntity =
+ new DiscourseEntity(m, m.getGender(), m.getGenderProb(),
m.getNumber(), m.getNumberProb());
dm.addEntity(newEntity);
newEntity.setId(entity.getId());
}
else {
- DiscourseEntity newEntity = new DiscourseEntity(mention,
mention.getGender(),
- mention.getGenderProb(), mention.getNumber(),
mention.getNumberProb());
+ DiscourseEntity newEntity =
+ new DiscourseEntity(m, m.getGender(), m.getGenderProb(),
m.getNumber(), m.getNumberProb());
dm.addEntity(newEntity);
}
}
@@ -215,7 +215,7 @@ public abstract class AbstractLinker implements Linker {
MentionContext[] extentContexts = this.constructMentionContexts(mentions);
DiscourseModel dm = new DiscourseModel();
for (MentionContext extentContext : extentContexts) {
- //System.err.println(ei+" "+extentContexts[ei].toText());
+ logger.debug("{}", extentContext.toText());
resolve(extentContext, dm);
}
return (dm.getEntities());
@@ -246,9 +246,9 @@ public abstract class AbstractLinker implements Linker {
MentionContext[] contexts = new MentionContext[mentions.length];
for (int mi = 0,mn = mentions.length;mi < mn; mi++) {
Parse mentionParse = mentions[mi].getParse();
- //System.err.println("AbstractLinker.constructMentionContexts:
mentionParse="+mentionParse);
+ logger.debug("Constructing MentionContexts: mentionParse = {}",
mentionParse);
if (mentionParse == null) {
- System.err.println("no parse for " + mentions[mi]);
+ logger.warn("no parse for {}", mentions[mi]);
}
int sentenceIndex = mentionParse.getSentenceNumber();
if (sentenceIndex != prevSentenceIndex) {
@@ -264,9 +264,8 @@ public abstract class AbstractLinker implements Linker {
}
contexts[mi] = new MentionContext(mentions[mi], mentionInSentenceIndex,
numMentionsInSentence, mi, sentenceIndex, getHeadFinder());
- //System.err.println("AbstractLinker.constructMentionContexts: mi="+mi
- // +" sn="+mentionParse.getSentenceNumber()+" extent="+mentions[mi]+"
parse="
- // +mentionParse.getSpan()+" mc="+contexts[mi].toText());
+ logger.debug("Constructing MentionContexts:: mi={} sn={} extent={}
parse={} mc={}",
+ mi, mentionParse.getSentenceNumber(), mentions[mi],
mentionParse.getSpan(), contexts[mi].toText());
contexts[mi].setId(mentions[mi].getId());
mentionInSentenceIndex++;
if (mode != LinkerMode.SIM) {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/DefaultLinker.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/DefaultLinker.java
index bb6c51b..1379ee3 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/linker/DefaultLinker.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/linker/DefaultLinker.java
@@ -42,6 +42,9 @@ import opennlp.tools.coref.sim.MaxentCompatibilityModel;
import opennlp.tools.coref.sim.Number;
import opennlp.tools.coref.sim.SimilarityModel;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* This class perform coreference for treebank style parses or for noun-phrase
chunked data.
* <p>
@@ -53,6 +56,8 @@ import opennlp.tools.coref.sim.SimilarityModel;
*/
public class DefaultLinker extends AbstractLinker {
+ private static final Logger logger =
LoggerFactory.getLogger(DefaultLinker.class);
+
protected MaxentCompatibilityModel mcm;
/**
@@ -164,7 +169,7 @@ public class DefaultLinker extends AbstractLinker {
resolvers[8] = new PerfectResolver();
}
else {
- System.err.println("DefaultLinker: Invalid Mode");
+ logger.warn("Invalid linker mode '{}' detected during creation of
DefaultLinker", mode);
}
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
index 6c2b16c..8413bdf 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/AbstractMentionFinder.java
@@ -31,11 +31,16 @@ import opennlp.tools.coref.linker.Linker;
import opennlp.tools.coref.resolver.ResolverUtils;
import opennlp.tools.util.Span;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* Provides default implementation of many of the methods in the {@link
MentionFinder} interface.
*/
public abstract class AbstractMentionFinder implements MentionFinder {
+ private static final Logger logger =
LoggerFactory.getLogger(AbstractMentionFinder.class);
+
protected HeadFinder headFinder;
protected boolean collectPrenominalNamedEntities;
@@ -43,18 +48,18 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
private void gatherHeads(Parse p, Map<Parse, Parse> heads) {
Parse head = headFinder.getHead(p);
- //System.err.println("AbstractMention.gatherHeads: "+head+" ->
("+p.hashCode()+") "+p);
- //if (head != null) {
System.err.println("head.hashCode()="+head.hashCode());}
+ logger.debug("Gathering Heads: {} -> ({}) {}", head, p.hashCode(), p);
if (head != null) {
+ logger.debug("head.hashCode() = {}", head.hashCode());
heads.put(head, p);
}
}
/**
* Assigns head relations between noun phrases and the child np
- * which is their head.
- * @param nps List of valid nps for this mention finder.
- * @return mapping from noun phrases and the child np which is their head
+ * which is their head.
+ * @param nps List of valid nps for this mention finder.
+ * @return mapping from noun phrases and the child np which is their head
*/
protected Map<Parse, Parse> constructHeadMap(List<Parse> nps) {
Map<Parse, Parse> headMap = new HashMap<>();
@@ -75,7 +80,7 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
}
protected boolean isBasalNounPhrase(Parse np) {
- return np.getNounPhrases().size() == 0;
+ return np.getNounPhrases().isEmpty();
}
protected boolean isPossessive(Parse np) {
@@ -138,7 +143,7 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
}
private void collectCoordinatedNounPhraseMentions(Parse np, List<Mention>
entities) {
- //System.err.println("collectCoordNp: "+np);
+ logger.trace("collectCoordNp: {}", np);
//exclude nps with UCPs inside.
List<Parse> sc = np.getSyntacticChildren();
for (Parse scp : sc) {
@@ -159,8 +164,8 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
npTokens.get(lastNpTokenIndex).getSpan().getEnd());
Mention snpExtent = new Mention(npSpan, npSpan, tok.getEntityId(),
null,"CNP");
entities.add(snpExtent);
- //System.err.println("adding extent for conjunction in: "+np+"
preeceeded by "
- // +((Parse) npTokens.get(ti-1)).getSyntacticType());
+ logger.debug("Adding extent for conjunction in: {} preceded by {}",
+ np, npTokens.get(ti-1).getSyntacticType());
inCoordinatedNounPhrase = true;
}
else {
@@ -175,7 +180,7 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
npTokens.get(lastNpTokenIndex).getSpan().getEnd());
Mention snpExtent = new Mention(npSpan, npSpan, tok.getEntityId(),
null,"CNP");
entities.add(snpExtent);
- //System.err.println("adding extent for comma in: "+np);
+ logger.debug("Adding extent for comma in: {}", np);
}
lastNpTokenIndex = ti - 1;
}
@@ -184,7 +189,7 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
npTokens.get(lastNpTokenIndex).getSpan().getEnd());
Mention snpExtent = new Mention(npSpan, npSpan, tok.getEntityId(),
null,"CNP");
entities.add(snpExtent);
- //System.err.println("adding extent for start coord in: "+np);
+ logger.debug("Adding extent for start coord in: {}", np);
}
}
}
@@ -195,13 +200,13 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
ResolverUtils.SPEECH_PRONOUN_PATTERN.matcher(tok).find();
}
- private void collectPossesivePronouns(Parse np, List<Mention> entities) {
- //TODO: Look at how training is done and examine whether this is
- // needed or can be accomidated in a different way.
+ private void collectPossessivePronouns(Parse np, List<Mention> entities) {
+ //XXX: Look at how training is done and examine whether this is
+ // needed or can be accommodated in a different way.
/*
List snps = np.getSubNounPhrases();
if (snps.size() != 0) {
- //System.err.println("AbstractMentionFinder: Found existing snps");
+ logger.trace("Found existing snps");
for (int si = 0, sl = snps.size(); si < sl; si++) {
Parse snp = (Parse) snps.get(si);
Extent ppExtent = new Extent(snp.getSpan(), snp.getSpan(),
snp.getEntityId(),
@@ -211,7 +216,7 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
}
else {
*/
- //System.err.println("AbstractEntityFinder.collectPossesivePronouns:
"+np);
+ logger.debug("CollectPossessivePronouns: {}", np);
List<Parse> npTokens = np.getTokens();
Parse headToken = headFinder.getHeadToken(np);
for (int ti = npTokens.size() - 2; ti >= 0; ti--) {
@@ -222,10 +227,9 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
if (tok.getSyntacticType().startsWith("PRP") &&
handledPronoun(tok.toString())) {
Mention ppExtent = new Mention(tok.getSpan(), tok.getSpan(),
tok.getEntityId(), null,Linker.PRONOUN_MODIFIER);
- //System.err.println("AbstractEntityFinder.collectPossesivePronouns:
adding possesive pronoun: "
- // +tok+" "+tok.getEntityId());
+ logger.debug("CollectPossessivePronouns: adding possessive pronoun:
{} {}", tok, tok.getEntityId());
entities.add(ppExtent);
- //System.err.println("AbstractMentionFinder: adding pos-pro:
"+ppExtent);
+ logger.debug("Adding pos-pro: {}", ppExtent);
break;
}
}
@@ -260,23 +264,22 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
private void clearMentions(Set<Parse> mentions, Parse np) {
Span npSpan = np.getSpan();
- //System.err.println("clearing "+mention+" for "+np);
+ logger.debug("Clearing {} for {}", mentions, np);
mentions.removeIf(mention -> !mention.getSpan().contains(npSpan));
}
private Mention[] collectMentions(List<Parse> nps, Map<Parse, Parse>
headMap) {
List<Mention> mentions = new ArrayList<>(nps.size());
Set<Parse> recentMentions = new HashSet<>();
- //System.err.println("AbtractMentionFinder.collectMentions: "+headMap);
+ logger.debug("CollectMentions: {}", headMap);
for (Parse np : nps) {
- //System.err.println("AbstractMentionFinder: collectMentions: np[" + npi
+ "]="
- // + np + " head=" + headMap.get(np));
+ logger.debug("CollectMentions: {} head={}", np, headMap.get(np));
if (!isHeadOfExistingMention(np, headMap, recentMentions)) {
clearMentions(recentMentions, np);
if (!isPartOfName(np)) {
Parse head = headFinder.getLastHead(np);
Mention extent = new Mention(np.getSpan(), head.getSpan(),
head.getEntityId(), np, null);
- //System.err.println("adding "+np+" with head "+head);
+ logger.debug("Adding {} with head {}", np, head);
mentions.add(extent);
recentMentions.add(np);
// determine name-entity type
@@ -285,12 +288,10 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
extent.setNameType(entityType);
}
} else {
- //System.err.println(
- // "AbstractMentionFinder.collectMentions excluding np as part of
name. np=" + np);
+ logger.debug("CollectMentions excluding np as part of name. np={}",
np);
}
} else {
- //System.err.println(
- // "AbstractMentionFinder.collectMentions excluding np as head of
previous mention. np=" + np);
+ logger.debug("CollectMentions excluding np as head of previous
mention. np={}", np);
}
if (isBasalNounPhrase(np)) {
if (collectPrenominalNamedEntities) {
@@ -299,7 +300,7 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
if (collectCoordinatedNounPhrases) {
collectCoordinatedNounPhraseMentions(np, mentions);
}
- collectPossesivePronouns(np, mentions);
+ collectPossessivePronouns(np, mentions);
} else {
// Could use to get NP -> tokens CON structures for basal nps
including NP -> NAC tokens
//collectComplexNounPhrases(np,mentions);
@@ -310,37 +311,13 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
return mentions.toArray(new Mention[0]);
}
- /*
- * Adds a mention for the non-treebank-labeled possesive noun phrases.
- * @param possesiveNounPhrase The possesive noun phase which may require an
additional mention.
- * @param mentions The list of mentions into which a new mention can be
added.
- */
-// private void addPossesiveMentions(Parse possesiveNounPhrase, List<Mention>
mentions) {
-// List<Parse> kids = possesiveNounPhrase.getSyntacticChildren();
-// if (kids.size() >1) {
-// Parse firstToken = kids.get(1);
-// if (firstToken.isToken() &&
!firstToken.getSyntacticType().equals("POS")) {
-// Parse lastToken = kids.get(kids.size()-1);
-// if (lastToken.isToken()) {
-// Span extentSpan = new
Span(firstToken.getSpan().getStart(),lastToken.getSpan().getEnd());
-// Mention extent = new Mention(extentSpan, extentSpan, -1, null,
null);
-// mentions.add(extent);
-// }
-// else {
-// System.err.println("AbstractMentionFinder.addPossesiveMentions:
odd parse structure: "
-// +possesiveNounPhrase);
-// }
-// }
-// }
-// }
-
private void collectPrenominalNamedEntities(Parse np, List<Mention> extents)
{
Parse htoken = headFinder.getHeadToken(np);
List<Parse> nes = np.getNamedEntities();
Span headTokenSpan = htoken.getSpan();
for (Parse ne : nes) {
if (!ne.getSpan().contains(headTokenSpan)) {
- //System.err.println("adding extent for prenominal ne: "+ne);
+ logger.debug("Adding extent for prenominal ne: {}", ne);
Mention extent = new Mention(ne.getSpan(), ne.getSpan(),
ne.getEntityId(), null, "NAME");
extent.setNameType(ne.getEntityType());
extents.add(extent);
@@ -375,9 +352,9 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
String entityType;
for (Parse parent = np.getParent(); parent != null; parent =
parent.getParent()) {
entityType = parent.getEntityType();
- //System.err.println("AbstractMentionFinder.isPartOfName:
entityType="+entityType);
+ logger.debug("IsPartOfName: entityType={}", entityType);
if (entityType != null) {
- //System.err.println("npSpan = "+np.getSpan()+"
parentSpan="+parent.getSpan());
+ logger.debug("npSpan={} parentSpan={}", np.getSpan(),
parent.getSpan());
if (!np.getSpan().contains(parent.getSpan())) {
return true;
}
@@ -389,12 +366,6 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
return false;
}
- /** Return all noun phrases which are contained by <code>p</code>.
- * @param p The parse in which to find the noun phrases.
- * @return A list of <code>Parse</code> objects which are noun phrases
contained by <code>p</code>.
- */
- //protected abstract List getNounPhrases(Parse p);
-
public List<Parse> getNamedEntities(Parse p) {
return p.getNamedEntities();
}
@@ -404,10 +375,8 @@ public abstract class AbstractMentionFinder implements
MentionFinder {
List<Parse> nps = p.getNounPhrases();
Collections.sort(nps);
Map<Parse, Parse> headMap = constructHeadMap(nps);
- //System.err.println("AbstractMentionFinder.getMentions: got " +
nps.size()); // + " nps, and "
- // + nes.size() + " named entities");
- Mention[] mentions = collectMentions(nps, headMap);
- return mentions;
+ logger.debug("GetMentions: got {} named entities", nps.size());
+ return collectMentions(nps, headMap);
}
@Override
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java
index 610b73a..14bcd93 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/MentionContext.java
@@ -19,17 +19,22 @@ package opennlp.tools.coref.mention;
import java.util.List;
+import opennlp.tools.coref.resolver.AbstractResolver;
import opennlp.tools.coref.sim.Context;
import opennlp.tools.coref.sim.GenderEnum;
import opennlp.tools.coref.sim.NumberEnum;
import opennlp.tools.util.Span;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Data structure representation of a mention with additional contextual
information.
* The contextual information is used in performing coreference resolution.
*/
public class MentionContext extends Context {
-
+
+ private static final Logger logger =
LoggerFactory.getLogger(MentionContext.class);
+
/**
* The index of first token which is not part of a descriptor. This is 0 if
no descriptor is present.
*/
@@ -123,7 +128,7 @@ public class MentionContext extends Context {
public MentionContext(Span span, Span headSpan, int entityId, Parse parse,
String extentType,
String nameType, int mentionIndex, int
mentionsInSentence,
int mentionIndexInDocument, int sentenceIndex,
HeadFinder headFinder) {
- super(span,headSpan,entityId,parse,extentType,nameType,headFinder);
+ super(span, headSpan, entityId, parse, extentType, nameType, headFinder);
nounLocation = mentionIndex;
maxNounLocation = mentionsInSentence;
nounNumber = mentionIndexInDocument;
@@ -135,14 +140,15 @@ public class MentionContext extends Context {
List<Parse> headTokens = head.getTokens();
tokens = headTokens.toArray(new Parse[0]);
basalNextToken = head.getNextToken();
- //System.err.println("MentionContext.init: "+ent+" "+ent.getEntityId()+"
head="+head);
+ logger.debug("Constructing MentionContext for '{}' id={} head={}", parse,
parse.getEntityId(), head);
nonDescriptorStart = 0;
initHeads(headFinder.getHeadIndex(head));
gender = GenderEnum.UNKNOWN;
- this.genderProb = 0d;
number = NumberEnum.UNKNOWN;
+ this.genderProb = 0d;
this.numberProb = 0d;
}
+
/**
* Constructs context information for the specified mention.
*
@@ -160,51 +166,6 @@ public class MentionContext extends Context {
mentionIndexInDocument, sentenceIndex, headFinder);
}
-
- /*
- * Constructs context information for the specified mention.
- *
- * @param mentionParse Mention parse structure for which context is to be
constructed.
- * @param mentionIndex mention position in sentence.
- * @param mentionsInSentence Number of mentions in the sentence.
- * @param mentionsInDocument Number of mentions in the document.
- * @param sentenceIndex Sentence number for this mention.
- * @param nameType The named-entity type for this mention.
- * @param headFinder Object which provides head information.
- */
- /*
- public MentionContext(Parse mentionParse, int mentionIndex, int
mentionsInSentence,
- int mentionsInDocument, int sentenceIndex, String nameType, HeadFinder
headFinder) {
- nounLocation = mentionIndex;
- maxNounLocation = mentionsInDocument;
- sentenceNumber = sentenceIndex;
- parse = mentionParse;
- indexSpan = mentionParse.getSpan();
- prevToken = mentionParse.getPreviousToken();
- nextToken = mentionParse.getNextToken();
- head = headFinder.getLastHead(mentionParse);
- List headTokens = head.getTokens();
- tokens = (Parse[]) headTokens.toArray(new Parse[headTokens.size()]);
- basalNextToken = head.getNextToken();
- //System.err.println("MentionContext.init: "+ent+" "+ent.getEntityId()+"
head="+head);
- indexHeadSpan = head.getSpan();
- nonDescriptorStart = 0;
- initHeads(headFinder.getHeadIndex(head));
- this.neType= nameType;
- if (getHeadTokenTag().startsWith("NN") &&
!getHeadTokenTag().startsWith("NNP")) {
- //if (headTokenTag.startsWith("NNP") && neType != null) {
- this.synsets = getSynsetSet(this);
- }
- else {
- this.synsets=Collections.EMPTY_SET;
- }
- gender = GenderEnum.UNKNOWN;
- this.genderProb = 0d;
- number = NumberEnum.UNKNOWN;
- this.numberProb = 0d;
- }
- */
-
private void initHeads(int headIndex) {
this.headTokenIndex = headIndex;
this.headToken = (Parse) tokens[getHeadTokenIndex()];
@@ -216,9 +177,7 @@ public class MentionContext extends Context {
}
/**
- * Returns the parse of the head token for this mention.
- *
- * @return the parse of the head token for this mention.
+ * @return Retrieves the parse of the head token for this mention.
*/
public Parse getHeadTokenParse() {
return headToken;
@@ -241,28 +200,24 @@ public class MentionContext extends Context {
}
/**
- * Returns a sentence-based token span for this mention. If this mention
consist
+   * Returns a sentence-based token span for this mention. If this mention
consists
* of the third, fourth, and fifth token, then this span will be 2..4.
*
- * @return a sentence-based token span for this mention.
+ * @return Retrieves a sentence-based token span for this mention.
*/
public Span getIndexSpan() {
return indexSpan;
}
/**
- * Returns the index of the noun phrase for this mention in a sentence.
- *
- * @return the index of the noun phrase for this mention in a sentence.
+ * @return Retrieves the index of the noun phrase for this mention in a
sentence.
*/
public int getNounPhraseSentenceIndex() {
return nounLocation;
}
/**
- * Returns the index of the noun phrase for this mention in a document.
- *
- * @return the index of the noun phrase for this mention in a document.
+ * @return Retrieves the index of the noun phrase for this mention in a
document.
*/
public int getNounPhraseDocumentIndex() {
return nounNumber;
@@ -291,36 +246,28 @@ public class MentionContext extends Context {
}
/**
- * Returns the index of the sentence which contains this mention.
- *
- * @return the index of the sentence which contains this mention.
+ * @return Retrieves the index of the sentence which contains this mention.
*/
public int getSentenceNumber() {
return sentenceNumber;
}
/**
- * Returns the parse for the first token in this mention.
- *
- * @return The parse for the first token in this mention.
+ * @return Retrieves the parse for the first token in this mention.
*/
public Parse getFirstToken() {
return firstToken;
}
/**
- * Returns the text for the first token of the mention.
- *
- * @return The text for the first token of the mention.
+ * @return Retrieves the text for the first token of the mention.
*/
public String getFirstTokenText() {
return firstTokenText;
}
/**
- * Returns the pos-tag of the first token of this mention.
- *
- * @return the pos-tag of the first token of this mention.
+ * @return Retrieves the pos-tag of the first token of this mention.
*/
public String getFirstTokenTag() {
return firstTokenTag;
@@ -344,27 +291,6 @@ public class MentionContext extends Context {
return parse.toString();
}
- /*
- private static String[] getLemmas(MentionContext xec) {
- //TODO: Try multi-word lemmas first.
- String word = xec.getHeadTokenText();
- return DictionaryFactory.getDictionary().getLemmas(word,"NN");
- }
-
- private static Set getSynsetSet(MentionContext xec) {
- //System.err.println("getting synsets for mention:"+xec.toText());
- Set synsetSet = new HashSet();
- String[] lemmas = getLemmas(xec);
- for (int li = 0; li < lemmas.length; li++) {
- String[] synsets =
DictionaryFactory.getDictionary().getParentSenseKeys(lemmas[li],"NN",0);
- for (int si=0,sn=synsets.length;si<sn;si++) {
- synsetSet.add(synsets[si]);
- }
- }
- return (synsetSet);
- }
- */
-
/**
* Assigns the specified gender with the specified probability to this
mention.
*
@@ -377,18 +303,14 @@ public class MentionContext extends Context {
}
/**
- * Returns the gender of this mention.
- *
- * @return The gender of this mention.
+ * @return Retrieves the gender of this mention.
*/
public GenderEnum getGender() {
return gender;
}
/**
- * Returns the probability associated with the gender assignment.
- *
- * @return The probability associated with the gender assignment.
+ * @return Retrieves the probability associated with the gender assignment.
*/
public double getGenderProb() {
return genderProb;
@@ -406,18 +328,14 @@ public class MentionContext extends Context {
}
/**
- * Returns the number of this mention.
- *
- * @return The number of this mention.
+ * @return Retrieves the number of this mention.
*/
public NumberEnum getNumber() {
return number;
}
/**
- * Returns the probability associated with the number assignment.
- *
- * @return The probability associated with the number assignment.
+ * @return Retrieves the probability associated with the number assignment.
*/
public double getNumberProb() {
return numberProb;
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
index dbbd25d..9cd50d6 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/PTBHeadFinder.java
@@ -17,18 +17,23 @@
package opennlp.tools.coref.mention;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import java.util.HashSet;
import java.util.List;
import java.util.Set;
-
/**
* Finds head information from Penn Treebank style parses.
*/
public final class PTBHeadFinder implements HeadFinder {
+ private static final Logger logger =
LoggerFactory.getLogger(PTBHeadFinder.class);
+
private static PTBHeadFinder instance;
private static final Set<String> SKIP_SET = new HashSet<>();
+
static {
SKIP_SET.add("POS");
SKIP_SET.add(",");
@@ -42,8 +47,7 @@ public final class PTBHeadFinder implements HeadFinder {
private PTBHeadFinder() {}
/**
- * Returns an instance of this head finder.
- * @return an instance of this head finder.
+ * @return Retrieves an instance of this head finder.
*/
public static HeadFinder getInstance() {
if (instance == null) {
@@ -52,13 +56,14 @@ public final class PTBHeadFinder implements HeadFinder {
return instance;
}
+ @Override
public Parse getHead(Parse p) {
if (p == null) {
return null;
}
if (p.isNounPhrase()) {
List<Parse> parts = p.getSyntacticChildren();
- //shallow parse POS
+ // shallow parse POS
if (parts.size() > 2) {
Parse child0 = parts.get(0);
Parse child1 = parts.get(1);
@@ -68,13 +73,13 @@ public final class PTBHeadFinder implements HeadFinder {
return child2;
}
}
- //full parse POS
+ // full parse POS
if (parts.size() > 1) {
Parse child0 = parts.get(0);
if (child0.isNounPhrase()) {
List<Parse> ctoks = child0.getTokens();
- if (ctoks.size() == 0) {
- System.err.println("PTBHeadFinder: NP " + child0 + " with no
tokens");
+ if (ctoks.isEmpty()) {
+ logger.debug("NP {} with no tokens.", child0);
}
Parse tok = ctoks.get(ctoks.size() - 1);
if (tok.getSyntacticType().equals("POS")) {
@@ -82,7 +87,7 @@ public final class PTBHeadFinder implements HeadFinder {
}
}
}
- //coordinated nps are their own entities
+ // coordinated nps are their own entities
if (parts.size() > 1) {
for (int pi = 1; pi < parts.size() - 1; pi++) {
Parse child = parts.get(pi);
@@ -91,10 +96,9 @@ public final class PTBHeadFinder implements HeadFinder {
}
}
}
- //all other NPs
+ // all other NPs
for (Parse child : parts) {
- //System.err.println("PTBHeadFinder.getHead: "+p.getSyntacticType()+"
"+p
- // +" child "+pi+"="+child.getSyntacticType()+" "+child);
+      logger.debug("Getting head: {} {} - type {} child {}",
p.getSyntacticType(), p, child.getSyntacticType(), child);
if (child.isNounPhrase()) {
return child;
}
@@ -106,21 +110,21 @@ public final class PTBHeadFinder implements HeadFinder {
}
}
+ @Override
public int getHeadIndex(Parse p) {
List<Parse> sChildren = p.getSyntacticChildren();
boolean countTokens = false;
int tokenCount = 0;
- //check for NP -> NN S type structures and return last token before S as
head.
+ // check for NP -> NN S type structures and return last token before S as
head.
for (int sci = 0, scn = sChildren.size(); sci < scn;sci++) {
Parse sc = sChildren.get(sci);
- //System.err.println("PTBHeadFinder.getHeadIndex "+p+"
"+p.getSyntacticType()
- // +" sChild "+sci+" type = "+sc.getSyntacticType());
+ logger.debug("Getting head index: {} {} - sChild {} type {}", p,
p.getSyntacticType(), sci, sc.getSyntacticType());
if (sc.getSyntacticType().startsWith("S")) {
if (sci != 0) {
countTokens = true;
}
else {
- //System.err.println("PTBHeadFinder.getHeadIndex(): NP -> S
production assuming right-most head");
+ logger.debug("Getting head index: NP -> S production assuming
right-most head");
}
}
if (countTokens) {
@@ -128,8 +132,8 @@ public final class PTBHeadFinder implements HeadFinder {
}
}
List<Parse> toks = p.getTokens();
- if (toks.size() == 0) {
- System.err.println("PTBHeadFinder.getHeadIndex(): empty tok list for
parse " + p);
+ if (toks.isEmpty()) {
+ logger.debug("Empty tok list for parse {}", p);
}
for (int ti = toks.size() - tokenCount - 1; ti >= 0; ti--) {
Parse tok = toks.get(ti);
@@ -137,29 +141,32 @@ public final class PTBHeadFinder implements HeadFinder {
return ti;
}
}
- //System.err.println("PTBHeadFinder.getHeadIndex: "+p+"
hi="+toks.size()+"-"+tokenCount
- // +" -1 = "+(toks.size()-tokenCount -1));
return toks.size() - tokenCount - 1;
}
- /** Returns the bottom-most head of a <code>Parse</code>. If no
- head is available which is a child of <code>p</code> then
- <code>p</code> is returned. */
+ /**
+ * Returns the bottom-most head of a {@link Parse}.
+ * If no head is available which is a child of <code>p</code> then
+ * <code>p</code> is returned.
+ *
+ * @param p The parse to check for a bottom-most head.
+ */
+ @Override
public Parse getLastHead(Parse p) {
Parse head;
- //System.err.print("EntityFinder.getLastHead: "+p);
+ logger.debug("Getting last head: {}", p);
while (null != (head = getHead(p))) {
- //System.err.print(" -> "+head);
- //if (p.getEntityId() != -1 && head.getEntityId() != p.getEntityId()) {
- // System.err.println(p+" ("+p.getEntityId()+") -> "+head+"
("+head.getEntityId()+")");
- // }
+ logger.debug(" -> {}", head);
+ if (p.getEntityId() != -1 && head.getEntityId() != p.getEntityId()) {
+ logger.debug("{} ({}) -> {} ({})", p, p.getEntityId(), head,
head.getEntityId());
+ }
p = head;
}
- //System.err.println(" -> null");
return p;
}
+ @Override
public Parse getHeadToken(Parse p) {
List<Parse> toks = p.getTokens();
return toks.get(getHeadIndex(p));
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java
index 659fdd8..eff4068 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/mention/ShallowParseMentionFinder.java
@@ -31,48 +31,16 @@ public class ShallowParseMentionFinder extends
AbstractMentionFinder {
}
/**
- * Retrieves the one and only existing instance.
- *
- * @param hf
- * @return one and only existing instance
+ * @param hf A valid {@link HeadFinder} to assign if no instance exists.
+ * @return Retrieves the one and only existing instance.
*/
public static ShallowParseMentionFinder getInstance(HeadFinder hf) {
if (instance == null) {
instance = new ShallowParseMentionFinder(hf);
- }
- else if (instance.headFinder != hf) {
+ } else if (instance.headFinder != hf) {
instance = new ShallowParseMentionFinder(hf);
}
return instance;
}
- /*
- protected final List getNounPhrases(Parse p) {
- List nps = p.getNounPhrases();
- List basals = new ArrayList();
- for (int ni=0,ns=nps.size();ni<ns;ni++) {
- Parse np = (Parse) nps.get(ni);
- //System.err.println("getNounPhrases: np="+np);
- if (isBasalNounPhrase(np)) {
- //System.err.println("basal");
- basals.add(np);
- }
- else if (isPossessive(np)) {
- //System.err.println("pos np");
- basals.add(np);
- basals.addAll(getNounPhrases(np));
- }
- else if (isOfPrepPhrase(np)) {
- //System.err.println("of np");
- basals.add(np);
- basals.addAll(getNounPhrases(np));
- }
- else {
- //System.err.println("big np");
- basals.addAll(getNounPhrases(np));
- }
- }
- return(basals);
- }
- */
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
index d8b147a..197570f 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
@@ -25,12 +25,16 @@ import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.DiscourseModel;
import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.coref.mention.Parse;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Default implementation of some methods in the {@link Resolver} interface.
*/
public abstract class AbstractResolver implements Resolver {
+ private static final Logger logger =
LoggerFactory.getLogger(AbstractResolver.class);
+
/**
* The number of previous entities that resolver should consider.
*/
@@ -175,7 +179,7 @@ public abstract class AbstractResolver implements Resolver {
return cde;
}
}
- //System.err.println("AbstractResolver.retain: non-referring entity with
id: "+ec.toText()+" id="+ec.id);
+ logger.debug("Non-referring entity with id={}: {}", mention.getId(),
mention.toText());
return null;
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
index 8bf9bd1..cc7df52 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
@@ -128,7 +128,8 @@ public class DefaultNonReferentialResolver implements
NonReferentialResolver {
protected List<String> getNonReferentialFeatures(MentionContext mention) {
List<String> features = new ArrayList<>();
Parse[] mtokens = mention.getTokenParses();
- //System.err.println("getNonReferentialFeatures: mention has
"+mtokens.length+" tokens");
+ logger.debug("Mention has {} tokens", mtokens.length);
+
for (int ti = 0; ti <= mention.getHeadTokenIndex(); ti++) {
Parse tok = mtokens[ti];
List<String> wfs = ResolverUtils.getWordFeatures(tok);
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java
index 7842321..4406d1e 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/IsAResolver.java
@@ -25,6 +25,9 @@ import java.util.regex.Pattern;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.mention.MentionContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* Resolves coreference between appositives.
*
@@ -32,7 +35,9 @@ import opennlp.tools.coref.mention.MentionContext;
*/
public class IsAResolver extends MaxentResolver {
- Pattern predicativePattern;
+ private static final Logger logger =
LoggerFactory.getLogger(IsAResolver.class);
+
+ private final Pattern predicativePattern;
public IsAResolver(String modelDirectory, ResolverMode m) throws IOException
{
super(modelDirectory, "/imodel", m, 20);
@@ -60,31 +65,31 @@ public class IsAResolver extends MaxentResolver {
@Override
protected boolean excluded(MentionContext ec, DiscourseEntity de) {
MentionContext cec = de.getLastExtent();
- //System.err.println("IsAResolver.excluded?: ec.span="+ec.getSpan()+"
cec.span="+cec.getSpan()
- // +" cec="+cec.toText()+" lastToken="+ec.getNextToken());
+ logger.debug("Excluded: ec.span={} cec.span={} cec={} lastToken={}",
+ ec.getSpan(), cec.getSpan(), cec.toText(), ec.getNextToken());
+
if (ec.getSentenceNumber() != cec.getSentenceNumber()) {
- //System.err.println("IsAResolver.excluded: (true) not same sentence");
- return (true);
+ logger.debug("Excluded: (true) not same sentence");
+ return true;
}
- //shallow parse appositives
- //System.err.println("IsAResolver.excluded: ec="+ec.toText()+" "
- // +ec.span+" cec="+cec.toText()+" "+cec.span);
+ // shallow parse appositives
+ logger.debug("Excluded: ec={} {} cec={} {}", ec.toText(), ec.getSpan(),
cec.toText(), cec.getSpan());
if (cec.getIndexSpan().getEnd() == ec.getIndexSpan().getStart() - 2) {
- return (false);
+ return false;
}
- //full parse w/o trailing comma
+ // full parse w/o trailing comma
if (cec.getIndexSpan().getEnd() == ec.getIndexSpan().getEnd()) {
- //System.err.println("IsAResolver.excluded: (false) spans share end");
- return (false);
+ logger.debug("Excluded: (false) spans share end");
+ return false;
}
- //full parse w/ trailing comma or period
+ // full parse w/ trailing comma or period
if (cec.getIndexSpan().getEnd() <= ec.getIndexSpan().getEnd() + 2 &&
(ec.getNextToken() != null
&& (ec.getNextToken().toString().equals(",") ||
ec.getNextToken().toString().equals(".")))) {
- //System.err.println("IsAResolver.excluded: (false) spans end + punct");
- return (false);
+ logger.debug("Excluded: (false) spans end + punct");
+ return false;
}
- //System.err.println("IsAResolver.excluded: (true) default");
- return (true);
+ logger.debug("Excluded: (true) default");
+ return true;
}
@Override
@@ -95,7 +100,7 @@ public class IsAResolver extends MaxentResolver {
@Override
protected boolean defaultReferent(DiscourseEntity de) {
- return (true);
+ return true;
}
@Override
@@ -113,101 +118,8 @@ public class IsAResolver extends MaxentResolver {
}
features.add("hts" + ant.getHeadTokenTag() + "," +
mention.getHeadTokenTag());
}
- /*
- if (entity != null) {
- //System.err.println("MaxentIsResolver.getFeatures:
- [ "+ec2.toText()+"] -> ["+de.getLastExtent().toText()+"]");
- //previous word and tag
- if (ant.prevToken != null) {
- features.add("pw=" + ant.prevToken);
- features.add("pt=" + ant.prevToken.getSyntacticType());
- }
- else {
- features.add("pw=<none>");
- features.add("pt=<none>");
- }
-
- //next word and tag
- if (mention.nextToken != null) {
- features.add("nw=" + mention.nextToken);
- features.add("nt=" + mention.nextToken.getSyntacticType());
- }
- else {
- features.add("nw=<none>");
- features.add("nt=<none>");
- }
- //modifier word and tag for c1
- int i = 0;
- List c1toks = ant.tokens;
- for (; i < ant.headTokenIndex; i++) {
- features.add("mw=" + c1toks.get(i));
- features.add("mt=" + ((Parse) c1toks.get(i)).getSyntacticType());
- }
- //head word and tag for c1
- features.add("mh=" + c1toks.get(i));
- features.add("mt=" + ((Parse) c1toks.get(i)).getSyntacticType());
-
- //modifier word and tag for c2
- i = 0;
- List c2toks = mention.tokens;
- for (; i < mention.headTokenIndex; i++) {
- features.add("mw=" + c2toks.get(i));
- features.add("mt=" + ((Parse) c2toks.get(i)).getSyntacticType());
- }
- //head word and tag for n2
- features.add("mh=" + c2toks.get(i));
- features.add("mt=" + ((Parse) c2toks.get(i)).getSyntacticType());
-
- //word/tag pairs
- for (i = 0; i < ant.headTokenIndex; i++) {
- for (int j = 0; j < mention.headTokenIndex; j++) {
- features.add("w=" + c1toks.get(i) + "|" + "w=" + c2toks.get(j));
- features.add("w=" + c1toks.get(i) + "|" + "t=" + ((Parse)
c2toks.get(j)).getSyntacticType());
- features.add("t=" + ((Parse) c1toks.get(i)).getSyntacticType() + "|"
+ "w=" + c2toks.get(j));
- features.add("t=" + ((Parse) c1toks.get(i)).getSyntacticType() + "|"
+ "t=" +
- ((Parse) c2toks.get(j)).getSyntacticType());
- }
- }
- features.add("ht=" + ant.headTokenTag + "|" + "ht=" +
mention.headTokenTag);
- features.add("ht1=" + ant.headTokenTag);
- features.add("ht2=" + mention.headTokenTag);
- */
- //semantic categories
- /*
- if (ant.neType != null) {
- if (re.neType != null) {
- features.add("sc="+ant.neType+","+re.neType);
- }
- else if (!re.headTokenTag.startsWith("NNP") &&
re.headTokenTag.startsWith("NN")) {
- Set synsets = re.synsets;
- for (Iterator si=synsets.iterator();si.hasNext();) {
- features.add("sc="+ant.neType+","+si.next());
- }
- }
- }
- else if (!ant.headTokenTag.startsWith("NNP") &&
ant.headTokenTag.startsWith("NN")) {
- if (re.neType != null) {
- Set synsets = ant.synsets;
- for (Iterator si=synsets.iterator();si.hasNext();) {
- features.add("sc="+re.neType+","+si.next());
- }
- }
- else if (!re.headTokenTag.startsWith("NNP") &&
re.headTokenTag.startsWith("NN")) {
- //System.err.println("MaxentIsaResolover.getFeatures: both common
re="+re.parse+" ant="+ant.parse);
- Set synsets1 = ant.synsets;
- Set synsets2 = re.synsets;
- for (Iterator si=synsets1.iterator();si.hasNext();) {
- Object synset = si.next();
- if (synsets2.contains(synset)) {
- features.add("sc="+synset);
- }
- }
- }
- }
- }
- */
- //System.err.println("MaxentIsResolver.getFeatures: "+features.toString());
- return (features);
+ logger.debug("GetFeatures: {}", features);
+ return features;
}
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
index f320d7b..8411158 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
@@ -39,6 +39,7 @@ import java.util.zip.GZIPInputStream;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.DiscourseModel;
import opennlp.tools.coref.mention.MentionContext;
+import opennlp.tools.coref.mention.PTBHeadFinder;
import opennlp.tools.coref.sim.TestSimilarityModel;
import opennlp.tools.ml.maxent.GISModel;
import opennlp.tools.ml.maxent.GISTrainer;
@@ -49,6 +50,8 @@ import opennlp.tools.ml.model.FileEventStream;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.TrainingParameters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Provides common functionality used by classes which implement the {@link
Resolver} class
@@ -56,6 +59,8 @@ import opennlp.tools.util.TrainingParameters;
*/
public abstract class MaxentResolver extends AbstractResolver {
+ private static final Logger logger =
LoggerFactory.getLogger(MaxentResolver.class);
+
/** Outcomes when two mentions are coreferent. */
public static final String SAME = "same";
/** Outcome when two mentions are not coreferent. */
@@ -111,7 +116,6 @@ public abstract class MaxentResolver extends
AbstractResolver {
this.preferFirstReferent = preferFirstReferent;
}
-
/**
* Creates a maximum-entropy-based resolver with the specified model name,
using the
* specified mode, which will look the specified number of entities back for
a referent and
@@ -145,7 +149,7 @@ public abstract class MaxentResolver extends
AbstractResolver {
events = new ArrayList<>();
}
else {
- System.err.println("Unknown mode: " + this.mode);
+ logger.warn("Invalid resolver mode '{}' detected during resolve in
MaxentResolver", this.mode);
}
//add one for non-referent possibility
candProbs = new double[getNumEntities() + 1];
@@ -190,7 +194,7 @@ public abstract class MaxentResolver extends
AbstractResolver {
int ei = 0;
double nonReferentialProbability =
nonReferentialResolver.getNonReferentialProbability(ec);
if (DEBUG) {
- System.err.println(this + ".resolve: " + ec.toText() + " -> " + "null "
+ nonReferentialProbability);
+ logger.debug("Resolve: {} -> null {}", ec.toText(),
nonReferentialProbability);
}
for (; ei < getNumEntities(dm); ei++) {
de = dm.getEntity(ei);
@@ -200,23 +204,20 @@ public abstract class MaxentResolver extends
AbstractResolver {
if (excluded(ec, de)) {
candProbs[ei] = 0;
if (DEBUG) {
- System.err.println("excluded " + this + ".resolve: " + ec.toText() +
" -> " + de + " "
- + candProbs[ei]);
+ logger.debug("Excluded during resolve: {} -> {} {}", ec.toText(),
de, candProbs[ei]);
}
}
else {
-
List<String> lfeatures = getFeatures(ec, de);
String[] features = lfeatures.toArray(new String[0]);
try {
candProbs[ei] = model.eval(features)[sameIndex];
- }
- catch (ArrayIndexOutOfBoundsException e) {
+ } catch (ArrayIndexOutOfBoundsException e) {
candProbs[ei] = 0;
}
if (DEBUG) {
- System.err.println(this + ".resolve: " + ec.toText() + " -> " + de +
" ("
- + ec.getGender() + "," + de.getGender() + ") " + candProbs[ei] +
" " + lfeatures);
+ logger.debug("Resolve: {} -> {} ({}, {}) {} {}",
+ ec.toText(), de, ec.getGender(), de.getGender(),
candProbs[ei], lfeatures);
}
}
if (preferFirstReferent && candProbs[ei] > nonReferentialProbability) {
@@ -261,7 +262,6 @@ public abstract class MaxentResolver extends
AbstractResolver {
@Override
public DiscourseEntity retain(MentionContext mention, DiscourseModel dm) {
- //System.err.println(this+".retain("+ec+") "+mode);
if (ResolverMode.TRAIN == mode) {
DiscourseEntity de = null;
boolean referentFound = false;
@@ -272,16 +272,16 @@ public abstract class MaxentResolver extends
AbstractResolver {
MentionContext entityMention = cde.getLastExtent();
if (outOfRange(mention, cde)) {
if (mention.getId() != -1 && !referentFound) {
- //System.err.println("retain: Referent out of range:
"+ec.toText()+" "+ec.parse.getSpan());
+          // TODO: restore this debug output; 'ec' in the pre-refactor code
+          // referred to the current mention, e.g.:
+          // logger.debug("Referent out of range: {} {}", mention.toText(),
mention.getIndexSpan());
}
break;
}
if (excluded(mention, cde)) {
if (showExclusions) {
if (mention.getId() != -1 && entityMention.getId() ==
mention.getId()) {
- System.err.println(this + ".retain: Referent excluded: (" +
mention.getId() + ") "
- + mention.toText() + " " + mention.getIndexSpan() + " -> ("
+ entityMention.getId()
- + ") " + entityMention.toText() + " " +
entityMention.getSpan() + " " + this);
+ logger.debug("Referent excluded: ({}) {} {} -> ({}) {} {}",
mention.getId(), mention.toText(),
+ mention.getIndexSpan(), entityMention.getId(),
entityMention.toText(), entityMention.getSpan());
}
}
}
@@ -292,17 +292,16 @@ public abstract class MaxentResolver extends
AbstractResolver {
// || (!nonReferentFound && useAsDifferentExample)) {
List<String> features = getFeatures(mention, cde);
- //add Event to Model
+ // add Event to Model
if (DEBUG) {
- System.err.println(this + ".retain: " + mention.getId() + " " +
mention.toText()
- + " -> " + entityMention.getId() + " " + cde);
+ logger.debug("Retain: {} {} -> {} {}", mention.getId(),
mention.toText(), entityMention.getId(), cde);
}
if (mention.getId() != -1 && entityMention.getId() ==
mention.getId()) {
referentFound = true;
events.add(new Event(SAME, features.toArray(new String[0])));
de = cde;
- //System.err.println("MaxentResolver.retain: resolved at "+ei);
+ logger.debug("Retain: resolved at {}", ei);
// incrementing count for key 'ei'
distances.merge(ei, 1, Integer::sum);
}
@@ -360,7 +359,6 @@ public abstract class MaxentResolver extends
AbstractResolver {
}
}
if (DEBUG) {
- System.err.println(this + " referential");
Path p = Path.of(modelName + ".events");
try (Writer writer = Files.newBufferedWriter(p, StandardCharsets.UTF_8,
StandardOpenOption.WRITE, StandardOpenOption.CREATE,
StandardOpenOption.TRUNCATE_EXISTING)) {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
index e5d55f8..a72ea5f 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/PluralPronounResolver.java
@@ -31,7 +31,7 @@ import opennlp.tools.coref.mention.MentionContext;
*/
public class PluralPronounResolver extends MaxentResolver {
- final int NUM_SENTS_BACK_PRONOUNS = 2;
+ private static final int NUM_SENTS_BACK_PRONOUNS = 2;
public PluralPronounResolver(String modelDirectory, ResolverMode m) throws
IOException {
super(modelDirectory, "tmodel", m, 30);
@@ -77,22 +77,19 @@ public class PluralPronounResolver extends MaxentResolver {
}
*/
}
- return (features);
+ return features;
}
@Override
protected boolean outOfRange(MentionContext mention, DiscourseEntity entity)
{
MentionContext cec = entity.getLastExtent();
- //System.err.println("MaxentPluralPronounResolver.outOfRange:
["+ec.toText()+" ("+ec.id+")]
- // ["+cec.toText()+" ("+cec.id+")]
ec.sentenceNumber=("+ec.sentenceNumber+")-cec.sentenceNumber
- // =("+cec.sentenceNumber+") > "+NUM_SENTS_BACK_PRONOUNS);
- return (mention.getSentenceNumber() - cec.getSentenceNumber() >
NUM_SENTS_BACK_PRONOUNS);
+ return mention.getSentenceNumber() - cec.getSentenceNumber() >
NUM_SENTS_BACK_PRONOUNS;
}
@Override
public boolean canResolve(MentionContext mention) {
String tag = mention.getHeadTokenTag();
- return (tag != null && tag.startsWith("PRP")
- &&
ResolverUtils.PLURAL_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches());
+ return tag != null && tag.startsWith("PRP")
+ &&
ResolverUtils.PLURAL_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches();
}
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java
index fe4955f..3c96b08 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ProperNounResolver.java
@@ -75,7 +75,7 @@ public class ProperNounResolver extends MaxentResolver {
private void initAcronyms(String name) {
acroMap = new HashMap<>(15000);
try (BufferedReader str = new BufferedReader(new FileReader(name))) {
- //System.err.println("Reading acronyms database: " + file + " ");
+ logger.debug("Reading acronyms database: {}", name);
String line;
while (null != (line = str.readLine())) {
StringTokenizer st = new StringTokenizer(line, "\t");
@@ -95,16 +95,13 @@ public class ProperNounResolver extends MaxentResolver {
exSet.add(acro);
}
} catch (IOException e) {
- logger.warn("ProperNounResolver.initAcronyms: Acronym Database not
found: " + e.getMessage(), e);
+ logger.error("Acronym Database not found: {}", e.getMessage(), e);
}
}
private boolean isAcronym(String ecStrip, String xecStrip) {
Set<String> exSet = acroMap.get(ecStrip);
- if (exSet != null && exSet.contains(xecStrip)) {
- return true;
- }
- return false;
+ return exSet != null && exSet.contains(xecStrip);
}
protected List<String> getAcronymFeatures(MentionContext mention,
DiscourseEntity entity) {
@@ -123,7 +120,7 @@ public class ProperNounResolver extends MaxentResolver {
@Override
protected List<String> getFeatures(MentionContext mention, DiscourseEntity
entity) {
- //System.err.println("ProperNounResolver.getFeatures: "+mention.toText()+"
-> "+entity);
+ logger.debug("Getting features: mention = {} -> entity = {}",
mention.toText(), entity);
List<String> features = new ArrayList<>(super.getFeatures(mention,
entity));
if (entity != null) {
features.addAll(ResolverUtils.getStringMatchFeatures(mention, entity));
@@ -142,7 +139,7 @@ public class ProperNounResolver extends MaxentResolver {
MentionContext xec = ei.next();
if (xec.getHeadTokenTag().startsWith("NNP")) {
// || initialCaps.matcher(xec.headToken.toString()).find()) {
- //System.err.println("MaxentProperNounResolver.exclude: kept
"+xec.toText()+" with "+xec.headTag);
+ logger.debug("Kept {} with {}", xec.toText(), xec.getHeadTokenTag());
return false;
}
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
index c090503..aa22f79 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/ResolverUtils.java
@@ -33,11 +33,16 @@ import opennlp.tools.coref.sim.GenderEnum;
import opennlp.tools.coref.sim.NumberEnum;
import opennlp.tools.coref.sim.TestSimilarityModel;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* This class provides a set of utilities for turning mentions into normalized
strings and features.
*/
public class ResolverUtils {
-
+
+ private static final Logger logger =
LoggerFactory.getLogger(ResolverUtils.class);
+
private static final Pattern ENDS_WITH_PERIOD = Pattern.compile("\\.$");
private static final Pattern INITIAL_CAPS = Pattern.compile("^[A-Z]");
@@ -210,7 +215,7 @@ public class ResolverUtils {
}
public static String getExactMatchFeature(MentionContext ec, MentionContext
xec) {
- //System.err.println("getExactMatchFeature: ec="+mentionString(ec)+"
mc="+mentionString(xec));
+ logger.debug("GetExactMatchFeature: ec={} mc={}", mentionString(ec),
mentionString(xec));
if (mentionString(ec).equals(mentionString(xec))) {
return "exactMatch";
}
@@ -263,11 +268,7 @@ public class ResolverUtils {
}
Parse[] xtoks = entityMention.getTokenParses();
int headIndex = entityMention.getHeadTokenIndex();
- //if
(!mention.getHeadTokenTag().equals(entityMention.getHeadTokenTag())) {
- // //System.err.println("skipping "+mention.headTokenText+" with
"+xec.headTokenText
- // +" because "+mention.headTokenTag+" != "+xec.headTokenTag);
- // continue;
- //} want to match NN NNP
+
String entityMentionHeadString =
entityMention.getHeadTokenText().toLowerCase();
// model lexical similarity
if (mentionHeadString.equals(entityMentionHeadString)) {
@@ -315,7 +316,7 @@ public class ResolverUtils {
}
public static boolean isSubstring(String ecStrip, String xecStrip) {
- //System.err.println("MaxentResolver.isSubstring: ec="+ecStrip+"
xec="+xecStrip);
+ logger.debug("IsSubstring: ec={} xec={}", ecStrip, xecStrip);
int io = xecStrip.indexOf(ecStrip);
if (io != -1) {
      //check boundaries
@@ -339,7 +340,7 @@ public class ResolverUtils {
String token = mtokens[ti].toString();
sb.append(" ").append(token);
}
- //System.err.println("mentionString "+ec+" == "+sb.toString()+"
mtokens.length="+mtokens.length);
+ logger.debug("mentionString {} == {} mtokens.length={}", ec, sb,
mtokens.length);
return sb.toString();
}
@@ -357,7 +358,7 @@ public class ResolverUtils {
Parse[] mtokens = mention.getTokenParses();
int end = mention.getHeadTokenIndex() + 1;
if (start == end) {
- //System.err.println("stripNp: return null 1");
+ logger.trace("stripNp: return null 1");
return null;
}
//strip determiners
@@ -365,7 +366,7 @@ public class ResolverUtils {
start++;
}
if (start == end) {
- //System.err.println("stripNp: return null 2");
+ logger.trace("stripNp: return null 2");
return null;
}
//get to first NNP
@@ -378,16 +379,16 @@ public class ResolverUtils {
start++;
}
if (start == end) {
- //System.err.println("stripNp: return null 3");
+ logger.trace("stripNp: return null 3");
return null;
}
if (start + 1 != end) { // don't do this on head words, to keep "U.S."
- //strip off honorifics in begining
+ //strip off honorifics in beginning
if (HONORIFICS_PATTERN.matcher(mtokens[start].toString()).find()) {
start++;
}
if (start == end) {
- //System.err.println("stripNp: return null 4");
+ logger.trace("stripNp: return null 4");
return null;
}
      //strip off honorifics on the end
@@ -396,7 +397,7 @@ public class ResolverUtils {
}
}
if (start == end) {
- //System.err.println("stripNp: return null 5");
+ logger.trace("stripNp: return null 5");
return null;
}
StringBuilder strip = new StringBuilder();
@@ -462,7 +463,7 @@ public class ResolverUtils {
boolean foundIncompatiblePronoun = false;
if (mention.getHeadTokenTag().startsWith("PRP")) {
Map<String, String> pronounMap =
getPronounFeatureMap(mention.getHeadTokenText());
- //System.err.println("getPronounMatchFeatures.pronounMap:"+pronounMap);
+ logger.debug("PronounMap: {}", pronounMap);
for (Iterator<MentionContext> mi = entity.getMentions();mi.hasNext();) {
MentionContext candidateMention = mi.next();
if (candidateMention.getHeadTokenTag().startsWith("PRP")) {
@@ -471,9 +472,8 @@ public class ResolverUtils {
break;
}
else {
- Map<String, String> candidatePronounMap =
- getPronounFeatureMap(candidateMention.getHeadTokenText());
-
//System.err.println("getPronounMatchFeatures.candidatePronounMap:"+candidatePronounMap);
+ Map<String, String> candidatePronounMap =
getPronounFeatureMap(candidateMention.getHeadTokenText());
+ logger.debug("CandidatePronounMap: {}", candidatePronounMap);
boolean allKeysMatch = true;
for (String key : pronounMap.keySet()) {
String cfv = candidatePronounMap.get(key);
@@ -505,9 +505,10 @@ public class ResolverUtils {
/**
* Returns distance features for the specified mention and entity.
- * @param mention The mention.
- * @param entity The entity.
- * @return list of distance features for the specified mention and entity.
+ *
+ * @param mention The {@link MentionContext mention}.
+ * @param entity The {@link DiscourseEntity entity}.
+ * @return A list of distance features for the specified mention and entity.
*/
public static List<String> getDistanceFeatures(MentionContext mention,
DiscourseEntity entity) {
List<String> features = new ArrayList<>();
@@ -590,7 +591,7 @@ public class ResolverUtils {
public static String getGenderCompatibilityFeature(MentionContext ec,
DiscourseEntity de) {
GenderEnum eg = de.getGender();
- //System.err.println("getGenderCompatibility: mention="+ec.getGender()+"
entity="+eg);
+ logger.debug("GenderCompatibility: mention={} entity={}", ec.getGender(),
eg);
if (eg == GenderEnum.UNKNOWN || ec.getGender() == GenderEnum.UNKNOWN) {
return GEN_UNKNOWN;
}
@@ -622,9 +623,8 @@ public class ResolverUtils {
else {
return SIM_INCOMPATIBLE;
}
- }
- else {
- System.err.println("MaxentResolver: Uninitialized Semantic Model");
+ } else {
+ logger.warn("Uninitialized Semantic Model");
return SIM_UNKNOWN;
}
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
index 94d89dc..0a1b635 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SingularPronounResolver.java
@@ -25,6 +25,9 @@ import java.util.List;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.mention.MentionContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* This class resolver singular pronouns such as "he", "she", "it" and their
various forms.
*
@@ -32,6 +35,8 @@ import opennlp.tools.coref.mention.MentionContext;
*/
public class SingularPronounResolver extends MaxentResolver {
+ private static final Logger logger =
LoggerFactory.getLogger(SingularPronounResolver.class);
+
public SingularPronounResolver(String modelDirectory, ResolverMode m) throws
IOException {
super(modelDirectory, "pmodel", m, 30);
this.numSentencesBack = 2;
@@ -43,8 +48,9 @@ public class SingularPronounResolver extends MaxentResolver {
this.numSentencesBack = 2;
}
+ @Override
public boolean canResolve(MentionContext mention) {
- //System.err.println("MaxentSingularPronounResolver.canResolve: ec=
("+mention.id+") "+ mention.toText());
+ logger.debug("CanResolve: ec=({}) {}", mention.getId(), mention.toText());
String tag = mention.getHeadTokenTag();
return tag != null && tag.startsWith("PRP")
&&
ResolverUtils.SINGULAR_THIRD_PERSON_PRONOUN_PATTERN.matcher(mention.getHeadTokenText()).matches();
@@ -94,13 +100,13 @@ public class SingularPronounResolver extends
MaxentResolver {
}
*/
}
- return (features);
+ return features;
}
@Override
public boolean excluded(MentionContext mention, DiscourseEntity entity) {
if (super.excluded(mention, entity)) {
- return (true);
+ return true;
}
String mentionGender = null;
@@ -114,20 +120,16 @@ public class SingularPronounResolver extends
MaxentResolver {
}
String entityGender =
ResolverUtils.getPronounGender(entityMention.getHeadTokenText());
if (!entityGender.equals("u") && !mentionGender.equals(entityGender)) {
- return (true);
+ return true;
}
}
}
- return (false);
+ return false;
}
@Override
protected boolean outOfRange(MentionContext mention, DiscourseEntity entity)
{
MentionContext cec = entity.getLastExtent();
- //System.err.println("MaxentSingularPronounresolve.outOfRange:
["+entity.getLastExtent().toText()
- // +" ("+entity.getId()+")] ["+mention.toText()+" ("+mention.getId()+")]
entity.sentenceNumber=("
- // +entity.getLastExtent().getSentenceNumber()+")-mention.sentenceNumber=("
- // +mention.getSentenceNumber()+") > "+numSentencesBack);
- return (mention.getSentenceNumber() - cec.getSentenceNumber() >
numSentencesBack);
+ return mention.getSentenceNumber() - cec.getSentenceNumber() >
numSentencesBack;
}
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
index 628bde5..030b803 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/SpeechPronounResolver.java
@@ -24,6 +24,9 @@ import java.util.List;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.mention.MentionContext;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* Resolves pronouns specific to quoted speech such as "you", "me", and "I".
*
@@ -31,6 +34,8 @@ import opennlp.tools.coref.mention.MentionContext;
*/
public class SpeechPronounResolver extends MaxentResolver {
+ private static final Logger logger =
LoggerFactory.getLogger(SpeechPronounResolver.class);
+
public SpeechPronounResolver(String modelDirectory, ResolverMode m) throws
IOException {
super(modelDirectory, "fmodel", m, 30);
this.numSentencesBack = 0;
@@ -74,6 +79,7 @@ public class SpeechPronounResolver extends MaxentResolver {
return (mention.getSentenceNumber() - cec.getSentenceNumber() >
numSentencesBack);
}
+ @Override
public boolean canResolve(MentionContext mention) {
String tag = mention.getHeadTokenTag();
boolean fpp = tag != null && tag.startsWith("PRP")
@@ -112,14 +118,13 @@ public class SpeechPronounResolver extends MaxentResolver
{
return false;
}
else {
- System.err.println("Unexpected candidate excluded: " + cec.toText());
+ logger.warn("Unexpected candidate excluded: {}", cec.toText());
return true;
}
}
else {
- System.err.println("Unexpected mention excluded: " + mention.toText());
+ logger.warn("Unexpected mention excluded: {}", mention.toText());
return true;
}
}
-
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java
index 5e20767..fc130c7 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/Context.java
@@ -29,6 +29,8 @@ import opennlp.tools.coref.mention.HeadFinder;
import opennlp.tools.coref.mention.Mention;
import opennlp.tools.coref.mention.Parse;
import opennlp.tools.util.Span;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Specifies the context of a mention for computing gender, number, and
semantic compatibility.
@@ -37,6 +39,8 @@ import opennlp.tools.util.Span;
*/
public class Context extends Mention {
+ private static final Logger logger = LoggerFactory.getLogger(Context.class);
+
protected String headTokenText;
protected String headTokenTag;
protected Set<String> synsets;
@@ -132,7 +136,7 @@ public class Context extends Mention {
Set<String> synsetSet = new HashSet<>();
String[] lemmas = getLemmas(c);
Dictionary dict = DictionaryFactory.getDictionary();
- //System.err.println(lemmas.length+" lemmas for "+c.headToken);
+ logger.debug("{} lemmas for {}", lemmas.length, c.getHeadTokenText());
for (String lemma : lemmas) {
String senseKey = dict.getSenseKey(lemma, "NN", 0);
if (senseKey != null) {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
index a9036fd..097a30b 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
@@ -15,7 +15,6 @@
* limitations under the License.
*/
-
package opennlp.tools.coref.sim;
import java.io.BufferedInputStream;
@@ -30,7 +29,6 @@ import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Collection;
@@ -40,6 +38,7 @@ import java.util.List;
import java.util.Set;
import opennlp.tools.coref.resolver.ResolverUtils;
+import opennlp.tools.coref.resolver.SpeechPronounResolver;
import opennlp.tools.ml.maxent.GISModel;
import opennlp.tools.ml.maxent.GISTrainer;
import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
@@ -48,12 +47,16 @@ import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.TrainingParameters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Class which models the gender of a particular mentions and entities made up
of mentions.
*/
public class GenderModel implements TestGenderModel, TrainSimilarityModel {
+ private static final Logger logger =
LoggerFactory.getLogger(GenderModel.class);
+
private int maleIndex;
private int femaleIndex;
private int neuterIndex;
@@ -113,16 +116,16 @@ public class GenderModel implements TestGenderModel,
TrainSimilarityModel {
features.add("n=" + np1.getNameType());
if (np1.getNameType() != null && np1.getNameType().equals("person")) {
Object[] tokens = np1.getTokens();
- //System.err.println("GenderModel.getFeatures: person name="+np1);
+ logger.debug("GetFeatures: person name={}", np1);
for (int ti = 0; ti < np1.getHeadTokenIndex() || ti == 0; ti++) {
String name = tokens[ti].toString().toLowerCase();
if (femaleNames.contains(name)) {
features.add("fem");
- //System.err.println("GenderModel.getFeatures: person (fem) "+np1);
+ logger.debug("GenderModel.getFeatures: person (fem) {}", np1);
}
if (maleNames.contains(name)) {
features.add("mas");
- //System.err.println("GenderModel.getFeatures: person (mas) "+np1);
+ logger.debug("GenderModel.getFeatures: person (mas) {}", np1);
}
}
}
@@ -178,12 +181,13 @@ public class GenderModel implements TestGenderModel,
TrainSimilarityModel {
return GenderEnum.UNKNOWN;
}
+ @Override
@SuppressWarnings("unchecked")
public void setExtents(Context[] extentContexts) {
HashMap<Integer,Context> entities = new HashMap<>();
List<Context> singletons = new ArrayList<>();
for (Context ec : extentContexts) {
- //System.err.println("GenderModel.setExtents: ec("+ec.getId()+")
"+ec.toText());
+ logger.debug("GenderModel.setExtents: ec({}) {}", ec.getId(), ec);
if (ec.getId() != -1) {
entities.put(ec.getId(), ec);
} else {
@@ -229,9 +233,10 @@ public class GenderModel implements TestGenderModel,
TrainSimilarityModel {
}
}
+ // TODO Extract a Test case from this example
public static void main(String[] args) throws IOException {
if (args.length == 0) {
- System.err.println("Usage: GenderModel modelName < tiger/NN bear/NN");
+ logger.info("Usage: GenderModel modelName < tiger/NN bear/NN");
System.exit(1);
}
String modelName = args[0];
@@ -242,15 +247,15 @@ public class GenderModel implements TestGenderModel,
TrainSimilarityModel {
for (String line = in.readLine(); line != null; line = in.readLine()) {
String[] words = line.split(" ");
double[] dist = model.genderDistribution(Context.parseContext(words[0]));
- System.out.println("m=" + dist[model.getMaleIndex()] + " f=" +
dist[model.getFemaleIndex()]
- + " n=" + dist[model.getNeuterIndex()] + " " +
model.getFeatures(Context.parseContext(words[0])));
+ logger.debug("m={} f={} n={} {}", dist[model.getMaleIndex()],
dist[model.getFemaleIndex()],
+ dist[model.getNeuterIndex()],
model.getFeatures(Context.parseContext(words[0])));
}
}
@Override
public double[] genderDistribution(Context np1) {
List<String> features = getFeatures(np1);
- //System.err.println("GenderModel.genderDistribution: "+features);
+ logger.debug("GenderDistribution: {}", features);
return testModel.eval(features.toArray(new String[0]));
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/MaxentCompatibilityModel.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/MaxentCompatibilityModel.java
index d56bca7..0272f58 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/MaxentCompatibilityModel.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/MaxentCompatibilityModel.java
@@ -18,6 +18,8 @@
package opennlp.tools.coref.sim;
import opennlp.tools.coref.linker.LinkerMode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.IOException;
@@ -26,6 +28,8 @@ import java.io.IOException;
*/
public class MaxentCompatibilityModel {
+ private static final Logger logger =
LoggerFactory.getLogger(MaxentCompatibilityModel.class);
+
private final double minGenderProb = 0.66;
private final double minNumberProb = 0.66;
@@ -48,9 +52,8 @@ public class MaxentCompatibilityModel {
Gender gender;
double[] gdist = genModel.genderDistribution(c);
if (debugOn) {
- System.err.println("MaxentCompatibilityModel.computeGender: "
- + c.toString() + " m=" + gdist[genModel.getMaleIndex()] + " f="
- + gdist[genModel.getFemaleIndex()] + " n=" +
gdist[genModel.getNeuterIndex()]);
+ logger.debug("Computing Gender: {} - m={} f={} n={}", c,
gdist[genModel.getMaleIndex()],
+ gdist[genModel.getFemaleIndex()],
gdist[genModel.getNeuterIndex()]);
}
if (genModel.getMaleIndex() >= 0 && gdist[genModel.getMaleIndex()] >
minGenderProb) {
gender = new Gender(GenderEnum.MALE,gdist[genModel.getMaleIndex()]);
@@ -70,8 +73,7 @@ public class MaxentCompatibilityModel {
public Number computeNumber(Context c) {
double[] dist = numModel.numberDist(c);
Number number;
- //System.err.println("MaxentCompatibiltyResolver.computeNumber: "+c+"
sing="
- // +dist[numModel.getSingularIndex()]+"
plural="+dist[numModel.getPluralIndex()]);
+ logger.debug("Computing number: {} sing={} plural={}", c,
dist[numModel.getSingularIndex()], dist[numModel.getPluralIndex()]);
if (dist[numModel.getSingularIndex()] > minNumberProb) {
number = new
Number(NumberEnum.SINGULAR,dist[numModel.getSingularIndex()]);
}
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
index 96dadda..2eb08cc 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
@@ -37,11 +37,16 @@ import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.TrainingParameters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* Class which models the number of particular mentions and the entities made
up of mentions.
*/
public class NumberModel implements TestNumberModel, TrainSimilarityModel {
+ private static final Logger logger =
LoggerFactory.getLogger(NumberModel.class);
+
private final String modelName;
private final String modelExtension = ".bin";
private MaxentModel testModel;
@@ -118,7 +123,7 @@ public class NumberModel implements TestNumberModel,
TrainSimilarityModel {
Map<Integer,Context> entities = new HashMap<>();
List<Context> singletons = new ArrayList<>();
for (Context ec : extentContexts) {
- //System.err.println("NumberModel.setExtents: ec("+ec.getId()+")
"+ec.toText());
+ logger.debug("NumberModel.setExtents: ec({}) {}", ec.getId(), ec);
if (ec.getId() != -1) {
entities.put(ec.getId(), ec);
} else {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
index 7baa7b4..25648a0 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
@@ -45,6 +45,8 @@ import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.ObjectStreamUtils;
import opennlp.tools.util.TrainingParameters;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Models semantic similarity between two mentions and returns a score based on
@@ -52,6 +54,8 @@ import opennlp.tools.util.TrainingParameters;
*/
public class SimilarityModel implements TestSimilarityModel,
TrainSimilarityModel {
+ private static final Logger logger =
LoggerFactory.getLogger(SimilarityModel.class);
+
private final String modelName;
private final String modelExtension = ".bin";
private MaxentModel testModel;
@@ -86,14 +90,13 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
private void addEvent(boolean same, Context np1, Context np2) {
if (same) {
List<String> feats = getFeatures(np1, np2);
- //System.err.println(SAME+" "+np1.headTokenText+" ("+np1.id+") ->
"+np2.headTokenText+"
- // ("+np2.id+") "+feats);
+ logger.debug("{} {} ({}) -> {} + ({}) {}",
+ SAME, np1.headTokenText, np1.getId(), np2.headTokenText,
np2.getId(), feats);
events.add(new Event(SAME, feats.toArray(new String[0])));
- }
- else {
+ } else {
List<String> feats = getFeatures(np1, np2);
- //System.err.println(DIFF+" "+np1.headTokenText+" ("+np1.id+") ->
"+np2.headTokenText+"
- // ("+np2.id+") "+feats);
+ logger.debug("{} {} ({}) -> {} + ({}) {}",
+ DIFF, np1.headTokenText, np1.getId(), np2.headTokenText,
np2.getId(), feats);
events.add(new Event(DIFF, feats.toArray(new String[0])));
}
}
@@ -249,7 +252,7 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
}
private boolean inSuperClass(Context ec, Context cec) {
- if (ec.getSynsets().size() == 0 || cec.getSynsets().size() == 0) {
+ if (ec.getSynsets().isEmpty() || cec.getSynsets().isEmpty()) {
return false;
}
else {
@@ -271,12 +274,6 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
}
}
- /*
- private boolean isPronoun(MentionContext mention) {
- return mention.getHeadTokenTag().startsWith("PRP");
- }
- */
-
@Override
@SuppressWarnings("unchecked")
public void setExtents(Context[] extentContexts) {
@@ -284,9 +281,9 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
/* Extents which are not in a coreference chain. */
List<Context> singletons = new ArrayList<>();
List<Context> allExtents = new ArrayList<>();
- //populate data structures
+ // populate data structures
for (Context ec : extentContexts) {
- //System.err.println("SimilarityModel: setExtents: ec("+ec.getId()+")
"+ec.getNameType()+" "+ec);
+ logger.debug("Set extents: ec({}) {} {}", ec.getId(), ec.getNameType(),
ec);
if (ec.getId() == -1) {
singletons.add(ec);
} else {
@@ -307,6 +304,7 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
List<Context> entityContexts = (List<Context>) entities.get(key);
Set<Context> exclusionSet = constructExclusionSet(key, entities,
headSets, nameSets, singletons);
if (entityContexts.size() == 1) {
+ // ?
}
for (int xi1 = 0, xl = entityContexts.size(); xi1 < xl; xi1++) {
Context ec1 = entityContexts.get(xi1);
@@ -324,8 +322,9 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
Context sec1 = allExtents.get(axi);
axi = (axi + 1) % allExtents.size();
if (!exclusionSet.contains(sec1)) {
- if (debugOn) System.err.println(ec1.toString() + " " +
entityNameSet + " "
- + sec1.toString() + " " + nameSets.get(sec1.getId()));
+ if (debugOn) {
+ logger.debug("{} {} {} {}", ec1.toString(), entityNameSet,
sec1.toString(), nameSets.get(sec1.getId()));
+ }
addEvent(false, ec1, sec1);
break;
}
@@ -349,7 +348,9 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
@Override
public double compatible(Context mention1, Context mention2) {
List<String> feats = getFeatures(mention1, mention2);
- if (debugOn) System.err.println("SimilarityModel.compatible: feats=" +
feats);
+ if (debugOn) {
+ logger.debug("Compatible: feats={}", feats);
+ }
return (testModel.eval(feats.toArray(new String[0]))[SAME_INDEX]);
}
@@ -490,11 +491,11 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
Set<String> synsets1 = common1.getSynsets();
Set<String> synsets2 = common2.getSynsets();
- if (synsets1.size() == 0) {
+ if (synsets1.isEmpty()) {
//features.add("missing_"+common1.headToken);
return features;
}
- if (synsets2.size() == 0) {
+ if (synsets2.isEmpty()) {
//features.add("missing_"+common2.headToken);
return features;
}
@@ -517,7 +518,7 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
}
else if (numCommonSynsets == synsets2.size()) {
features.add("1isa2");
- //features.add("1isa2-"+(synsets1.size() - numCommonSynsets));
+ // features.add("1isa2-"+(synsets1.size() - numCommonSynsets));
}
return features;
}
@@ -538,7 +539,7 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
private List<String> getFeatures(Context np1, Context np2) {
List<String> features = new ArrayList<>();
features.add("default");
- // semantic categories
+ // semantic categories
String w1 = np1.getHeadTokenText().toLowerCase();
String w2 = np2.getHeadTokenText().toLowerCase();
if (w1.compareTo(w2) < 0) {
@@ -550,7 +551,7 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
if (w1.equals(w2)) {
features.add("sameHead");
}
- //features.add("tt="+np1.headTag+","+np2.headTag);
+ // features.add("tt="+np1.headTag+","+np2.headTag);
if (isName(np1)) {
if (isName(np2)) {
features.addAll(getNameNameFeatures(np1, np2));
@@ -579,7 +580,7 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
features.addAll(getCommonNumberFeatures(np1, np2));
}
else {
- //System.err.println("unknown group for " + np1.headTokenText + " -> "
+ np2.headTokenText);
+ logger.warn("unknown group for: {} -> {}", np1.headTokenText,
np2.headTokenText);
}
}
else if (isPronoun(np1)) {
@@ -596,7 +597,7 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
features.addAll(getNumberPronounFeatures(np2, np1));
}
else {
- //System.err.println("unknown group for " + np1.headTokenText + " -> "
+ np2.headTokenText);
+ logger.warn("unknown group for: {} -> {}", np1.headTokenText,
np2.headTokenText);
}
}
else if (isNumber(np1)) {
@@ -612,18 +613,19 @@ public class SimilarityModel implements
TestSimilarityModel, TrainSimilarityMode
else if (isNumber(np2)) {
}
else {
- //System.err.println("unknown group for " + np1.headTokenText + " -> "
+ np2.headTokenText);
+ logger.warn("unknown group for: {} -> {}", np1.headTokenText,
np2.headTokenText);
}
}
else {
- //System.err.println("unknown group for " + np1.headToken);
+ logger.warn("unknown group for: {}", np1.headTokenText);
}
return (features);
}
+ // TODO Extract a Test case from this example
public static void main(String[] args) throws IOException {
if (args.length == 0) {
- System.err.println("Usage: SimilarityModel modelName < tiger/NN
bear/NN");
+ logger.info("Usage: SimilarityModel modelName < tiger/NN bear/NN");
System.exit(1);
}
String modelName = args[0];
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
index 128cfbb..1e6e4e6 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/formats/muc/MucMentionInserterStream.java
@@ -36,12 +36,17 @@ import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
/**
* The mention insert is responsible to insert the mentions from the training
data
* into the parse trees.
*/
public class MucMentionInserterStream extends
FilterObjectStream<RawCorefSample, CorefSample> {
+ private static final Logger logger =
LoggerFactory.getLogger(MucMentionInserterStream.class);
+
   private static final Set<String> ENTITY_SET = new HashSet<>(Arrays.asList(DefaultParse.NAME_TYPES));
private final MentionFinder mentionFinder;
@@ -56,7 +61,6 @@ public class MucMentionInserterStream extends
FilterObjectStream<RawCorefSample,
String min = mention.min;
if (min != null) {
-
int startOffset = p.toString().indexOf(min);
int endOffset = startOffset + min.length();
@@ -85,7 +89,6 @@ public class MucMentionInserterStream extends
FilterObjectStream<RawCorefSample,
}
public static boolean addMention(int id, Span mention, Parse[] tokens) {
-
boolean failed = false;
Parse startToken = tokens[mention.getStart()];
@@ -97,15 +100,12 @@ public class MucMentionInserterStream extends
FilterObjectStream<RawCorefSample,
if (ENTITY_SET.contains(commonParent.getType())) {
commonParent.getParent().setType("NP#" + id);
- }
- else if (commonParent.getType().equals("NML")) {
+ } else if (commonParent.getType().equals("NML")) {
commonParent.setType("NML#" + id);
- }
- else if (commonParent.getType().equals("NP")) {
+ } else if (commonParent.getType().equals("NP")) {
commonParent.setType("NP#" + id);
- }
- else {
-        System.out.println("Inserting mention failed: " + commonParent.getType() + " Failed id: " + id);
+      } else {
+        logger.warn("Inserting mention failed: {} - failed id: {}", commonParent.getType(), id);
failed = true;
}
}
@@ -115,15 +115,14 @@ public class MucMentionInserterStream extends
FilterObjectStream<RawCorefSample,
return !failed;
}
-
+
+ @Override
public CorefSample read() throws IOException {
RawCorefSample sample = samples.read();
if (sample != null) {
-
List<Parse> mentionParses = new ArrayList<>();
-
List<CorefMention[]> allMentions = sample.getMentions();
List<Parse> allParses = sample.getParses();
@@ -140,7 +139,6 @@ public class MucMentionInserterStream extends
FilterObjectStream<RawCorefSample,
}
Parse[] tokens = p.getTagNodes();
-
for (CorefMention mention : mentions) {
Span min = getMinSpan(p, mention);
@@ -150,12 +148,9 @@ public class MucMentionInserterStream extends
FilterObjectStream<RawCorefSample,
addMention(mention.id, min, tokens);
}
-
p.show();
-
mentionParses.add(p);
}
-
return new CorefSample(mentionParses);
}
else {
diff --git
a/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java
b/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java
index c03b762..7d4739a 100644
---
a/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java
+++
b/opennlp-coref/src/main/java/opennlp/tools/lang/english/TreebankNameFinder.java
@@ -21,13 +21,17 @@ import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
+import java.nio.file.FileSystems;
+import opennlp.tools.coref.sim.SimilarityModel;
import opennlp.tools.namefind.NameFinderEventStream;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.parser.Parse;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.util.Span;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Class is used to create a name finder for English.
@@ -36,7 +40,9 @@ import opennlp.tools.util.Span;
*/
@Deprecated
public class TreebankNameFinder {
-
+
+  private static final Logger logger = LoggerFactory.getLogger(TreebankNameFinder.class);
+
public static String[] NAME_TYPES =
{"person", "organization", "location", "date", "time", "percentage",
"money"};
@@ -72,11 +78,8 @@ public class TreebankNameFinder {
for (int ti = 0; ti < tagNodes.length; ti++) {
tokens[ti] = tagNodes[ti].getCoveredText();
}
- //System.err.println(java.util.Arrays.asList(tokens));
for (int fi = 0, fl = finders.length; fi < fl; fi++) {
nameSpans[fi] = finders[fi].nameFinder.find(tokens);
- //System.err.println("english.NameFinder.processParse: "+tags[fi] + " "
- // + java.util.Arrays.asList(nameSpans[fi]));
}
for (int fi = 0, fl = finders.length; fi < fl; fi++) {
@@ -110,8 +113,6 @@ public class TreebankNameFinder {
String[] tokens = Span.spansToStrings(spans,line);
for (int fi = 0, fl = finders.length; fi < fl; fi++) {
nameSpans[fi] = finders[fi].nameFinder.find(tokens);
- //System.err.println("EnglishNameFinder.processText: "+tags[fi] + " "
- // + java.util.Arrays.asList(finderTags[fi]));
         nameOutcomes[fi] = NameFinderEventStream.generateOutcomes(nameSpans[fi], null, tokens.length);
}
@@ -158,9 +159,10 @@ public class TreebankNameFinder {
public static void main(String[] args) throws IOException {
if (args.length == 0) {
- System.err.println("Usage NameFinder -[parse] model1 model2 ... modelN <
sentences");
- System.err.println(" -parse: Use this option to find names on parsed
input. " +
- "Un-tokenized sentence text is the default.");
+ logger.info("""
+ Usage NameFinder -[parse] model1 model2 ... modelN < sentences\s
+ -parse: Use this option to find names on parsed input.\s
+ Un-tokenized sentence text is the default.""");
System.exit(1);
}
int ai = 0;
@@ -168,9 +170,8 @@ public class TreebankNameFinder {
while (args[ai].startsWith("-") && ai < args.length) {
if (args[ai].equals("-parse")) {
parsedInput = true;
- }
- else {
- System.err.println("Ignoring unknown option " + args[ai]);
+ } else {
+ logger.warn("Ignoring unknown option {}", args[ai]);
}
ai++;
}
@@ -179,22 +180,18 @@ public class TreebankNameFinder {
for (int fi = 0; ai < args.length; ai++,fi++) {
String modelName = args[ai];
finders[fi] = new TreebankNameFinder(new TokenNameFinderModel(new
FileInputStream(modelName)));
-      int nameStart = modelName.lastIndexOf(System.getProperty("file.separator")) + 1;
+      int nameStart = modelName.lastIndexOf(FileSystems.getDefault().getSeparator()) + 1;
int nameEnd = modelName.indexOf('.', nameStart);
if (nameEnd == -1) {
nameEnd = modelName.length();
}
names[fi] = modelName.substring(nameStart, nameEnd);
}
- //long t1 = System.currentTimeMillis();
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
if (parsedInput) {
processParse(finders,names,in);
- }
- else {
+ } else {
processText(finders,names,in);
}
- //long t2 = System.currentTimeMillis();
- //System.err.println("Time "+(t2-t1));
}
}