Revision: 19011
http://sourceforge.net/p/gate/code/19011
Author: dgmaynard
Date: 2015-11-27 14:32:01 +0000 (Fri, 27 Nov 2015)
Log Message:
-----------
Prevents names that are part of Twitter usernames from being included in a
person's name unless the Person annotation spans only the username.
Modified Paths:
--------------
gate/trunk/plugins/ANNIE/resources/NE/firstname.jape
gate/trunk/plugins/ANNIE/resources/NE/main-twitter.jape
gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape
Modified: gate/trunk/plugins/ANNIE/resources/NE/firstname.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/firstname.jape 2015-11-27
02:22:03 UTC (rev 19010)
+++ gate/trunk/plugins/ANNIE/resources/NE/firstname.jape 2015-11-27
14:32:01 UTC (rev 19011)
@@ -14,10 +14,81 @@
*/
Phase: FirstName
-Input: Token Lookup ClosedClass NumberLetter
+Input: Token Lookup ClosedClass NumberLetter UserID
Options: control = appelt
+Rule: FirstNameTwitterName
+Priority: 500
+// @fred
+
+(
+ {Lookup.majorType == person_first, UserID, Lookup.kind !=ambig}
+):person
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+Iterator lookupsIter =
inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup",
constraints).iterator();
+while(!ambig && lookupsIter.hasNext()){
+ gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+ //we're only interested in annots of the same length
+
if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+ ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+ }
+}
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameTwitterName");
+features.put("twittername", "yes");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
+Rule: FirstNameTwitterNameAmbig
+Priority: 600
+// @mark
+
+(
+ {Lookup.majorType == person_first, UserID, Lookup.kind ==ambig}
+):person
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+Iterator lookupsIter =
inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup",
constraints).iterator();
+while(!ambig && lookupsIter.hasNext()){
+ gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+ //we're only interested in annots of the same length
+
if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+ ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+ }
+}
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameTwitterNameAmbig");
+features.put("twittername", "yes");
+features.put("kind", "ambig");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
Rule: FirstName
// Fred
@@ -47,6 +118,7 @@
if(!ambig) features.put("gender", gender);
features.put("rule", "FirstName");
+features.put("twittername", "no");
outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
features);
}
@@ -85,6 +157,7 @@
features.put("rule", "FirstNameAmbig");
features.put("kind", "ambig");
+features.put("twittername", "no");
outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
features);
}
Modified: gate/trunk/plugins/ANNIE/resources/NE/main-twitter.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/main-twitter.jape 2015-11-27
02:22:03 UTC (rev 19010)
+++ gate/trunk/plugins/ANNIE/resources/NE/main-twitter.jape 2015-11-27
14:32:01 UTC (rev 19011)
@@ -35,4 +35,4 @@
name_context
org_context
loc_context
-clean
+//clean
Modified: gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape 2015-11-27
02:22:03 UTC (rev 19010)
+++ gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape 2015-11-27
14:32:01 UTC (rev 19011)
@@ -34,19 +34,23 @@
Macro: FIRSTNAME
- ({FirstPerson.gender == male, FirstPerson.kind != ambig} |
- {FirstPerson.gender == female, FirstPerson.kind != ambig})
+ ({FirstPerson.gender == male, FirstPerson.kind != ambig,
FirstPerson.twittername == no} |
+ {FirstPerson.gender == female, FirstPerson.kind != ambig,
FirstPerson.twittername == no})
+
Macro: FIRSTNAMEAMBIG
(
- {FirstPerson.kind == ambig}
+ {FirstPerson.kind == ambig, FirstPerson.twittername == no}
)
+Macro: FIRSTNAMETWITTER
+(
+ {FirstPerson.twittername == yes}
+)
-
Macro: PERSONENDING
(
({Token.string == ","})?
@@ -137,10 +141,40 @@
gate.Utils.addAnn(outputAS, personSet, "TempPerson", features);
}
+Rule: GazPersonFirstTwitter
+Priority: 300
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ {FirstPerson.twittername == yes, FirstPerson.kind != ambig}
+):person
+(
+ {Token.orth == upperInitial, Token.length == "1"}
+)?
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+features.put("gender", personAnn.getFeatures().get("gender"));
+features.put("kind", "firstName");
+features.put("rule", "GazPersonFirstTwitter");
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
+features.put("twittername", "yes");
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
+
+
Rule: GazPersonFirst
Priority: 200
(
@@ -531,17 +565,14 @@
:person.Discard = {rule = "PersonFullInitialsCaps"}
-
-Rule: PersonFullInitials
+Rule: PersonFull
Priority: 10
// F.W. Jones
+// Fred Jones
(
- {Token.category == DT}
-)?
-(
- ({Initials, !Lookup}):initials
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
((PREFIX)*
({Upper,!Initials})
@@ -553,47 +584,47 @@
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
- gate.AnnotationSet initialsSet =
(gate.AnnotationSet)bindings.get("initials");
- List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
-
- Long initialsStart = gate.Utils.start(initialsList.get(0));
- Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() -
1));
- String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart,
initialsEnd);
- features.put("initials", initialsContent);
+ gate.AnnotationSet firstNameSet =
(gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn =
(gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
gate.AnnotationSet middleNameSet =
(gate.AnnotationSet)bindings.get("middleName");
if (middleNameSet != null && middleNameSet.size()>0)
{
gate.Annotation middleNameAnn =
(gate.Annotation)middleNameSet.iterator().next();
- String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn);
+ String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
features.put("middleName", middleNameContent);
- features.put("gender", middleNameAnn.getFeatures().get("gender"));
}
gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
- String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn);
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
features.put("surname", surnameContent);
features.put("kind", "fullName");
- features.put("rule", "PersonFullInitials");
+ features.put("rule", "PersonFull");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
-Rule: PersonFull
+
+Rule: PersonFullInitials
Priority: 10
// F.W. Jones
-// Fred Jones
(
+ {Token.category == DT}
+)?
+(
- (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+ ({Initials, !Lookup}):initials
((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
((PREFIX)*
({Upper,!Initials})
@@ -605,35 +636,42 @@
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
- gate.AnnotationSet firstNameSet =
(gate.AnnotationSet)bindings.get("firstName");
- gate.Annotation firstNameAnn =
(gate.Annotation)firstNameSet.iterator().next();
+ gate.AnnotationSet initialsSet =
(gate.AnnotationSet)bindings.get("initials");
+ List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+ Long initialsStart = gate.Utils.start(initialsList.get(0));
+ Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() -
1));
+ String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart,
initialsEnd);
+ features.put("initials", initialsContent);
- String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
- features.put("firstName", firstNameContent);
- features.put("gender", firstNameAnn.getFeatures().get("gender"));
gate.AnnotationSet middleNameSet =
(gate.AnnotationSet)bindings.get("middleName");
if (middleNameSet != null && middleNameSet.size()>0)
{
gate.Annotation middleNameAnn =
(gate.Annotation)middleNameSet.iterator().next();
- String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
+ String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn);
features.put("middleName", middleNameContent);
+ features.put("gender", middleNameAnn.getFeatures().get("gender"));
}
gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
- String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn);
features.put("surname", surnameContent);
features.put("kind", "fullName");
- features.put("rule", "PersonFull");
+ features.put("rule", "PersonFullInitials");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
+
+
+
+
Rule: PersonFullDoubleBarrelled
Priority: 9
// F.W. Smith Jones
@@ -727,7 +765,7 @@
features.put("surname", surnameContent);
features.put("kind", "fullName");
- features.put("rule", "PersonFull");
+ features.put("rule", "PersonMiddleInitial");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs