Revision: 19011
          http://sourceforge.net/p/gate/code/19011
Author:   dgmaynard
Date:     2015-11-27 14:32:01 +0000 (Fri, 27 Nov 2015)
Log Message:
-----------
Prevents first names that occur inside Twitter usernames (UserID) from 
being included in a longer Person name; such a name is now only annotated 
as a Person when the Person spans exactly the username.

Modified Paths:
--------------
    gate/trunk/plugins/ANNIE/resources/NE/firstname.jape
    gate/trunk/plugins/ANNIE/resources/NE/main-twitter.jape
    gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape

Modified: gate/trunk/plugins/ANNIE/resources/NE/firstname.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/firstname.jape        2015-11-27 
02:22:03 UTC (rev 19010)
+++ gate/trunk/plugins/ANNIE/resources/NE/firstname.jape        2015-11-27 
14:32:01 UTC (rev 19011)
@@ -14,10 +14,81 @@
 */
 
 Phase: FirstName
-Input: Token Lookup ClosedClass NumberLetter
+Input: Token Lookup ClosedClass NumberLetter UserID
 Options: control = appelt
 
 
+Rule: FirstNameTwitterName
+Priority: 500
+// @fred
+
+(
+ {Lookup.majorType == person_first, UserID, Lookup.kind !=ambig}
+):person
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+Iterator lookupsIter =  
inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", 
constraints).iterator();
+while(!ambig && lookupsIter.hasNext()){
+  gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+  //we're only interested in annots of the same length
+  
if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+    ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+  }
+}
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameTwitterName");
+features.put("twittername", "yes");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
+Rule: FirstNameTwitterNameAmbig
+Priority: 600
+// @mark
+
+(
+ {Lookup.majorType == person_first, UserID, Lookup.kind ==ambig}
+):person
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+Iterator lookupsIter =  
inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup", 
constraints).iterator();
+while(!ambig && lookupsIter.hasNext()){
+  gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+  //we're only interested in annots of the same length
+  
if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+    ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+  }
+}
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameTwitterNameAmbig");
+features.put("twittername", "yes");
+features.put("kind", "ambig");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
 Rule: FirstName
 // Fred
 
@@ -47,6 +118,7 @@
 if(!ambig) features.put("gender", gender);
 
 features.put("rule", "FirstName");
+features.put("twittername", "no");
 outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
 features);
 }
@@ -85,6 +157,7 @@
 
 features.put("rule", "FirstNameAmbig");
 features.put("kind", "ambig");
+features.put("twittername", "no");
 outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
 features);
 }

Modified: gate/trunk/plugins/ANNIE/resources/NE/main-twitter.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/main-twitter.jape     2015-11-27 
02:22:03 UTC (rev 19010)
+++ gate/trunk/plugins/ANNIE/resources/NE/main-twitter.jape     2015-11-27 
14:32:01 UTC (rev 19011)
@@ -35,4 +35,4 @@
 name_context
 org_context
 loc_context
-clean
+//clean

Modified: gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape     2015-11-27 
02:22:03 UTC (rev 19010)
+++ gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape     2015-11-27 
14:32:01 UTC (rev 19011)
@@ -34,19 +34,23 @@
 
 Macro: FIRSTNAME
 
- ({FirstPerson.gender == male, FirstPerson.kind != ambig} |
-  {FirstPerson.gender == female, FirstPerson.kind != ambig})
+ ({FirstPerson.gender == male, FirstPerson.kind != ambig, 
FirstPerson.twittername == no} |
+  {FirstPerson.gender == female, FirstPerson.kind != ambig, 
FirstPerson.twittername == no})
 
 
+
 Macro: FIRSTNAMEAMBIG
 (
- {FirstPerson.kind == ambig}
+ {FirstPerson.kind == ambig, FirstPerson.twittername == no}
 )
 
+Macro: FIRSTNAMETWITTER
 
+(
+ {FirstPerson.twittername == yes}
+)
 
 
-
 Macro: PERSONENDING
 (
  ({Token.string == ","})?
@@ -137,10 +141,40 @@
 gate.Utils.addAnn(outputAS, personSet, "TempPerson", features);
 }
 
+Rule:  GazPersonFirstTwitter
+Priority: 300
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ {FirstPerson.twittername == yes, FirstPerson.kind != ambig}
+):person 
+( 
+ {Token.orth == upperInitial, Token.length == "1"}
+)?
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+features.put("gender", personAnn.getFeatures().get("gender"));
+features.put("kind", "firstName");
+features.put("rule", "GazPersonFirstTwitter");
 
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
+features.put("twittername", "yes");
 
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
 
 
+
+
 Rule:  GazPersonFirst
 Priority: 200
 (
@@ -531,17 +565,14 @@
 :person.Discard = {rule = "PersonFullInitialsCaps"}
 
 
-
-Rule:  PersonFullInitials
+Rule:  PersonFull
 Priority: 10
 // F.W. Jones
+// Fred Jones
 
 (
- {Token.category == DT}
-)?
-(
  
-  ({Initials, !Lookup}):initials
+  (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
   ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
  ((PREFIX)*
   ({Upper,!Initials})
@@ -553,47 +584,47 @@
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
   
-  gate.AnnotationSet initialsSet = 
(gate.AnnotationSet)bindings.get("initials");
-  List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
-
-  Long initialsStart = gate.Utils.start(initialsList.get(0));
-  Long initialsEnd   = gate.Utils.end(initialsList.get(initialsList.size() - 
1));
-  String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, 
initialsEnd); 
-  features.put("initials", initialsContent);
+  gate.AnnotationSet firstNameSet = 
(gate.AnnotationSet)bindings.get("firstName");
+  gate.Annotation firstNameAnn = 
(gate.Annotation)firstNameSet.iterator().next();
  
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
 
  gate.AnnotationSet middleNameSet = 
(gate.AnnotationSet)bindings.get("middleName");
 
  if (middleNameSet != null && middleNameSet.size()>0)
 {
  gate.Annotation middleNameAnn = 
(gate.Annotation)middleNameSet.iterator().next();
- String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn);
+ String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
  features.put("middleName", middleNameContent);
- features.put("gender", middleNameAnn.getFeatures().get("gender"));
 }
 
  gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
  gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
 
- String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn);
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
  features.put("surname", surnameContent);
 
  features.put("kind", "fullName");
- features.put("rule", "PersonFullInitials");
+ features.put("rule", "PersonFull");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 
 }
 
 
-Rule:  PersonFull
+
+Rule:  PersonFullInitials
 Priority: 10
 // F.W. Jones
-// Fred Jones
 
 (
+ {Token.category == DT}
+)?
+(
  
-  (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+  ({Initials, !Lookup}):initials
   ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
  ((PREFIX)*
   ({Upper,!Initials})
@@ -605,35 +636,42 @@
  gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
  gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
   
-  gate.AnnotationSet firstNameSet = 
(gate.AnnotationSet)bindings.get("firstName");
-  gate.Annotation firstNameAnn = 
(gate.Annotation)firstNameSet.iterator().next();
+  gate.AnnotationSet initialsSet = 
(gate.AnnotationSet)bindings.get("initials");
+  List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+  Long initialsStart = gate.Utils.start(initialsList.get(0));
+  Long initialsEnd   = gate.Utils.end(initialsList.get(initialsList.size() - 
1));
+  String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, 
initialsEnd); 
+  features.put("initials", initialsContent);
  
- String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
- features.put("firstName", firstNameContent);
- features.put("gender", firstNameAnn.getFeatures().get("gender"));
 
  gate.AnnotationSet middleNameSet = 
(gate.AnnotationSet)bindings.get("middleName");
 
  if (middleNameSet != null && middleNameSet.size()>0)
 {
  gate.Annotation middleNameAnn = 
(gate.Annotation)middleNameSet.iterator().next();
- String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
+ String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn);
  features.put("middleName", middleNameContent);
+ features.put("gender", middleNameAnn.getFeatures().get("gender"));
 }
 
  gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
  gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
 
- String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn);
  features.put("surname", surnameContent);
 
  features.put("kind", "fullName");
- features.put("rule", "PersonFull");
+ features.put("rule", "PersonFullInitials");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 
 }
 
+
+
+
+
 Rule:  PersonFullDoubleBarrelled
 Priority: 9
 // F.W. Smith Jones
@@ -727,7 +765,7 @@
  features.put("surname", surnameContent);
 
  features.put("kind", "fullName");
- features.put("rule", "PersonFull");
+ features.put("rule", "PersonMiddleInitial");
 outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
 features);
 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to