Revision: 17327
http://sourceforge.net/p/gate/code/17327
Author: dgmaynard
Date: 2014-02-17 17:22:25 +0000 (Mon, 17 Feb 2014)
Log Message:
-----------
better cleaning, and better twitter grammar
Modified Paths:
--------------
gate/trunk/plugins/ANNIE/resources/NE/clean.jape
gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape
Modified: gate/trunk/plugins/ANNIE/resources/NE/clean.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/clean.jape 2014-02-17 16:38:01 UTC
(rev 17326)
+++ gate/trunk/plugins/ANNIE/resources/NE/clean.jape 2014-02-17 17:22:25 UTC
(rev 17327)
@@ -14,8 +14,8 @@
*/
Phase: Clean
-Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear
TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs
-Options: control = appelt
+Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear
TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs
ClosedClass Initials Upper FirstPerson Jobtitle HashtagToken HashtagLookup
+Options: control = all
Rule:CleanTempAnnotations
(
@@ -33,7 +33,14 @@
{Phone}|
{Ip}|
{TempIdentifier}|
- {TempSpecs}
+ {TempSpecs}|
+ {ClosedClass}|
+ {Upper}|
+ {Initials}|
+ {FirstPerson}|
+ {Jobtitle}|
+ {HashtagToken}|
+ {HashtagLookup}
):temp
-->
{
Modified: gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape 2014-02-17
16:38:01 UTC (rev 17326)
+++ gate/trunk/plugins/ANNIE/resources/NE/name-twitter.jape 2014-02-17
17:22:25 UTC (rev 17327)
@@ -10,12 +10,12 @@
*
* Diana Maynard, 10 Sep 2001
*
-* $Id: name.jape 13147 2010-10-15 08:30:24Z markagreenwood $
+* $Id: name.jape 17326 2014-02-17 16:38:01Z dgmaynard $
*/
Phase: Name
-Input: Token Lookup Title FirstPerson TempDate Split UserID
+Input: Token Lookup Title FirstPerson Upper ClosedClass Initials Split UserID
Options: control = appelt debug = false
///////////////////////////////////////////////////////////////
@@ -29,21 +29,9 @@
({Token.string == "."})?
)
-Macro: INITIALS
-(
- ({Token.orth == upperInitial, Token.length =="1"}
- ({Token.string == "."})?
- )+
-)
-Macro: INITIALS2
-(
- {Token.orth == allCaps, Token.length == "2"} |
- {Token.orth == allCaps, Token.length == "3"}
-)
-
Macro: FIRSTNAME
({FirstPerson.gender == male} |
@@ -57,17 +45,8 @@
-Macro: UPPER
-(
- ({Token.category == NNP}|
- {Token.orth == upperInitial}|
- {Token.orth == mixedCaps}
-)
- ({Token.string == "-"}
- {Token.category == NNP}
- )?
-)
+
Macro: PERSONENDING
(
{Lookup.majorType == person_ending}
@@ -86,20 +65,13 @@
///////////////////////////////////////////////////////////
-Rule: NotAnything
-Priority: 1000
-(
- {Lookup.majorType == spur}
-)
--->
-{}
// Person Rules
Rule: Pronoun
Priority: 1000
-//stops personal pronouns being recognised as Initials
+
(
{Token.category == PP}|
{Token.category == PRP}|
@@ -110,34 +82,76 @@
+Rule:Reject
+Priority: 1000
+// stops certain things being recognised as People
+(
+ {ClosedClass}
+)
+-->
+{}
-Rule: GazPerson
+
+Rule: GazPerson
Priority: 50
(
{Lookup.majorType == person_full}
)
:person -->
{
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("kind", "personName");
+
+// find the Token annotations
+AnnotationSet tokenSet = gate.Utils.getContainedAnnotations(inputAS,
personSet, "Token");
+// put them in order
+List<Annotation> tokenList = gate.Utils.inDocumentOrder(tokenSet);
+
+if (tokenList.size() == 1) {
+ // if there's only one Token, guess it's a surname
+
+ String surnameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+ features.put("surname", surnameContent);
+ }
+
+else if (tokenList.size() > 0) {
+ // the string under the first Token
+ String firstNameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+ features.put("firstName", firstNameContent);
+
+
+ // the string under the remaining Tokens if any
+ if (tokenList.size() > 1) {
+ Long lastNameStart = gate.Utils.start(tokenList.get(1));
+ Long lastNameEnd = gate.Utils.end(tokenList.get(tokenList.size() - 1));
+ String surnameContent = gate.Utils.stringFor(doc, lastNameStart,
lastNameEnd);
+ features.put("surname", surnameContent);
+ }
+}
+
+features.put("kind", "fullName");
features.put("rule", "GazPerson");
-outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
-features);
+features.put("gender", personAnn.getFeatures().get("gender"));
+
+// this method doesn't require try-catch
+gate.Utils.addAnn(outputAS, personSet, "TempPerson", features);
}
-Rule: TheGazPersonFirst
+
+
+
+
+Rule: GazPersonFirst
Priority: 200
(
{Token.category == DT}|
{Token.category == PRP}|
{Token.category == RB}
-)
+)?
(
- {FirstPerson}
-)
-:person
+ {FirstPerson.kind != ambig}
+):person
(
{Token.orth == upperInitial, Token.length == "1"}
)?
@@ -147,40 +161,19 @@
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
+features.put("kind", "firstName");
features.put("rule", "GazPersonFirst");
-outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
-features);
-//outputAS.removeAll(person);
-}
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
-Rule: GazPersonFirst
-Priority: 70
-(
- {FirstPerson}
-)
-:person
-(
- {Token.orth == upperInitial, Token.length == "1"}
-)?
--->
-{
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
-features.put("rule", "GazPersonFirst");
outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
features);
-//outputAS.removeAll(person);
}
-
-
Rule: PersonFirstContext
Priority: 30
// Anne and Kenton
@@ -191,180 +184,196 @@
(
{Token.string == "and"}
)
-({Token.orth == upperInitial})
+({Token.orth == upperInitial, Token.length != "1"})
:person2
-->
{
//first deal with person1
gate.FeatureMap features1 = Factory.newFeatureMap();
-gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
-gate.AnnotationSet firstPerson =
(gate.AnnotationSet)person1Set.get("FirstPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
- gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
+ gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
+ gate.Annotation personAnn = (gate.Annotation)person1Set.iterator().next();
+
+ String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+ features1.put("firstName", contentFirstName);
features1.put("gender", personAnn.getFeatures().get("gender"));
-}
- features1.put("kind", "personName");
+ features1.put("kind", "firstName");
features1.put("rule", "PersonFirstContext");
outputAS.add(person1Set.firstNode(), person1Set.lastNode(), "TempPerson",
features1);
+
//now deal with person2
gate.FeatureMap features2 = Factory.newFeatureMap();
gate.AnnotationSet person2Set = (gate.AnnotationSet)bindings.get("person2");
- features2.put("kind", "personName");
+gate.Annotation person2Ann = (gate.Annotation)person2Set.iterator().next();
+
+ String content2FirstName = gate.Utils.stringFor(doc, person2Ann);
+ features2.put("firstName", content2FirstName);
+ features2.put("kind", "firstName");
features2.put("rule", "PersonFirstContext");
outputAS.add(person2Set.firstNode(), person2Set.lastNode(), "TempPerson",
features2);
}
-Rule: PersonFirstContext2
-Priority: 40
-// Anne and I
+Rule: PersonTitle
+Priority: 35
+// Mr. Jones
+// Mr Fred Jones
+// note we only allow one first and surname,
+// but we add more in a final phase if we find adjacent unknowns
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
(
- {FirstPerson}
-):person
-(
- {Token.string == "and"}
- {Token.length == "1"}
-)
- -->
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+ (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
+ (PREFIX)*
+ ({Upper})
+ (PERSONENDING)?
+ ):surname
+):person
+-->
{
gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet firstPerson =
(gate.AnnotationSet)personSet.get("FirstPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
- gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet =
(gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+ gate.Annotation firstNameAnn =
(gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ }
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
features.put("kind", "personName");
- features.put("rule", "PersonFirstContext2");
+ features.put("rule", "PersonTitle");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
-Rule: PersonTitle
+
+
+
+Rule: PersonTitleInitials
Priority: 35
-// Mr. Jones
-// Mr Fred Jones
-// note we only allow one first and surname,
-// but we can add more in a final phase if we find adjacent unknowns
+// Mr J. Jones
+
+
(
{Token.category == DT}|
{Token.category == PRP}|
{Token.category == RB}
)?
(
- (TITLE)+
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )?
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+ ({Initials})?
+ ):initials
+ (
(PREFIX)*
- (UPPER)
- (PERSONENDING)?
-)
-:person -->
+ ({Upper, !Initials})
+ (PERSONENDING)?
+ ):surname
+):person
+-->
{
gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-
- // get all Title annotations that have a gender feature
- HashSet fNames = new HashSet();
- fNames.add("gender");
- gate.AnnotationSet personTitle = personSet.get("Title", fNames);
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
-// if the gender feature exists
- if (personTitle != null && personTitle.size()>0)
-{
- //Out.prln("Titles found " + personTitle);
- gate.Annotation personAnn = (gate.Annotation)personTitle.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
-else
-{
- //get all firstPerson annotations that have a gender feature
- // HashSet fNames = new HashSet();
- // fNames.add("gender");
- gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
- if (firstPerson != null && firstPerson.size()>0)
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (initialsSet != null && initialsSet.size()>0)
{
- //Out.prln("First persons found " + firstPerson);
- gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
+ List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+ Long initialsStart = gate.Utils.start(initialsList.get(0));
+ Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() -
1));
+ String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart,
initialsEnd);
+ features.put("initials", initialsContent);
}
-}
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
features.put("kind", "personName");
- features.put("rule", "PersonTitle");
+ features.put("rule", "PersonTitleInitials");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
-Rule: PersonFirstTitleGender
+Rule: TitleFirstName
Priority: 55
// use this rule when we know what gender the title indicates
// Mr Fred
+// The :title and :firstName labels are looked up by exact (case-sensitive)
+// name in the Java action below, so they must match the bindings.get() keys.
(
- ({Title.gender == male} | {Title.gender == female})
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )
+ ({Title.gender == male} | {Title.gender == female}):title
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+
)
:person -->
+
{
gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
-if (title != null && title.size()>0)
-{
- gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
- features.put("kind", "personName");
- features.put("rule", "PersonFirstTitleGender");
-outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
-features);
-}
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet =
(gate.AnnotationSet)bindings.get("firstName");
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
-Rule: PersonTitleGender
-Priority: 18
-// use this rule if the title has a feature gender
-// Miss F Smith
-(
- ({Title.gender == male}|
- {Title.gender == female}
- )
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )*
- (UPPER)
- (PERSONENDING)?
-)
-:person -->
-{
- gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
-// if the annotation type title doesn't exist, do nothing
-if (title != null && title.size()>0)
-{
-// if it does exist, take the first element in the set
- gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
-//propagate gender feature (and value) from title
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
-// create some new features
- features.put("kind", "personName");
- features.put("rule", "PersonTitleGender");
-// create a TempPerson annotation and add the features we've created
-outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+ gate.Annotation firstNameAnn =
(gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ }
+
+ features.put("kind", "personName");
+
+ features.put("rule", "TitleFirstName");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
+
Rule: PersonJobTitle
Priority: 20
// note we include titles but not jobtitles in markup
@@ -377,12 +386,12 @@
((FIRSTNAME | FIRSTNAMEAMBIG )
)
(PREFIX)*
- (UPPER)
+ ({Upper,!Initials})
(PERSONENDING)?
)
:person
-->
- :person.TempPerson = {kind = "personName", rule = "PersonJobTitle"},
+ :person.TempPerson = {kind = "fullName", rule = "PersonJobTitle"},
:jobtitle.JobTitle = {rule = "PersonJobTitle"}
@@ -403,6 +412,8 @@
)
:person -->
{}
+
+
Rule: FirstPersonStop
Priority: 50
// John And
@@ -422,7 +433,7 @@
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
+features.put("kind", "firstName");
features.put("rule", "GazPersonFirst");
outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
features);
@@ -442,7 +453,7 @@
)
(
(PREFIX)*
- (UPPER)
+ ({Upper})
(PERSONENDING)?
):foo
-->
@@ -452,33 +463,91 @@
Rule: LocPersonAmbig1
Priority: 50
-// Location + Surname
+// Location + Possible Surname --> Location only (ignore Surname)
+
(
{Lookup.majorType == location}
):loc
(
(PREFIX)*
- (UPPER)
+ ({Upper,!Initials})
(PERSONENDING)
):foo
-->
:loc.TempLocation = {kind = "locName", rule = LocPersonAmbig1}
+
Rule: LocPersonAmbig2
Priority: 50
-// Location + Surname
+// Location + Prefix + Possible Surname --> Location only (ignore Surname)
+
(
{Lookup.majorType == location}
):loc
(
(PREFIX)
- (UPPER)
+ ({Upper,!Initials})
(PERSONENDING)?
):foo
-->
:loc.TempLocation = {kind = "locName", rule = LocPersonAmbig2}
+
+Rule: PersonFullInitials
+Priority: 10
+// F.W. Jones
+
+(
+ {Token.category == DT}
+)?
+(
+
+ ({Initials}):initials
+ ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
+ ((PREFIX)*
+ ({Upper,!Initials})
+ (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
+
+ gate.AnnotationSet initialsSet =
(gate.AnnotationSet)bindings.get("initials");
+ List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+ Long initialsStart = gate.Utils.start(initialsList.get(0));
+ Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() -
1));
+ String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart,
initialsEnd);
+ features.put("initials", initialsContent);
+
+
+ gate.AnnotationSet middleNameSet =
(gate.AnnotationSet)bindings.get("middleName");
+
+ if (middleNameSet != null && middleNameSet.size()>0)
+{
+ gate.Annotation middleNameAnn =
(gate.Annotation)middleNameSet.iterator().next();
+ String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn);
+ features.put("middleName", middleNameContent);
+ features.put("gender", middleNameAnn.getFeatures().get("gender"));
+}
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFullInitials");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+
+}
+
+
Rule: PersonFull
Priority: 10
// F.W. Jones
@@ -487,31 +556,46 @@
{Token.category == DT}
)?
(
- ((FIRSTNAME | FIRSTNAMEAMBIG) )+
- (PREFIX)*
- (UPPER)
- (PERSONENDING)?
-)
-:person -->
+
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+ ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
+ ((PREFIX)*
+ ({Upper,!Initials})
+ (PERSONENDING)?
+ ):surname
+):person -->
{
gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
- //get all firstPerson annotations that have a gender feature
- HashSet fNames = new HashSet();
- fNames.add("gender");
- gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
+ gate.AnnotationSet firstNameSet =
(gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn =
(gate.Annotation)firstNameSet.iterator().next();
+
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
- if (firstPerson != null && firstPerson.size()>0)
- {
- //Out.prln("First persons found " + firstPerson);
- gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
+ gate.AnnotationSet middleNameSet =
(gate.AnnotationSet)bindings.get("middleName");
+
+ if (middleNameSet != null && middleNameSet.size()>0)
+{
+ gate.Annotation middleNameAnn =
(gate.Annotation)middleNameSet.iterator().next();
+ String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
+ features.put("middleName", middleNameContent);
}
- features.put("kind", "personName");
- features.put("rule", "PersonFull");
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFull");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
+
}
Rule: PersonFullStop
@@ -521,20 +605,20 @@
(
((FIRSTNAME | FIRSTNAMEAMBIG) )
(PREFIX)*
- (UPPER)
+ ({Upper})
):person
(
{Lookup.majorType == date}
)
-->
- :person.TempPerson = {kind = "personName", rule = "PersonFullStop"}
+ :person.TempPerson = {kind = "fullName", rule = "PersonFullStop"}
Rule: NotPersonFullReverse
Priority: 20
// XYZ, I
(
- (UPPER)
+ ({Upper})
{Token.string == ","}
{Token.category == PRP}
(PERSONENDING)?
@@ -562,7 +646,7 @@
gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
features.put("gender", personAnn.getFeatures().get("gender"));
}
- features.put("kind", "personName");
+ features.put("kind", "firstName");
features.put("rule", "PersonSaint");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
@@ -576,26 +660,71 @@
// Christian name + Location --> Person's Name
(
- {Lookup.majorType == person_first}
- {Lookup.majorType == location}
-)
-:person -->
+ ({Lookup.majorType == person_first}):firstName
+ ({Lookup.majorType == location}):surname
+):person -->
{
gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet firstPerson =
(gate.AnnotationSet)inputAS.get("FirstPerson",
personSet.firstNode().getOffset(), personSet.lastNode().getOffset());
-if (firstPerson != null && firstPerson.size()>0)
-{
- gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
- features.put("kind", "personName");
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
+
+ gate.AnnotationSet firstNameSet =
(gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn =
(gate.Annotation)firstNameSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ features.put("gender", firstNameAnn.getFeatures().get("minorType"));
+
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
features.put("rule", "PersonLocAmbig");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
+Rule: TitlePersonLocAmbig
+Priority: 50
+// Professor London
+// title + Location --> Person's Name
+
+(
+ ({Title}):title
+ ({Lookup.majorType == location}):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ String titleContent = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", titleContent);
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "TitlePersonLocAmbig");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+
Rule:PersonOrgAmbig
Priority: 50
// if the last name is an organisation ending, treat as an organisation not
person
@@ -730,14 +859,14 @@
{Token.string == "&"}
(
- {Token.orth == upperInitial, Token.category != PRP}
+ {Token.orth == upperInitial}
)+
(CDG)?
)
:orgName -->
- :orgName.TempOrganization = {orgType = "unknown", rule = "INOrgXandY"}
+ :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"}
Rule: OrgXandY
Priority: 20
@@ -753,10 +882,10 @@
{Token.string == "&"}
(
- {Token.orth == upperInitial, Token.category != PRP }
+ {Token.orth == upperInitial}
)+
- (CDG)
+ (CDG)?
)
:orgName -->
@@ -806,11 +935,11 @@
{Token.category == DT}
)
(
- (UPPER)
- (UPPER)?
- (UPPER)?
- (UPPER)?
- (UPPER)?
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
{Lookup.majorType == org_key}
({Lookup.majorType == org_ending})?
)
@@ -834,6 +963,7 @@
{}
+
Rule: NotTheKey
Priority: 200
@@ -846,17 +976,18 @@
-->
{}
+
Rule: OrgXKey
Priority: 125
// Aaaa Ltd.
-
+({Token.category == DT})?
(
- ((UPPER))
- (UPPER)?
- (UPPER)?
- (UPPER)?
- (UPPER)?
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
{Lookup.majorType == org_key}
({Lookup.majorType == org_ending})?
)
@@ -889,8 +1020,8 @@
{Token.category == DT}
)
(
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
{Lookup.majorType == cdg}
)
:orgName -->
@@ -904,8 +1035,8 @@
// Coca Cola Co.
(
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
{Lookup.majorType == cdg}
)
:orgName -->
@@ -918,13 +1049,13 @@
{Token.category == DT}
)
(
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
(({Token.string == "and"} |
{Token.string == "&"})
- (UPPER)?
- (UPPER)?
- (UPPER)?
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
)
{Lookup.majorType == org_key}
({Lookup.majorType == org_ending})?
@@ -943,13 +1074,13 @@
// but NOT A XXX Services Ltd.
(
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
(({Token.string == "and"} |
{Token.string == "&"})
- (UPPER)?
- (UPPER)?
- (UPPER)?
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
)
{Lookup.majorType == org_key}
({Lookup.majorType == org_ending})?
@@ -965,8 +1096,8 @@
// Queen's Ware
(
- (UPPER)?
- (UPPER)?
+ ({Upper})?
+ ({Upper})?
({Token.orth == upperInitial}
{Token.string == "'"}
({Token.string == "s"})?
@@ -1008,19 +1139,19 @@
)
(
(
- (UPPER)|
+ ({Upper})|
{Lookup.majorType == organization}
)
- (UPPER)?
- (UPPER)?
+ ({Upper})?
+ ({Upper})?
({Lookup.majorType == org_base}|
{Lookup.majorType == govern_key}
)
(
{Token.string == "of"}
- (UPPER)
- (UPPER)?
- (UPPER)?
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
)?
)
:orgName -->
@@ -1037,19 +1168,19 @@
(
(
- (UPPER)|
+ ({Upper})|
{Lookup.majorType == organization}
)
- (UPPER)?
- (UPPER)?
+ ({Upper})?
+ ({Upper})?
({Lookup.majorType == org_base}|
{Lookup.majorType == govern_key}
)
(
{Token.string == "of"}
- (UPPER)
- (UPPER)?
- (UPPER)?
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
)?
)
:orgName -->
@@ -1070,8 +1201,8 @@
(
{Token.category == DT}
)?
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
)
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
@@ -1091,8 +1222,8 @@
(
{Token.category == DT}
)?
- (UPPER)
- (UPPER)?
+ ({Upper})
+ ({Upper})?
)
:orgName -->
:orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
@@ -1115,7 +1246,6 @@
-
Rule: OrgChurch
Priority: 150
// St. Andrew's Church
@@ -1136,7 +1266,9 @@
// overrides PersonFull
(
+ (TITLE)?
(FIRSTNAME)
+ {Token.string == "'"}({Token.string == "s"})?
({Lookup.majorType == org_key}|
{Lookup.majorType == org_base})
({Lookup.majorType == org_ending})?
@@ -1284,7 +1416,7 @@
(
({Lookup.majorType == loc_key, Lookup.minorType == pre}
)
- (UPPER)
+ ({Upper})
(
{Lookup.majorType == loc_key, Lookup.minorType == post})?
)
@@ -1297,8 +1429,7 @@
Rule:InLoc1
(
- {Token.string == "in"}|
- {Token.string == "to"}
+ {Token.string == "in"}
)
(
{Lookup.majorType == location}
@@ -1314,7 +1445,7 @@
{Token.string == "of"}
)
(
- (UPPER)
+ ({Upper})
)
:loc
-->
@@ -1329,9 +1460,9 @@
{Token.string == "company"}
)
(
- (UPPER)
- (UPPER)?
- (UPPER)?
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
)
:org
-->
@@ -1344,9 +1475,9 @@
// Medici offices
(
- (UPPER)
- (UPPER)?
- (UPPER)?
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
)
: org
(
@@ -1374,9 +1505,9 @@
)
)
(
- (UPPER)
- (UPPER)?
- (UPPER)?
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
)
:org
-->
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Managing the Performance of Cloud-Based Applications
Take advantage of what the Cloud has to offer - Avoid Common Pitfalls.
Read the Whitepaper.
http://pubads.g.doubleclick.net/gampad/clk?id=121054471&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs