Revision: 19646
http://sourceforge.net/p/gate/code/19646
Author: dgmaynard
Date: 2016-10-06 12:35:17 +0000 (Thu, 06 Oct 2016)
Log Message:
-----------
updating to latest English version
Modified Paths:
--------------
gate/trunk/plugins/Lang_French/grammar/clean.jape
gate/trunk/plugins/Lang_French/grammar/date.jape
gate/trunk/plugins/Lang_French/grammar/date_pre.jape
gate/trunk/plugins/Lang_French/grammar/email.jape
gate/trunk/plugins/Lang_French/grammar/final.jape
gate/trunk/plugins/Lang_French/grammar/first.jape
gate/trunk/plugins/Lang_French/grammar/firstname.jape
gate/trunk/plugins/Lang_French/grammar/loc_context.jape
gate/trunk/plugins/Lang_French/grammar/main.jape
gate/trunk/plugins/Lang_French/grammar/name.jape
gate/trunk/plugins/Lang_French/grammar/name_context.jape
gate/trunk/plugins/Lang_French/grammar/name_post.jape
gate/trunk/plugins/Lang_French/grammar/number.jape
gate/trunk/plugins/Lang_French/grammar/org_context.jape
gate/trunk/plugins/Lang_French/grammar/reldate.jape
gate/trunk/plugins/Lang_French/grammar/unknown.jape
gate/trunk/plugins/Lang_French/grammar/url.jape
Added Paths:
-----------
gate/trunk/plugins/Lang_French/grammar/document_date.jape
gate/trunk/plugins/Lang_French/grammar/hyphens.jape
gate/trunk/plugins/Lang_French/grammar/main-twitter.jape
gate/trunk/plugins/Lang_French/grammar/name-twitter.jape
gate/trunk/plugins/Lang_French/grammar/number_clean.jape
gate/trunk/plugins/Lang_French/grammar/numberletter.jape
Modified: gate/trunk/plugins/Lang_French/grammar/clean.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/clean.jape 2016-10-06 12:34:37 UTC
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/clean.jape 2016-10-06 12:35:17 UTC
(rev 19646)
@@ -14,8 +14,8 @@
*/
Phase: Clean
-Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs
-Options: control = appelt
+Input: TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs ClosedClass Initials Upper FirstPerson JobTitle HashtagToken HashtagLookup NumberLetter Temp Title UrlPre
+Options: control = all
Rule:CleanTempAnnotations
(
@@ -33,7 +33,18 @@
{Phone}|
{Ip}|
{TempIdentifier}|
- {TempSpecs}
+ {TempSpecs}|
+ {ClosedClass}|
+ {Upper}|
+ {Initials}|
+ {FirstPerson}|
+ {JobTitle}|
+ {HashtagToken}|
+ {HashtagLookup}|
+ {Title}|
+ {UrlPre}|
+ {Temp}|
+ {NumberLetter}
):temp
-->
{
Modified: gate/trunk/plugins/Lang_French/grammar/date.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/date.jape 2016-10-06 12:34:37 UTC
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/date.jape 2016-10-06 12:35:17 UTC
(rev 19646)
@@ -21,13 +21,6 @@
/////////////////////////////////////////////////
-Macro: DEF_ART_SING
-(
- {Token.string == "le"}|
- {Token.string == "la"}|
- {Token.string == "l"}{Token.string == "'"}
- )
-
Macro: DAY_NAME
({Lookup.minorType == day })
@@ -70,6 +63,9 @@
Macro: DASH
{Token.string == "-"}
+Macro: DOT
+ {Token.string == "."}
+
Macro: OF
{Token.string == "of"}
@@ -112,11 +108,7 @@
Macro: ORDINAL
(
- ({Token.kind == number}
- ({Token.string == "th"}|
- {Token.string == "rd"}|
- {Token.string == "nd"}|
- {Token.string == "st"})
+ ({Token.string ==~ "[0-9][0-9]?(th|rd|nd|st)"}
|
{Lookup.minorType == ordinal})
(
@@ -170,33 +162,38 @@
Rule: TimeDigital2
-// 04h30
-// 6h
+// 8:14 am
+// 4.34 pm
+// 6am
+
(
 (ONE_DIGIT|TWO_DIGIT)
- ({Token.string == "h"}|{Token.string == "H"})
- (TWO_DIGIT)
+ (({Token.string == ":"}|{Token.string == "."} |{Token.string == "-"} )
+ TWO_DIGIT)?
+ (TIME_AMPM)
 (TIME_ZONE)?
)
:time
-->
:time.TempTime = {kind = "positive", rule = "TimeDigital2"}
Rule: TimeOClock
-// dix heures
+// ten o'clock
(
{Lookup.minorType == hour}
- {Token.string == "heures"}
+ {Token.string == "o"}
+ {Token.string == "'"}
+ {Token.string == "clock"}
)
:time
-->
:time.TempTime = {kind = "positive", rule = "TimeOClock"}
-/*Rule: TimeAnalogue
+Rule: TimeAnalogue
// half past ten
// ten to twelve
// twenty six minutes to twelve
@@ -217,9 +214,9 @@
:time
-->
:time.TempTime = {kind = "positive", rule = "TimeAnalogue"}
-*/
-/*Rule: TimeWordsContext
+
+Rule: TimeWordsContext
Priority: 50
// seven thirty tomorrow
@@ -234,10 +231,10 @@
)
-->
:time1.TempTime = {kind = "positive", rule = "TimeWordsContext"}
-*/
-/*Rule: TimeWords
+Rule: TimeWords
+
(
{Lookup.majorType == number}
(
@@ -248,7 +245,7 @@
-->
:time.TempTime = {kind = "timeWords", rule = "TimeWords"}
- */
+
Rule: TimeDigitalContext1
@@ -356,20 +353,16 @@
// Date Rules
-// commented out this rule because Date and Person are not included in the
-// Input headers and I have no idea if adding them will mess up other rules
+//Rule: IgnoreDatePerson
+//Priority: 500
+//(
+// {Date}
+// {Person}
+//)
+//:date
+//-->
+//{}
-/*
-Rule: IgnoreDatePerson
-Priority: 500
-(
- {Date}
- {Person}
-)
-:date
--->
-{}
-*/
Rule: DateSlash // UK only
@@ -405,7 +398,6 @@
:date.TempDate = {rule = "DateDash"}
-
Rule: DateName
Priority: 20
// Wed 10 July
@@ -417,7 +409,6 @@
// July, 2000
(
- (DEF_ART_SING)?
(DAY_NAME NUM_OR_ORDINAL MONTH_NAME)|
(DAY_NAME (COMMA)?
@@ -505,16 +496,16 @@
:date.TempDate = {rule = "DateNumDashRev"}
-Rule: DateNumSlash
+Rule: DateNumSlashDot
// 01/07/00
// Note: not 07/00
(
-DAY_MONTH_NUM SLASH DAY_MONTH_NUM SLASH YEAR
+DAY_MONTH_NUM (SLASH|DOT) DAY_MONTH_NUM (SLASH|DOT) YEAR
)
:date
-->
- :date.TempDate = {rule = "DateNumSlash"}
+ :date.TempDate = {rule = "DateNumSlashDot"}
Rule: ModifierMonth
@@ -626,7 +617,7 @@
(FOUR_DIGIT)
:date -->
- :date.TempYear = {kind = "positive", rule = "TempYear3"}
+ :date.TempYear = {kind = "negative", rule = "TempYear3"}
Rule: YearWords
Modified: gate/trunk/plugins/Lang_French/grammar/date_pre.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/date_pre.jape 2016-10-06
12:34:37 UTC (rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/date_pre.jape 2016-10-06
12:35:17 UTC (rev 19646)
@@ -51,6 +51,16 @@
-->
:date.TempDate = {rule = "GazDate"}
+Rule: GazDateAmbig
+Priority: 200
+(SPACE | {Token.kind == punctuation})
+(
+ {Token.string == "Sun"}
+)
+:date
+(SPACE | {Token.kind == punctuation})
+-->
+ :date.TempDate = {rule = "GazDateAmbig", }
Rule: PersonDateAmbig
Priority: 100
Added: gate/trunk/plugins/Lang_French/grammar/document_date.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/document_date.jape
(rev 0)
+++ gate/trunk/plugins/Lang_French/grammar/document_date.jape 2016-10-06
12:35:17 UTC (rev 19646)
@@ -0,0 +1,27 @@
+// Phase DateHeader: records the text of a DCT annotation as the
+// "document-date" feature of the document.
+Phase: DateHeader
+Input: DCT
+Options: control = appelt
+
+// Fires on each DCT annotation. If the annotated text is exactly eight
+// digits (presumably YYYYMMDD -- TODO confirm against the input format),
+// it is regrouped as 4-2-2 digits joined by "-" and stored under the
+// "document-date" key of the document features. Any other text is ignored.
+Rule: DCT
+(
+ {DCT}
+):tag
+-->
+{
+gate.AnnotationSet tagSet = (gate.AnnotationSet)bindings.get("tag");
+gate.Annotation tagAnn = (gate.Annotation)tagSet.iterator().next();
+
+// Text covered by the DCT annotation.
+String s = gate.Utils.stringFor(doc, tagAnn);
+
+if (s.matches("^\\d{8}$")) {
+  String s1 = s.substring(0,4) + "-" + s.substring(4,6) + "-" + s.substring(6,8);
+  doc.getFeatures().put("document-date", s1);
+}
+}
Property changes on: gate/trunk/plugins/Lang_French/grammar/document_date.jape
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Modified: gate/trunk/plugins/Lang_French/grammar/email.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/email.jape 2016-10-06 12:34:37 UTC
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/email.jape 2016-10-06 12:35:17 UTC
(rev 19646)
@@ -25,15 +25,17 @@
(
(
{Token.kind == word}|
- {Token.kind == number}|
+ {Token.kind == number}
+ )[1,9]
+ (
{Token.string == "_"}
- )
- ({Token.string == "."}
+ )?
+ ({Token.string == "."})?
({Token.kind == word}|
{Token.kind == number}|
{Token.string == "_"}
- )
- )?
+ )[0,9]
+
{Token.string == "@"}
(
{Token.kind == word}|
@@ -47,7 +49,7 @@
{Token.kind == symbol}|
{Token.kind == punctuation}|
{Token.kind == number}
- )?
+ )[0,9]
({Token.string == "."})?
(
{Token.kind == word}|
Modified: gate/trunk/plugins/Lang_French/grammar/final.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/final.jape 2016-10-06 12:34:37 UTC
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/final.jape 2016-10-06 12:35:17 UTC
(rev 19646)
@@ -16,34 +16,34 @@
//note: organization should be included as part of the address ??
Phase: Final
-Input: Token Lookup JobTitle TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs
+Input: Token Lookup Jobtitle TempPerson TempLocation TempOrganization TempDate TempTime TempYear TempZone Street Postcode Email Url Phone Ip TempIdentifier TempSpecs Title Split Money
Options: control = appelt
///////////////////////////////////////////////////////////////
+Rule: Money
+Priority: 200
+(
+ {Money}
+)
+-->
+{}
Rule: PersonFinal
Priority: 30
-({JobTitle}
-)?
+
(
- {TempPerson.kind == personName}
-)+
+ {TempPerson}
+)
:person
-->
{
gate.FeatureMap features = Factory.newFeatureMap();
gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation person1Ann = (gate.Annotation)personSet.iterator().next();
+gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
-gate.AnnotationSet firstPerson =
(gate.AnnotationSet)personSet.get("TempPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
- gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
- features.put("rule1", person1Ann.getFeatures().get("rule"));
- features.put("rule", "PersonFinal");
+features.putAll(personAnn.getFeatures());
+features.put("ruleFinal", "PersonFinal");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "Person",
features);
outputAS.removeAll(personSet);
@@ -75,21 +75,23 @@
-->
{
//removes TempOrg annotation, gets the rule feature and adds a new Org
annotation
-gate.AnnotationSet org = (gate.AnnotationSet)bindings.get("org");
-gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
+gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("org");
+//locate the first TempOrganization annotation
+//(there will always be at least one)
+gate.Annotation orgAnn = orgSet.get("TempOrganization").iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("orgType", orgAnn.getFeatures().get("orgType"));
-features.put("rule1", orgAnn.getFeatures().get("rule"));
-features.put("rule2", "OrgCountryFinal");
-outputAS.add(org.firstNode(), org.lastNode(), "Organization",
+
+features.putAll(orgAnn.getFeatures());
+features.put("ruleFinal", "OrgCountryFinal");
+outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "Organization",
features);
-outputAS.removeAll(org);
+outputAS.removeAll(orgSet);
}
+// note - move this rule to after final
+// another note - I have no idea why the original note is there, or even which
rule this refers to
-//note - move this rule to after final
-
Rule: OrgFinal
Priority: 10
(
@@ -99,21 +101,21 @@
-->
{
//removes TempOrg annotation, gets the rule feature and adds a new Org
annotation
-gate.AnnotationSet org = (gate.AnnotationSet)bindings.get("org");
-gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
+gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("org");
+gate.Annotation orgAnn = (gate.Annotation)orgSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("orgType", orgAnn.getFeatures().get("orgType"));
-features.put("rule1", orgAnn.getFeatures().get("rule"));
-features.put("rule2", "OrgFinal");
-outputAS.add(org.firstNode(), org.lastNode(), "Organization",
+
+features.putAll(orgAnn.getFeatures());
+features.put("ruleFinal", "OrgFinal");
+outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "Organization",
features);
-outputAS.removeAll(org);
+outputAS.removeAll(orgSet);
}
Rule: PersonLocFinal
Priority: 100
-// George Airport
+// George Airport is a Location not a Person
// later we might change this to any facility, rather than just airports
(
@@ -125,14 +127,15 @@
-->
{
//removes TempLoc annotation, gets the rule feature and adds a new Loc
annotation
-gate.AnnotationSet loc = (gate.AnnotationSet)bindings.get("loc");
-gate.Annotation locAnn = (gate.Annotation)loc.iterator().next();
+gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("loc");
+gate.Annotation locAnn = (gate.Annotation)locSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", locAnn.getFeatures().get("rule"));
-features.put("rule2", "PersonLocFinal");
-outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
+features.putAll(locAnn.getFeatures());
+features.put("ruleFinal", "PersonLocFinal");
+features.put("locType", "airport");
+outputAS.add(locSet.firstNode(), locSet.lastNode(), "Location",
features);
-outputAS.removeAll(loc);
+outputAS.removeAll(locSet);
}
@@ -146,15 +149,14 @@
-->
{
//removes TempLoc annotation, gets the rule feature and adds a new Loc
annotation
-gate.AnnotationSet loc = (gate.AnnotationSet)bindings.get("loc");
-gate.Annotation locAnn = (gate.Annotation)loc.iterator().next();
+gate.AnnotationSet locSet = (gate.AnnotationSet)bindings.get("loc");
+gate.Annotation locAnn = (gate.Annotation)locSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("locType",locAnn.getFeatures().get("locType"));
-features.put("rule1", locAnn.getFeatures().get("rule"));
-features.put("rule2", "LocFinal");
-outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
+features.putAll(locAnn.getFeatures());
+features.put("ruleFinal", "LocFinal");
+outputAS.add(locSet.firstNode(), locSet.lastNode(), "Location",
features);
-outputAS.removeAll(loc);
+outputAS.removeAll(locSet);
}
@@ -165,13 +167,15 @@
Rule: DateTimeFinal
Priority: 20
// Friday 10 January 2000 2pm
+// 2008-01-25T16:10:48
(
{TempDate}
(
({Token.string == ","})?
{TempDate})?
- ({Token.string == ":"})?
+ ({Token.string == ":"}|
+ {Token.string == "T"})?
{TempTime}
({TempYear})?
({TempZone})?
@@ -180,15 +184,14 @@
-->
{
//removes TempDate annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-//features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "DateTimeFinal");
+features.put("ruleFinal", "DateTimeFinal");
features.put("kind", "dateTime");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
}
@@ -203,15 +206,15 @@
-->
{
//removes TempDate annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "SeasonYearFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "SeasonYearFinal");
features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
}
@@ -229,15 +232,15 @@
-->
{
//removes TempDate annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "DateYearFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "DateYearFinal"); // same key as the other *Final rules
features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
}
@@ -256,15 +259,14 @@
-->
{
//removes TempDate annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-//features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "TimeDateFinal");
+features.put("ruleFinal", "TimeDateFinal");
features.put("kind", "dateTime");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
}
@@ -282,15 +284,14 @@
-->
{
//removes TempDate annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-//features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "TimeYearFinal");
+features.put("ruleFinal", "TimeYearFinal");
features.put("kind", "dateTime");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
}
@@ -298,7 +299,10 @@
Rule: DateOnlyFinal
-Priority: 10
+Priority: 50
+(
+ {Title}
+)?
(
{TempDate}
)
@@ -306,15 +310,15 @@
-->
{
//removes TempDate annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "DateOnlyFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "DateOnlyFinal");
features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
}
//fix this later
@@ -333,25 +337,26 @@
-->
{
//removes TempDate annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule", "TimeContextFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "TimeContextFinal");
features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
+
//removes TempTime annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet time = (gate.AnnotationSet)bindings.get("time");
-gate.Annotation timeAnn = (gate.Annotation)time.iterator().next();
+gate.AnnotationSet timeSet = (gate.AnnotationSet)bindings.get("time");
+gate.Annotation timeAnn = (gate.Annotation)timeSet.iterator().next();
gate.FeatureMap features2 = Factory.newFeatureMap();
-features2.put("rule1", timeAnn.getFeatures().get("rule"));
-features2.put("rule", "TimeContextFinal");
+features2.putAll(timeAnn.getFeatures()); // must target features2: "features" is already attached to the date annotation above
+features2.put("ruleFinal", "TimeContextFinal");
features2.put("kind", "time");
-outputAS.add(time.firstNode(), date.lastNode(), "Date",
+outputAS.add(timeSet.firstNode(), timeSet.lastNode(), "Date",
features2);
-outputAS.removeAll(time);
+outputAS.removeAll(timeSet);
}
@@ -368,15 +373,15 @@
-->
{
//removes TempTime annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "TimeWordsContextFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "TimeWordsContextFinal");
features.put("kind", "time");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
}
@@ -389,15 +394,15 @@
-->
{
//removes TempDate annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "YearOnlyFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "YearOnlyFinal");
features.put("kind", "date");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
}
@@ -411,15 +416,15 @@
-->
{
//removes TempDate annotation, gets the rule feature and adds a new Date
annotation
-gate.AnnotationSet date = (gate.AnnotationSet)bindings.get("date");
-gate.Annotation dateAnn = (gate.Annotation)date.iterator().next();
+gate.AnnotationSet dateSet = (gate.AnnotationSet)bindings.get("date");
+gate.Annotation dateAnn = (gate.Annotation)dateSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", dateAnn.getFeatures().get("rule"));
-features.put("rule2", "TimeOnlyFinal");
+features.putAll(dateAnn.getFeatures());
+features.put("ruleFinal", "TimeOnlyFinal");
features.put("kind", "time");
-outputAS.add(date.firstNode(), date.lastNode(), "Date",
+outputAS.add(dateSet.firstNode(), dateSet.lastNode(), "Date",
features);
-outputAS.removeAll(date);
+outputAS.removeAll(dateSet);
}
@@ -440,15 +445,15 @@
-->
{
//removes TempAddress annotation, gets the rule feature and adds a new Address
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "AddressFull");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "AddressFull");
features.put("kind", "complete");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
}
@@ -461,15 +466,15 @@
-->
{
//removes Email annotation, gets the rule feature and adds a new Address
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "EmailFinal");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "EmailFinal");
features.put("kind", "email");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
}
@@ -482,15 +487,15 @@
-->
{
//removes TempAddress annotation, gets the rule feature and adds a new Address
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "PhoneFinal");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "PhoneFinal");
features.put("kind", "phone");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
}
@@ -503,15 +508,15 @@
-->
{
//removes TempAddress annotation, gets the rule feature and adds a new Address
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "PostcodeFinal");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "PostcodeFinal");
features.put("kind", "postcode");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
}
@@ -524,15 +529,15 @@
-->
{
//removes TempAddress annotation, gets the rule feature and adds a new Address
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "IpFinal");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "IpFinal");
features.put("kind", "ip");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
}
@@ -545,15 +550,15 @@
-->
{
//removes TempAddress annotation, gets the rule feature and adds a new Address
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "UrlFinal");
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "UrlFinal"); // same key as the other *Final rules
features.put("kind", "url");
-outputAS.add(address.firstNode(), address.lastNode(), "Address",
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Address",
features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
}
@@ -567,14 +572,14 @@
-->
{
//removes TempAddress annotation, gets the rule feature and adds a new Address
annotation
-gate.AnnotationSet address = (gate.AnnotationSet)bindings.get("address");
-gate.Annotation addressAnn = (gate.Annotation)address.iterator().next();
+gate.AnnotationSet addressSet = (gate.AnnotationSet)bindings.get("address");
+gate.Annotation addressAnn = (gate.Annotation)addressSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", addressAnn.getFeatures().get("rule"));
-features.put("rule2", "StreetFinal");
-outputAS.add(address.firstNode(), address.lastNode(), "Location",
+features.putAll(addressAnn.getFeatures());
+features.put("ruleFinal", "StreetFinal");
+outputAS.add(addressSet.firstNode(), addressSet.lastNode(), "Location",
features);
-outputAS.removeAll(address);
+outputAS.removeAll(addressSet);
}
////////////////////////////////////////////////////////////
@@ -590,14 +595,14 @@
-->
{
//removes TempIdent annotation, gets the rule feature and adds a new
Identifier annotation
-gate.AnnotationSet ident = (gate.AnnotationSet)bindings.get("ident");
-gate.Annotation identAnn = (gate.Annotation)ident.iterator().next();
+gate.AnnotationSet identSet = (gate.AnnotationSet)bindings.get("ident");
+gate.Annotation identAnn = (gate.Annotation)identSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule1", identAnn.getFeatures().get("rule"));
-features.put("rule2", "IdentifierFinal");
-outputAS.add(ident.firstNode(), ident.lastNode(), "Identifier",
+features.putAll(identAnn.getFeatures());
+features.put("ruleFinal", "IdentifierFinal");
+outputAS.add(identSet.firstNode(), identSet.lastNode(), "Identifier",
features);
-outputAS.removeAll(ident);
+outputAS.removeAll(identSet);
}
@@ -613,38 +618,10 @@
-->
{
//removes TempSpecs annotation
-gate.AnnotationSet spec = (gate.AnnotationSet)bindings.get("spec");
+gate.AnnotationSet specSet = (gate.AnnotationSet)bindings.get("spec");
//gate.FeatureMap features = Factory.newFeatureMap();
-outputAS.removeAll(spec);
+outputAS.removeAll(specSet);
}
//////////////////////////////////////////////////////
-Rule: UnknownPerson
-Priority: 5
-(
- {Token.category == NNP}
- (({Token.string == "-"})?
- {Token.category == NNP})?
- ( {Token.category == NNP})?
- ( {Token.category == NNP})?
-):unknown
-
-(
- {TempPerson}
-):person
--->
-:unknown.Unknown = {kind = "PN", rule = UnknownTempPerson},
-{
-//removes TempPerson annotation, gets the rule feature and adds a new Person
annotation
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("rule1", personAnn.getFeatures().get("rule"));
-features.put("rule2", "UnknownPerson");
-outputAS.add(person.firstNode(), person.lastNode(), "Person",
-features);
-outputAS.removeAll(person);
-}
-
Modified: gate/trunk/plugins/Lang_French/grammar/first.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/first.jape 2016-10-06 12:34:37 UTC
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/first.jape 2016-10-06 12:35:17 UTC
(rev 19646)
@@ -14,11 +14,10 @@
*/
Phase: First
-Input: Token Lookup
+Input: Token NumberLetter
Options: control = appelt
// this has to be run first of all
-// contains any macros etc needed only for standard grammars
//////////////////////////////////////////////////////////////
Macro: SPACE
@@ -40,21 +39,59 @@
)
+
+
///////////////////////////////////////////////////////////////
-Rule: Silly
-// we have to have a rule here, so we'll just have something silly
+Rule: ClosedClass
+// closed class words should not be part of names generally, so let's identify
them
+// Annotates a single Token whose category is DT, PRP, RB or IN as ClosedClass.
+Priority: 100
(
- {Token.string == "afguahughaegarth"}
-)
-:silly
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}|
+ {Token.category == IN}
+):tag
-->
- {}
+:tag.ClosedClass = {rule = "ClosedClass"}
+Rule: NumberLetter
+// Matches any NumberLetter annotation and does nothing with it (empty RHS).
+// NOTE(review): presumably a high-priority no-op so that these spans are
+// consumed here instead of being tagged by the Upper rules - confirm.
+Priority: 100
+(
+ {NumberLetter}
+):tag
+-->
+{}
+Rule: UpperAllCaps
+Priority: 100
+// separate proper nouns that are in all caps, as they're more ambiguous
+// Matches an all-caps NNP Token, optionally followed by "-" and a second
+// all-caps NNP Token, and annotates the span as Upper with kind = "allCaps".
+// Note that the rule feature is written as "Upper", not "UpperAllCaps".
+(
+ {Token.category == NNP, Token.orth == allCaps}
+ ({Token.string == "-"}
+ {Token.category == NNP, Token.orth == allCaps}
+ )?
+):tag
+-->
+:tag.Upper = {kind = "allCaps", rule = "Upper"}
+Rule: Upper
+// define what can be a possible proper noun - cater for the fact that POS tag
might not be correct
+// Matches a Token that is tagged NNP or whose orth is upperInitial or
+// mixedCaps, optionally followed by "-" and an NNP Token; the whole span is
+// annotated as Upper (no kind feature).
+(
+ ({Token.category == NNP}|
+ {Token.orth == upperInitial}|
+ {Token.orth == mixedCaps}
+ )
+ ({Token.string == "-"}
+ {Token.category == NNP}
+ )?
+):tag
+-->
+:tag.Upper = {rule = "Upper"}
+
+
Modified: gate/trunk/plugins/Lang_French/grammar/firstname.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/firstname.jape 2016-10-06
12:34:37 UTC (rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/firstname.jape 2016-10-06
12:35:17 UTC (rev 19646)
@@ -14,14 +14,87 @@
*/
Phase: FirstName
-Input: Token Lookup
+Input: Token Lookup ClosedClass NumberLetter UserID
Options: control = appelt
+
+Rule: FirstNameTwitterName
+Priority: 500
+// @fred
+// Matches a person_first Lookup (kind != ambig) together with a UserID and
+// creates a FirstPerson annotation flagged twittername = "yes".
+
+(
+ {Lookup.majorType == person_first, UserID, Lookup.kind !=ambig}
+):person
+-->
+{
+// keep only the Lookup annotations of the binding
+gate.AnnotationSet person =
(gate.AnnotationSet)bindings.get("person").get("Lookup");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+// the candidate gender is the minorType of the matched Lookup
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+// candidates: person_first Lookups returned by inputAS for this start offset
+Iterator lookupsIter =
inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup",
constraints).iterator();
+// stop at the first candidate of equal end offset whose minorType differs
+// NOTE(review): gender.equals(...) throws a NullPointerException if the
+// matched Lookup has no minorType feature - confirm the gazetteer always sets it
+while(!ambig && lookupsIter.hasNext()){
+ gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+ //we're only interested in annots of the same length
+
if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+ ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+ }
+}
+// gender is only recorded when no conflicting candidate was found
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameTwitterName");
+features.put("twittername", "yes");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
+Rule: FirstNameTwitterNameAmbig
+Priority: 600
+// @mark
+// Same as FirstNameTwitterName but for Lookups with kind == ambig; the
+// resulting FirstPerson additionally carries kind = "ambig".
+
+(
+ {Lookup.majorType == person_first, UserID, Lookup.kind ==ambig}
+):person
+-->
+{
+// keep only the Lookup annotations of the binding
+gate.AnnotationSet person =
(gate.AnnotationSet)bindings.get("person").get("Lookup");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+// the candidate gender is the minorType of the matched Lookup
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+// candidates: person_first Lookups returned by inputAS for this start offset
+Iterator lookupsIter =
inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup",
constraints).iterator();
+// stop at the first candidate of equal end offset whose minorType differs
+// NOTE(review): gender.equals(...) throws a NullPointerException if the
+// matched Lookup has no minorType feature - confirm the gazetteer always sets it
+while(!ambig && lookupsIter.hasNext()){
+ gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+ //we're only interested in annots of the same length
+
if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+ ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+ }
+}
+// gender is only recorded when no conflicting candidate was found
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameTwitterNameAmbig");
+features.put("twittername", "yes");
+features.put("kind", "ambig");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
Rule: FirstName
// Fred
+
(
- {Lookup.majorType == person_first}
+ {Lookup.majorType == person_first, !ClosedClass}
):person
-->
{
@@ -45,18 +118,61 @@
if(!ambig) features.put("gender", gender);
features.put("rule", "FirstName");
+features.put("twittername", "no");
outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
features);
}
+
+Rule: FirstNameAmbig
+Priority: 300
+/* prefer this rule if the firstname has an ambiguous feature in the
gazetteer, e.g. "Christian"
+ In this case, we won't use it in the main name-finding grammar if we find
it on its own,
+ only as part of a longer name
+*/
+// Creates a FirstPerson with kind = "ambig" and twittername = "no".
+
+(
+ {Lookup.majorType == person_first, Lookup.kind == ambig}
+):person
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+
+//find out if the gender is unambiguous
+// the candidate gender is the minorType of the matched Lookup
+String gender = (String)personAnn.getFeatures().get("minorType");
+boolean ambig = false;
+gate.FeatureMap constraints = Factory.newFeatureMap();
+constraints.put("majorType", "person_first");
+// candidates: person_first Lookups returned by inputAS for this start offset
+Iterator lookupsIter =
inputAS.get(personAnn.getStartNode().getOffset()).get("Lookup",
constraints).iterator();
+// stop at the first candidate of equal end offset whose minorType differs
+// NOTE(review): gender.equals(...) throws a NullPointerException if the
+// matched Lookup has no minorType feature - confirm the gazetteer always sets it
+while(!ambig && lookupsIter.hasNext()){
+ gate.Annotation anAnnot = (gate.Annotation)lookupsIter.next();
+ //we're only interested in annots of the same length
+
if(anAnnot.getEndNode().getOffset().equals(personAnn.getEndNode().getOffset())){
+ ambig = !gender.equals(anAnnot.getFeatures().get("minorType"));
+ }
+}
+// gender is only recorded when no conflicting candidate was found
+if(!ambig) features.put("gender", gender);
+
+features.put("rule", "FirstNameAmbig");
+features.put("kind", "ambig");
+features.put("twittername", "no");
+outputAS.add(person.firstNode(), person.lastNode(), "FirstPerson",
+features);
+}
+
+
Rule: TitleGender
Priority: 50
// Mr
(
- {Lookup.majorType == title, Lookup.minorType == male}|
- {Lookup.majorType == title, Lookup.minorType == female}
-):person
+ ({Lookup.majorType == title, Lookup.minorType == male}|
+ {Lookup.majorType == title, Lookup.minorType == female})
+ ({Token.string == "."})?
+)
+:person
-->
{
gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
@@ -73,6 +189,7 @@
(
{Lookup.majorType == title}
+ ({Token.string == "."})?
):person
-->
:person.Title = {rule = "Title"}
@@ -80,6 +197,29 @@
+Rule: Initials1
+// A.B.
+// A.
+// A
+// One or more single-character upperInitial Tokens, each optionally followed
+// by ".", that are neither ClosedClass nor NumberLetter; the whole run is
+// annotated as Initials (no kind feature).
+(
+ ({Token.orth == upperInitial, Token.length =="1", !ClosedClass,
!NumberLetter}
+ ({Token.string == "."})?
+ )+
+):tag
+-->
+:tag.Initials = {rule = "Initials1"}
+Rule: Initials2
+// AB
+// ABC
+// A single all-caps Token of length 2 or 3 with no Lookup, ClosedClass or
+// NumberLetter on it; annotated as Initials with kind = "nopunct".
+
+(
+ {Token.orth == allCaps, Token.length == "2", !Lookup, !ClosedClass,
!NumberLetter} |
+ {Token.orth == allCaps, Token.length == "3", !Lookup, !ClosedClass,
!NumberLetter}
+):tag
+-->
+:tag.Initials = {kind = "nopunct", rule = "Initials2"}
+
+
Added: gate/trunk/plugins/Lang_French/grammar/hyphens.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/hyphens.jape
(rev 0)
+++ gate/trunk/plugins/Lang_French/grammar/hyphens.jape 2016-10-06 12:35:17 UTC
(rev 19646)
@@ -0,0 +1,25 @@
+Phase: Hyphens
+Input: Token SpaceToken
+Options: control = appelt
+
+/* A phase to deal with weird problems in hyphenated words
+*/
+
+Rule: UpperHyphenated
+// two NNPs separated by no white space should also be an Upper.
+// This happens when they're hyphenated and the hyphen is part of the first NNP
+// SpaceToken is in this phase's Input, so two Tokens only match consecutively
+// when no SpaceToken lies between them.
+// Each of the two Tokens may be NNP, upperInitial or mixedCaps.
+
+(
+ ({Token.category == NNP}|
+ {Token.orth == upperInitial}|
+ {Token.orth == mixedCaps}
+ )
+ (
+ {Token.category == NNP}|
+ {Token.orth == upperInitial}|
+ {Token.orth == mixedCaps}
+ )
+):tag
+-->
+:tag.Upper = {rule = "UpperHyphenated"}
+
Modified: gate/trunk/plugins/Lang_French/grammar/loc_context.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/loc_context.jape 2016-10-06
12:34:37 UTC (rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/loc_context.jape 2016-10-06
12:35:17 UTC (rev 19646)
@@ -14,56 +14,59 @@
*/
Phase: Loc_Context
-Input: Unknown Token Location
+Input: Unknown Token Location Lookup
Options: control = appelt
-Rule: LocConjLoc1
-Priority: 10
+//Rule: LocConjLoc1
+//Priority: 10
+// Unknown and Location
-(
-{Unknown.kind == PN}
-):loc
-(
-{Token.category == CC}
-({Token.category == DT}
-)?
-{Location}
-)
--->
-{
-gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule ", "LocConjLoc1");
-outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
-features);
-outputAS.removeAll(loc);
-}
+//(
+//{Unknown.kind == PN}
+//):loc
+//(
+//{Token.category == CC}
+//({Token.category == DT}
+//)?
+//{Location}
+//)
+//-->
+//{
+//gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
+//gate.FeatureMap features = Factory.newFeatureMap();
+//features.put("rule ", "LocConjLoc1");
+//outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
+//features);
+//outputAS.removeAll(loc);
+//}
-Rule: LocConjLoc2
-Priority: 10
+//Rule: LocConjLoc2
+//Priority: 10
-(
- {Location}
- {Token.category == CC}
- ({Token.category == DT}
- )?
-)
-(
- {Unknown.kind == PN}
-):loc
--->
- {
-gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("rule ", "LocConjLoc2");
-outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
-features);
-outputAS.removeAll(loc);
-}
+// Location and Unknown
+//(
+// {Location}
+// {Token.category == CC}
+ //({Token.category == DT}
+ //)?
+//)
+//(
+// {Unknown.kind == PN}
+//):loc
+//-->
+// {
+//gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
+//gate.FeatureMap features = Factory.newFeatureMap();
+//features.put("rule ", "LocConjLoc2");
+//outputAS.add(loc.firstNode(), loc.lastNode(), "Location",
+//features);
+//outputAS.removeAll(loc);
+//}
+
Rule: UnknownLocRegion
Priority: 50
(
@@ -76,10 +79,37 @@
):loc
(
{Token.string == ","}
- {Location.kind == region}
+ {Location.locType == region}
)
-->
- :loc.Location = {rule = "UnknownLocRegion"}
+ :loc.Location = {rule = "UnknownLocRegion"},
+ {
+gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("loc");
+outputAS.removeAll(loc);
+}
+Rule: LocState
+// A city Location, an optional comma, then a Lookup of majorType state:
+// only the state Lookup span is annotated, as Location with locType = region.
+Priority: 100
+(
+ {Location.locType == city}
+ ({Token.string == ","})?
+)
+(
+ {Lookup.majorType == state}
+):tag
+-->
+:tag.Location = {locType = region, rule = "LocState"}
+Rule: UnknownLocKey
+// An Unknown immediately followed by a loc_general_key Lookup: the Unknown
+// span becomes a Location (locType = unknown) and the annotations bound to
+// "tag" (the matched Unknown) are removed from the output set.
+Priority: 20
+(
+ ({Unknown}):tag
+ {Lookup.majorType == loc_general_key}
+)
+-->
+:tag.Location = {locType = unknown, rule = "UnknownLocKey"},
+{
+gate.AnnotationSet loc = (gate.AnnotationSet) bindings.get("tag");
+outputAS.removeAll(loc);
+}
Added: gate/trunk/plugins/Lang_French/grammar/main-twitter.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/main-twitter.jape
(rev 0)
+++ gate/trunk/plugins/Lang_French/grammar/main-twitter.jape 2016-10-06
12:35:17 UTC (rev 19646)
@@ -0,0 +1,38 @@
+/*
+* main-twitter.jape
+*
+* Copyright (c) 1998-2004, The University of Sheffield.
+*
+* This file is part of GATE (see http://gate.ac.uk/), and is free
+* software, licenced under the GNU Library General Public License,
+* Version 2, June 1991 (in the distribution as file licence.html,
+* and also available at http://gate.ac.uk/gate/licence.html).
+*
+* Diana Maynard, 02 Aug 2001
+*
+* $Id: main.jape 9233 2007-11-23 13:01:52Z dgmaynard $
+*/
+
+// Phase list for the Twitter variant of the grammar: name-twitter is run in
+// place of name, and the final clean phase is commented out.
+// NOTE(review): the numberletter and hyphens phases listed in main.jape are
+// not listed here - confirm that this is intentional.
+MultiPhase: TestTheGrammars
+Phases:
+first
+firstname
+name-twitter
+name_post
+date_pre
+date
+reldate
+number
+number_clean
+address
+url_pre
+url
+email
+identifier
+jobtitle
+final
+unknown
+name_context
+org_context
+loc_context
+//clean
Modified: gate/trunk/plugins/Lang_French/grammar/main.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/main.jape 2016-10-06 12:34:37 UTC
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/main.jape 2016-10-06 12:35:17 UTC
(rev 19646)
@@ -14,8 +14,10 @@
*/
MultiPhase: TestTheGrammars
-Phases:
+Phases:
+numberletter
first
+hyphens
firstname
name
name_post
@@ -23,6 +25,7 @@
date
reldate
number
+number_clean
address
url_pre
url
@@ -34,4 +37,4 @@
name_context
org_context
loc_context
-clean
\ No newline at end of file
+clean
Added: gate/trunk/plugins/Lang_French/grammar/name-twitter.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/name-twitter.jape
(rev 0)
+++ gate/trunk/plugins/Lang_French/grammar/name-twitter.jape 2016-10-06
12:35:17 UTC (rev 19646)
@@ -0,0 +1,1726 @@
+/*
+* name-twitter.jape
+*
+* Copyright (c) 1998-2004, The University of Sheffield.
+*
+* This file is part of GATE (see http://gate.ac.uk/), and is free
+* software, licenced under the GNU Library General Public License,
+* Version 2, June 1991 (in the distribution as file licence.html,
+* and also available at http://gate.ac.uk/gate/licence.html).
+*
+* Diana Maynard, 10 Sep 2001
+*
+* $Id: name.jape 18116 2014-06-23 11:35:16Z dgmaynard $
+*/
+
+
+Phase: Name
+Input: Token Lookup Title FirstPerson Upper ClosedClass Initials Split UserID
URL
+Options: control = appelt debug = false
+
+///////////////////////////////////////////////////////////////
+
+// Person Rules
+
+/////////////////////////////////////////////////////////////////
+// A Title annotation optionally followed by a "." Token.
+Macro: TITLE
+(
+ {Title}
+ ({Token.string == "."})?
+)
+
+
+
+
+// A FirstPerson with gender male or female that is not ambiguous
+// (kind != ambig) and not a Twitter name (twittername == no).
+Macro: FIRSTNAME
+
+ ({FirstPerson.gender == male, FirstPerson.kind != ambig,
FirstPerson.twittername == no} |
+ {FirstPerson.gender == female, FirstPerson.kind != ambig,
FirstPerson.twittername == no})
+
+
+
+// A FirstPerson flagged kind == ambig that is not a Twitter name.
+Macro: FIRSTNAMEAMBIG
+(
+ {FirstPerson.kind == ambig, FirstPerson.twittername == no}
+)
+
+// Any FirstPerson flagged twittername == yes.
+Macro: FIRSTNAMETWITTER
+
+(
+ {FirstPerson.twittername == yes}
+)
+
+
+// An optional comma followed by a Lookup of majorType person_ending.
+Macro: PERSONENDING
+(
+ ({Token.string == ","})?
+ {Lookup.majorType == person_ending}
+)
+
+// A surname prefix: either a Lookup with majorType surname / minorType
+// prefix, or the Token "O" or "D" followed by an apostrophe Token.
+Macro: PREFIX
+(
+ ({Lookup.majorType == surname, Lookup.minorType == prefix}
+ )|
+ (({Token.string == "O"}|{Token.string == "D"})
+ {Token.string == "'"}
+ )
+)
+
+
+
+
+///////////////////////////////////////////////////////////
+
+
+// Person Rules
+
+Rule: Pronoun
+// Matches a single Token with category PP, PRP or RB and produces nothing
+// (empty RHS); the "pro" binding is not used.
+Priority: 1000
+
+(
+ {Token.category == PP}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+):pro
+-->
+{}
+
+
+Rule:Reject
+Priority: 1000
+// stops certain things being recognised as People
+// Matches a run of one to five ClosedClass or URL annotations and produces
+// nothing (empty RHS).
+(
+ ({ClosedClass}|{URL})[1,5]
+)
+-->
+{}
+
+Rule: GazPerson
+Priority: 50
+// A gazetteer entry for a full person name becomes a TempPerson whose
+// firstName / surname features are derived from the Tokens it covers.
+(
+ {Lookup.majorType == person_full}
+)
+:person -->
+{
+// the matched Lookup and the Tokens it contains, in document order
+gate.AnnotationSet matched = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation lookupAnn = (gate.Annotation)matched.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+List<Annotation> tokens = gate.Utils.inDocumentOrder(
+ gate.Utils.getContainedAnnotations(inputAS, matched, "Token"));
+int tokenCount = tokens.size();
+
+if (tokenCount == 1) {
+ // a lone Token is guessed to be a surname
+ features.put("surname", gate.Utils.stringFor(doc, tokens.get(0)));
+}
+else if (tokenCount >= 2) {
+ // first Token is the first name ...
+ features.put("firstName", gate.Utils.stringFor(doc, tokens.get(0)));
+ // ... and the text from the second Token to the end of the last one is the surname
+ Long surnameFrom = gate.Utils.start(tokens.get(1));
+ Long surnameTo = gate.Utils.end(tokens.get(tokenCount - 1));
+ features.put("surname", gate.Utils.stringFor(doc, surnameFrom, surnameTo));
+}
+
+features.put("kind", "fullName");
+features.put("rule", "GazPerson");
+features.put("gender", lookupAnn.getFeatures().get("gender"));
+
+// Utils.addAnn needs no try-catch
+gate.Utils.addAnn(outputAS, matched, "TempPerson", features);
+}
+
+Rule: GazPersonFirstTwitter
+// A non-ambiguous Twitter FirstPerson, optionally preceded by a DT/PRP/RB
+// Token and optionally followed by a single upper-case letter Token. Only the
+// FirstPerson span is turned into a TempPerson (kind = "firstName").
+Priority: 300
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ {FirstPerson.twittername == yes, FirstPerson.kind != ambig}
+):person
+(
+ {Token.orth == upperInitial, Token.length == "1"}
+)?
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+// gender is copied from the FirstPerson annotation (may be absent)
+features.put("gender", personAnn.getFeatures().get("gender"));
+features.put("kind", "firstName");
+features.put("rule", "GazPersonFirstTwitter");
+
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
+features.put("twittername", "yes");
+
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
+
+
+
+
+Rule: GazPersonFirst
+// Any FirstPerson with kind != ambig (the twittername feature is not
+// tested here), with the same optional left/right context as
+// GazPersonFirstTwitter. Only the FirstPerson span becomes a TempPerson.
+Priority: 200
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ {FirstPerson.kind != ambig}
+):person
+(
+ {Token.orth == upperInitial, Token.length == "1"}
+)?
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+// gender is copied from the FirstPerson annotation (may be absent)
+features.put("gender", personAnn.getFeatures().get("gender"));
+features.put("kind", "firstName");
+features.put("rule", "GazPersonFirst");
+
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
+
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
+
+
+
+Rule: PersonFirstContext
+Priority: 30
+// Anne and Kenton
+// A known first name, a conjunction, then a capitalised Token longer than one
+// character: both sides become separate TempPerson annotations of kind
+// "firstName"; only the first one gets a gender.
+// NOTE(review): the conjunction is the English literal "and" although this
+// grammar lives in the Lang_French plugin - confirm whether "et" is intended.
+
+(FIRSTNAME):person1
+(
+ {Token.string == "and"}
+)
+({Token.orth == upperInitial, Token.length != "1"})
+:person2
+ -->
+{
+//first deal with person1
+ gate.FeatureMap features1 = Factory.newFeatureMap();
+ gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
+ gate.Annotation personAnn = (gate.Annotation)person1Set.iterator().next();
+
+ String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+ features1.put("firstName", contentFirstName);
+ features1.put("gender", personAnn.getFeatures().get("gender"));
+ features1.put("kind", "firstName");
+ features1.put("rule", "PersonFirstContext");
+outputAS.add(person1Set.firstNode(), person1Set.lastNode(), "TempPerson",
+features1);
+
+//now deal with person2
+// person2 is a plain Token, so no gender feature is set for it
+gate.FeatureMap features2 = Factory.newFeatureMap();
+gate.AnnotationSet person2Set = (gate.AnnotationSet)bindings.get("person2");
+gate.Annotation person2Ann = (gate.Annotation)person2Set.iterator().next();
+
+ String content2FirstName = gate.Utils.stringFor(doc, person2Ann);
+ features2.put("firstName", content2FirstName);
+ features2.put("kind", "firstName");
+ features2.put("rule", "PersonFirstContext");
+outputAS.add(person2Set.firstNode(), person2Set.lastNode(), "TempPerson",
+features2);
+}
+
+
+Rule: PersonTitle
+Priority: 35
+// Mr. Jones
+// Mr Fred Jones
+// note we only allow one first and surname,
+// but we add more in a final phase if we find adjacent unknowns
+// The leading DT/PRP/RB Token is optional context and lies outside the
+// "person" binding. The first Title must have rule == "TitleGender"; a second
+// Title and the first name are optional.
+
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+ (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
+ (PREFIX)*
+ ({Upper})
+ (PERSONENDING)?
+ ):surname
+):person
+-->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet =
(gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ // NOTE(review): the surname binding can hold several annotations (prefix,
+ // Upper, ending); only the one the iterator returns first is used below -
+ // confirm that this yields the intended surname string.
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ // gender comes from the Title annotation
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ // the first name is optional in the pattern, so its binding may be missing
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+ gate.Annotation firstNameAnn =
(gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ }
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "personName");
+ features.put("rule", "PersonTitle");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+
+
+Rule: PersonTitleInitials
+Priority: 35
+
+// Mr J. Jones
+// Like PersonTitle, but the optional element between the title(s) and the
+// surname is an Initials annotation, and the surname Upper must not itself
+// be Initials.
+
+
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+ ({Initials})?
+ ):initials
+ (
+ (PREFIX)*
+ ({Upper, !Initials})
+ (PERSONENDING)?
+ ):surname
+):person
+-->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ // gender comes from the Title annotation
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ // initials are optional in the pattern, so the binding may be missing
+ if (initialsSet != null && initialsSet.size()>0)
+ {
+ List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+ // text from the start of the first to the end of the last bound annotation
+ Long initialsStart = gate.Utils.start(initialsList.get(0));
+ Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() -
1));
+ String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart,
initialsEnd);
+ features.put("initials", initialsContent);
+ }
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "personName");
+ features.put("rule", "PersonTitleInitials");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+Rule: TitleFirstName
+Priority: 55
+// use this rule when we know what gender the title indicates
+// Mr Fred
+// A gendered Title followed by a first name becomes a TempPerson of kind
+// "personName" carrying the title text, the title's gender and the first name.
+
+(
+ ({Title.gender == male} | {Title.gender == female}):title
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+
+)
+:person -->
+
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ // the label must match the binding name used in the pattern exactly
+ // (binding names are case-sensitive map keys)
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ // gender comes from the Title annotation
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+ gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ }
+
+ features.put("kind", "personName");
+
+ features.put("rule", "TitleFirstName");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+
+Rule: PersonJobTitle
+Priority: 20
+// note we include titles but not jobtitles in markup
+// A jobtitle Lookup followed by (optional title) first name + surname:
+// the name part becomes a TempPerson and the job title span a JobTitle.
+
+(
+ {Lookup.majorType == jobtitle}
+):jobtitle
+(
+ (TITLE)?
+ ((FIRSTNAME | FIRSTNAMEAMBIG )
+ )
+ (PREFIX)*
+ ({Upper,!Initials})
+ (PERSONENDING)?
+)
+:person
+-->
+ :person.TempPerson = {kind = "fullName", rule = "PersonJobTitle"},
+ :jobtitle.JobTitle = {rule = "PersonJobTitle"}
+
+
+
+
+Rule: NotFirstPersonStop
+Priority: 70
+// ambig first name and surname is stop word
+// e.g. Will And
+// Negative rule: one or more ambiguous first names, or a PRP/DT Token,
+// followed by a stop Lookup matches here and produces nothing (empty RHS).
+
+(
+ ((FIRSTNAMEAMBIG)+ |
+ {Token.category == PRP}|
+ {Token.category == DT}
+ )
+ ({Lookup.majorType == stop}
+ )
+)
+:person -->
+ {}
+
+
+Rule: FirstPersonStop
+Priority: 50
+// John And
+// An unambiguous first name followed by a DT/PRP/RB/IN Token: only the first
+// name span becomes a TempPerson (kind = "firstName"). No firstName feature
+// is set by this rule.
+
+(FIRSTNAME):person
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}|
+ {Token.category == IN}
+)
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+features.put("gender", personAnn.getFeatures().get("gender"));
+features.put("kind", "firstName");
+features.put("rule", "FirstPersonStop");
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
+
+
+
+
+
+Rule: NotPersonFull
+Priority: 50
+// do not allow Det + Surname
+// Negative rule: a DT/PRP/RB Token followed by a surname-like sequence
+// matches here and produces nothing (empty RHS); the "foo" binding is unused.
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)
+(
+ (PREFIX)*
+ ({Upper})
+ (PERSONENDING)?
+):foo
+-->
+{}
+
+
+
+Rule: LocPersonAmbig1
+Priority: 50
+// Location + Possible Surname --> Location only (ignore Surname)
+// Here the surname-like part must end in a person ending (PERSONENDING is
+// mandatory, the prefix is optional); only the location Lookup is annotated.
+
+(
+ {Lookup.majorType == location}
+):loc
+(
+ (PREFIX)*
+ ({Upper,!Initials})
+ (PERSONENDING)
+):foo
+-->
+:loc.TempLocation = {kind = "locName", rule = LocPersonAmbig1}
+
+
+Rule: LocPersonAmbig2
+Priority: 50
+// Location + Possible Surname --> Location only (ignore Surname)
+// Here the surname-like part must start with a prefix (PREFIX is mandatory,
+// the person ending is optional); only the location Lookup is annotated.
+
+(
+ {Lookup.majorType == location}
+):loc
+(
+ (PREFIX)
+ ({Upper,!Initials})
+ (PERSONENDING)?
+):foo
+-->
+:loc.TempLocation = {kind = "locName", rule = LocPersonAmbig2}
+
+
+Rule: LocPersonAmbig3
+Priority: 100
+// Ambiguous Location/Person + Possible Surname --> Person
+// The whole match (ambiguous location that is also a FirstPerson, prefix,
+// surname, optional ending) becomes one TempPerson.
+
+(
+ {Lookup.majorType == location, Lookup.ambig == yes, FirstPerson}
+ (PREFIX)
+ ({Upper,!Initials})
+ (PERSONENDING)?
+):person
+-->
+{
+gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
+// NOTE(review): the binding holds several annotation types; gender is read
+// from whichever annotation the iterator returns first, which is not
+// necessarily the FirstPerson - confirm.
+gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.FeatureMap features = Factory.newFeatureMap();
+features.put("gender", personAnn.getFeatures().get("gender"));
+// NOTE(review): kind is "firstName" although the span includes the surname
+features.put("kind", "firstName");
+features.put("rule", "LocPersonAmbig3");
+outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
+features);
+}
+
+
+Rule: PersonFullInitialsCaps
+Priority: 100
+// TO FISH
+// If the initials is of type nopunct, we want to discard the Person if the
surname is also in all caps, as it's too ambiguous
+// The match is annotated as Discard instead of TempPerson; the optional
+// leading DT Token lies outside the "person" binding.
+
+(
+ {Token.category == DT}
+)?
+(
+
+ ({Initials.kind == nopunct})
+ ((FIRSTNAME | FIRSTNAMEAMBIG )?)
+ ((PREFIX)*
+ ({Upper.kind == allCaps})
+ (PERSONENDING)?
+ )
+):person -->
+:person.Discard = {rule = "PersonFullInitialsCaps"}
+
+
+Rule: PersonFull
+Priority: 10
+// F.W. Jones
+// Fred Jones
+// First name, optional second first name (bound as middleName) and a
+// surname; produces a TempPerson of kind "fullName" whose gender is taken
+// from the first name.
+
+(
+
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+ ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
+ ((PREFIX)*
+ ({Upper,!Initials})
+ (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ // personAnn is not used further down in this block
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
+
+ gate.AnnotationSet firstNameSet =
(gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn =
(gate.Annotation)firstNameSet.iterator().next();
+
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
+
+ gate.AnnotationSet middleNameSet =
(gate.AnnotationSet)bindings.get("middleName");
+
+ // the middle name is optional in the pattern, so its binding may be missing
+ if (middleNameSet != null && middleNameSet.size()>0)
+{
+ gate.Annotation middleNameAnn =
(gate.Annotation)middleNameSet.iterator().next();
+ String middleNameContent = gate.Utils.stringFor(doc, middleNameAnn);
+ features.put("middleName", middleNameContent);
+}
+
+ // NOTE(review): only the annotation returned first from the surname binding
+ // is used for the surname string - confirm for multi-part surnames
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFull");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+
+}
+
+
+
+Rule: PersonFullInitials
+Priority: 10
+// F.W. Jones
+// Initials (with no Lookup on them), an optional first name bound as
+// middleName, and a surname. The optional leading DT Token lies outside the
+// "person" binding. A gender is only set when a middle name was matched.
+
+(
+ {Token.category == DT}
+)?
+(
+
+ ({Initials, !Lookup}):initials
+ ((FIRSTNAME | FIRSTNAMEAMBIG )?):middleName
+ ((PREFIX)*
+ ({Upper,!Initials})
+ (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+ // personAnn is not used further down in this block
+ gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
+
+ gate.AnnotationSet initialsSet =
(gate.AnnotationSet)bindings.get("initials");
+ List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+ // text from the start of the first to the end of the last bound annotation
+ Long initialsStart = gate.Utils.start(initialsList.get(0));
+ Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() -
1));
+ String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart,
initialsEnd);
+ features.put("initials", initialsContent);
+
+
+ gate.AnnotationSet middleNameSet =
(gate.AnnotationSet)bindings.get("middleName");
+
+ // the middle name is optional in the pattern, so its binding may be missing
+ if (middleNameSet != null && middleNameSet.size()>0)
+{
+ gate.Annotation middleNameAnn =
(gate.Annotation)middleNameSet.iterator().next();
+ String middleNameContent = gate.Utils.cleanStringFor(doc, middleNameAnn);
+ features.put("middleName", middleNameContent);
+ features.put("gender", middleNameAnn.getFeatures().get("gender"));
+}
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.cleanStringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFullInitials");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+
+}
+
+
+
+
+
+Rule: PersonFullDoubleBarrelled
+Priority: 9
+// F.W. Smith Jones
+// Fred Smith Jones
+// A first name followed by two consecutive non-initial Upper annotations
+// (with optional prefix and ending), all of which are bound as the surname.
+// Produces a TempPerson of kind "fullName" whose gender comes from the first
+// name. The pattern has no middleName binding, so no middleName is set.
+
+
+(
+
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+ ((PREFIX)*
+ ({Upper,!Initials})
+ ({Upper,!Initials})
+ (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
+
+ // NOTE(review): only the annotation returned first from the surname binding
+ // is used for the surname string - confirm for double-barrelled surnames
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonFullDoubleBarrelled");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+
+}
+
+
+
+Rule: PersonMiddleInitial
+Priority: 10
+// Fred C. Jones
+// First name, one Initials annotation, then a surname made of optional
+// PREFIXes, one Upper (non-Initials) and an optional PERSONENDING.
+(
+
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstName
+ ({Initials}):initials
+ ((PREFIX)*
+ ({Upper,!Initials})
+ (PERSONENDING)?
+ ):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ // the unused personAnn local of the earlier version has been removed
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet =
+ (gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn =
+ (gate.Annotation)firstNameSet.iterator().next();
+
+ // first-name text and gender come from the annotation bound to :firstName
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ features.put("gender", firstNameAnn.getFeatures().get("gender"));
+
+ gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
+
+ // defensive guard kept from the original; :initials is mandatory in the pattern
+ if (initialsSet != null && initialsSet.size()>0)
+{
+ gate.Annotation initialsAnn = (gate.Annotation)initialsSet.iterator().next();
+ String initialsContent = gate.Utils.stringFor(doc, initialsAnn);
+ features.put("initials", initialsContent);
+}
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonMiddleInitial");
+ // the TempPerson annotation spans everything bound to :person
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+
+}
+
+
+
+Rule: PersonFullStop
+Priority: 50
+// G.Wilson Fri
+// Only the name part is bound to :person; the date Lookup that must follow is not annotated.
+(
+ ((FIRSTNAME | FIRSTNAMEAMBIG) )
+ (PREFIX)*
+ ({Upper})
+):person
+(
+ {Lookup.majorType == date}
+)
+-->
+ :person.TempPerson = {kind = "fullName", rule = "PersonFullStop"}
+
+
+Rule: NotPersonFullReverse
+Priority: 20
+// XYZ, I  (Upper, comma, PRP token, optional PERSONENDING): the RHS is empty, so no annotation is created
+(
+ ({Upper})
+ {Token.string == ","}
+ {Token.category == PRP}
+ (PERSONENDING)?
+)
+:unknown
+-->
+{}
+
+
+Rule: PersonSaint
+Priority: 50
+// Note: ensure that it's not a Saints Day first
+// "St", "St." or "Saint" followed by a FIRSTNAME; the whole span becomes a TempPerson.
+(
+ ({Token.string == "St"} ({Token.string == "."})? |
+ {Token.string == "Saint"})
+ (FIRSTNAME)
+ )
+:person -->
+{
+ // everything matched under the :person label
+ gate.AnnotationSet saintSpan = (gate.AnnotationSet)bindings.get("person");
+ // the FirstPerson annotations inside that span, if any
+ gate.AnnotationSet firstNames = (gate.AnnotationSet)saintSpan.get("FirstPerson");
+
+ gate.FeatureMap saintFeatures = Factory.newFeatureMap();
+ if (firstNames != null && !firstNames.isEmpty()) {
+  // copy the gender feature from the first FirstPerson found
+  gate.Annotation firstName = (gate.Annotation)firstNames.iterator().next();
+  saintFeatures.put("gender", firstName.getFeatures().get("gender"));
+ }
+ saintFeatures.put("kind", "firstName");
+ saintFeatures.put("rule", "PersonSaint");
+
+ outputAS.add(saintSpan.firstNode(), saintSpan.lastNode(), "TempPerson",
+  saintFeatures);
+}
+
+
+Rule: PersonLocAmbig
+Priority: 40
+// Ken London
+// Susan Hampshire
+
+// Christian name + Location --> Person's Name
+(
+ (FIRSTNAME):firstName
+ ({Lookup.majorType == location}):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ // the unused personAnn local of the earlier version has been removed
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet =
+ (gate.AnnotationSet)bindings.get("firstName");
+ gate.Annotation firstNameAnn =
+ (gate.Annotation)firstNameSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ // The other rules in this grammar read the person's gender from the
+ // "gender" feature of the FIRSTNAME annotation. This rule used to read
+ // "minorType" instead; prefer "gender" and fall back to "minorType" so
+ // that any input that really carries it there still behaves as before.
+ Object gender = firstNameAnn.getFeatures().get("gender");
+ if (gender == null) {
+  gender = firstNameAnn.getFeatures().get("minorType");
+ }
+ features.put("gender", gender);
+
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+
+ // the location Lookup is treated as the surname
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "PersonLocAmbig");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+Rule: TitlePersonLocAmbig
+Priority: 50
+// Professor London
+// title + Location --> Person's Name
+
+(
+ ({Title}):title
+ ({Lookup.majorType == location}):surname
+):person -->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ // the unused personAnn local of the earlier version has been removed
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ // gender is copied from the Title annotation's "gender" feature
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ String titleContent = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", titleContent);
+
+ // the location Lookup is treated as the surname
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "fullName");
+ features.put("rule", "TitlePersonLocAmbig");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
+
+Rule:PersonOrgAmbig
+Priority: 50
+// if the last name is an organisation ending, treat as an organisation not
person
+// e.g. A.B. Consulting
+// an optional leading determiner is matched but left outside :orgName
+(
+ {Token.category == DT}
+)?
+(
+ ((FIRSTNAME | FIRSTNAMEAMBIG) )+
+ ({Lookup.majorType == org_key}|
+ {Lookup.majorType == org_base}
+ )
+)
+:orgName -->
+ :orgName.TempOrganization = {kind = "unknown", rule = "PersonOrgAmbig"}
+
+
+
+///////////////////////////////////////////////////////////////////
+// Organisation Rules
+
+Macro: CDG
+// cdg is something like "Ltd."; matches a cdg Lookup, optionally preceded by a comma token
+ (
+ ({Lookup.majorType == cdg})|
+ ({Token.string == ","}
+ {Lookup.majorType == cdg})
+ )
+
+// SAINT: the token "St" (optionally followed by "."), or the token "Saint"
+Macro: SAINT
+(
+ ({Token.string == "St"} ({Token.string == "."})? |
+ {Token.string == "Saint"})
+)
+// CHURCH: one of Church / Cathedral / Chapel, capitalised or lower case
+Macro: CHURCH
+(
+{Token.string == "Church"}|{Token.string == "church"}|
+{Token.string == "Cathedral"}|{Token.string == "cathedral"}|
+{Token.string == "Chapel"}|{Token.string == "chapel"}
+)
+
+/////////////////////////////////////////////////////////////
+Rule: TheGazOrganization
+Priority: 245
+(
+ {Token.category == DT}|
+ {Token.category == RB}
+)
+(
+{Lookup.majorType == organization}
+)
+:orgName -->
+{
+ // Annotations bound to :orgName (the leading DT/RB token is not part of it).
+ gate.AnnotationSet matchedOrg = (gate.AnnotationSet)bindings.get("orgName");
+ // The Lookup annotations within that binding.
+ gate.AnnotationSet orgLookups = (gate.AnnotationSet)matchedOrg.get("Lookup");
+
+ gate.FeatureMap orgFeatures = Factory.newFeatureMap();
+ if (orgLookups != null && !orgLookups.isEmpty()) {
+  // Carry the Lookup's minorType over as the organisation type.
+  gate.Annotation firstLookup = (gate.Annotation)orgLookups.iterator().next();
+  orgFeatures.put("orgType", firstLookup.getFeatures().get("minorType"));
+ }
+ // NOTE(review): the rule feature is "GazOrganization", not
+ // "TheGazOrganization" -- kept as in the original; confirm it is intended.
+ orgFeatures.put("rule", "GazOrganization");
+
+ // Emit the TempOrganization over the span of the :orgName binding.
+ outputAS.add(matchedOrg.firstNode(), matchedOrg.lastNode(),
+  "TempOrganization", orgFeatures);
+}
+
+
+Rule: GazOrganization
+Priority: 145
+(
+{Lookup.majorType == organization}
+)
+:orgName -->
+{
+ // Annotations bound to :orgName.
+ gate.AnnotationSet matchedOrg = (gate.AnnotationSet)bindings.get("orgName");
+ // The Lookup annotations within that binding.
+ gate.AnnotationSet orgLookups = (gate.AnnotationSet)matchedOrg.get("Lookup");
+
+ gate.FeatureMap orgFeatures = Factory.newFeatureMap();
+ if (orgLookups != null && !orgLookups.isEmpty()) {
+  // Carry the Lookup's minorType over as the organisation type.
+  gate.Annotation firstLookup = (gate.Annotation)orgLookups.iterator().next();
+  orgFeatures.put("orgType", firstLookup.getFeatures().get("minorType"));
+ }
+ orgFeatures.put("rule", "GazOrganization");
+
+ // Emit the TempOrganization over the span of the :orgName binding.
+ outputAS.add(matchedOrg.firstNode(), matchedOrg.lastNode(),
+  "TempOrganization", orgFeatures);
+}
+
+Rule: LocOrganization
+Priority: 50
+// Ealing Police: a location or country_adj Lookup followed by one or two organization Lookups
+(
+ ({Lookup.majorType == location} |
+ {Lookup.majorType == country_adj})
+{Lookup.majorType == organization}
+({Lookup.majorType == organization})?
+)
+:orgName -->
+ :orgName.TempOrganization = {kind = "orgName", rule=LocOrganization}
+
+
+Rule: NewspaperEnding
+Priority: 200
+// GSA Today
+// An Upper or Initials annotation followed by a newspaper_ending Lookup.
+(
+ ({Upper}|{Initials})
+ {Lookup.majorType == newspaper_ending}
+):orgName
+-->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+// all annotations bound to :orgName, and the Lookups among them
+gate.AnnotationSet orgSet = (gate.AnnotationSet)bindings.get("orgName");
+gate.AnnotationSet org = (gate.AnnotationSet)orgSet.get("Lookup");
+
+if (org != null && org.size()>0)
+{
+ // propagate the minorType of the first Lookup as orgType
+ gate.Annotation orgAnn = (gate.Annotation)org.iterator().next();
+ features.put("orgType", orgAnn.getFeatures().get("minorType"));
+}
+// record the rule name (previously misspelt "NewspaperEndng")
+ features.put("rule", "NewspaperEnding");
+// create a TempOrg annotation and add the features we've created
+outputAS.add(orgSet.firstNode(), orgSet.lastNode(), "TempOrganization",
+features);
+}
+
+
+Rule: INOrgXandY
+Priority: 200
+// Same name pattern as OrgXandY, but requires a preceding IN token, which stays outside :orgName.
+// Bradford & Bingley
+// Bradford & Bingley Ltd
+(
+ {Token.category == IN}
+)
+// NOTE(review): the rule feature set below is "OrgXandY", not "INOrgXandY" -- confirm intended
+(
+ ({Token.category == NNP}
+ )+
+
+ {Token.string == "&"}
+
+ (
+ {Token.orth == upperInitial}
+ )+
+
+ (CDG)?
+
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"}
+
+Rule: OrgXandY
+Priority: 20
+// One or more NNP tokens, "&", one or more upperInitial tokens, then an optional CDG.
+// Bradford & Bingley
+// Bradford & Bingley Ltd
+
+
+(
+ ({Token.category == NNP}
+ )+
+
+ {Token.string == "&"}
+
+ (
+ {Token.orth == upperInitial}
+ )+
+
+ (CDG)?
+
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandY"}
+
+
+Rule:OrgUni
+Priority: 25
+// University of Sheffield
+// NOTE(review): only the "University of X" form is matched by this pattern;
+// forms such as "Sheffield University" are not covered here.
+(
+ {Token.string == "University"}
+ {Token.string == "of"}
+ (
+ {Token.category == NNP})+
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "other", rule = "OrgUni"}
+
+
+
+Rule: OrgDept
+Priority: 25
+// Department of Pure Mathematics and Physics
+// "Department of" + upperInitial tokens, optionally continued by "and" + more upperInitial tokens
+(
+ {Token.string == "Department"}
+
+ {Token.string == "of"}
+ (
+ {Token.orth == upperInitial})+
+ (
+ {Token.string == "and"}
+ (
+ {Token.orth == upperInitial})+
+ )?
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "department", rule = "OrgDept"}
+
+Rule: TheOrgXKey
+Priority: 500
+// Determiner (left outside :org), then 1-5 Upper, an org_key Lookup and an optional org_ending Lookup.
+// The Aaaa Ltd.
+(
+ {Token.category == DT}
+)
+(
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:org
+-->
+:org.TempOrganization = {orgType = "unknown", rule = "TheOrgXKey"}
+
+Rule: NotOrgXKey
+Priority: 150
+// if all the names are org_base or org_key, it's not an organisation
+// e.g. Business Consulting
+// Negative rule: the RHS is empty, so the match produces no annotation.
+(
+ ({Lookup.majorType == org_key}|
+ {Lookup.majorType == org_base}
+ )+
+ ({Lookup.majorType == org_ending})?
+)
+:org
+-->
+{}
+
+
+
+Rule: NotTheKey
+Priority: 200
+// Negative rule: determiner + org_key (+ optional org_ending) on its own produces no annotation.
+(
+ {Token.category == DT}
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:org
+-->
+{}
+
+
+Rule: OrgXKey
+Priority: 125
+// Optional determiner (left outside :org), 1-5 Upper, an org_key Lookup, optional org_ending Lookup.
+// Aaaa Ltd.
+({Token.category == DT})?
+(
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:org
+-->
+:org.TempOrganization = {orgType = "unknown", rule = "OrgXKey"}
+
+
+Rule: NotOrgXEnding
+Priority: 500
+// Very Limited
+// Negative rule: an RB token + cdg Lookup (optionally after a determiner) produces no annotation.
+(
+ {Token.category == DT}
+)?
+(
+ {Token.category == RB}
+ {Lookup.majorType == cdg}
+)
+:label
+-->
+{}
+
+ Rule: NotOrgXEnding2
+Priority: 500
+// NOTE(review): despite the "Not" in its name, this rule does create a TempOrganization (rule = "OrgXEnding") -- confirm naming
+// The Coca Cola Co.
+// The determiner is matched but left outside :orgName.
+(
+ {Token.category == DT}
+)
+(
+ ({Upper})
+ ({Upper})?
+ {Lookup.majorType == cdg}
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "company", rule = "OrgXEnding"}
+
+
+
+Rule: OrgXEnding
+Priority: 120
+// One or two Upper followed by a cdg Lookup.
+// Coca Cola Co.
+
+(
+ ({Upper})
+ ({Upper})?
+ {Lookup.majorType == cdg}
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXEnding"}
+
+Rule: TheOrgXandYKey
+Priority: 220
+// Determiner-prefixed variant of OrgXandYKey; the determiner is outside :orgName and the rule feature is "OrgXandYKey".
+(
+ {Token.category == DT}
+)
+(
+ ({Upper})
+ ({Upper})?
+ (({Token.string == "and"} |
+ {Token.string == "&"})
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
+ )
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandYKey"}
+
+
+
+Rule: OrgXandYKey
+Priority: 120
+// 1-2 Upper, a mandatory "and"/"&", up to three more Upper, an org_key Lookup, optional org_ending.
+// Aaaa Ltd.
+// Xxx Services Ltd.
+// AA and BB Services Ltd.
+// but NOT A XXX Services Ltd.
+// NOTE(review): the "and"/"&" group is not optional, so the first two examples above do not appear to be matched by this rule
+(
+ ({Upper})
+ ({Upper})?
+ (({Token.string == "and"} |
+ {Token.string == "&"})
+ ({Upper})?
+ ({Upper})?
+ ({Upper})?
+ )
+ {Lookup.majorType == org_key}
+ ({Lookup.majorType == org_ending})?
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXandYKey"}
+
+
+Rule: OrgXsKeyBase
+Priority: 120
+// Up to two optional Upper, an upperInitial token + "'" (+ optional "s"), then an org_key or org_base Lookup.
+// Gandy's Circus
+// Queen's Ware
+
+(
+ ({Upper})?
+ ({Upper})?
+ ({Token.orth == upperInitial}
+ {Token.string == "'"}
+ ({Token.string == "s"})?
+ )
+ ({Lookup.majorType == org_key}|
+ {Lookup.majorType == org_base})
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXsKeyBase"}
+
+
+
+Rule: NotOrgXBase
+Priority: 1000
+// not things like British National
+// or The University
+// Creates a Temp annotation (kind = "notorgName") over :orgName instead of a TempOrganization.
+
+(
+ ({Token.category == DT}
+ )?
+)
+(
+ ({Lookup.majorType == country_adj}|
+ {Token.orth == lowercase})
+ ({Lookup.majorType == org_base}|
+ {Lookup.majorType == govern_key})
+)
+:orgName -->
+ :orgName.Temp = {kind = "notorgName", rule = "NotOrgXBase"}
+
+
+Rule: TheOrgXBase
+Priority: 230
+// Determiner-prefixed variant of OrgXBase; the determiner is matched but left outside :orgName.
+(
+ ({Token.category == DT}
+ )
+)
+(
+ (
+ ({Upper})|
+ {Lookup.majorType == organization}
+ )
+ ({Upper})?
+ ({Upper})?
+ ({Lookup.majorType == org_base}|
+ {Lookup.majorType == govern_key}
+ )
+ (
+ {Token.string == "of"}
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+ )?
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "TheOrgXBase"}
+
+
+Rule: OrgXBase
+Priority: 130
+// An Upper or organization Lookup, up to two more Upper, an org_base/govern_key Lookup, optional "of" + 1-3 Upper.
+// same as OrgXKey but uses base instead of key
+// includes govern_key e.g. academy
+// Barclays Bank
+// Royal Academy of Art
+
+(
+ (
+ ({Upper})|
+ {Lookup.majorType == organization}
+ )
+ ({Upper})?
+ ({Upper})?
+ ({Lookup.majorType == org_base}|
+ {Lookup.majorType == govern_key}
+ )
+ (
+ {Token.string == "of"}
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+ )?
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "OrgXBase"}
+
+Rule: TheBaseofOrg
+Priority: 230
+// Determiner-prefixed variant of BaseofOrg; the determiner is outside :orgName and the rule feature is "BaseofOrg".
+(
+ {Token.category == DT}
+)
+(
+ ({Lookup.majorType == org_base}|
+ {Lookup.majorType == govern_key}
+ )
+
+ {Token.string == "of"}
+ (
+ {Token.category == DT}
+ )?
+ ({Upper})
+ ({Upper})?
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
+
+
+
+
+Rule: BaseofOrg
+Priority: 130
+// An org_base or govern_key Lookup, "of", an optional determiner, then one or two Upper.
+(
+ ({Lookup.majorType == org_base}|
+ {Lookup.majorType == govern_key}
+ )
+
+ {Token.string == "of"}
+ (
+ {Token.category == DT}
+ )?
+ ({Upper})
+ ({Upper})?
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "BaseofOrg"}
+
+
+
+Rule: OrgPreX
+Priority: 130
+// An org_pre Lookup, one or more upperInitial tokens, then an optional org_ending Lookup.
+// Royal Tuscan
+
+(
+ {Lookup.majorType == org_pre}
+ (
+ {Token.orth == upperInitial})+
+ ({Lookup.majorType == org_ending})?
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "unknown", rule = "OrgPreX"}
+
+
+
+Rule: OrgChurch
+Priority: 150
+// St. Andrew's Church
+// SAINT, an upperInitial token, "'" with an optional "s", then a CHURCH word.
+(
+ (SAINT)
+ {Token.orth == upperInitial}
+ {Token.string == "'"}({Token.string == "s"})?
+ (CHURCH)
+)
+:orgName -->
+ :orgName.TempOrganization = {orgType = "other", rule = "OrgChurch"}
+
+
+Rule:OrgPersonAmbig
+Priority: 130
+// Alexandra Pottery should be org not person
+// overrides PersonFull
+// NOTE(review): the pattern requires a "'" token after FIRSTNAME, so it matches "Alexandra's Pottery" rather than "Alexandra Pottery" -- confirm
+(
+ (TITLE)?
+ (FIRSTNAME)
+ {Token.string == "'"}({Token.string == "s"})?
+ ({Lookup.majorType == org_key}|
+ {Lookup.majorType == org_base})
+ ({Lookup.majorType == org_ending})?
+)
+:org
+-->
+ :org.TempOrganization= {orgType = "unknown", rule = "OrgPersonAmbig"}
+
+
+
+/////////////////////////////////////////////////////////////////
+// Location rules
+
+
+Rule: Location1
+Priority: 200
+// Lookup = city, country, province, region, water
+
+// Western Europe
+// South China sea
+
+(
+ {Token.category == DT}
+)?
+(
+ ({Lookup.majorType == loc_key, Lookup.minorType == pre}
+ )?
+ {Lookup.majorType == location}
+ (
+ {Lookup.majorType == loc_key, Lookup.minorType == post})?
+)
+:locName -->
+{
+ // Annotations bound to :locName (an optional leading determiner is not included).
+ gate.AnnotationSet matchedLoc = (gate.AnnotationSet)bindings.get("locName");
+ // The Lookup annotations within that binding.
+ gate.AnnotationSet locLookups = (gate.AnnotationSet)matchedLoc.get("Lookup");
+
+ gate.FeatureMap locFeatures = Factory.newFeatureMap();
+ if (locLookups != null && !locLookups.isEmpty()) {
+  // Carry over the minorType of whichever Lookup the set yields first.
+  gate.Annotation firstLookup = (gate.Annotation)locLookups.iterator().next();
+  locFeatures.put("locType", firstLookup.getFeatures().get("minorType"));
+ }
+ locFeatures.put("rule", "Location1");
+
+ // Emit the TempLocation over the span of the :locName binding.
+ outputAS.add(matchedLoc.firstNode(), matchedLoc.lastNode(), "TempLocation",
+  locFeatures);
+}
+
+Rule: GazLocation
+Priority: 200
+(
+ {Token.category == DT}
+)?
+(
+ {Lookup.majorType == location}
+)
+:locName
+ -->
+{
+ // Annotations bound to :locName (an optional leading determiner is not included).
+ gate.AnnotationSet matchedLoc = (gate.AnnotationSet)bindings.get("locName");
+ // The Lookup annotations within that binding.
+ gate.AnnotationSet locLookups = (gate.AnnotationSet)matchedLoc.get("Lookup");
+
+ gate.FeatureMap locFeatures = Factory.newFeatureMap();
+ if (locLookups != null && !locLookups.isEmpty()) {
+  // Carry the Lookup's minorType over as the location type.
+  gate.Annotation firstLookup = (gate.Annotation)locLookups.iterator().next();
+  locFeatures.put("locType", firstLookup.getFeatures().get("minorType"));
+ }
+ locFeatures.put("rule", "GazLocation");
+
+ // Emit the TempLocation over the span of the :locName binding.
+ outputAS.add(matchedLoc.firstNode(), matchedLoc.lastNode(), "TempLocation",
+  locFeatures);
+}
+
+Rule: GazLocationLocation
+Priority: 100
+// Two location Lookups separated by a comma; each one gets its own TempLocation.
+(
+ ({Lookup.majorType == location}):locName1
+ {Token.string == ","}
+ ({Lookup.majorType == location}):locName2
+)
+-->
+
+{
+ // Bindings for the two locations and the Lookup annotations inside each.
+ gate.AnnotationSet firstSpan = (gate.AnnotationSet)bindings.get("locName1");
+ gate.AnnotationSet firstLookups = (gate.AnnotationSet)firstSpan.get("Lookup");
+ gate.AnnotationSet secondSpan = (gate.AnnotationSet)bindings.get("locName2");
+ gate.AnnotationSet secondLookups = (gate.AnnotationSet)secondSpan.get("Lookup");
+
+ // First location: copy minorType as locType when a Lookup is present.
+ gate.FeatureMap firstFeatures = Factory.newFeatureMap();
+ if (firstLookups != null && !firstLookups.isEmpty()) {
+  gate.Annotation lookup1 = (gate.Annotation)firstLookups.iterator().next();
+  firstFeatures.put("locType", lookup1.getFeatures().get("minorType"));
+ }
+
+ // Second location: same treatment, in a separate feature map.
+ gate.FeatureMap secondFeatures = Factory.newFeatureMap();
+ if (secondLookups != null && !secondLookups.isEmpty()) {
+  gate.Annotation lookup2 = (gate.Annotation)secondLookups.iterator().next();
+  secondFeatures.put("locType", lookup2.getFeatures().get("minorType"));
+ }
+
+ // NOTE(review): both annotations carry rule = "GazLocation", not
+ // "GazLocationLocation" -- kept as in the original; confirm it is intended.
+ firstFeatures.put("rule", "GazLocation");
+ outputAS.add(firstSpan.firstNode(), firstSpan.lastNode(), "TempLocation",
+  firstFeatures);
+
+ secondFeatures.put("rule", "GazLocation");
+ outputAS.add(secondSpan.firstNode(), secondSpan.lastNode(), "TempLocation",
+  secondFeatures);
+}
+
+
+
+
+// LocationPost: an NNP token followed by a loc_key Lookup with minorType post; an optional determiner stays outside :locName
+Rule: LocationPost
+Priority: 50
+(
+ {Token.category == DT}
+)?
+(
+ {Token.category == NNP}
+ {Lookup.majorType == loc_key, Lookup.minorType == post}
+)
+:locName
+-->
+ :locName.TempLocation = {kind = "locName", rule = LocationPost}
+// LocKey: a loc_key "pre" Lookup, one Upper, then an optional loc_key "post" Lookup; no Priority is declared for this rule
+Rule:LocKey
+(
+ {Token.category == DT}
+)?
+(
+ ({Lookup.majorType == loc_key, Lookup.minorType == pre}
+ )
+ ({Upper})
+ (
+ {Lookup.majorType == loc_key, Lookup.minorType == post})?
+)
+:locName -->
+:locName.TempLocation = {kind = "locName", rule = LocKey}
+/////////////////////////////////////////////////////////////////
+
+// Context-based Rules
+
+// InLoc1: the token "in" followed by a location Lookup; only the Lookup is bound to :locName, and its minorType is copied to locType
+Rule:InLoc1
+(
+ {Token.string == "in"}
+)
+(
+ {Lookup.majorType == location}
+)
+:locName
+-->
+ :locName.TempLocation = {kind = "locName", rule = InLoc1, locType =
:locName.Lookup.minorType}
+// LocGeneralKey: a loc_general_key Lookup + "of" as left context; only the following Upper is bound to :loc
+Rule:LocGeneralKey
+Priority: 30
+(
+ {Lookup.majorType == loc_general_key}
+ {Token.string == "of"}
+)
+(
+ ({Upper})
+)
+:loc
+-->
+ :loc.TempLocation = {kind = "locName", rule = LocGeneralKey}
+
+
+Rule:OrgContext1
+Priority: 1
+// company X
+// The token "company" is left context; the 1-3 Upper that follow are bound to :org.
+(
+ {Token.string == "company"}
+)
+(
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+)
+:org
+-->
+ :org.TempOrganization= {orgType = "company", rule = "OrgContext1"}
+
+Rule: OrgContext2
+Priority: 5
+// 1-3 Upper bound to :org, followed by offices / laboratory / laboratories (either capitalisation) as right context.
+// Telstar laboratory
+// Medici offices
+
+(
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+)
+: org
+(
+ ({Token.string == "offices"} |
+ {Token.string == "Offices"} |
+ {Token.string == "laboratory"} |
+ {Token.string == "Laboratory"} |
+ {Token.string == "laboratories"} |
+ {Token.string == "Laboratories"})
+)
+-->
+ :org.TempOrganization= {orgType = "other", rule = "OrgContext2"}
+
+
+
+Rule:JoinOrg
+Priority: 50
+// Smith joined Energis
+// A form of "join" is left context; the 1-3 Upper that follow are bound to :org.
+(
+ ({Token.string == "joined"}|
+ {Token.string == "joining"}|
+ {Token.string == "joins"}|
+ {Token.string == "join"}
+ )
+)
+(
+ ({Upper})
+ ({Upper})?
+ ({Upper})?
+)
+:org
+-->
+ :org.TempOrganization= {orgType = "company", rule = "JoinOrg"}
+
+
+
+
+
+
+
+
+
+
+
Property changes on: gate/trunk/plugins/Lang_French/grammar/name-twitter.jape
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Modified: gate/trunk/plugins/Lang_French/grammar/name.jape
===================================================================
--- gate/trunk/plugins/Lang_French/grammar/name.jape 2016-10-06 12:34:37 UTC
(rev 19645)
+++ gate/trunk/plugins/Lang_French/grammar/name.jape 2016-10-06 12:35:17 UTC
(rev 19646)
@@ -15,7 +15,7 @@
Phase: Name
-Input: Token Lookup Title FirstPerson TreeTaggerToken
+Input: Token Lookup Title FirstPerson Upper ClosedClass Initials Split UserID
Hashtag
Options: control = appelt debug = false
///////////////////////////////////////////////////////////////
@@ -28,57 +28,28 @@
{Title}
({Token.string == "."})?
)
-Macro: INITIALS
-(
- ({Token.orth == upperInitial, Token.length =="1"}
- ({Token.string == "."})?
- )+
-)
-Macro: INITIALS2
-(
- {Token.orth == allCaps, Token.length == "2"} |
- {Token.orth == allCaps, Token.length == "3"}
-)
Macro: FIRSTNAME
-(
- ({FirstPerson.gender == male} |
- {FirstPerson.gender == female})
- |
- (INITIALS)
-)
+ ({FirstPerson.gender == male, FirstPerson.kind != ambig} |
+ {FirstPerson.gender == female, FirstPerson.kind != ambig})
+
+
Macro: FIRSTNAMEAMBIG
(
- {Lookup.majorType == person_first, Lookup.minorType == ambig}
+ {FirstPerson.kind == ambig}
)
-Macro: UPPERTAG
-(
- ({TreeTaggerToken.category == NAM}
-)
- ({Token.string == "-"}
- {TreeTaggerToken.category == NAM}
- )?
-)
-Macro: UPPER
-(
- ({Token.orth == upperInitial}
-)
- ({Token.string == "-"}
- {Token.orth == upperInitial}
- )?
-)
-
Macro: PERSONENDING
(
+ ({Token.string == ","})?
{Lookup.majorType == person_ending}
)
@@ -101,67 +72,92 @@
Rule: Pronoun
Priority: 1000
-//stops personal pronouns being recognised as Initials
+
(
- {TreeTaggerToken.category == PP}|
- {TreeTaggerToken.category == PRP}|
- {TreeTaggerToken.category == RB}
+ {Token.category == PP}|
+ {Token.category == PRP}|
+ {Token.category == RB}
):pro
-->
{}
-
-Rule: GazPerson
-Priority: 50
+
+Rule:Reject
+Priority: 1000
+// stops certain things being recognised as People
(
- {Lookup.majorType == person_full, Lookup.minorType == normal}
+ {Hashtag}|{UserID}|{ClosedClass}
)
-:person -->
-{
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
-gate.FeatureMap features = Factory.newFeatureMap();
-features.put("kind", "personName");
-features.put("rule", "GazPerson");
-outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
-features);
-}
+-->
+{}
-Rule: TheGazPersonFirst
-Priority: 200
+
+Rule: GazPerson
+Priority: 100
(
- {TreeTaggerToken.category == DT}|
- {TreeTaggerToken.category == PRP}|
- {TreeTaggerToken.category == RB}
-)
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
(
- {FirstPerson}
+ {Lookup.majorType == person_full}
)
-:person
-(
- {Token.orth == upperInitial, Token.length == "1"}
-)?
--->
+:person -->
{
-gate.AnnotationSet person = (gate.AnnotationSet)bindings.get("person");
-gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
+gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+gate.Annotation personAnn = (gate.Annotation)personSet.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
+
+// find the Token annotations
+AnnotationSet tokenSet = gate.Utils.getContainedAnnotations(inputAS,
personSet, "Token");
+// put them in order
+List<Annotation> tokenList = gate.Utils.inDocumentOrder(tokenSet);
+
+if (tokenList.size() == 1) {
+ // if there's only one Token, guess it's a surname
+
+ String surnameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+ features.put("surname", surnameContent);
+ }
+
+else if (tokenList.size() > 0) {
+ // the string under the first Token
+ String firstNameContent = gate.Utils.stringFor(doc, tokenList.get(0));
+ features.put("firstName", firstNameContent);
+
+
+ // the string under the remaining Tokens if any
+ if (tokenList.size() > 1) {
+ Long lastNameStart = gate.Utils.start(tokenList.get(1));
+ Long lastNameEnd = gate.Utils.end(tokenList.get(tokenList.size() - 1));
+ String surnameContent = gate.Utils.stringFor(doc, lastNameStart,
lastNameEnd);
+ features.put("surname", surnameContent);
+ }
+}
+
+features.put("kind", "fullName");
+features.put("rule", "GazPerson");
features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
-features.put("rule", "GazPersonFirst");
-outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
-features);
-//outputAS.removeAll(person);
+
+// this method doesn't require try-catch
+gate.Utils.addAnn(outputAS, personSet, "TempPerson", features);
}
+
+
+
Rule: GazPersonFirst
-Priority: 70
+Priority: 200
(
- {FirstPerson}
-)
-:person
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ {FirstPerson.kind != ambig}
+):person
(
{Token.orth == upperInitial, Token.length == "1"}
)?
@@ -171,202 +167,270 @@
gate.Annotation personAnn = (gate.Annotation)person.iterator().next();
gate.FeatureMap features = Factory.newFeatureMap();
features.put("gender", personAnn.getFeatures().get("gender"));
-features.put("kind", "personName");
+features.put("kind", "firstName");
features.put("rule", "GazPersonFirst");
+
+// get the string of the first name
+String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+features.put("firstName", contentFirstName);
+
outputAS.add(person.firstNode(), person.lastNode(), "TempPerson",
features);
-//outputAS.removeAll(person);
}
-
-
Rule: PersonFirstContext
Priority: 30
// Anne and Kenton
+(FIRSTNAME):person1
(
- {FirstPerson}
-):person1
-(
{Token.string == "and"}
)
-({Token.orth == upperInitial})
+({Token.orth == upperInitial, Token.length != "1"})
:person2
-->
{
//first deal with person1
gate.FeatureMap features1 = Factory.newFeatureMap();
-gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
-gate.AnnotationSet firstPerson =
(gate.AnnotationSet)person1Set.get("FirstPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
- gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
+ gate.AnnotationSet person1Set = (gate.AnnotationSet)bindings.get("person1");
+ gate.Annotation personAnn = (gate.Annotation)person1Set.iterator().next();
+
+ String contentFirstName = gate.Utils.stringFor(doc, personAnn);
+ features1.put("firstName", contentFirstName);
features1.put("gender", personAnn.getFeatures().get("gender"));
-}
- features1.put("kind", "personName");
+ features1.put("kind", "firstName");
features1.put("rule", "PersonFirstContext");
outputAS.add(person1Set.firstNode(), person1Set.lastNode(), "TempPerson",
features1);
+
//now deal with person2
gate.FeatureMap features2 = Factory.newFeatureMap();
gate.AnnotationSet person2Set = (gate.AnnotationSet)bindings.get("person2");
- features2.put("kind", "personName");
+gate.Annotation person2Ann = (gate.Annotation)person2Set.iterator().next();
+
+ String content2FirstName = gate.Utils.stringFor(doc, person2Ann);
+ features2.put("firstName", content2FirstName);
+ features2.put("kind", "firstName");
features2.put("rule", "PersonFirstContext");
outputAS.add(person2Set.firstNode(), person2Set.lastNode(), "TempPerson",
features2);
}
-Rule: PersonFirstContext2
-Priority: 40
-// Anne and I
+Rule: PersonTitle
+Priority: 35
+// Mr. Jones
+// Mr Fred Jones
+// note we only allow one first and surname,
+// but we add more in a final phase if we find adjacent unknowns
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
(
- {FirstPerson}
-):person
-(
- {Token.string == "and"}
- {Token.length == "1"}
-)
- -->
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+ (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
+ (PREFIX)*
+ ({Upper})
+ (PERSONENDING)?
+ ):surname
+):person
+-->
{
gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet firstPerson = (gate.AnnotationSet)personSet.get("FirstPerson");
-if (firstPerson != null && firstPerson.size()>0)
-{
- gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+ gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ }
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
features.put("kind", "personName");
- features.put("rule", "PersonFirstContext2");
+ features.put("rule", "PersonTitle");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
-Rule: PersonTitle
-Priority: 35
-// Mr. Jones
-// Mr Fred Jones
-// note we only allow one first and surname,
-// but we can add more in a final phase if we find adjacent unknowns
+Rule: PersonTitleUnknownGender
+Priority: 30
+// Prof. Jones
+// This person will just get an unknown value for gender. Or we could decide to make them male by default, as they're mostly military etc.
(
- {TreeTaggerToken.category == DT}|
- {TreeTaggerToken.category == PRP}|
- {TreeTaggerToken.category == RB}
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
)?
(
- (TITLE)+
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )?
+ ({Title}):title
+ ({Title})?
+ (
+ (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
(PREFIX)*
- (UPPER)
- (PERSONENDING)?
-)
-:person -->
+ ({Upper})
+ (PERSONENDING)?
+ ):surname
+):person
+-->
{
gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-
- // get all Title annotations that have a gender feature
- HashSet fNames = new HashSet();
- fNames.add("gender");
- gate.AnnotationSet personTitle = personSet.get("Title", fNames);
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
-// if the gender feature exists
- if (personTitle != null && personTitle.size()>0)
-{
- //Out.prln("Titles found " + personTitle);
- gate.Annotation personAnn = (gate.Annotation)personTitle.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
-else
-{
- //get all firstPerson annotations that have a gender feature
- // HashSet fNames = new HashSet();
- // fNames.add("gender");
- gate.AnnotationSet firstPerson = personSet.get("FirstPerson", fNames);
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
- if (firstPerson != null && firstPerson.size()>0)
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ features.put("gender", "unknown");
+
+ if (firstNameSet != null && firstNameSet.size()>0)
{
- //Out.prln("First persons found " + firstPerson);
- gate.Annotation personAnn = (gate.Annotation)firstPerson.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
+ gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
}
-}
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
features.put("kind", "personName");
- features.put("rule", "PersonTitle");
+ features.put("rule", "PersonTitleGenderUnknown");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
-Rule: PersonFirstTitleGender
-Priority: 55
-// use this rule when we know what gender the title indicates
-// Mr Fred
+Rule: PersonTitleInitials
+Priority: 35
+// Mr J. Jones
+
+
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
(
- ({Title.gender == male} | {Title.gender == female})
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )
-)
-:person -->
+ ({Title.rule == "TitleGender"}):title
+ ({Title})?
+ (
+ ({Initials})?
+ ):initials
+ (
+ (PREFIX)*
+ ({Upper, !Initials})
+ (PERSONENDING)?
+ ):surname
+):person
+-->
{
gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
-if (title != null && title.size()>0)
-{
- gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet initialsSet = (gate.AnnotationSet)bindings.get("initials");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (initialsSet != null && initialsSet.size()>0)
+ {
+ List<Annotation> initialsList = gate.Utils.inDocumentOrder(initialsSet);
+
+ Long initialsStart = gate.Utils.start(initialsList.get(0));
+ Long initialsEnd = gate.Utils.end(initialsList.get(initialsList.size() - 1));
+ String initialsContent = gate.Utils.cleanStringFor(doc, initialsStart, initialsEnd);
+ features.put("initials", initialsContent);
+ }
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
features.put("kind", "personName");
- features.put("rule", "PersonFirstTitleGender");
+ features.put("rule", "PersonTitleInitials");
outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
-Rule: PersonTitleGender
-Priority: 18
-// use this rule if the title has a feature gender
-// Miss F Smith
+Rule: TitleFirstName
+Priority: 55
+// use this rule when we know what gender the title indicates
+// Mr Fred
+
(
- ({Title.gender == male}|
- {Title.gender == female}
- )
- ((FIRSTNAME | FIRSTNAMEAMBIG | INITIALS2)
- )*
- (UPPER)
- (PERSONENDING)?
+ ({Title.gender == male} | {Title.gender == female}):title
+ (FIRSTNAME | FIRSTNAMEAMBIG ):firstname
+
)
:person -->
+
{
gate.FeatureMap features = Factory.newFeatureMap();
-gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
-gate.AnnotationSet title = (gate.AnnotationSet)personSet.get("Title");
-// if the annotation type title doesn't exist, do nothing
-if (title != null && title.size()>0)
-{
-// if it does exist, take the first element in the set
- gate.Annotation personAnn = (gate.Annotation)title.iterator().next();
-//propagate gender feature (and value) from title
- features.put("gender", personAnn.getFeatures().get("gender"));
-}
-// create some new features
- features.put("kind", "personName");
- features.put("rule", "PersonTitleGender");
-// create a TempPerson annotation and add the features we've created
-outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ features.put("gender", titleAnn.getFeatures().get("gender"));
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+ gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ }
+
+ features.put("kind", "personName");
+
+ features.put("rule", "TitleFirstName");
+ outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
features);
}
+
Rule: PersonJobTitle
Priority: 20
// note we include titles but not jobtitles in markup
@@ Diff output truncated at 100000 characters. @@
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs