Revision: 18907
http://sourceforge.net/p/gate/code/18907
Author: dgmaynard
Date: 2015-09-15 10:08:00 +0000 (Tue, 15 Sep 2015)
Log Message:
-----------
Made a few fixes thanks to Mark spotting some problems; hopefully I haven't
broken anything. Longer names with titles get recognised better now, and
some ambiguities have been moved to the ambiguous gazetteer list.
Modified Paths:
--------------
gate/trunk/plugins/ANNIE/resources/NE/name.jape
gate/trunk/plugins/ANNIE/resources/NE/name_context.jape
gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def
gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst
gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst
gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst
gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst
gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst
Removed Paths:
-------------
gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst
Modified: gate/trunk/plugins/ANNIE/resources/NE/name.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/name.jape 2015-09-15 01:19:57 UTC
(rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/NE/name.jape 2015-09-15 10:08:00 UTC
(rev 18907)
@@ -276,8 +276,61 @@
}
+Rule: PersonTitleUnknownGender
+Priority: 30
+// Prof. Jones
+// This person will just get an unknown value for gender. Or we could decide to make them male by default, as they're mostly military etc.
+(
+ {Token.category == DT}|
+ {Token.category == PRP}|
+ {Token.category == RB}
+)?
+(
+ ({Title}):title
+ ({Title})?
+ (
+ (FIRSTNAME | FIRSTNAMEAMBIG )?
+ ):firstName
+ (
+ (PREFIX)*
+ ({Upper})
+ (PERSONENDING)?
+ ):surname
+):person
+-->
+{
+ gate.FeatureMap features = Factory.newFeatureMap();
+ gate.AnnotationSet personSet = (gate.AnnotationSet)bindings.get("person");
+
+ gate.AnnotationSet firstNameSet = (gate.AnnotationSet)bindings.get("firstName");
+ gate.AnnotationSet titleSet = (gate.AnnotationSet)bindings.get("title");
+ gate.Annotation titleAnn = (gate.Annotation)titleSet.iterator().next();
+
+ gate.AnnotationSet surnameSet = (gate.AnnotationSet)bindings.get("surname");
+ gate.Annotation surnameAnn = (gate.Annotation)surnameSet.iterator().next();
+
+ String contentTitle = gate.Utils.stringFor(doc, titleAnn);
+ features.put("title", contentTitle);
+ features.put("gender", "unknown");
+
+ if (firstNameSet != null && firstNameSet.size()>0)
+ {
+ gate.Annotation firstNameAnn = (gate.Annotation)firstNameSet.iterator().next();
+ String firstNameContent = gate.Utils.stringFor(doc, firstNameAnn);
+ features.put("firstName", firstNameContent);
+ }
+ String surnameContent = gate.Utils.stringFor(doc, surnameAnn);
+ features.put("surname", surnameContent);
+
+ features.put("kind", "personName");
+ features.put("rule", "PersonTitleGenderUnknown");
+outputAS.add(personSet.firstNode(), personSet.lastNode(), "TempPerson",
+features);
+}
+
+
Rule: PersonTitleInitials
Priority: 35
Modified: gate/trunk/plugins/ANNIE/resources/NE/name_context.jape
===================================================================
--- gate/trunk/plugins/ANNIE/resources/NE/name_context.jape 2015-09-15
01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/NE/name_context.jape 2015-09-15
10:08:00 UTC (rev 18907)
@@ -117,7 +117,9 @@
Rule:PersonTitle1
Priority: 40
(
- {Person.rule1 == PersonTitle}
+ ({Person.rule == PersonTitle}|
+ {Person.rule == PersonTitleGenderUnknown}
+ )
{Unknown}
):person
-->
Deleted: gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst 2015-09-15
01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/bands.lst 2015-09-15
10:08:00 UTC (rev 18907)
@@ -1,32 +0,0 @@
-The Beatles
-Pink Floyd
-AC/DC
-The Rolling Stones
-ABBA
-Eagles
-U2
-Aerosmith
-Genesis
-Bee Gees
-Dire Straits
-Eminem
-Metallica
-Fleetwood Mac
-Backstreet Boys
-Guns N' Roses
-The Carpenters
-Def Leppard
-The Beach Boys
-Kiss
-The Who
-B'z
-Santana
-R.E.M.
-Red Hot Chili Peppers
-New Kids on the Block
-The Black Eyed Peas
-Green Day
-Nirvana
-Spice Girls
-Mötley Crüe
-Depeche Mode
Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def 2015-09-15
01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/lists.def 2015-09-15
10:08:00 UTC (rev 18907)
@@ -115,3 +115,4 @@
bands.lst:organization:band
music-artists.lst:person_full:music
religious_adj.lst:religious_adj
+test.lst:
Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst
2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male.lst
2015-09-15 10:08:00 UTC (rev 18907)
@@ -2740,7 +2740,6 @@
Pedr
Pedran
Pedro
-Peers
Pelo
Pelota
Penjani
@@ -4360,3 +4359,5 @@
Dariusz
Lamberto
Zine
+Francis
+Frederic
Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst
2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_ambig.lst
2015-09-15 10:08:00 UTC (rev 18907)
@@ -151,3 +151,4 @@
Al:kind=ambig
Franc:kind=ambig
Bill:kind=ambig
+Peers:kind=ambig
Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst
2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower.lst
2015-09-15 10:08:00 UTC (rev 18907)
@@ -842,7 +842,6 @@
eamon
eamonn
eanruig
-earnest
ebenezer
eberhard
ebert
Modified:
gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst
2015-09-15 01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/person_male_lower_ambig.lst
2015-09-15 10:08:00 UTC (rev 18907)
@@ -161,3 +161,4 @@
bill:kind=ambig
franc:kind=ambig
nab:kind=ambig
+earnest:kind=ambig
Modified: gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst
===================================================================
--- gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst 2015-09-15
01:19:57 UTC (rev 18906)
+++ gate/trunk/plugins/ANNIE/resources/gazetteer/title_male.lst 2015-09-15
10:08:00 UTC (rev 18907)
@@ -16,3 +16,5 @@
Archbishop
Baron
Lord Chief Justice
+Right Hon
+Right Hon.
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs