Revision: 19035
http://sourceforge.net/p/gate/code/19035
Author: markagreenwood
Date: 2015-12-14 12:05:27 +0000 (Mon, 14 Dec 2015)
Log Message:
-----------
checking in my speedup changes before I lose track of them -- the speed
improvement can be huge on long documents which is nice
Modified Paths:
--------------
gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy
Modified:
gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy
===================================================================
--- gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy
2015-12-14 02:33:14 UTC (rev 19034)
+++ gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy
2015-12-14 12:05:27 UTC (rev 19035)
@@ -7,17 +7,22 @@
List<Annotation> mwList = new ArrayList<Annotation>(inputAS.get("MultiWord"));
Collections.sort(mwList, new OffsetComparator());
-// the OffsetComparator only looks at the start offset
for (int i=0 ; i < mwList.size() - 1 ; i++) {
Annotation mwi = mwList.get(i);
for (int j=i+1 ; j < mwList.size() ; j++) {
Annotation mwj = mwList.get(j);
+ if (mwj.getStartNode().getOffset() > mwi.getStartNode().getOffset()) {
+ //if we've moved past the start offset of the outer annotation then
+ //because the annotations are sorted we know we'll never find a matching
+ //one so we can safely stop looking.
+ break;
+ }
+
if (mwj.getStartNode().getOffset().equals(mwi.getStartNode().getOffset())
&& mwj.getEndNode().getOffset().equals(mwi.getEndNode().getOffset()) )
{
inputAS.remove(mwi);
- break;
}
}
}
@@ -42,9 +47,13 @@
exclusionTypes.add("Number");
AnnotationSet candidates = inputAS.get(termTypes);
+
+AnnotationSet excluded = inputAS.get(exclusionTypes);
+AnnotationSet strongStop = inputAS.get("StrongStop");
+
for (Annotation candidate : candidates) {
// delete unwanted term candidates
- if (! gate.Utils.getCoveringAnnotations(inputAS,
candidate).get(exclusionTypes).isEmpty()) {
+ if (! gate.Utils.getCoveringAnnotations(excluded, candidate).isEmpty()) {
FeatureMap newf = Factory.newFeatureMap();
newf.putAll(candidate.getFeatures());
String newType = "deleted_NE_" + candidate.getType();
@@ -52,7 +61,7 @@
inputAS.remove(candidate);
}
- else if (! gate.Utils.getContainedAnnotations(inputAS, candidate,
"StrongStop").isEmpty()) {
+ else if (! gate.Utils.getContainedAnnotations(strongStop,
candidate).isEmpty()) {
FeatureMap newf = Factory.newFeatureMap();
newf.putAll(candidate.getFeatures());
String newType = "deleted_SS_" + candidate.getType();
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs