Revision: 17812
          http://sourceforge.net/p/gate/code/17812
Author:   markagreenwood
Date:     2014-04-11 12:46:02 +0000 (Fri, 11 Apr 2014)
Log Message:
-----------
applied the patched version submitted via SourceForge that prevents an invalid 
index exception from being thrown when processing words with multiple hyphens

Modified Paths:
--------------
    gate/trunk/plugins/Lang_French/tokeniser/postprocess.jape

Modified: gate/trunk/plugins/Lang_French/tokeniser/postprocess.jape
===================================================================
--- gate/trunk/plugins/Lang_French/tokeniser/postprocess.jape   2014-04-11 12:38:20 UTC (rev 17811)
+++ gate/trunk/plugins/Lang_French/tokeniser/postprocess.jape   2014-04-11 12:46:02 UTC (rev 17812)
@@ -10,41 +10,41 @@
 
 // this rule is apparently no more needed by the TreeTagger
 
-// Rule: simpleJoin
-// /* joins a final apostrophe with the preceding word, to make it the same as 
the
-// TreeTagger output, e.g. d' should be one Token not two */
+Rule: simpleJoin
+/* joins a final apostrophe with the preceding word, to make it the same as the
+TreeTagger output, e.g. d' should be one Token not two */
 
-//  (
-//   (
-//    {Token.string == "d"}|
-//    {Token.string == "D"}|
-//    {Token.string == "L"}|
-//    {Token.string == "l"}|
-//    {Token.string == "n"}|
-//    {Token.string == "N"}
-//   )
-//   {Token.string == "'"}
-//  ):left
-// -->
-// {
-//   gate.AnnotationSet toRemove = (gate.AnnotationSet)bindings.get("left");
-//   outputAS.removeAll(toRemove);
-//   //get the tokens
-//   java.util.ArrayList tokens = new java.util.ArrayList(toRemove);
-//   //define a comparator for annotations by start offset
-//   Collections.sort(tokens, new gate.util.OffsetComparator());
-//   String text = "";
-//   Iterator tokIter = tokens.iterator();
-//   while(tokIter.hasNext())
-//     text += 
(String)((Annotation)tokIter.next()).getFeatures().get("string");
+ (
+  (
+   {Token.string == "d"}|
+   {Token.string == "D"}|
+   {Token.string == "L"}|
+   {Token.string == "l"}|
+   {Token.string == "n"}|
+   {Token.string == "N"}
+  )
+  {Token.string == "'"}
+ ):left
+-->
+{
+  gate.AnnotationSet toRemove = (gate.AnnotationSet)bindings.get("left");
+  outputAS.removeAll(toRemove);
+  //get the tokens
+  java.util.ArrayList tokens = new java.util.ArrayList(toRemove);
+  //define a comparator for annotations by start offset
+  Collections.sort(tokens, new gate.util.OffsetComparator());
+  String text = "";
+  Iterator tokIter = tokens.iterator();
+  while(tokIter.hasNext())
+    text += (String)((Annotation)tokIter.next()).getFeatures().get("string");
 
-//   gate.FeatureMap features = Factory.newFeatureMap();
-//   features.put("kind", "word");
-//   features.put("string", text);
-//   features.put("length", Integer.toString(text.length()));
-//   features.put("orth", "artapos");
-//   outputAS.add(toRemove.firstNode(), toRemove.lastNode(), "Token", 
features);
-// }
+  gate.FeatureMap features = Factory.newFeatureMap();
+  features.put("kind", "word");
+  features.put("string", text);
+  features.put("length", Integer.toString(text.length()));
+  features.put("orth", "artapos");
+  outputAS.add(toRemove.firstNode(), toRemove.lastNode(), "Token", features);
+}
 
 Rule: simpleSplit
 /* split compound word, to make it the same as the
@@ -69,20 +69,20 @@
      features.putAll(annotation.getFeatures());
      features.put("string", content.substring(startIndex, dashIndex));
      features.put("length", dashIndex-startIndex);
-     outputAS.add(offset, offset+dashIndex, "Token", features);
+     outputAS.add(offset+startIndex, offset+dashIndex, "Token", features);
      features = Factory.newFeatureMap();
      features.putAll(annotation.getFeatures());
      features.put("string", "-");
      features.put("length", 1);
-     outputAS.add(offset+dashIndex, offset+dashIndex+1, "Token", features);
-     offset += dashIndex;
+     outputAS.add(offset+dashIndex, offset+dashIndex+1, "Token", features); // 
<-- MODIF HERE
+     /* offset += dashIndex; */ // <-- REMOVE THIS LINE
      startIndex = dashIndex + 1;
     }
    features = Factory.newFeatureMap();
    features.putAll(annotation.getFeatures());
    features.put("string", content.substring(startIndex));
    features.put("length", content.length()-startIndex);
-   outputAS.add(offset+1, endOffset, "Token", features);
+   outputAS.add(offset+startIndex, endOffset, "Token", features); // <-- MODIF 
HERE
   } catch (InvalidOffsetException e) {
     throw new LuckyException(e);
   }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Put Bad Developers to Shame
Dominate Development with Jenkins Continuous Integration
Continuously Automate Build, Test & Deployment 
Start a new project now. Try Jenkins in the cloud.
http://p.sf.net/sfu/13600_Cloudbees
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to