stanford

markagreenwood Fri, 19 Aug 2016 10:01:51 -0700

Revision: 19526
          http://sourceforge.net/p/gate/code/19526
Author:   markagreenwood
Date:     2016-08-19 17:01:15 +0000 (Fri, 19 Aug 2016)
Log Message:
-----------
formatting and removed some unused stuff


Modified Paths:
--------------
    
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/DependencyMode.java
    
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/DependencyRelation.java
    
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/NER.java
    
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Parser.java
    
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/StanfordSentence.java
    
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Tagger.java
    
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Tokenizer.java
    
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/apps/EnglishDependencies.java
    
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/apps/EnglishPOSDependencies.java

Modified: 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/DependencyMode.java
===================================================================
--- 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/DependencyMode.java
     2016-08-19 16:50:05 UTC (rev 19525)
+++ 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/DependencyMode.java
     2016-08-19 17:01:15 UTC (rev 19526)
@@ -26,35 +26,23 @@
 import edu.stanford.nlp.trees.TypedDependency;
 
 public enum DependencyMode {
-  Typed,
-  AllTyped,
-  TypedCollapsed,
-  TypedCCprocessed;
-  
-  
-  protected static Collection<TypedDependency> 
getDependencies(GrammaticalStructure gs,
-      DependencyMode mode, boolean includeExtras) {
+  Typed, AllTyped, TypedCollapsed, TypedCCprocessed;
+  protected static Collection<TypedDependency> getDependencies(
+      GrammaticalStructure gs, DependencyMode mode, boolean includeExtras) {
     Collection<TypedDependency> result = null;
-    
     Extras incl = Extras.NONE;
     if(includeExtras) {
       incl = Extras.MAXIMAL;
     }
-    
-    if (mode.equals(Typed)) {
+    if(mode.equals(Typed)) {
       result = gs.typedDependencies(incl);
-    }
-    else if (mode.equals(AllTyped)) {
+    } else if(mode.equals(AllTyped)) {
       result = gs.allTypedDependencies();
-    }
-    else if (mode.equals(TypedCollapsed)) {
+    } else if(mode.equals(TypedCollapsed)) {
       result = gs.typedDependenciesCollapsed(incl);
-    }
-    else if (mode.equals(TypedCCprocessed)) {
+    } else if(mode.equals(TypedCCprocessed)) {
       result = gs.typedDependenciesCCprocessed(incl);
     }
-    
     return result;
   }
-
 }

Modified: 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/DependencyRelation.java
===================================================================
--- 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/DependencyRelation.java
 2016-08-19 16:50:05 UTC (rev 19525)
+++ 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/DependencyRelation.java
 2016-08-19 17:01:15 UTC (rev 19526)
@@ -14,34 +14,33 @@
  * 
  * You should have received a copy of the GNU General Public License along with
  * this program. If not, see <http://www.gnu.org/licenses/>.
- *
- *  $Id: DependencyRelation.java 15600 2012-03-19 15:40:56Z adamfunk $
+ * 
+ * $Id: DependencyRelation.java 15600 2012-03-19 15:40:56Z adamfunk $
  */
 package gate.stanford;
 
 import java.io.Serializable;
 
 /**
- * Simple data structure representing a single dependency relation.  The 
"target"
- * is the Annotation ID of the dependent; the "type" is the dependency 
- * tag (<a href="http://nlp.stanford.edu/software/parser-faq.shtml#c">the
- * Stanford Parser documentation</a> contains links to the tagset</a>; for 
example,
- * nsubj = "nominal subject", dobj = "direct object).
+ * Simple data structure representing a single dependency relation. The 
"target"
+ * is the Annotation ID of the dependent; the "type" is the dependency tag (<a
+ * href="http://nlp.stanford.edu/software/parser-faq.shtml#c">the Stanford
+ * Parser documentation</a> contains links to the tagset</a>; for example, 
nsubj
+ * = "nominal subject", dobj = "direct object).
  */
 public class DependencyRelation implements Serializable {
-
   private static final long serialVersionUID = -7842607116149222052L;
 
   /**
    * The type of the dependency relation (det, amod, etc.).
    */
   private String type;
-  
+
   /**
    * The ID of the token that is the target of this relation.
    */
   private Integer targetId;
-  
+
   public DependencyRelation(String type, Integer targetId) {
     this.type = type;
     this.targetId = targetId;
@@ -49,6 +48,7 @@
 
   /**
    * Return the dependency tag (type).
+   * 
    * @return the dependency tag
    */
   public String getType() {
@@ -57,7 +57,9 @@
 
   /**
    * Set the dependency tag.
-   * @param type dependency tag
+   * 
+   * @param type
+   *          dependency tag
    */
   public void setType(String type) {
     this.type = type;
@@ -65,6 +67,7 @@
 
   /**
    * Return the GATE Annotation ID of the dependent.
+   * 
    * @return the Annotation ID
    */
   public Integer getTargetId() {
@@ -73,16 +76,17 @@
 
   /**
    * Set the Annotation ID of the dependent.
-   * @param targetId the Annotation ID
+   * 
+   * @param targetId
+   *          the Annotation ID
    */
   public void setTargetId(Integer targetId) {
     this.targetId = targetId;
   }
-  
+
   /**
-   * Format the data structure for display.
-   * For example, if type is "dobj" and the dependent has Annotation ID 37,
-   * return the String "dobj(37)". 
+   * Format the data structure for display. For example, if type is "dobj" and
+   * the dependent has Annotation ID 37, return the String "dobj(37)".
    */
   public String toString() {
     return type + "(" + targetId + ")";

Modified: 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/NER.java
===================================================================
--- 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/NER.java
        2016-08-19 16:50:05 UTC (rev 19525)
+++ 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/NER.java
        2016-08-19 17:01:15 UTC (rev 19526)
@@ -19,7 +19,6 @@
  * 
  * $Id: NER.java 15468 2013-10-22 21:13:15Z $
  */
-
 package gate.stanford;
 
 import edu.stanford.nlp.ie.AbstractSequenceClassifier;
@@ -56,9 +55,8 @@
 /**
  * This class is a wrapper for the Stanford NER tool v3.2.0.
  */
-@CreoleResource(name = "Stanford NER", comment = "Stanford Named Entity 
Recogniser", icon = "ne-transducer", 
helpURL="http://gate.ac.uk/userguide/sec:misc:creole:stanford")
+@CreoleResource(name = "Stanford NER", comment = "Stanford Named Entity 
Recogniser", icon = "ne-transducer", helpURL = 
"http://gate.ac.uk/userguide/sec:misc:creole:stanford")
 public class NER extends AbstractLanguageAnalyser {
-
   private static final long serialVersionUID = -6001372186847970080L;
 
   public static final String TAG_DOCUMENT_PARAMETER_NAME = "document";
@@ -68,10 +66,10 @@
   public static final String TAG_ENCODING_PARAMETER_NAME = "encoding";
 
   public static final String BASE_TOKEN_ANNOTATION_TYPE_PARAMETER_NAME =
-    "baseTokenAnnotationType";
+      "baseTokenAnnotationType";
 
   public static final String BASE_SENTENCE_ANNOTATION_TYPE_PARAMETER_NAME =
-    "baseSentenceAnnotationType";
+      "baseSentenceAnnotationType";
 
   public static final String TAG_OUTPUT_AS_PARAMETER_NAME = "outputASName";
 
@@ -97,7 +95,8 @@
     if(tagger == null) {
       fireStatusChanged("Loading Stanford NER model");
       try {
-        // nasty workaround for stanford NER's path format inconsistency - 
tagger is content with uris beginning file:, ner labeller is not
+        // nasty workaround for stanford NER's path format inconsistency -
+        // tagger is content with uris beginning file:, ner labeller is not
         tagger = 
CRFClassifier.getClassifier(modelFile.toString().substring(5));
       } catch(Exception e) {
         throw new ResourceInstantiationException(e);
@@ -117,172 +116,143 @@
     // check the parameters
     if(document == null)
       throw new ExecutionException("No document to process!");
-
     AnnotationSet inputAS = document.getAnnotations(inputASName);
     AnnotationSet outputAS = document.getAnnotations(outputASName);
-
-    if(baseTokenAnnotationType == null ||
-      baseTokenAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
-      "No base Token Annotation Type provided!"); }
-
-    if(baseSentenceAnnotationType == null ||
-      baseSentenceAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
-      "No base Sentence Annotation Type provided!"); }
-
+    if(baseTokenAnnotationType == null
+        || baseTokenAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
+        "No base Token Annotation Type provided!"); }
+    if(baseSentenceAnnotationType == null
+        || baseSentenceAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
+        "No base Sentence Annotation Type provided!"); }
     AnnotationSet sentencesAS = inputAS.get(baseSentenceAnnotationType);
     AnnotationSet tokensAS = inputAS.get(baseTokenAnnotationType);
-    if(sentencesAS != null && sentencesAS.size() > 0 && tokensAS != null &&
-      tokensAS.size() > 0) {
+    if(sentencesAS != null && sentencesAS.size() > 0 && tokensAS != null
+        && tokensAS.size() > 0) {
       long startTime = System.currentTimeMillis();
       fireStatusChanged("NER searching " + document.getName());
       fireProgressChanged(0);
-
       // prepare the input for CRFClassifier
       List<CoreLabel> sentenceForTagger = new ArrayList<CoreLabel>();
-
       // define a comparator for annotations by start offset
       OffsetComparator offsetComparator = new OffsetComparator();
-
       // read all the tokens and all the sentences
       List<Annotation> sentencesList = new ArrayList<Annotation>(sentencesAS);
       Collections.sort(sentencesList, offsetComparator);
       List<Annotation> tokensList = new ArrayList<Annotation>(tokensAS);
       Collections.sort(tokensList, offsetComparator);
-
       Iterator<Annotation> sentencesIter = sentencesList.iterator();
       ListIterator<Annotation> tokensIter = tokensList.listIterator();
-
       List<Annotation> tokensInCurrentSentence = new ArrayList<Annotation>();
       Annotation currentToken = tokensIter.next();
       int sentIndex = 0;
       int sentCnt = sentencesAS.size();
-
       // go through sentence annotations in the document
       while(sentencesIter.hasNext()) {
         Annotation currentSentence = sentencesIter.next();
-
         // reset sentence-level processing variables
         tokensInCurrentSentence.clear();
         sentenceForTagger.clear();
-
         // while we have sane tokens
-        while(currentToken != null && 
-          currentToken.getEndNode().getOffset()
-            .compareTo(currentSentence.getEndNode().getOffset()) <= 0) {
-
+        while(currentToken != null
+            && currentToken.getEndNode().getOffset()
+                .compareTo(currentSentence.getEndNode().getOffset()) <= 0) {
           // If we're only labelling Tokens within baseSentenceAnnotationType,
           // don't add the sentence if the Tokens aren't within the span of
           // baseSentenceAnnotationType
           if(currentToken.withinSpanOf(currentSentence)) {
             tokensInCurrentSentence.add(currentToken);
-
-            // build a stanford nlp representation of the token and add it to 
the sequence
+            // build a stanford nlp representation of the token and add it to
+            // the sequence
             CoreLabel currentLabel = new CoreLabel();
-            
currentLabel.setWord((String)currentToken.getFeatures().get(TOKEN_STRING_FEATURE_NAME));
-
+            currentLabel.setWord((String)currentToken.getFeatures().get(
+                TOKEN_STRING_FEATURE_NAME));
             sentenceForTagger.add(currentLabel);
           }
           currentToken = (tokensIter.hasNext() ? tokensIter.next() : null);
         }
-
         // if the sentence doesn't contain any tokens (which is a bit weird but
         // is possible) then don't try running the labeller
         if(sentenceForTagger.isEmpty()) continue;
-
         // run the labeller
         List<CoreLabel> taggerResults =
-          tagger.classifySentence(sentenceForTagger);
-
+            tagger.classifySentence(sentenceForTagger);
         // add the results
         // make sure no malfunction occurred
         if(taggerResults.size() != tokensInCurrentSentence.size())
           throw new ExecutionException(
-            "NER labeller malfunction: the output size (" +
-              taggerResults.size() + ") is different from the input size (" +
-              tokensInCurrentSentence.size() + ")!");
-
+              "NER labeller malfunction: the output size ("
+                  + taggerResults.size()
+                  + ") is different from the input size ("
+                  + tokensInCurrentSentence.size() + ")!");
         // proceed through the annotated sequence
         Iterator<CoreLabel> resIter = taggerResults.iterator();
         Iterator<Annotation> tokIter = tokensInCurrentSentence.iterator();
-
         String previousLabel = outsideLabel;
         Long previousEnd = new Long(-1);
         Long entityStart = new Long(-1);
-        
-        //No idea why this was there so lets comment it out
-        //Long entityEnd = new Long(-1);
-
+        // No idea why this was there so lets comment it out
+        // Long entityEnd = new Long(-1);
         Annotation annot;
         String nerLabel = "";
-
         while(resIter.hasNext()) {
-
           // for each labelled token..
           annot = tokIter.next();
           CoreLabel word = resIter.next();
           nerLabel = word.get(CoreAnnotations.AnswerAnnotation.class);
-
           // falling edge transition: entity ends
           // guard against this triggering at document start
-          if (!nerLabel.equals(previousLabel) && 
!previousLabel.equals(outsideLabel) && entityStart != -1) {
-
-//            System.out.println("falling edge");
+          if(!nerLabel.equals(previousLabel)
+              && !previousLabel.equals(outsideLabel) && entityStart != -1) {
+            // System.out.println("falling edge");
             // get final bound; add new annotation in output AS
             try {
-              outputAS.add(entityStart, previousEnd, previousLabel, new 
SimpleFeatureMapImpl());
-            } catch (InvalidOffsetException e) {
+              outputAS.add(entityStart, previousEnd, previousLabel,
+                  new SimpleFeatureMapImpl());
+            } catch(InvalidOffsetException e) {
               System.out.println("Token alignment problem:" + e);
             }
-
           }
-
           // rising edge transition: entity starts
-          if (!nerLabel.equals(previousLabel) && 
!nerLabel.equals(outsideLabel)) {
-//            System.out.println("rising edge");
+          if(!nerLabel.equals(previousLabel) && 
!nerLabel.equals(outsideLabel)) {
+            // System.out.println("rising edge");
             entityStart = annot.getStartNode().getOffset();
           }
-//          System.out.println(word.word() + "/" + nerLabel);
-
+          // System.out.println(word.word() + "/" + nerLabel);
           previousLabel = nerLabel;
           previousEnd = annot.getEndNode().getOffset();
-
         }
-
         // clean up, in case last token in sentence was in an entity
-        if (!nerLabel.equals(outsideLabel)) {
+        if(!nerLabel.equals(outsideLabel)) {
           try {
-            outputAS.add(entityStart, previousEnd, previousLabel, new 
SimpleFeatureMapImpl());
-          } catch (InvalidOffsetException e) {
+            outputAS.add(entityStart, previousEnd, previousLabel,
+                new SimpleFeatureMapImpl());
+          } catch(InvalidOffsetException e) {
             System.out.println("Token alignment problem:" + e);
           }
         }
-
         fireProgressChanged(sentIndex++ * 100 / sentCnt);
-
       }
-
       fireProcessFinished();
-      fireStatusChanged(document.getName() +
-        " tagged in " +
-        NumberFormat.getInstance().format(
-          (double)(System.currentTimeMillis() - startTime) / 1000) +
-        " seconds!");
+      fireStatusChanged(document.getName()
+          + " tagged in "
+          + NumberFormat.getInstance().format(
+              (double)(System.currentTimeMillis() - startTime) / 1000)
+          + " seconds!");
     } else {
       if(failOnMissingInputAnnotations) {
         throw new ExecutionException(
-          "No sentences or tokens to process in document " +
-            document.getName() + "\n" + "Please run a sentence splitter " +
-            "and tokeniser first!");
+            "No sentences or tokens to process in document "
+                + document.getName() + "\n" + "Please run a sentence splitter "
+                + "and tokeniser first!");
       } else {
         Utils
-          .logOnce(
-            logger,
-            Level.INFO,
-            "NE labeller: no sentence or token annotations in input document - 
see debug log for details.");
+            .logOnce(
+                logger,
+                Level.INFO,
+                "NE labeller: no sentence or token annotations in input 
document - see debug log for details.");
         logger.debug("No input annotations in document " + document.getName());
       }
     }
-
   }
 
   public void setEncoding(String encoding) {
@@ -335,7 +305,6 @@
     this.outputASName = outputASName;
   }
 
-
   @RunTime
   @CreoleParameter(comment = "Label used by model for tokens outside 
entities", defaultValue = "O")
   public void setOutsideLabel(String outsideLabel) {
@@ -346,14 +315,11 @@
     return this.outsideLabel;
   }
 
-
-  @CreoleParameter(comment = "Path to the NER model file", defaultValue = 
"resources/english.all.3class.distsim.crf.ser.gz", suffixes="tagger;model;gz")
+  @CreoleParameter(comment = "Path to the NER model file", defaultValue = 
"resources/english.all.3class.distsim.crf.ser.gz", suffixes = "tagger;model;gz")
   public void setModelFile(URL modelFile) {
     this.modelFile = modelFile;
   }
 
-
-
   public URL getModelFile() {
     return this.modelFile;
   }

Modified: 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Parser.java
===================================================================
--- 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Parser.java
     2016-08-19 16:50:05 UTC (rev 19525)
+++ 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Parser.java
     2016-08-19 17:01:15 UTC (rev 19526)
@@ -14,8 +14,8 @@
  * 
  * You should have received a copy of the GNU General Public License along with
  * this program. If not, see <http://www.gnu.org/licenses/>.
- *
- *  $Id: Parser.java 17831 2014-04-15 09:37:23Z ian_roberts $
+ * 
+ * $Id: Parser.java 17831 2014-04-15 09:37:23Z ian_roberts $
  */
 package gate.stanford;
 
@@ -43,7 +43,6 @@
 import gate.creole.metadata.Optional;
 import gate.creole.metadata.RunTime;
 import gate.creole.metadata.Sharable;
-import gate.util.Files;
 import gate.util.InvalidOffsetException;
 
 import java.io.BufferedReader;
@@ -67,197 +66,190 @@
  * be stored in the outputAS in various ways, controlled by CREOLE run-time
  * parameters.
  */
-@CreoleResource(name = "StanfordParser", comment = "Stanford parser wrapper",
-        helpURL = "http://gate.ac.uk/userguide/sec:parsers:stanford")
-public class Parser extends AbstractLanguageAnalyser 
-implements ProcessingResource {
-
+@CreoleResource(name = "StanfordParser", comment = "Stanford parser wrapper", 
helpURL = "http://gate.ac.uk/userguide/sec:parsers:stanford")
+public class Parser extends AbstractLanguageAnalyser implements
+                                                    ProcessingResource {
   private static final long serialVersionUID = -3062171258011850283L;
 
   protected LexicalizedParser stanfordParser;
 
-  /* Type "SyntaxTreeNode" with feature "cat" is compatible with the 
-   * classic SyntaxTreeViewer.  */
-  public static final String PHRASE_ANNOTATION_TYPE   = "SyntaxTreeNode" ;
-  public static final String PHRASE_CAT_FEATURE      = "cat" ;
-  
-  /* But "category" feature is compatible with the ANNIE POS tagger.  */
-  private static final String  POS_TAG_FEATURE    = 
ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME;
+  /*
+   * Type "SyntaxTreeNode" with feature "cat" is compatible with the classic
+   * SyntaxTreeViewer.
+   */
+  public static final String PHRASE_ANNOTATION_TYPE = "SyntaxTreeNode";
 
-  public static final String DEPENDENCY_ANNOTATION_TYPE   = "Dependency";
-  public static final String DEPENDENCY_ARG_FEATURE       = "args";
-  public static final String DEPENDENCY_LABEL_FEATURE     = "kind"; 
+  public static final String PHRASE_CAT_FEATURE = "cat";
 
-  protected String                         annotationSetName;
-  private   URL                            parserFile;
-  protected boolean                        debugMode;
-  private   boolean                        reusePosTags;
+  /* But "category" feature is compatible with the ANNIE POS tagger. */
+  private static final String POS_TAG_FEATURE =
+      ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME;
 
-  private Map<String, String>              tagMap;
-  protected GrammaticalStructureFactory    gsf;
-  
+  public static final String DEPENDENCY_ANNOTATION_TYPE = "Dependency";
 
-  /*  CREOLE parameters for optional mapping  */
-  private boolean                          useMapping = false; 
-  private URL                              mappingFileURL;
-  
-  /*  internal variables for mapping  */
-  private File                             mappingFile;
-  private boolean                          mappingLoaded = false;
-  
-  /*  CREOLE parameters: what are we going to annotate, and how?  */
-  private String   inputSentenceType;
-  private String   inputTokenType;
-  private boolean  addConstituentAnnotations;
-  private boolean  addDependencyFeatures;
-  private boolean  addDependencyAnnotations;
-  private boolean  addPosTags;
-  private boolean  includeExtraDependencies;
+  public static final String DEPENDENCY_ARG_FEATURE = "args";
+
+  public static final String DEPENDENCY_LABEL_FEATURE = "kind";
+
+  protected String annotationSetName;
+
+  private URL parserFile;
+
+  protected boolean debugMode;
+
+  private boolean reusePosTags;
+
+  private Map<String, String> tagMap;
+
+  protected GrammaticalStructureFactory gsf;
+
+  /* CREOLE parameters for optional mapping */
+  private boolean useMapping = false;
+
+  private URL mappingFileURL;
+
+  /* internal variables for mapping */
+  private File mappingFile;
+
+  private boolean mappingLoaded = false;
+
+  /* CREOLE parameters: what are we going to annotate, and how? */
+  private String inputSentenceType;
+
+  private String inputTokenType;
+
+  private boolean addConstituentAnnotations;
+
+  private boolean addDependencyFeatures;
+
+  private boolean addDependencyAnnotations;
+
+  private boolean addPosTags;
+
+  private boolean includeExtraDependencies;
+
   private DependencyMode dependencyMode;
-  
 
   /**
-   * The {@link TreebankLangParserParams} implementation to use. This is
-   * where we get the language pack, and then the
-   * {@link GrammaticalStructureFactory} used to extract the
-   * dependencies from the parse. In most cases you should leave this at
-   * the default value, which is suitable for English text.
+   * The {@link TreebankLangParserParams} implementation to use. This is where
+   * we get the language pack, and then the {@link GrammaticalStructureFactory}
+   * used to extract the dependencies from the parse. In most cases you should
+   * leave this at the default value, which is suitable for English text.
    */
   private String tlppClass;
 
-
   /**
    * The name of the feature to add to tokens. The feature value is a
-   * {@link List} of {@link DependencyRelation} objects giving the
-   * dependencies from this token to other tokens.
+   * {@link List} of {@link DependencyRelation} objects giving the dependencies
+   * from this token to other tokens.
    */
   protected String dependenciesFeature = "dependencies";
 
-
-
   /**
-   * Parse the current document.  (This is the principal 
-   * method called by a CorpusController.)
+   * Parse the current document. (This is the principal method called by a
+   * CorpusController.)
    */
   public void execute() throws ExecutionException {
     interrupted = false;
     long startTime = System.currentTimeMillis();
-    if(document == null) {
-      throw new ExecutionException("No document to process!");
-    }
+    if(document == null) { throw new ExecutionException(
+        "No document to process!"); }
     fireStatusChanged("Running " + this.getName() + " on " + 
document.getName());
     fireProgressChanged(0);
-
-    if (debugMode) {
+    if(debugMode) {
       System.out.println("Parsing document: " + document.getName());
     }
-
-    if (useMapping && (! mappingLoaded) ) {
+    if(useMapping && (!mappingLoaded)) {
       System.err.println("Warning: no mapping loaded!");
     }
-    
     checkInterruption();
-    if (addConstituentAnnotations || addDependencyFeatures || 
addDependencyAnnotations || addPosTags) {
+    if(addConstituentAnnotations || addDependencyFeatures
+        || addDependencyAnnotations || addPosTags) {
       parseSentences(document.getAnnotations(annotationSetName));
-    }
-    else {
+    } else {
       System.err.println("There is nothing for the parser to do.");
-      System.err.println("Please enable at least one of the \"add...\" 
options.");
+      System.err
+          .println("Please enable at least one of the \"add...\" options.");
     }
-    
     fireProcessFinished();
-    fireStatusChanged("Finished " + this.getName() + " on " + 
document.getName()
-        + " in " + NumberFormat.getInstance().format(
+    fireStatusChanged("Finished "
+        + this.getName()
+        + " on "
+        + document.getName()
+        + " in "
+        + NumberFormat.getInstance().format(
             (double)(System.currentTimeMillis() - startTime) / 1000)
-            + " seconds!");
+        + " seconds!");
   }
 
-  
   /**
-   * Initialize the Parser resource.  In particular, load the trained data
-   * file.
+   * Initialize the Parser resource. In particular, load the trained data file.
    */
   public Resource init() throws ResourceInstantiationException {
     instantiateStanfordParser();
-    if (mappingFile != null) {
+    if(mappingFile != null) {
       loadTagMapping(mappingFile);
     }
-
     super.init();
-    
-    if(tlppClass == null || tlppClass.equals("")) {
-      throw new ResourceInstantiationException(
-              "TLPP class name must be specified");
-    }
+    if(tlppClass == null || tlppClass.equals("")) { throw new 
ResourceInstantiationException(
+        "TLPP class name must be specified"); }
     try {
-      Class<?> tlppClassObj =
-              Class.forName(tlppClass);
-      if(!TreebankLangParserParams.class.isAssignableFrom(tlppClassObj)) {
-        throw new ResourceInstantiationException(tlppClassObj
-                + " does not implement "
-                + TreebankLangParserParams.class.getName());
-      }
+      Class<?> tlppClassObj = Class.forName(tlppClass);
+      if(!TreebankLangParserParams.class.isAssignableFrom(tlppClassObj)) { 
throw new ResourceInstantiationException(
+          tlppClassObj + " does not implement "
+              + TreebankLangParserParams.class.getName()); }
       TreebankLangParserParams tlpp =
-              TreebankLangParserParams.class.cast(tlppClassObj.newInstance());
+          TreebankLangParserParams.class.cast(tlppClassObj.newInstance());
       gsf = tlpp.treebankLanguagePack().grammaticalStructureFactory();
-    }
-    catch(UnsupportedOperationException e) {
+    } catch(UnsupportedOperationException e) {
       throw new ResourceInstantiationException(e);
-    }
-    catch(ClassNotFoundException e) {
+    } catch(ClassNotFoundException e) {
       throw new ResourceInstantiationException("Class " + tlppClass
-              + " not found", e);
-    }
-    catch(InstantiationException e) {
+          + " not found", e);
+    } catch(InstantiationException e) {
       throw new ResourceInstantiationException("Error creating TLPP object", 
e);
-    }
-    catch(IllegalAccessException e) {
+    } catch(IllegalAccessException e) {
       throw new ResourceInstantiationException("Error creating TLPP object", 
e);
     }
     return this;
   }
 
-
   /**
-   * Re-initialize the Parser resource.  In particular, reload the trained
-   * data file.
+   * Re-initialize the Parser resource. In particular, reload the trained data
+   * file.
    */
-  @Override 
+  @Override
   public void reInit() throws ResourceInstantiationException {
     stanfordParser = null;
     init();
-  }  
+  }
 
-
-
   /**
    * Find all the Sentence annotations and iterate through them, parsing one
    * sentence at a time and storing the result in the output AS. (Sentences are
    * scanned for Tokens. You have to run the ANNIE tokenizer and splitter 
before
    * this PR.)
-   * @throws ExecutionInterruptedException 
+   * 
+   * @throws ExecutionInterruptedException
    */
-  private void parseSentences(AnnotationSet annotationSet) throws 
ExecutionInterruptedException { 
-    List<Annotation> sentences = 
gate.Utils.inDocumentOrder(annotationSet.get(inputSentenceType));
+  private void parseSentences(AnnotationSet annotationSet)
+      throws ExecutionInterruptedException {
+    List<Annotation> sentences =
+        gate.Utils.inDocumentOrder(annotationSet.get(inputSentenceType));
     int sentencesDone = 0;
     int nbrSentences = sentences.size();
-
-    for (Annotation sentence : sentences) {
+    for(Annotation sentence : sentences) {
       parseOneSentence(annotationSet, sentence, sentencesDone, nbrSentences);
       sentencesDone++;
       checkInterruption();
     }
-    
     sentencesDone++;
     fireProgressChanged(100 * sentencesDone / nbrSentences);
+  }
 
-   }
-    
-
-
   /**
    * Generate the special data structure for one sentence and pass the List of
-   * Word to the parser.  Apply the annotations back to the document.
+   * Word to the parser. Apply the annotations back to the document.
    * 
    * @param sentence
    *          the Sentence annotation
@@ -265,106 +257,100 @@
    *          sentence number of debugging output
    * @param ofS
    *          total number of sentences for debugging output
-   * @return  null if the sentence is empty
-   * @throws ExecutionInterruptedException 
+   * @return null if the sentence is empty
+   * @throws ExecutionInterruptedException
    */
-  private void parseOneSentence(AnnotationSet annotationSet, Annotation 
sentence, int sentCtr, int sentCount) throws ExecutionInterruptedException {
+  private void parseOneSentence(AnnotationSet annotationSet,
+      Annotation sentence, int sentCtr, int sentCount)
+      throws ExecutionInterruptedException {
     Tree tree;
-    
-    StanfordSentence stanfordSentence = new StanfordSentence(sentence, 
inputTokenType, annotationSet, reusePosTags);
-    if (debugMode) {
+    StanfordSentence stanfordSentence =
+        new StanfordSentence(sentence, inputTokenType, annotationSet,
+            reusePosTags);
+    if(debugMode) {
       System.out.println(stanfordSentence.toString());
     }
-
-    /* Ignore an empty Sentence (sometimes the regex splitter can create one
+    /*
+     * Ignore an empty Sentence (sometimes the regex splitter can create one
      * with no Token annotations in it).
      */
-    if ( stanfordSentence.isNotEmpty() ) {
+    if(stanfordSentence.isNotEmpty()) {
       List<Word> wordList = stanfordSentence.getWordList();
-
-      if (reusePosTags) {
+      if(reusePosTags) {
         int nbrMissingTags = stanfordSentence.numberOfMissingPosTags();
-        if (nbrMissingTags > 0)  {
-          double percentMissing = Math.ceil(100.0 * (nbrMissingTags) /
-                  (stanfordSentence.numberOfTokens()) );
-          System.err.println("Warning (sentence " + sentCtr + "): " + (int) 
percentMissing 
-                  + "% of the Tokens are missing POS tags." );
+        if(nbrMissingTags > 0) {
+          double percentMissing =
+              Math.ceil(100.0 * (nbrMissingTags)
+                  / (stanfordSentence.numberOfTokens()));
+          System.err.println("Warning (sentence " + sentCtr + "): "
+              + (int)percentMissing + "% of the Tokens are missing POS tags.");
         }
       }
-
-      tree = stanfordParser.parse(wordList); 
+      tree = stanfordParser.parse(wordList);
       checkInterruption();
-
-      if (addConstituentAnnotations || addPosTags) {
-        annotatePhraseStructureRecursively(annotationSet, stanfordSentence, 
tree, tree);
+      if(addConstituentAnnotations || addPosTags) {
+        annotatePhraseStructureRecursively(annotationSet, stanfordSentence,
+            tree, tree);
       }
-
       checkInterruption();
-      if (addDependencyFeatures || addDependencyAnnotations) {
+      if(addDependencyFeatures || addDependencyAnnotations) {
         annotateDependencies(annotationSet, stanfordSentence, tree);
       }
-
-      if (debugMode) {
+      if(debugMode) {
         System.out.println("Parsed sentence " + sentCtr + " of " + sentCount);
       }
+    } else if(debugMode) {
+      System.out.println("Ignored empty sentence " + sentCtr + " of "
+          + sentCount);
     }
-    
-    else if (debugMode) {
-      System.out.println("Ignored empty sentence " + sentCtr + " of " + 
sentCount);
-    }
   }
 
-
   /**
-   * Generate a SyntaxTreeNode Annotation corresponding to this Tree.  Work 
-   * recursively so that the annotations are actually generated from the 
-   * bottom up, in order to build the consists list of annotation IDs.
+   * Generate a SyntaxTreeNode Annotation corresponding to this Tree. Work
+   * recursively so that the annotations are actually generated from the bottom
+   * up, in order to build the consists list of annotation IDs.
    * 
-   * @param tree  the current subtree
-   * @param rootTree  the whole sentence, used to find the span of the current 
subtree
+   * @param tree
+   *          the current subtree
+   * @param rootTree
+   *          the whole sentence, used to find the span of the current subtree
    * @return a GATE Annotation of type "SyntaxTreeNode"
    */
-  protected Annotation annotatePhraseStructureRecursively(AnnotationSet 
annotationSet, StanfordSentence stanfordSentence, Tree tree, Tree rootTree) {
+  protected Annotation annotatePhraseStructureRecursively(
+      AnnotationSet annotationSet, StanfordSentence stanfordSentence,
+      Tree tree, Tree rootTree) {
     Annotation annotation = null;
     Annotation child;
-    String label   = tree.value();
-
+    String label = tree.value();
     List<Tree> children = tree.getChildrenAsList();
-
-    if (children.size() == 0) {
-      return null;
-    }
+    if(children.size() == 0) { return null; }
     /* implied else */
-
-    /* following line generates ClassCastException
-     *                 IntPair span = tree.getSpan();
-     * edu.stanford.nlp.ling.CategoryWordTag
-     * at edu.stanford.nlp.trees.Tree.getSpan(Tree.java:393)
-     * but I think it's a bug in the parser, so I'm hacking 
-     * around it as follows. */
+    /*
+     * following line generates ClassCastException IntPair span =
+     * tree.getSpan(); edu.stanford.nlp.ling.CategoryWordTag at
+     * edu.stanford.nlp.trees.Tree.getSpan(Tree.java:393) but I think it's a bug
+     * in the parser, so I'm hacking around it as follows.
+     */
     int startPos = Trees.leftEdge(tree, rootTree);
-    int endPos   = Trees.rightEdge(tree, rootTree);
-    
+    int endPos = Trees.rightEdge(tree, rootTree);
     Long startNode = stanfordSentence.startPos2offset(startPos);
-    Long endNode   = stanfordSentence.endPos2offset(endPos);
-
+    Long endNode = stanfordSentence.endPos2offset(endPos);
     List<Integer> consists = new ArrayList<Integer>();
-
     Iterator<Tree> childIter = children.iterator();
-    while (childIter.hasNext()) {
-      child = annotatePhraseStructureRecursively(annotationSet, 
stanfordSentence, childIter.next(), rootTree);
-      if  ( (child != null)  &&
-        (! child.getType().equals(inputTokenType) )) {
+    while(childIter.hasNext()) {
+      child =
+          annotatePhraseStructureRecursively(annotationSet, stanfordSentence,
+              childIter.next(), rootTree);
+      if((child != null) && (!child.getType().equals(inputTokenType))) {
         consists.add(child.getId());
       }
     }
-    annotation = annotatePhraseStructureConstituent(annotationSet, startNode, 
endNode, label, consists, tree.depth());
-
+    annotation =
+        annotatePhraseStructureConstituent(annotationSet, startNode, endNode,
+            label, consists, tree.depth());
     return annotation;
   }
 
-
-
   /**
    * Record one constituent as an annotation.
    * 
@@ -375,169 +361,148 @@
    * @param depth
    * @return
    */
-  private Annotation annotatePhraseStructureConstituent(AnnotationSet 
annotationSet, Long startOffset, Long endOffset, String label, 
-    List<Integer> consists, int depth) {
+  private Annotation annotatePhraseStructureConstituent(
+      AnnotationSet annotationSet, Long startOffset, Long endOffset,
+      String label, List<Integer> consists, int depth) {
     Annotation phrAnnotation = null;
     Integer phrID;
-
     try {
       String cat;
-      if (useMapping && mappingLoaded) {
-        cat  = translateTag(label);
+      if(useMapping && mappingLoaded) {
+        cat = translateTag(label);
+      } else {
+        cat = label;
       }
-      else {
-        cat = label; 
-      }
-      
-      if (addConstituentAnnotations) {
-        String text = document.getContent().getContent(startOffset, 
endOffset).toString();
+      if(addConstituentAnnotations) {
+        String text =
+            document.getContent().getContent(startOffset, 
endOffset).toString();
         FeatureMap fm = gate.Factory.newFeatureMap();
         fm.put(PHRASE_CAT_FEATURE, cat);
         fm.put("text", text);
-
         /* Ignore empty list features on the token-equivalent annotations. */
-        if (consists.size() > 0) {
+        if(consists.size() > 0) {
           fm.put("consists", consists);
         }
-
-        phrID  = annotationSet.add(startOffset, endOffset, 
PHRASE_ANNOTATION_TYPE, fm);
+        phrID =
+            annotationSet.add(startOffset, endOffset, PHRASE_ANNOTATION_TYPE,
+                fm);
         phrAnnotation = annotationSet.get(phrID);
         recordID(annotationSet, phrID);
       }
-
-      if ( addPosTags && (depth == 1) ) {
+      if(addPosTags && (depth == 1)) {
         /* Expected to be a singleton set! */
-        AnnotationSet tokenSet = annotationSet.get(inputTokenType, 
startOffset, endOffset);
-        if (tokenSet.size() == 1) {
+        AnnotationSet tokenSet =
+            annotationSet.get(inputTokenType, startOffset, endOffset);
+        if(tokenSet.size() == 1) {
           Annotation token = tokenSet.iterator().next();
-
-          /* Add POS tag to token.  
-           * (Note: GATE/Hepple uses "(" and ")" for Penn/Stanford's
-           * "-LRB-" and "-RRB-". */
+          /*
+           * Add POS tag to token. (Note: GATE/Hepple uses "(" and ")" for
+           * Penn/Stanford's "-LRB-" and "-RRB-".)
+           */
           String hepCat = StanfordSentence.unescapePosTag(cat);
           token.getFeatures().put(POS_TAG_FEATURE, hepCat);
-          
+        } else {
+          System.err.println("Found a tokenSet with " + tokenSet.size()
+              + " members!");
         }
-        else {
-          System.err.println("Found a tokenSet with " + tokenSet.size() + " 
members!");
-        }
       }
-    }
-    catch (InvalidOffsetException e) {
+    } catch(InvalidOffsetException e) {
       e.printStackTrace();
     }
-    
     return phrAnnotation;
   }
 
-  
-  
   @SuppressWarnings("unchecked")
-  private void annotateDependencies(AnnotationSet annotationSet, 
StanfordSentence stanfordSentence, Tree tree) {
+  private void annotateDependencies(AnnotationSet annotationSet,
+      StanfordSentence stanfordSentence, Tree tree) {
     GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
-    Collection<TypedDependency> dependencies = 
DependencyMode.getDependencies(gs, dependencyMode, includeExtraDependencies);
-
-    if (dependencies == null) {
-      if (debugMode) {
+    Collection<TypedDependency> dependencies =
+        DependencyMode.getDependencies(gs, dependencyMode,
+            includeExtraDependencies);
+    if(dependencies == null) {
+      if(debugMode) {
         System.out.println("dependencies == null");
       }
       return;
     }
-    
     String dependencyKind;
     FeatureMap depFeatures;
     Integer dependentTokenID, governorTokenID;
     List<Integer> argList;
     Long offsetLH0, offsetRH0, offsetLH1, offsetRH1, depLH, depRH;
     Annotation governor, dependent;
-
     for(TypedDependency dependency : dependencies) {
       if(debugMode) {
         System.out.println(dependency);
       }
-      
       // Does not work in version 3.5.2 any more
-      //int governorIndex = dependency.gov().label().index() - 1;
-      int governorIndex = dependency.gov().index()-1;
-      governor  = stanfordSentence.startPos2token(governorIndex);
-      
-      //int dependentIndex = dependency.dep().label().index() - 1;
-      int dependentIndex = dependency.dep().index()-1;
+      // int governorIndex = dependency.gov().label().index() - 1;
+      int governorIndex = dependency.gov().index() - 1;
+      governor = stanfordSentence.startPos2token(governorIndex);
+      // int dependentIndex = dependency.dep().label().index() - 1;
+      int dependentIndex = dependency.dep().index() - 1;
       dependent = stanfordSentence.startPos2token(dependentIndex);
-
       dependencyKind = dependency.reln().toString();
       governorTokenID = governor.getId();
       dependentTokenID = dependent.getId();
-      
-      if (addDependencyFeatures) {
+      if(addDependencyFeatures) {
         List<DependencyRelation> depsForTok =
-          (List<DependencyRelation>) 
governor.getFeatures().get(dependenciesFeature);
-        
+            (List<DependencyRelation>)governor.getFeatures().get(
+                dependenciesFeature);
         if(depsForTok == null) {
           depsForTok = new ArrayList<DependencyRelation>();
           governor.getFeatures().put(dependenciesFeature, depsForTok);
         }
-        
-        depsForTok.add(new DependencyRelation(dependencyKind, 
dependentTokenID));
+        depsForTok
+            .add(new DependencyRelation(dependencyKind, dependentTokenID));
       }
-      
-      if (addDependencyAnnotations) {
+      if(addDependencyAnnotations) {
         depFeatures = gate.Factory.newFeatureMap();
         argList = new ArrayList<Integer>();
         argList.add(governorTokenID);
         argList.add(dependentTokenID);
         depFeatures.put(DEPENDENCY_ARG_FEATURE, argList);
         depFeatures.put(DEPENDENCY_LABEL_FEATURE, dependencyKind);
-        
         offsetLH0 = governor.getStartNode().getOffset();
         offsetRH0 = governor.getEndNode().getOffset();
         offsetLH1 = dependent.getStartNode().getOffset();
         offsetRH1 = dependent.getEndNode().getOffset();
-        
         depLH = Math.min(offsetLH0, offsetLH1);
         depRH = Math.max(offsetRH0, offsetRH1);
-        
         try {
-          annotationSet.add(depLH, depRH, DEPENDENCY_ANNOTATION_TYPE, 
depFeatures);
+          annotationSet.add(depLH, depRH, DEPENDENCY_ANNOTATION_TYPE,
+              depFeatures);
+        } catch(InvalidOffsetException e) {
+          e.printStackTrace();
         }
-        catch(InvalidOffsetException e) {
-          e.printStackTrace();
-        }  
       }
     }
   }
 
-  
-
   private void instantiateStanfordParser()
-    throws ResourceInstantiationException {
+      throws ResourceInstantiationException {
     if(stanfordParser != null) return;
-    
     try {
-      //String filepath = Files.fileFromURL(parserFile).getAbsolutePath();
-      stanfordParser = 
LexicalizedParser.getParserFromSerializedFile(parserFile.toExternalForm());
-    }
-    catch(Exception e) {
+      // String filepath = Files.fileFromURL(parserFile).getAbsolutePath();
+      stanfordParser =
+          LexicalizedParser.getParserFromSerializedFile(parserFile
+              .toExternalForm());
+    } catch(Exception e) {
       throw new ResourceInstantiationException(e);
     }
-  }    
+  }
 
-
-  private void loadTagMapping(File mappingFile)  { 
+  private void loadTagMapping(File mappingFile) {
     tagMap = new HashMap<String, String>();
     mappingLoaded = false;
-
     try {
-      if (mappingFile.exists() && mappingFile.canRead()) {
-
+      if(mappingFile.exists() && mappingFile.canRead()) {
         BufferedReader br = new BufferedReader(new FileReader(mappingFile));
         String line = "";
-
         // read until it reaches to an end of the file
         while((line = br.readLine()) != null) {
-          // two columns delimited by whitespace 
-          String [] data = line.split("\\s+", 2);
-
+          // two columns delimited by whitespace
+          String[] data = line.split("\\s+", 2);
           // are there key and value available
           if(data == null || data.length < 2) {
             continue;
@@ -546,27 +511,22 @@
             tagMap.put(data[0].trim(), data[1].trim());
           }
         }
-
         br.close();
+      } else {
+        System.err.println("Can't find or read mapping file "
+            + mappingFile.getPath() + " so no mappings will be used.");
       }
-
-      else {
-        System.err.println("Can't find or read mapping file " 
-          + mappingFile.getPath() + " so no mappings will be used.");
-      }
-    } 
-    catch(Exception e) {
+    } catch(Exception e) {
       System.err.println("Exception trying to load mapping file "
-        + mappingFile.getPath());
+          + mappingFile.getPath());
       e.printStackTrace();
     }
-
     int nbrMapped = tagMap.size();
-    System.out.println("Loaded " + nbrMapped + " mappings from file " + 
mappingFile);
+    System.out.println("Loaded " + nbrMapped + " mappings from file "
+        + mappingFile);
     mappingLoaded = (nbrMapped > 0);
   }
 
-
   /**
    * This method stores the annotation ID as a value of feature "ID" on the
    * relevant annotation. (Mainly to make the ID visible in the GUI for
@@ -579,13 +539,11 @@
     annSet.get(annotationID).getFeatures().put("ID", annotationID);
   }
 
-  
   private void checkInterruption() throws ExecutionInterruptedException {
     if(isInterrupted()) { throw new ExecutionInterruptedException(
         "Execution of " + this.getName() + " has been abruptly interrupted!"); 
}
   }
 
-
   /**
    * Translate the tag in the map, or leave it the same if there is no
    * translation.
@@ -595,27 +553,22 @@
    */
   private String translateTag(String stanfordTag) {
     String translatedTag = stanfordTag;
-
-    if (tagMap.containsKey(stanfordTag)) {
+    if(tagMap.containsKey(stanfordTag)) {
       translatedTag = tagMap.get(stanfordTag);
     }
-    
     return translatedTag;
   }
 
-
   /* get & set methods for the CREOLE parameters */
-  @CreoleParameter(comment = "TreebankLangParserParams implementation used to 
extract the dependencies",
-      defaultValue = 
"edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams")
+  @CreoleParameter(comment = "TreebankLangParserParams implementation used to 
extract the dependencies", defaultValue = 
"edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams")
   public void setTlppClass(String tlppClass) {
     this.tlppClass = tlppClass;
   }
-  
+
   public String getTlppClass() {
     return tlppClass;
   }
 
-
   @Optional
   @RunTime
   @CreoleParameter(comment = "annotationSet used for input (Token and "
@@ -628,8 +581,7 @@
     return this.annotationSetName;
   }
 
-  @CreoleParameter(comment = "path to the parser's grammar file",
-      defaultValue = "resources/englishRNN.ser.gz")
+  @CreoleParameter(comment = "path to the parser's grammar file", defaultValue 
= "resources/englishRNN.ser.gz")
   public void setParserFile(URL parserFile) {
     this.parserFile = parserFile;
   }
@@ -649,8 +601,7 @@
   }
 
   @RunTime
-  @CreoleParameter(comment = "verbose mode for debugging",
-      defaultValue = "false")
+  @CreoleParameter(comment = "verbose mode for debugging", defaultValue = 
"false")
   public void setDebug(Boolean debug) {
     this.debugMode = debug.booleanValue();
   }
@@ -658,10 +609,9 @@
   public Boolean getDebug() {
     return new Boolean(this.debugMode);
   }
-  
+
   @RunTime
-  @CreoleParameter(comment = "Re-use existing POS tags on tokens",
-      defaultValue = "false")
+  @CreoleParameter(comment = "Re-use existing POS tags on tokens", 
defaultValue = "false")
   public void setReusePosTags(Boolean reusePosTags) {
     this.reusePosTags = reusePosTags.booleanValue();
   }
@@ -669,91 +619,79 @@
   public Boolean getReusePosTags() {
     return new Boolean(this.reusePosTags);
   }
-  
+
   @RunTime
-  @CreoleParameter(comment = "Create POS tags on the Token annotations",
-      defaultValue = "false")
+  @CreoleParameter(comment = "Create POS tags on the Token annotations", 
defaultValue = "false")
   public void setAddPosTags(Boolean posTagTokens) {
     this.addPosTags = posTagTokens.booleanValue();
   }
-  
+
   public Boolean getAddPosTags() {
     return new Boolean(this.addPosTags);
   }
 
   @RunTime
-  @CreoleParameter(comment = "use tag mapping",
-      defaultValue = "false")
+  @CreoleParameter(comment = "use tag mapping", defaultValue = "false")
   public void setUseMapping(Boolean useMapping) {
     this.useMapping = useMapping.booleanValue();
   }
-  
+
   public Boolean getUseMapping() {
     return new Boolean(this.useMapping);
   }
-  
+
   @RunTime
-  @CreoleParameter(comment = "Create dependency features on Token annotations",
-      defaultValue = "true")
+  @CreoleParameter(comment = "Create dependency features on Token 
annotations", defaultValue = "true")
   public void setAddDependencyFeatures(Boolean useDependency) {
     this.addDependencyFeatures = useDependency.booleanValue();
   }
-  
+
   public Boolean getAddDependencyFeatures() {
     return new Boolean(this.addDependencyFeatures);
   }
-  
+
   @RunTime
-  @CreoleParameter(comment = "Create annotations to show dependencies",
-      defaultValue = "true")
+  @CreoleParameter(comment = "Create annotations to show dependencies", 
defaultValue = "true")
   public void setAddDependencyAnnotations(Boolean useDependency) {
     this.addDependencyAnnotations = useDependency.booleanValue();
   }
-  
+
   public Boolean getAddDependencyAnnotations() {
     return new Boolean(this.addDependencyAnnotations);
   }
-  
-  
+
   @RunTime
-  @CreoleParameter(comment = "input annotation type for each sentence",
-      defaultValue = ANNIEConstants.SENTENCE_ANNOTATION_TYPE )
+  @CreoleParameter(comment = "input annotation type for each sentence", 
defaultValue = ANNIEConstants.SENTENCE_ANNOTATION_TYPE)
   public void setInputSentenceType(String sType) {
     this.inputSentenceType = sType;
   }
-  
+
   public String getInputSentenceType() {
     return this.inputSentenceType;
   }
-  
 
   @RunTime
-  @CreoleParameter(comment = "input annotation type for each token",
-      defaultValue = ANNIEConstants.TOKEN_ANNOTATION_TYPE )
+  @CreoleParameter(comment = "input annotation type for each token", 
defaultValue = ANNIEConstants.TOKEN_ANNOTATION_TYPE)
   public void setInputTokenType(String tType) {
     this.inputTokenType = tType;
   }
-  
+
   public String getInputTokenType() {
     return this.inputTokenType;
   }
 
-  
   @RunTime
-  @CreoleParameter(comment = "Create annotations to show phrase structures",
-      defaultValue = "true")
+  @CreoleParameter(comment = "Create annotations to show phrase structures", 
defaultValue = "true")
   public void setAddConstituentAnnotations(Boolean usePhraseStructure) {
     this.addConstituentAnnotations = usePhraseStructure.booleanValue();
   }
-  
+
   public Boolean getAddConstituentAnnotations() {
     return new Boolean(this.addConstituentAnnotations);
   }
-  
-  
+
   @RunTime
-  @CreoleParameter(comment = "Dependency Mode",
-      defaultValue = "Typed")
+  @CreoleParameter(comment = "Dependency Mode", defaultValue = "Typed")
   public void setDependencyMode(DependencyMode mode) {
     this.dependencyMode = mode;
   }
@@ -761,38 +699,34 @@
   public DependencyMode getDependencyMode() {
     return this.dependencyMode;
   }
-  
+
   @RunTime
-  @CreoleParameter(comment = "include extra dependencies",
-      defaultValue = "false")
+  @CreoleParameter(comment = "include extra dependencies", defaultValue = 
"false")
   public void setIncludeExtraDependencies(Boolean include) {
     this.includeExtraDependencies = include;
   }
-  
+
   public Boolean getIncludeExtraDependencies() {
     return this.includeExtraDependencies;
   }
-  
-  
-  /* Made mappingFile an init parameter to simplify things.
-   * The CREOLE parameter is called "mappingFile" but it's actually a URL.
+
+  /*
+   * Made mappingFile an init parameter to simplify things. The CREOLE parameter
+   * is called "mappingFile" but it's actually a URL.
    */
   @Optional
   @CreoleParameter(comment = "path to the tag mapping file")
   public void setMappingFile(URL mappingFileURL) {
     this.mappingFile = null; // override below
     this.mappingFileURL = mappingFileURL;
-
-    if ( (this.mappingFileURL != null) &&
-      (! this.mappingFileURL.toString().trim().equals("")) ) {
+    if((this.mappingFileURL != null)
+        && (!this.mappingFileURL.toString().trim().equals(""))) {
       try {
         this.mappingFile = new File(this.mappingFileURL.toURI());
-      }
-      catch(URISyntaxException e) {
+      } catch(URISyntaxException e) {
         e.printStackTrace();
       }
     }
-
   }
 
   public URL getMappingFile() {
@@ -800,22 +734,21 @@
   }
 
   /**
-   * Inject an existing instance of the LexicalizedParser.
-   * <b>This method is intended for use by {@link Factory#ducplicate}
-   * and should not be called directly.</b>
+   * Inject an existing instance of the LexicalizedParser. <b>This method is
+   * intended for use by {@link Factory#duplicate} and should not be called
+   * directly.</b>
    */
   @Sharable
   public void setStanfordParser(LexicalizedParser parser) {
     this.stanfordParser = parser;
   }
-  
+
   /**
-   * Get the LexicalizedParser used internally by this PR.
-   * <b>This method is intended for use by {@link Factory#ducplicate}
-   * and should not be called directly.</b>
+   * Get the LexicalizedParser used internally by this PR. <b>This method is
+   * intended for use by {@link Factory#duplicate} and should not be called
+   * directly.</b>
    */
   public LexicalizedParser getStanfordParser() {
     return stanfordParser;
   }
-
 }

Modified: 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/StanfordSentence.java
===================================================================
--- 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/StanfordSentence.java
   2016-08-19 16:50:05 UTC (rev 19525)
+++ 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/StanfordSentence.java
   2016-08-19 17:01:15 UTC (rev 19526)
@@ -14,8 +14,8 @@
  * 
  * You should have received a copy of the GNU General Public License along with
  * this program. If not, see <http://www.gnu.org/licenses/>.
- *
- *  $Id: StanfordSentence.java 15600 2012-03-19 15:40:56Z adamfunk $
+ * 
+ * $Id: StanfordSentence.java 15600 2012-03-19 15:40:56Z adamfunk $
  */
 package gate.stanford;
 
@@ -28,210 +28,174 @@
 import gate.util.Strings;
 
 /**
- * The Stanford Parser itself takes as input a List of 
edu.stanford.nlp.ling.Word.
- * This data structure is constructed from a Sentence Annotation, using the 
enclosed
- * Token Annotations, and yields the required List, as well as methods for
- * converting the parser's output spans into GATE Annotation offsets.
+ * The Stanford Parser itself takes as input a List of
+ * edu.stanford.nlp.ling.Word. This data structure is constructed from a
+ * Sentence Annotation, using the enclosed Token Annotations, and yields the
+ * required List, as well as methods for converting the parser's output spans
+ * into GATE Annotation offsets.
  */
 public class StanfordSentence {
-  
   private Map<Integer, Long> startPosToOffset;
+
   private Map<Integer, Long> endPosToOffset;
+
   private Map<Integer, Annotation> startPosToToken;
+
   private Map<Integer, String> startPosToString;
-  private List<Word>         words;
-  private Long               sentenceStartOffset, sentenceEndOffset;
-  private List<Annotation>   tokens;
 
-  private static final String  POS_TAG_FEATURE    = 
ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME;
-  private static final String  STRING_FEATURE     = 
ANNIEConstants.TOKEN_STRING_FEATURE_NAME;
-  
+  private List<Word> words;
+
+  private Long sentenceStartOffset, sentenceEndOffset;
+
+  private List<Annotation> tokens;
+
+  private static final String POS_TAG_FEATURE =
+      ANNIEConstants.TOKEN_CATEGORY_FEATURE_NAME;
+
+  private static final String STRING_FEATURE =
+      ANNIEConstants.TOKEN_STRING_FEATURE_NAME;
+
   int nbrOfTokens, nbrOfMissingPosTags;
-  
-  
-  /* This is probably dodgy, but I can't find an "unknown" tag 
-   * in the Penn documentation.    */
-  private static final String  UNKNOWN_TAG     = "NN";
-  
 
-  public StanfordSentence(Annotation sentence, String tokenType, 
-    AnnotationSet inputAS, boolean usePosTags) {
-    
+  /*
+   * This is probably dodgy, but I can't find an "unknown" tag in the Penn
+   * documentation.
+   */
+  private static final String UNKNOWN_TAG = "NN";
+
+  public StanfordSentence(Annotation sentence, String tokenType,
+      AnnotationSet inputAS, boolean usePosTags) {
     startPosToOffset = new HashMap<Integer, Long>();
-    endPosToOffset   = new HashMap<Integer, Long>();
-    startPosToToken  = new HashMap<Integer, Annotation>();
+    endPosToOffset = new HashMap<Integer, Long>();
+    startPosToToken = new HashMap<Integer, Annotation>();
     startPosToString = new HashMap<Integer, String>();
-    
     sentenceStartOffset = sentence.getStartNode().getOffset();
-    sentenceEndOffset   = sentence.getEndNode().getOffset();
-   
-    nbrOfTokens   = 0;
+    sentenceEndOffset = sentence.getEndNode().getOffset();
+    nbrOfTokens = 0;
     nbrOfMissingPosTags = 0;
-    
-    tokens = Utils.inDocumentOrder(inputAS.getContained(sentenceStartOffset, 
sentenceEndOffset).get(tokenType));
+    tokens =
+        Utils.inDocumentOrder(inputAS.getContained(sentenceStartOffset,
+            sentenceEndOffset).get(tokenType));
     words = new ArrayList<Word>();
-
     add(-1, sentence, "S");
-    
     int tokenNo = 0;
-
-    for (Annotation token : tokens) {
-      String tokenString = 
escapeToken(token.getFeatures().get(STRING_FEATURE).toString());
+    for(Annotation token : tokens) {
+      String tokenString =
+          escapeToken(token.getFeatures().get(STRING_FEATURE).toString());
       add(tokenNo, token, tokenString);
-      
-      /* The FAQ says the parser will automatically use existing POS tags
-       * if the List elements are of type TaggedWord.  
+      /*
+       * The FAQ says the parser will automatically use existing POS tags if the
+       * List elements are of type TaggedWord.
        * http://nlp.stanford.edu/software/parser-faq.shtml#f
        */
-      
-      if (usePosTags)  {
+      if(usePosTags) {
         words.add(new TaggedWord(tokenString, getEscapedPosTag(token)));
-      }
-      else {
+      } else {
         words.add(new Word(tokenString));
       }
-
       tokenNo++;
     }
-    
     nbrOfTokens = tokenNo;
   }
 
-  
   public String toString() {
     StringBuffer output = new StringBuffer();
-    output.append("S: 
").append(Strings.toString(startPosToOffset)).append('\n');
-    output.append("   
").append(Strings.toString(startPosToString)).append('\n');
+    output.append("S: ").append(Strings.toString(startPosToOffset))
+        .append('\n');
+    output.append("   ").append(Strings.toString(startPosToString))
+        .append('\n');
     output.append("   ").append(Strings.toString(endPosToOffset));
     return output.toString();
   }
-  
-  
-  private String getEscapedPosTag(Annotation token)  {
+
+  private String getEscapedPosTag(Annotation token) {
     String pos = UNKNOWN_TAG;
     FeatureMap tokenFeatures = token.getFeatures();
-
-    if (tokenFeatures.containsKey(POS_TAG_FEATURE)) {
+    if(tokenFeatures.containsKey(POS_TAG_FEATURE)) {
       Object temp = tokenFeatures.get(POS_TAG_FEATURE);
-      
-      if (temp instanceof String) {
-        pos = (String) temp;
-      }
-      else {
+      if(temp instanceof String) {
+        pos = (String)temp;
+      } else {
         nbrOfMissingPosTags++;
       }
-      
-    }
-    else {
+    } else {
       nbrOfMissingPosTags++;
     }
-    
     return escapePosTag(pos);
   }
-  
 
-
   private void add(int tokenNbr, Annotation token, String tokenString) {
     Long tokenStartOffset = token.getStartNode().getOffset();
-    Long tokenEndOffset   = token.getEndNode().getOffset();
-
+    Long tokenEndOffset = token.getEndNode().getOffset();
     startPosToOffset.put(tokenNbr, tokenStartOffset);
     endPosToOffset.put(new Integer(tokenNbr + 1), tokenEndOffset);
     startPosToToken.put(tokenNbr, token);
     startPosToString.put(tokenNbr, tokenString);
   }
-  
 
-  
-  /* Explanation of the position conversion:
-   * The output of the Stanford Parser specifies each constituent's span in 
terms of 
-   * token boundaries re-numbered within each sentence, which we need to 
convert to 
-   * GATE character offsets within the whole document.
+  /*
+   * Explanation of the position conversion: The output of the Stanford Parser
+   * specifies each constituent's span in terms of token boundaries re-numbered
+   * within each sentence, which we need to convert to GATE character offsets
+   * within the whole document.
    * 
-   * Example: 
-   * "This is a test." starting at document offset 100, containing five tokens.
-   * Stanford says "This" starts at 0 and ends at 1; GATE says 100 to 104.
-   * Stanford says "is a test" starts at 1 and ends at 4;
-   * GATE says 105 to 114.
+   * Example: "This is a test." starting at document offset 100, containing five
+   * tokens. Stanford says "This" starts at 0 and ends at 1; GATE says 100 to
+   * 104. Stanford says "is a test" starts at 1 and ends at 4; GATE says 105 to
+   * 114.
    */
-  
-  
   public int numberOfTokens() {
     return nbrOfTokens;
   }
-  
+
   public int numberOfMissingPosTags() {
     return nbrOfMissingPosTags;
   }
-  
+
   public boolean isNotEmpty() {
     return (nbrOfTokens > 0);
   }
-  
-  
+
   /**
-   * Change the Token's string to match the Penn Treebank's 
-   * escaping system.
-   * See Stanford parser FAQ "How can I provide the correct tokenization of my 
-   * sentence to the parser?"  
-
-   * @param token original string feature of Token
+   * Change the Token's string to match the Penn Treebank's escaping system. See
+   * Stanford parser FAQ "How can I provide the correct tokenization of my
+   * sentence to the parser?"
+   * 
+   * @param token
+   *          original string feature of Token
    * @return escaped version of string
    */
   protected static String escapeToken(String token) {
-    //   (  -->  -LRB-
-    if (token.equals("(")) {
-      return "-LRB-";
-    }
-    
-    //   )  -->  -RRB-
-    if (token.equals(")")) {
-      return "-RRB-";
-    }
-    
-    //   /  -->  \/
-    //   *  -->  \*
-    if (token.contains("/") || token.contains("*")) {
-      return token.replace("/", "\\/").replace("*", "\\*");
-    }
-    
+    // ( --> -LRB-
+    if(token.equals("(")) { return "-LRB-"; }
+    // ) --> -RRB-
+    if(token.equals(")")) { return "-RRB-"; }
+    // / --> \/
+    // * --> \*
+    if(token.contains("/") || token.contains("*")) { return token.replace("/",
+        "\\/").replace("*", "\\*"); }
     return token;
   }
-  
 
   protected static String escapePosTag(String tag) {
-    //   (  -->  -LRB-
-    if (tag.equals("(")) {
-      return "-LRB-";
-    }
-    
-    //   )  -->  -RRB-
-    if (tag.equals(")")) {
-      return "-RRB-";
-    }
-    
+    // ( --> -LRB-
+    if(tag.equals("(")) { return "-LRB-"; }
+    // ) --> -RRB-
+    if(tag.equals(")")) { return "-RRB-"; }
     return tag;
   }
 
-  
   protected static String unescapePosTag(String tag) {
-    //   (  <--  -LRB-
-    if (tag.equals("-LRB-")) {
-      return "(";
-    }
-    
-    //   )  <--  -RRB-
-    if (tag.equals("-RRB-")) {
-      return ")";
-    }
-    
+    // ( <-- -LRB-
+    if(tag.equals("-LRB-")) { return "("; }
+    // ) <-- -RRB-
+    if(tag.equals("-RRB-")) { return ")"; }
     return tag;
   }
-  
 
   /**
-   * Convert a Stanford start position to the GATE Annotation of type
-   * "Token" that starts there.
+   * Convert a Stanford start position to the GATE Annotation of type "Token"
+   * that starts there.
    */
   public Annotation startPos2token(int startPos) {
     return startPosToToken.get(startPos);
@@ -239,6 +203,7 @@
 
   /**
    * Convert a Stanford start position to a GATE offset.
+   * 
    * @param startPos
    * @return the offset in the GATE document
    */
@@ -248,6 +213,7 @@
 
   /**
    * Convert a Stanford end position to a GATE offset.
+   * 
    * @param endPos
    * @return the offset in the GATE document
    */
@@ -255,7 +221,6 @@
     return endPosToOffset.get(endPos);
   }
 
-  
   /**
    * @return The data structure that is passed to the Stanford Parser itself.
    */

Modified: 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Tagger.java
===================================================================
--- 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Tagger.java
     2016-08-19 16:50:05 UTC (rev 19525)
+++ 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Tagger.java
     2016-08-19 17:01:15 UTC (rev 19526)
@@ -19,7 +19,6 @@
  * 
  * $Id: Tagger.java 15468 2012-02-25 14:41:15Z $
  */
-
 package gate.stanford;
 
 import edu.stanford.nlp.ling.TaggedWord;
@@ -56,9 +55,8 @@
 /**
  * This class is a wrapper for the Stanford PoS tagger v3.2.0.
  */
-@CreoleResource(name = "Stanford POS Tagger", comment = "Stanford 
Part-of-Speech Tagger", icon = "pos-tagger", 
helpURL="http://gate.ac.uk/userguide/sec:misc:creole:stanford")
+@CreoleResource(name = "Stanford POS Tagger", comment = "Stanford 
Part-of-Speech Tagger", icon = "pos-tagger", helpURL = 
"http://gate.ac.uk/userguide/sec:misc:creole:stanford")
 public class Tagger extends AbstractLanguageAnalyser {
-
   private static final long serialVersionUID = -6001372186847970081L;
 
   public static final String TAG_DOCUMENT_PARAMETER_NAME = "document";
@@ -68,13 +66,13 @@
   public static final String TAG_ENCODING_PARAMETER_NAME = "encoding";
 
   public static final String BASE_TOKEN_ANNOTATION_TYPE_PARAMETER_NAME =
-    "baseTokenAnnotationType";
+      "baseTokenAnnotationType";
 
   public static final String OUTPUT_ANNOTATION_TYPE_PARAMETER_NAME =
-    "outputAnnotationType";
+      "outputAnnotationType";
 
   public static final String BASE_SENTENCE_ANNOTATION_TYPE_PARAMETER_NAME =
-    "baseSentenceAnnotationType";
+      "baseSentenceAnnotationType";
 
   public static final String TAG_OUTPUT_AS_PARAMETER_NAME = "outputASName";
 
@@ -108,9 +106,9 @@
 
   @RunTime
   @Optional
-  @CreoleParameter(comment = "Should existing " + TOKEN_CATEGORY_FEATURE_NAME +
-     " features on input annotations be respected (true) or ignored (false)?",
-     defaultValue = "true")
+  @CreoleParameter(comment = "Should existing "
+      + TOKEN_CATEGORY_FEATURE_NAME
+      + " features on input annotations be respected (true) or ignored 
(false)?", defaultValue = "true")
   public void setUseExistingTags(Boolean useTags) {
     useExistingTags = useTags;
   }
@@ -118,6 +116,7 @@
   public Boolean getUseExistingTags() {
     return useExistingTags;
   }
+
   private Boolean useExistingTags;
 
   protected Logger logger = Logger.getLogger(this.getClass().getName());
@@ -145,43 +144,34 @@
     // check the parameters
     if(document == null)
       throw new ExecutionException("No document to process!");
-
     AnnotationSet inputAS = document.getAnnotations(inputASName);
-
-    if(baseTokenAnnotationType == null ||
-      baseTokenAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
-      "No base Token Annotation Type provided!"); }
-
-    if(baseSentenceAnnotationType == null ||
-      baseSentenceAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
-      "No base Sentence Annotation Type provided!"); }
-
-    if(outputAnnotationType == null ||
-      outputAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
-      "No AnnotationType provided to store the new feature!"); }
-
+    if(baseTokenAnnotationType == null
+        || baseTokenAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
+        "No base Token Annotation Type provided!"); }
+    if(baseSentenceAnnotationType == null
+        || baseSentenceAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
+        "No base Sentence Annotation Type provided!"); }
+    if(outputAnnotationType == null
+        || outputAnnotationType.trim().length() == 0) { throw new 
ExecutionException(
+        "No AnnotationType provided to store the new feature!"); }
     AnnotationSet sentencesAS = inputAS.get(baseSentenceAnnotationType);
     AnnotationSet tokensAS = inputAS.get(baseTokenAnnotationType);
-    if(sentencesAS != null && sentencesAS.size() > 0 && tokensAS != null &&
-      tokensAS.size() > 0) {
+    if(sentencesAS != null && sentencesAS.size() > 0 && tokensAS != null
+        && tokensAS.size() > 0) {
       long startTime = System.currentTimeMillis();
       fireStatusChanged("POS tagging " + document.getName());
       fireProgressChanged(0);
       // prepare the input for MaxentTagger
       List<Word> sentenceForTagger = new ArrayList<Word>();
-
       // define a comparator for annotations by start offset
       OffsetComparator offsetComparator = new OffsetComparator();
-
       // read all the tokens and all the sentences
       List<Annotation> sentencesList = new ArrayList<Annotation>(sentencesAS);
       Collections.sort(sentencesList, offsetComparator);
       List<Annotation> tokensList = new ArrayList<Annotation>(tokensAS);
       Collections.sort(tokensList, offsetComparator);
-
       Iterator<Annotation> sentencesIter = sentencesList.iterator();
       ListIterator<Annotation> tokensIter = tokensList.listIterator();
-
       List<Annotation> tokensInCurrentSentence = new ArrayList<Annotation>();
       Annotation currentToken = tokensIter.next();
       int sentIndex = 0;
@@ -190,131 +180,125 @@
         Annotation currentSentence = sentencesIter.next();
         tokensInCurrentSentence.clear();
         sentenceForTagger.clear();
-        while(currentToken != null &&
-          currentToken.getEndNode().getOffset()
-            .compareTo(currentSentence.getEndNode().getOffset()) <= 0) {
+        while(currentToken != null
+            && currentToken.getEndNode().getOffset()
+                .compareTo(currentSentence.getEndNode().getOffset()) <= 0) {
           // If we're only POS tagging Tokens within 
baseSentenceAnnotationType,
           // don't add the sentence if the Tokens aren't within the span of
           // baseSentenceAnnotationType
           if(posTagAllTokens || currentToken.withinSpanOf(currentSentence)) {
             tokensInCurrentSentence.add(currentToken);
-
-            if(useExistingTags && currentToken.getFeatures().containsKey(
-                  TOKEN_CATEGORY_FEATURE_NAME)) {
-              sentenceForTagger.add(new TaggedWord(
-                    (String)currentToken.getFeatures()
-                      .get(TOKEN_STRING_FEATURE_NAME),
-                    (String)currentToken.getFeatures()
-                      .get(TOKEN_CATEGORY_FEATURE_NAME)));
+            if(useExistingTags
+                && currentToken.getFeatures().containsKey(
+                    TOKEN_CATEGORY_FEATURE_NAME)) {
+              sentenceForTagger.add(new TaggedWord((String)currentToken
+                  .getFeatures().get(TOKEN_STRING_FEATURE_NAME),
+                  (String)currentToken.getFeatures().get(
+                      TOKEN_CATEGORY_FEATURE_NAME)));
             } else {
               sentenceForTagger.add(new Word((String)currentToken.getFeatures()
-                .get(TOKEN_STRING_FEATURE_NAME)));
+                  .get(TOKEN_STRING_FEATURE_NAME)));
             }
           }
           currentToken = (tokensIter.hasNext() ? tokensIter.next() : null);
         }
-
         // if the sentence doesn't contain any tokens (which is a bit weird but
         // is possible) then don't try running the POS tagger as you will get 
an
         // array index out of bounds exception
         if(sentenceForTagger.isEmpty()) continue;
-
         // run the POS tagger
         List<TaggedWord> taggerResults =
-          tagger.tagSentence(sentenceForTagger, useExistingTags);
-
+            tagger.tagSentence(sentenceForTagger, useExistingTags);
         // add the results
         // make sure no malfunction occurred
         if(taggerResults.size() != tokensInCurrentSentence.size())
           throw new ExecutionException(
-            "POS Tagger malfunction: the output size (" +
-              taggerResults.size() + ") is different from the input size (" +
-              tokensInCurrentSentence.size() + ")!");
+              "POS Tagger malfunction: the output size ("
+                  + taggerResults.size()
+                  + ") is different from the input size ("
+                  + tokensInCurrentSentence.size() + ")!");
         Iterator<TaggedWord> resIter = taggerResults.iterator();
         Iterator<Annotation> tokIter = tokensInCurrentSentence.iterator();
         while(resIter.hasNext()) {
           Annotation annot = tokIter.next();
-          addFeatures(annot, TOKEN_CATEGORY_FEATURE_NAME, 
(resIter.next().tag()));
+          addFeatures(annot, TOKEN_CATEGORY_FEATURE_NAME,
+              (resIter.next().tag()));
         }
         fireProgressChanged(sentIndex++ * 100 / sentCnt);
       }// while(sentencesIter.hasNext())
-
       if(currentToken != null && posTagAllTokens) {
         // Tag remaining Tokens if we are not considering those only within
         // baseSentenceAnnotationType
-
         // we have remaining tokens after the last sentence
         tokensInCurrentSentence.clear();
         sentenceForTagger.clear();
         while(currentToken != null) {
           tokensInCurrentSentence.add(currentToken);
-          if(useExistingTags && currentToken.getFeatures().containsKey(
-                TOKEN_CATEGORY_FEATURE_NAME)) {
-            sentenceForTagger.add(new TaggedWord(
-                  (String)currentToken.getFeatures()
-                    .get(TOKEN_STRING_FEATURE_NAME),
-                  (String)currentToken.getFeatures()
-                    .get(TOKEN_CATEGORY_FEATURE_NAME)));
+          if(useExistingTags
+              && currentToken.getFeatures().containsKey(
+                  TOKEN_CATEGORY_FEATURE_NAME)) {
+            sentenceForTagger.add(new TaggedWord((String)currentToken
+                .getFeatures().get(TOKEN_STRING_FEATURE_NAME),
+                (String)currentToken.getFeatures().get(
+                    TOKEN_CATEGORY_FEATURE_NAME)));
           } else {
             sentenceForTagger.add(new Word((String)currentToken.getFeatures()
-              .get(TOKEN_STRING_FEATURE_NAME)));
+                .get(TOKEN_STRING_FEATURE_NAME)));
           }
           currentToken = (tokensIter.hasNext() ? tokensIter.next() : null);
         }
-
         // run the POS tagger on remaining tokens
         List<TaggedWord> taggerResults =
-          tagger.tagSentence(sentenceForTagger, useExistingTags);
-
+            tagger.tagSentence(sentenceForTagger, useExistingTags);
         // add the results and make sure no malfunction occurred
         if(taggerResults.size() != tokensInCurrentSentence.size())
           throw new ExecutionException(
-            "POS Tagger malfunction: the output size (" + taggerResults.size() 
+
-              ") is different from the input size (" +
-              tokensInCurrentSentence.size() + ")!");
+              "POS Tagger malfunction: the output size ("
+                  + taggerResults.size()
+                  + ") is different from the input size ("
+                  + tokensInCurrentSentence.size() + ")!");
         Iterator<TaggedWord> resIter = taggerResults.iterator();
         Iterator<Annotation> tokIter = tokensInCurrentSentence.iterator();
         while(resIter.hasNext()) {
           Annotation annot = tokIter.next();
-          addFeatures(annot, TOKEN_CATEGORY_FEATURE_NAME, 
(resIter.next().tag()));
+          addFeatures(annot, TOKEN_CATEGORY_FEATURE_NAME,
+              (resIter.next().tag()));
         }
       }// if(currentToken != null)
       fireProcessFinished();
-      fireStatusChanged(document.getName() +
-        " tagged in " +
-        NumberFormat.getInstance().format(
-          (double)(System.currentTimeMillis() - startTime) / 1000) +
-        " seconds!");
+      fireStatusChanged(document.getName()
+          + " tagged in "
+          + NumberFormat.getInstance().format(
+              (double)(System.currentTimeMillis() - startTime) / 1000)
+          + " seconds!");
     } else {
       if(failOnMissingInputAnnotations) {
         throw new ExecutionException(
-          "No sentences or tokens to process in document " +
-            document.getName() + "\n" + "Please run a sentence splitter " +
-            "and tokeniser first!");
+            "No sentences or tokens to process in document "
+                + document.getName() + "\n" + "Please run a sentence splitter "
+                + "and tokeniser first!");
       } else {
         Utils
-          .logOnce(
-            logger,
-            Level.INFO,
-            "POS tagger: no sentence or token annotations in input document - 
see debug log for details.");
+            .logOnce(
+                logger,
+                Level.INFO,
+                "POS tagger: no sentence or token annotations in input 
document - see debug log for details.");
         logger.debug("No input annotations in document " + document.getName());
       }
     }
-
   }
 
   protected void addFeatures(Annotation annot, String featureName,
-                             String featureValue) throws GateRuntimeException {
+      String featureValue) throws GateRuntimeException {
     String tempIASN = inputASName == null ? "" : inputASName;
     String tempOASN = outputASName == null ? "" : outputASName;
-    if(outputAnnotationType.equals(baseTokenAnnotationType) &&
-      tempIASN.equals(tempOASN)) {
+    if(outputAnnotationType.equals(baseTokenAnnotationType)
+        && tempIASN.equals(tempOASN)) {
       annot.getFeatures().put(featureName, featureValue);
       return;
     } else {
       int start = annot.getStartNode().getOffset().intValue();
       int end = annot.getEndNode().getOffset().intValue();
-
       // get the annotations of type outputAnnotationType
       AnnotationSet outputAS = document.getAnnotations(outputASName);
       AnnotationSet annotations = outputAS.get(outputAnnotationType);
@@ -324,7 +308,7 @@
         features.put(featureName, featureValue);
         try {
           outputAS.add(new Long(start), new Long(end), outputAnnotationType,
-            features);
+              features);
         } catch(Exception e) {
           throw new GateRuntimeException("Invalid Offsets");
         }
@@ -332,26 +316,25 @@
         // search for the annotation if there is one with the same start and 
end
         // offsets
         ArrayList<Annotation> tempList =
-          new ArrayList<Annotation>(annotations.get());
+            new ArrayList<Annotation>(annotations.get());
         boolean found = false;
         for(int i = 0; i < tempList.size(); i++) {
           Annotation annotation = tempList.get(i);
-          if(annotation.getStartNode().getOffset().intValue() == start &&
-            annotation.getEndNode().getOffset().intValue() == end) {
+          if(annotation.getStartNode().getOffset().intValue() == start
+              && annotation.getEndNode().getOffset().intValue() == end) {
             // this is the one
             annotation.getFeatures().put(featureName, featureValue);
             found = true;
             break;
           }
         }
-
         if(!found) {
           // add new annotation
           FeatureMap features = Factory.newFeatureMap();
           features.put(featureName, featureValue);
           try {
             outputAS.add(new Long(start), new Long(end), outputAnnotationType,
-              features);
+                features);
           } catch(Exception e) {
             throw new GateRuntimeException("Invalid Offsets");
           }
@@ -420,7 +403,7 @@
     this.outputASName = outputASName;
   }
 
-  @CreoleParameter(comment = "Path to the tagger's model file", defaultValue = 
"resources/english-left3words-distsim.tagger", suffixes="tagger;model")
+  @CreoleParameter(comment = "Path to the tagger's model file", defaultValue = 
"resources/english-left3words-distsim.tagger", suffixes = "tagger;model")
   public void setModelFile(URL modelFile) {
     this.modelFile = modelFile;
   }

Modified: 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Tokenizer.java
===================================================================
--- 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Tokenizer.java
  2016-08-19 16:50:05 UTC (rev 19525)
+++ 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/Tokenizer.java
  2016-08-19 17:01:15 UTC (rev 19526)
@@ -19,10 +19,8 @@
  * 
  * $Id: Tokenizer.java 15468 2013-10-22 21:13:15Z $
  */
-
 package gate.stanford;
 
-
 import edu.stanford.nlp.ling.CoreLabel;
 import edu.stanford.nlp.process.CoreLabelTokenFactory;
 import edu.stanford.nlp.process.PTBTokenizer;
@@ -46,9 +44,8 @@
 /**
  * This class is a wrapper for the Stanford Tokenizer v3.2.0.
  */
-@CreoleResource(name = "Stanford PTB Tokenizer", comment = "Stanford Penn 
Treebank v3 Tokenizer, for English", icon = "tokeniser", 
helpURL="http://gate.ac.uk/userguide/sec:misc:creole:stanford")
+@CreoleResource(name = "Stanford PTB Tokenizer", comment = "Stanford Penn 
Treebank v3 Tokenizer, for English", icon = "tokeniser", helpURL = 
"http://gate.ac.uk/userguide/sec:misc:creole:stanford")
 public class Tokenizer extends AbstractLanguageAnalyser {
-
   private static final long serialVersionUID = -6001371186847970080L;
 
   public static final String TAG_DOCUMENT_PARAMETER_NAME = "document";
@@ -95,94 +92,65 @@
     // check the parameters
     if(document == null)
       throw new ExecutionException("No document to process!");
-
-    AnnotationSet inputAS = document.getAnnotations(inputASName);
+    
     AnnotationSet outputAS = document.getAnnotations(outputASName);
-
-
     long startTime = System.currentTimeMillis();
     fireStatusChanged("Tokenising " + document.getName());
-    fireProgressChanged(0); 
-
-
+    fireProgressChanged(0);
     // tokenising goes here
     String rawText = "";
     try {
-      rawText = document.getContent().getContent(new Long(0), 
document.getContent().size()).toString();
-    } catch (Exception e) {
+      rawText =
+          document.getContent()
+              .getContent(new Long(0), 
document.getContent().size()).toString();
+    } catch(Exception e) {
       System.out.println("Document content offsets wrong: " + e);
     }
-
     PTBTokenizer<CoreLabel> ptbt;
     try {
-      ptbt = new PTBTokenizer<CoreLabel>(new StringReader(rawText), new 
CoreLabelTokenFactory(), "invertible=true");
-    } catch (Exception e) {
+      ptbt =
+          new PTBTokenizer<CoreLabel>(new StringReader(rawText),
+              new CoreLabelTokenFactory(), "invertible=true");
+    } catch(Exception e) {
       System.out.println("Failed when calling tokenizer: " + e);
       return;
     }
-
     Long tokenStart;
     Long tokenEnd;
-    Long prevTokenEnd = new Long(0); // this default value lets us capture 
leading spaces
-
-    for (CoreLabel label; ptbt.hasNext(); ) {
+    Long prevTokenEnd = new Long(0); // this default value lets us capture
+                                     // leading spaces
+    for(CoreLabel label; ptbt.hasNext();) {
       label = ptbt.next();
       tokenStart = new Long(label.beginPosition());
       tokenEnd = new Long(label.endPosition());
-
-
       SimpleFeatureMapImpl tokenMap = new SimpleFeatureMapImpl();
-
       // add the token annotation
       try {
-        tokenMap.put(TOKEN_STRING_FEATURE, 
document.getContent().getContent(tokenStart, tokenEnd).toString());
+        tokenMap.put(TOKEN_STRING_FEATURE,
+            document.getContent().getContent(tokenStart, tokenEnd).toString());
         outputAS.add(tokenStart, tokenEnd, tokenLabel, tokenMap);
-      } catch (InvalidOffsetException e) {
+      } catch(InvalidOffsetException e) {
         System.out.println("Token alignment problem:" + e);
       }
-
       // do we need to add a space annotation?
-      if (tokenStart > prevTokenEnd) {
+      if(tokenStart > prevTokenEnd) {
         try {
-          outputAS.add(prevTokenEnd, tokenStart, spaceLabel, new 
SimpleFeatureMapImpl());
-        } catch (InvalidOffsetException e) {
+          outputAS.add(prevTokenEnd, tokenStart, spaceLabel,
+              new SimpleFeatureMapImpl());
+        } catch(InvalidOffsetException e) {
           System.out.println("Space token alignment problem:" + e);
         }
-
       }
-
       prevTokenEnd = tokenEnd;
-
     }
-
-
     fireProcessFinished();
-    fireStatusChanged(document.getName() +
-      " tokenised in " +
-      NumberFormat.getInstance().format(
-        (double)(System.currentTimeMillis() - startTime) / 1000) +
-      " seconds!");
+    fireStatusChanged(document.getName()
+        + " tokenised in "
+        + NumberFormat.getInstance().format(
+            (double)(System.currentTimeMillis() - startTime) / 1000)
+        + " seconds!");
   }
 
-  public void setEncoding(String encoding) {
-    this.encoding = encoding;
-  }
-
-  @Optional
-  @RunTime
-  @CreoleParameter(comment = "Input annotation set name", defaultValue = "")
-  public void setInputASName(String newInputASName) {
-    inputASName = newInputASName;
-  }
-
-  public String getInputASName() {
-    return inputASName;
-  }
-
-  public String getEncoding() {
-    return this.encoding;
-  }
-
   public String getOutputASName() {
     return this.outputASName;
   }
@@ -194,7 +162,6 @@
     this.outputASName = outputASName;
   }
 
-
   public String getTokenLabel() {
     return this.tokenLabel;
   }
@@ -217,14 +184,9 @@
     this.spaceLabel = spaceLabel;
   }
 
-  private String inputASName;
-
-  private String encoding;
-
   private String outputASName;
 
   private String tokenLabel;
 
   private String spaceLabel;
-
 }

Modified: 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/apps/EnglishDependencies.java
===================================================================
--- 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/apps/EnglishDependencies.java
   2016-08-19 16:50:05 UTC (rev 19525)
+++ 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/apps/EnglishDependencies.java
   2016-08-19 17:01:15 UTC (rev 19526)
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License along with
  * this program. If not, see <http://www.gnu.org/licenses/>.
  */
-
 package gate.stanford.apps;
 
 import gate.creole.PackagedController;
@@ -26,21 +25,18 @@
 import java.net.URL;
 import java.util.List;
 
-@CreoleResource(name="English Dependency Parser",
-    comment = "Ready-made application for Stanford English parser",
-    autoinstances = @AutoInstance)
+@CreoleResource(name = "English Dependency Parser", comment = "Ready-made 
application for Stanford English parser", autoinstances = @AutoInstance)
 public class EnglishDependencies extends PackagedController {
-
   private static final long serialVersionUID = 3163023140886167369L;
 
   @Override
-  @CreoleParameter(defaultValue="resources/sample_parser_en.gapp")
+  @CreoleParameter(defaultValue = "resources/sample_parser_en.gapp")
   public void setPipelineURL(URL url) {
-    this.url = url;    
+    this.url = url;
   }
-  
+
   @Override
-  @CreoleParameter(defaultValue="Stanford Parser")
+  @CreoleParameter(defaultValue = "Stanford Parser")
   public void setMenu(List<String> menu) {
     super.setMenu(menu);
   }

Modified: 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/apps/EnglishPOSDependencies.java
===================================================================
--- 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/apps/EnglishPOSDependencies.java
        2016-08-19 16:50:05 UTC (rev 19525)
+++ 
gate/branches/sawdust2/plugins/Stanford_CoreNLP/src/main/java/gate/stanford/apps/EnglishPOSDependencies.java
        2016-08-19 17:01:15 UTC (rev 19526)
@@ -15,7 +15,6 @@
  * You should have received a copy of the GNU General Public License along with
  * this program. If not, see <http://www.gnu.org/licenses/>.
  */
-
 package gate.stanford.apps;
 
 import gate.creole.PackagedController;
@@ -26,21 +25,18 @@
 import java.net.URL;
 import java.util.List;
 
-@CreoleResource(name="English POS Tagger and Dependency Parser",
-    comment = "Ready-made application for Stanford English POS tagger and 
parser",
-    autoinstances = @AutoInstance)
+@CreoleResource(name = "English POS Tagger and Dependency Parser", comment = 
"Ready-made application for Stanford English POS tagger and parser", 
autoinstances = @AutoInstance)
 public class EnglishPOSDependencies extends PackagedController {
-
   private static final long serialVersionUID = 3163023140886167369L;
 
   @Override
-  @CreoleParameter(defaultValue="resources/sample_pos+parser_en.gapp")
+  @CreoleParameter(defaultValue = "resources/sample_pos+parser_en.gapp")
   public void setPipelineURL(URL url) {
-    this.url = url;    
+    this.url = url;
   }
-  
+
   @Override
-  @CreoleParameter(defaultValue="Stanford Parser")
+  @CreoleParameter(defaultValue = "Stanford Parser")
   public void setMenu(List<String> menu) {
     super.setMenu(menu);
   }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

[gate-cvs] SF.net SVN: gate:[19526] gate/branches/sawdust2/plugins/Stanford_CoreNLP/ src/main/java/gate/stanford

Reply via email to