Repository: opennlp
Updated Branches:
  refs/heads/trunk c4c4fd3f4 -> 4a4b591f0


Remove references to PlainTextByLineStream constructor that takes InputStream

The PlainTextByLineStream constructor that takes an InputStream was deprecated for 
a while. We can safely remove it after reviewing the internal code that was still 
using it.
Left the deprecated code that was using the constructor for later work.

See issue OPENNLP-882


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4a4b591f
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4a4b591f
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4a4b591f

Branch: refs/heads/trunk
Commit: 4a4b591f0944add15a6c718980d863f147f6dad4
Parents: c4c4fd3
Author: William Colen <[email protected]>
Authored: Thu Nov 10 11:40:41 2016 -0200
Committer: William Colen <[email protected]>
Committed: Thu Nov 10 11:40:41 2016 -0200

----------------------------------------------------------------------
 .../namefind/CensusDictionaryCreatorTool.java   | 16 ++---
 .../formats/LeipzigDoccatSampleStream.java      |  9 +--
 .../LeipzigDocumentSampleStreamFactory.java     |  5 +-
 .../formats/NameFinderCensus90NameStream.java   | 19 ++++++
 .../parser/chunking/ParserEventStream.java      |  7 +-
 .../parser/treeinsert/ParserEventStream.java    | 20 ++++--
 .../tools/postag/WordTagSampleStream.java       |  7 +-
 .../formats/LeipzigDoccatSampleStreamTest.java  | 10 +--
 .../NameFinderCensus90NameStreamTest.java       | 17 +++--
 .../tools/postag/POSTaggerFactoryTest.java      | 17 +++--
 .../opennlp/tools/postag/POSTaggerMETest.java   | 16 +++--
 .../uima/namefind/NameFinderTrainer.java        | 42 ++++++------
 .../opennlp/uima/tokenize/TokenizerTrainer.java | 70 ++++++++++----------
 13 files changed, 143 insertions(+), 112 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
index 6798938..8159ef0 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java
@@ -18,7 +18,6 @@
 package opennlp.tools.cmdline.namefind;
 
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -31,6 +30,7 @@ import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.TerminateToolException;
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.formats.NameFinderCensus90NameStream;
+import opennlp.tools.util.InputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.StringList;
 
@@ -106,23 +106,17 @@ public class CensusDictionaryCreatorTool extends 
BasicCmdLineTool {
     CmdLineUtil.checkInputFile("Name data", testData);
     CmdLineUtil.checkOutputFile("Dictionary file", dictOutFile);
 
-    FileInputStream sampleDataIn = CmdLineUtil.openInFile(testData);
-    ObjectStream<StringList> sampleStream = new 
NameFinderCensus90NameStream(sampleDataIn,
-        Charset.forName(params.getEncoding()));
+    InputStreamFactory sampleDataIn = 
CmdLineUtil.createInputStreamFactory(testData);
 
     Dictionary mDictionary;
-    try {
+    try (
+        ObjectStream<StringList> sampleStream = new 
NameFinderCensus90NameStream(
+            sampleDataIn, Charset.forName(params.getEncoding()))) {
       System.out.println("Creating Dictionary...");
       mDictionary = createDictionary(sampleStream);
     } catch (IOException e) {
       throw new TerminateToolException(-1, "IO error while reading training 
data or indexing data: "
           + e.getMessage(), e);
-    } finally {
-      try {
-        sampleStream.close();
-      } catch(IOException e) {
-        // sorry this can fail..
-      }
     }
 
     System.out.println("Saving Dictionary...");

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
index d28beb7..0af66ae 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
@@ -18,12 +18,13 @@
 package opennlp.tools.formats;
 
 import java.io.IOException;
-import java.io.InputStream;
 import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
 
 import opennlp.tools.doccat.DocumentSample;
 import opennlp.tools.tokenize.SimpleTokenizer;
 import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.InputStreamFactory;
 import opennlp.tools.util.PlainTextByLineStream;
 
 /**
@@ -51,13 +52,13 @@ public class LeipzigDoccatSampleStream extends
    * @throws IOException IOException
    */
   LeipzigDoccatSampleStream(String language, int sentencesPerDocument,
-      InputStream in) throws IOException {
-    super(new PlainTextByLineStream(in, "UTF-8"));
+      InputStreamFactory in) throws IOException {
+    super(new PlainTextByLineStream(in, StandardCharsets.UTF_8));
     System.setOut(new PrintStream(System.out, true, "UTF-8"));
     this.language = language;
     this.sentencesPerDocument = sentencesPerDocument;
   }
-
+  
   public DocumentSample read() throws IOException {
 
     int count = 0;

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
index 37dac7e..c5e5c26 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
@@ -70,8 +70,9 @@ public class LeipzigDocumentSampleStreamFactory
 
     for (int i = 0; i < sentencesFiles.length; i++) {
       try {
-        sampleStreams[i] = new 
LeipzigDoccatSampleStream(sentencesFiles[i].getName().substring(0, 3), 20,
-            CmdLineUtil.openInFile(sentencesFiles[i]));
+        sampleStreams[i] = new LeipzigDoccatSampleStream(
+            sentencesFiles[i].getName().substring(0, 3), 20,
+            CmdLineUtil.createInputStreamFactory(sentencesFiles[i]));
       } catch (IOException e) {
         throw new TerminateToolException(-1, "IO error while opening sample 
data: " + e.getMessage(), e);
       }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
index ee1d15a..ee3f933 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
@@ -20,6 +20,7 @@ import java.io.InputStream;
 import java.nio.charset.Charset;
 import java.util.Locale;
 
+import opennlp.tools.util.InputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.StringList;
@@ -64,8 +65,26 @@ public class NameFinderCensus90NameStream implements 
ObjectStream<StringList> {
    * This constructor takes an <code>InputStream</code> and a 
<code>Charset</code>
    * and opens an associated stream object with the specified encoding 
specified.
    *
+   * @param in  an <code>InputStreamFactory</code> for the input file.
+   * @param encoding  the <code>Charset</code> to apply to the input stream.
+   * @throws IOException 
+   */
+  public NameFinderCensus90NameStream(InputStreamFactory in, Charset encoding)
+      throws IOException {
+    this.locale = new Locale("en"); // locale is English
+    this.encoding = encoding;
+    this.lineStream = new PlainTextByLineStream(in, this.encoding);
+  }
+
+
+  /**
+   * This constructor takes an <code>InputStream</code> and a 
<code>Charset</code>
+   * and opens an associated stream object with the specified encoding 
specified.
+   *
    * @param in  an <code>InputStream</code> for the input file.
    * @param encoding  the <code>Charset</code> to apply to the input stream.
+   * 
+   * @deprecated use {@link 
NameFinderCensus90NameStream#NameFinderCensus90NameStream(InputStreamFactory, 
Charset)}
    */
   public NameFinderCensus90NameStream(InputStream in, Charset encoding) {
     this.locale = new Locale("en");   // locale is English

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
index af202af..88c4e24 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
@@ -18,8 +18,10 @@
 package opennlp.tools.parser.chunking;
 
 import java.io.FileInputStream;
+import java.nio.charset.Charset;
 import java.util.List;
 
+import opennlp.tools.cmdline.SystemInputStreamFactory;
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.parser.AbstractBottomUpParser;
@@ -204,7 +206,10 @@ public class ParserEventStream extends 
AbstractParserEventStream {
     if (fun) {
       Parse.useFunctionTags(true);
     }
-    ObjectStream<Event> es = new ParserEventStream(new ParseSampleStream(new 
PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, 
dict);
+    ObjectStream<Event> es = new ParserEventStream(
+        new ParseSampleStream(new PlainTextByLineStream(
+            new SystemInputStreamFactory(), Charset.defaultCharset())),
+        rules, etype, dict);
     Event event;
     while ((event = es.read()) != null) {
       System.out.println(event);

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
index 4087db8..6f6c85d 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
@@ -20,11 +20,13 @@ package opennlp.tools.parser.treeinsert;
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import opennlp.tools.cmdline.SystemInputStreamFactory;
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
 import opennlp.tools.ml.model.AbstractModel;
@@ -379,13 +381,19 @@ public class ParserEventStream extends 
AbstractParserEventStream {
     if (fun) {
       Parse.useFunctionTags(true);
     }
-    ObjectStream<Event> es = new ParserEventStream(new ParseSampleStream(new 
PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, 
dict);
-    Event e;
-    while ((e = es.read()) != null) {
-      if (model != null) {
-        
System.out.print(model.eval(e.getContext())[model.getIndex(e.getOutcome())]+" 
");
+    
+    try (ObjectStream<Event> es = new ParserEventStream(
+        new ParseSampleStream(new PlainTextByLineStream(
+            new SystemInputStreamFactory(), Charset.defaultCharset())),
+        rules, etype, dict)) {
+      Event e;
+      while ((e = es.read()) != null) {
+        if (model != null) {
+          System.out.print(
+              model.eval(e.getContext())[model.getIndex(e.getOutcome())] + " 
");
+        }
+        System.out.println(e);
       }
-      System.out.println(e);
     }
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/main/java/opennlp/tools/postag/WordTagSampleStream.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/postag/WordTagSampleStream.java 
b/opennlp-tools/src/main/java/opennlp/tools/postag/WordTagSampleStream.java
index 724a9c4..644b566 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/WordTagSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/WordTagSampleStream.java
@@ -39,13 +39,8 @@ public class WordTagSampleStream extends 
FilterObjectStream<String, POSSample> {
   /**
    * Initializes the current instance.
    *
-   * @param sentences reader with sentences
-   * @throws IOException IOException
+   * @param sentences the sentences
    */
-  public WordTagSampleStream(Reader sentences) throws IOException {
-    super(new PlainTextByLineStream(sentences));
-  }
-
   public WordTagSampleStream(ObjectStream<String> sentences) {
     super(sentences);
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/test/java/opennlp/tools/formats/LeipzigDoccatSampleStreamTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/formats/LeipzigDoccatSampleStreamTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/formats/LeipzigDoccatSampleStreamTest.java
index 409991e..5797ab4 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/formats/LeipzigDoccatSampleStreamTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/formats/LeipzigDoccatSampleStreamTest.java
@@ -21,18 +21,18 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNull;
 
 import java.io.IOException;
-import java.io.InputStream;
+
+import org.junit.Test;
 
 import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.util.InputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 
-import org.junit.Test;
-
 public class LeipzigDoccatSampleStreamTest {
 
   @Test
   public void testParsingSample() throws IOException {
-    InputStream in = LeipzigDoccatSampleStreamTest.class.getResourceAsStream(
+    InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
         "/opennlp/tools/formats/leipzig-en.sample");
 
     ObjectStream<DocumentSample> sampleStream =
@@ -51,5 +51,7 @@ public class LeipzigDoccatSampleStreamTest {
     assertEquals("en", doc4.getCategory());
 
     assertNull(sampleStream.read());
+    
+    sampleStream.close();
   }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
index d48f188..84fc28d 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/formats/NameFinderCensus90NameStreamTest.java
@@ -15,25 +15,28 @@
 
 package opennlp.tools.formats;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 
 import java.io.IOException;
-import java.io.InputStream;
-import java.nio.charset.Charset;
 
+import org.junit.Test;
+
+import opennlp.tools.util.InputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.StringList;
 
-import org.junit.Test;
-
 public class NameFinderCensus90NameStreamTest {
 
-  private static ObjectStream<StringList> openData(String name) throws 
IOException {
-    InputStream in = 
NameFinderCensus90NameStreamTest.class.getResourceAsStream("/opennlp/tools/formats/"
 + name);
+  private static ObjectStream<StringList> openData(String name)
+      throws IOException {
+    InputStreamFactory in = new ResourceAsStreamFactory(
+        NameFinderCensus90NameStreamTest.class,
+        "/opennlp/tools/formats/" + name);
 
-    return new NameFinderCensus90NameStream(in, Charset.forName("utf-8"));
+    return new NameFinderCensus90NameStream(in, UTF_8);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java 
b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
index 20c12d9..fbab448 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java
@@ -17,26 +17,28 @@
 
 package opennlp.tools.postag;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertTrue;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
+
+import org.junit.Test;
 
 import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.formats.ResourceAsStreamFactory;
 import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSContextGenerator;
 import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSDictionary;
 import opennlp.tools.postag.DummyPOSTaggerFactory.DummyPOSSequenceValidator;
 import opennlp.tools.util.BaseToolFactory;
+import opennlp.tools.util.InputStreamFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.TrainingParameters;
 import opennlp.tools.util.model.ModelType;
 
-import org.junit.Test;
-
 /**
  * Tests for the {@link POSTaggerFactory} class.
  */
@@ -44,10 +46,11 @@ public class POSTaggerFactoryTest {
 
   private static ObjectStream<POSSample> createSampleStream()
       throws IOException {
-    InputStream in = POSTaggerFactoryTest.class.getClassLoader()
-        .getResourceAsStream("opennlp/tools/postag/AnnotatedSentences.txt");
+    InputStreamFactory in = new ResourceAsStreamFactory(
+        POSTaggerFactoryTest.class,
+        "/opennlp/tools/postag/AnnotatedSentences.txt");
 
-    return new WordTagSampleStream((new InputStreamReader(in)));
+    return new WordTagSampleStream(new PlainTextByLineStream(in, UTF_8));
   }
 
   static POSModel trainPOSModel(ModelType type, POSTaggerFactory factory)

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
----------------------------------------------------------------------
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java 
b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
index 6001de6..1d99687 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java
@@ -18,27 +18,29 @@
 
 package opennlp.tools.postag;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.junit.Assert.assertEquals;
 
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
 
+import org.junit.Test;
+
+import opennlp.tools.formats.ResourceAsStreamFactory;
+import opennlp.tools.util.InputStreamFactory;
 import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.model.ModelType;
 
-import org.junit.Test;
-
 /**
  * Tests for the {@link POSTaggerME} class.
  */
 public class POSTaggerMETest {
 
   private static ObjectStream<POSSample> createSampleStream() throws 
IOException {
-    InputStream in = 
POSTaggerMETest.class.getClassLoader().getResourceAsStream(
-        "opennlp/tools/postag/AnnotatedSentences.txt");
+    InputStreamFactory in = new ResourceAsStreamFactory(POSTaggerMETest.class, 
+        "/opennlp/tools/postag/AnnotatedSentences.txt");
 
-    return new WordTagSampleStream((new InputStreamReader(in)));
+    return new WordTagSampleStream(new PlainTextByLineStream(in, UTF_8));
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
----------------------------------------------------------------------
diff --git 
a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java 
b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
index d637c68..dcc0ddc 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
@@ -18,13 +18,11 @@
 package opennlp.uima.namefind;
 
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
@@ -32,6 +30,18 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.ProcessTrace;
+
 import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool;
 import opennlp.tools.ml.maxent.GIS;
 import opennlp.tools.namefind.BioCodec;
@@ -40,6 +50,8 @@ import opennlp.tools.namefind.NameSample;
 import opennlp.tools.namefind.NameSampleDataStream;
 import opennlp.tools.namefind.TokenNameFinderFactory;
 import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.MarkableFileInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ObjectStreamUtils;
 import opennlp.tools.util.PlainTextByLineStream;
@@ -51,18 +63,6 @@ import opennlp.uima.util.OpennlpUtil;
 import opennlp.uima.util.SampleTraceStream;
 import opennlp.uima.util.UimaUtil;
 
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
 /**
  * OpenNLP NameFinder trainer.
  * <p>
@@ -369,7 +369,6 @@ public final class NameFinderTrainer extends 
CasConsumer_ImplBase {
     // create training stream ...
     ObjectStream<NameSample> samples = 
ObjectStreamUtils.createObjectStream(nameFinderSamples);
 
-    InputStream additionalTrainingDataIn = null;
     Writer samplesOut = null;
     TokenNameFinderModel nameModel;
     try {
@@ -379,10 +378,14 @@ public final class NameFinderTrainer extends 
CasConsumer_ImplBase {
           logger.log(Level.INFO, "Using additional training data file: " + 
additionalTrainingDataFile);
         }
 
-        additionalTrainingDataIn = new 
FileInputStream(additionalTrainingDataFile);
+        InputStreamFactory additionalTrainingDataIn = new 
MarkableFileInputStreamFactory(
+            new File(additionalTrainingDataFile));
+        Charset additionalTrainingDataCharset = Charset
+            .forName(additionalTrainingDataEncoding);
 
         ObjectStream<NameSample> additionalSamples = new NameSampleDataStream(
-            new PlainTextByLineStream(new 
InputStreamReader(additionalTrainingDataIn, additionalTrainingDataEncoding)));
+            new PlainTextByLineStream(additionalTrainingDataIn,
+                additionalTrainingDataCharset));
 
         samples = ObjectStreamUtils.createObjectStream(samples, 
additionalSamples);
       }
@@ -405,9 +408,6 @@ public final class NameFinderTrainer extends 
CasConsumer_ImplBase {
           new TokenNameFinderFactory(featureGeneratorDefinition, resourceMap, 
new BioCodec()));
     }
     finally {
-      if (additionalTrainingDataIn != null) {
-        additionalTrainingDataIn.close();
-      }
 
       if (samplesOut != null) {
         samplesOut.close();

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4b591f/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
----------------------------------------------------------------------
diff --git 
a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java 
b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
index d6309dd..ece9eca 100644
--- a/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
+++ b/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
@@ -18,24 +18,37 @@
 package opennlp.uima.tokenize;
 
 import java.io.File;
-import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.ProcessTrace;
+
 import opennlp.tools.ml.maxent.GIS;
 import opennlp.tools.tokenize.TokenSample;
 import opennlp.tools.tokenize.TokenSampleStream;
 import opennlp.tools.tokenize.TokenizerME;
 import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.MarkableFileInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ObjectStreamUtils;
 import opennlp.tools.util.PlainTextByLineStream;
@@ -46,19 +59,6 @@ import opennlp.uima.util.OpennlpUtil;
 import opennlp.uima.util.SampleTraceStream;
 import opennlp.uima.util.UimaUtil;
 
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
 /**
  * OpenNLP Tokenizer trainer.
  * <p>
@@ -231,37 +231,35 @@ public final class TokenizerTrainer extends 
CasConsumer_ImplBase {
     // if trace file
     // serialize events ...
 
-    InputStream additionalTrainingDataIn = null;
     Writer samplesOut = null;
     TokenizerModel tokenModel;
 
-    try {
-      if (additionalTrainingDataFile != null) {
-
-        if (mLogger.isLoggable(Level.INFO)) {
-          mLogger.log(Level.INFO, "Using addional training data file: " + 
additionalTrainingDataFile);
-        }
+    if (additionalTrainingDataFile != null) {
 
-        additionalTrainingDataIn = new 
FileInputStream(additionalTrainingDataFile);
+      if (mLogger.isLoggable(Level.INFO)) {
+        mLogger.log(Level.INFO, "Using addional training data file: " + 
additionalTrainingDataFile);
+      }
 
-        ObjectStream<TokenSample> additionalSamples = new TokenSampleStream(
-            new PlainTextByLineStream(new 
InputStreamReader(additionalTrainingDataIn, additionalTrainingDataEncoding)));
+      InputStreamFactory additionalTrainingDataIn = new 
MarkableFileInputStreamFactory(
+          new File(additionalTrainingDataFile));
 
-        samples = ObjectStreamUtils.createObjectStream(samples, 
additionalSamples);
-      }
+      Charset additionalTrainingDataCharset = Charset
+          .forName(additionalTrainingDataEncoding);
 
-      if (sampleTraceFile != null) {
-        samplesOut = new OutputStreamWriter(new 
FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
-        samples = new SampleTraceStream<TokenSample>(samples, samplesOut);
-      }
+      ObjectStream<TokenSample> additionalSamples = new TokenSampleStream(
+          new PlainTextByLineStream(additionalTrainingDataIn,
+              additionalTrainingDataCharset));
 
-      tokenModel = TokenizerME.train(language, samples, isSkipAlphaNumerics);
+      samples = ObjectStreamUtils.createObjectStream(samples, 
additionalSamples);
     }
-    finally {
-      if (additionalTrainingDataIn != null)
-        additionalTrainingDataIn.close();
+
+    if (sampleTraceFile != null) {
+      samplesOut = new OutputStreamWriter(new 
FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
+      samples = new SampleTraceStream<TokenSample>(samples, samplesOut);
     }
 
+    tokenModel = TokenizerME.train(language, samples, isSkipAlphaNumerics);
+
     // dereference to allow garbage collection
     tokenSamples = null;
 

Reply via email to