OPENNLP-1030: Add unit test for TokenNameFinderTool
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/e2cf4811 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/e2cf4811 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/e2cf4811 Branch: refs/heads/LangDetect Commit: e2cf4811ba485b0bb29d7d80bc853666bfbfa958 Parents: d8cdd5e Author: jzonthemtn <[email protected]> Authored: Tue Apr 18 20:39:07 2017 -0400 Committer: smarthi <[email protected]> Committed: Tue Apr 18 20:39:07 2017 -0400 ---------------------------------------------------------------------- .../cmdline/namefind/TokenNameFinderTool.java | 1 + .../tools/cmdline/TokenNameFinderToolTest.java | 137 +++++++++++++++++++ 2 files changed, 138 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/e2cf4811/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java index 59b2f3a..a5c9bd6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java @@ -47,6 +47,7 @@ public final class TokenNameFinderTool extends BasicCmdLineTool { return "Usage: " + CLI.CMD + " " + getName() + " model1 model2 ... modelN < sentences"; } + @Override public void run(String[] args) { if (args.length == 0) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/e2cf4811/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java new file mode 100644 index 0000000..3ade0d5 --- /dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.cmdline; + +import java.io.BufferedOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; + +import org.junit.Assert; +import org.junit.Test; + +import opennlp.tools.cmdline.namefind.TokenNameFinderTool; +import opennlp.tools.namefind.NameFinderME; +import opennlp.tools.namefind.NameSample; +import opennlp.tools.namefind.NameSampleDataStream; +import opennlp.tools.namefind.TokenNameFinderFactory; +import opennlp.tools.namefind.TokenNameFinderModel; +import opennlp.tools.util.MockInputStreamFactory; +import opennlp.tools.util.ObjectStream; +import opennlp.tools.util.PlainTextByLineStream; +import opennlp.tools.util.TrainingParameters; + +public class TokenNameFinderToolTest { + + @Test + public void run() throws IOException { + + File model1 = trainModel(); + + String[] args = new String[]{model1.getAbsolutePath()}; + + final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence."; + InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8)); + + System.setIn(stream); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(baos); + System.setOut(ps); + + TokenNameFinderTool tool = new TokenNameFinderTool(); + tool.run(args); + + final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8); + Assert.assertTrue(content.contains("It is <START:person> Stefanie Schmidt. <END>")); + + } + + @Test(expected = TerminateToolException.class) + public void invalidModel() { + + String[] args = new String[]{"invalidmodel.bin"}; + + TokenNameFinderTool tool = new TokenNameFinderTool(); + tool.run(args); + + } + + @Test() + public void usage() { + + String[] args = new String[]{}; + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream ps = new PrintStream(baos); + System.setOut(ps); + + TokenNameFinderTool tool = new TokenNameFinderTool(); + tool.run(args); + + final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8); + Assert.assertEquals(tool.getHelp(), content.trim()); + + } + + private File trainModel() throws IOException { + + String encoding = "ISO-8859-1"; + + ObjectStream<String> lineStream = + new PlainTextByLineStream(new MockInputStreamFactory( + new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")), encoding); + ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream); + + TrainingParameters params = new TrainingParameters(); + params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70)); + params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1)); + + TokenNameFinderModel model; + + TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory(); + + try { + model = NameFinderME.train("en", null, sampleStream, params, + nameFinderFactory); + } + finally { + sampleStream.close(); + } + + BufferedOutputStream modelOut = null; + + File modelFile = File.createTempFile("model", ".bin"); + + try { + modelOut = new BufferedOutputStream(new FileOutputStream(modelFile)); + model.serialize(modelOut); + } finally { + if (modelOut != null) + modelOut.close(); + } + + return modelFile; + } + +}
