This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/main by this push:
new ed2682cc OPENNLP-1447: Reenable Cmdline Tool execution tests (#720)
ed2682cc is described below
commit ed2682ccdb9553970acf683fe0f724d5c57e7c9d
Author: Martin Wiesner <[email protected]>
AuthorDate: Mon Dec 23 07:15:23 2024 +0100
OPENNLP-1447: Reenable Cmdline Tool execution tests (#720)
- removes @Disabled from multiple cmdline execution tests
- adjusts TokenizerTrainerTool to handle existing yet "empty" abb-dict
files better
---
.../cmdline/tokenizer/TokenizerTrainerTool.java | 12 ++-
.../tools/cmdline/TokenNameFinderToolTest.java | 105 ++++++++++----------
.../tokenizer/TokenizerTrainerToolTest.java | 107 ++++++++++++---------
opennlp-tools/src/test/resources/logback-test.xml | 6 +-
4 files changed, 131 insertions(+), 99 deletions(-)
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
index f51b8c67..4f5389ab 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java
@@ -21,6 +21,7 @@ import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
+import java.io.InputStream;
import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
@@ -33,6 +34,7 @@ import opennlp.tools.ml.TrainerFactory.TrainerType;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenizerFactory;
import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.InvalidFormatException;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelUtil;
@@ -53,9 +55,15 @@ public final class TokenizerTrainerTool
static Dictionary loadDict(File f) throws IOException {
Dictionary dict = null;
- if (f != null) {
+ if (f != null && f.exists()) {
CmdLineUtil.checkInputFile("abb dict", f);
- dict = new Dictionary(new BufferedInputStream(new FileInputStream(f)));
+ try (InputStream in = new BufferedInputStream(new FileInputStream(f))) {
+ if (in.available() == 0) {
+ throw new InvalidFormatException("Encountered an empty dictionary
file?!");
+ } else {
+ dict = new Dictionary(in);
+ }
+ }
}
return dict;
}
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
index e8594c59..190fa9d9 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java
@@ -19,19 +19,22 @@ package opennlp.tools.cmdline;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
+import ch.qos.logback.classic.Level;
+import ch.qos.logback.classic.Logger;
+import ch.qos.logback.classic.LoggerContext;
+import nl.altindag.log.LogCaptor;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
+import org.slf4j.LoggerFactory;
import opennlp.tools.cmdline.namefind.TokenNameFinderTool;
import opennlp.tools.namefind.NameFinderME;
@@ -44,75 +47,80 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
-public class TokenNameFinderToolTest {
-
- @Test
- //TODO OPENNLP-1447
- @Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
- "We need to find a way to redirect log output (i.e. implement " +
- "a custom log adapter and plug it in, if we want to do such tests.")
- void run() throws IOException {
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
- File model1 = trainModel();
+public class TokenNameFinderToolTest {
- String[] args = new String[] {model1.getAbsolutePath()};
+ /*
+ * Programmatically sets the "opennlp.tools.cmdline.namefind" logger to INFO
+ * so that the tool's INFO log messages can be captured and asserted on
+ */
+ @BeforeAll
+ public static void prepare() {
+ LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
+ Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
+ logger.setLevel(Level.INFO);
+ }
- final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence.";
- InputStream stream = new
ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
+ /*
+ * Programmatically restores the default log level (= OFF) after the tests
+ */
+ @AfterAll
+ public static void cleanup() {
+ LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
+ Logger logger = context.getLogger("opennlp.tools.cmdline.namefind");
+ logger.setLevel(Level.OFF);
+ }
- System.setIn(stream);
+ @Test
+ void run() throws IOException {
+ try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
+ File model1 = trainModel();
+ String[] args = new String[] {model1.getAbsolutePath()};
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintStream ps = new PrintStream(baos);
- System.setOut(ps);
+ final String in = "It is Stefanie Schmidt.\n";
+ InputStream stream = new
ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
- TokenNameFinderTool tool = new TokenNameFinderTool();
- tool.run(args);
+ System.setIn(stream);
- final String content = baos.toString(StandardCharsets.UTF_8);
- Assertions.assertTrue(content.contains("It is <START:person> Stefanie
Schmidt. <END>"));
+ TokenNameFinderTool tool = new TokenNameFinderTool();
+ tool.run(args);
- Assertions.assertTrue(model1.delete());
+ assertEquals(1, logCaptor.getInfoLogs().size());
+ final String content = logCaptor.getInfoLogs().get(0);
+ logCaptor.clearLogs();
+ assertEquals("It is <START:person> Stefanie Schmidt. <END>", content);
+ assertTrue(model1.delete());
+ }
}
@Test
void invalidModel() {
-
- Assertions.assertThrows(TerminateToolException.class, () -> {
-
+ assertThrows(TerminateToolException.class, () -> {
String[] args = new String[] {"invalidmodel.bin"};
-
TokenNameFinderTool tool = new TokenNameFinderTool();
tool.run(args);
});
-
-
}
@Test
- //TODO OPENNLP-1447
- @Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
- "We need to find a way to redirect log output (i.e. implement " +
- "a custom log adapter and plug it in, if we want to do such tests.")
void usage() {
+ try (LogCaptor logCaptor = LogCaptor.forClass(TokenNameFinderTool.class)) {
+ String[] args = new String[] {};
- String[] args = new String[] {};
-
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintStream ps = new PrintStream(baos);
- System.setOut(ps);
-
- TokenNameFinderTool tool = new TokenNameFinderTool();
- tool.run(args);
-
- final String content = baos.toString(StandardCharsets.UTF_8);
- Assertions.assertEquals(tool.getHelp(), content.trim());
+ TokenNameFinderTool tool = new TokenNameFinderTool();
+ tool.run(args);
+ assertEquals(1, logCaptor.getInfoLogs().size());
+ final String content = logCaptor.getInfoLogs().get(0);
+ assertEquals(tool.getHelp(), content.trim());
+ }
}
private File trainModel() throws IOException {
-
ObjectStream<String> lineStream =
new PlainTextByLineStream(new MockInputStreamFactory(
new
File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")),
@@ -123,7 +131,6 @@ public class TokenNameFinderToolTest {
params.put(TrainingParameters.CUTOFF_PARAM, 1);
TokenNameFinderModel model;
-
TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory();
try (ObjectStream<NameSample> sampleStream = new
NameSampleDataStream(lineStream)) {
@@ -132,12 +139,10 @@ public class TokenNameFinderToolTest {
}
File modelFile = Files.createTempFile("model", ".bin").toFile();
-
try (OutputStream modelOut =
new BufferedOutputStream(new FileOutputStream(modelFile))) {
model.serialize(modelOut);
}
-
return modelFile;
}
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerToolTest.java
b/opennlp-tools/src/test/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerToolTest.java
index 65993fb0..6059a2b1 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerToolTest.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerToolTest.java
@@ -18,33 +18,40 @@
package opennlp.tools.cmdline.tokenizer;
import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
-import org.junit.jupiter.api.Assertions;
-import org.junit.jupiter.api.Disabled;
+import ch.qos.logback.classic.Level;
+import ch.qos.logback.classic.Logger;
+import ch.qos.logback.classic.LoggerContext;
+import nl.altindag.log.LogCaptor;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
+import org.slf4j.LoggerFactory;
import opennlp.tools.AbstractTempDirTest;
+import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.InvalidFormatException;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
/**
* Tests for the {@link TokenizerTrainerTool} class.
*/
public class TokenizerTrainerToolTest extends AbstractTempDirTest {
- private TokenizerTrainerTool tokenizerTrainerTool;
-
private final String sampleSuccessData =
"Pierre Vinken<SPLIT>, 61 years old<SPLIT>, will join the board as a
nonexecutive " +
"director Nov. 29<SPLIT>.\n" +
@@ -54,10 +61,31 @@ public class TokenizerTrainerToolTest extends
AbstractTempDirTest {
private final String sampleFailureData = "It is Fail Test Case.\n\nNothing
in this sentence.";
+ /*
+ * Programmatically sets the "opennlp.tools.cmdline.CmdLineUtil" logger to INFO
+ * so that the tool's INFO log messages can be captured and asserted on
+ */
+ @BeforeAll
+ public static void prepare() {
+ LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
+ Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
+ logger.setLevel(Level.INFO);
+ }
+
+ /*
+ * Programmatically restores the default log level (= OFF) after the tests
+ */
+ @AfterAll
+ public static void cleanup() {
+ LoggerContext context = (LoggerContext) LoggerFactory.getILoggerFactory();
+ Logger logger = context.getLogger("opennlp.tools.cmdline.CmdLineUtil");
+ logger.setLevel(Level.OFF);
+ }
+
@Test
public void testGetShortDescription() {
- tokenizerTrainerTool = new TokenizerTrainerTool();
- Assertions.assertEquals("Trainer for the learnable tokenizer",
+ TokenizerTrainerTool tokenizerTrainerTool = new TokenizerTrainerTool();
+ assertEquals("Trainer for the learnable tokenizer",
tokenizerTrainerTool.getShortDescription());
}
@@ -65,44 +93,38 @@ public class TokenizerTrainerToolTest extends
AbstractTempDirTest {
public void testLoadDictHappyCase() throws IOException {
File dictFile = new File("lang/ga/abb_GA.xml");
Dictionary dict = TokenizerTrainerTool.loadDict(dictFile);
- Assertions.assertNotNull(dict);
+ assertNotNull(dict);
}
@Test
public void testLoadDictFailCase() {
- Assertions.assertThrows(InvalidFormatException.class , () ->
+ assertThrows(InvalidFormatException.class , () ->
TokenizerTrainerTool.loadDict(prepareDataFile("")));
}
- //TODO OPENNLP-1447
- @Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
- "We need to find a way to redirect log output (i.e. implement " +
- "a custom log adapter and plug it in, if we want to do such tests.")
+ @Test
public void testTestRunHappyCase() throws IOException {
- File model = tempDir.resolve("model-en.bin").toFile();
-
- String[] args =
- new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" ,
"false" , "-lang" , "en" ,
- "-data" , String.valueOf(prepareDataFile(sampleSuccessData)) ,
"-encoding" , "UTF-8" };
-
- InputStream stream = new
ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
- System.setIn(stream);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintStream ps = new PrintStream(baos);
- System.setOut(ps);
-
- tokenizerTrainerTool = new TokenizerTrainerTool();
- tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
-
- final String content = baos.toString(StandardCharsets.UTF_8);
- Assertions.assertTrue(content.contains("Number of Event Tokens: 171"));
- Assertions.assertTrue(model.delete());
+ try (LogCaptor logCaptor = LogCaptor.forClass(CmdLineUtil.class)) {
+ File model = tempDir.resolve("model-en.bin").toFile();
+
+ String[] args =
+ new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" ,
"false" , "-lang" , "en" ,
+ "-data" , String.valueOf(prepareDataFile(sampleSuccessData)) ,
"-encoding" , "UTF-8" };
+
+ InputStream stream = new
ByteArrayInputStream(sampleSuccessData.getBytes(StandardCharsets.UTF_8));
+ System.setIn(stream);
+
+ TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
+ trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
+
+ assertEquals(3, logCaptor.getInfoLogs().size());
+ final String content = logCaptor.getInfoLogs().get(2);
+ assertTrue(content.startsWith("Wrote tokenizer model to path:"));
+ assertTrue(model.delete());
+ }
}
- //TODO OPENNLP-1447
- @Disabled(value = "OPENNLP-1447: These kind of tests won't work anymore. " +
- "We need to find a way to redirect log output (i.e. implement " +
- "a custom log adapter and plug it in, if we want to do such tests.")
+ @Test
public void testTestRunExceptionCase() throws IOException {
File model = tempDir.resolve("model-en.bin").toFile();
model.deleteOnExit();
@@ -111,17 +133,10 @@ public class TokenizerTrainerToolTest extends
AbstractTempDirTest {
new String[] { "-model" , model.getAbsolutePath() , "-alphaNumOpt" ,
"false" , "-lang" , "en" ,
"-data" , String.valueOf(prepareDataFile(sampleFailureData)) ,
"-encoding" , "UTF-8" };
- InputStream stream = new
ByteArrayInputStream(sampleFailureData.getBytes(StandardCharsets.UTF_8));
- System.setIn(stream);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintStream ps = new PrintStream(baos);
- System.setOut(ps);
-
- Assertions.assertThrows(TerminateToolException.class , () -> {
- tokenizerTrainerTool = new TokenizerTrainerTool();
- tokenizerTrainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
+ assertThrows(TerminateToolException.class , () -> {
+ TokenizerTrainerTool trainerTool = new TokenizerTrainerTool();
+ trainerTool.run(StreamFactoryRegistry.DEFAULT_FORMAT , args);
});
-
}
// This is guaranteed to be deleted after the test finishes.
diff --git a/opennlp-tools/src/test/resources/logback-test.xml
b/opennlp-tools/src/test/resources/logback-test.xml
index b3cbcf20..1baae291 100644
--- a/opennlp-tools/src/test/resources/logback-test.xml
+++ b/opennlp-tools/src/test/resources/logback-test.xml
@@ -23,12 +23,16 @@
<appender name="consoleAppender"
class="ch.qos.logback.core.ConsoleAppender">
<encoder>
- <pattern>%date{HH:mm:ss.SSS} [%thread] %-5level
%class{36}.%method:%line - %msg%n</pattern>
+ <pattern>%date{HH:mm:ss.SSS} [%thread] %-4level
%class{36}.%method:%line - %msg%n</pattern>
</encoder>
</appender>
<logger name="opennlp" level="off"/>
+ <logger name="opennlp.tools.cmdline.namefind" level="off"/>
+
+ <logger name="opennlp.tools.cmdline.CmdLineUtil" level="off"/>
+
<root level="off">
<appender-ref ref="consoleAppender" />
</root>