Repository: opennlp Updated Branches: refs/heads/master c74899d1a -> 643408187
OPENNLP-937: Print error and hint for insufficient training data Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/64340818 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/64340818 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/64340818 Branch: refs/heads/master Commit: 6434081874fdda9b8bbbd9c2b76818116f33f9b9 Parents: c74899d Author: Jörn Kottmann <[email protected]> Authored: Sun Jan 15 15:03:07 2017 +0100 Committer: Jörn Kottmann <[email protected]> Committed: Sun Jan 15 21:02:54 2017 +0100 ---------------------------------------------------------------------- .../tools/cmdline/AbstractTrainerTool.java | 28 ++++++++++---------- .../chunker/ChunkerCrossValidatorTool.java | 4 +-- .../cmdline/chunker/ChunkerTrainerTool.java | 4 +-- .../doccat/DoccatCrossValidatorTool.java | 4 +-- .../tools/cmdline/doccat/DoccatTrainerTool.java | 4 +-- .../lemmatizer/LemmatizerTrainerTool.java | 6 +---- .../TokenNameFinderCrossValidatorTool.java | 3 +-- .../namefind/TokenNameFinderTrainerTool.java | 7 +++-- .../tools/cmdline/parser/ParserTrainerTool.java | 3 +-- .../postag/POSTaggerCrossValidatorTool.java | 4 +-- .../cmdline/postag/POSTaggerTrainerTool.java | 3 +-- .../SentenceDetectorCrossValidatorTool.java | 4 +-- .../sentdetect/SentenceDetectorTrainerTool.java | 3 +-- .../tokenizer/TokenizerCrossValidatorTool.java | 4 +-- .../cmdline/tokenizer/TokenizerTrainerTool.java | 5 ++-- 15 files changed, 31 insertions(+), 55 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractTrainerTool.java index cb94346..f87aa45 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/AbstractTrainerTool.java @@ -17,18 +17,17 @@ package opennlp.tools.cmdline; -import opennlp.tools.util.ObjectStream; +import java.io.IOException; + +import opennlp.tools.util.InsufficientTrainingDataException; import opennlp.tools.util.TrainingParameters; /** * Base class for trainer tools. */ -public class AbstractTrainerTool<T, P> extends AbstractTypedParamTool<T, P> { +public class AbstractTrainerTool<T, P> extends AbstractEvaluatorTool<T, P> { - protected P params; protected TrainingParameters mlParams; - protected ObjectStreamFactory<T> factory; - protected ObjectStream<T> sampleStream; /** * Constructor with type parameters. @@ -40,15 +39,16 @@ public class AbstractTrainerTool<T, P> extends AbstractTypedParamTool<T, P> { super(sampleType, params); } - public void run(String format, String[] args) { - validateAllArgs(args, this.paramsClass, format); + protected TerminateToolException createTerminationIOException(IOException e) { - params = ArgumentParser.parse( - ArgumentParser.filter(args, this.paramsClass), this.paramsClass); + if (e instanceof InsufficientTrainingDataException) { + return new TerminateToolException(-1, "\n\nERROR: Not enough training data\n" + + "The provided training data is not sufficient to create enough events to train a model.\n" + + "To resolve this error use more training data, if this doesn't help there might\n" + + "be some fundamental problem with the training data itself."); + } - factory = getStreamFactory(format); - String[] fargs = ArgumentParser.filter(args, factory.getParameters()); - validateFactoryArgs(factory, fargs); - sampleStream = factory.create(fargs); + return new TerminateToolException(-1, "IO error while reading training data or indexing data: " + + e.getMessage(), e); } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java index 2724d88..4fe6a10 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerCrossValidatorTool.java @@ -27,7 +27,6 @@ import opennlp.tools.chunker.ChunkerEvaluationMonitor; import opennlp.tools.chunker.ChunkerFactory; import opennlp.tools.cmdline.AbstractCrossValidatorTool; import opennlp.tools.cmdline.CmdLineUtil; -import opennlp.tools.cmdline.TerminateToolException; import opennlp.tools.cmdline.chunker.ChunkerCrossValidatorTool.CVToolParams; import opennlp.tools.cmdline.params.CVParams; import opennlp.tools.cmdline.params.DetailedFMeasureEvaluatorParams; @@ -79,8 +78,7 @@ public final class ChunkerCrossValidatorTool validator.evaluate(sampleStream, params.getFolds()); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + - e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java index ceca0f6..1e69a0a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerTrainerTool.java @@ -26,7 +26,6 @@ import opennlp.tools.chunker.ChunkerME; import opennlp.tools.chunker.ChunkerModel; import opennlp.tools.cmdline.AbstractTrainerTool; import opennlp.tools.cmdline.CmdLineUtil; -import opennlp.tools.cmdline.TerminateToolException; import opennlp.tools.cmdline.chunker.ChunkerTrainerTool.TrainerToolParams; import opennlp.tools.cmdline.params.TrainingToolParams; import opennlp.tools.util.model.ModelUtil; @@ -67,8 +66,7 @@ public class ChunkerTrainerTool model = ChunkerME.train(params.getLang(), sampleStream, mlParams, chunkerFactory); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + - e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java index ecc3c56..867e8e1 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java @@ -81,9 +81,7 @@ public final class DoccatCrossValidatorTool extends reportListener = new DoccatFineGrainedReportListener(reportOutputStream); listeners.add(reportListener); } catch (FileNotFoundException e) { - throw new TerminateToolException(-1, - "IO error while creating Doccat fine-grained report file: " - + e.getMessage()); + throw createTerminationIOException(e); } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java index abb885c..6ef5d88 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java @@ -22,7 +22,6 @@ import java.io.IOException; import opennlp.tools.cmdline.AbstractTrainerTool; import opennlp.tools.cmdline.CmdLineUtil; -import opennlp.tools.cmdline.TerminateToolException; import opennlp.tools.cmdline.doccat.DoccatTrainerTool.TrainerToolParams; import opennlp.tools.cmdline.params.TrainingToolParams; import opennlp.tools.doccat.BagOfWordsFeatureGenerator; @@ -76,8 +75,7 @@ public class DoccatTrainerTool model = DocumentCategorizerME.train(params.getLang(), sampleStream, mlParams, factory); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + - e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerTrainerTool.java index ddefa09..d7cea80 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/lemmatizer/LemmatizerTrainerTool.java @@ -22,7 +22,6 @@ import java.io.IOException; import opennlp.tools.cmdline.AbstractTrainerTool; import opennlp.tools.cmdline.CmdLineUtil; -import opennlp.tools.cmdline.TerminateToolException; import opennlp.tools.cmdline.lemmatizer.LemmatizerTrainerTool.TrainerToolParams; import opennlp.tools.cmdline.params.TrainingToolParams; import opennlp.tools.lemmatizer.LemmaSample; @@ -67,10 +66,7 @@ public class LemmatizerTrainerTool model = LemmatizerME.train(params.getLang(), sampleStream, mlParams, lemmatizerFactory); } catch (IOException e) { - throw new TerminateToolException(-1, - "IO error while reading training data or indexing data: " - + e.getMessage(), - e); + throw createTerminationIOException(e); } finally { try { sampleStream.close(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java index fab9e15..d347ef8 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java @@ -110,8 +110,7 @@ public final class TokenNameFinderCrossValidatorTool listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()])); validator.evaluate(sampleStream, params.getFolds()); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " - + e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { sampleStream.close(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java index 8a4bd49..a8d4417 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java @@ -25,6 +25,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.w3c.dom.Element; + import opennlp.tools.cmdline.AbstractTrainerTool; import opennlp.tools.cmdline.CmdLineUtil; import opennlp.tools.cmdline.TerminateToolException; @@ -42,8 +44,6 @@ import opennlp.tools.util.featuregen.GeneratorFactory; import opennlp.tools.util.model.ArtifactSerializer; import opennlp.tools.util.model.ModelUtil; -import org.w3c.dom.Element; - public final class TokenNameFinderTrainerTool extends AbstractTrainerTool<NameSample, TrainerToolParams> { @@ -217,8 +217,7 @@ public final class TokenNameFinderTrainerTool nameFinderFactory); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " - + e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java index 8779035..60a4664 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java @@ -171,8 +171,7 @@ public final class ParserTrainerTool extends AbstractTrainerTool<Parse, TrainerT } } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " - + e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java index fdfb83a..6d5d7ef 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerCrossValidatorTool.java @@ -76,9 +76,7 @@ public final class POSTaggerCrossValidatorTool reportListener = new POSTaggerFineGrainedReportListener( reportOutputStream); } catch (FileNotFoundException e) { - throw new TerminateToolException(-1, - "IO error while creating POS Tagger fine-grained report file: " - + e.getMessage()); + throw createTerminationIOException(e); } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java index d19ea5e..3c7b618 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTrainerTool.java @@ -130,8 +130,7 @@ public final class POSTaggerTrainerTool sampleStream, mlParams, postaggerFactory); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " - + e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java index 79d5645..55d1df6 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java @@ -21,7 +21,6 @@ import java.io.IOException; import opennlp.tools.cmdline.AbstractCrossValidatorTool; import opennlp.tools.cmdline.CmdLineUtil; -import opennlp.tools.cmdline.TerminateToolException; import opennlp.tools.cmdline.params.CVParams; import opennlp.tools.cmdline.sentdetect.SentenceDetectorCrossValidatorTool.CVToolParams; import opennlp.tools.dictionary.Dictionary; @@ -78,8 +77,7 @@ public final class SentenceDetectorCrossValidatorTool validator.evaluate(sampleStream, params.getFolds()); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " + - e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java index 9e493b8..85bb06f 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTrainerTool.java @@ -93,8 +93,7 @@ public final class SentenceDetectorTrainerTool model = SentenceDetectorME.train(params.getLang(), sampleStream, sdFactory, mlParams); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " - + e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java index f6e3864..c207d3d 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerCrossValidatorTool.java @@ -21,7 +21,6 @@ import java.io.IOException; import opennlp.tools.cmdline.AbstractCrossValidatorTool; import opennlp.tools.cmdline.CmdLineUtil; -import opennlp.tools.cmdline.TerminateToolException; import opennlp.tools.cmdline.params.CVParams; import opennlp.tools.cmdline.tokenizer.TokenizerCrossValidatorTool.CVToolParams; import opennlp.tools.dictionary.Dictionary; @@ -73,8 +72,7 @@ public final class TokenizerCrossValidatorTool validator.evaluate(sampleStream, params.getFolds()); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " - + e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { http://git-wip-us.apache.org/repos/asf/opennlp/blob/64340818/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java index ba619c3..bb722d0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/TokenizerTrainerTool.java @@ -91,8 +91,7 @@ public final class TokenizerTrainerTool tokFactory, mlParams); } catch (IOException e) { - throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " - + e.getMessage(), e); + throw createTerminationIOException(e); } finally { try { @@ -104,4 +103,4 @@ public final class TokenizerTrainerTool CmdLineUtil.writeModel("tokenizer", modelOutFile, model); } -} \ No newline at end of file +}
