This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch migrate-modelbuilder-addon-to-opennlp-tools-2_1_0 in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit 7a0368f9eac3a151884be276a8fe852b9e7ee0cf Author: Martin Wiesner <[email protected]> AuthorDate: Fri Jan 20 14:44:52 2023 +0100 updates sandbox component 'modelbuilder-addon' to be compatible with latest opennlp-tools release - adjusts opennlp-tools to 2.1.0 - adjusts parent project (org.apache.apache) to version 18 - adjusts Java language level to 11 - improves resource handling of streams - removes funny pseudo JUnit-test which was effectively doing nothing - removes unused imports --- modelbuilder-addon/pom.xml | 61 +++++++++++++++++----- .../modelbuilder/DefaultModelBuilderUtil.java | 20 ++++--- .../addons/modelbuilder/KnownEntityProvider.java | 35 ++++++------- .../modelbuilder/ModelGenerationValidator.java | 5 +- .../addons/modelbuilder/ModelParameter.java | 4 -- .../opennlp/addons/modelbuilder/Modelable.java | 7 +-- .../modelbuilder/SemiSupervisedModelGenerator.java | 4 -- .../addons/modelbuilder/SentenceProvider.java | 3 -- .../modelbuilder/impls/BaseModelBuilderParams.java | 1 - .../impls/FileKnownEntityProvider.java | 19 +++---- .../modelbuilder/impls/FileModelValidatorImpl.java | 12 ++--- .../modelbuilder/impls/FileSentenceProvider.java | 10 ++-- .../modelbuilder/impls/GenericModelGenerator.java | 3 +- .../modelbuilder/impls/GenericModelableImpl.java | 55 +++++++++---------- .../src/test/java/modelbuilder/AppTest.java | 38 -------------- 15 files changed, 121 insertions(+), 156 deletions(-) diff --git a/modelbuilder-addon/pom.xml b/modelbuilder-addon/pom.xml index 4a9c886..6096303 100644 --- a/modelbuilder-addon/pom.xml +++ b/modelbuilder-addon/pom.xml @@ -1,35 +1,68 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <modelVersion>4.0.0</modelVersion> - <parent> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp</artifactId> - <version>1.6.0-SNAPSHOT</version> - <relativePath>../opennlp/pom.xml</relativePath> + <parent> + <groupId>org.apache</groupId> + <artifactId>apache</artifactId> + <!-- TODO OPENNLP-1452 once this is resolved, move to 29 as well. --> + <version>18</version> + <relativePath /> </parent> <artifactId>modelbuilder-addon</artifactId> - <version>1.0-SNAPSHOT</version> + <version>2.1.1-SNAPSHOT</version> <packaging>jar</packaging> - <name>modelbuilder-addon</name> - <url>http://maven.apache.org</url> + <name>Apache OpenNLP ModelBuilder Addon</name> <properties> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> </properties> <dependencies> + <dependency> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-tools</artifactId> + <version>2.1.0</version> + </dependency> + <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> - <version>3.8.1</version> + <version>4.13.2</version> <scope>test</scope> </dependency> - <dependency> - <groupId>org.apache.opennlp</groupId> - <artifactId>opennlp-tools</artifactId> - <version>1.6.0-SNAPSHOT</version> - </dependency> </dependencies> + + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-compiler-plugin</artifactId> + <configuration> + <source>11</source> + <target>11</target> + <compilerArgument>-Xlint</compilerArgument> + </configuration> + </plugin> + </plugins> + </build> </project> diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java index 81ff9fd..b52ce16 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java @@ -16,6 +16,7 @@ package opennlp.addons.modelbuilder; import java.io.File; + import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams; import opennlp.addons.modelbuilder.impls.FileKnownEntityProvider; import opennlp.addons.modelbuilder.impls.FileModelValidatorImpl; @@ -24,17 +25,14 @@ import opennlp.addons.modelbuilder.impls.GenericModelGenerator; import opennlp.addons.modelbuilder.impls.GenericModelableImpl; /** - * - * Utilizes the filebased implementations to produce an NER model from user + * Utilizes the file-based implementations to produce an NER model from user * The basic processing is such * read in the list of known entities * annotate the sentences based on the list of known entities * create a model from the annotations * perform NER with the model on the sentences * add the NER results to the annotations - * rebuild the model - * loop - * defined data + * rebuild the model loop defined data. */ public class DefaultModelBuilderUtil { @@ -74,20 +72,20 @@ public class DefaultModelBuilderUtil { params.setKnownEntitiesFile(knownEntities); params.setModelFile(modelOutFile); params.setKnownEntityBlacklist(knownEntitiesBlacklist); - /** + /* * sentence providers feed this process with user data derived sentences * this impl just reads line by line through a file */ SentenceProvider sentenceProvider = new FileSentenceProvider(); sentenceProvider.setParameters(params); - /** + /* * KnownEntityProviders provide a seed list of known entities... such as * Barack Obama for person, or Germany for location obviously these would * want to be prolific, non ambiguous names */ KnownEntityProvider knownEntityProvider = new FileKnownEntityProvider(); knownEntityProvider.setParameters(params); - /** + /* * ModelGenerationValidators try to weed out bad hits by the iterations of * the name finder. Since this is a recursive process, with each iteration * the namefinder will get more and more greedy if bad entities are allowed @@ -98,17 +96,17 @@ public class DefaultModelBuilderUtil { */ ModelGenerationValidator validator = new FileModelValidatorImpl(); validator.setParameters(params); - /** + /* * Modelable's write and read the annotated sentences, as well as create and * write the NER models */ Modelable modelable = new GenericModelableImpl(); modelable.setParameters(params); - /** + /* * the modelGenerator actually runs the process with a set number of * iterations... could be better by actually calculating the diff between - * runs and stopping based on a thresh, but for extrememly large sentence + * runs and stopping based on a thresh, but for extremely large sentence * sets this may be too much. */ modelGenerator.build(sentenceProvider, knownEntityProvider, validator, modelable, iterations); diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java index 694250e..fa2a00e 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java @@ -17,29 +17,26 @@ package opennlp.addons.modelbuilder; import java.util.Set; - - /** - * -Supplies a list of known entities (a list of names or locations) + * Supplies a list of known entities (a list of names or locations) */ -public interface KnownEntityProvider extends ModelParameter{ +public interface KnownEntityProvider extends ModelParameter { /** - * returns a list of known non ambiguous entities. - * @return a set of entities - */ + * returns a list of known non ambiguous entities. + * @return a set of entities + */ Set<String> getKnownEntities(); -/** - * adds to the set of known entities. Overriding classes should hold this list in a class level set. - * @param unambiguousEntity - */ + + /** + * adds to the set of known entities. Overriding classes should hold this list in a class level set. + * @param unambiguousEntity + */ void addKnownEntity(String unambiguousEntity); -/** - * defines the type of entity that the set contains, ie person, location, organization. - * @return - */ + + /** + * defines the type of entity that the set contains, ie person, location, organization. + * @return + */ String getKnownEntitiesType(); - - - + } diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java index 4bd5fe2..e8e8f7e 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java @@ -18,16 +18,13 @@ package opennlp.addons.modelbuilder; import java.util.Collection; /** - * -Validates results from the iterative namefinding + * Validates results from the iterative namefinding */ public interface ModelGenerationValidator extends ModelParameter { Boolean validSentence(String sentence); Boolean validNamedEntity(String namedEntity); - - Collection<String> getBlackList(); } diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java index 136e775..e2e8649 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java @@ -17,12 +17,8 @@ package opennlp.addons.modelbuilder; import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams; -/** - * - */ public interface ModelParameter<T extends BaseModelBuilderParams>{ void setParameters(T params); - } diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java index 80b0170..7c8f6a4 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java @@ -16,14 +16,12 @@ package opennlp.addons.modelbuilder; import java.util.Set; -import opennlp.tools.namefind.TokenNameFinderModel; +import opennlp.tools.namefind.TokenNameFinderModel; /** * */ -public interface Modelable extends ModelParameter{ - - +public interface Modelable extends ModelParameter { String annotate(String sentence, String namedEntity, String entityType); @@ -40,6 +38,5 @@ public interface Modelable extends ModelParameter{ TokenNameFinderModel getModel(); String[] tokenizeSentenceToWords(String sentence); - } diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java index c97a4c1..22807c9 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java @@ -17,10 +17,6 @@ package opennlp.addons.modelbuilder; import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams; -/** - * - - */ public interface SemiSupervisedModelGenerator extends ModelParameter<BaseModelBuilderParams> { void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider, diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java index 5610224..1c655ad 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java @@ -18,9 +18,6 @@ package opennlp.addons.modelbuilder; import java.util.Set; import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams; -/** - * - */ public interface SentenceProvider extends ModelParameter<BaseModelBuilderParams> { Set<String> getSentences(); diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java index fcb2384..6173acc 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java @@ -19,7 +19,6 @@ import java.io.File; import java.util.Map; /** - * * Used to pass params through the processing */ public class BaseModelBuilderParams { diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java index 0de043c..841f6db 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java @@ -17,24 +17,22 @@ package opennlp.addons.modelbuilder.impls; import java.io.BufferedReader; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + import opennlp.addons.modelbuilder.KnownEntityProvider; -/** - * - */ public class FileKnownEntityProvider implements KnownEntityProvider { - Set<String> knownEntities = new HashSet<String>(); + final Set<String> knownEntities = new HashSet<>(); BaseModelBuilderParams params; + @Override public Set<String> getKnownEntities() { if (knownEntities.isEmpty()) { @@ -44,7 +42,7 @@ public class FileKnownEntityProvider implements KnownEntityProvider { String line; fis = new FileInputStream(params.getKnownEntitiesFile()); - br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8"))); + br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)); while ((line = br.readLine()) != null) { knownEntities.add(line); } @@ -53,8 +51,6 @@ public class FileKnownEntityProvider implements KnownEntityProvider { br.close(); br = null; fis = null; - } catch (FileNotFoundException ex) { - Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex); } @@ -69,14 +65,11 @@ public class FileKnownEntityProvider implements KnownEntityProvider { @Override public String getKnownEntitiesType() { - return params.getEntityType(); } - - @Override - public void setParameters(BaseModelBuilderParams params) { + public void setParameters(BaseModelBuilderParams params) { this.params = params; } } diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java index ea4bb05..8bc4954 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java @@ -17,16 +17,16 @@ package opennlp.addons.modelbuilder.impls; import java.io.BufferedReader; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Collection; import java.util.HashSet; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + import opennlp.addons.modelbuilder.ModelGenerationValidator; /** @@ -34,7 +34,7 @@ import opennlp.addons.modelbuilder.ModelGenerationValidator; */ public class FileModelValidatorImpl implements ModelGenerationValidator { - private Set<String> badentities = new HashSet<String>(); + private final Set<String> badentities = new HashSet<>(); BaseModelBuilderParams params; @Override @@ -59,7 +59,7 @@ public class FileModelValidatorImpl implements ModelGenerationValidator { // if (p.matcher(namedEntity).find()) { // return false; // } - Boolean b = true; + boolean b = true; if (badentities.contains(namedEntity.toLowerCase())) { b = false; } @@ -78,15 +78,13 @@ public class FileModelValidatorImpl implements ModelGenerationValidator { String line; fis = new FileInputStream(params.getKnownEntityBlacklist()); - br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8"))); + br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)); while ((line = br.readLine()) != null) { badentities.add(line); } br.close(); br = null; fis = null; - } catch (FileNotFoundException ex) { - Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex); } diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java index bea55f5..bf6fe6f 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java @@ -17,7 +17,6 @@ package opennlp.addons.modelbuilder.impls; import java.io.BufferedReader; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -26,6 +25,7 @@ import java.util.HashSet; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + import opennlp.addons.modelbuilder.SentenceProvider; /** @@ -33,9 +33,10 @@ import opennlp.addons.modelbuilder.SentenceProvider; */ public class FileSentenceProvider implements SentenceProvider { + private final Set<String> sentences = new HashSet<>(); BaseModelBuilderParams params ; - Set<String> sentences = new HashSet<String>(); + @Override public Set<String> getSentences() { if (sentences.isEmpty()) { try { @@ -55,8 +56,6 @@ public class FileSentenceProvider implements SentenceProvider { br.close(); br = null; fis = null; - } catch (FileNotFoundException ex) { - Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex); } @@ -64,7 +63,8 @@ public class FileSentenceProvider implements SentenceProvider { return sentences; } - public void setParameters(BaseModelBuilderParams params) { + @Override + public void setParameters(BaseModelBuilderParams params) { this.params = params; } } diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java index bbd23e1..8b11dac 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java @@ -17,6 +17,7 @@ package opennlp.addons.modelbuilder.impls; import java.util.HashMap; import java.util.Map; + import opennlp.addons.modelbuilder.KnownEntityProvider; import opennlp.addons.modelbuilder.ModelGenerationValidator; import opennlp.addons.modelbuilder.Modelable; @@ -31,7 +32,7 @@ import opennlp.tools.util.Span; */ public class GenericModelGenerator implements SemiSupervisedModelGenerator { - private Map<String, String> params = new HashMap<String, String>(); + private Map<String, String> params = new HashMap<>(); @Override public void setParameters(BaseModelBuilderParams params) { diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java index 572e84b..caa6ea8 100644 --- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java +++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java @@ -16,30 +16,35 @@ package opennlp.addons.modelbuilder.impls; import java.io.BufferedOutputStream; -import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; + import opennlp.addons.modelbuilder.Modelable; import opennlp.tools.namefind.NameFinderME; import opennlp.tools.namefind.NameSample; import opennlp.tools.namefind.NameSampleDataStream; +import opennlp.tools.namefind.TokenNameFinderFactory; import opennlp.tools.namefind.TokenNameFinderModel; +import opennlp.tools.util.InputStreamFactory; +import opennlp.tools.util.MarkableFileInputStreamFactory; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.PlainTextByLineStream; +import opennlp.tools.util.TrainingParameters; /** - * Creates annotations, writes annotations to file, and creates a model and writes to a file + * Creates annotations, writes annotations to file, and creates a model and writes to a file. */ public class GenericModelableImpl implements Modelable { - private Set<String> annotatedSentences = new HashSet<String>(); + private Set<String> annotatedSentences = new HashSet<>(); BaseModelBuilderParams params; @Override @@ -49,20 +54,15 @@ public class GenericModelableImpl implements Modelable { @Override public String annotate(String sentence, String namedEntity, String entityType) { - String annotation = sentence.replace(namedEntity, " <START:" + entityType + "> " + namedEntity + " <END> "); - return annotation; + return sentence.replace(namedEntity, " <START:" + entityType + "> " + namedEntity + " <END> "); } @Override public void writeAnnotatedSentences() { - try { - - FileWriter writer = new FileWriter(params.getAnnotatedTrainingDataFile(), false); - + try (FileWriter writer = new FileWriter(params.getAnnotatedTrainingDataFile(), false)) { for (String s : annotatedSentences) { writer.write(s.replace("\n", " ").trim() + "\n"); } - writer.close(); } catch (IOException ex) { ex.printStackTrace(); } @@ -85,34 +85,36 @@ public class GenericModelableImpl implements Modelable { @Override public void buildModel(String entityType) { + final InputStreamFactory factory; try { + factory = new MarkableFileInputStreamFactory(params.getAnnotatedTrainingDataFile()); + } catch (FileNotFoundException e) { + throw new RuntimeException("Error finding and reading the training data file!", e); + } + + final TrainingParameters trainParams = TrainingParameters.defaultParams(); + + TokenNameFinderModel model; + try (ObjectStream<NameSample> samples = + new NameSampleDataStream(new PlainTextByLineStream(factory, StandardCharsets.UTF_8)); + OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(params.getModelFile()))) { + System.out.println("\tBuilding Model using " + annotatedSentences.size() + " annotations"); System.out.println("\t\treading training data..."); - Charset charset = Charset.forName("UTF-8"); - ObjectStream<String> lineStream = - new PlainTextByLineStream(new FileInputStream(params.getAnnotatedTrainingDataFile()), charset); - ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream); - - TokenNameFinderModel model; - model = NameFinderME.train("en", entityType, sampleStream, null); - sampleStream.close(); - OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(params.getModelFile())); + model = NameFinderME.train("en", entityType, samples, trainParams, new TokenNameFinderFactory()); model.serialize(modelOut); - if (modelOut != null) { - modelOut.close(); - } + System.out.println("\tmodel generated"); } catch (Exception e) { + throw new RuntimeException("Error building model! " + e.getLocalizedMessage(), e); } } @Override public TokenNameFinderModel getModel() { - - TokenNameFinderModel nerModel = null; try { - nerModel = new TokenNameFinderModel(new FileInputStream(params.getModelFile())); + nerModel = new TokenNameFinderModel(params.getModelFile()); } catch (IOException ex) { Logger.getLogger(GenericModelableImpl.class.getName()).log(Level.SEVERE, null, ex); } @@ -122,6 +124,5 @@ public class GenericModelableImpl implements Modelable { @Override public String[] tokenizeSentenceToWords(String sentence) { return sentence.split(" "); - } } diff --git a/modelbuilder-addon/src/test/java/modelbuilder/AppTest.java b/modelbuilder-addon/src/test/java/modelbuilder/AppTest.java deleted file mode 100644 index 2b04731..0000000 --- a/modelbuilder-addon/src/test/java/modelbuilder/AppTest.java +++ /dev/null @@ -1,38 +0,0 @@ -package modelbuilder; - -import junit.framework.Test; -import junit.framework.TestCase; -import junit.framework.TestSuite; - -/** - * Unit test for simple App. - */ -public class AppTest - extends TestCase -{ - /** - * Create the test case - * - * @param testName name of the test case - */ - public AppTest( String testName ) - { - super( testName ); - } - - /** - * @return the suite of tests being tested - */ - public static Test suite() - { - return new TestSuite( AppTest.class ); - } - - /** - * Rigourous Test :-) - */ - public void testApp() - { - assertTrue( true ); - } -}
