This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/main by this push:
new 67cc809 simplifies code of modelbuilder-addon component; converts
BaseModelBuilderParams to a Record; improves JavaDoc along the path
67cc809 is described below
commit 67cc809dc9eaa47de0427694c2d35888e4854bf8
Author: Martin Wiesner <[email protected]>
AuthorDate: Mon Apr 28 16:56:50 2025 +0200
simplifies code of modelbuilder-addon component
converts BaseModelBuilderParams to a Record
improves JavaDoc along the path
---
.../modelbuilder/DefaultModelBuilderUtil.java | 43 +++++++++-----------
.../addons/modelbuilder/KnownEntityProvider.java | 8 ++--
.../modelbuilder/ModelGenerationValidator.java | 2 +-
.../modelbuilder/impls/BaseModelBuilderParams.java | 46 +++-------------------
.../impls/FileKnownEntityProvider.java | 11 +++++-
.../modelbuilder/impls/FileModelValidatorImpl.java | 25 ++++++++----
.../modelbuilder/impls/FileSentenceProvider.java | 13 +++++-
.../modelbuilder/impls/GenericModelGenerator.java | 7 ++--
.../modelbuilder/impls/GenericModelableImpl.java | 11 +++++-
9 files changed, 80 insertions(+), 86 deletions(-)
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
index f100a96..1165de3 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
@@ -16,6 +16,7 @@
package opennlp.addons.modelbuilder;
import java.io.File;
+import java.util.Collections;
import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
import opennlp.addons.modelbuilder.impls.FileKnownEntityProvider;
@@ -25,14 +26,16 @@ import
opennlp.addons.modelbuilder.impls.GenericModelGenerator;
import opennlp.addons.modelbuilder.impls.GenericModelableImpl;
/**
- * Utilizes the file-based implementations to produce an NER model from user
+ * Utilizes the file-based implementations to produce an NER model from user.
* The basic processing is such
- * read in the list of known entities
- * annotate the sentences based on the list of known entities
- * create a model from the annotations
- * perform NER with the model on the sentences
- * add the NER results to the annotations
- * rebuild the model loop defined data.
+ * <ol>
+ * <li>read in the list of known entities</li>
+ * <li>annotate the sentences based on the list of known entities</li>
+ * <li>create a model from the annotations</li>
+ * <li>perform NER with the model on the sentences</li>
+ * <li>add the NER results to the annotations, and</li>
+ * <li>rebuild the model loop defined data.</li>
+ * </ol>
*/
public class DefaultModelBuilderUtil {
@@ -64,27 +67,22 @@ public class DefaultModelBuilderUtil {
*/
public static void generateModel(File sentences, File knownEntities, File
knownEntitiesBlacklist,
File modelOutFile, File annotatedSentenceOutFile, String
namedEntityType, int iterations) {
- SemiSupervisedModelGenerator modelGenerator = new GenericModelGenerator();
- BaseModelBuilderParams params = new BaseModelBuilderParams();
- params.setAnnotatedTrainingDataFile(annotatedSentenceOutFile);
- params.setSentenceFile(sentences);
- params.setEntityType(namedEntityType);
- params.setKnownEntitiesFile(knownEntities);
- params.setModelFile(modelOutFile);
- params.setKnownEntityBlacklist(knownEntitiesBlacklist);
+ final SemiSupervisedModelGenerator modelGenerator = new
GenericModelGenerator();
+ final BaseModelBuilderParams params = new
BaseModelBuilderParams(sentences, knownEntities,
+ knownEntitiesBlacklist, modelOutFile, annotatedSentenceOutFile,
namedEntityType,
+ Collections.emptyMap());
+
/*
* sentence providers feed this process with user data derived sentences
* this impl just reads line by line through a file
*/
- SentenceProvider sentenceProvider = new FileSentenceProvider();
- sentenceProvider.setParameters(params);
+ SentenceProvider sentenceProvider = new FileSentenceProvider(params);
/*
* KnownEntityProviders provide a seed list of known entities... such as
* Barack Obama for person, or Germany for location obviously these would
* want to be prolific, non-ambiguous names
*/
- KnownEntityProvider knownEntityProvider = new FileKnownEntityProvider();
- knownEntityProvider.setParameters(params);
+ KnownEntityProvider knownEntityProvider = new
FileKnownEntityProvider(params);
/*
* ModelGenerationValidators try to weed out bad hits by the iterations of
* the name finder. Since this is a recursive process, with each iteration
@@ -94,14 +92,12 @@ public class DefaultModelBuilderUtil {
* etc...users can make this as specific as they need for their dat and
* their use case
*/
- ModelGenerationValidator validator = new FileModelValidatorImpl();
- validator.setParameters(params);
+ ModelGenerationValidator validator = new FileModelValidatorImpl(params);
/*
* Modelable's write and read the annotated sentences, as well as create
and
* write the NER models
*/
- Modelable modelable = new GenericModelableImpl();
- modelable.setParameters(params);
+ Modelable modelable = new GenericModelableImpl(params);
/*
* the modelGenerator actually runs the process with a set number of
@@ -110,6 +106,5 @@ public class DefaultModelBuilderUtil {
* sets this may be too much.
*/
modelGenerator.build(sentenceProvider, knownEntityProvider, validator,
modelable, iterations);
-
}
}
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
index 1362d1b..f1b960a 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
@@ -20,13 +20,12 @@ import
opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
import java.util.Set;
/**
- * Supplies a list of known entities (a list of names or locations)
+ * Supplies a list of known entities (a list of names or locations).
*/
public interface KnownEntityProvider extends
ModelParameter<BaseModelBuilderParams> {
/**
- * Returns a list of known non-ambiguous entities.
- * @return a set of entities
+ * @return Retrieves a list of known non-ambiguous entities.
*/
Set<String> getKnownEntities();
@@ -37,8 +36,7 @@ public interface KnownEntityProvider extends
ModelParameter<BaseModelBuilderPara
void addKnownEntity(String unambiguousEntity);
/**
- * Defines the type of entity that the set contains, ie person, location,
organization.
- * @return
+ * @return Retrieves the type of entity that the set contains, ie person,
location, organization.
*/
String getKnownEntitiesType();
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
index 6ef8a9e..16c11e0 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
@@ -20,7 +20,7 @@ import
opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
import java.util.Collection;
/**
- * Validates results from the iterative namefinding
+ * Validates results from the iterative name finding.
*/
public interface ModelGenerationValidator extends
ModelParameter<BaseModelBuilderParams> {
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
index 7212629..54f2a89 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
@@ -19,71 +19,37 @@ import java.io.File;
import java.util.Map;
/**
- * Used to pass params through the processing
+ * Used to pass params through the processing.
*/
-public class BaseModelBuilderParams {
-
- private File modelFile;
- private File sentenceFile;
- private File knownEntitiesFile;
- private File knownEntityBlacklist;
- private File annotatedTrainingDataFile;
- private String entityType;
- private Map<String, String> additionalParams;
+public record BaseModelBuilderParams (File sentenceFile, File
knownEntitiesFile, File knownEntitiesBlacklist,
+ File modelFile, File
annotatedTrainingDataFile, String entityType,
+ Map<String, String> additionalParams) {
public File getModelFile() {
return modelFile;
}
- public void setModelFile(File modelFile) {
- this.modelFile = modelFile;
- }
-
public File getSentenceFile() {
return sentenceFile;
}
- public void setSentenceFile(File sentenceFile) {
- this.sentenceFile = sentenceFile;
- }
-
public File getKnownEntitiesFile() {
return knownEntitiesFile;
}
- public void setKnownEntitiesFile(File knownEntitiesFile) {
- this.knownEntitiesFile = knownEntitiesFile;
- }
-
public File getKnownEntityBlacklist() {
- return knownEntityBlacklist;
- }
-
- public void setKnownEntityBlacklist(File knownEntityBlacklist) {
- this.knownEntityBlacklist = knownEntityBlacklist;
+ return knownEntitiesBlacklist;
}
public Map<String, String> getAdditionalParams() {
return additionalParams;
}
-
- public void setAdditionalParams(Map<String, String> additionalParams) {
- this.additionalParams = additionalParams;
- }
-
+
public String getEntityType() {
return entityType;
}
- public void setEntityType(String entityType) {
- this.entityType = entityType;
- }
-
public File getAnnotatedTrainingDataFile() {
return annotatedTrainingDataFile;
}
-
- public void setAnnotatedTrainingDataFile(File annotatedTrainingDataFile) {
- this.annotatedTrainingDataFile = annotatedTrainingDataFile;
- }
}
\ No newline at end of file
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
index c897532..58f3960 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
@@ -29,8 +29,15 @@ import opennlp.addons.modelbuilder.KnownEntityProvider;
public class FileKnownEntityProvider implements KnownEntityProvider {
- final Set<String> knownEntities = new HashSet<>();
- BaseModelBuilderParams params;
+ private final Set<String> knownEntities = new HashSet<>();
+ private BaseModelBuilderParams params;
+
+ public FileKnownEntityProvider(BaseModelBuilderParams params) {
+ if (params == null) {
+ throw new IllegalArgumentException("BaseModelBuilderParams cannot be
null!");
+ }
+ this.params = params;
+ }
@Override
public Set<String> getKnownEntities() {
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
index 8c0703b..9943d68 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
@@ -30,11 +30,20 @@ import opennlp.addons.modelbuilder.ModelGenerationValidator;
/**
* Validates NER results input before inclusion into the model.
+ *
+ * @see ModelGenerationValidator
*/
public class FileModelValidatorImpl implements ModelGenerationValidator {
- private final Set<String> badentities = new HashSet<>();
- BaseModelBuilderParams params;
+ private final Set<String> badEntities = new HashSet<>();
+ private BaseModelBuilderParams params;
+
+ public FileModelValidatorImpl(BaseModelBuilderParams params) {
+ if (params == null) {
+ throw new IllegalArgumentException("BaseModelBuilderParams cannot be
null!");
+ }
+ this.params = params;
+ }
@Override
public void setParameters(BaseModelBuilderParams params) {
@@ -50,7 +59,7 @@ public class FileModelValidatorImpl implements
ModelGenerationValidator {
@Override
public Boolean validNamedEntity(String namedEntity) {
- if (badentities.isEmpty()) {
+ if (badEntities.isEmpty()) {
getBlackList();
}
//
@@ -59,7 +68,7 @@ public class FileModelValidatorImpl implements
ModelGenerationValidator {
// return false;
// }
boolean b = true;
- if (badentities.contains(namedEntity.toLowerCase())) {
+ if (badEntities.contains(namedEntity.toLowerCase())) {
b = false;
}
return b;
@@ -68,19 +77,19 @@ public class FileModelValidatorImpl implements
ModelGenerationValidator {
@Override
public Collection<String> getBlackList() {
if (params.getKnownEntityBlacklist() == null) {
- return badentities;
+ return badEntities;
}
- if (!badentities.isEmpty()) {
+ if (!badEntities.isEmpty()) {
try (BufferedReader br = new BufferedReader(new InputStreamReader(
new FileInputStream(params.getKnownEntityBlacklist()),
StandardCharsets.UTF_8))) {
String line;
while ((line = br.readLine()) != null) {
- badentities.add(line);
+ badEntities.add(line);
}
} catch (IOException ex) {
Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE,
null, ex);
}
}
- return badentities;
+ return badEntities;
}
}
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
index ec0393f..492b5d9 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
@@ -28,12 +28,21 @@ import java.util.logging.Logger;
import opennlp.addons.modelbuilder.SentenceProvider;
/**
- * Provides user sentences via a simple text file
+ * Provides user sentences via a simple text file.
+ *
+ * @see SentenceProvider
*/
public class FileSentenceProvider implements SentenceProvider {
private final Set<String> sentences = new HashSet<>();
- BaseModelBuilderParams params ;
+ private BaseModelBuilderParams params ;
+
+ public FileSentenceProvider(BaseModelBuilderParams params) {
+ if (params == null) {
+ throw new IllegalArgumentException("BaseModelBuilderParams cannot be
null!");
+ }
+ this.params = params;
+ }
@Override
public Set<String> getSentences() {
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
index aaeaa6f..bd5bd59 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
@@ -15,7 +15,6 @@
*/
package opennlp.addons.modelbuilder.impls;
-import java.util.HashMap;
import java.util.Map;
import opennlp.addons.modelbuilder.KnownEntityProvider;
@@ -27,12 +26,14 @@ import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.util.Span;
/**
+ * Generic {@link SemiSupervisedModelGenerator} implementation that handles all
+ * processing using the default file implementations.
*
- * Generic impl that handles all processing using the default file
implementations
+ * @see SemiSupervisedModelGenerator
*/
public class GenericModelGenerator implements SemiSupervisedModelGenerator {
- private Map<String, String> params = new HashMap<>();
+ private Map<String, String> params;
@Override
public void setParameters(BaseModelBuilderParams params) {
diff --git
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
index 68b371b..660496d 100644
---
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
+++
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
@@ -44,12 +44,21 @@ import opennlp.tools.util.TrainingParameters;
/**
* Creates annotations, writes annotations to file, and creates a model and
writes to a file.
+ *
+ * @see Modelable
*/
public class GenericModelableImpl implements Modelable {
private Set<String> annotatedSentences = new HashSet<>();
- BaseModelBuilderParams params;
+ private BaseModelBuilderParams params;
+ public GenericModelableImpl(BaseModelBuilderParams params) {
+ if (params == null) {
+ throw new IllegalArgumentException("BaseModelBuilderParams cannot be
null!");
+ }
+ this.params = params;
+ }
+
@Override
public void setParameters(BaseModelBuilderParams params) {
this.params = params;