This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


The following commit(s) were added to refs/heads/main by this push:
     new 67cc809  simplifies code of modelbuilder-addon component; converts
BaseModelBuilderParams to a Record; improves JavaDoc along the path
67cc809 is described below

commit 67cc809dc9eaa47de0427694c2d35888e4854bf8
Author: Martin Wiesner <[email protected]>
AuthorDate: Mon Apr 28 16:56:50 2025 +0200

    simplifies code of modelbuilder-addon component
    converts BaseModelBuilderParams to a Record
    improves JavaDoc along the path
---
 .../modelbuilder/DefaultModelBuilderUtil.java      | 43 +++++++++-----------
 .../addons/modelbuilder/KnownEntityProvider.java   |  8 ++--
 .../modelbuilder/ModelGenerationValidator.java     |  2 +-
 .../modelbuilder/impls/BaseModelBuilderParams.java | 46 +++-------------------
 .../impls/FileKnownEntityProvider.java             | 11 +++++-
 .../modelbuilder/impls/FileModelValidatorImpl.java | 25 ++++++++----
 .../modelbuilder/impls/FileSentenceProvider.java   | 13 +++++-
 .../modelbuilder/impls/GenericModelGenerator.java  |  7 ++--
 .../modelbuilder/impls/GenericModelableImpl.java   | 11 +++++-
 9 files changed, 80 insertions(+), 86 deletions(-)

diff --git 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
index f100a96..1165de3 100644
--- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
+++ 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
@@ -16,6 +16,7 @@
 package opennlp.addons.modelbuilder;
 
 import java.io.File;
+import java.util.Collections;
 
 import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
 import opennlp.addons.modelbuilder.impls.FileKnownEntityProvider;
@@ -25,14 +26,16 @@ import 
opennlp.addons.modelbuilder.impls.GenericModelGenerator;
 import opennlp.addons.modelbuilder.impls.GenericModelableImpl;
 
 /**
- * Utilizes the file-based implementations to produce an NER model from user
+ * Utilizes the file-based implementations to produce an NER model from user.
  * The basic processing is such
- * read in the list of known entities
- * annotate the sentences based on the list of known entities
- * create a model from the annotations
- * perform NER with the model on the sentences
- * add the NER results to the annotations
- * rebuild the model loop defined data.
+ * <ol>
+ * <li>read in the list of known entities</li>
+ * <li>annotate the sentences based on the list of known entities</li>
+ * <li>create a model from the annotations</li>
+ * <li>perform NER with the model on the sentences</li>
+ * <li>add the NER results to the annotations, and</li>
+ * <li>rebuild the model loop defined data.</li>
+ * </ol>
  */
 public class DefaultModelBuilderUtil {
 
@@ -64,27 +67,22 @@ public class DefaultModelBuilderUtil {
    */
   public static void generateModel(File sentences, File knownEntities, File 
knownEntitiesBlacklist,
           File modelOutFile, File annotatedSentenceOutFile, String 
namedEntityType, int iterations) {
-    SemiSupervisedModelGenerator modelGenerator = new GenericModelGenerator();
-    BaseModelBuilderParams params = new BaseModelBuilderParams();
-    params.setAnnotatedTrainingDataFile(annotatedSentenceOutFile);
-    params.setSentenceFile(sentences);
-    params.setEntityType(namedEntityType);
-    params.setKnownEntitiesFile(knownEntities);
-    params.setModelFile(modelOutFile);
-    params.setKnownEntityBlacklist(knownEntitiesBlacklist);
+    final SemiSupervisedModelGenerator modelGenerator = new 
GenericModelGenerator();
+    final BaseModelBuilderParams params = new 
BaseModelBuilderParams(sentences, knownEntities,
+            knownEntitiesBlacklist, modelOutFile, annotatedSentenceOutFile, 
namedEntityType,
+            Collections.emptyMap());
+
     /*
      * sentence providers feed this process with user data derived sentences
      * this impl just reads line by line through a file
      */
-    SentenceProvider sentenceProvider = new FileSentenceProvider();
-    sentenceProvider.setParameters(params);
+    SentenceProvider sentenceProvider = new FileSentenceProvider(params);
     /*
      * KnownEntityProviders provide a seed list of known entities... such as
      * Barack Obama for person, or Germany for location obviously these would
      * want to be prolific, non-ambiguous names
      */
-    KnownEntityProvider knownEntityProvider = new FileKnownEntityProvider();
-    knownEntityProvider.setParameters(params);
+    KnownEntityProvider knownEntityProvider = new 
FileKnownEntityProvider(params);
     /*
      * ModelGenerationValidators try to weed out bad hits by the iterations of
      * the name finder. Since this is a recursive process, with each iteration
@@ -94,14 +92,12 @@ public class DefaultModelBuilderUtil {
      * etc...users can make this as specific as they need for their dat and
      * their use case
      */
-    ModelGenerationValidator validator = new FileModelValidatorImpl();
-    validator.setParameters(params);
+    ModelGenerationValidator validator = new FileModelValidatorImpl(params);
     /*
      * Modelable's write and read the annotated sentences, as well as create 
and
      * write the NER models
      */
-    Modelable modelable = new GenericModelableImpl();
-    modelable.setParameters(params);
+    Modelable modelable = new GenericModelableImpl(params);
 
     /*
      * the modelGenerator actually runs the process with a set number of
@@ -110,6 +106,5 @@ public class DefaultModelBuilderUtil {
      * sets this may be too much.
      */
     modelGenerator.build(sentenceProvider, knownEntityProvider, validator, 
modelable, iterations);
-
   }
 }
diff --git 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
index 1362d1b..f1b960a 100644
--- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
+++ 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
@@ -20,13 +20,12 @@ import 
opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
 import java.util.Set;
 
 /**
- * Supplies a list of known entities (a list of names or locations)
+ * Supplies a list of known entities (a list of names or locations).
  */
 public interface KnownEntityProvider extends 
ModelParameter<BaseModelBuilderParams> {
 
   /**
-   * Returns a list of known non-ambiguous entities.
-   * @return a set of entities
+   * @return Retrieves a list of known non-ambiguous entities.
    */
   Set<String> getKnownEntities();
 
@@ -37,8 +36,7 @@ public interface KnownEntityProvider extends 
ModelParameter<BaseModelBuilderPara
   void addKnownEntity(String unambiguousEntity);
 
   /**
-   * Defines the type of entity that the set contains, ie person, location, 
organization.
-   * @return
+   * @return Retrieves the type of entity that the set contains, ie person, 
location, organization.
    */
   String getKnownEntitiesType();
 
diff --git 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
index 6ef8a9e..16c11e0 100644
--- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
+++ 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
@@ -20,7 +20,7 @@ import 
opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
 import java.util.Collection;
 
 /**
- * Validates results from the iterative namefinding
+ * Validates results from the iterative name finding.
  */
 public interface ModelGenerationValidator extends 
ModelParameter<BaseModelBuilderParams> {
 
diff --git 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
index 7212629..54f2a89 100644
--- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
+++ 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
@@ -19,71 +19,37 @@ import java.io.File;
 import java.util.Map;
 
 /**
- * Used to pass params through the processing
+ * Used to pass params through the processing.
  */
-public class BaseModelBuilderParams {
-
-  private File modelFile;
-  private File sentenceFile;
-  private File knownEntitiesFile;
-  private File knownEntityBlacklist;
-  private File annotatedTrainingDataFile;
-  private String entityType;
-  private Map<String, String> additionalParams;
+public record BaseModelBuilderParams (File sentenceFile, File 
knownEntitiesFile, File knownEntitiesBlacklist,
+                                      File modelFile, File 
annotatedTrainingDataFile, String entityType,
+                                      Map<String, String> additionalParams) {
 
   public File getModelFile() {
     return modelFile;
   }
 
-  public void setModelFile(File modelFile) {
-    this.modelFile = modelFile;
-  }
-
   public File getSentenceFile() {
     return sentenceFile;
   }
 
-  public void setSentenceFile(File sentenceFile) {
-    this.sentenceFile = sentenceFile;
-  }
-
   public File getKnownEntitiesFile() {
     return knownEntitiesFile;
   }
 
-  public void setKnownEntitiesFile(File knownEntitiesFile) {
-    this.knownEntitiesFile = knownEntitiesFile;
-  }
-
   public File getKnownEntityBlacklist() {
-    return knownEntityBlacklist;
-  }
-
-  public void setKnownEntityBlacklist(File knownEntityBlacklist) {
-    this.knownEntityBlacklist = knownEntityBlacklist;
+    return knownEntitiesBlacklist;
   }
 
   public Map<String, String> getAdditionalParams() {
     return additionalParams;
   }
-
-  public void setAdditionalParams(Map<String, String> additionalParams) {
-    this.additionalParams = additionalParams;
-  }
-
+  
   public String getEntityType() {
     return entityType;
   }
 
-  public void setEntityType(String entityType) {
-    this.entityType = entityType;
-  }
-
   public File getAnnotatedTrainingDataFile() {
     return annotatedTrainingDataFile;
   }
-
-  public void setAnnotatedTrainingDataFile(File annotatedTrainingDataFile) {
-    this.annotatedTrainingDataFile = annotatedTrainingDataFile;
-  }
 }
\ No newline at end of file
diff --git 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
index c897532..58f3960 100644
--- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
+++ 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
@@ -29,8 +29,15 @@ import opennlp.addons.modelbuilder.KnownEntityProvider;
 
 public class FileKnownEntityProvider implements KnownEntityProvider {
  
-  final Set<String> knownEntities = new HashSet<>();
-  BaseModelBuilderParams params;
+  private final Set<String> knownEntities = new HashSet<>();
+  private BaseModelBuilderParams params;
+
+  public FileKnownEntityProvider(BaseModelBuilderParams params) {
+    if (params == null) {
+      throw new IllegalArgumentException("BaseModelBuilderParams cannot be 
null!");
+    }
+    this.params = params;
+  }
 
   @Override
   public Set<String> getKnownEntities() {
diff --git 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
index 8c0703b..9943d68 100644
--- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
+++ 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
@@ -30,11 +30,20 @@ import opennlp.addons.modelbuilder.ModelGenerationValidator;
 
 /**
  * Validates NER results input before inclusion into the model.
+ *
+ * @see ModelGenerationValidator
  */
 public class FileModelValidatorImpl implements ModelGenerationValidator {
 
-  private final Set<String> badentities = new HashSet<>();
-  BaseModelBuilderParams params;
+  private final Set<String> badEntities = new HashSet<>();
+  private BaseModelBuilderParams params;
+
+  public FileModelValidatorImpl(BaseModelBuilderParams params) {
+    if (params == null) {
+      throw new IllegalArgumentException("BaseModelBuilderParams cannot be 
null!");
+    }
+    this.params = params;
+  }
 
   @Override
   public void setParameters(BaseModelBuilderParams params) {
@@ -50,7 +59,7 @@ public class FileModelValidatorImpl implements 
ModelGenerationValidator {
   @Override
   public Boolean validNamedEntity(String namedEntity) {
 
-    if (badentities.isEmpty()) {
+    if (badEntities.isEmpty()) {
       getBlackList();
     }
 //
@@ -59,7 +68,7 @@ public class FileModelValidatorImpl implements 
ModelGenerationValidator {
 //      return false;
 //    }
     boolean b = true;
-    if (badentities.contains(namedEntity.toLowerCase())) {
+    if (badEntities.contains(namedEntity.toLowerCase())) {
       b = false;
     }
     return b;
@@ -68,19 +77,19 @@ public class FileModelValidatorImpl implements 
ModelGenerationValidator {
   @Override
   public Collection<String> getBlackList() {
     if (params.getKnownEntityBlacklist() == null) {
-      return badentities;
+      return badEntities;
     }
-    if (!badentities.isEmpty()) {
+    if (!badEntities.isEmpty()) {
       try (BufferedReader br = new BufferedReader(new InputStreamReader(
               new FileInputStream(params.getKnownEntityBlacklist()), 
StandardCharsets.UTF_8))) {
         String line;
         while ((line = br.readLine()) != null) {
-          badentities.add(line);
+          badEntities.add(line);
         }
       } catch (IOException ex) {
         
Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, 
null, ex);
       }
     }
-    return badentities;
+    return badEntities;
   }
 }
diff --git 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
index ec0393f..492b5d9 100644
--- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
+++ 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
@@ -28,12 +28,21 @@ import java.util.logging.Logger;
 import opennlp.addons.modelbuilder.SentenceProvider;
 
 /**
- * Provides user sentences via a simple text file
+ * Provides user sentences via a simple text file.
+ *
+ * @see SentenceProvider
  */
 public class FileSentenceProvider implements SentenceProvider {
 
   private final Set<String> sentences = new HashSet<>();
-  BaseModelBuilderParams params ;
+  private BaseModelBuilderParams params ;
+
+  public FileSentenceProvider(BaseModelBuilderParams params) {
+    if (params == null) {
+      throw new IllegalArgumentException("BaseModelBuilderParams cannot be 
null!");
+    }
+    this.params = params;
+  }
 
   @Override
   public Set<String> getSentences() {
diff --git 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
index aaeaa6f..bd5bd59 100644
--- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
+++ 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
@@ -15,7 +15,6 @@
  */
 package opennlp.addons.modelbuilder.impls;
 
-import java.util.HashMap;
 import java.util.Map;
 
 import opennlp.addons.modelbuilder.KnownEntityProvider;
@@ -27,12 +26,14 @@ import opennlp.tools.namefind.NameFinderME;
 import opennlp.tools.util.Span;
 
 /**
+ * Generic {@link SemiSupervisedModelGenerator} implementation that handles all
+ * processing using the default file implementations.
  *
- * Generic impl that handles all processing using the default file 
implementations
+ * @see SemiSupervisedModelGenerator
  */
 public class GenericModelGenerator implements SemiSupervisedModelGenerator {
 
-  private Map<String, String> params = new HashMap<>();
+  private Map<String, String> params;
 
   @Override
   public void setParameters(BaseModelBuilderParams params) {
diff --git 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
index 68b371b..660496d 100644
--- 
a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
+++ 
b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
@@ -44,12 +44,21 @@ import opennlp.tools.util.TrainingParameters;
 
 /**
  * Creates annotations, writes annotations to file, and creates a model and 
writes to a file.
+ *
+ * @see Modelable
  */
 public class GenericModelableImpl implements Modelable {
 
   private Set<String> annotatedSentences = new HashSet<>();
-  BaseModelBuilderParams params;
+  private BaseModelBuilderParams params;
 
+  public GenericModelableImpl(BaseModelBuilderParams params) {
+    if (params == null) {
+      throw new IllegalArgumentException("BaseModelBuilderParams cannot be 
null!");
+    }
+    this.params = params;
+  }
+  
   @Override
   public void setParameters(BaseModelBuilderParams params) {
     this.params = params;

Reply via email to