This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch migrate-mallet-addon-to-opennlp-tools-2_1_0
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git

commit f369e6c8b582d89e03baf564b95d4537cc7bf76e
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Jan 20 11:24:53 2023 +0100

    updates sandbox component 'mallet-addon' to be compatible with latest opennlp-tools release
    
    - adjusts opennlp-tools to 2.1.0
    - adjusts parent project (org.apache.apache) to version 18
    - adjusts Java language level to 11
    - updates to mallet version 2.0.8 to mitigate several CVEs, adds exclusions and related newer versions to mitigate CVEs
    - adjusts some array declarations to comply with Java, not C, style
    - improves resource handling of streams
    - removes unused imports
---
 mallet-addon/pom.xml                               | 44 +++++++++++++++++++---
 .../java/opennlp/addons/mallet/CRFTrainer.java     | 15 +++-----
 .../opennlp/addons/mallet/ClassifierModel.java     | 27 +++++++------
 .../addons/mallet/ClassifierModelSerializer.java   | 15 +++-----
 .../java/opennlp/addons/mallet/MaxentTrainer.java  | 18 ++-------
 .../opennlp/addons/mallet/TransducerModel.java     | 20 +++++-----
 .../addons/mallet/TransducerModelSerializer.java   |  9 ++---
 7 files changed, 82 insertions(+), 66 deletions(-)

diff --git a/mallet-addon/pom.xml b/mallet-addon/pom.xml
index c5f2ca9..d1e134f 100644
--- a/mallet-addon/pom.xml
+++ b/mallet-addon/pom.xml
@@ -21,10 +21,17 @@
 
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
-       
+       <parent>
+               <groupId>org.apache</groupId>
+               <artifactId>apache</artifactId>
+               <!-- TODO OPENNLP-1452 once this is resolved, move to 29 as well. -->
+               <version>18</version>
+               <relativePath />
+       </parent>
+
        <groupId>kottmann.opennlp</groupId>
        <artifactId>mallet-addon</artifactId>
-       <version>1.6.0-SNAPSHOT</version>
+       <version>2.1.1-SNAPSHOT</version>
 
        <packaging>jar</packaging>
        <name>Apache OpenNLP Mallet Addon</name>
@@ -33,13 +40,37 @@
                <dependency>
                        <groupId>org.apache.opennlp</groupId>
                        <artifactId>opennlp-tools</artifactId>
-                       <version>1.6.1-SNAPSHOT</version>
+                       <version>2.1.0</version>
                </dependency>
                
                <dependency>
                        <groupId>cc.mallet</groupId>
                        <artifactId>mallet</artifactId>
-                       <version>2.0.7</version>
+                       <version>2.0.8</version>
+                       <exclusions>
+                               <exclusion>
+                                       <groupId>junit</groupId>
+                                       <artifactId>junit</artifactId>
+                               </exclusion>
+                               <exclusion>
+                                       <groupId>org.jdom</groupId>
+                                       <artifactId>jdom</artifactId>
+                               </exclusion>
+                               <exclusion>
+                                       <groupId>org.beanshell</groupId>
+                                       <artifactId>bsh</artifactId>
+                               </exclusion>
+                       </exclusions>
+               </dependency>
+               <dependency>
+                       <groupId>org.jdom</groupId>
+                       <artifactId>jdom</artifactId>
+                       <version>1.1.3</version>
+               </dependency>
+               <dependency>
+                       <groupId>org.apache-extras.beanshell</groupId>
+                       <artifactId>bsh</artifactId>
+                       <version>2.0b6</version>
                </dependency>
        </dependencies>
 
@@ -67,8 +98,9 @@
                                <groupId>org.apache.maven.plugins</groupId>
                                <artifactId>maven-compiler-plugin</artifactId>
                                <configuration>
-                                       <source>1.7</source>
-                                       <target>1.7</target>
+                                       <source>11</source>
+                                       <target>11</target>
+                                       <compilerArgument>-Xlint</compilerArgument>
                                </configuration>
                        </plugin>
                        <plugin>
diff --git a/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java b/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
index 7e6de66..0700e2b 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
@@ -20,7 +20,6 @@
 package opennlp.addons.mallet;
 
 import java.io.IOException;
-import java.util.Map;
 import java.util.regex.Pattern;
 
 import opennlp.tools.ml.AbstractSequenceTrainer;
@@ -30,7 +29,6 @@ import opennlp.tools.ml.model.SequenceClassificationModel;
 import opennlp.tools.ml.model.SequenceStream;
 import cc.mallet.fst.CRF;
 import cc.mallet.fst.CRFOptimizableByLabelLikelihood;
-import cc.mallet.fst.CRFTrainerByLabelLikelihood;
 import cc.mallet.fst.CRFTrainerByValueGradients;
 import cc.mallet.fst.Transducer;
 import cc.mallet.optimize.Optimizable;
@@ -71,17 +69,17 @@ public class CRFTrainer extends AbstractSequenceTrainer {
     int nameIndex = 0;
     Sequence sequence;
     while ((sequence = sequences.read()) != null) {
-      FeatureVector featureVectors[] = new 
FeatureVector[sequence.getEvents().length];
-      Label malletOutcomes[] = new Label[sequence.getEvents().length];
+      FeatureVector[] featureVectors = new 
FeatureVector[sequence.getEvents().length];
+      Label[] malletOutcomes = new Label[sequence.getEvents().length];
 
-      Event events[] = sequence.getEvents();
+      Event[] events = sequence.getEvents();
 
       for (int eventIndex = 0; eventIndex < events.length; eventIndex++) {
 
         Event event = events[eventIndex];
 
-        String features[] = event.getContext();
-        int malletFeatures[] = new int[features.length];
+        String[] features = event.getContext();
+        int[] malletFeatures = new int[features.length];
 
         for (int featureIndex = 0; featureIndex < features.length; 
featureIndex++) {
           malletFeatures[featureIndex] = dataAlphabet.lookupIndex(
@@ -109,8 +107,7 @@ public class CRFTrainer extends AbstractSequenceTrainer {
     CRF crf = new CRF(trainingData.getDataAlphabet(),
         trainingData.getTargetAlphabet());
 
-    String startStateName = crf.addOrderNStates(trainingData, getOrders(),
-        (boolean[]) null,
+    String startStateName = crf.addOrderNStates(trainingData, getOrders(), null,
         // default label
         "other", Pattern.compile("other,*-cont"), // forbidden pattern
         null, // allowed pattern
diff --git 
a/mallet-addon/src/main/java/opennlp/addons/mallet/ClassifierModel.java 
b/mallet-addon/src/main/java/opennlp/addons/mallet/ClassifierModel.java
index 5f6661d..1426be9 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/ClassifierModel.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/ClassifierModel.java
@@ -35,18 +35,19 @@ import cc.mallet.types.LabelVector;
 
 class ClassifierModel implements MaxentModel, SerializableArtifact {
 
-  private Classifier classifer;
+  private final Classifier classifier;
 
   public ClassifierModel(Classifier classifer) {
-    this.classifer = classifer;
+    this.classifier = classifer;
   }
 
-  Classifier getClassifer() {
-    return classifer;
+  Classifier getClassifier() {
+    return classifier;
   }
-  
+
+  @Override
   public double[] eval(String[] features) {
-    Alphabet dataAlphabet = classifer.getAlphabet();
+    Alphabet dataAlphabet = classifier.getAlphabet();
 
     List<Integer> malletFeatureList = new ArrayList<>(features.length);
 
@@ -62,15 +63,15 @@ class ClassifierModel implements MaxentModel, 
SerializableArtifact {
       malletFeatures[i] = malletFeatureList.get(i);
     }
 
-    FeatureVector fv = new FeatureVector(classifer.getAlphabet(),
+    FeatureVector fv = new FeatureVector(classifier.getAlphabet(),
         malletFeatures);
     Instance instance = new Instance(fv, null, null, null);
 
-    Classification result = classifer.classify(instance);
+    Classification result = classifier.classify(instance);
 
     LabelVector labeling = result.getLabelVector();
 
-    LabelAlphabet targetAlphabet = classifer.getLabelAlphabet();
+    LabelAlphabet targetAlphabet = classifier.getLabelAlphabet();
 
     double outcomes[] = new double[targetAlphabet.size()];
     for (int i = 0; i < outcomes.length; i++) {
@@ -84,10 +85,12 @@ class ClassifierModel implements MaxentModel, 
SerializableArtifact {
     return outcomes;
   }
 
+  @Override
   public double[] eval(String[] context, double[] probs) {
     return eval(context);
   }
 
+  @Override
   public double[] eval(String[] context, float[] values) {
     return eval(context);
   }
@@ -109,17 +112,17 @@ class ClassifierModel implements MaxentModel, 
SerializableArtifact {
 
   @Override
   public String getOutcome(int i) {
-    return classifer.getLabelAlphabet().lookupLabel(i).getEntry().toString();
+    return classifier.getLabelAlphabet().lookupLabel(i).getEntry().toString();
   }
 
   @Override
   public int getIndex(String outcome) {
-    return classifer.getLabelAlphabet().lookupIndex(outcome);
+    return classifier.getLabelAlphabet().lookupIndex(outcome);
   }
 
   @Override
   public int getNumOutcomes() {
-    return classifer.getLabelAlphabet().size();
+    return classifier.getLabelAlphabet().size();
   }
 
   @Override
diff --git a/mallet-addon/src/main/java/opennlp/addons/mallet/ClassifierModelSerializer.java b/mallet-addon/src/main/java/opennlp/addons/mallet/ClassifierModelSerializer.java
index 9cfb6f2..f3b4806 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/ClassifierModelSerializer.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/ClassifierModelSerializer.java
@@ -26,20 +26,16 @@ import java.io.ObjectOutputStream;
 import java.io.OutputStream;
 
 import cc.mallet.classify.Classifier;
-import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.model.ArtifactSerializer;
 
 // The standard method for saving classifiers in Mallet is through Java serialization.
 
-public class ClassifierModelSerializer implements
-    ArtifactSerializer<ClassifierModel> {
+public class ClassifierModelSerializer implements ArtifactSerializer<ClassifierModel> {
 
   @Override
-  public ClassifierModel create(InputStream in) throws IOException,
-      InvalidFormatException {
+  public ClassifierModel create(InputStream in) throws IOException {
 
-    ObjectInputStream ois = new ObjectInputStream(in);
-    try {
+    try ( ObjectInputStream ois = new ObjectInputStream(in)) {
       Classifier classifier = (Classifier) ois.readObject();
       return new ClassifierModel(classifier);
     } catch (ClassNotFoundException e) {
@@ -48,10 +44,9 @@ public class ClassifierModelSerializer implements
   }
 
   @Override
-  public void serialize(ClassifierModel artifact, OutputStream out)
-      throws IOException {
+  public void serialize(ClassifierModel artifact, OutputStream out) throws IOException {
     ObjectOutputStream oos = new ObjectOutputStream(out);
-    oos.writeObject(artifact.getClassifer());
+    oos.writeObject(artifact.getClassifier());
     oos.flush();
   }
 }
diff --git 
a/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java 
b/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
index e9524a9..cfcb294 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
@@ -22,22 +22,12 @@ package opennlp.addons.mallet;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Map;
 
 import opennlp.tools.ml.AbstractEventTrainer;
 import opennlp.tools.ml.model.DataIndexer;
 import opennlp.tools.ml.model.MaxentModel;
-import cc.mallet.classify.C45Trainer;
 import cc.mallet.classify.Classifier;
-import cc.mallet.classify.MaxEntGETrainer;
-import cc.mallet.classify.MaxEntL1Trainer;
-import cc.mallet.classify.MaxEntPRTrainer;
 import cc.mallet.classify.MaxEntTrainer;
-import cc.mallet.classify.NaiveBayes;
-import cc.mallet.classify.NaiveBayesEMTrainer;
-import cc.mallet.classify.NaiveBayesTrainer;
-import cc.mallet.optimize.LimitedMemoryBFGS;
-import cc.mallet.optimize.Optimizer;
 import cc.mallet.types.Alphabet;
 import cc.mallet.types.FeatureVector;
 import cc.mallet.types.Instance;
@@ -61,13 +51,13 @@ public class MaxentTrainer extends AbstractEventTrainer {
 
     Collection<Instance> instances = new ArrayList<>();
 
-    String predLabels[] = indexer.getPredLabels();
+    String[] predLabels = indexer.getPredLabels();
     
-    int outcomes[] = indexer.getOutcomeList();
+    int[] outcomes = indexer.getOutcomeList();
     for (int contextIndex = 0; contextIndex < indexer.getContexts().length; 
contextIndex++) {
 
-      int malletFeatures[] = new 
int[indexer.getContexts()[contextIndex].length];
-      double weights[] = new 
double[indexer.getContexts()[contextIndex].length];
+      int[] malletFeatures = new 
int[indexer.getContexts()[contextIndex].length];
+      double[] weights = new 
double[indexer.getContexts()[contextIndex].length];
 
       for (int featureIndex = 0; featureIndex < malletFeatures.length; 
featureIndex++) {
         malletFeatures[featureIndex] = dataAlphabet.lookupIndex(
diff --git 
a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java 
b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
index e713d83..91afec3 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
@@ -36,7 +36,7 @@ import cc.mallet.types.Sequence;
 
 public class TransducerModel<T> implements SequenceClassificationModel<T>, 
SerializableArtifact {
 
-  private Transducer model;
+  private final Transducer model;
 
   public TransducerModel(Transducer model) {
     this.model = model;
@@ -45,7 +45,8 @@ public class TransducerModel<T> implements 
SequenceClassificationModel<T>, Seria
   Transducer getModel() {
     return model;
   }
-  
+
+  @Override
   public opennlp.tools.util.Sequence bestSequence(T[] sequence,
       Object[] additionalContext, BeamSearchContextGenerator<T> cg,
       SequenceValidator<T> validator) {
@@ -59,7 +60,8 @@ public class TransducerModel<T> implements 
SequenceClassificationModel<T>, Seria
     // TODO: How to implement min score filtering here? 
     return bestSequences(numSequences, sequence, additionalContext, cg, 
validator);
   }
-  
+
+  @Override
   public opennlp.tools.util.Sequence[] bestSequences(int numSequences,
       T[] sequence, Object[] additionalContext,
       BeamSearchContextGenerator<T> cg, SequenceValidator<T> validator) {
@@ -67,16 +69,16 @@ public class TransducerModel<T> implements 
SequenceClassificationModel<T>, Seria
     // TODO: CRF.getInputAlphabet
     Alphabet dataAlphabet = model.getInputPipe().getAlphabet();
     
-    FeatureVector featureVectors[] = new FeatureVector[sequence.length];
+    FeatureVector[] featureVectors = new FeatureVector[sequence.length];
     
     // TODO:: The feature generator needs to get the detected sequence in the 
end
     // to update the adaptive data!
-    String prior[] = new String[sequence.length];
+    String[] prior = new String[sequence.length];
     Arrays.fill(prior, "s"); // <- HACK, this will degrade performance!
     
     // TODO: Put together a feature generator which doesn't fail if outcomes 
is null!
     for (int i = 0; i < sequence.length; i++) {
-      String features[] = cg.getContext(i, sequence, null, additionalContext);
+      String[] features = cg.getContext(i, sequence, null, additionalContext);
       
       List<Integer> malletFeatureList = new ArrayList<>(features.length);
       
@@ -86,7 +88,7 @@ public class TransducerModel<T> implements 
SequenceClassificationModel<T>, Seria
         }
       }
 
-      int malletFeatures[] = new int[malletFeatureList.size()];
+      int[] malletFeatures = new int[malletFeatureList.size()];
       for (int k = 0; k < malletFeatureList.size(); k++) {
         malletFeatures[k] = malletFeatureList.get(k);
       }
@@ -97,7 +99,7 @@ public class TransducerModel<T> implements 
SequenceClassificationModel<T>, Seria
     
     FeatureVectorSequence malletSequence = new 
FeatureVectorSequence(featureVectors);
     
-    Sequence[] answers = null;
+    Sequence[] answers;
     if (numSequences == 1) {
       answers = new Sequence[1];
       answers[0] = model.transduce(malletSequence);
@@ -136,7 +138,7 @@ public class TransducerModel<T> implements 
SequenceClassificationModel<T>, Seria
     
     Alphabet targetAlphabet = model.getInputPipe().getTargetAlphabet();
     
-    String outcomes[] = new String[targetAlphabet.size()];
+    String[] outcomes = new String[targetAlphabet.size()];
     
     for (int i = 0; i < targetAlphabet.size(); i++) {
       outcomes[i] = targetAlphabet.lookupObject(i).toString();
diff --git 
a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModelSerializer.java
 
b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModelSerializer.java
index b793ca2..6e05eab 100644
--- 
a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModelSerializer.java
+++ 
b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModelSerializer.java
@@ -32,10 +32,8 @@ import cc.mallet.fst.Transducer;
 public class TransducerModelSerializer implements 
ArtifactSerializer<TransducerModel> {
 
   @Override
-  public TransducerModel create(InputStream in) throws IOException,
-      InvalidFormatException {
-    ObjectInputStream ois = new ObjectInputStream(in);
-    try {
+  public TransducerModel create(InputStream in) throws IOException, InvalidFormatException {
+    try (ObjectInputStream ois = new ObjectInputStream(in)) {
       Transducer classifier = (Transducer) ois.readObject();
       return new TransducerModel(classifier);
     } catch (ClassNotFoundException e) {
@@ -44,8 +42,7 @@ public class TransducerModelSerializer implements 
ArtifactSerializer<TransducerM
   }
 
   @Override
-  public void serialize(TransducerModel artifact, OutputStream out)
-      throws IOException {
+  public void serialize(TransducerModel artifact, OutputStream out) throws IOException {
     ObjectOutputStream oos = new ObjectOutputStream(out);
     oos.writeObject(artifact.getModel());
     oos.flush();

Reply via email to