This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


The following commit(s) were added to refs/heads/main by this push:
     new 676d093  clears several compiler warnings in opennlp-dl (sandbox component)
676d093 is described below

commit 676d093f2926f3d7f18ff35512d99b6b5246ffab
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Apr 25 21:10:43 2025 +0200

    clears several compiler warnings in opennlp-dl (sandbox component)
---
 .../src/main/java/opennlp/tools/dl/DataReader.java | 30 ++++++++++++----------
 .../tools/dl/NameSampleDataSetIterator.java        |  8 +++---
 .../java/opennlp/tools/dl/NeuralDocCatTrainer.java | 23 +++++++++--------
 .../src/main/java/opennlp/tools/dl/StackedRNN.java |  2 +-
 .../java/opennlp/tools/dl/NeuralDocCatTest.java    | 11 ++++----
 .../src/test/java/opennlp/tools/dl/RNNTest.java    | 10 +++-----
 .../test/java/opennlp/tools/dl/StackedRNNTest.java |  8 ++----
 7 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/DataReader.java b/opennlp-dl/src/main/java/opennlp/tools/dl/DataReader.java
index 26934bc..e1c3937 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/DataReader.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/DataReader.java
@@ -31,6 +31,7 @@ import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
+import java.io.Serial;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -43,9 +44,11 @@ import java.util.function.Function;
 
 /**
  * This class provides a reader capable of reading training and test datasets from file system for text classifiers.
- * In addition to reading the content, it
- * (1) vectorizes the text using embeddings such as Glove, and
- * (2) divides the datasets into mini batches of specified size.
+ * In addition to reading the content, it:
+ * <ol>
+ * <li>vectorizes the text using embeddings such as Glove, and</li>
+ * <li>divides the datasets into mini batches of specified size.</li>
+ * </ol>
  * <p>
  * The data is expected to be organized as per the following convention:
  * <pre>
@@ -90,26 +93,27 @@ import java.util.function.Function;
  */
 public class DataReader implements DataSetIterator {
 
+    @Serial
+    private static final long serialVersionUID = 8930860497053512907L;
     private static final Logger LOG = LoggerFactory.getLogger(DataReader.class);
-    private static final long serialVersionUID = 6405541399655356439L;
+    private final static String EXTENSION = ".txt";
 
     private final File dataDir;
-    private List<File> records;
-    private List<Integer> labels;
-    private Map<String, Integer> labelToId;
-    private final String extension = ".txt";
-    private final GlobalVectors embedder;
+    private transient List<File> records;
+    private transient List<Integer> labels;
+    private transient Map<String, Integer> labelToId;
+    private transient final GlobalVectors embedder;
     private int cursor = 0;
     private final int batchSize;
     private final int vectorLen;
     private final int maxSeqLen;
     private final int numLabels;
     // default tokenizer
-    private Function<String, String[]> tokenizer = s -> s.toLowerCase().split(" ");
-
+    private transient Function<String, String[]> tokenizer = s -> s.toLowerCase().split(" ");
 
     /**
-     * Creates a reader with the specified arguments
+     * Creates a reader with the specified arguments.
+     *
      * @param dataDirPath data directory
      * @param labelNames list of labels (names should match subdirectory names)
      * @param embedder embeddings to convert words to vectors
@@ -147,7 +151,7 @@ public class DataReader implements DataSetIterator {
                         + labelName + ". Looked at:" + labelDir);
             }
             File[] examples = labelDir.listFiles(f ->
-                    f.isFile() && f.getName().endsWith(this.extension));
+                    f.isFile() && f.getName().endsWith(EXTENSION));
             if (examples == null || examples.length == 0){
                 throw new IllegalStateException("No examples found for "
                         + labelName + ". Looked at:" + labelDir
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/NameSampleDataSetIterator.java b/opennlp-dl/src/main/java/opennlp/tools/dl/NameSampleDataSetIterator.java
index 3801a47..45114e8 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/NameSampleDataSetIterator.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/NameSampleDataSetIterator.java
@@ -40,7 +40,7 @@ import opennlp.tools.util.ObjectStream;
 
 public class NameSampleDataSetIterator implements DataSetIterator {
 
-  private static final long serialVersionUID = -7252120980388575448L;
+  private static final long serialVersionUID = 3122563760808581425L;
 
  private static class NameSampleToDataSetStream extends FilterObjectStream<NameSample, DataSet> {
 
@@ -103,7 +103,7 @@ public class NameSampleDataSetIterator implements DataSetIterator {
 
   private int cursor = 0;
 
-  private final ObjectStream<DataSet> samples;
+  private transient final ObjectStream<DataSet> samples;
 
  NameSampleDataSetIterator(ObjectStream<NameSample> samples, WordVectors wordVectors, int windowSize,
                             String[] labels) throws IOException {
@@ -114,8 +114,7 @@ public class NameSampleDataSetIterator implements DataSetIterator {
 
     int total = 0;
 
-    DataSet sample;
-    while ((sample = this.samples.read()) != null) {
+    while (this.samples.read() != null) {
       total++;
     }
 
@@ -124,6 +123,7 @@ public class NameSampleDataSetIterator implements DataSetIterator {
     samples.reset();
   }
 
+  @Override
   public DataSet next(int num) {
     if (cursor >= totalExamples()) throw new NoSuchElementException();
 
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java b/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java
index f5d0c07..648dbfd 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java
@@ -17,7 +17,11 @@
 
 package opennlp.tools.dl;
 
-import java.io.*;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.util.List;
 
 import org.deeplearning4j.nn.conf.BackpropType;
@@ -52,6 +56,8 @@ import org.slf4j.LoggerFactory;
  */
 public class NeuralDocCatTrainer {
 
+    private static final Logger LOG = LoggerFactory.getLogger(NeuralDocCatTrainer.class);
+
     // Note: Arguments can't be declared 'final'
     // See: https://github.com/apache/opennlp-sandbox/pull/109#discussion_r1262904650
     public static class Args {
@@ -116,14 +122,11 @@ public class NeuralDocCatTrainer {
         }
     }
 
-    private static final Logger LOG = LoggerFactory.getLogger(NeuralDocCatTrainer.class);
-
     private final NeuralDocCatModel model;
     private final Args args;
     private final DataReader trainSet;
     private DataReader validSet;
 
-
     public NeuralDocCatTrainer(Args args) throws IOException {
         this.args = args;
         GlobalVectors gloves;
@@ -134,21 +137,21 @@ public class NeuralDocCatTrainer {
         }
 
         LOG.info("Training data from {}", args.trainDir);
-        this.trainSet = new DataReader(args.trainDir, args.labels, gloves, args.batchSize, args.maxSeqLen);
+        trainSet = new DataReader(args.trainDir, args.labels, gloves, args.batchSize, args.maxSeqLen);
         if (args.validDir != null) {
             LOG.info("Validation data from {}", args.validDir);
-            this.validSet = new DataReader(args.validDir, args.labels, gloves, args.batchSize, args.maxSeqLen);
+            validSet = new DataReader(args.validDir, args.labels, gloves, args.batchSize, args.maxSeqLen);
         }
 
         //create network
-        network = this.createNetwork(gloves.getVectorSize());
-        this.model = new NeuralDocCatModel(network, gloves, args.labels, args.maxSeqLen);
+        network = createNetwork(gloves.getVectorSize());
+        model = new NeuralDocCatModel(network, gloves, args.labels, args.maxSeqLen);
     }
 
-    public MultiLayerNetwork createNetwork(int vectorSize) {
+    private MultiLayerNetwork createNetwork(int vectorSize) {
         int totalOutcomes = this.trainSet.totalOutcomes();
         assert totalOutcomes >= 2;
-        LOG.info("Number of classes " + totalOutcomes);
+        LOG.info("Number of classes {}", totalOutcomes);
 
         //TODO: the below network params should be configurable from CLI or settings file
         //Set up network configuration
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java b/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
index 391170b..f083993 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
@@ -143,7 +143,7 @@ public class StackedRNN extends RNN {
       double newLoss = smoothLoss * 0.999 + loss * 0.001;
 
       if (newLoss > smoothLoss) {
-        learningRate *= 0.999 ;
+        learningRate *= 0.999f ;
       }
       smoothLoss = newLoss;
       if (Double.isNaN(smoothLoss) || Double.isInfinite(smoothLoss)) {
diff --git a/opennlp-dl/src/test/java/opennlp/tools/dl/NeuralDocCatTest.java b/opennlp-dl/src/test/java/opennlp/tools/dl/NeuralDocCatTest.java
index a592ed1..6362331 100644
--- a/opennlp-dl/src/test/java/opennlp/tools/dl/NeuralDocCatTest.java
+++ b/opennlp-dl/src/test/java/opennlp/tools/dl/NeuralDocCatTest.java
@@ -30,10 +30,15 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
 /**
  * Tests for {@link NeuralDocCat}
  */
-@Disabled
 class NeuralDocCatTest {
 
+  /*
+   * TODO : this fails with:
+   * java.lang.AssertionError
+   * at opennlp.tools.dl.GlobalVectors.<init>(GlobalVectors.java:92)
+   */
   @Test
+  @Disabled
   void testDocCatTrainingOnTweets() throws Exception {
     NeuralDocCatTrainer.Args args = new NeuralDocCatTrainer.Args();
     args.glovesPath = "/path/to/glove.6B/glove.6B.50d.txt";
@@ -46,10 +51,6 @@ class NeuralDocCatTest {
     String modelPath = modelPathPrefix + ".zip";
     trainer.saveModel(modelPath);
 
-    /* TODO : this fails with:
-     * java.lang.AssertionError
-     * at opennlp.tools.dl.GlobalVectors.<init>(GlobalVectors.java:92)
-     */
     NeuralDocCatModel neuralDocCatModel = NeuralDocCatModel.loadModel(modelPath);
     assertNotNull(neuralDocCatModel);
 
diff --git a/opennlp-dl/src/test/java/opennlp/tools/dl/RNNTest.java b/opennlp-dl/src/test/java/opennlp/tools/dl/RNNTest.java
index e39e877..b3e9fe4 100644
--- a/opennlp-dl/src/test/java/opennlp/tools/dl/RNNTest.java
+++ b/opennlp-dl/src/test/java/opennlp/tools/dl/RNNTest.java
@@ -27,7 +27,7 @@ import java.util.stream.Stream;
 
 import org.apache.commons.io.IOUtils;
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
@@ -54,14 +54,10 @@ public class RNNTest {
         Arguments.of(1e-3f, 25, 50, 5)
     );
   }
-
-  @Disabled
-  // TODO check why this fails with:
-  //   java.lang.IllegalStateException: Can't transpose array with rank < 2: array shape [62]
-  //   ...
-  //   on MacOS (only?)
+  
   @ParameterizedTest
   @MethodSource("provideRNNParams")
+  @DisabledIfSystemProperty(named = "os.arch", matches = "aarch64")
  public void testVanillaCharRNNLearn(float learningRate, int seqLength, int hiddenLayerSize, int epochs) throws Exception {
    RNN rnn = new RNN(learningRate, seqLength, hiddenLayerSize, epochs, text, 10, true);
     evaluate(rnn, true);
diff --git a/opennlp-dl/src/test/java/opennlp/tools/dl/StackedRNNTest.java b/opennlp-dl/src/test/java/opennlp/tools/dl/StackedRNNTest.java
index f8ae25f..842a22b 100644
--- a/opennlp-dl/src/test/java/opennlp/tools/dl/StackedRNNTest.java
+++ b/opennlp-dl/src/test/java/opennlp/tools/dl/StackedRNNTest.java
@@ -27,7 +27,7 @@ import java.util.stream.Stream;
 
 import org.apache.commons.io.IOUtils;
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.Arguments;
 import org.junit.jupiter.params.provider.MethodSource;
@@ -55,13 +55,9 @@ public class StackedRNNTest {
     );
   }
 
-  @Disabled
-  // TODO check why this fails with:
-  //   java.lang.IllegalStateException: Can't transpose array with rank < 2: array shape [62]
-  //   ...
-  //   on MacOS (only?)
   @ParameterizedTest
   @MethodSource("provideRNNParams")
+  @DisabledIfSystemProperty(named = "os.arch", matches = "aarch64")
  public void testVanillaCharRNNLearn(float learningRate, int seqLength, int hiddenLayerSize, int epochs) throws Exception {
    RNN rnn = new StackedRNN(learningRate, seqLength, hiddenLayerSize, epochs, text, 10, true, true);
     evaluate(rnn, true);
