This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1586-Prevent-resource-leaks-in-BrownCluster-and-WordClusterDictionary
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit 994f3dce3487455722c523f202c8b254bf2f94ae
Author: Martin Wiesner <[email protected]>
AuthorDate: Mon Jul 1 11:48:46 2024 +0200

    OPENNLP-1586 Prevent resource leaks in BrownCluster and WordClusterDictionary
    - fixes the resource leaks in the related classes
    - improves JavaDoc along the path
---
 .../tools/util/featuregen/BrownCluster.java        | 44 ++++++++++++----------
 .../BrownTokenClassFeatureGenerator.java           |  6 +++
 .../tools/util/featuregen/BrownTokenClasses.java   |  4 +-
 .../util/featuregen/WordClusterDictionary.java     | 27 ++++++-------
 .../featuregen/WordClusterFeatureGenerator.java    | 16 ++++++++
 .../WordClusterFeatureGeneratorFactory.java        |  2 +-
 6 files changed, 64 insertions(+), 35 deletions(-)

diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java
index b4ddd2b7..88759662 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownCluster.java
@@ -39,6 +39,7 @@ import opennlp.tools.util.model.SerializableArtifact;
  * <p>
  * Originally available at: <a href="http://metaoptimize.com/projects/wordreprs/">
  * http://metaoptimize.com/projects/wordreprs/</a>.
+ * <p>
  * Further details can be found in the
  * <a href="https://dl.acm.org/doi/10.5555/1858681.1858721">related research paper</a>.
  * <p>
@@ -66,29 +67,32 @@ public class BrownCluster implements SerializableArtifact {
   private final Map<String, String> tokenToClusterMap = new HashMap<>();
 
   /**
-   * Generates the token to cluster map from Brown cluster an {@link 
InputStream}.
+   * Instantiates a {@link BrownCluster} and its related token to cluster map
+   * via an {@link InputStream}.
    * <p>
    * <b>Note:</b>
-   * we only add those tokens with frequency bigger than {@code 5}.
+   * Only tokens with frequency bigger than {@code 5} will be added.
    * 
    * @param in A valid, open {@link InputStream} to read from.
-   * @throws IOException the io exception
+   * @throws IOException Thrown if errors occurred reading from {@link InputStream in}.
    */
   public BrownCluster(InputStream in) throws IOException {
 
-    BufferedReader breader =
-        new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
-    String line;
-    while ((line = breader.readLine()) != null) {
-      String[] lineArray = tabPattern.split(line);
-      if (lineArray.length == 3) {
-        int freq = Integer.parseInt(lineArray[2]);
-        if (freq > 5 ) {
-          tokenToClusterMap.put(lineArray[1], lineArray[0]);
+    try (BufferedReader breader = new BufferedReader(
+            new InputStreamReader(in, StandardCharsets.UTF_8))) {
+
+      String line;
+      while ((line = breader.readLine()) != null) {
+        String[] lineArray = tabPattern.split(line);
+        if (lineArray.length == 3) {
+          int freq = Integer.parseInt(lineArray[2]);
+          if (freq > 5 ) {
+            tokenToClusterMap.put(lineArray[1], lineArray[0]);
+          }
+        }
+        else if (lineArray.length == 2) {
+          tokenToClusterMap.put(lineArray[0], lineArray[1]);
         }
-      }
-      else if (lineArray.length == 2) {
-        tokenToClusterMap.put(lineArray[0], lineArray[1]);
       }
     }
   }
@@ -104,12 +108,12 @@ public class BrownCluster implements SerializableArtifact 
{
   }
 
   public void serialize(OutputStream out) throws IOException {
-    Writer writer = new BufferedWriter(new OutputStreamWriter(out));
-
-    for (Map.Entry<String, String> entry : tokenToClusterMap.entrySet()) {
-      writer.write(entry.getKey() + "\t" + entry.getValue() + "\n");
+    try (Writer writer = new BufferedWriter(new OutputStreamWriter(out))) {
+      for (Map.Entry<String, String> entry : tokenToClusterMap.entrySet()) {
+        writer.write(entry.getKey() + "\t" + entry.getValue() + "\n");
+      }
+      writer.flush();
     }
-    writer.flush();
   }
 
   @Override
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java
index 4e86fb2a..dde9ef66 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClassFeatureGenerator.java
@@ -26,6 +26,12 @@ public class BrownTokenClassFeatureGenerator implements 
AdaptiveFeatureGenerator
 
   private final BrownCluster brownLexicon;
 
+  /**
+   * Instantiates a {@link BrownTokenClassFeatureGenerator} via a specified
+   * {@link BrownCluster}.
+   *
+   * @param dict The token {@link BrownCluster dictionary} to use.
+   */
   public BrownTokenClassFeatureGenerator(BrownCluster dict) {
     this.brownLexicon = dict;
   }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClasses.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClasses.java
index a880b91c..65277dd2 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClasses.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/BrownTokenClasses.java
@@ -18,12 +18,14 @@
 package opennlp.tools.util.featuregen;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
 
 /**
  * Obtain the paths listed in the pathLengths array from the Brown class.
  * This class is not to be instantiated.
  *
+ * @see BrownCluster
  */
 public class BrownTokenClasses {
 
@@ -39,7 +41,7 @@ public class BrownTokenClasses {
    */
   public static List<String> getWordClasses(String token, BrownCluster 
brownLexicon) {
     if (brownLexicon.lookupToken(token) == null) {
-      return new ArrayList<>(0);
+      return Collections.emptyList();
     } else {
       String brownClass = brownLexicon.lookupToken(token);
       List<String> pathLengthsList = new ArrayList<>();
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
index 2b1af7bc..b4d917ff 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterDictionary.java
@@ -57,14 +57,15 @@ public class WordClusterDictionary implements 
SerializableArtifact {
    * @throws IOException Thrown if IO errors occurred during read.
    */
   public WordClusterDictionary(InputStream in) throws IOException {
-    BufferedReader reader = new BufferedReader(new InputStreamReader(in, 
StandardCharsets.UTF_8));
-    String line;
-    while ((line = reader.readLine()) != null) {
-      String[] parts = line.split(" ");
-      if (parts.length == 3) {
-        tokenToClusterMap.put(parts[0], parts[1].intern());
-      } else if (parts.length == 2) {
-        tokenToClusterMap.put(parts[0], parts[1].intern());
+    try (BufferedReader reader = new BufferedReader(new InputStreamReader(in, 
StandardCharsets.UTF_8))) {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        String[] parts = line.split(" ");
+        if (parts.length == 3) {
+          tokenToClusterMap.put(parts[0], parts[1].intern());
+        } else if (parts.length == 2) {
+          tokenToClusterMap.put(parts[0], parts[1].intern());
+        }
       }
     }
   }
@@ -74,13 +75,13 @@ public class WordClusterDictionary implements 
SerializableArtifact {
   }
 
   public void serialize(OutputStream out) throws IOException {
-    Writer writer = new BufferedWriter(new OutputStreamWriter(out));
+    try (Writer writer = new BufferedWriter(new OutputStreamWriter(out))) {
+      for (Map.Entry<String, String> entry : tokenToClusterMap.entrySet()) {
+        writer.write(entry.getKey() + " " + entry.getValue() + "\n");
+      }
 
-    for (Map.Entry<String, String> entry : tokenToClusterMap.entrySet()) {
-      writer.write(entry.getKey() + " " + entry.getValue() + "\n");
+      writer.flush();
     }
-
-    writer.flush();
   }
 
   @Override
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
index f759d9c1..b98cd9bb 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGenerator.java
@@ -21,12 +21,28 @@ import java.util.List;
 
 import opennlp.tools.util.StringUtil;
 
+/**
+ * An {@link AdaptiveFeatureGenerator} implementation of a word cluster 
feature generator.
+ * It is based on a pre-defined {@link WordClusterDictionary}.
+ *
+ * @see WordClusterDictionary
+ */
 public class WordClusterFeatureGenerator implements AdaptiveFeatureGenerator {
 
   private final WordClusterDictionary tokenDictionary;
   private final String resourceName;
   private final boolean lowerCaseDictionary;
 
+  /**
+   * Instantiates a {@link WordClusterFeatureGenerator} via a specified
+   * {@link WordClusterDictionary}.
+   *
+   * @param dict The token {@link WordClusterDictionary dictionary} to use.
+   * @param dictResourceKey The prefix to use for detected features. Typically,
+   *                        the value for this prefix should be {@code "dict"}.
+   * @param lowerCaseDictionary {@code true} if tokens will be lower-cased 
during
+   *                            dictionary lookup, {@code false} otherwise.
+   */
   public WordClusterFeatureGenerator(WordClusterDictionary dict,
       String dictResourceKey, boolean lowerCaseDictionary) {
     tokenDictionary = dict;
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGeneratorFactory.java
 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGeneratorFactory.java
index 4381f129..7ea19edd 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGeneratorFactory.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/WordClusterFeatureGeneratorFactory.java
@@ -25,7 +25,7 @@ import opennlp.tools.util.model.ArtifactSerializer;
 
 /**
  * Defines a word cluster generator factory; it reads an element containing
- * 'w2vwordcluster' as a tag name; these clusters are typically produced by
+ * 'w2vwordcluster' as a tag name. These clusters are typically produced by
  * word2vec or clark pos induction systems.
  */
 public class WordClusterFeatureGeneratorFactory

Reply via email to