(opennlp-sandbox) 01/01: Update sandbox components to use opennlp-tools version 2.3.4 / 2.4.0 - adds logger to WSDEvaluator to avoid plain System.out logging - clears some compiler warnings

mawiesne Mon, 08 Jul 2024 08:19:39 -0700

This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch update-sandbox-components-to-use-opennlp-tools-version-2.4
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git


commit 8ff740d6a3b9f933ef3d3cc4a02e0b2596491b19
Author: Martin Wiesner <[email protected]>
AuthorDate: Thu Jul 4 09:23:59 2024 +0200

    Update sandbox components to use opennlp-tools version 2.3.4 / 2.4.0
    - adds logger to WSDEvaluator to avoid plain System.out logging
    - clears some compiler warnings
---
 .../java/opennlp/addons/mallet/CRFTrainer.java     |  4 +-
 .../opennlp/addons/mallet/TransducerModel.java     |  8 ++--
 .../addons/mallet/TransducerModelSerializer.java   |  6 +--
 .../apps/solr/IterativeSearchRequestHandler.java   |  8 +---
 opennlp-wsd/pom.xml                                |  7 +++-
 .../opennlp/tools/disambiguator/WSDEvaluator.java  |  8 +++-
 pom.xml                                            |  6 +--
 .../wikinews_importer/AnnotatingMarkupParser.java  | 45 ++++++++--------------
 8 files changed, 41 insertions(+), 51 deletions(-)

diff --git a/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java 
b/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
index 9145be1..11cd4b7 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
@@ -58,7 +58,7 @@ public class CRFTrainer extends AbstractTrainer implements 
SequenceTrainer {
   }
 
   @Override
-  public <T> SequenceClassificationModel<String> train(SequenceStream<T> 
sequences)
+  public <T> SequenceClassificationModel train(SequenceStream<T> sequences)
       throws IOException {
 
     Alphabet dataAlphabet = new Alphabet();
@@ -146,7 +146,7 @@ public class CRFTrainer extends AbstractTrainer implements 
SequenceTrainer {
     // can be very similar to the other model
     // one important difference is that the feature gen needs to be integrated
     // ...
-    return new TransducerModel<>(crf);
+    return new TransducerModel(crf);
   }
 
   // TODO: We need to return a sequence model here. How should that be done ?!
diff --git 
a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java 
b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
index 0c1fe67..47bb341 100644
--- a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
+++ b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
@@ -34,7 +34,7 @@ import cc.mallet.types.FeatureVector;
 import cc.mallet.types.FeatureVectorSequence;
 import cc.mallet.types.Sequence;
 
-public class TransducerModel<T> implements SequenceClassificationModel<T>, 
SerializableArtifact {
+public class TransducerModel implements SequenceClassificationModel, 
SerializableArtifact {
 
   private final Transducer model;
 
@@ -47,14 +47,14 @@ public class TransducerModel<T> implements 
SequenceClassificationModel<T>, Seria
   }
 
   @Override
-  public opennlp.tools.util.Sequence bestSequence(T[] sequence,
+  public <T> opennlp.tools.util.Sequence bestSequence(T[] sequence,
       Object[] additionalContext, BeamSearchContextGenerator<T> cg,
       SequenceValidator<T> validator) {
     return bestSequences(1, sequence, additionalContext, cg, validator)[0];
   }
 
   @Override
-  public opennlp.tools.util.Sequence[] bestSequences(int numSequences,
+  public <T> opennlp.tools.util.Sequence[] bestSequences(int numSequences,
       T[] sequence, Object[] additionalContext, double minSequenceScore,
       BeamSearchContextGenerator<T> cg, SequenceValidator<T> validator) {
     // TODO: How to implement min score filtering here? 
@@ -62,7 +62,7 @@ public class TransducerModel<T> implements 
SequenceClassificationModel<T>, Seria
   }
 
   @Override
-  public opennlp.tools.util.Sequence[] bestSequences(int numSequences,
+  public <T> opennlp.tools.util.Sequence[] bestSequences(int numSequences,
       T[] sequence, Object[] additionalContext,
       BeamSearchContextGenerator<T> cg, SequenceValidator<T> validator) {
 
diff --git 
a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModelSerializer.java
 
b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModelSerializer.java
index 9513618..9d24cc1 100644
--- 
a/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModelSerializer.java
+++ 
b/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModelSerializer.java
@@ -28,13 +28,13 @@ import java.io.OutputStream;
 import opennlp.tools.util.model.ArtifactSerializer;
 import cc.mallet.fst.Transducer;
 
-public class TransducerModelSerializer implements 
ArtifactSerializer<TransducerModel<?>> {
+public class TransducerModelSerializer implements 
ArtifactSerializer<TransducerModel> {
 
   @Override
-  public TransducerModel<?> create(InputStream in) throws IOException {
+  public TransducerModel create(InputStream in) throws IOException {
     try (ObjectInputStream ois = new ObjectInputStream(in)) {
       Transducer classifier = (Transducer) ois.readObject();
-      return new TransducerModel<>(classifier);
+      return new TransducerModel(classifier);
     } catch (ClassNotFoundException e) {
       throw new IOException(e);
     }
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeSearchRequestHandler.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeSearchRequestHandler.java
index 51f838f..1cba60a 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeSearchRequestHandler.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeSearchRequestHandler.java
@@ -30,7 +30,6 @@ import org.apache.commons.lang.StringUtils;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
@@ -123,7 +122,6 @@ public class IterativeSearchRequestHandler extends 
SearchHandler {
                rsp.setAllValues(rsp3.getValues());
        }
 
-       @SuppressWarnings("unchecked")
        public DocList filterResultsBySyntMatchReduceDocSet(DocList docList,
                                                                                
                                                                                
                                                        SolrQueryRequest req, 
SolrParams params) {
                //if (!docList.hasScores())
@@ -257,8 +255,7 @@ public class IterativeSearchRequestHandler extends 
SearchHandler {
                rsp.add("response", results);
        }
 
-       private Query buildFilter(String[] fqs, SolrQueryRequest req)
-       throws IOException, ParseException {
+       private Query buildFilter(String[] fqs, SolrQueryRequest req) {
                if (fqs != null && fqs.length > 0) {
                        BooleanQuery.Builder fquery =  new 
BooleanQuery.Builder();
                        for (String fq : fqs) {
@@ -323,13 +320,12 @@ public class IterativeSearchRequestHandler extends 
SearchHandler {
                        alreadyFound.add(hit.doc);
                }
        }
-       public static class PairComparable implements Comparator<Pair> {
+       public static class PairComparable implements Comparator<Pair<Integer, 
Float>> {
 
                @Override
                public int compare(Pair o1, Pair o2) {
                        int b = -2;
                        if ( o1.getSecond() instanceof Float && o2.getSecond() 
instanceof Float){
-
                                b =  (((Float) 
o2.getSecond()).compareTo((Float) o1.getSecond()));
                        }
                        return b;
diff --git a/opennlp-wsd/pom.xml b/opennlp-wsd/pom.xml
index aac1196..51421c5 100644
--- a/opennlp-wsd/pom.xml
+++ b/opennlp-wsd/pom.xml
@@ -59,16 +59,19 @@
                        <groupId>org.junit.jupiter</groupId>
                        <artifactId>junit-jupiter-api</artifactId>
                </dependency>
-
                <dependency>
                        <groupId>org.junit.jupiter</groupId>
                        <artifactId>junit-jupiter-engine</artifactId>
                </dependency>
-
                <dependency>
                        <groupId>org.junit.jupiter</groupId>
                        <artifactId>junit-jupiter-params</artifactId>
                </dependency>
+               <dependency>
+                       <groupId>org.slf4j</groupId>
+                       <artifactId>slf4j-simple</artifactId>
+                       <version>${slf4j.version}</version>
+               </dependency>
        </dependencies>
 
        <build>
diff --git 
a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java 
b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
index eeab5c1..17dcce9 100644
--- a/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
+++ b/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
@@ -20,6 +20,9 @@ package opennlp.tools.disambiguator;
 import opennlp.tools.util.eval.Evaluator;
 import opennlp.tools.util.eval.Mean;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 /**
  * The {@link WSDEvaluator} measures the performance of the given
  * {@link WSDisambiguator} with the provided reference {@code 
WordToDisambiguate}.
@@ -29,6 +32,8 @@ import opennlp.tools.util.eval.Mean;
  */
 public class WSDEvaluator extends Evaluator<WSDSample> {
 
+  private static final Logger LOG = 
LoggerFactory.getLogger(WSDEvaluator.class);
+
   private final Mean accuracy = new Mean();
 
   /**
@@ -61,8 +66,7 @@ public class WSDEvaluator extends Evaluator<WSDSample> {
         reference.getTargetPosition());
 
     if (predictedSense == null) {
-      System.out
-          .println("There was no sense for : " + reference.getTargetWord());
+      LOG.debug("There was no sense for: {}", reference.getTargetWord());
       return null;
     }
     // get the senseKey from the result
diff --git a/pom.xml b/pom.xml
index d3f284c..0234a9e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -117,11 +117,11 @@
         <maven.compiler.target>${java.version}</maven.compiler.target>
         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 
-        <opennlp.tools.version>2.3.3</opennlp.tools.version>
+        <opennlp.tools.version>2.3.4-SNAPSHOT</opennlp.tools.version>
         <opennlp.forkCount>1.0C</opennlp.forkCount>
 
-        <slf4j.version>1.7.36</slf4j.version>
-        <log4j2.version>2.20.0</log4j2.version>
+        <slf4j.version>2.0.13</slf4j.version>
+        <log4j2.version>2.23.1</log4j2.version>
 
         <uimaj.version>3.4.1</uimaj.version>
         <jersey-client.version>2.41</jersey-client.version>
diff --git 
a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/AnnotatingMarkupParser.java
 
b/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/AnnotatingMarkupParser.java
index 90f1721..2624ae7 100644
--- 
a/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/AnnotatingMarkupParser.java
+++ 
b/wikinews-importer/src/main/java/org/apache/opennlp/wikinews_importer/AnnotatingMarkupParser.java
@@ -22,7 +22,6 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.regex.Pattern;
 
 import info.bliki.htmlcleaner.ContentToken;
 import info.bliki.htmlcleaner.TagNode;
@@ -48,35 +47,26 @@ import info.bliki.wiki.tags.WPATag;
  */
 public class AnnotatingMarkupParser implements ITextConverter {
 
-    public static final String HREF_ATTR_KEY = "href";
+    private static final String HREF_ATTR_KEY = "href";
 
-    public static final String WIKILINK_TITLE_ATTR_KEY = "title";
+    private static final String WIKILINK_TITLE_ATTR_KEY = "title";
+    private static final String WIKILINK_TARGET_ATTR_KEY = "href";
+    private static final String WIKIOBJECT_ATTR_KEY = "wikiobject";
 
-    public static final String WIKILINK_TARGET_ATTR_KEY = "href";
+    private static final Set<String> PARAGRAPH_TAGS = Set.of("p");
+    private static final Set<String> HEADING_TAGS = Set.of("h1", "h2", "h3", 
"h4", "h5", "h6");
 
-    public static final String WIKIOBJECT_ATTR_KEY = "wikiobject";
+    private final List<Annotation> wikilinks = new ArrayList<>();
+    private final List<Annotation> headers = new ArrayList<>();
+    private final List<Annotation> paragraphs = new ArrayList<>();
 
-    public static final Set<String> PARAGRAPH_TAGS = Set.of("p");
+    private String languageCode = "en";
 
-    public static final Set<String> HEADING_TAGS = Set.of("h1", "h2", "h3", 
"h4", "h5", "h6");
+    private final WikiModel model;
 
-    public static final Pattern INTERWIKI_PATTERN = 
Pattern.compile("http://[\\w-]+\\.wikipedia\\.org/wiki/.*");
+    private String redirect;
 
-    protected final List<Annotation> wikilinks = new ArrayList<>();
-
-    protected final List<Annotation> headers = new ArrayList<>();
-
-    protected final List<Annotation> paragraphs = new ArrayList<>();
-
-    protected String languageCode = "en";
-
-    protected final WikiModel model;
-
-    protected String redirect;
-
-    protected String text;
-
-    protected static final Pattern REDIRECT_PATTERN = 
Pattern.compile("^#REDIRECT \\[\\[([^\\]]*)\\]\\]");
+    private String text;
 
     public AnnotatingMarkupParser() {
         model = makeWikiModel(languageCode);
@@ -119,9 +109,8 @@ public class AnnotatingMarkupParser implements 
ITextConverter {
                     return;
                 }
                 for (Object node : nodes) {
-                    if (node instanceof WPATag) {
+                    if (node instanceof WPATag tag) {
                         // extract wikilink annotations
-                        WPATag tag = (WPATag) node;
                         String wikilinkLabel = 
tag.getAttributes().get(WIKILINK_TITLE_ATTR_KEY);
                         String wikilinkTarget = 
tag.getAttributes().get(WIKILINK_TARGET_ATTR_KEY);
                         if (wikilinkLabel != null) {
@@ -142,8 +131,7 @@ public class AnnotatingMarkupParser implements 
ITextConverter {
                             tag.getBodyString(countingBuffer);
                         }
 
-                    } else if (node instanceof ContentToken) {
-                        ContentToken contentToken = (ContentToken) node;
+                    } else if (node instanceof ContentToken contentToken) {
                         countingBuffer.append(contentToken.getContent());
                     } else if (node instanceof List) {
                     } else if (node instanceof WPList) {
@@ -152,8 +140,7 @@ public class AnnotatingMarkupParser implements 
ITextConverter {
                         // do not hold grammatically correct
                         // interesting sentences that are representative of the
                         // language.
-                    } else if (node instanceof TagNode) {
-                        TagNode tagNode = (TagNode) node;
+                    } else if (node instanceof TagNode tagNode) {
                         Map<String, String> attributes = 
tagNode.getAttributes();
                         Map<String, Object> oAttributes = 
tagNode.getObjectAttributes();
                         boolean hasSpecialHandling = false;

(opennlp-sandbox) 01/01: Update sandbox components to use opennlp-tools version 2.3.4 / 2.4.0 - adds logger to WSDEvaluator to avoid plain System.out logging - clears some compiler warnings

Reply via email to