This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch Reduce-Compile-and-Runtime-dependencies-in-Similarity-component
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git

commit 13346cd5576fc8df8c18a2da01de6ea0dc10fba3
Author: Martin Wiesner <[email protected]>
AuthorDate: Wed Jul 10 14:25:27 2024 +0200

    Reduce compile and runtime dependency in Similarity Component
---
 opennlp-similarity/pom.xml                         | 1054 ++++++++++----------
 .../review_builder/MachineTranslationWrapper.java  |   22 +-
 .../tools/doc_classifier/DocClassifier.java        |   43 +-
 ...cClassifierTrainingSetMultilingualExtender.java |    3 +-
 .../DocClassifierTrainingSetVerifier.java          |   18 +-
 .../tools/textsimilarity/ParseTreeChunk.java       |    3 +-
 6 files changed, 539 insertions(+), 604 deletions(-)

diff --git a/opennlp-similarity/pom.xml b/opennlp-similarity/pom.xml
index bb8aa6e..4c47672 100644
--- a/opennlp-similarity/pom.xml
+++ b/opennlp-similarity/pom.xml
@@ -12,573 +12,533 @@
        language governing permissions and limitations under the License. -->
 
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-       <modelVersion>4.0.0</modelVersion>
-       <parent>
-               <groupId>org.apache.opennlp</groupId>
-               <artifactId>opennlp-sandbox</artifactId>
-               <version>2.3.4-SNAPSHOT</version>
-       </parent>
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.opennlp</groupId>
+    <artifactId>opennlp-sandbox</artifactId>
+    <version>2.3.4-SNAPSHOT</version>
+  </parent>
 
-       <artifactId>opennlp-similarity</artifactId>
-       <version>2.3.4-SNAPSHOT</version>
-       <packaging>jar</packaging>
+  <artifactId>opennlp-similarity</artifactId>
+  <version>2.3.4-SNAPSHOT</version>
+  <packaging>jar</packaging>
 
-       <name>Apache OpenNLP Tool Similarity distribution</name>
-       
-       <properties>
-               <dl4j.version>1.0.0-M2.1</dl4j.version>
-               <hdf5.version>1.14.3-1.5.10</hdf5.version>
-               <javacpp.version>1.5.10</javacpp.version>
-               <openblas.version>0.3.26-1.5.10</openblas.version>
-       </properties>
+  <name>Apache OpenNLP Similarity distribution</name>
 
-       <repositories>
-               <repository>
-                       <id>central</id>
-                       <name>Maven Central Repository</name>
-                       <url>https://repo1.maven.org/maven2</url>
-               </repository>
-               <repository>
-                       <id>billylieurance-net</id>
-                       <url>https://www.billylieurance.net/maven2</url>
-                       <snapshots>
-                               <enabled>false</enabled>
-                       </snapshots>
-               </repository>
-       </repositories>
+  <properties>
+    <dl4j.version>1.0.0-M2.1</dl4j.version>
+    <hdf5.version>1.14.3-1.5.10</hdf5.version>
+    <javacpp.version>1.5.10</javacpp.version>
+    <openblas.version>0.3.26-1.5.10</openblas.version>
+  </properties>
 
-       <dependencyManagement>
-               <dependencies>
-                       <dependency>
-                               <groupId>org.apache.httpcomponents</groupId>
-                               <artifactId>httpclient</artifactId>
-                               <version>4.5.14</version>
-                       </dependency>
-                       <dependency>
-                               <groupId>org.apache.httpcomponents</groupId>
-                               <artifactId>httpclient-cache</artifactId>
-                               <version>4.5.14</version>
-                       </dependency>
-                       <dependency>
-                               <groupId>org.apache.httpcomponents</groupId>
-                               <artifactId>httpcore</artifactId>
-                               <version>4.4.16</version>
-                       </dependency>
-                       <dependency>
-                               <groupId>org.apache.httpcomponents</groupId>
-                               <artifactId>httpmime</artifactId>
-                               <version>4.5.14</version>
-                       </dependency>
-                       <dependency>
-                               <groupId>org.apache.httpcomponents</groupId>
-                               <artifactId>fluent-hc</artifactId>
-                               <version>4.5.14</version>
-                       </dependency>
-                       <!-- Required to avoid IllegalAccessError by Lombok during compilation -->
-                       <dependency>
-                               <groupId>org.projectlombok</groupId>
-                               <artifactId>lombok</artifactId>
-                               <version>1.18.34</version>
-                       </dependency>
-               </dependencies>
-       </dependencyManagement>
+  <repositories>
+    <repository>
+      <id>central</id>
+      <name>Maven Central Repository</name>
+      <url>https://repo1.maven.org/maven2</url>
+    </repository>
+    <repository>
+      <id>billylieurance-net</id>
+      <url>https://www.billylieurance.net/maven2</url>
+      <snapshots>
+        <enabled>false</enabled>
+      </snapshots>
+    </repository>
+  </repositories>
 
-       <dependencies>
-               <dependency>
-                       <groupId>org.apache.opennlp</groupId>
-                       <artifactId>opennlp-tools</artifactId>
-               </dependency>
+  <dependencyManagement>
+    <dependencies>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpclient</artifactId>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpclient-cache</artifactId>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpcore</artifactId>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>httpmime</artifactId>
+      </dependency>
+      <dependency>
+        <groupId>org.apache.httpcomponents</groupId>
+        <artifactId>fluent-hc</artifactId>
+      </dependency>
+      <!-- Required to avoid IllegalAccessError by Lombok during compilation -->
+      <dependency>
+        <groupId>org.projectlombok</groupId>
+        <artifactId>lombok</artifactId>
+      </dependency>
+    </dependencies>
+  </dependencyManagement>
 
-               <dependency>
-                       <groupId>org.slf4j</groupId>
-                       <artifactId>slf4j-api</artifactId>
-               </dependency>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.opennlp</groupId>
+      <artifactId>opennlp-tools</artifactId>
+    </dependency>
 
-               <dependency>
-                       <groupId>org.apache.logging.log4j</groupId>
-                       <artifactId>log4j-api</artifactId>
-                       <scope>test</scope>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.logging.log4j</groupId>
-                       <artifactId>log4j-core</artifactId>
-                       <scope>test</scope>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.logging.log4j</groupId>
-                       <artifactId>log4j-slf4j2-impl</artifactId>
-                       <scope>test</scope>
-               </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
 
-               <dependency>
-                       <groupId>org.junit.jupiter</groupId>
-                       <artifactId>junit-jupiter-api</artifactId>
-               </dependency>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-codec</groupId>
+      <artifactId>commons-codec</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>commons-collections</groupId>
+      <artifactId>commons-collections</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-math3</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.json</groupId>
+      <artifactId>json</artifactId>
+      <version>20240303</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-app</artifactId>
+      <version>2.9.2</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sf.opencsv</groupId>
+      <artifactId>opencsv</artifactId>
+      <version>2.3</version>
+    </dependency>
 
-               <dependency>
-                       <groupId>org.junit.jupiter</groupId>
-                       <artifactId>junit-jupiter-engine</artifactId>
-               </dependency>
+    <dependency>
+      <groupId>org.apache.solr</groupId>
+      <artifactId>solr-core</artifactId>
+      <version>8.11.3</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty.http2</groupId>
+          <artifactId>*</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
 
-               <dependency>
-                       <groupId>org.junit.jupiter</groupId>
-                       <artifactId>junit-jupiter-params</artifactId>
-               </dependency>
+    <dependency>
+      <groupId>javax.mail</groupId>
+      <artifactId>mail</artifactId>
+      <version>1.4.7</version>
+    </dependency>
+    <dependency>
+      <groupId>com.restfb</groupId>
+      <artifactId>restfb</artifactId>
+      <version>1.49.0</version>
+    </dependency>
 
-               <dependency>
-                       <groupId>commons-lang</groupId>
-                       <artifactId>commons-lang</artifactId>
-               </dependency>
-               <dependency>
-                       <groupId>commons-codec</groupId>
-                       <artifactId>commons-codec</artifactId>
-               </dependency>
-               <dependency>
-                       <groupId>commons-logging</groupId>
-                       <artifactId>commons-logging</artifactId>
-               </dependency>
-               <dependency>
-                       <groupId>commons-collections</groupId>
-                       <artifactId>commons-collections</artifactId>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.commons</groupId>
-                       <artifactId>commons-math3</artifactId>
-               </dependency>
+    <dependency>
+      <groupId>net.billylieurance.azuresearch</groupId>
+      <artifactId>azure-bing-search-java</artifactId>
+      <version>0.13.0</version>
+    </dependency>
 
-               <dependency>
-                       <groupId>org.json</groupId>
-                       <artifactId>json</artifactId>
-                       <version>20240303</version>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.tika</groupId>
-                       <artifactId>tika-app</artifactId>
-                       <version>2.9.2</version>
-               </dependency>
-               <dependency>
-                       <groupId>net.sf.opencsv</groupId>
-                       <artifactId>opencsv</artifactId>
-                       <version>2.3</version>
-               </dependency>
+    <dependency>
+      <groupId>edu.mit</groupId>
+      <artifactId>jverbnet</artifactId>
+      <version>1.2.0.1</version>
+      <exclusions>
+        <exclusion>
+          <groupId>ch.qos.logback</groupId>
+          <artifactId>logback-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>ch.qos.logback</groupId>
+          <artifactId>logback-classic</artifactId>
+        </exclusion>
+        <!-- Avoids problems with conflicting slf4j bindings at runtime -->
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>log4j-over-slf4j</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
 
-               <dependency>
-                       <groupId>org.apache.solr</groupId>
-                       <artifactId>solr-core</artifactId>
-                       <version>8.11.3</version>
-               </dependency>
-               
-               <dependency>
-                       <groupId>org.apache.httpcomponents</groupId>
-                       <artifactId>httpclient</artifactId>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.httpcomponents</groupId>
-                       <artifactId>httpclient-cache</artifactId>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.httpcomponents</groupId>
-                       <artifactId>httpcore</artifactId>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.httpcomponents</groupId>
-                       <artifactId>httpmime</artifactId>
-               </dependency>
-               <dependency>
-                       <groupId>org.apache.httpcomponents</groupId>
-                       <artifactId>fluent-hc</artifactId>
-               </dependency>
+    <dependency>
+      <groupId>org.docx4j</groupId>
+      <artifactId>docx4j</artifactId>
+      <version>6.1.2</version>
+      <exclusions>
+        <!-- Exclusion here as log4j version 2 bindings are used during tests/runtime-->
+        <exclusion>
+          <groupId>org.slf4j</groupId>
+          <artifactId>slf4j-log4j12</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>log4j</groupId>
+          <artifactId>log4j</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
 
-               <dependency>
-                       <groupId>org.jgrapht</groupId>
-                       <artifactId>jgrapht-jdk1.5</artifactId>
-                       <version>0.7.3</version>
-               </dependency>
-               <dependency>
-                       <groupId>de.jollyday</groupId>
-                       <artifactId>jollyday</artifactId>
-                       <version>0.5.10</version>
-               </dependency>
-               <dependency>
-                       <groupId>jgraph</groupId>
-                       <artifactId>jgraph</artifactId>
-                       <version>5.13.0.0</version>
-               </dependency>
-               <dependency>
-                       <groupId>javax.mail</groupId>
-                       <artifactId>mail</artifactId>
-                       <version>1.4.7</version>
-               </dependency>
-               <dependency>
-                       <groupId>com.restfb</groupId>
-                       <artifactId>restfb</artifactId>
-                       <version>1.49.0</version>
-               </dependency>
-               <dependency>
-                       <groupId>com.memetix</groupId>
-                       <artifactId>microsoft-translator-java-api</artifactId>
-                       <version>0.6.2</version>
-               </dependency>
+    <dependency>
+      <groupId>org.deeplearning4j</groupId>
+      <artifactId>deeplearning4j-core</artifactId>
+      <version>${dl4j.version}</version>
+      <exclusions>
+        <!-- Excluded to avoid irrelevant platforms dependencies, see profiles -->
+        <exclusion>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>openblas-platform</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>hdf5-platform</artifactId>
+        </exclusion>
+        <!-- Not required for NLP applications -->
+        <exclusion>
+          <groupId>org.datavec</groupId>
+          <artifactId>datavec-data-image</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.deeplearning4j</groupId>
+      <artifactId>deeplearning4j-ui</artifactId>
+      <version>${dl4j.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.deeplearning4j</groupId>
+      <artifactId>deeplearning4j-nlp</artifactId>
+      <version>${dl4j.version}</version>
+    </dependency>
 
-               <dependency>
-                       <groupId>net.billylieurance.azuresearch</groupId>
-                       <artifactId>azure-bing-search-java</artifactId>
-                       <version>0.13.0</version>
-               </dependency>
+    <dependency>
+      <groupId>org.bytedeco</groupId>
+      <artifactId>openblas</artifactId>
+      <version>${openblas.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.bytedeco</groupId>
+      <artifactId>javacpp</artifactId>
+      <version>${javacpp.version}</version>
+    </dependency>
 
-               <dependency>
-                       <groupId>edu.mit</groupId>
-                       <artifactId>jverbnet</artifactId>
-                       <version>1.2.0.1</version>
-                       <exclusions>
-                               <exclusion>
-                                       <groupId>ch.qos.logback</groupId>
-                                       <artifactId>logback-core</artifactId>
-                               </exclusion>
-                               <exclusion>
-                                       <groupId>ch.qos.logback</groupId>
-                                       <artifactId>logback-classic</artifactId>
-                               </exclusion>
-                               <!-- Avoids problems with conflicting slf4j bindings at runtime -->
-                               <exclusion>
-                                       <groupId>org.slf4j</groupId>
-                                       <artifactId>log4j-over-slf4j</artifactId>
-                               </exclusion>
-                       </exclusions>
-               </dependency>
-               
-               <dependency>
-                       <groupId>org.docx4j</groupId>
-                       <artifactId>docx4j</artifactId>
-                       <version>6.1.2</version>
-                       <exclusions>
-                               <!-- Exclusion here as log4j version 2 bindings are used during tests/runtime-->
-                               <exclusion>
-                                       <groupId>org.slf4j</groupId>
-                                       <artifactId>slf4j-log4j12</artifactId>
-                               </exclusion>
-                               <exclusion>
-                                       <groupId>log4j</groupId>
-                                       <artifactId>log4j</artifactId>
-                               </exclusion>
-                       </exclusions>
-               </dependency>
+    <!-- TEST -->
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-engine</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.junit.jupiter</groupId>
+      <artifactId>junit-jupiter-params</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-api</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-slf4j2-impl</artifactId>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
 
-               <dependency>
-                       <groupId>org.deeplearning4j</groupId>
-                       <artifactId>deeplearning4j-core</artifactId>
-                       <version>${dl4j.version}</version>
-                       <exclusions>
-                               <!-- Excluded to avoid irrelevant platforms dependencies, see profiles -->
-                               <exclusion>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>openblas-platform</artifactId>
-                               </exclusion>
-                               <exclusion>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>hdf5-platform</artifactId>
-                               </exclusion>
-                               <!-- Not required for NLP applications -->
-                               <exclusion>
-                                               <groupId>org.datavec</groupId>
-                                               <artifactId>datavec-data-image</artifactId>
-                               </exclusion>
-                       </exclusions>
-               </dependency>
-               <dependency>
-                               <groupId>org.deeplearning4j</groupId>
-                               <artifactId>deeplearning4j-ui</artifactId>
-                               <version>${dl4j.version}</version>
-               </dependency>
-               <dependency>
-                               <groupId>org.deeplearning4j</groupId>
-                               <artifactId>deeplearning4j-nlp</artifactId>
-                               <version>${dl4j.version}</version>
-               </dependency>
+  <profiles>
+    <profile>
+      <id>platform-win-x64</id>
+      <activation>
+        <os>
+          <family>Windows</family>
+          <arch>x64</arch>
+        </os>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>javacpp</artifactId>
+          <version>${javacpp.version}</version>
+          <classifier>windows-x86_64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>openblas</artifactId>
+          <version>${openblas.version}</version>
+          <classifier>windows-x86_64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>hdf5</artifactId>
+          <version>${hdf5.version}</version>
+          <classifier>windows-x86_64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
+      <id>platform-win-x86</id>
+      <activation>
+        <os>
+          <family>Windows</family>
+          <arch>x86</arch>
+        </os>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>javacpp</artifactId>
+          <version>${javacpp.version}</version>
+          <classifier>windows-x86</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>openblas</artifactId>
+          <version>${openblas.version}</version>
+          <classifier>windows-x86</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>hdf5</artifactId>
+          <version>${hdf5.version}</version>
+          <classifier>windows-x86</classifier>
+          <scope>runtime</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
+      <id>platform-linux-x64</id>
+      <activation>
+        <os>
+          <family>unix</family>
+          <name>Linux</name>
+          <arch>amd64</arch>
+        </os>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>javacpp</artifactId>
+          <version>${javacpp.version}</version>
+          <classifier>linux-x86_64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>openblas</artifactId>
+          <version>${openblas.version}</version>
+          <classifier>linux-x86_64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>hdf5</artifactId>
+          <version>${hdf5.version}</version>
+          <classifier>linux-x86_64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
+      <id>platform-macosx-x64</id>
+      <activation>
+        <os>
+          <family>Mac</family>
+          <arch>x64</arch>
+        </os>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>javacpp</artifactId>
+          <version>${javacpp.version}</version>
+          <classifier>macosx-x86_64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>openblas</artifactId>
+          <version>${openblas.version}</version>
+          <classifier>macosx-x86_64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>hdf5</artifactId>
+          <version>${hdf5.version}</version>
+          <classifier>macosx-x86_64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
+      <id>platform-macosx-aarch64</id>
+      <activation>
+        <os>
+          <family>mac</family>
+          <arch>aarch64</arch>
+        </os>
+      </activation>
+      <dependencies>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>javacpp</artifactId>
+          <version>${javacpp.version}</version>
+          <classifier>macosx-arm64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>openblas</artifactId>
+          <version>${openblas.version}</version>
+          <classifier>macosx-arm64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        <!-- Not available for this platform, yet...-->
+        <!--
+        <dependency>
+          <groupId>org.bytedeco</groupId>
+          <artifactId>hdf5</artifactId>
+          <version>${hdf5.version}</version>
+          <classifier>macosx-arm64</classifier>
+          <scope>runtime</scope>
+        </dependency>
+        -->
+      </dependencies>
+    </profile>
+  </profiles>
 
-               <dependency>
-                       <groupId>org.bytedeco</groupId>
-                       <artifactId>openblas</artifactId>
-                       <version>${openblas.version}</version>
-               </dependency>
-               <dependency>
-                       <groupId>org.bytedeco</groupId>
-                       <artifactId>javacpp</artifactId>
-                       <version>${javacpp.version}</version>
-               </dependency>
-       </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <configuration>
+          <source>${maven.compiler.source}</source>
+          <target>${maven.compiler.target}</target>
+          <compilerArgument>-Xlint</compilerArgument>
+        </configuration>
+      </plugin>
 
-       <profiles>
-               <profile>
-                       <id>platform-win-x64</id>
-                       <activation>
-                               <os>
-                                       <family>Windows</family>
-                                       <arch>x64</arch>
-                               </os>
-                       </activation>
-                       <dependencies>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>javacpp</artifactId>
-                                       <version>${javacpp.version}</version>
-                                       <classifier>windows-x86_64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>openblas</artifactId>
-                                       <version>${openblas.version}</version>
-                                       <classifier>windows-x86_64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>hdf5</artifactId>
-                                       <version>${hdf5.version}</version>
-                                       <classifier>windows-x86_64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                       </dependencies>
-               </profile>
-               <profile>
-                       <id>platform-win-x86</id>
-                       <activation>
-                               <os>
-                                       <family>Windows</family>
-                                       <arch>x86</arch>
-                               </os>
-                       </activation>
-                       <dependencies>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>javacpp</artifactId>
-                                       <version>${javacpp.version}</version>
-                                       <classifier>windows-x86</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>openblas</artifactId>
-                                       <version>${openblas.version}</version>
-                                       <classifier>windows-x86</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>hdf5</artifactId>
-                                       <version>${hdf5.version}</version>
-                                       <classifier>windows-x86</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                       </dependencies>
-               </profile>
-               <profile>
-                       <id>platform-linux-x64</id>
-                       <activation>
-                               <os>
-                                       <family>unix</family>
-                                       <name>Linux</name>
-                                       <arch>amd64</arch>
-                               </os>
-                       </activation>
-                       <dependencies>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>javacpp</artifactId>
-                                       <version>${javacpp.version}</version>
-                                       <classifier>linux-x86_64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>openblas</artifactId>
-                                       <version>${openblas.version}</version>
-                                       <classifier>linux-x86_64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>hdf5</artifactId>
-                                       <version>${hdf5.version}</version>
-                                       <classifier>linux-x86_64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                       </dependencies>
-               </profile>
-               <profile>
-                       <id>platform-macosx-x64</id>
-                       <activation>
-                               <os>
-                                       <family>Mac</family>
-                                       <arch>x64</arch>
-                               </os>
-                       </activation>
-                       <dependencies>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>javacpp</artifactId>
-                                       <version>${javacpp.version}</version>
-                                       <classifier>macosx-x86_64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>openblas</artifactId>
-                                       <version>${openblas.version}</version>
-                                       <classifier>macosx-x86_64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>hdf5</artifactId>
-                                       <version>${hdf5.version}</version>
-                                       <classifier>macosx-x86_64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                       </dependencies>
-               </profile>
-               <profile>
-                       <id>platform-macosx-aarch64</id>
-                       <activation>
-                               <os>
-                                       <family>mac</family>
-                                       <arch>aarch64</arch>
-                               </os>
-                       </activation>
-                       <dependencies>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>javacpp</artifactId>
-                                       <version>${javacpp.version}</version>
-                                       <classifier>macosx-arm64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>openblas</artifactId>
-                                       <version>${openblas.version}</version>
-                                       <classifier>macosx-arm64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               <!-- Not available for this platform, yet...-->
-                               <!--
-                               <dependency>
-                                       <groupId>org.bytedeco</groupId>
-                                       <artifactId>hdf5</artifactId>
-                                       <version>${hdf5.version}</version>
-                                       <classifier>macosx-arm64</classifier>
-                                       <scope>runtime</scope>
-                               </dependency>
-                               -->
-                       </dependencies>
-               </profile>
-       </profiles>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <configuration>
+          <argLine>-Xmx2048m -Dfile.encoding=UTF-8</argLine>
+          <forkCount>${opennlp.forkCount}</forkCount>
+          <reuseForks>false</reuseForks>
+          <failIfNoSpecifiedTests>false</failIfNoSpecifiedTests>
+          <excludes>
+            <exclude>**/*IT.java</exclude>
+          </excludes>
+        </configuration>
+      </plugin>
 
-       <build>
-               <plugins>
-                       <plugin>
-                               <groupId>org.apache.maven.plugins</groupId>
-                               <artifactId>maven-compiler-plugin</artifactId>
-                               <configuration>
-                                       <source>${maven.compiler.source}</source>
-                                       <target>${maven.compiler.target}</target>
-                                       <compilerArgument>-Xlint</compilerArgument>
-                               </configuration>
-                       </plugin>
+      <plugin>
+        <artifactId>maven-source-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>create-source-jar</id>
+            <goals>
+              <goal>jar</goal>
+            </goals>
+            <phase>package</phase>
+          </execution>
+        </executions>
+      </plugin>
 
-                       <plugin>
-                               <groupId>org.apache.maven.plugins</groupId>
-                               <artifactId>maven-surefire-plugin</artifactId>
-                               <configuration>
-                                       <argLine>-Xmx2048m -Dfile.encoding=UTF-8</argLine>
-                                       <forkCount>${opennlp.forkCount}</forkCount>
-                                       <reuseForks>false</reuseForks>
-                                       <failIfNoSpecifiedTests>false</failIfNoSpecifiedTests>
-                                       <excludes>
-                                               <exclude>**/*IT.java</exclude>
-                                       </excludes>
-                               </configuration>
-                       </plugin>
+      <plugin>
+        <artifactId>maven-antrun-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>generate checksums for binary artifacts</id>
+            <goals>
+              <goal>run</goal>
+            </goals>
+            <phase>verify</phase>
+            <configuration>
+              <target>
+                <checksum algorithm="sha1" format="MD5SUM">
+                  <fileset dir="${project.build.directory}">
+                    <include name="*.zip" />
+                    <include name="*.gz" />
+                  </fileset>
+                </checksum>
+                <checksum algorithm="md5" format="MD5SUM">
+                  <fileset dir="${project.build.directory}">
+                    <include name="*.zip" />
+                    <include name="*.gz" />
+                  </fileset>
+                </checksum>
+              </target>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>src</id>
+            <goals>
+              <goal>single</goal>
+            </goals>
+            <phase>package</phase>
+            <configuration>
+              <descriptors>
+                <descriptor>src/main/assembly/assembly.xml</descriptor>
+              </descriptors>
+            </configuration>
+          </execution>
+          <execution>
+            <id>source-release-assembly</id>
+            <configuration>
+              <skipAssembly>true</skipAssembly>
+              <mavenExecutorId>forked-path</mavenExecutorId>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
 
-                       <plugin>
-                               <artifactId>maven-source-plugin</artifactId>
-                               <executions>
-                                       <execution>
-                                               <id>create-source-jar</id>
-                                               <goals>
-                                                       <goal>jar</goal>
-                                               </goals>
-                                               <phase>package</phase>
-                                       </execution>
-                               </executions>
-                       </plugin>
-                       
-                       <plugin>
-                               <artifactId>maven-antrun-plugin</artifactId>
-                               <executions>
-                                       <execution>
-                                               <id>generate checksums for binary artifacts</id>
-                                               <goals>
-                                                       <goal>run</goal>
-                                               </goals>
-                                               <phase>verify</phase>
-                                               <configuration>
-                                                       <target>
-                                                               <checksum algorithm="sha1" format="MD5SUM">
-                                                                       <fileset dir="${project.build.directory}">
-                                                                               <include name="*.zip" />
-                                                                               <include name="*.gz" />
-                                                                       </fileset>
-                                                               </checksum>
-                                                               <checksum algorithm="md5" format="MD5SUM">
-                                                                       <fileset dir="${project.build.directory}">
-                                                                               <include name="*.zip" />
-                                                                               <include name="*.gz" />
-                                                                       </fileset>
-                                                               </checksum>
-                                                       </target>
-                                               </configuration>
-                                       </execution>
-                               </executions>
-                       </plugin>
-                       <plugin>
-                               <artifactId>maven-assembly-plugin</artifactId>
-                               <executions>
-                                       <execution>
-                                               <id>src</id>
-                                               <goals>
-                                                       <goal>single</goal>
-                                               </goals>
-                                               <phase>package</phase>
-                                               <configuration>
-                                                       <descriptors>
-                                                               <descriptor>src/main/assembly/assembly.xml</descriptor>
-                                                       </descriptors>
-                                               </configuration>
-                                       </execution>
-                                       <execution>
-                                               <id>source-release-assembly</id>
-                                               <configuration>
-                                                       <skipAssembly>true</skipAssembly>
-                                                       <mavenExecutorId>forked-path</mavenExecutorId>
-                                               </configuration>
-                                       </execution>
-                               </executions>
-                       </plugin>
-
-                       <plugin>
-                     <groupId>org.sonatype.plugins</groupId>
-                     <artifactId>nexus-staging-maven-plugin</artifactId>
-                     <version>1.7.0</version>
-                     <extensions>true</extensions>
-                     <configuration>
-                       <serverId>ossrh</serverId>
-                       <nexusUrl>https://oss.sonatype.org/</nexusUrl>
-                       <autoReleaseAfterClose>true</autoReleaseAfterClose>
-                     </configuration>
-               </plugin>
-               </plugins>
-       </build>
+      <plugin>
+        <groupId>org.sonatype.plugins</groupId>
+        <artifactId>nexus-staging-maven-plugin</artifactId>
+        <version>1.7.0</version>
+        <extensions>true</extensions>
+        <configuration>
+          <serverId>ossrh</serverId>
+          <nexusUrl>https://oss.sonatype.org/</nexusUrl>
+          <autoReleaseAfterClose>true</autoReleaseAfterClose>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
 </project>
\ No newline at end of file
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
index 2db4f12..8f08443 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
@@ -36,6 +36,7 @@ public class MachineTranslationWrapper  {
        public String translate(String sentence, String lang2lang) {
                if (sentence==null)
                        return null;
+               
                String request = TRANSLATOR_URL + sentence.replace(' ','+') + "&langpair="+lang2lang;//"en|es";
                try {
                        URL urlC = new URI(request).toURL();
@@ -43,17 +44,18 @@ public class MachineTranslationWrapper  {
 
                        String line;
                        StringBuilder result = new StringBuilder();
-                       BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
-                       int count = 0;
-                       while ((line = reader.readLine()) != null)
-                       {
-                               result.append(line);
-                               count++;
+                       try (BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
+                               int count = 0;
+                               while ((line = reader.readLine()) != null)
+                               {
+                                       result.append(line);
+                                       count++;
+                               }
+                               JSONObject rootObject = new JSONObject(result.toString());
+                               JSONObject  findObject = rootObject.getJSONObject("responseData");
+                               String transl = findObject.getString("translatedText");
+                               return URLDecoder.decode(transl, StandardCharsets.UTF_8);
                        }
-                       JSONObject rootObject = new JSONObject(result.toString());
-                       JSONObject  findObject = rootObject.getJSONObject("responseData");
-                       String transl = findObject.getString("translatedText");
-                       return URLDecoder.decode(transl, StandardCharsets.UTF_8);
                        
                } catch (IOException | URISyntaxException | JSONException e) {
                        e.printStackTrace();
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
index ccd9f63..41bec16 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifier.java
@@ -29,8 +29,6 @@ import opennlp.tools.similarity.apps.utils.ValueSortMap;
 import opennlp.tools.textsimilarity.TextProcessor;
 
 import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
@@ -44,30 +42,25 @@ import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
-import org.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class DocClassifier {
 
-       private static final Log LOGGER = LogFactory.getLog(DocClassifier.class);
+       private static final Logger LOGGER = LoggerFactory.getLogger(DocClassifier.class);
        public static final String DOC_CLASSIFIER_KEY = "doc_class";
        public static final String RESOURCE_DIR = null;
        private Map<String, Float> scoredClasses;
        
-
        public static final Float MIN_TOTAL_SCORE_FOR_CATEGORY = 0.3f; //3.0f;
        protected static IndexReader indexReader = null;
        protected static IndexSearcher indexSearcher = null;
        // resource directory plus the index folder
-       private static final String INDEX_PATH = RESOURCE_DIR
-                       + ClassifierTrainingSetIndexer.INDEX_PATH;
+       private static final String INDEX_PATH = RESOURCE_DIR + ClassifierTrainingSetIndexer.INDEX_PATH;
 
        // http://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm
        private static final int MAX_DOCS_TO_USE_FOR_CLASSIFY = 10, // 10 similar
-                       // docs for
-                       // nearest
-                       // neighbor
-                       // settings
-
+                       // docs for nearest neighbor settings
                        MAX_CATEG_RESULTS = 2;
        private static final float BEST_TO_NEX_BEST_RATIO = 2.0f;
        // to accumulate classif results
@@ -112,7 +105,7 @@ public class DocClassifier {
                }
        }
 
-       public DocClassifier(String inputFilename, JSONObject inputJSON) {
+       public DocClassifier(String inputFilename) {
                scoredClasses = new HashMap<>();
        }
 
@@ -131,18 +124,15 @@ public class DocClassifier {
                Query query;
                try {
                        query = parser.parse(queryStr);
-
                } catch (ParseException e2) {
-
                        return results;
                }
                TopDocs hits = null; // TopDocs search(Query, int)
                // Finds the top n hits for query.
                try {
-                       hits = indexSearcher
-                                       .search(query, MAX_DOCS_TO_USE_FOR_CLASSIFY + 2);
+                       hits = indexSearcher.search(query, MAX_DOCS_TO_USE_FOR_CLASSIFY + 2);
                } catch (IOException e1) {
-                       LOGGER.error("problem searching index \n" + e1);
+                       LOGGER.error("problem searching index \n", e1);
                }
                LOGGER.debug("Found " + hits.totalHits + " hits for " + queryStr);
                int count = 0;
@@ -175,8 +165,7 @@ public class DocClassifier {
                }
                try {
                        scoredClasses = ValueSortMap.sortMapByValue(scoredClasses, false);
-                       List<String> resultsAll = new ArrayList<>(
-                                                       scoredClasses.keySet()), resultsAboveThresh = new ArrayList<>();
+                       List<String> resultsAll = new ArrayList<>(scoredClasses.keySet()), resultsAboveThresh = new ArrayList<>();
                        for (String key : resultsAll) {
                                if (scoredClasses.get(key) > MIN_TOTAL_SCORE_FOR_CATEGORY)
                                        resultsAboveThresh.add(key);
@@ -211,15 +200,11 @@ public class DocClassifier {
 
        }
 
-       
-
-       
        public static String formClassifQuery(String pageContentReader, int maxRes) {
 
                // We want to control which delimiters we substitute. For example '_' &
                // \n we retain
-               pageContentReader = pageContentReader.replaceAll("[^A-Za-z0-9 _\\n]",
-                               "");
+               pageContentReader = pageContentReader.replaceAll("[^A-Za-z0-9 _\\n]", "");
 
                Scanner in = new Scanner(pageContentReader);
                in.useDelimiter("\\s+");
@@ -258,11 +243,9 @@ public class DocClassifier {
                }
        }       
        
-       
        /*
         * Main entry point for classifying sentences
         */
-
        public List<String> getEntityOrClassFromText(String content) {
 
                List<String> sentences = TextProcessor.splitToSentences(content);
@@ -284,7 +267,6 @@ public class DocClassifier {
                                        LOGGER.debug(sentence + " =>  " + classifResults);
                                }
                        }
-
                } catch (Exception e) {
                        LOGGER.error("Problem classifying sentence\n " + e);
                }
@@ -294,11 +276,10 @@ public class DocClassifier {
 
                        aggrResults = localCats.getFrequentTags();
 
-                       LOGGER.debug(localCats.getFrequentTags());
+                       LOGGER.debug(localCats.getFrequentTags().toString());
                } catch (Exception e) {
-                       LOGGER.error("Problem aggregating search results\n" + e);
+                       LOGGER.error("Problem aggregating search results\n", e);
                }
                return aggrResults;
        }
-
 }
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
index 90501ad..29a5107 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetMultilingualExtender.java
@@ -33,7 +33,6 @@ import java.util.List;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang.StringUtils;
-import org.json.JSONObject;
 
 /*
  * This utility gets 'training_corpus' as input and creates a new version of training_corpus with verified files.
@@ -56,7 +55,7 @@ public class DocClassifierTrainingSetMultilingualExtender {
 
        public DocClassifierTrainingSetMultilingualExtender(String resource) {
 
-               classifier = new DocClassifier("", new JSONObject());
+               classifier = new DocClassifier("");
 
        }
        private final int FRAGMENT_LENGTH = 500;
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetVerifier.java b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetVerifier.java
index 4da160a..95c2b27 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetVerifier.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/doc_classifier/DocClassifierTrainingSetVerifier.java
@@ -26,33 +26,28 @@ import opennlp.tools.jsmlearning.ProfileReaderWriter;
 import org.apache.commons.io.FileUtils;
 import org.apache.tika.Tika;
 import org.apache.tika.exception.TikaException;
-import org.json.JSONObject;
 
 /*
  * This utility gets 'training_corpus' as input and creates a new version of training_corpus with verified files.
  * Verified => classified by existing training set as only belonging to its target category, no other categories, not empty.
  */
 public class DocClassifierTrainingSetVerifier {
+       
+       private static final int FRAGMENT_LENGTH = 500;
        public static String projectHome = new File(".").getAbsolutePath();
-       public static String resourceDir = new File(".").getAbsolutePath().replace("/.", "") + "/src/main/resources";
+       public static String resourceDir = projectHome.replace("/.", "") + "/src/main/resources";
        DocClassifier classifier;
        private String sourceDir = null, destinationDir = null;
-       
 
        protected final ArrayList<File> queue = new ArrayList<>();
-
        protected final Tika tika = new Tika();
-       public DocClassifierTrainingSetVerifier(String resource) {
-
-               
-               classifier = new DocClassifier("", new JSONObject());
 
+       public DocClassifierTrainingSetVerifier(String resource) {
+               classifier = new DocClassifier("");
        }
-       private static final int FRAGMENT_LENGTH = 500;
 
 
        protected void addFiles(File file) {
-
                try {
                        if (!file.exists()) {
                                System.out.println(file + " does not exist.");
@@ -90,8 +85,7 @@ public class DocClassifierTrainingSetVerifier {
                                
                                //if (f.getName().indexOf(".html")<0)
                                        //continue;
-                               classifier = new DocClassifier("", new JSONObject());
-
+                               classifier = new DocClassifier("");
 
                                content = tika.parseToString(f);
 
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
index 409172b..8224273 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeChunk.java
@@ -421,11 +421,10 @@ public class ParseTreeChunk implements Serializable {
        }
        
        public boolean equals(ParseTreeChunk ch) {
-               List<String> lems = ch.getLemmas();
-               List<String> poss = ch.POSs;
                return ListUtils.isEqualList(ch.getLemmas(), this.lemmas) && ListUtils.isEqualList(ch.getPOSs(), this.POSs);
        }
 
+       @Override
        public String toString() {
                StringBuilder buf = new StringBuilder(" [");
                if (mainPOS != null)


Reply via email to