[5/5] opennlp-sandbox git commit: merge from bgalitsky's own git repo

bgalitsky Wed, 16 Nov 2016 01:05:22 -0800

merge from bgalitsky's own git repo


Project: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/commit/9aa270c1
Tree: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/tree/9aa270c1
Diff: http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/diff/9aa270c1

Branch: refs/heads/master
Commit: 9aa270c11a5974fbd10d42f1510e855cb1040035
Parents: ad4195b
Author: Boris Galitsky <[email protected]>
Authored: Wed Nov 16 10:04:29 2016 -0800
Committer: Boris Galitsky <[email protected]>
Committed: Wed Nov 16 10:04:29 2016 -0800

----------------------------------------------------------------------
 opennlp-similarity/README                       |  138 ---
 opennlp-similarity/pom.xml                      |  424 +++++---
 .../multithreaded/BingWebQueryRunnerThread.java |    2 +-
 .../tools/apps/relevanceVocabs/POStags.java     |   17 +
 .../apps/relevanceVocabs/PhraseProcessor.java   |   17 +
 .../apps/relevanceVocabs/SentimentVocab.java    |   19 +-
 .../apps/relevanceVocabs/SynonymListFilter.java |   17 +
 .../tools/apps/relevanceVocabs/SynonymMap.java  |   64 +-
 .../apps/relevanceVocabs/WordDictionary.java    |   17 +
 .../tools/apps/utils/email/EmailSender.java     |    7 +-
 .../tools/jsmlearning/ProfileReaderWriter.java  |   25 +
 .../tools/parse_thicket/ParseCorefsBuilder.java |   24 +-
 .../tools/parse_thicket/ParseThicket.java       |   46 +
 .../tools/parse_thicket/ParseTreeNode.java      |  215 ++--
 .../WordWordInterSentenceRelationArc.java       |    2 +-
 .../MultiSentenceSearchResultsProcessor.java    |    4 +-
 ...edForestSearchResultsProcessorSetFormer.java |    3 +-
 ...ntenceKernelBasedSearchResultsProcessor.java |    2 +-
 .../PT2ExtendedTreeForestBuilder.java           |   35 +-
 .../SnippetToParagraphFull.java                 |   17 +
 .../TreeExtenderByAnotherLinkedTree.java        |  111 +-
 .../kernel_interface/TreeKernelRunner.java      |   33 +-
 .../matching/GeneralizationListReducer.java     |  148 ---
 .../matching/LemmaFormManager.java              |    8 +-
 .../tools/parse_thicket/matching/Matcher.java   |  199 +++-
 .../matching/PT2ThicketPhraseBuilder.java       |  298 +++---
 .../matching/ParseTreeChunkListScorer.java      |   96 --
 .../parse_thicket/matching/ParseTreePath.java   |  422 --------
 .../matching/ParseTreePathComparable.java       |   32 -
 .../matching/ParseTreePathMatcher.java          |  254 -----
 .../ParseTreePathMatcherDeterministic.java      |  280 -----
 .../parse_thicket2graph/EdgeProductBuilder.java |   16 +
 .../GraphFromPTreeBuilder.java                  |   16 +
 .../parse_thicket2graph/ParseGraphNode.java     |   16 +
 .../ParseTreeVisualizer.java                    |   35 +-
 .../pattern_structure/PhraseConcept.java        |  129 ++-
 .../PhrasePatternStructure.java                 |  358 ++++---
 .../RhetoricStructureArcsBuilder.java           |   16 +
 .../RhetoricStructureMarker.java                |   16 +
 .../tools/similarity/apps/BingQueryRunner.java  |  167 +--
 .../similarity/apps/BingWebQueryRunner.java     |   17 +-
 .../apps/ContentGeneratorSupport.java           |   32 +-
 .../apps/GeneratedSentenceProcessor.java        |    8 +-
 .../similarity/apps/RelatedSentenceFinder.java  |   16 +
 .../apps/StoryDiscourseNavigator.java           |   38 +-
 .../solr/ContentGeneratorRequestHandler.java    |   81 +-
 .../apps/solr/IterativeQueryComponent.java      |   20 +-
 .../solr/IterativeSearchRequestHandler.java     |   16 +
 .../apps/solr/NLProgram2CodeRequestHandler.java |   20 +-
 .../SearchResultsReRankerRequestHandler.java    |   20 +-
 .../apps/solr/SyntGenRequestHandler.java        |   17 +-
 .../TaxonomyExtenderViaMebMining.java           |    6 +-
 .../apps/taxo_builder/TaxonomySerializer.java   |   28 +
 .../similarity/apps/utils/PageFetcher.java      |   83 +-
 .../apps/utils/StringDistanceMeasurer.java      |    6 +-
 .../opennlp/tools/stemmer/PorterStemmer.java    |  521 ---------
 .../tools/textsimilarity/LemmaFormManager.java  |    4 +-
 .../tools/textsimilarity/ParseTreeChunk.java    |  933 +++++++++-------
 .../ParseTreeChunkListScorer.java               |   13 +-
 .../ParseTreeMatcherDeterministic.java          |    4 +-
 .../tools/textsimilarity/TextProcessor.java     |    6 +-
 .../opennlp/tools/textsimilarity/readme.txt     |   15 +
 .../apps/RelatedSentenceFinderTest.java         |   20 +-
 .../matching/PT2ThicketPhraseBuilderTest.java   |   17 +
 .../parse_thicket/matching/PTMatcherTest.java   |   73 +-
 .../matching/PTPhraseBuilderTest.java           |   17 +
 .../matching/PairwiseMatcherTest.java           |   19 +-
 .../PhrasePatternStructureTest.java             |   27 +-
 .../tools/textsimilarity/SyntMatcherTest.java   |    8 +-
 .../ParserChunker2MatcherProcessorTest.java     |   10 +-
 .../src/test/resources/sentence_parseObject.csv | 1000 ++++++++++++++----
 71 files changed, 3408 insertions(+), 3432 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/README
----------------------------------------------------------------------
diff --git a/opennlp-similarity/README b/opennlp-similarity/README
deleted file mode 100644
index b535487..0000000
--- a/opennlp-similarity/README
+++ /dev/null
@@ -1,138 +0,0 @@
-Apache OpenNLP ${pom.version}
-===============================
-
-
-Building from the Source Distribution
--------------------------------------
-
-At least Maven 3.0.0 is required for building.
-
-To build everything go into the opennlp directory and run the following 
command:
-    mvn clean install
-   
-The results of the build will be placed  in:
-    opennlp-distr/target/apache-opennlp-[version]-bin.tar-gz (or .zip)
-
-What is in Similarity component in Apache OpenNLP ${pom.version}
----------------------------------------
-SIMILARITY COMPONENT of OpenNLP
-
-1. Introduction
-This component does text relevance assessment. It takes two portions of texts 
(phrases, sentences, paragraphs) and returns a similarity score.
-Similarity component can be used on top of search to improve relevance, 
computing similarity score between a question and all search results 
(snippets). 
-Also, this component is useful for web mining of images, videos, forums, 
blogs, and other media with textual descriptions. Such applications as content 
generation 
-and filtering meaningless speech recognition results are included in the 
sample applications of this component.
-   Relevance assessment is based on machine learning of syntactic parse trees 
(constituency trees, http://en.wikipedia.org/wiki/Parse_tree). 
-The similarity score is calculated as the size of all maximal common sub-trees 
for sentences from a pair of texts (
-www.aaai.org/ocs/index.php/WS/AAAIW11/paper/download/3971/4187, 
www.aaai.org/ocs/index.php/FLAIRS/FLAIRS11/paper/download/2573/3018,
-www.aaai.org/ocs/index.php/SSS/SSS10/paper/download/1146/1448).
-   The objective of Similarity component is to give an application engineer as 
tool for text relevance which can be used as a black box, no need to understand 
- computational linguistics or machine learning. 
- 
- 2. Installation
- Please refer to OpenNLP installation instructions
- 
- 3. First use case of Similarity component: search
- 
- To start with this component, please refer to SearchResultsProcessorTest.java 
in package opennlp.tools.similarity.apps
-   public void testSearchOrder() runs web search using Bing API and improves 
search relevance.
-   Look at the code of 
-      public List<HitBase> runSearch(String query) 
-   and then at 
-      private  BingResponse calculateMatchScoreResortHits(BingResponse resp, 
String searchQuery)
-   which gets search results from Bing and re-ranks them based on computed 
similarity score.
- 
-   The main entry to Similarity component is 
-    SentencePairMatchResult matchRes = sm.assessRelevance(snapshot, 
searchQuery);
-    where we pass the search query and the snapshot and obtain the similarity 
assessment structure which includes the similarity score.
-   
-   To run this test you need to obtain search API key from Bing at 
www.bing.com/developers/s/APIBasics.html and specify it in public class 
BingQueryRunner in
-  protected static final String APP_ID. 
-  
-  4. Solving a unique problem: content generation
-  To demonstrate the usability of Similarity component to tackle a problem 
which is hard to solve without a linguistic-based technology, 
-  we introduce a content generation component:
-   RelatedSentenceFinder.java
-   
-   The entry point here is the function call
-   hits = f.generateContentAbout("Albert Einstein");
-   which writes a biography of Albert Einstein by finding sentences on the web 
about various kinds of his activities (such as 'born', 'graduate', 'invented' 
etc.).
-   The key here is to compute similarity between the seed expression like 
"Albert Einstein invented relativity theory" and search result like 
-   "Albert Einstein College of Medicine | Medical Education | Biomedical ...
-    www.einstein.yu.edu/Albert Einstein College of Medicine is one of the 
nation's premier institutions for medical education, ..."
-    and filter out irrelevant search results.
-   
-   This is done in function 
-   public HitBase augmentWithMinedSentencesAndVerifyRelevance(HitBase item, 
String originalSentence,
-                       List<String> sentsAll)
-                       
-         SentencePairMatchResult matchRes = sm.assessRelevance(pageSentence + 
" " + title, originalSentence);
-   You can consult the results in gen.txt, where an essay on Einstein bio is 
written.
-   
-   These are examples of generated articles, given the article title
-     
http://www.allvoices.com/contributed-news/9423860/content/81937916-ichie-sings-jazz-blues-contemporary-tunes
-     
http://www.allvoices.com/contributed-news/9415063-britney-spears-femme-fatale-in-north-sf-bay-area
-     
-  5. Solving a high-importance problem: filtering out meaningless speech 
recognition results.
-  Speech recognitions SDKs usually produce a number of phrases as results, 
such as 
-                        "remember to buy milk tomorrow from trader joes",
-                        "remember to buy milk tomorrow from 3 to jones"
-  One can see that the former is meaningful, and the latter is meaningless 
(although similar in terms of how it is pronounced).
-  We use web mining and Similarity component to detect a meaningful option (a 
mistake caused by trying to interpret meaningless 
-  request by a query understanding system such as Siri for iPhone can be 
costly).
- 
-  SpeechRecognitionResultsProcessor.java does the job:
-  public List<SentenceMeaningfullnessScore> 
runSearchAndScoreMeaningfulness(List<String> sents)
-  re-ranks the phrases in the order of decrease of meaningfulness.
-  
-  6. Similarity component internals
-  in the package   opennlp.tools.textsimilarity.chunker2matcher
-  ParserChunker2MatcherProcessor.java does parsing of two portions of text and 
matching the resultant parse trees to assess similarity between 
-  these portions of text.
-  To run ParserChunker2MatcherProcessor
-     private static String MODEL_DIR = "resources/models";
-  needs to be specified
-  
-  The key function
-  public SentencePairMatchResult assessRelevance(String para1, String para2)
-  takes two portions of text and does similarity assessment by finding the set 
of all maximum common subtrees 
-  of the set of parse trees for each portion of text
-  
-  It splits paragraphs into sentences, parses them, obtained chunking 
information and produces grouped phrases (noun, evrn, prepositional etc.):
-  public synchronized List<List<ParseTreeChunk>> 
formGroupedPhrasesFromChunksForPara(String para)
-  
-  and then attempts to find common subtrees:
-  in ParseTreeMatcherDeterministic.java
-               List<List<ParseTreeChunk>> res = 
md.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst)
-  
-  Phrase matching functionality is in package opennlp.tools.textsimilarity;
-  ParseTreeMatcherDeterministic.java:
-  Here's the key matching function which takes two phrases, aligns them and 
finds a set of maximum common sub-phrase
-  public List<ParseTreeChunk> generalizeTwoGroupedPhrasesDeterministic
-  
-  7. Package structure
-       opennlp.tools.similarity.apps : 3 main applications
-       opennlp.tools.similarity.apps.utils: utilities for above applications
-       
-       opennlp.tools.textsimilarity.chunker2matcher: parser which converts 
text into a form for matching parse trees
-       opennlp.tools.textsimilarity: parse tree matching functionality
-       
-
-
-
-Requirements
-------------
-Java 1.5 is required to run OpenNLP
-Maven 3.0.0 is required for building it
-
-Known OSGi Issues
-------------
-In an OSGi environment the following things are not supported:
-- The coreference resolution component
-- The ability to load a user provided feature generator class
-
-Note
-----
-The current API contains still many deprecated methods, these
-will be removed in one of our next releases, please
-migrate to our new API.

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/pom.xml
----------------------------------------------------------------------
diff --git a/opennlp-similarity/pom.xml b/opennlp-similarity/pom.xml
index 35b768b..a583e8e 100644
--- a/opennlp-similarity/pom.xml
+++ b/opennlp-similarity/pom.xml
@@ -1,25 +1,18 @@
 <?xml version="1.0" encoding="UTF-8"?>
 
-<!--
-   Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License.    
--->
-
-<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
+<!-- Licensed to the Apache Software Foundation (ASF) under one or more 
contributor 
+       license agreements. See the NOTICE file distributed with this work for 
additional 
+       information regarding copyright ownership. The ASF licenses this file 
to 
+       you under the Apache License, Version 2.0 (the "License"); you may not 
use 
+       this file except in compliance with the License. You may obtain a copy 
of 
+       the License at http://www.apache.org/licenses/LICENSE-2.0 Unless 
required 
+       by applicable law or agreed to in writing, software distributed under 
the 
+       License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
CONDITIONS 
+       OF ANY KIND, either express or implied. See the License for the 
specific 
+       language governing permissions and limitations under the License. -->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
        <modelVersion>4.0.0</modelVersion>
 
        <parent>
@@ -31,35 +24,52 @@
 
        <groupId>org.apache.opennlp</groupId>
        <artifactId>opennlp-similarity</artifactId>
-       <version>0.0.1</version>
+       <version>0.1.0</version>
        <packaging>jar</packaging>
 
        <name>OpenNLP Tool Similarity distribution</name>
 
        <scm>
-               
<connection>scm:svn:http://svn.apache.org/repos/asf/opennlp/sandbox/opennlp-similarity/tags/opennlp-similarity-0.0.1</connection>
 
-               
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/opennlp/sandbox/opennlp-similarity/tags/opennlp-similarity-0.0.1</developerConnection>
 
-               
<url>http://svn.apache.org/viewvc/opennlp/tags/opennlp-similarity-0.0.1</url> 
+               
<connection>scm:svn:http://svn.apache.org/repos/asf/opennlp/sandbox/opennlp-similarity/tags/opennlp-similarity-0.0.1</connection>
+               
<developerConnection>scm:svn:https://svn.apache.org/repos/asf/opennlp/sandbox/opennlp-similarity/tags/opennlp-similarity-0.0.1</developerConnection>
+               
<url>http://svn.apache.org/viewvc/opennlp/tags/opennlp-similarity-1.1.0</url>
        </scm>
        <prerequisites>
                <maven>3.0</maven>
        </prerequisites>
-       
+       <distributionManagement>
+         <snapshotRepository>
+           <id>ossrh</id>
+           <url>https://oss.sonatype.org/content/repositories/snapshots</url>
+         </snapshotRepository>
+       </distributionManagement>
+
+
        <repositories>
                <repository>
-                <id>net.billylieurance</id>
-        <name>BillyLieuranceNet</name>
-        <url>http://www.billylieurance.net/maven2</url>        
-        </repository>
+                       <id>net.billylieurance</id>
+                       <name>BillyLieuranceNet</name>
+                       <url>http://www.billylieurance.net/maven2</url>
+               </repository>
        </repositories>
+       
+       <properties>
+              <nd4j.version>0.4-rc3.4</nd4j.version> 
+              <dl4j.version>0.4-rc3.3</dl4j.version>
+   </properties>
 
        <dependencies>
                <dependency>
-                 <groupId>org.apache.opennlp</groupId>
-                 <artifactId>opennlp-tools</artifactId>
-                 <version>1.5.2-incubating</version>
+                       <groupId>org.slf4j</groupId>
+                       <artifactId>slf4j-log4j12</artifactId>
+                       <version>1.6.4</version>
+               </dependency>
+               <dependency>
+                       <groupId>org.apache.opennlp</groupId>
+                       <artifactId>opennlp-tools</artifactId>
+                       <version>1.5.2-incubating</version>
                </dependency>
-               
+
                <dependency>
                        <groupId>junit</groupId>
                        <artifactId>junit</artifactId>
@@ -77,11 +87,10 @@
                        <artifactId>json</artifactId>
                        <version>20090211</version>
                </dependency>
-
                <dependency>
                        <groupId>org.apache.tika</groupId>
-                       <artifactId>tika-core</artifactId>
-                       <version>0.7</version>
+                       <artifactId>tika-app</artifactId>
+                       <version>1.6</version>
                </dependency>
                <dependency>
                        <groupId>net.sf.opencsv</groupId>
@@ -91,57 +100,179 @@
                <dependency>
                        <groupId>org.apache.lucene</groupId>
                        <artifactId>lucene-core</artifactId>
-                       <version>4.0.0-BETA</version>
+                       <version>4.10.0</version>
                </dependency>
-            
+
                <dependency>
                        <groupId>org.apache.solr</groupId>
                        <artifactId>solr-core</artifactId>
-                       <version>4.0.0-BETA</version>
+                       <version>4.10.0</version>
+               </dependency>
+               <dependency>
+                       <groupId>commons-codec</groupId>
+                       <artifactId>commons-codec</artifactId>
+                       <version>1.7</version>
                </dependency>
                <dependency>
-                        <groupId>commons-codec</groupId>
-                        <artifactId>commons-codec</artifactId>
-                        <version>1.7</version>
+                       <groupId>commons-logging</groupId>
+                       <artifactId>commons-logging</artifactId>
+                       <version>1.1.1</version>
                </dependency>
                <dependency>
-                        <groupId>commons-logging</groupId>
-                        <artifactId>commons-logging</artifactId>
-                        <version>1.1.1</version>
+                       <groupId>commons-collections</groupId>
+                       <artifactId>commons-collections</artifactId>
+                       <version>3.1</version>
                </dependency>
                <dependency>
-                        <groupId>org.apache.httpcomponents</groupId>
-                        <artifactId>httpclient</artifactId>
-                        <version>4.2.1</version>
-        </dependency>
-        <dependency>
-                        <groupId>org.apache.httpcomponents</groupId>
-                        <artifactId>httpclient-cache</artifactId>
-                        <version>4.2.1</version>
+                       <groupId>org.apache.commons</groupId>
+                       <artifactId>commons-math3</artifactId>
+                       <version>3.5</version>
+               </dependency>
+
+               <dependency>
+                       <groupId>org.apache.httpcomponents</groupId>
+                       <artifactId>httpclient</artifactId>
+                       <version>4.2.1</version>
+               </dependency>
+               <dependency>
+                       <groupId>org.apache.httpcomponents</groupId>
+                       <artifactId>httpclient-cache</artifactId>
+                       <version>4.2.1</version>
+               </dependency>
+               <dependency>
+                       <groupId>org.apache.httpcomponents</groupId>
+                       <artifactId>httpcore</artifactId>
+                       <version>4.2.1</version>
+               </dependency>
+               <dependency>
+                       <groupId>org.apache.httpcomponents</groupId>
+                       <artifactId>httpmime</artifactId>
+                       <version>4.2.1</version>
                </dependency>
                <dependency>
-                        <groupId>org.apache.httpcomponents</groupId>
-                        <artifactId>httpcore</artifactId>
-                        <version>4.2.1</version>
+                       <groupId>org.apache.httpcomponents</groupId>
+                       <artifactId>fluent-hc</artifactId>
+                       <version>4.2.1</version>
                </dependency>
+
                <dependency>
-                        <groupId>org.apache.httpcomponents</groupId>
-                        <artifactId>httpmime</artifactId>
-                        <version>4.2.1</version>
-        </dependency>
+                       <groupId>org.jgrapht</groupId>
+                       <artifactId>jgrapht-jdk1.5</artifactId>
+                       <version>0.7.3</version>
+               </dependency>
                <dependency>
-                        <groupId>org.apache.httpcomponents</groupId>
-                        <artifactId>fluent-hc</artifactId>
-                        <version>4.2.1</version>
-        </dependency>
+                       <groupId>de.jollyday</groupId>
+                       <artifactId>jollyday</artifactId>
+                       <version>0.4.7</version>
+               </dependency>
+               <dependency>
+                       <groupId>jgraph</groupId>
+                       <artifactId>jgraph</artifactId>
+                       <version>5.13.0.0</version>
+               </dependency>
+               <dependency>
+                       <groupId>javax.mail</groupId>
+                       <artifactId>mail</artifactId>
+                       <version>1.4</version>
+               </dependency>
+               <dependency>
+                       <groupId>com.restfb</groupId>
+                       <artifactId>restfb</artifactId>
+                       <version>1.6.12</version>
+               </dependency>
+               <dependency>
+                       <groupId>com.memetix</groupId>
+                       <artifactId>microsoft-translator-java-api</artifactId>
+                       <version>0.3</version>
+               </dependency>
+
+               <dependency>
+                       <groupId>net.billylieurance.azuresearch</groupId>
+                       <artifactId>azure-bing-search-java</artifactId>
+                       <version>0.11.0</version>
+               </dependency>
+               <dependency>
+                       <groupId>edu.mit</groupId>
+                       <artifactId>jverbnet</artifactId>
+                       <version>1.2.0</version>
+                       
<systemPath>${project.basedir}/lib/edu.mit.jverbnet-1.2.0.jar</systemPath>
+                       <scope>system</scope>
+               </dependency>
+               <dependency>
+                       <groupId>edu.stanford.nlp</groupId>
+                       <artifactId>stanford-corenlp</artifactId>
+                       <version>3.5.2</version>
+               </dependency>
+               <dependency>
+                       <groupId>edu.stanford.nlp</groupId>
+                       <artifactId>stanford-corenlp-model</artifactId>
+                       <version>3.5.2</version>
+                       
<systemPath>${project.basedir}/lib/stanford-corenlp-3.5.2-models.jar</systemPath>
+                       <scope>system</scope>
+               </dependency>
+               <dependency>
+                       <groupId>edu.stanford.nlp</groupId>
+                       <artifactId>ejml</artifactId>
+                       <version>0.23</version>
+                       
<systemPath>${project.basedir}/lib/ejml-0.23.jar</systemPath>
+                       <scope>system</scope>
+               </dependency>
+               <dependency>
+                       <groupId>edu.stanford.nlp</groupId>
+                       <artifactId>joda-time</artifactId>
+                       <version>0.23</version>
+                       
<systemPath>${project.basedir}/lib/joda-time.jar</systemPath>
+                       <scope>system</scope>
+               </dependency>
+               <dependency>
+                       <groupId>edu.stanford.nlp</groupId>
+                       <artifactId>jollyday</artifactId>
+                       <version>0.23</version>
+                       
<systemPath>${project.basedir}/lib/jollyday.jar</systemPath>
+                       <scope>system</scope>
+               </dependency>
                <dependency>
-               <groupId>net.billylieurance.azuresearch</groupId>
-               <artifactId>azure-bing-search-java</artifactId>
-        <version>0.11.0</version>
+                       <groupId>edu.stanford.nlp</groupId>
+                       <artifactId>xom</artifactId>
+                       <version>0.23</version>
+                       <systemPath>${project.basedir}/lib/xom.jar</systemPath>
+                       <scope>system</scope>
                </dependency>
-            
+               <dependency>
+                       <groupId>org.docx4j</groupId>
+                       <artifactId>docx4j</artifactId>
+                       <version>2.7.1</version>
+               </dependency>
+
+               <dependency>
+                       <groupId>org.clulab</groupId>
+                       <artifactId>processors_2.11</artifactId>
+                       <version>5.7.1</version>
+               </dependency>
+               <dependency>
+                       <groupId>org.clulab</groupId>
+                       <artifactId>processors_2.11</artifactId>
+                       <version>5.7.1</version>
+                       <classifier>models</classifier>
+               </dependency>
+               <dependency>
+                 <groupId>org.deeplearning4j</groupId>
+                 <artifactId>deeplearning4j-ui</artifactId>
+                 <version>${dl4j.version}</version>
+               </dependency>
+               <dependency>
+                 <groupId>org.deeplearning4j</groupId>
+                 <artifactId>deeplearning4j-nlp</artifactId>
+                 <version>${dl4j.version}</version>
+               </dependency>
+               <dependency>
+                 <groupId>org.nd4j</groupId>
+                 <artifactId>nd4j-jblas</artifactId> 
+                 <version>${nd4j.version}</version>
+               </dependency>
+
        </dependencies>
-       
+
        <build>
                <plugins>
                        <plugin>
@@ -150,10 +281,10 @@
                                <configuration>
                                        <source>1.5</source>
                                        <target>1.5</target>
-                               <compilerArgument>-Xlint</compilerArgument>
+                                       
<compilerArgument>-Xlint</compilerArgument>
                                </configuration>
                        </plugin>
-                       
+
                        <plugin>
                                <artifactId>maven-source-plugin</artifactId>
                                <executions>
@@ -183,70 +314,93 @@
                                        </execution>
                                </executions>
                        </plugin>
-                       <plugin> 
-               <artifactId>maven-antrun-plugin</artifactId> 
-               <version>1.6</version> 
-               <executions> 
-                 <execution> 
-                   <id>generate checksums for binary artifacts</id> 
-                   <goals><goal>run</goal></goals> 
-                   <phase>verify</phase> 
-                   <configuration> 
-                     <target> 
-                       <checksum algorithm="sha1" format="MD5SUM"> 
-                         <fileset dir="${project.build.directory}"> 
-                           <include name="*.zip" /> 
-                           <include name="*.gz" /> 
-                         </fileset> 
-                       </checksum> 
-                       <checksum algorithm="md5" format="MD5SUM"> 
-                         <fileset dir="${project.build.directory}"> 
-                           <include name="*.zip" /> 
-                           <include name="*.gz" /> 
-                         </fileset> 
-                       </checksum> 
-                     </target> 
-                   </configuration> 
-                 </execution> 
-               </executions> 
-             </plugin>
-             <plugin>
-                         <artifactId>maven-assembly-plugin</artifactId> 
-                                <executions>
-                                        <execution>
-                                         <id>src</id> 
-                                        <goals>
-                                               <goal>single</goal> 
-                                         </goals>
-                                         <phase>package</phase> 
-                                               <configuration>
-                                                       <descriptors>
-                                                               
<descriptor>src/main/assembly/assembly.xml</descriptor> 
-                                                       </descriptors>
-                                               </configuration>
-                                         </execution>
-                                        <execution>
-                                         <id>source-release-assembly</id> 
-                                        <configuration>
-                                         <skipAssembly>true</skipAssembly> 
-                                         
<mavenExecutorId>forked-path</mavenExecutorId>
-                                         </configuration>
-                                         </execution>
-                                 </executions>
-                         </plugin>
-             <plugin>
-               <groupId>org.apache.maven.plugins</groupId>
-               <artifactId>maven-gpg-plugin</artifactId>
-               <executions>
-                 <execution>
-                   <id>sign-artifacts</id>
-                   <phase>verify</phase>
-                   <goals>
-                     <goal>sign</goal>
-                   </goals>
-                 </execution>
-               </executions>
-      </plugin>
+                       <plugin>
+                               <artifactId>maven-antrun-plugin</artifactId>
+                               <version>1.6</version>
+                               <executions>
+                                       <execution>
+                                               <id>generate checksums for binary artifacts</id>
+                                               <goals>
+                                                       <goal>run</goal>
+                                               </goals>
+                                               <phase>verify</phase>
+                                               <configuration>
+                                                       <target>
+                                                               <checksum algorithm="sha1" format="MD5SUM">
+                                                                       <fileset dir="${project.build.directory}">
+                                                                               <include name="*.zip" />
+                                                                               <include name="*.gz" />
+                                                                       </fileset>
+                                                               </checksum>
+                                                               <checksum algorithm="md5" format="MD5SUM">
+                                                                       <fileset dir="${project.build.directory}">
+                                                                               <include name="*.zip" />
+                                                                               <include name="*.gz" />
+                                                                       </fileset>
+                                                               </checksum>
+                                                       </target>
+                                               </configuration>
+                                       </execution>
+                               </executions>
+                       </plugin>
+                       <plugin>
+                               <artifactId>maven-assembly-plugin</artifactId>
+                               <executions>
+                                       <execution>
+                                               <id>src</id>
+                                               <goals>
+                                                       <goal>single</goal>
+                                               </goals>
+                                               <phase>package</phase>
+                                               <configuration>
+                                                       <descriptors>
+                                                               <descriptor>src/main/assembly/assembly.xml</descriptor>
+                                                       </descriptors>
+                                               </configuration>
+                                       </execution>
+                                       <execution>
+                                               <id>source-release-assembly</id>
+                                               <configuration>
+                                                       <skipAssembly>true</skipAssembly>
+                                                       <mavenExecutorId>forked-path</mavenExecutorId>
+                                               </configuration>
+                                       </execution>
+                               </executions>
+                       </plugin>
+               <!--    <plugin>
+                               <groupId>org.apache.maven.plugins</groupId>
+                               <artifactId>maven-gpg-plugin</artifactId>
+                               <executions>
+                                       <execution>
+                                               <id>sign-artifacts</id>
+                                               <phase>verify</phase>
+                                               <goals>
+                                                       <goal>sign</goal>
+                                               </goals>
+                                       </execution>
+                               </executions>
+                       </plugin>
+                       -->
+                        <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.1</version>
+                <configuration>
+                    <source>1.8</source>
+                    <target>1.8</target>
+                </configuration>
+            </plugin>
+                       <plugin>
+                     <groupId>org.sonatype.plugins</groupId>
+                     <artifactId>nexus-staging-maven-plugin</artifactId>
+                     <version>1.6.3</version>
+                     <extensions>true</extensions>
+                     <configuration>
+                       <serverId>ossrh</serverId>
+                       <nexusUrl>https://oss.sonatype.org/</nexusUrl>
+                       <autoReleaseAfterClose>true</autoReleaseAfterClose>
+                     </configuration>
+               </plugin>
                </plugins>
        </build>
 </project>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
index b75a13b..b712847 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
@@ -6,7 +6,7 @@ import java.util.List;
 import opennlp.tools.similarity.apps.BingQueryRunner;
 import opennlp.tools.similarity.apps.HitBase;
 
- public class BingWebQueryRunnerThread extends BingQueryRunner implements Runnable{
+public class BingWebQueryRunnerThread extends BingQueryRunner implements Runnable{
        
        private String query;
        private List<HitBase> results= new ArrayList<HitBase>();

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
index 45dadf9..fafdef0 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.apps.relevanceVocabs;
 
 public interface POStags {

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
index ae2772b..0d2ba00 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.apps.relevanceVocabs;
 
 import java.util.ArrayList;

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
index 150b3df..aced079 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.apps.relevanceVocabs;
 
 import java.util.HashMap;
@@ -59,7 +76,7 @@ public class SentimentVocab {
        private static final String[] POSITIVE_NOUN_LIST = { "ability", 
"benefit",
                        "character", "charm", "comfort", "discount", "dream", 
"elegance",
                        "favourite", "feature", "improvement", "luck", 
"luxury", "offer",
-                       "pro", "quality", "requirement", "usability" };
+                        "quality", "requirement", "usability" };
 
        private static final String[] NEGATIVE_NOUN_LIST = { "blocker",
                        "challenge", "complain", "complaint", "compromise", 
"con",

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
index 7c12c9a..37f57e4 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.apps.relevanceVocabs;
 
 import java.io.BufferedReader;

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
index 804fc2b..7e680de 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.apps.relevanceVocabs;
 
 import java.io.IOException;
@@ -12,50 +29,7 @@ import java.io.IOException;
    import java.util.TreeMap;
    import java.util.TreeSet;
    
-   /**
-    * Loads the <a target="_blank" 
-    * href="http://www.cogsci.princeton.edu/~wn/">WordNet </a> prolog file <a
-    * href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">wn_s.pl </a>
-    * into a thread-safe main-memory hash map that can be used for fast
-    * high-frequency lookups of synonyms for any given (lowercase) word string.
-    * <p>
-    * There holds: If B is a synonym for A (A -> B) then A is also a synonym 
for B (B -> A).
-    * There does not necessarily hold: A -> B, B -> C then A -> C.
-    * <p>
-    * Loading typically takes some 1.5 secs, so should be done only once per
-    * (server) program execution, using a singleton pattern. Once loaded, a
-    * synonym lookup via {@link #getSynonyms(String)}takes constant time O(1).
-    * A loaded default synonym map consumes about 10 MB main memory.
-    * An instance is immutable, hence thread-safe.
-    * <p>
-    * This implementation borrows some ideas from the Lucene Syns2Index demo 
that 
-    * Dave Spencer originally contributed to Lucene. Dave's approach
-    * involved a persistent Lucene index which is suitable for occasional
-    * lookups or very large synonym tables, but considered unsuitable for 
-    * high-frequency lookups of medium size synonym tables.
-    * <p>
-    * Example Usage:
-    * <pre>
-    * String[] words = new String[] { "hard", "woods", "forest", "wolfish", 
"xxxx"};
-    * SynonymMap map = new SynonymMap(new 
FileInputStream("samples/fulltext/wn_s.pl"));
-    * for (int i = 0; i &lt; words.length; i++) {
-    *     String[] synonyms = map.getSynonyms(words[i]);
-    *     System.out.println(words[i] + ":" + 
java.util.Arrays.asList(synonyms).toString());
-    * }
-    * 
-    * Example output:
-    * hard:[arduous, backbreaking, difficult, fermented, firmly, grueling, 
gruelling, heavily, heavy, intemperately, knockout, laborious, punishing, 
severe, severely, strong, toilsome, tough]
-    * woods:[forest, wood]
-   * forest:[afforest, timber, timberland, wood, woodland, woods]
-    * wolfish:[edacious, esurient, rapacious, ravening, ravenous, voracious, 
wolflike]
-    * xxxx:[]
-    * </pre>
-    *
-    * @see <a target="_blank"
-    *      href="http://www.cogsci.princeton.edu/~wn/man/prologdb.5WN.html">prologdb
-    *      man page </a>
-    * @see <a target="_blank" href="http://www.hostmon.com/rfc/advanced.jsp">Dave's synonym demo site</a>
-    */
+   
    public class SynonymMap {
    
      /** the index data; Map<String word, String[] synonyms> */
@@ -73,7 +47,7 @@ import java.io.IOException;
       * @param input
       *            the stream to read from (null indicates an empty synonym 
map)
       * @throws IOException
-      *             if an error occured while reading the stream.
+      *             if an error occurred while reading the stream.
       */
      public SynonymMap(InputStream input) throws IOException {
        this.table = input == null ? new HashMap<String,String[]>(0) : 
read(toByteArray(input));

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
index dbbec1d..cfae086 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.apps.relevanceVocabs;
 
 import java.util.HashMap;

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
index 0b99fc2..ac7cb95 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
@@ -14,7 +14,7 @@ import javax.activation.*;
  */
 public class EmailSender {
                private static final long serialVersionUID = 1L;
-               private static final String 
mailboxAddress="[email protected]";
+               private static final String 
mailboxAddress="[email protected]";
 
                public  boolean sendMail(String smtp, String user, String pass, 
InternetAddress from, InternetAddress[] to, InternetAddress[] cc, 
InternetAddress[] bcc, String subject, String body, String file) throws 
Exception
                {
@@ -34,7 +34,7 @@ public class EmailSender {
                                        Properties props = new Properties();
                                        props.put("mail.smtp.host", smtp);
                                        props.put("mail.smtp.auth", "true");
-                                       props.put("mail.smtp.port", "587");
+                                       props.put("mail.smtp.port", "465");
                                        props.put("mail.smtp.starttls.enable", 
"true");
                                        Authenticator auth = new 
SMTP_Authenticator     (user, pass);
                                        Session session = 
Session.getInstance(props, auth);
@@ -158,7 +158,8 @@ public class EmailSender {
                public static void main(String[] args){
                        EmailSender s = new EmailSender();
                        try {
-                               s.sendMail("smtp.live.com", 
"[email protected]", "******", new 
InternetAddress("[email protected]"), new InternetAddress[]{new 
InternetAddress("[email protected]")}, new InternetAddress[]{}, new 
InternetAddress[]{}, 
+                               s.sendMail("smtp.rambler.ru", 
"[email protected]", "b06g93", 
+                                               new 
InternetAddress("[email protected]"), new InternetAddress[]{new 
InternetAddress("[email protected]")}, new InternetAddress[]{}, new 
InternetAddress[]{}, 
                                                "Generated content for you", 
"body", null);
                        } catch (AddressException e) {
                                // TODO Auto-generated catch block

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java
index 9081e1a..694da0a 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java
@@ -123,6 +123,31 @@ public class ProfileReaderWriter {
                        e.printStackTrace();
                }
        }
+       public static void appendReport( List<String[]> allLines, String reportName){
+               List<String[]> previous;
+               try {
+                       previous = readProfiles(reportName);
+                       allLines.addAll(previous);
+               } catch (Exception e1) {
+                       System.out.println("Creating file "+reportName);
+               }
+               
+               CSVWriter writer = null;
+               try {   
+                       writer = new CSVWriter(new PrintWriter(reportName));    
                
+               } catch (FileNotFoundException e) {
+                       e.printStackTrace();
+               }       
+
+               writer.writeAll(allLines);
+
+               try {
+                       writer.flush();
+                       writer.close();
+               } catch (IOException e) {
+                       e.printStackTrace();
+               }
+       }
 
        public static void writeReportListStr(List<String> res, String string) {
                // TODO Auto-generated method stub

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java
index 10e9683..8f215f7 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java
@@ -19,8 +19,8 @@ import edu.stanford.nlp.util.*;
 
 public class ParseCorefsBuilder {
        protected static ParseCorefsBuilder instance;
-       private Annotation annotation;
-       StanfordCoreNLP pipeline;
+       protected Annotation annotation;
+       protected StanfordCoreNLP pipeline;
        CommunicativeActionsArcBuilder caFinder = new 
CommunicativeActionsArcBuilder();
        
          /**
@@ -35,9 +35,9 @@ public class ParseCorefsBuilder {
            return instance;
          }
        
-       ParseCorefsBuilder(){
+       protected ParseCorefsBuilder(){
                Properties props = new Properties();
-               props.put("annotators", "tokenize, ssplit, pos, lemma, ner, 
parse, dcoref");
+               props.put("annotators", "tokenize, ssplit, pos, lemma, ner, 
parse, dcoref, sentiment");
                pipeline = new StanfordCoreNLP(props);
        }
        
@@ -104,30 +104,18 @@ public class ParseCorefsBuilder {
                                                  new Pair<Integer, 
Integer>(njSentence,njWord), mi.mentionSpan, mj.mentionSpan, 
                                                  arcType);
                  arcs.add(arc);
-                 
-                 /*
-                 System.out.println("animacy = "+m.animacy);
-                 System.out.println("mention span = "+m.mentionSpan);
-                 System.out.println(" id = "+m.mentionID);
-                 System.out.println(" position = "+m.position);
-                 System.out.println(" start index = "+m.startIndex);
-                 System.out.println(" end index = "+m.endIndex);   
-                 System.out.println(" mentionType = "+m.mentionType);   
-                 System.out.println(" number =  = "+m.number);  
-                 */
                  }
              }
-             
-             
            }
            List<WordWordInterSentenceRelationArc> arcsCA = 
buildCAarcs(nodesThicket);
+           arcs.addAll(arcsCA);
            
            ParseThicket result = new ParseThicket(ptTrees, arcs);
            result.setNodesThicket(nodesThicket);
            return result;
        }
 
-  private List<WordWordInterSentenceRelationArc> buildCAarcs(
+  public List<WordWordInterSentenceRelationArc> buildCAarcs(
                        List<List<ParseTreeNode>> nodesThicket) {
          List<WordWordInterSentenceRelationArc> arcs = new 
ArrayList<WordWordInterSentenceRelationArc>();
          

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java
index e584d1e..8723e53 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java
@@ -13,6 +13,36 @@ public class ParseThicket {
        // then list for all sentences
        private List<List<ParseTreeNode>> sentenceNodes;
        
+       private List<Float> sentimentProfile;
+       
+       private String origText;
+       private List<List<ParseTreeNode>> phrases;
+       
+       
+       public List<Tree> getSentenceTrees() {
+               return sentenceTrees;
+       }
+
+       public void setSentenceTrees(List<Tree> sentenceTrees) {
+               this.sentenceTrees = sentenceTrees;
+       }
+
+       public List<List<ParseTreeNode>> getSentenceNodes() {
+               return sentenceNodes;
+       }
+
+       public void setSentenceNodes(List<List<ParseTreeNode>> sentenceNodes) {
+               this.sentenceNodes = sentenceNodes;
+       }
+
+       public String getOrigText() {
+               return origText;
+       }
+
+       public void setOrigText(String origText) {
+               this.origText = origText;
+       }
+
        public List<Tree> getSentences() {
                return sentenceTrees;
        }
@@ -53,6 +83,22 @@ public class ParseThicket {
        public String toString(){
                return this.sentenceTrees+"\n"+this.arcs;
        }
+
+       public void setPhrases(List<List<ParseTreeNode>> phrs) {
+               this.phrases = phrs;            
+       }
+
+       public List<List<ParseTreeNode>> getPhrases() {
+               return phrases;
+       }
+
+       public List<Float> getSentimentProfile() {
+               return sentimentProfile;
+       }
+
+       public void setSentimentProfile(List<Float> sentimentProfile) {
+               this.sentimentProfile = sentimentProfile;
+       }
        
        
        

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java
index 528eb4d..689a4b8 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java
@@ -2,25 +2,92 @@ package opennlp.tools.parse_thicket;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 
 public class ParseTreeNode implements IGeneralizer<ParseTreeNode>{
-       String word;
-    // this is the POS tag of the token
-    String pos; 
-    // this is the NER label of the token
-    String ne; 
-    Integer id;
-    //PhraseType 
-    String phraseType;
-    
-    public enum PhraseType {NP("NP"), VP("VP"), PRP("PRP");
-       private PhraseType(final String text) {
-        this.text = text;
-       }
-        private final String text;
-    
-    }
-    
+       String word; // word in normal form, lemma
+       // this is the POS tag of the token
+       String pos; 
+       // this is the NER label of the token
+       String ne; 
+       Integer id;
+       //PhraseType 
+       String phraseType;
+       Map<String, Object> attributes;
+       String normalizedWord;
+       String syntacticDependence;
+       String originalWord; //what actually occurs in a sentence
+
+       String head;
+       String label;
+       String modifier;
+
+
+
+       public String getOriginalWord() {
+               return originalWord;
+       }
+
+       public void setOriginalWord(String originalWord) {
+               this.originalWord = originalWord;
+       }
+
+       public String getHead() {
+               return head;
+       }
+
+       public void setHead(String head) {
+               this.head = head;
+       }
+
+       public String getLabel() {
+               return label;
+       }
+
+       public void setLabel(String label) {
+               this.label = label;
+       }
+
+       public String getModifier() {
+               return modifier;
+       }
+
+       public void setModifier(String modifier) {
+               this.modifier = modifier;
+       }
+
+       public String getNormalizedWord() {
+               return normalizedWord;
+       }
+
+       public void setNormalizedWord(String normalizedWord) {
+               this.normalizedWord = normalizedWord;
+       }
+
+       public String getSyntacticDependence() {
+               return syntacticDependence;
+       }
+
+       public void setSyntacticDependence(String syntacticDependence) {
+               this.syntacticDependence = syntacticDependence;
+       }
+
+       public Map<String, Object> getAttributes() {
+               return attributes;
+       }
+
+       public void setAttributes(Map<String, Object> attributes) {
+               this.attributes = attributes;
+       }
+
+       public enum PhraseType {NP("NP"), VP("VP"), PRP("PRP");
+       private PhraseType(final String text) {
+               this.text = text;
+       }
+       private final String text;
+
+       }
+
        public ParseTreeNode(String word, String pos, String ne, Integer id) {
                super();
                this.word = word;
@@ -28,15 +95,14 @@ public class ParseTreeNode implements 
IGeneralizer<ParseTreeNode>{
                this.ne = ne;
                this.id = id;
        }
-       
+
        public ParseTreeNode(String word, String pos) {
                super();
                this.word = word;
                this.pos = pos;
-               this.ne = ne;
-               this.id = id;
+
        }
-       
+
        public String getPhraseType() {
                return phraseType;
        }
@@ -67,7 +133,7 @@ public class ParseTreeNode implements 
IGeneralizer<ParseTreeNode>{
        public void setId(Integer id) {
                this.id = id;
        } 
-    
+
        public String toString(){
                StringBuffer buf = new StringBuffer();
                if (id!=null)
@@ -81,10 +147,27 @@ public class ParseTreeNode implements 
IGeneralizer<ParseTreeNode>{
                return buf.toString();
        }
 
+       public static String toTreeRepresentationString(List<ParseTreeNode> 
chList){
+               StringBuffer buf = new StringBuffer();
+               for(ParseTreeNode ch: chList){
+                       if (ch.getPos().startsWith(".") || 
ch.getPos().startsWith(",") || ch.getPos().startsWith(";") || 
ch.getPos().startsWith("!"))
+                               continue;
+                       buf.append( "("+ch.getWord()+ " " + ch.getPos() + ")" );
+               }
+               return buf.toString().trim();
+       }
+       public static String toWordString(List<ParseTreeNode> chList){
+               String buf = "";
+               for(ParseTreeNode ch: chList){
+                       buf+=ch.getWord()+ " ";
+               }
+               return buf.trim();
+       }
+
        @Override
        public List<ParseTreeNode> generalize(Object o1, Object o2) {
                List<ParseTreeNode> result = new ArrayList<ParseTreeNode>();
-               
+
                ParseTreeNode w1 = (ParseTreeNode) o1;
                ParseTreeNode w2 = (ParseTreeNode) o2;
                String posGen =  generalizePOS(w1.pos, w2.pos);
@@ -95,7 +178,7 @@ public class ParseTreeNode implements 
IGeneralizer<ParseTreeNode>{
                result.add(newNode);
                return result;
        }
-       
+
        public String generalizeWord(String lemma1, String lemma2){
                if (lemma1.equals(lemma2))
                        return lemma1;
@@ -105,49 +188,49 @@ public class ParseTreeNode implements 
IGeneralizer<ParseTreeNode>{
                        return "*";
                //TODO
                return "*";
-               
+
        }
-       
+
        public String generalizePOS(String pos1, String pos2) {
-           if ((pos1.startsWith("NN") && pos2.equals("NP") || 
pos2.startsWith("NN")
-               && pos1.equals("NP"))) {
-             return "NN";
-           }
-           if ((pos1.startsWith("NN") && pos2.equals("VBG") || 
pos2.startsWith("VBG")
-               && pos1.equals("NN"))) {
-             return "NN";
-           }
-
-           if ((pos1.startsWith("NN") && pos2.equals("ADJP") || 
pos2.startsWith("NN")
-               && pos1.equals("ADJP"))) {
-             return "NN";
-           }
-           if ((pos1.equals("IN") && pos2.equals("TO") || pos1.equals("TO")
-               && pos2.equals("IN"))) {
-             return "IN";
-           }
-           // VBx vs VBx = VB (does not matter which form for verb)
-           if (pos1.startsWith("VB") && pos2.startsWith("VB")) {
-             return "VB";
-           }
-
-           // ABx vs ABy always gives AB
-           if (pos1.equalsIgnoreCase(pos2)) {
-             return pos1;
-           }
-           if (pos1.length() > 2) {
-             pos1 = pos1.substring(0, 2);
-           }
-
-           if (pos2.length() > 2) {
-             pos2 = pos2.substring(0, 2);
-           }
-           if (pos1.equalsIgnoreCase(pos2)) {
-             return pos1 + "*";
-           }
-           return null;
-         }
-
-       
+               if ((pos1.startsWith("NN") && pos2.equals("NP") || 
pos2.startsWith("NN")
+                               && pos1.equals("NP"))) {
+                       return "NN";
+               }
+               if ((pos1.startsWith("NN") && pos2.equals("VBG") || 
pos2.startsWith("VBG")
+                               && pos1.equals("NN"))) {
+                       return "NN";
+               }
+
+               if ((pos1.startsWith("NN") && pos2.equals("ADJP") || 
pos2.startsWith("NN")
+                               && pos1.equals("ADJP"))) {
+                       return "NN";
+               }
+               if ((pos1.equals("IN") && pos2.equals("TO") || pos1.equals("TO")
+                               && pos2.equals("IN"))) {
+                       return "IN";
+               }
+               // VBx vs VBx = VB (does not matter which form for verb)
+               if (pos1.startsWith("VB") && pos2.startsWith("VB")) {
+                       return "VB";
+               }
+
+               // ABx vs ABy always gives AB
+               if (pos1.equalsIgnoreCase(pos2)) {
+                       return pos1;
+               }
+               if (pos1.length() > 2) {
+                       pos1 = pos1.substring(0, 2);
+               }
+
+               if (pos2.length() > 2) {
+                       pos2 = pos2.substring(0, 2);
+               }
+               if (pos1.equalsIgnoreCase(pos2)) {
+                       return pos1 + "*";
+               }
+               return null;
+       }
+
+
 };
 

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java
index db7905d..265a3fa 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java
@@ -61,7 +61,7 @@ public class WordWordInterSentenceRelationArc {
                }
        
                public String toString(){
-                       return 
"<sent="+codeFrom.getFirst()+"-word="+codeFrom.getSecond()+".."+lemmaFrom+"> 
===> "+
+                       return 
arcType.toString()+"&<sent="+codeFrom.getFirst()+"-word="+codeFrom.getSecond()+".."+lemmaFrom+">
 ===> "+
                                        
"<sent="+codeTo.getFirst()+"-word="+codeTo.getSecond()+".."+lemmaTo+">";
                }
 

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
index ce4b600..edd164f 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
@@ -73,7 +73,7 @@ public class MultiSentenceSearchResultsProcessor  {
                                        hit.setSource(match.toString());
                                }
                                if (score < 2){ // attempt to match with 
snippet, if not much luck with original text
-                                       match = 
matcher.assessRelevanceCache(pageSentsAndSnippet[0] ,
+                                       match = 
matcher.assessRelevanceCache(pageSentsAndSnippet[1] ,
                                                        searchQuery);
                                        score = 
parseTreeChunkListScorer.getParseTreeChunkListScore(match);
                                }
@@ -161,7 +161,7 @@ public class MultiSentenceSearchResultsProcessor  {
                        LOG.info("No search results for query '" + query);
                        return null;
                }
-               ProfileReaderWriter.writeReport(reportData, 
"resultsForQuery_"+query.replace(' ', '_')+".csv");
+               //ProfileReaderWriter.writeReport(reportData, 
"resultsForQuery_"+query.replace(' ', '_')+".csv");
                return hits;
        }
        

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java
index eb67724..c568035 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java
@@ -66,7 +66,7 @@ public class 
MultiSentenceExtendedForestSearchResultsProcessorSetFormer  extends
        private List<HitBase> formTreeForestDataSet(
                        List<HitBase> hits, String query, boolean isPositive) {
                List<HitBase> newHitList = new ArrayList<HitBase>(), 
newHitListReRanked = new ArrayList<HitBase>();
-               // form the training set from original documets. Since search 
results are ranked, we set the first half as positive set,
+               // form the training set from original documents. Since search 
results are ranked, we set the first half as positive set,
                //and the second half as negative set.
                // after re-classification, being re-ranked, the search results 
might end up in a different set
                List<String[]> treeBankBuffer = new ArrayList<String[]>();
@@ -117,7 +117,6 @@ public class 
MultiSentenceExtendedForestSearchResultsProcessorSetFormer  extends
                                treeBankBuffer.add(new String[] {posOrNeg+" 
|BT| "+t.toString()+ " |ET|"});
                        }
                } catch (Exception e) {
-                       // TODO Auto-generated catch block
                        e.printStackTrace();
                }
                return treeBankBuffer;

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java
index df6189d..39d348e 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java
@@ -90,7 +90,7 @@ public class MultiSentenceKernelBasedSearchResultsProcessor  
extends MultiSenten
        private List<HitBase> filterOutIrrelevantHitsByTreeKernelLearning(
                        List<HitBase> hits, String query) {
                List<HitBase> newHitList = new ArrayList<HitBase>(), 
newHitListReRanked = new ArrayList<HitBase>();
-               // form the training set from original documets. Since search 
results are ranked, we set the first half as positive set,
+               // form the training set from original documents. Since search 
results are ranked, we set the first half as positive set,
                //and the second half as negative set.
                // after re-classification, being re-ranked, the search results 
might end up in a different set
                List<String[]> treeBankBuffer = new ArrayList<String[]>();

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java
index 9c1c44a..fb5eed8 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.parse_thicket.kernel_interface;
 
 import java.util.ArrayList;
@@ -32,6 +49,22 @@ public class PT2ExtendedTreeForestBuilder {
                return treeBankBuffer;
        }
        
+       private String formTrainingSetFromTextOneLine(String para,  boolean 
positive){
+               String prefix = null;
+               if (positive)
+                       prefix=" 1 ";
+               else
+                       prefix=" -1 ";
+                       
+               ParseThicket pt = 
matcher.buildParseThicketFromTextWithRST(para);
+               List<Tree> forest = pt.getSentences();
+               String line = prefix;
+               for(Tree t: forest){
+                       line+= "|BT| "+t.toString()+ " |ET| ";
+               } 
+               return line;
+       }
+       
        public void formPosNegTrainingSet(String pos, String neg, String path){
                List<String[]> list = formTrainingSetFromText(pos,  true), 
                                negList= formTrainingSetFromText(neg, false);
@@ -50,8 +83,6 @@ public class PT2ExtendedTreeForestBuilder {
                
                ProfileReaderWriter.writeReport(treeBankBuffer, 
path+"unknown.txt", ' ');
                tkRunner.runClassifier(path, "unknown.txt", modelFileName, 
"classifier_output.txt");
-               
-               
        }
        
        

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java
index 4cf3b34..d6a295f 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.parse_thicket.kernel_interface;
 
 import java.util.ArrayList;

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java
----------------------------------------------------------------------
diff --git 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java
 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java
index 47e474f..c980f9f 100644
--- 
a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java
+++ 
b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java
@@ -1,29 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.parse_thicket.kernel_interface;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.logging.Logger;
 
 import opennlp.tools.jsmlearning.ProfileReaderWriter;
 import opennlp.tools.parse_thicket.ParseThicket;
 import opennlp.tools.parse_thicket.ParseTreeNode;
+import opennlp.tools.parse_thicket.VerbNetProcessor;
 import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;
 import opennlp.tools.parse_thicket.matching.Matcher;
 import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder;
 import edu.stanford.nlp.trees.Tree;
 
 public class TreeExtenderByAnotherLinkedTree extends  PT2ThicketPhraseBuilder {
+       private static Logger log = Logger
+                     
.getLogger("opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree");
 
        public List<String> buildForestForCorefArcs(ParseThicket pt){
                List<String> results = new ArrayList<String>();
                for(WordWordInterSentenceRelationArc arc: pt.getArcs()){
-                       if (!arc.getArcType().getType().startsWith("coref"))
-                               continue;
+                       //if (!arc.getArcType().getType().startsWith("coref"))
+                       //      continue;
                        int fromSent = arc.getCodeFrom().getFirst();
                        int toSent = arc.getCodeTo().getFirst();
+                       if (fromSent <1 || toSent <1 ) // TODO problem in 
sentence enumeration => skip building extended trees
+                               return results;
+                       
                        String wordFrom = arc.getLemmaFrom();
                        String wordTo = arc.getLemmaTo();
 
-                       List<Tree> trees = 
getASubtreeWithRootAsNodeForWord1(pt.getSentences().get(fromSent-1), 
pt.getSentences().get(fromSent-1), new String[]{ wordFrom});
+                       List<Tree> trees = 
getASubtreeWithRootAsNodeForWord1(pt.getSentences().get(fromSent-1), 
+                                       pt.getSentences().get(fromSent-1), new 
String[]{ wordFrom});
                        if (trees==null || trees.size()<1)
                                continue;
                        System.out.println(trees);
@@ -32,13 +57,52 @@ public class TreeExtenderByAnotherLinkedTree extends  
PT2ThicketPhraseBuilder {
                        System.out.println(sb.toString());
                        results.add(sb.toString());
                }
-               /*
-               List<String[]> treeBankBuffer = new ArrayList<String[]>();
-               for(String t: results){
-                       treeBankBuffer.add(new String[] {" 0 
|BT|"+t.toString()+ "|ET|"});
+               // if no arcs then orig sentences
+               if (results.isEmpty()){
+                       for(Tree t: pt.getSentences()){
+                               results.add(t.toString());
+                       }
+               }
+               return results;
+       }
+       // sentences in pt are enumerated starting from 0;
+       //this func works with Sista version of Stanford NLP and sentences are 
coded from 0
+       public List<String> buildForestForRSTArcs(ParseThicket pt){
+               List<String> results = new ArrayList<String>();
+               for(WordWordInterSentenceRelationArc arc: pt.getArcs()){
+                       // TODO - uncomment
+                       //if (!arc.getArcType().getType().startsWith("rst"))
+                       //   continue;
+                       int fromSent = arc.getCodeFrom().getFirst();
+                       int toSent = arc.getCodeTo().getFirst();
+                       
+                       String wordFrom = arc.getLemmaFrom();
+                       String wordTo = arc.getLemmaTo();
+                       
+                       if (wordFrom == null || wordFrom.length()<1 || wordTo 
== null || wordTo.length()<1) 
+                               log.severe("Empty lemmas for RST arc "+ arc);
+
+                       List<Tree> trees = 
getASubtreeWithRootAsNodeForWord1(pt.getSentences().get(fromSent), 
+                                       pt.getSentences().get(fromSent), new 
String[]{ wordFrom});
+                       if (trees==null || trees.size()<1)
+                               continue;
+                       System.out.println(trees);
+                       StringBuilder sb = new StringBuilder(10000);    
+                       Tree tree = trees.get(0);
+                       // instead of phrase type for the root of the tree, we 
want to put the RST relation name
+                       if (arc.getArcType().getType().startsWith("rst"))
+                               tree.setValue(arc.getArcType().getSubtype());
+                       
+                       toStringBuilderExtenderByAnotherLinkedTree1(sb, 
pt.getSentences().get(toSent), tree, new String[]{wordTo});
+                       System.out.println(sb.toString());
+                       results.add(sb.toString());
+               }
+               // if no arcs then orig sentences
+               if (results.isEmpty()){
+                       for(Tree t: pt.getSentences()){
+                               results.add(t.toString());
+                       }
                }
-               ProfileReaderWriter.writeReport(treeBankBuffer, 
"C:\\stanford-corenlp\\tree_kernel\\unknownForest.txt", ' ');
-               */
                return results;
        }
 
@@ -75,8 +139,6 @@ public class TreeExtenderByAnotherLinkedTree extends  
PT2ThicketPhraseBuilder {
                                        }
                                        sb.append(' ');
                                        
toStringBuilderExtenderByAnotherLinkedTree1(sb, treeToInsert, null, null);
-                                       int z=0; z++;
-
                                } else {
                                        for (Tree kid : kids) {
                                                sb.append(' ');
@@ -90,6 +152,7 @@ public class TreeExtenderByAnotherLinkedTree extends  
PT2ThicketPhraseBuilder {
                }
        }
 
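+       // given a parse tree and a list of words, finds the child whose text ends with the last word and returns the siblings that follow it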
        public List<Tree> getASubtreeWithRootAsNodeForWord1(Tree tree, Tree 
currentSubTree, String[] corefWords){
                if (currentSubTree.isLeaf()){
                        return null;
@@ -97,26 +160,23 @@ public class TreeExtenderByAnotherLinkedTree extends  
PT2ThicketPhraseBuilder {
                List<Tree> result = null;
                Tree[] kids = currentSubTree.children();
                if (kids != null) {
-                       boolean bInsert=false;
+                       boolean bFound=false;
                        String word = corefWords[corefWords.length-1];
-
                        for (Tree kid : kids) {
-                               if (bInsert){
+                               if (bFound){
                                        result.add(kid);
                                } else {
-
                                        String phraseStr = kid.toString();
                                        phraseStr=phraseStr.replace(")", "");
-                                       if (phraseStr.endsWith(word)){
-                                               bInsert=true;
+                                       if (phraseStr.endsWith(word)){ // found 
+                                               bFound=true;
                                                result = new ArrayList<Tree>();
                                        }
                                }
                        }
-                       if (bInsert){
+                       if (bFound){
                                return result;
                        }
-
                        // if not a selected node, proceed with iteration
                        for (Tree kid : kids) {
                                List<Tree> ts = 
getASubtreeWithRootAsNodeForWord1(tree, kid, corefWords);
@@ -128,7 +188,7 @@ public class TreeExtenderByAnotherLinkedTree extends  
PT2ThicketPhraseBuilder {
                return null;
        }
 
-
+       // now obsolete
        public Tree[] getASubtreeWithRootAsNodeForWord(Tree tree, Tree 
currentSubTree, String[] corefWords){
                if (currentSubTree.isLeaf()){
                        return null;
@@ -238,7 +298,7 @@ public class TreeExtenderByAnotherLinkedTree extends  
PT2ThicketPhraseBuilder {
                }
        }
 
-       private StringBuilder toStringBuilder(StringBuilder sb, Tree t) {
+       public StringBuilder toStringBuilder(StringBuilder sb, Tree t) {
                if (t.isLeaf()) {
                        if (t.label() != null) {
                                sb.append(t.label().value());
@@ -263,22 +323,25 @@ public class TreeExtenderByAnotherLinkedTree extends PT2ThicketPhraseBuilder {
        }
 
        public static void main(String[] args){
+               VerbNetProcessor p = VerbNetProcessor.
+                               
getInstance("/Users/borisgalitsky/Documents/workspace/deepContentInspection/src/test/resources");
 
+                               
                Matcher matcher = new Matcher();
                TreeExtenderByAnotherLinkedTree extender = new 
TreeExtenderByAnotherLinkedTree();
                
                ParseThicket pt = matcher.buildParseThicketFromTextWithRST(//"I 
went to the forest to look for a tree. I found out that it was thick and 
green");
-                               "Iran refuses to accept the UN proposal to end 
its dispute over its work on nuclear weapons."+
+                               "Iran refuses to accept the UN proposal to end 
its dispute over its work on nuclear weapons. "+
                                "UN nuclear watchdog passes a resolution 
condemning Iran for developing its second uranium enrichment site in secret. " +
                                "A recent IAEA report presented diagrams that 
suggested Iran was secretly working on nuclear weapons. " +
                                "Iran envoy says its nuclear development is for 
peaceful purpose, and the material evidence against it has been fabricated by 
the US. ");
 
                List<String> results = extender.buildForestForCorefArcs(pt);
                System.out.println(results);
-               System.exit(0);
+               //System.exit(0);
 
                List<Tree> forest = pt.getSentences();
                
-               List<Tree> trees = 
extender.getASubtreeWithRootAsNodeForWord1(forest.get(1), forest.get(1), new 
String[]{"it"});
+               List<Tree> trees = 
extender.getASubtreeWithRootAsNodeForWord1(forest.get(1), forest.get(1), new 
String[]{"its"});
                System.out.println(trees);
                StringBuilder sb = new StringBuilder(10000);    
                extender.toStringBuilderExtenderByAnotherLinkedTree1(sb, 
forest.get(0), trees.get(0), new String[]{"the", "forest"});

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
index f00904f..294fb38 100644
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
+++ b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package opennlp.tools.parse_thicket.kernel_interface;
 
 import java.io.BufferedReader;
@@ -30,11 +47,18 @@ public class TreeKernelRunner {
 
        public void runLearner(String dir, String learning_file, String  
model_file)
        {
+                       if (!dir.endsWith("/"))
+                               dir+="/";
+               String[] runString = new String[]{dir+"svm_learn","-t", 
"5","-j","2","-W","A", dir+learning_file,  dir+model_file};
+               runEXE(runString, dir);
+       }
+       public void runLearnerWin(String dir, String learning_file, String  
model_file)
+       {
                dir = dir.replace('/', '\\');
                
                if (!dir.endsWith("\\"))
                                dir+="\\";
-               String[] runString = new String[]{dir+"svm_learn.exe","-t", 
"5", dir+learning_file,  dir+model_file};
+               String[] runString = new String[]{dir+"svm_learn.exe","-t", 
"5","-j","2","-W","A", dir+learning_file,  dir+model_file};
                runEXE(runString, dir);
        }
        
@@ -42,6 +66,13 @@ public class TreeKernelRunner {
        //svm_classify example_file model_file predictions_file
        public void runClassifier(String dir, String example_file, String  
model_file, String predictions_file)
        {
+               if (!dir.endsWith("/"))
+                               dir+="/";
+               String[] runString = new String[]{dir+"svm_classify", 
dir+example_file,  dir+model_file, dir+predictions_file};
+               runEXE(runString, dir);
+       }
+       public void runClassifierWin(String dir, String example_file, String  
model_file, String predictions_file)
+       {
                dir = dir.replace('/', '\\');
                
                if (!dir.endsWith("\\"))

http://git-wip-us.apache.org/repos/asf/opennlp-sandbox/blob/9aa270c1/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/GeneralizationListReducer.java
----------------------------------------------------------------------
diff --git a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/GeneralizationListReducer.java b/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/GeneralizationListReducer.java
deleted file mode 100644
index ef0569a..0000000
--- a/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/GeneralizationListReducer.java
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.parse_thicket.matching;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-
-public class GeneralizationListReducer {
-  public List<ParseTreePath> applyFilteringBySubsumption_OLD(
-      List<ParseTreePath> result) {
-    List<ParseTreePath> resultDupl = new ArrayList<ParseTreePath>();
-    resultDupl.addAll(new HashSet<ParseTreePath>(result));
-    result = resultDupl;
-    if (result.size() < 2)
-      return result; // nothing to reduce
-    List<ParseTreePath> resultReduced = new ArrayList<ParseTreePath>();
-    int size = result.size();
-    for (int i = 0; i < size; i++) {
-      Boolean bSubChunk = false;
-      for (int j = 0; j < size; j++) {
-        if (i == j) {
-          continue;
-        }
-        if (result.get(j).isASubChunk(result.get(i))) {
-          bSubChunk = true;
-        }
-      }
-      if (!bSubChunk)
-        resultReduced.add(result.get(i));
-    }
-
-    if (resultReduced.size() < 1) {
-      System.err.println("Wrong subsumption reduction");
-    }
-
-    if (resultReduced.size() > 1) {
-      int z = 0;
-      z++;
-    }
-    return resultReduced;
-
-  }
-
-  public List<ParseTreePath> applyFilteringBySubsumptionOLD(
-      List<ParseTreePath> result) {
-    List<ParseTreePath> resultDupl = null;
-    if (result.size() < 2)
-      return result; // nothing to reduce
-    List<ParseTreePath> resultReduced = new ArrayList<ParseTreePath>();
-    int size = result.size();
-    resultDupl = new ArrayList<ParseTreePath>(result);
-    for (int s = 0; s < size; s++) {
-      for (int i = 0; i < resultDupl.size(); i++) {
-        Boolean bStop = false;
-        for (int j = 0; j < resultDupl.size(); j++) {
-          if (i == j) {
-            continue;
-          }
-          if (result.get(j).isASubChunk(result.get(i))
-              && !result.get(i).isASubChunk(result.get(j))) {
-            resultDupl.remove(i);
-            bStop = true;
-            break;
-          }
-        }
-        if (bStop) {
-          break;
-        }
-      }
-    }
-    resultReduced = resultDupl;
-    if (resultReduced.size() < 1) {
-      System.err.println("Wrong subsumption reduction");
-    }
-
-    if (resultReduced.size() > 1) {
-      int z = 0;
-      z++;
-    }
-    return resultReduced;
-
-  }
-
-  public List<ParseTreePath> applyFilteringBySubsumption(
-      List<ParseTreePath> result) {
-    List<Integer> resultDuplIndex = new ArrayList<Integer>();
-    List<ParseTreePath> resultReduced = new ArrayList<ParseTreePath>();
-
-    if (result.size() < 2) {
-      return result; // nothing to reduce
-    }
-    // remove empty
-    for (ParseTreePath ch : result) {
-      if (ch.getLemmas().size() > 0) {
-        resultReduced.add(ch);
-      }
-    }
-    result = resultReduced;
-
-    for (int i = 0; i < result.size(); i++) {
-      for (int j = i + 1; j < result.size(); j++) {
-        if (i == j) {
-          continue;
-        }
-        if (result.get(j).isASubChunk(result.get(i))) {
-          resultDuplIndex.add(i);
-        } else if (result.get(i).isASubChunk(result.get(j))) {
-          resultDuplIndex.add(j);
-        }
-      }
-
-    }
-    resultReduced = new ArrayList<ParseTreePath>();
-    for (int i = 0; i < result.size(); i++) {
-      if (!resultDuplIndex.contains(i)) {
-        resultReduced.add(result.get(i));
-      }
-    }
-
-    if (resultReduced.size() < 1) {
-      System.err.println("Wrong subsumption reduction");
-      resultReduced = result;
-    }
-
-    return resultReduced;
-
-  }
-
-  // testing sub-chunk functionality and
-  // elimination more general according to subsumption relation
-
-}

[5/5] opennlp-sandbox git commit: merge from bgalitsky's own git repo

Reply via email to