Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.parse_thicket.apps;
+
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.ContentGenerator;
+import opennlp.tools.similarity.apps.HitBase;
+import opennlp.tools.similarity.apps.RelatedSentenceFinder;
+import junit.framework.TestCase;
+
+
+/**
+ * Exercises {@link ContentGenerator#buildParagraphOfGeneratedText} on hits about Albert
+ * Einstein. Each test fills a {@link HitBase} with a snippet, a URL and a title, passes it
+ * to the generator with a phrase and an empty string list, and checks that the returned hit
+ * has original sentences and fragments and that its first fragment contains an expected text.
+ */
+public class RelatedSentenceFinderTest extends TestCase {
+  ContentGenerator finder = new ContentGenerator();
+
+  // Despite its name, this test also goes through buildParagraphOfGeneratedText.
+  public void testAugmentWithMinedSentencesAndVerifyRelevanceTest() {
+    HitBase input = newHit(
+        "He is pictured here in the Swiss Patent Office where he did ...",
+        "http://apod.nasa.gov/apod/ap951219.html",
+        "Albert Einstein");
+    assertFirstFragmentMentions(input, "Swiss Patent Office", "Swiss Patent Office");
+  }
+
+  public void testBuildParagraphOfGeneratedTextTest() {
+    HitBase input = newHit(
+        "Albert Einstein was a German-born theoretical physicist who developed the general theory of relativity, one of the two pillars of modern physics (alongside ...",
+        "http://en.wikipedia.org/wiki/Albert_Einstein",
+        "Albert Einstein - Wikipedia, the free encyclopedia");
+    assertFirstFragmentMentions(input, "Albert Einstein", "Albert Einstein");
+  }
+
+  public void testBuildParagraphOfGeneratedTextTestYearInTheEnd() {
+    HitBase input = newHit(
+        "Albert Einstein was born ... Germany, on March 14, 1879",
+        "http://www.nobelprize.org/nobel_prizes/physics/laureates/1921/einstein-bio.html",
+        "Albert Einstein - Biographical");
+    assertFirstFragmentMentions(input, "Albert Einstein", "Albert Einstein");
+  }
+
+  public void testBuildParagraphOfGeneratedTextTestBio1() {
+    HitBase input = newHit(
+        "Today, the practical applications of Einsteins theories ...",
+        "http://einstein.biz/biography.php",
+        "Biography");
+    assertFirstFragmentMentions(input, "applications of Einstein theories ", "Einstein");
+  }
+
+  public void testBuildParagraphOfGeneratedTextTestBio2() {
+    HitBase input = newHit(
+        "The theory of relativity is a beautiful example of ...",
+        "https://en.wikiquote.org/wiki/Albert_Einstein",
+        "Albert Einstein");
+    assertFirstFragmentMentions(input, "beautiful example of", "relativity");
+  }
+
+  public void testBuildParagraphOfGeneratedTextTestBio3() {
+    HitBase input = newHit(
+        "I cannot conceive of a god who rewards and punishes his creatures or has a will of the kind that we experience ...",
+        "http://www.ldolphin.org/einstein.html",
+        "Some Quotations of ALBERT EINSTEIN (1879-1955)");
+    assertFirstFragmentMentions(input, "cannot conceive a god", "cannot conceive");
+  }
+
+  public void testBuildParagraphOfGeneratedTextTestBio4() {
+    HitBase input = newHit(
+        " In 1905 our view of the world was changed dramatically and ...",
+        "http://philosophynow.org/issues/93/Albert_Einstein_1879-1955",
+        "ALBERT EINSTEIN (1879-1955)");
+    assertFirstFragmentMentions(input, "view of the world", "view of the world");
+  }
+
+  /** Builds a hit carrying the given snippet text, source URL and title. */
+  private static HitBase newHit(String abstractText, String url, String title) {
+    HitBase hit = new HitBase();
+    hit.setAbstractText(abstractText);
+    hit.setUrl(url);
+    hit.setTitle(title);
+    return hit;
+  }
+
+  /**
+   * Runs the generator on {@code input} with {@code phrase} and asserts that the result has
+   * original sentences, has fragments, and that its first fragment contains {@code expected}.
+   */
+  private void assertFirstFragmentMentions(HitBase input, String phrase, String expected) {
+    HitBase result = finder.buildParagraphOfGeneratedText(input, phrase, new ArrayList<String>());
+    assertNotNull("generator returned no hit for '" + phrase + "'", result);
+    System.out.println(result.toString());
+    assertNotNull("original sentences are missing", result.getOriginalSentences());
+    assertTrue("no original sentences", result.getOriginalSentences().size() > 0);
+    assertTrue("no fragments", result.getFragments().size() > 0);
+    assertTrue("first fragment lacks '" + expected + "'",
+        result.getFragments().get(0).getFragment().indexOf(expected) > -1);
+  }
+}
+
+
+//[Albert Einstein (/ælbrt anstan/; German. albt antan ( listen); 14 March 1879 18 April 1955) was a German-born theoretical physicist who developed the general theory of relativity, one of the two pillars of modern physics (alongside quantum mechanics). 2 3 While best known for his massenergy equivalence formula E = mc2 (which has been dubbed "the world's most famous equation"), 4 he received the 1921 Nobel Prize in Physics "for his services to theoretical physics, and especially for his discovery of the law of the photoelectric effect". 5 The latter was pivotal in establishing quantum theory. nullNear the beginning of his career, Einstein thought that Newtonian mechanics was no longer enough to reconcile the laws of classical mechanics with the laws of the electromagnetic field. This led to the development of his special theory of relativity.,
+
+//"Today, the practical applications of Einsteins theories include the development of the television"
\ No newline at end of file
Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.parse_thicket.apps;
+
+
+import opennlp.tools.similarity.apps.HitBase;
+import junit.framework.TestCase;
+
+
+/**
+ * Checks that {@link SnippetToParagraph#formTextFromOriginalPageGivenSnippet} returns a hit
+ * whose list of original sentences is present and non-empty for a sample search snippet.
+ */
+public class SnippetToParagraphTest extends TestCase {
+  SnippetToParagraph converter = new SnippetToParagraph();
+
+  public void testConversionTest() {
+    HitBase input = new HitBase();
+    input.setAbstractText("... complicity in the military's latest failure to uphold their own standards of conduct. Nor do I see a distinction between the service member who orchestrated this offense ...");
+    input.setUrl("http://armedservices.house.gov/index.cfm/press-releases?ContentRecord_id=b5d9aeab-6745-4eba-94ea-12295fd40e67");
+    input.setTitle("Press Releases - News - Armed Services Republicans");
+
+    HitBase result = converter.formTextFromOriginalPageGivenSnippet(input);
+
+    assertNotNull("converter returned no hit", result);
+    assertNotNull("original sentences are missing", result.getOriginalSentences());
+    assertTrue("no original sentences", result.getOriginalSentences().size() > 0);
+  }
+
+}
Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/StoryDiscourseNavigatorTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/StoryDiscourseNavigatorTest.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/StoryDiscourseNavigatorTest.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/StoryDiscourseNavigatorTest.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.parse_thicket.apps;
+
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.HitBase;
+import opennlp.tools.similarity.apps.RelatedSentenceFinder;
+import opennlp.tools.similarity.apps.StoryDiscourseNavigator;
+import junit.framework.TestCase;
+
+/** Checks the additional keywords that {@link StoryDiscourseNavigator} returns for an entity. */
+public class StoryDiscourseNavigatorTest extends TestCase {
+  RelatedSentenceFinder finder = new RelatedSentenceFinder();
+
+  // The keywords for "Albert Einstein" must be non-empty and mention physics and relativity.
+  public void testGeneratedExtednsionKeywords() {
+    StoryDiscourseNavigator navigator = new StoryDiscourseNavigator();
+    String[] res = navigator.obtainAdditionalKeywordsForAnEntity("Albert Einstein");
+    String rendered = Arrays.asList(res).toString();
+    System.out.println(rendered);
+    assertTrue(res.length > 0);
+    assertTrue(rendered.contains("physics"));
+    assertTrue(rendered.contains("relativity"));
+
+  }
+
+}
+
+//[Albert Einstein (/ælbrt anstan/; German. albt antan ( listen); 14 March 1879 18 April 1955) was a German-born theoretical physicist who developed the general theory of relativity, one of the two pillars of modern physics (alongside quantum mechanics). 2 3 While best known for his massenergy equivalence formula E = mc2 (which has been dubbed "the world's most famous equation"), 4 he received the 1921 Nobel Prize in Physics "for his services to theoretical physics, and especially for his discovery of the law of the photoelectric effect". 5 The latter was pivotal in establishing quantum theory. nullNear the beginning of his career, Einstein thought that Newtonian mechanics was no longer enough to reconcile the laws of classical mechanics with the laws of the electromagnetic field. This led to the development of his special theory of relativity.,
Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package opennlp.tools.parse_thicket.communicative_actions;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.parse_thicket.ParseThicket;
+import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;
+import opennlp.tools.parse_thicket.matching.Matcher;
+import opennlp.tools.similarity.apps.HitBase;
+
+import junit.framework.TestCase;
+
+/** Counts the arcs whose type starts with "ca" in parse thickets built from sample paragraphs. */
+public class CommunicativeActionsArcBuilderTest extends TestCase {
+  Matcher matcher = new Matcher();
+
+  public void testCommunicativeActionsArcBuilderTestQ() {
+    String text = "As a US citizen living abroad, I am concerned about the health reform regulation of 2014. "
+        + "I do not want to wait till I am sick to buy health insurance. "
+        + "Yet I am afraid I will end up being requested to pay the tax. "
+        + "Although I live abroad, I am worried about having to pay a fine for being reported as not having health insurance coverage. ";
+    assertTrue(findCaArcs(text).size() > 11);
+  }
+
+  public void testCommunicativeActionsArcBuilderTestA() {
+    String text = "People are worried about paying a fine for not carrying health insurance coverage, having been informed by IRS about new regulations. "
+        + "Yet hardly anyone is expected to pay the tax, when the health reform law takes full effect in 2014. "
+        + "The individual mandate confirms that people dont wait until they are sick to buy health insurance. "
+        + "People are exempt from health insurance fine if they report they make too little money, or US citizens living abroad.";
+    assertTrue(findCaArcs(text).size() > 5);
+  }
+
+  /**
+   * Builds a parse thicket from {@code text} via buildParseThicketFromTextWithRST, prints every
+   * arc whose type starts with "ca" and returns those arcs in the order the thicket lists them.
+   */
+  private List<WordWordInterSentenceRelationArc> findCaArcs(String text) {
+    ParseThicket thicket = matcher.buildParseThicketFromTextWithRST(text);
+    List<WordWordInterSentenceRelationArc> caArcs =
+        new ArrayList<WordWordInterSentenceRelationArc>();
+    for (WordWordInterSentenceRelationArc arc : thicket.getArcs()) {
+      if (!arc.getArcType().getType().startsWith("ca")) {
+        continue;
+      }
+      caArcs.add(arc);
+      System.out.println(arc);
+    }
+    return caArcs;
+  }
+
+}
Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,23 @@
+package opennlp.tools.parse_thicket.matching;
+
+import java.util.List;
+
+import opennlp.tools.parse_thicket.ParseTreeNode;
+
+
+import junit.framework.TestCase;
+
+/** Smoke test: {@link PT2ThicketPhraseBuilder#parsePhrase} must return a non-empty node list. */
+public class PT2ThicketPhraseBuilderTest extends TestCase {
+  private PT2ThicketPhraseBuilder builder = new PT2ThicketPhraseBuilder();
+
+  public void testParsePhrase() {
+    String bracketedParse = "(NP (NNP Iran)) (VP (VBZ refuses) (S (VP (TO to) (VP (VB accept) (S (NP (DT the) "
+        + "(NNP UN) (NN proposal)) (VP (TO to) (VP (VB end) (NP (PRP$ its) (NN dispute))))))))";
+
+    List<ParseTreeNode> nodes = builder.parsePhrase("NP", bracketedParse);
+    System.out.println(nodes);
+    assertNotNull(nodes);
+    assertFalse(nodes.isEmpty());
+  }
+}
Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java URL:
http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java?rev=1555944&view=auto ============================================================================== --- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java (added) +++ opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java Mon Jan 6 17:48:30 2014 @@ -0,0 +1,106 @@ +package opennlp.tools.parse_thicket.matching; + +import java.util.ArrayList; +import java.util.List; + +import opennlp.tools.parse_thicket.ParseThicket; +import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc; +import opennlp.tools.textsimilarity.ParseTreeChunk; +import junit.framework.TestCase; + +public class PTMatcherTest extends TestCase { + Matcher m = new Matcher(); + + public void testMatchTwoParaTestReduced(){ + String q = "I am a US citizen living abroad, and concerned about the health reform regulation of 2014. I do not want to wait till I am sick to buy health insurance. I am afraid I will end up paying the tax."; + String a = "People are worried about having to pay a fine for not carrying health insurance coverage got more guidance this week with some new federal regulations. "+ + "Hardly anyone will end up paying the tax when the health reform law takes full effect in 2014. "+ + "The individual mandate makes sure that people dont wait until they are sick to buy health insurance. 
"+ + "People are exempt from health insurance fine if they make too little money to file an income tax return, or US citizens living abroad."; + List<List<ParseTreeChunk>> res = m.assessRelevance(q, a); + System.out.print(res); + assertTrue(res!=null); + assertTrue(res.size()>0); + assertEquals(res.toString(), "[[ [NNP-us NN-citizen VBG-living RB-abroad ], [,-, CC-* ], [DT-a NNP-* ], [DT-the NN-* NN-health NN-reform NN-* CD-2014 ], [NN-* IN-* CD-2014 ], [NN-health NN-* NN-* IN-* ], [NN-regulation ], " + + " [DT-the NN-health NN-reform NN-* ], [CD-2014 ], [NN-health NN-insurance ], [DT-the NN-tax ], [NN-tax ]], [ [VBP-* DT-a NNP-* NN-health NN-* NN-* NN-regulation ], [NN-health NN-* NN-* NN-regulation ], [NN-regulation ], " + + " [DT-the NN-* NN-health NN-reform NN-* CD-2014 ], [NN-* IN-* CD-2014 ], [IN-* NN-health NN-* ], [NNP-us NN-citizen VBG-living RB-abroad ], [,-, CC-* ], [NN-health NN-* NN-* IN-* ], " + + " [IN-about NN-health NN-* NN-* NN-regulation ], [VBG-living RB-abroad ], [TO-to VB-* VB-wait IN-* PRP-* VBP-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [TO-to VB-* JJ-sick TO-to VB-buy NN-health NN-insurance ], " + + "[TO-to VB-* NN-health NN-insurance ], [TO-to VB-buy NN-health NN-insurance ], [VB-* TO-to VB-* VB-* NN-health NN-insurance ], [TO-to VB-* VB-* NN-health NN-insurance ], [RB-not VB-* NN-health NN-insurance ], [VBG-paying DT-* NN-* ], " + + "[MD-will VB-end RP-up VBG-paying DT-the NN-tax ], [VB-end RP-up VBG-paying DT-the NN-tax ], [VBG-paying DT-the NN-tax ], [VBP-do RB-* VB-* TO-* TO-to VB-* ], [VB-* VB-wait IN-* PRP-* VBP-* JJ-sick TO-to VB-buy NN-health NN-insurance ], " + + " [VB-wait IN-* PRP-* VBP-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [VBP-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [TO-to VB-* VB-buy NN-health NN-insurance ], [VB-buy NN-health NN-insurance ], [NN-health NN-insurance NN-tax ], " + + "[TO-to VB-* NN-tax ], [NN-tax ], [VB-* TO-to VB-* VB-wait IN-* PRP-* VBP-* JJ-sick TO-to VB-buy NN-health NN-insurance 
], [VB-* TO-to VB-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [VB-* NN-health NN-insurance ], [VB-* VBG-paying DT-* NN-* ]]]"); + + } + + public void testMatchTwoParaTest1(){ + List<List<ParseTreeChunk>> res = m.assessRelevance("Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons."+ + "UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " + + "A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " + + "Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. " + + , "Iran refuses the UN offer to end a conflict over its nuclear weapons."+ + "UN passes a resolution prohibiting Iran from developing its uranium enrichment site. " + + "A recent UN report presented charts saying Iran was working on nuclear weapons. " + + "Iran envoy to UN states its nuclear development is for peaceful purpose, and the evidence against its claim is fabricated by the US. 
"); + System.out.print(res); + assertTrue(res!=null); + assertTrue(res.size()>0); + assertEquals(res.toString(), "[[ [DT-the NNP-un NN-* ], [PRP$-its JJ-nuclear NNS-weapons ], [NN-work IN-on JJ-nuclear NNS-weapons ], [PRP$-its NN-* JJ-nuclear NNS-* ], [PRP$-its JJ-nuclear NNS-* ], [DT-a NN-* PRP$-its JJ-* NN-* ], [DT-a NN-resolution VBG-* NNP-iran IN-* VBG-developing PRP$-its NN-uranium NN-enrichment NN-site ], [NN-* VBG-* NNP-iran ], [DT-a NN-resolution VBG-* NNP-* NNP-iran ], [DT-a NN-resolution NNP-iran ], [DT-a NNP-iran ], [DT-a PRP$-its ], [NNP-iran IN-* VBG-developing PRP$-its NN-uranium NN-enrichment NN-site ], [IN-for ], [VBG-* PRP$-its JJ-* NN-* ], [PRP$-its NN-uranium NN-enrichment NN-site ], [PRP$-its JJ-* NN-* ], [VBD-* NNP-iran VBD-was VBG-working IN-on JJ-nuclear NNS-weapons ], [VBG-* JJ-nuclear NNS-* ], [JJ-nuclear NNS-weapons ], [JJ-nuclear NNS-* ], [NNP-iran NN-envoy ], [NN-* IN-* PRP-it ], [NN-* PRP-it ], [DT-the NN-* NN-evidence IN-against PRP-it ], [DT-the NN-* NN-* ], [PRP-it ], [DT-the NNP-us ], [DT-the NNP-* ], [DT-a NN-resolution DT-a JJ-recent NNP-* NN-report ], [DT-a JJ-recent NNP-* NN-report ], [NN-* PRP$-its JJ-nuclear NN-* ], [PRP$-its JJ-nuclear NN-* ], [VBZ-* PRP$-its ], [NN-development ], [PRP$-its JJ-nuclear NN-development ], [JJ-peaceful NN-purpose ], [NN-* VBZ-says ], [NNP-un JJ-nuclear NN-* VBZ-* ], [NN-* VBZ-* PRP$-its JJ-nuclear NN-development VBZ-is IN-for JJ-peaceful NN-purpose ], [JJ-nuclear NN-* VBZ-* NN-development VBZ-is IN-for JJ-peaceful NN-purpose ], [NNP-un NN-* PRP$-its ]], [ [VBZ-refuses TO-to VB-* DT-* NNP-* ], [VB-* DT-the NNP-un NN-* TO-to VB-end PRP$-its ], [NNP-un ], [NNP-* NN-* TO-to ], [TO-to VB-end PRP$-its ], [VBZ-* DT-a NN-* PRP$-its JJ-* NN-* ], [VBZ-passes DT-a NN-resolution VBG-* NNP-iran IN-* VBG-developing PRP$-its NN-uranium NN-enrichment NN-site ], [NN-* VBG-* NNP-iran ], [VBG-* NNP-iran IN-* VBG-developing PRP$-its NN-uranium NN-enrichment NN-site ], [IN-for ], [PRP$-its JJ-* NN-* ], 
[VBG-developing PRP$-its NN-uranium NN- enrichment NN-site ], [VBG-* PRP$-its JJ-* NN-* ], [VBD-presented NNS-* NNP-iran VBD-was VBG-working IN-on JJ-nuclear NNS-weapons ], [VBD-* NNP-iran VBD-was VBG-working IN-on JJ-nuclear NNS-weapons ], [NNP-iran ], [VBD-was VBG-working IN-on JJ-nuclear NNS-weapons ], [JJ-nuclear NNS-weapons ], [VBG-* JJ-nuclear NNS-* ], [VBG-working IN-on JJ-nuclear NNS-weapons ], [PRP$-its JJ-nuclear NN-* ], [NN-development ], [VBZ-says JJ-nuclear NN-* ], [VBZ-* PRP$-its JJ-nuclear NN-development VBZ-is IN-for JJ-peaceful NN-purpose ], [VBZ-* JJ-nuclear NN-* ], [VBZ-is IN-for JJ-peaceful NN-purpose ], [VBN-* VBN-fabricated IN-by DT-the NNP-us ], [VBN-fabricated IN-by DT-the NNP-us ], [TO-to VB-* DT-* NNP-* VB-end PRP$-its ], [VB-end PRP$-its ], [NN-* IN-over PRP$-its ], [PRP$-its JJ-nuclear NNS-weapons ], [DT-a ], [TO-* VB-* PRP$-its NN-* ], [VB-* PRP$-its NN-* ], [VB-* PRP$-its JJ-nuclear NNS-* ], [DT-the NNP-* ], [TO-to NNP-un ], [NN-work IN-on JJ-nuclear NNS-weapons ]]] "); + } + + public void testMatchTwoParaTest2(){ + List<List<ParseTreeChunk>> res = m.assessRelevance("I am a US citizen living abroad, and concerned about the health reform regulation of 2014. "+ + "I do not want to wait till I am sick to buy health insurance. "+ + "I am afraid I will end up paying the tax. "+ + "I am worried about having to pay a fine for not having health insurance coverage. " + , + "People are worried about having to pay a fine for not carrying health insurance coverage got more guidance this week with some new federal regulations. "+ + "Hardly anyone will end up paying the tax when the health reform law takes full effect in 2014. "+ + "The individual mandate makes sure that people dont wait until they are sick to buy health insurance. 
"+ + "People are exempt from health insurance fine if they make too little money to file an income tax return, or US citizens living abroad."); + System.out.print(res); + assertTrue(res!=null); + assertTrue(res.size()>0); + assertEquals(res.toString(), "[[ [NNP-us NN-citizen VBG-living RB-abroad ], [,-, CC-* ], [DT-a NNP-* ], [DT-the NN-* NN-health NN-reform NN-* CD-2014 ], " + + "[NN-* IN-* CD-2014 ], [NN-health NN-* NN-* IN-* ], [NN-regulation ], [DT-the NN-health NN-reform NN-* ], [CD-2014 ], [DT-the NN-tax ], [NN-tax ], " + + " [DT-a NN-fine ], [NN-health NN-insurance NN-coverage ], [TO-to VB-* DT-* NN-* ], [NN-fine IN-* ], [NN-health NN-insurance NN-* ]], " + + "[ [VBP-* DT-a NNP-* NN-health NN-* NN-* NN-regulation ], [NN-health NN-* NN-* NN-regulation ], [NN-regulation ], [DT-the NN-* NN-health NN-reform NN-* CD-2014 ], " + + " [NN-* IN-* CD-2014 ], [IN-* NN-health NN-* ], [NNP-us NN-citizen VBG-living RB-abroad ], [,-, CC-* ], [NN-health NN-* NN-* IN-* ], [IN-about NN-health NN-* NN-* NN-regulation ], [VBG-living RB-abroad ], [TO-to VB-* VB-wait IN-* PRP-* VBP-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [TO-to VB-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [TO-to VB-buy NN-health NN-insurance ], [VBG-* VB-pay DT-* NN-* NN-health NN-* NN-* ], [VB-pay DT-* NN-* NN-health NN-* NN-* ], [RB-not VBG-* NN-health NN-insurance NN-coverage ], [VBG-having NN-health NN-insurance NN-coverage ], [NN-health NN-insurance NN-tax ], [TO-to VB-* NN-tax ], [VB-* TO-to VB-* VB-* NN-health NN-insurance ], [TO-to VB-* VB-* NN-health NN-insurance ], [TO-to VB-* VB-pay DT-a NN-fine IN-for RB-not VBG-* NN-health NN-insurance NN-coverage ], [VB-pay DT-a NN-fine IN-for RB-not VBG-* NN-health NN-insurance NN-coverage ], [RB-not VB-* NN-health NN-insurance NN-coverage ], [VBP-do RB-* VB-* TO-* TO-t o VB-* ], [VB-* VB-wait IN-* PRP-* VBP-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [VB-wait IN-* PRP-* VBP-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [VBP-* JJ-sick TO-to 
VB-buy NN-health NN-insurance ], [TO-to VB-* VB-buy NN-health NN-insurance ], [VB-buy NN-health NN-insurance ], [VB-* TO-to VB-* VB-wait IN-* PRP-* VBP-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [VB-* TO-to VB-* JJ-sick TO-to VB-buy NN-health NN-insurance ], [VB-* TO-to VB-* VB-pay DT-a NN-fine IN-for RB-not VBG-* NN-health NN-insurance NN-coverage ], [VB-* NN-health NN-insurance NN-coverage ], [VBG-having TO-to VB-pay DT-a NN-fine IN-for RB-not VBG-* NN-health NN-insurance NN-coverage ], [TO-to VB-pay DT-a NN-fine IN-for RB-not VBG-* NN-health NN-insurance NN-coverage ], [VBG-paying DT-* NN-* DT-a NN-fine IN-for RB-not VBG-* NN-health NN-insurance NN-coverage ], [VBG-* NN-health NN-insurance NN-coverage ], [MD-will VB-end RP-up VBG-paying DT-the NN-tax ], [VB-end RP-up VBG-paying DT -the NN-tax NN-health NN-* NN-* ], [VBG-paying DT-the NN-tax NN-health NN-* NN-* ], [TO-to VB-* NN-health NN-insurance ], [NN-fine IN-* ], [NN-health NN-insurance NN-* ], [TO-to VB-* DT-* NN-* ], [NN-tax ], [VBP-* VBN-worried IN-about VBG-having TO-to VB-pay DT-a NN-fine IN-for RB-not VBG-* NN-health NN-insurance NN-coverage ], [VB-* VBG-paying DT-* NN-* DT-a NN-fine IN-for RB-not VBG-* NN-health NN-insurance NN-coverage ], " + + " [VBN-worried IN-about VBG-having TO-to VB-pay DT-a NN-fine IN-for RB-not VBG-* NN-health NN-insurance NN-coverage ]]]"); + } + + + public void testMatchTwoParaTestCA(){ + List<List<ParseTreeChunk>> res = m.assessRelevance("As a US citizen living abroad, I am concerned about the health reform regulation of 2014. "+ + "I do not want to wait till I am sick to buy health insurance. "+ + "Yet I am afraid I will end up paying the tax. "+ + "Although I live abroad, I am worried about having to pay a fine for being reported as not having health insurance coverage. " + , + "People are worried about paying a fine for not carrying health insurance coverage, having been informed by IRS about new regulations. 
"+ + "Yet hardly anyone is expected to pay the tax, when the health reform law takes full effect in 2014. "+ + "The individual mandate confirms that people dont wait until they are sick to buy health insurance. "+ + "People are exempt from health insurance fine if they report they make too little money, or US citizens living abroad."); + System.out.print(res); + assertTrue(res!=null); + assertTrue(res.size()>0); + } + + public void testMatchTwoParaTestCA1(){ + String text1 = "As a US citizen living abroad, I am concerned about the health reform regulation of 2014. "+ + "I do not want to wait till I am sick to buy health insurance. "+ + "Yet I am afraid I will end up being requested to pay the tax. "+ + "Although I live abroad, I am worried about having to pay a fine for being reported as not having health insurance coverage. "; + + String text2 = "People are worried about paying a fine for not carrying health insurance coverage, having been informed by IRS about new regulations. "+ + "Yet hardly anyone is expected to pay the tax, when the health reform law takes full effect in 2014. "+ + "The individual mandate confirms that people dont wait until they are sick to buy health insurance. 
"+ + "People are exempt from health insurance fine if they report they make too little money, or US citizens living abroad."; + List<List<ParseTreeChunk>> res = m.assessRelevance(text1, text2); + System.out.print(res); + assertTrue(res!=null); + assertTrue(res.size()>0); + } + +} + + Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java?rev=1555944&view=auto ============================================================================== --- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java (added) +++ opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java Mon Jan 6 17:48:30 2014 @@ -0,0 +1,76 @@ +package opennlp.tools.parse_thicket.matching; + +import java.util.ArrayList; +import java.util.List; + +import edu.stanford.nlp.trees.Tree; + +import opennlp.tools.parse_thicket.ParseCorefsBuilder; +import opennlp.tools.parse_thicket.ParseThicket; +import opennlp.tools.parse_thicket.ParseTreeNode; +import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc; +import opennlp.tools.textsimilarity.ParseTreeChunk; +import junit.framework.TestCase; + +public class PTPhraseBuilderTest extends TestCase { + private ParseCorefsBuilder ptBuilder = ParseCorefsBuilder.getInstance(); + private PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder(); + + public void testBuildPhraseForUCP(){ + String q = "I am a US citizen living abroad, and concerned about the health reform regulation of 2014. " + + "I do not want to wait till I am sick to buy health insurance. 
I am afraid I will end up paying the tax."; + + ParseThicket pt = ptBuilder.buildParseThicket(q); + List<ParseTreeNode> sentence = pt.getNodesThicket().get(0); + Tree ptree = pt.getSentences().get(0); + List<List<ParseTreeNode>> res = phraseBuilder.buildPT2ptPhrasesForASentence(ptree, sentence ); + assertTrue(res!=null); + assertEquals(res.get(7).toString(), + "[<10>ADJP'concerned':JJ, <11>ADJP'about':IN, <12>ADJP'the':DT, <13>ADJP'health':NN, <14>ADJP'reform':NN, <15>ADJP'regulation':NN, <16>ADJP'of':IN, <17>ADJP'2014':CD]"); + + assertTrue(res.size()>12); + + sentence = pt.getNodesThicket().get(1); + ptree = pt.getSentences().get(1); + ptree.pennPrint(); + res = phraseBuilder.buildPT2ptPhrasesForASentence(ptree, sentence ); + assertTrue(res!=null); + assertTrue(res.size()>0); + + } + + public void testParsePhrase(){ + String line = "(NP (NNP Iran)) (VP (VBZ refuses) (S (VP (TO to) (VP (VB accept) (S (NP (DT the) " + + "(NNP UN) (NN proposal)) (VP (TO to) (VP (VB end) (NP (PRP$ its) (NN dispute))))))))"; + + List<ParseTreeNode> res = phraseBuilder. parsePhrase("NP", line); + System.out.println(res); + assertEquals(res.toString(), + "[NP'Iran':NNP, NP'refuses':VBZ, NP'to':TO, NP'accept':VB, NP'the':DT, NP'UN':NNP, NP'proposal':NN, NP'to':TO, NP'end':VB, NP'its':PRP$, NP'dispute':NN]"); + + + line = "(VP (VBP am) (NP (NP (DT a) (NNP US) (NN citizen)) (UCP (VP (VBG living) (ADVP (RB abroad))) (, ,) (CC and) (ADJP (JJ concerned) (PP (IN about) (NP (NP (DT the) (NN health) (NN reform) (NN regulation)) (PP (IN of) (NP (CD 2014)))))))))"; + res = phraseBuilder. 
parsePhrase("VP", line); + System.out.println(res); + assertEquals(res.toString(), "[VP'am':VBP, VP'a':DT, VP'US':NNP, VP'citizen':NN, VP'living':VBG, VP'abroad':RB, VP',':,, VP'and':CC, VP'concerned':JJ, VP'about':IN, VP'the':DT, VP'health':NN, VP'reform':NN, VP'regulation':NN, VP'of':IN, VP'2014':CD]"); + + + line = "(VP (TO to) (VP (VB wait) (SBAR (IN till) (S (NP (PRP I)) (VP (VBP am) (ADJP (JJ sick) (S (VP (TO to) (VP (VB buy) (NP (NN health) (NN insurance)))))))))))"; + res = phraseBuilder. parsePhrase("VP", line); + assertEquals(res.toString(), "[VP'to':TO, VP'wait':VB, VP'till':IN, VP'I':PRP, VP'am':VBP, VP'sick':JJ, VP'to':TO, VP'buy':VB, VP'health':NN, VP'insurance':NN]"); + System.out.println(res); + } + + public void testBuilderPTPhrase(){ + String q = "I am a US citizen living abroad, and concerned about the health reform regulation of 2014. " + + "I do not want to wait till I am sick to buy health insurance. I am afraid I will end up paying the tax."; + ParseThicket pt = ptBuilder.buildParseThicket(q); + List<List<ParseTreeNode>> res = phraseBuilder.buildPT2ptPhrases(pt); + assertTrue(res!=null); + assertTrue(res.size()>0); + + } + +} + + Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java?rev=1555944&view=auto ============================================================================== --- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java (added) +++ opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java Mon Jan 6 17:48:30 2014 @@ -0,0 +1,33 @@ +package opennlp.tools.parse_thicket.matching; + +import java.util.ArrayList; +import java.util.List; + +import opennlp.tools.parse_thicket.ParseThicket; +import 
opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc; +import opennlp.tools.textsimilarity.ParseTreeChunk; +import opennlp.tools.textsimilarity.SentencePairMatchResult; +import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; +import junit.framework.TestCase; + +public class PairwiseMatcherTest extends TestCase { + public void testMatchTwoParaTestReduced(){ + String q = "I am a US citizen living abroad, and concerned about the health reform regulation of 2014. I do not want to wait till I am sick to buy health insurance. I am afraid I will end up paying the tax."; + String a = "People are worried about having to pay a fine for not carrying health insurance coverage got more guidance this week with some new federal regulations. "+ + "Hardly anyone will end up paying the tax when the health reform law takes full effect in 2014. "+ + "The individual mandate makes sure that people dont wait until they are sick to buy health insurance. "+ + "People are exempt from health insurance fine if they make too little money to file an income tax return, or US citizens living abroad."; + ParserChunker2MatcherProcessor sm = ParserChunker2MatcherProcessor.getInstance(); + SentencePairMatchResult res1 = sm.assessRelevance(a, q); + System.out.print(res1.getMatchResult()); + System.out.print(res1); + assertTrue(res1!=null); + assertTrue(res1.getMatchResult().size()>0); + + } + + + +} + + Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java?rev=1555944&view=auto ============================================================================== --- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java (added) +++ 
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java Mon Jan 6 17:48:30 2014 @@ -0,0 +1,125 @@ +package opennlp.tools.parse_thicket.pattern_structure; + +import java.util.*; +import java.io.*; + +import junit.framework.TestCase; + +import opennlp.tools.parse_thicket.ParseCorefsBuilder; +import opennlp.tools.parse_thicket.ParseThicket; +import opennlp.tools.parse_thicket.ParseTreeNode; +import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder; +import opennlp.tools.textsimilarity.ParseTreeChunk; +import opennlp.tools.textsimilarity.ParseTreeMatcherDeterministic; + + +public class PhrasePatternStructureTest extends TestCase{ + ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic(); + ParseCorefsBuilder ptBuilder = ParseCorefsBuilder.getInstance(); + PT2ThicketPhraseBuilder phraseBuilder = new PT2ThicketPhraseBuilder(); + + public void testLeoTolstoyTest() { + PhrasePatternStructure lat = new PhrasePatternStructure(3,1); + + String description; + ParseThicket pt1; + List<List<ParseTreeNode>> phrs1; + List<List<ParseTreeChunk>> sent1GrpLst; + //Example 1 + description = "Eh bien, mon prince, so Genoa and Lucca are now no more than family estates of the Bonapartes. No, I warn you, if you donÕt say that this means war, if you still permit yourself to condone all the infamies, all the atrocities, of this AntichristÑand thatÕs what I really believe he isÑI will have nothing more to do with you, you are no longer my friend, my faithful slave, as you say. But how do you do, how do you do? I see that I am frightening you. Sit down and tell me all about it."; + pt1 = ptBuilder.buildParseThicket(description); + phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); + sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); + lat.AddIntent(sent1GrpLst, 0); + + description = "Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. 
But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that AntichristÑI really believe he is AntichristÑI will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? I see I have frightened youÑsit down and tell me all the news"; + pt1 = ptBuilder.buildParseThicket(description); + phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); + sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); + lat.AddIntent(sent1GrpLst, 0); + + + description = "Well, Prince, Genoa and Lucca are now nothing more than estates taken over by the Buonaparte family.1 No, I give you fair warning. If you wonÕt say this means war, if you will allow yourself to condone all the ghastly atrocities perpetrated by that Antichrist Ð yes, thatÕs what I think he is Ð I shall disown you. YouÕre no friend of mine Ð not the Òfaithful slaveÓ you claim to be . . . But how are you? How are you keeping? I can see IÕm intimidating you. Do sit down and talk to me."; + pt1 = ptBuilder.buildParseThicket(description); + phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); + sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); + lat.AddIntent(sent1GrpLst, 0); + + description = "Well, prince, Genoa and Lucca are now nothing more than the apanages, than the private property of the Bonaparte family. I warn you that if you do not tell me we are going to have war, if you still allow yourself to condone all the infamies, all the atrocities of this Antichrist - on my word I believe he is Antichrist - that is the end of our acquaintance; you are no longer my friend, you are no longer my faithful slave, as you call yourself. Now, be of good courage, I see I frighten you. 
Come, sit down and tell me all about it."; + pt1 = ptBuilder.buildParseThicket(description); + phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); + sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); + lat.AddIntent(sent1GrpLst, 0); + + lat.printLattice(); + lat.printLatticeStats(); + } + //Example 2 + public void testNewsTest() { + PhrasePatternStructure lat = new PhrasePatternStructure(3,4); + + String description; + ParseThicket pt1; + List<List<ParseTreeNode>> phrs1; + List<List<ParseTreeChunk>> sent1GrpLst; + /*List<List<ParseTreeChunk>> res = m.assessRelevance("At least 9 people were killed and 43 others wounded in shootings and bomb attacks, including four car bombings, in central and western Iraq on Thursday, the police said. A car bomb parked near the entrance of the local government compound in Anbar's provincial capital of Ramadi, some 110 km west of Baghdad, detonated in the morning near a convoy of vehicles carrying the provincial governor Qassim al-Fahdawi, a provincial police source told Xinhua on condition of anonymity.", + "Officials say a car bomb in northeast Baghdad killed four people, while another bombing at a market in the central part of the capital killed at least two and wounded many more. Security officials also say at least two policemen were killed by a suicide car bomb attack in the northern city of Mosul. No group has claimed responsibility for the attacks, which occurred in both Sunni and Shi'ite neighborhoods." + );*/ + description = "At least 9 people were killed and 43 others wounded in shootings and bomb attacks, including four car bombings, in central and western Iraq on Thursday, the police said. 
A car bomb parked near the entrance of the local government compound in Anbar's provincial capital of Ramadi, some 110 km west of Baghdad, detonated in the morning near a convoy of vehicles carrying the provincial governor Qassim al-Fahdawi, a provincial police source told Xinhua on condition of anonymity."; + pt1 = ptBuilder.buildParseThicket(description); + phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); + sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); + lat.AddIntent(sent1GrpLst, 0); + + description = "Officials say a car bomb in northeast Baghdad killed four people, while another bombing at a market in the central part of the capital killed at least two and wounded many more. Security officials also say at least two policemen were killed by a suicide car bomb attack in the northern city of Mosul. No group has claimed responsibility for the attacks, which occurred in both Sunni and Shi'ite neighborhoods."; + pt1 = ptBuilder.buildParseThicket(description); + phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); + sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); + lat.AddIntent(sent1GrpLst, 0); + + description = "Two car bombs killed at least four people and wounded dozens of others on Monday in one of the bloodiest attacks this year in Dagestan, a turbulent province in Russia's North Caucasus region where armed groups are waging an Islamist insurgency. Car bombs, suicide bombings and firefights are common in Dagestan, at the centre of an insurgency rooted in two post-Soviet wars against separatist rebels in neighbouring Chechnya. 
Such attacks are rare in other parts of Russia, but in a separate incident in a suburb of Moscow on Monday, security forces killed two suspected militants alleged to have been plotting an attack in the capital and arrested a third suspect after a gunbattle"; + // Description = "AMMAN, Jordan (AP) Ñ A Syrian government official says a car bomb has exploded in a suburb of the capital Damascus, killing three people and wounding several others. The Britain-based Syrian Observatory for Human Rights confirmed the Sunday explosion in Jouber, which it said has seen heavy clashes recently between rebels and the Syrian army. It did not have any immediate word on casualties. It said the blast targeted a police station and was carried out by the Jabhat al-Nusra, a militant group linked to al-Qaida, did not elaborate."; + // Description = "A car bombing in Damascus has killed at least nine security forces, with aid groups urging the evacuation of civilians trapped in the embattled Syrian town of Qusayr. The Syrian Observatory for Human Rights said on Sunday the explosion, in the east of the capital, appeared to have been carried out by the extremist Al-Nusra Front, which is allied to al-Qaeda, although there was no immediate confirmation. 
In Lebanon, security sources said two rockets fired from Syria landed in a border area, and Israeli war planes could be heard flying low over several parts of the country."; + pt1 = ptBuilder.buildParseThicket(description); + phrs1 = phraseBuilder.buildPT2ptPhrases(pt1); + sent1GrpLst = lat.formGroupedPhrasesFromChunksForPara(phrs1); + lat.AddIntent(sent1GrpLst, 0); + + + + lat.printLattice(); + lat.printConceptByPosition(0); + /* + Set<Integer> intent = new HashSet<Integer>(); + intent.add(0); + intent.add(1); + int gen = lat.GetMaximalConcept(intent,0); + System.out.println("generator: " + gen); + intent.clear(); + intent.add(0); + intent.add(3); + + lat.AddIntent(intent, 0); + //System.out.println("after first addintent"); + //lat.printConceptByPosition(0); + //lat.printConceptByPosition(1); + intent.clear(); + intent.add(0); + intent.add(2); + lat.AddIntent(intent, 0); + + intent.clear(); + intent.add(1); + intent.add(2); + + lat.AddIntent(intent, 0); + intent.clear(); + intent.add(1); + intent.add(2); + intent.add(3); + lat.AddIntent(intent, 0); + lat.printLattice(); + lat.printLatticeStats(); + */ + } +} \ No newline at end of file Added: opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java?rev=1555944&view=auto ============================================================================== --- opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java (added) +++ opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java Mon Jan 6 17:48:30 2014 @@ -0,0 +1,67 @@ +package opennlp.tools.parse_thicket.rhetoric_structure; + +import java.util.ArrayList; +import java.util.Arrays; +import 
java.util.List; + +import junit.framework.TestCase; + +import opennlp.tools.parse_thicket.IGeneralizer; +import opennlp.tools.parse_thicket.Pair; +import opennlp.tools.parse_thicket.ParseThicket; +import opennlp.tools.parse_thicket.ParseTreeNode; +import opennlp.tools.parse_thicket.matching.Matcher; +import opennlp.tools.textsimilarity.ParseTreeChunk; + + +public class RhetoricStructureMarkerTest extends TestCase { + + private RhetoricStructureMarker rstMarker = new RhetoricStructureMarker(); + private Matcher matcher = new Matcher(); + + public RhetoricStructureMarkerTest(){ + + + } + + public void testRSTmarker(){ + String text1 = "As a US citizen living abroad, I am concerned about the health reform regulation of 2014. "+ + "I do not want to wait till I am sick to buy health insurance. "+ + "Yet I am afraid I will end up being requested to pay the tax. "+ + "Although I live abroad, I am worried about having to pay a fine for being reported as not having health insurance coverage. "; + + String text2 = "People are worried about paying a fine for not carrying health insurance coverage, having been informed by IRS about new regulations. "+ + "Yet hardly anyone is expected to pay the tax, when the health reform law takes full effect in 2014. "+ + "The individual mandate confirms that people dont wait until they are sick to buy health insurance. 
"+ + "People are exempt from health insurance fine as long as they report they make too little money, or US citizens living abroad."; + ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text1); + for(List<ParseTreeNode> sent: pt.getNodesThicket()){ + List<Pair<String, Integer[]>> res = rstMarker .extractRSTrelationInSentenceGetBoundarySpan(sent); + System.out.println(rstMarker.markerToString(res)); + } + + //assertTrue(res.size()>1); + + + pt = matcher.buildParseThicketFromTextWithRST(text2); + for(List<ParseTreeNode> sent: pt.getNodesThicket()){ + List<Pair<String, Integer[]>> res = rstMarker .extractRSTrelationInSentenceGetBoundarySpan(sent); + System.out.println(rstMarker.markerToString(res)); + } + + } + + public void testLocal(){ + ParseTreeNode[] sent = + new ParseTreeNode[]{new ParseTreeNode("he","prn"), new ParseTreeNode("was","vbz"), new ParseTreeNode("more","jj"), + new ParseTreeNode(",",","), new ParseTreeNode("than",","), new ParseTreeNode("little","jj"), new ParseTreeNode("boy","nn"), + new ParseTreeNode(",",","), new ParseTreeNode("however","*"), new ParseTreeNode(",",","), + new ParseTreeNode("he","prp"), new ParseTreeNode("was","vbz"), new ParseTreeNode("adult","jj") + }; + + List<Pair<String, Integer[]>> res = rstMarker.extractRSTrelationInSentenceGetBoundarySpan(Arrays.asList(sent)); + assertTrue(res.size()>2); + assertTrue(res.get(0).getFirst().startsWith("contrast")); + System.out.println(rstMarker.markerToString(res)); + } +} Added: opennlp/sandbox/opennlp-similarity/src/test/resources/tree_kernel/tree_kernel.zip URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/test/resources/tree_kernel/tree_kernel.zip?rev=1555944&view=auto ============================================================================== Binary file - no diff available. 
Propchange: opennlp/sandbox/opennlp-similarity/src/test/resources/tree_kernel/tree_kernel.zip ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream
