Author: lewismc Date: Fri Jan 18 20:38:55 2013 New Revision: 1435334 URL: http://svn.apache.org/viewvc?rev=1435334&view=rev Log: NUTCH-1453 Substantiate tests for IndexingFilters
Modified: nutch/branches/2.x/CHANGES.txt nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1435334&r1=1435333&r2=1435334&view=diff ============================================================================== --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Fri Jan 18 20:38:55 2013 @@ -2,9 +2,11 @@ Nutch Change Log Release 2.2 - Current Development +* NUTCH-1453 Substantiate tests for IndexingFilters (lufeng via lewismc) + * NUTCH-1274 Fix [cast] javac warnings (Tejas Patil via lewismc) -* NUTCH-1516 Nutch 2.x pom.xml out of sync with ivy.xml +* NUTCH-1516 Nutch 2.x pom.xml out of sync with ivy.xml (lewismc) * NUTCH-1510 Upgrade to Hadoop 1.1.1 (markus) Modified: nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1435334&r1=1435333&r2=1435334&view=diff ============================================================================== --- nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original) +++ nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Fri Jan 18 20:38:55 2013 @@ -20,6 +20,7 @@ import junit.framework.TestCase; import org.apache.avro.util.Utf8; import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.metadata.Metadata; import org.apache.nutch.storage.WebPage; import org.apache.nutch.util.NutchConfiguration; @@ -31,18 +32,70 @@ public class TestIndexingFilters extends */ public void testNonExistingIndexingFilter() throws IndexingException { Configuration conf = NutchConfiguration.create(); + conf.addResource("nutch-default.xml"); + conf.addResource("crawl-tests.xml"); + String class1 = "NonExistingFilter"; String class2 = "org.apache.nutch.indexer.basic.BasicIndexingFilter"; 
conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2); IndexingFilters filters = new IndexingFilters(conf); -// filters.filter(new NutchDocument(), new ParseImpl("text", new ParseData( -// new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text( -// "http://www.example.com/"), new CrawlDatum(), new Inlinks()); WebPage page = new WebPage(); page.setText(new Utf8("text")); page.setTitle(new Utf8("title")); filters.filter(new NutchDocument(),"http://www.example.com/",page); } + /** + * Test behaviour when NutchDocument is null + * @throws IndexingException + */ + public void testNutchDocumentNullIndexingFilter() throws IndexingException{ + Configuration conf = NutchConfiguration.create(); + conf.addResource("nutch-default.xml"); + conf.addResource("crawl-tests.xml"); + + IndexingFilters filters = new IndexingFilters(conf); + WebPage page = new WebPage(); + page.setText(new Utf8("text")); + page.setTitle(new Utf8("title")); + NutchDocument doc = filters.filter(null,"http://www.example.com/",page); + + assertNull(doc); + } + + /** + * Test that resetting the index filter order does not take effect + * + * @throws IndexingException + */ + public void testFilterCacheIndexingFilter() throws IndexingException{ + Configuration conf = NutchConfiguration.create(); + conf.addResource("nutch-default.xml"); + conf.addResource("crawl-tests.xml"); + + String class1 = "org.apache.nutch.indexer.basic.BasicIndexingFilter"; + conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1); + + IndexingFilters filters1 = new IndexingFilters(conf); + WebPage page = new WebPage(); + page.setText(new Utf8("text")); + page.setTitle(new Utf8("title")); + NutchDocument fdoc1 = filters1.filter(new NutchDocument(),"http://www.example.com/",page); + + // add another index filter + String class2 = "org.apache.nutch.indexer.metadata.MetadataIndexer"; + // set content metadata + Metadata md = new Metadata(); + md.add("example","data"); + // set content metadata property 
defined in MetadataIndexer + conf.set("index.content.md","example"); + // add MetadataIndexer filter + conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2); + IndexingFilters filters2 = new IndexingFilters(conf); + NutchDocument fdoc2 = filters2.filter(new NutchDocument(),"http://www.example.com/",page); + assertEquals(fdoc1.getFieldNames().size(),fdoc2.getFieldNames().size()); + } + + }