Author: lewismc
Date: Fri Jan 18 20:38:55 2013
New Revision: 1435334

URL: http://svn.apache.org/viewvc?rev=1435334&view=rev
Log:
NUTCH-1453 Substantiate tests for IndexingFilters

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1435334&r1=1435333&r2=1435334&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Jan 18 20:38:55 2013
@@ -2,9 +2,11 @@ Nutch Change Log
 
 Release 2.2 - Current Development
 
+* NUTCH-1453 Substantiate tests for IndexingFilters (lufeng via lewismc)
+
 * NUTCH-1274 Fix [cast] javac warnings (Tejas Patil via lewismc)
 
-* NUTCH-1516 Nutch 2.x pom.xml out of sync with ivy.xml
+* NUTCH-1516 Nutch 2.x pom.xml out of sync with ivy.xml (lewismc)
 
 * NUTCH-1510 Upgrade to Hadoop 1.1.1 (markus)
 

Modified: nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1435334&r1=1435333&r2=1435334&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Fri Jan 18 20:38:55 2013
@@ -20,6 +20,7 @@ import junit.framework.TestCase;
 
 import org.apache.avro.util.Utf8;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.NutchConfiguration;
 
@@ -31,18 +32,70 @@ public class TestIndexingFilters extends
    */
   public void testNonExistingIndexingFilter() throws IndexingException {
     Configuration conf = NutchConfiguration.create();
+    conf.addResource("nutch-default.xml");
+    conf.addResource("crawl-tests.xml");
+
     String class1 = "NonExistingFilter";
     String class2 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
     conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
 
     IndexingFilters filters = new IndexingFilters(conf);
-//    filters.filter(new NutchDocument(), new ParseImpl("text", new ParseData(
-//        new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
-//        "http://www.example.com/"), new CrawlDatum(), new Inlinks());
     WebPage page = new WebPage();
     page.setText(new Utf8("text"));
     page.setTitle(new Utf8("title"));
     filters.filter(new NutchDocument(),"http://www.example.com/",page);
   }
 
+  /**
+   * Test behaviour when NutchDOcument is null
+   * @throws IndexingException
+   */
+  public void testNutchDocumentNullIndexingFilter() throws IndexingException{
+    Configuration conf = NutchConfiguration.create();
+    conf.addResource("nutch-default.xml");
+    conf.addResource("crawl-tests.xml");
+
+    IndexingFilters filters = new IndexingFilters(conf);
+    WebPage page = new WebPage();
+    page.setText(new Utf8("text"));
+    page.setTitle(new Utf8("title"));
+    NutchDocument doc = filters.filter(null,"http://www.example.com/",page);
+
+    assertNull(doc);
+  }
+
+    /**
+     * Test behaviour when reset the index filter order will not take effect
+     *
+     * @throws IndexingException
+     */
+    public void testFilterCacheIndexingFilter() throws IndexingException{
+        Configuration conf = NutchConfiguration.create();
+        conf.addResource("nutch-default.xml");
+        conf.addResource("crawl-tests.xml");
+
+        String class1 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
+        conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1);
+
+        IndexingFilters filters1 = new IndexingFilters(conf);
+        WebPage page = new WebPage();
+        page.setText(new Utf8("text"));
+        page.setTitle(new Utf8("title"));
+        NutchDocument fdoc1 = filters1.filter(new NutchDocument(),"http://www.example.com/",page);
+
+        // add another index filter
+        String class2 = "org.apache.nutch.indexer.metadata.MetadataIndexer";
+        // set content metadata
+        Metadata md = new Metadata();
+        md.add("example","data");
+        // set content metadata property defined in MetadataIndexer
+        conf.set("index.content.md","example");
+        // add MetadataIndxer filter
+        conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
+        IndexingFilters filters2 = new IndexingFilters(conf);
+        NutchDocument fdoc2 = filters2.filter(new NutchDocument(),"http://www.example.com/",page);
+        assertEquals(fdoc1.getFieldNames().size(),fdoc2.getFieldNames().size());
+    }
+
+
 }


Reply via email to