Author: siren Date: Sat Jan 6 01:39:20 2007 New Revision: 493438 URL: http://svn.apache.org/viewvc?view=rev&rev=493438 Log: Fix NUTCH-325
Added: lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java Modified: lucene/nutch/trunk/CHANGES.txt lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java Modified: lucene/nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?view=diff&rev=493438&r1=493437&r2=493438 ============================================================================== --- lucene/nutch/trunk/CHANGES.txt (original) +++ lucene/nutch/trunk/CHANGES.txt Sat Jan 6 01:39:20 2007 @@ -117,6 +117,9 @@ 37. NUTCH-425, NUTCH-426 - Fix anchors pollution. Continue after skipping bad URLs. (Michael Stack via ab) +38. NUTCH-325 - UrlFilters.java throws NPE in case urlfilter.order contains + Filters that are not in plugin.includes (Stefan Groschupf, siren) + Release 0.8 - 2006-07-25 Modified: lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java?view=diff&rev=493438&r1=493437&r2=493438 ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/net/URLFilters.java Sat Jan 6 01:39:20 2007 @@ -17,6 +17,7 @@ package org.apache.nutch.net; +import java.util.ArrayList; import java.util.HashMap; import org.apache.nutch.plugin.Extension; @@ -28,10 +29,11 @@ /** Creates and caches {@link URLFilter} implementing plugins.*/ public class URLFilters { + public static final String URLFILTER_ORDER = "urlfilter.order"; private URLFilter[] filters; public URLFilters(Configuration conf) { - String order = conf.get("urlfilter.order"); + String order = conf.get(URLFILTER_ORDER); this.filters = (URLFilter[]) conf.getObject(URLFilter.class.getName()); if (this.filters == null) { @@ -60,12 +62,16 @@ conf.setObject(URLFilter.class.getName(), filterMap .values().toArray(new URLFilter[0])); } else { - 
URLFilter[] filter = new URLFilter[orderedFilters.length]; + ArrayList filters = new ArrayList(); for (int i = 0; i < orderedFilters.length; i++) { - filter[i] = (URLFilter) filterMap + URLFilter filter = (URLFilter) filterMap .get(orderedFilters[i]); + if(filter != null){ + filters.add(filter); + } } - conf.setObject(URLFilter.class.getName(), filter); + conf.setObject(URLFilter.class.getName(), + filters.toArray(new URLFilter[filters.size()])); } } catch (PluginRuntimeException e) { throw new RuntimeException(e); Added: lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java?view=auto&rev=493438 ============================================================================== --- lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java (added) +++ lucene/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java Sat Jan 6 01:39:20 2007 @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.nutch.net; + +import org.apache.hadoop.conf.Configuration; +import org.apache.nutch.util.NutchConfiguration; + +import junit.framework.TestCase; + +public class TestURLFilters extends TestCase { + + /** + * Testcase for NUTCH-325. + * @throws URLFilterException + */ + public void testNonExistingUrlFilter() throws URLFilterException { + Configuration conf = NutchConfiguration.create(); + String class1 = "NonExistingFilter"; + String class2 = "org.apache.nutch.urlfilter.prefix.PrefixURLFilter"; + conf.set(URLFilters.URLFILTER_ORDER, class1 + " " + class2); + + URLFilters normalizers = new URLFilters(conf); + normalizers.filter("http://someurl/"); + } + +}