Author: ab
Date: Mon Sep 25 11:14:31 2006
New Revision: 449765

URL: http://svn.apache.org/viewvc?view=rev&rev=449765
Log:
Catch exception on invalid urls, and continue collecting valid ones.

Modified:
    lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java

Modified: lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java
URL: http://svn.apache.org/viewvc/lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java?view=diff&rev=449765&r1=449764&r2=449765
==============================================================================
--- lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java (original)
+++ lucene/nutch/branches/branch-0.8/src/java/org/apache/nutch/parse/OutlinkExtractor.java Mon Sep 25 11:14:31 2006
@@ -16,6 +16,7 @@
 
 package org.apache.nutch.parse;
 
+import java.net.MalformedURLException;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -108,7 +109,13 @@
         }
         result = matcher.getMatch();
         url = result.group(0);
-        outlinks.add(new Outlink(url, anchor, conf));
+        url = result.group(0);
+        try {
+          Outlink outlink = new Outlink(url, anchor, conf);
+          outlinks.add(new Outlink(url, anchor, conf));
+        } catch (MalformedURLException mue) {
+          LOG.warn("Invalid url: '" + url + "', skipping.");
+        }
       }
     } catch (Exception ex) {
       // if the matcher fails (perhaps a malformed URL) we just log it and move on


Reply via email to