vgritsenko    2002/08/16 21:10:12

  Modified:    src/java/org/apache/cocoon/components/crawler Tag:
                        cocoon_2_0_3_branch SimpleCocoonCrawlerImpl.java
  Log:
  sync with head (fix NPE, close reader)
  
  Revision  Changes    Path
  No                   revision
  
  
  No                   revision
  
  
  1.9.2.2   +35 -20    xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java
  
  Index: SimpleCocoonCrawlerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java,v
  retrieving revision 1.9.2.1
  retrieving revision 1.9.2.2
  diff -u -r1.9.2.1 -r1.9.2.2
  --- SimpleCocoonCrawlerImpl.java      7 Aug 2002 10:52:44 -0000       1.9.2.1
  +++ SimpleCocoonCrawlerImpl.java      17 Aug 2002 04:10:12 -0000      1.9.2.2
  @@ -85,8 +85,7 @@
    * @version CVS $Id$
    */
   public class SimpleCocoonCrawlerImpl extends AbstractLoggable
  -    implements CocoonCrawler, Configurable, Disposable, Recyclable
  -{
  +        implements CocoonCrawler, Configurable, Disposable, Recyclable {
   
       /**
        * Config element name specifying expected link content-typ.
  @@ -162,7 +161,7 @@
       /**
        * Default value of <code>user-agent</code> configuration value.
        * @see Constants#COMPLETE_NAME
  -     *      
  +     *
        * @since
        */
       public final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME;
  @@ -234,7 +233,7 @@
        * @since
        */
       public void configure(Configuration configuration)
  -             throws ConfigurationException {
  +            throws ConfigurationException {
   
           Configuration[] children;
           children = configuration.getChildren(INCLUDE_CONFIG);
  @@ -250,7 +249,7 @@
                       }
                   } catch (RESyntaxException rese) {
                       getLogger().error("Cannot create including regular-expression for " +
  -                        pattern, rese);
  +                            pattern, rese);
                   }
               }
           } else {
  @@ -272,7 +271,7 @@
                       }
                   } catch (RESyntaxException rese) {
                       getLogger().error("Cannot create excluding regular-expression for " +
  -                        pattern, rese);
  +                            pattern, rese);
                   }
               }
           } else {
  @@ -414,12 +413,12 @@
        */
       private void setDefaultExcludeFromCrawling() {
           String[] EXCLUDE_FROM_CRAWLING_DEFAULT = {
  -                ".*\\.gif(\\?.*)?$",
  -                ".*\\.png(\\?.*)?$",
  -                ".*\\.jpe?g(\\?.*)?$",
  -                ".*\\.js(\\?.*)?$",
  -                ".*\\.css(\\?.*)?$"
  -                };
  +            ".*\\.gif(\\?.*)?$",
  +            ".*\\.png(\\?.*)?$",
  +            ".*\\.jpe?g(\\?.*)?$",
  +            ".*\\.js(\\?.*)?$",
  +            ".*\\.css(\\?.*)?$"
  +        };
   
           for (int i = 0; i < EXCLUDE_FROM_CRAWLING_DEFAULT.length; i++) {
               String pattern = EXCLUDE_FROM_CRAWLING_DEFAULT[i];
  @@ -464,18 +463,27 @@
           if (getLogger().isDebugEnabled()) {
               getLogger().debug("Getting links of URL " + sURL);
           }
  +        BufferedReader br = null;
           try {
               sURL = url.getFile();
               URL links = new URL(url, sURL
  -                + ((sURL.indexOf("?") == -1) ? "?" : "&")
  -                + linkViewQuery);
  +                    + ((sURL.indexOf("?") == -1) ? "?" : "&")
  +                    + linkViewQuery);
               URLConnection links_url_connection = links.openConnection();
               InputStream is = links_url_connection.getInputStream();
  -            BufferedReader br = new BufferedReader(new InputStreamReader(is));
  +            br = new BufferedReader(new InputStreamReader(is));
   
               String contentType = links_url_connection.getContentType();
  +            if (contentType == null) {
  +                if (getLogger().isDebugEnabled()) {
  +                    getLogger().debug("Ignoring " + sURL + " (no content type)");
  +                }
  +                // there is a check on null in the calling method
  +                return null;
  +            }
  +
               int index = contentType.indexOf(';');
  -            if (contentType != null && index != -1) {
  +            if (index != -1) {
                   contentType = contentType.substring(0, index);
               }
               if (getLogger().isDebugEnabled()) {
  @@ -521,6 +529,14 @@
               }
           } catch (IOException ioe) {
               getLogger().warn("Problems get links of " + url, ioe);
  +        } finally {
  +            if (br != null) {
  +                try {
  +                    br.close();
  +                    br = null;
  +                } catch (IOException ignored) {
  +                }
  +            }
           }
           return url_links;
       }
  @@ -598,8 +614,7 @@
        * @author     <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
        * @version    $Id$
        */
  -    public static class CocoonCrawlerIterator implements Iterator
  -    {
  +    public static class CocoonCrawlerIterator implements Iterator {
           private SimpleCocoonCrawlerImpl cocoonCrawler;
   
   
  
  
  

----------------------------------------------------------------------
In case of troubles, e-mail:     [EMAIL PROTECTED]
To unsubscribe, e-mail:          [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to